Skip to content

Commit bae6b07

Browse files
author
H. Peter Anvin (Intel)
committed
BR 3392705: AVX512: reinstate the SSE-like opcodes for VPCMPEQ/GT
The VPCMP instructions are controlled by an immediate byte, but there is also a set of SSE-derived legacy opcodes for VPCMPEQ and VPCMPGT. For the specific cases of VPCMPEQ and VPCMPGT, prefer those opcodes since they are one byte shorter. Reported-by: ig <[email protected]> Signed-off-by: H. Peter Anvin (Intel) <[email protected]>
1 parent 6299a31 commit bae6b07

File tree

2 files changed

+54
-0
lines changed

2 files changed

+54
-0
lines changed

test/vpcmp.asm

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; Assembler test for the AVX-512 packed integer compare mnemonics.
; Every instruction compares zmm0 with zmm1 and writes opmask k2 under
; mask k2 (k2{k2}), once for each element size: byte, word, dword, qword.
; Three groups follow:
;   1. the VPCMPEQx / VPCMPGTx mnemonics,
;   2. the generic VPCMPx mnemonic with an explicit immediate (0 and 6),
;   3. the VPCMPNEQx / VPCMPLEx mnemonics.
bits 64
2+
vpcmpeqb k2{k2},zmm0,zmm1
3+
vpcmpgtb k2{k2},zmm0,zmm1
4+
vpcmpeqw k2{k2},zmm0,zmm1
5+
vpcmpgtw k2{k2},zmm0,zmm1
6+
vpcmpeqd k2{k2},zmm0,zmm1
7+
vpcmpgtd k2{k2},zmm0,zmm1
8+
vpcmpeqq k2{k2},zmm0,zmm1
9+
vpcmpgtq k2{k2},zmm0,zmm1
10+
11+
; Generic VPCMPx with the comparison given as an explicit immediate.
; NOTE(review): immediates 0 and 6 presumably select EQ and GT, mirroring
; the group above via the longer immediate encoding -- confirm against the
; Intel SDM predicate table.
vpcmpb k2{k2},zmm0,zmm1,0
12+
vpcmpb k2{k2},zmm0,zmm1,6
13+
vpcmpw k2{k2},zmm0,zmm1,0
14+
vpcmpw k2{k2},zmm0,zmm1,6
15+
vpcmpd k2{k2},zmm0,zmm1,0
16+
vpcmpd k2{k2},zmm0,zmm1,6
17+
vpcmpq k2{k2},zmm0,zmm1,0
18+
vpcmpq k2{k2},zmm0,zmm1,6
19+
20+
; Comparison mnemonics other than EQ/GT (NEQ and LE).
; NOTE(review): presumably these have no SSE-derived opcode and must keep
; assembling to the immediate form -- confirm against x86/insns.dat.
vpcmpneqb k2{k2},zmm0,zmm1
21+
vpcmpleb k2{k2},zmm0,zmm1
22+
vpcmpneqw k2{k2},zmm0,zmm1
23+
vpcmplew k2{k2},zmm0,zmm1
24+
vpcmpneqd k2{k2},zmm0,zmm1
25+
vpcmpled k2{k2},zmm0,zmm1
26+
vpcmpneqq k2{k2},zmm0,zmm1
27+
vpcmpleq k2{k2},zmm0,zmm1

x86/insns.dat

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4811,6 +4811,33 @@ VPBROADCASTW ymmreg|mask|z,reg64 [rm: evex.256.66.0f38.w0 7b
48114811
VPBROADCASTW zmmreg|mask|z,reg16 [rm: evex.512.66.0f38.w0 7b /r ] AVX512BW,FUTURE
48124812
VPBROADCASTW zmmreg|mask|z,reg32 [rm: evex.512.66.0f38.w0 7b /r ] AVX512BW,FUTURE
48134813
VPBROADCASTW zmmreg|mask|z,reg64 [rm: evex.512.66.0f38.w0 7b /r ] AVX512BW,FUTURE
4814+
; VPCMPEQx and VPCMPGTx can be encoded two ways: with the SSE-derived opcodes listed here, or with
4815+
; the generic VPCMP opcode plus an immediate. Both are valid; prefer the SSE-derived encoding, as it is one byte shorter.
; The SSE-derived entries are listed ahead of the immediate forms of the same mnemonics.
; Byte/word entries use the fvm tuple and require AVX512BW; dword/qword entries use the fv tuple,
; accept a broadcast memory operand (|b32 / |b64) and require AVX512.
; The 128- and 256-bit entries additionally require AVX512VL.
4816+
VPCMPEQB kreg|mask,xmmreg,xmmrm128 [rvm:fvm: evex.nds.128.66.0f.wig 74 /r ] AVX512VL,AVX512BW,FUTURE
4817+
VPCMPEQB kreg|mask,ymmreg,ymmrm256 [rvm:fvm: evex.nds.256.66.0f.wig 74 /r ] AVX512VL,AVX512BW,FUTURE
4818+
VPCMPEQB kreg|mask,zmmreg,zmmrm512 [rvm:fvm: evex.nds.512.66.0f.wig 74 /r ] AVX512BW,FUTURE
4819+
VPCMPEQD kreg|mask,xmmreg,xmmrm128|b32 [rvm:fv: evex.nds.128.66.0f.w0 76 /r ] AVX512VL,AVX512,FUTURE
4820+
VPCMPEQD kreg|mask,ymmreg,ymmrm256|b32 [rvm:fv: evex.nds.256.66.0f.w0 76 /r ] AVX512VL,AVX512,FUTURE
4821+
VPCMPEQD kreg|mask,zmmreg,zmmrm512|b32 [rvm:fv: evex.nds.512.66.0f.w0 76 /r ] AVX512,FUTURE
4822+
VPCMPEQQ kreg|mask,xmmreg,xmmrm128|b64 [rvm:fv: evex.nds.128.66.0f38.w1 29 /r ] AVX512VL,AVX512,FUTURE
4823+
VPCMPEQQ kreg|mask,ymmreg,ymmrm256|b64 [rvm:fv: evex.nds.256.66.0f38.w1 29 /r ] AVX512VL,AVX512,FUTURE
4824+
VPCMPEQQ kreg|mask,zmmreg,zmmrm512|b64 [rvm:fv: evex.nds.512.66.0f38.w1 29 /r ] AVX512,FUTURE
4825+
VPCMPEQW kreg|mask,xmmreg,xmmrm128 [rvm:fvm: evex.nds.128.66.0f.wig 75 /r ] AVX512VL,AVX512BW,FUTURE
4826+
VPCMPEQW kreg|mask,ymmreg,ymmrm256 [rvm:fvm: evex.nds.256.66.0f.wig 75 /r ] AVX512VL,AVX512BW,FUTURE
4827+
VPCMPEQW kreg|mask,zmmreg,zmmrm512 [rvm:fvm: evex.nds.512.66.0f.wig 75 /r ] AVX512BW,FUTURE
4828+
VPCMPGTB kreg|mask,xmmreg,xmmrm128 [rvm:fvm: evex.nds.128.66.0f.wig 64 /r ] AVX512VL,AVX512BW,FUTURE
4829+
VPCMPGTB kreg|mask,ymmreg,ymmrm256 [rvm:fvm: evex.nds.256.66.0f.wig 64 /r ] AVX512VL,AVX512BW,FUTURE
4830+
VPCMPGTB kreg|mask,zmmreg,zmmrm512 [rvm:fvm: evex.nds.512.66.0f.wig 64 /r ] AVX512BW,FUTURE
4831+
VPCMPGTD kreg|mask,xmmreg,xmmrm128|b32 [rvm:fv: evex.nds.128.66.0f.w0 66 /r ] AVX512VL,AVX512,FUTURE
4832+
VPCMPGTD kreg|mask,ymmreg,ymmrm256|b32 [rvm:fv: evex.nds.256.66.0f.w0 66 /r ] AVX512VL,AVX512,FUTURE
4833+
VPCMPGTD kreg|mask,zmmreg,zmmrm512|b32 [rvm:fv: evex.nds.512.66.0f.w0 66 /r ] AVX512,FUTURE
4834+
VPCMPGTQ kreg|mask,xmmreg,xmmrm128|b64 [rvm:fv: evex.nds.128.66.0f38.w1 37 /r ] AVX512VL,AVX512,FUTURE
4835+
VPCMPGTQ kreg|mask,ymmreg,ymmrm256|b64 [rvm:fv: evex.nds.256.66.0f38.w1 37 /r ] AVX512VL,AVX512,FUTURE
4836+
VPCMPGTQ kreg|mask,zmmreg,zmmrm512|b64 [rvm:fv: evex.nds.512.66.0f38.w1 37 /r ] AVX512,FUTURE
4837+
VPCMPGTW kreg|mask,xmmreg,xmmrm128 [rvm:fvm: evex.nds.128.66.0f.wig 65 /r ] AVX512VL,AVX512BW,FUTURE
4838+
VPCMPGTW kreg|mask,ymmreg,ymmrm256 [rvm:fvm: evex.nds.256.66.0f.wig 65 /r ] AVX512VL,AVX512BW,FUTURE
4839+
VPCMPGTW kreg|mask,zmmreg,zmmrm512 [rvm:fvm: evex.nds.512.66.0f.wig 65 /r ] AVX512BW,FUTURE
4840+
; The systematic VPCMP forms, in which an immediate byte selects the comparison
48144841
VPCMPEQB kreg|mask,xmmreg,xmmrm128 [rvmi:fvm: evex.nds.128.66.0f3a.w0 3f /r 00 ] AVX512VL,AVX512BW,FUTURE
48154842
VPCMPEQB kreg|mask,ymmreg,ymmrm256 [rvmi:fvm: evex.nds.256.66.0f3a.w0 3f /r 00 ] AVX512VL,AVX512BW,FUTURE
48164843
VPCMPEQB kreg|mask,zmmreg,zmmrm512 [rvmi:fvm: evex.nds.512.66.0f3a.w0 3f /r 00 ] AVX512BW,FUTURE

0 commit comments

Comments
 (0)