diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 34f90b33bc4ba..02c5fdb99419c 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -643,8 +643,8 @@ defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>; defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile>; defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile>; -defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile>; -defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile>; +defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; +defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>; @@ -1755,8 +1755,8 @@ defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30 defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">; defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">; defm V_PACK_B32_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x311, "v_pack_b32_f16">; -defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >; -defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >; +defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x312, "v_cvt_pk_norm_i16_f16", "V_CVT_PKNORM_I16_F16", "v_cvt_pknorm_i16_f16">; +defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x313, "v_cvt_pk_norm_u16_f16", "V_CVT_PKNORM_U16_F16", "v_cvt_pknorm_u16_f16">; defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">; defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x326, "V_ADD_I32", "v_add_nc_i32">; defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>; diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index d46f010a2dafb..de918157d8005 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -1268,11 +1268,11 @@ v_cvt_pk_i16_i32 v5, src_scc, vcc_lo v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi // GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_pk_norm_i16_f16 v5, v1, v2 -// GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l +// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_i16_f16 v5, v255, v255 -// GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l +// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] v_cvt_pk_norm_i16_f16 v5, s1, s2 // GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] @@ -1304,7 +1304,7 @@ v_cvt_pk_norm_i16_f16 v5, null, exec_lo v_cvt_pk_norm_i16_f16 v5, -1, exec_hi // GFX11: v_cvt_pk_norm_i16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 // GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] @@ -1313,11 +1313,23 @@ v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] // GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, v1, v2 -// GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l +// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_u16_f16 v5, v255, v255 -// GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h +// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| +// GFX11: v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x12,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| +// GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l +// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l +// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] v_cvt_pk_norm_u16_f16 v5, s1, s2 // GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] @@ -1349,7 +1361,7 @@ v_cvt_pk_norm_u16_f16 v5, null, exec_lo v_cvt_pk_norm_u16_f16 v5, -1, exec_hi // GFX11: v_cvt_pk_norm_u16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 // GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] @@ -1358,6 +1370,18 @@ v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] // GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l +// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h +// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| +// GFX11: v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x13,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| +// GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + v_cvt_pk_u16_f32 v5, v1, v2 // GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 36d959faa9984..dba5d5ed54540 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -1124,89 +1124,101 @@ v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -4757,17 +4769,18 @@ v_xor_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_xor_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x40,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf // GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 479bd19eaac89..1f9d971be3abe 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -628,29 +628,41 @@ v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x24,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3057,17 +3069,18 @@ v_xor_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 v_xor_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x40,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index 62bebd00ee51f..addbade32f433 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -1286,11 +1286,11 @@ v_cvt_pk_i16_i32 v5, src_scc, vcc_lo v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi // GFX12: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_pk_norm_i16_f16 v5, v1, v2 -// GFX12: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l +// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_i16_f16 v5, v255, v255 -// GFX12: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l +// GFX12: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] v_cvt_pk_norm_i16_f16 v5, s1, s2 // GFX12: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] @@ -1331,11 +1331,23 @@ v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] // GFX12: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, v1, v2 -// GFX12: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l +// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_u16_f16 v5, v255, v255 -// GFX12: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +v_cvt_pknorm_i16_f16 v5, v1.h, v2.l +// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h +// GFX12: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pknorm_i16_f16 v5, v255.l, v255.h +// GFX12: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l +// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l +// GFX12: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] v_cvt_pk_norm_u16_f16 v5, s1, s2 // GFX12: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] @@ -1376,6 +1388,18 @@ v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] // GFX12: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l +// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pknorm_u16_f16 v5, v1.h, v2.l +// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h +// GFX12: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pknorm_u16_f16 v5, v255.l, v255.h +// GFX12: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + v_cvt_pk_u16_f32 v5, v1, v2 // GFX12: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index e7b12ec2deff5..296adf9c34eb1 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -1442,6 +1442,12 @@ v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0 v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -1484,6 +1490,12 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0 v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index b879e59b3608b..1e74ef2dbac35 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -840,6 +840,12 @@ v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -852,6 +858,12 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 857da7cd58cfb..fe3ca0559cbad 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -1375,10 +1375,16 @@ # GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00 # GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] @@ -1419,11 +1425,41 @@ 0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + +0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + 0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00 # GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] @@ -1464,6 +1500,30 @@ 0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + +0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + 0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 # GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 93992cba43771..70d57dd8a111e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -4113,88 +4113,196 @@ # W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] 0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index a339d553fb1b6..176a922969128 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -2349,28 +2349,76 @@ # W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index d31e96af43d89..7ff0ab9deb533 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -1393,10 +1393,16 @@ # GFX12: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00 # GFX12: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] @@ -1437,11 +1443,29 @@ 0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX12: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + 0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00 # GFX12: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] @@ -1482,6 +1506,18 @@ 0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX12: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + 0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 # GFX12: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 53c1f2eb0cf12..22c0bb07aa6bb 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -4452,88 +4452,196 @@ # W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] 0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index a77b90c4d31bc..79de08d6c98f5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -2622,28 +2622,76 @@ # W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]