diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 6ff283b9c8075..9fbc22221f99b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -518,55 +518,20 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { ; ; RV64ZVE32F-LABEL: mgather_truemask_v4i8: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB9_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB9_6 -; RV64ZVE32F-NEXT: .LBB9_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB9_7 -; RV64ZVE32F-NEXT: .LBB9_3: # %else5 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: bnez a1, .LBB9_8 -; RV64ZVE32F-NEXT: .LBB9_4: # %else8 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB9_5: # %cond.load -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB9_2 -; RV64ZVE32F-NEXT: .LBB9_6: # %cond.load1 -; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB9_3 -; RV64ZVE32F-NEXT: .LBB9_7: # %cond.load4 +; RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a3, 24(a0) +; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a1, 0(a1) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: beqz a1, .LBB9_4 -; RV64ZVE32F-NEXT: .LBB9_8: # %cond.load7 -; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: lbu a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-NEXT: vlse8.v v8, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %mtrue, <4 x i8> %passthru) + %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1), <4 x i8> %passthru) ret <4 x i8> %v } @@ -1242,55 +1207,20 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) { ; ; RV64ZVE32F-LABEL: mgather_truemask_v4i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB20_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB20_6 -; RV64ZVE32F-NEXT: .LBB20_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB20_7 -; RV64ZVE32F-NEXT: 
.LBB20_3: # %else5 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: bnez a1, .LBB20_8 -; RV64ZVE32F-NEXT: .LBB20_4: # %else8 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB20_5: # %cond.load -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB20_2 -; RV64ZVE32F-NEXT: .LBB20_6: # %cond.load1 -; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB20_3 -; RV64ZVE32F-NEXT: .LBB20_7: # %cond.load4 +; RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a3, 24(a0) +; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: lh a1, 0(a1) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: beqz a1, .LBB20_4 -; RV64ZVE32F-NEXT: .LBB20_8: # %cond.load7 -; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: lh a3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue, <4 x i16> %passthru) + %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x i16> %passthru) ret <4 x i16> %v } @@ -2326,55 +2256,20 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) { ; ; RV64ZVE32F-LABEL: mgather_truemask_v4i32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB32_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB32_6 -; RV64ZVE32F-NEXT: .LBB32_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB32_7 -; RV64ZVE32F-NEXT: .LBB32_3: # %else5 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: bnez a1, .LBB32_8 -; RV64ZVE32F-NEXT: .LBB32_4: # %else8 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB32_5: # %cond.load -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB32_2 -; RV64ZVE32F-NEXT: .LBB32_6: # %cond.load1 -; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB32_3 -; RV64ZVE32F-NEXT: .LBB32_7: # %cond.load4 +; 
RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a3, 24(a0) +; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: lw a1, 0(a1) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: beqz a1, .LBB32_4 -; RV64ZVE32F-NEXT: .LBB32_8: # %cond.load7 -; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: lw a0, 0(a0) +; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue, <4 x i32> %passthru) + %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x i32> %passthru) ret <4 x i32> %v } @@ -3839,117 +3734,48 @@ define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) { ; ; RV32ZVE32F-LABEL: mgather_truemask_v4i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmset.m v9 -; RV32ZVE32F-NEXT: vmv.x.s a6, v9 -; RV32ZVE32F-NEXT: bnez zero, .LBB45_5 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a2, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) -; RV32ZVE32F-NEXT: andi a4, a6, 2 -; RV32ZVE32F-NEXT: bnez a4, .LBB45_6 -; RV32ZVE32F-NEXT: .LBB45_2: -; RV32ZVE32F-NEXT: lw a4, 12(a1) -; RV32ZVE32F-NEXT: lw a5, 8(a1) -; RV32ZVE32F-NEXT: andi a7, a6, 4 -; RV32ZVE32F-NEXT: bnez a7, .LBB45_7 -; RV32ZVE32F-NEXT: .LBB45_3: -; RV32ZVE32F-NEXT: lw a7, 20(a1) -; RV32ZVE32F-NEXT: lw t0, 16(a1) -; RV32ZVE32F-NEXT: andi a6, a6, 8 -; RV32ZVE32F-NEXT: bnez a6, .LBB45_8 -; RV32ZVE32F-NEXT: .LBB45_4: -; RV32ZVE32F-NEXT: lw a6, 28(a1) -; RV32ZVE32F-NEXT: lw a1, 24(a1) -; RV32ZVE32F-NEXT: j .LBB45_9 -; RV32ZVE32F-NEXT: .LBB45_5: -; RV32ZVE32F-NEXT: lw a2, 4(a1) -; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: andi a4, a6, 2 -; RV32ZVE32F-NEXT: beqz a4, .LBB45_2 -; RV32ZVE32F-NEXT: .LBB45_6: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: lw a2, 0(a1) +; RV32ZVE32F-NEXT: lw a1, 4(a1) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a5, v9 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) -; RV32ZVE32F-NEXT: andi a7, a6, 4 -; RV32ZVE32F-NEXT: beqz a7, .LBB45_3 -; RV32ZVE32F-NEXT: .LBB45_7: # %cond.load4 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a3, v9 +; RV32ZVE32F-NEXT: lw a4, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s t0, v9 -; RV32ZVE32F-NEXT: lw a7, 4(t0) -; RV32ZVE32F-NEXT: lw t0, 0(t0) -; RV32ZVE32F-NEXT: andi a6, a6, 8 -; RV32ZVE32F-NEXT: beqz a6, .LBB45_4 -; RV32ZVE32F-NEXT: .LBB45_8: # %cond.load7 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a5, v9 +; RV32ZVE32F-NEXT: lw a6, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; 
RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: lw a6, 4(a1) -; RV32ZVE32F-NEXT: lw a1, 0(a1) -; RV32ZVE32F-NEXT: .LBB45_9: # %else8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw t0, 16(a0) -; RV32ZVE32F-NEXT: sw a7, 20(a0) -; RV32ZVE32F-NEXT: sw a1, 24(a0) -; RV32ZVE32F-NEXT: sw a6, 28(a0) +; RV32ZVE32F-NEXT: vmv.x.s a7, v8 +; RV32ZVE32F-NEXT: lw t0, 4(a7) +; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: sw a1, 4(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw t0, 28(a0) +; RV32ZVE32F-NEXT: sw a7, 24(a0) +; RV32ZVE32F-NEXT: sw a5, 20(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a3, 12(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_truemask_v4i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a5, v8 -; RV64ZVE32F-NEXT: bnez zero, .LBB45_5 -; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: ld a3, 0(a1) -; RV64ZVE32F-NEXT: ld a3, 0(a3) -; RV64ZVE32F-NEXT: andi a4, a5, 2 -; RV64ZVE32F-NEXT: bnez a4, .LBB45_6 -; RV64ZVE32F-NEXT: .LBB45_2: -; RV64ZVE32F-NEXT: ld a4, 8(a2) -; RV64ZVE32F-NEXT: andi a6, a5, 4 -; RV64ZVE32F-NEXT: bnez a6, .LBB45_7 -; RV64ZVE32F-NEXT: .LBB45_3: -; RV64ZVE32F-NEXT: ld a6, 16(a2) -; RV64ZVE32F-NEXT: andi a5, a5, 8 -; RV64ZVE32F-NEXT: bnez a5, .LBB45_8 -; RV64ZVE32F-NEXT: .LBB45_4: -; RV64ZVE32F-NEXT: ld a1, 24(a2) -; RV64ZVE32F-NEXT: j .LBB45_9 -; RV64ZVE32F-NEXT: .LBB45_5: -; RV64ZVE32F-NEXT: ld a3, 0(a2) -; RV64ZVE32F-NEXT: andi a4, a5, 2 -; RV64ZVE32F-NEXT: beqz a4, .LBB45_2 -; RV64ZVE32F-NEXT: .LBB45_6: # %cond.load1 +; RV64ZVE32F-NEXT: ld a2, 24(a1) +; RV64ZVE32F-NEXT: ld a3, 16(a1) ; RV64ZVE32F-NEXT: ld a4, 8(a1) +; RV64ZVE32F-NEXT: ld a1, 0(a1) +; RV64ZVE32F-NEXT: ld a2, 0(a2) +; RV64ZVE32F-NEXT: ld a3, 0(a3) ; RV64ZVE32F-NEXT: ld a4, 0(a4) -; RV64ZVE32F-NEXT: andi a6, a5, 4 -; RV64ZVE32F-NEXT: beqz a6, .LBB45_3 -; RV64ZVE32F-NEXT: .LBB45_7: # %cond.load4 -; RV64ZVE32F-NEXT: ld a6, 16(a1) -; RV64ZVE32F-NEXT: ld a6, 0(a6) -; RV64ZVE32F-NEXT: andi a5, a5, 8 -; RV64ZVE32F-NEXT: beqz a5, .LBB45_4 -; RV64ZVE32F-NEXT: .LBB45_8: # %cond.load7 -; RV64ZVE32F-NEXT: ld a1, 24(a1) ; RV64ZVE32F-NEXT: ld a1, 0(a1) -; RV64ZVE32F-NEXT: .LBB45_9: # %else8 -; RV64ZVE32F-NEXT: sd a3, 0(a0) +; RV64ZVE32F-NEXT: sd a2, 24(a0) +; RV64ZVE32F-NEXT: sd a3, 16(a0) ; RV64ZVE32F-NEXT: sd a4, 8(a0) -; RV64ZVE32F-NEXT: sd a6, 16(a0) -; RV64ZVE32F-NEXT: sd a1, 24(a0) +; RV64ZVE32F-NEXT: sd a1, 0(a0) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue, <4 x i64> %passthru) + %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x i64> %passthru) ret <4 x i64> %v } @@ -7190,55 +7016,20 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) ; ; RV64ZVE32F-LABEL: mgather_truemask_v4f16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB61_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB61_6 -; RV64ZVE32F-NEXT: 
.LBB61_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB61_7 -; RV64ZVE32F-NEXT: .LBB61_3: # %else5 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: bnez a1, .LBB61_8 -; RV64ZVE32F-NEXT: .LBB61_4: # %else8 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB61_5: # %cond.load -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB61_2 -; RV64ZVE32F-NEXT: .LBB61_6: # %cond.load1 -; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB61_3 -; RV64ZVE32F-NEXT: .LBB61_7: # %cond.load4 +; RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: beqz a1, .LBB61_4 -; RV64ZVE32F-NEXT: .LBB61_8: # %cond.load7 -; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 24(a0) +; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: flh fa5, 0(a1) +; RV64ZVE32F-NEXT: flh fa4, 0(a2) +; RV64ZVE32F-NEXT: flh fa3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5 +; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 +; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue, <4 x half> %passthru) + %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru) ret <4 x half> %v } @@ -8148,55 +7939,20 @@ define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthr ; ; RV64ZVE32F-LABEL: mgather_truemask_v4f32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB71_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB71_6 -; RV64ZVE32F-NEXT: .LBB71_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB71_7 -; RV64ZVE32F-NEXT: .LBB71_3: # %else5 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: bnez a1, .LBB71_8 -; RV64ZVE32F-NEXT: .LBB71_4: # %else8 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB71_5: # %cond.load -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB71_2 -; RV64ZVE32F-NEXT: .LBB71_6: # %cond.load1 -; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, 
m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB71_3 -; RV64ZVE32F-NEXT: .LBB71_7: # %cond.load4 +; RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: beqz a1, .LBB71_4 -; RV64ZVE32F-NEXT: .LBB71_8: # %cond.load7 -; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: flw fa5, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 24(a0) +; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: flw fa5, 0(a1) +; RV64ZVE32F-NEXT: flw fa4, 0(a2) +; RV64ZVE32F-NEXT: flw fa3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero +; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5 +; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 +; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue, <4 x float> %passthru) + %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x float> %passthru) ret <4 x float> %v } @@ -9627,95 +9383,40 @@ define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passt ; ; RV32ZVE32F-LABEL: mgather_truemask_v4f64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmset.m v9 -; RV32ZVE32F-NEXT: vmv.x.s a1, v9 -; RV32ZVE32F-NEXT: beqz zero, .LBB84_6 -; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB84_7 -; RV32ZVE32F-NEXT: .LBB84_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB84_8 -; RV32ZVE32F-NEXT: .LBB84_3: # %else5 -; RV32ZVE32F-NEXT: andi a1, a1, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_5 -; RV32ZVE32F-NEXT: .LBB84_4: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v9 +; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa3, 0(a1) -; RV32ZVE32F-NEXT: .LBB84_5: # %else8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a0) -; RV32ZVE32F-NEXT: fsd fa1, 8(a0) -; RV32ZVE32F-NEXT: fsd fa2, 16(a0) +; RV32ZVE32F-NEXT: vmv.x.s a1, v9 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) ; RV32ZVE32F-NEXT: fsd fa3, 24(a0) +; RV32ZVE32F-NEXT: fsd fa4, 16(a0) +; RV32ZVE32F-NEXT: fsd fa2, 8(a0) ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB84_6: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB84_2 -; RV32ZVE32F-NEXT: .LBB84_7: # %cond.load1 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v9 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB84_3 -; RV32ZVE32F-NEXT: .LBB84_8: # 
%cond.load4 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v9 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_4 -; RV32ZVE32F-NEXT: j .LBB84_5 ; ; RV64ZVE32F-LABEL: mgather_truemask_v4f64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB84_6 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB84_7 -; RV64ZVE32F-NEXT: .LBB84_2: # %else2 -; RV64ZVE32F-NEXT: andi a3, a2, 4 -; RV64ZVE32F-NEXT: bnez a3, .LBB84_8 -; RV64ZVE32F-NEXT: .LBB84_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a2, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB84_5 -; RV64ZVE32F-NEXT: .LBB84_4: # %cond.load7 -; RV64ZVE32F-NEXT: ld a1, 24(a1) -; RV64ZVE32F-NEXT: fld fa3, 0(a1) -; RV64ZVE32F-NEXT: .LBB84_5: # %else8 -; RV64ZVE32F-NEXT: fsd fa0, 0(a0) -; RV64ZVE32F-NEXT: fsd fa1, 8(a0) -; RV64ZVE32F-NEXT: fsd fa2, 16(a0) -; RV64ZVE32F-NEXT: fsd fa3, 24(a0) -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB84_6: # %cond.load -; RV64ZVE32F-NEXT: ld a3, 0(a1) -; RV64ZVE32F-NEXT: fld fa0, 0(a3) -; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB84_2 -; RV64ZVE32F-NEXT: .LBB84_7: # %cond.load1 -; RV64ZVE32F-NEXT: ld a3, 8(a1) -; RV64ZVE32F-NEXT: fld fa1, 0(a3) -; RV64ZVE32F-NEXT: andi a3, a2, 4 -; RV64ZVE32F-NEXT: beqz a3, .LBB84_3 -; RV64ZVE32F-NEXT: .LBB84_8: # %cond.load4 +; RV64ZVE32F-NEXT: ld a2, 24(a1) ; RV64ZVE32F-NEXT: ld a3, 16(a1) -; RV64ZVE32F-NEXT: fld fa2, 0(a3) -; RV64ZVE32F-NEXT: andi a2, a2, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_4 -; RV64ZVE32F-NEXT: j .LBB84_5 - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue, <4 x double> %passthru) +; RV64ZVE32F-NEXT: ld a4, 8(a1) +; RV64ZVE32F-NEXT: ld a1, 0(a1) +; RV64ZVE32F-NEXT: fld fa5, 0(a2) +; RV64ZVE32F-NEXT: fld fa4, 0(a3) +; RV64ZVE32F-NEXT: fld fa3, 0(a4) +; RV64ZVE32F-NEXT: fld fa2, 0(a1) +; RV64ZVE32F-NEXT: fsd fa5, 24(a0) +; RV64ZVE32F-NEXT: fsd fa4, 16(a0) +; RV64ZVE32F-NEXT: fsd fa3, 8(a0) +; RV64ZVE32F-NEXT: fsd fa2, 0(a0) +; RV64ZVE32F-NEXT: ret + %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x double> %passthru) ret <4 x double> %v } @@ -12885,10 +12586,8 @@ define <4 x i32> @mgather_unit_stride_load(ptr %base) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> - %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison) + %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ret <4 x i32> %v } @@ -12899,10 +12598,8 @@ define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %ptrs = getelementptr 
inbounds i32, ptr %base, <4 x i32> - %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison) + %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ret <4 x i32> %v } @@ -12912,10 +12609,8 @@ define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> - %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison) + %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ret <4 x i32> %v } @@ -12925,10 +12620,8 @@ define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %ptrs = getelementptr inbounds i32, ptr %base, <4 x i128> - %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison) + %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ret <4 x i32> %v } @@ -12959,51 +12652,15 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_narrow_edge_case: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB106_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB106_6 -; RV64ZVE32F-NEXT: .LBB106_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB106_7 -; RV64ZVE32F-NEXT: .LBB106_3: # %else5 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: bnez a1, .LBB106_8 -; RV64ZVE32F-NEXT: .LBB106_4: # %else8 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB106_5: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB106_2 -; RV64ZVE32F-NEXT: .LBB106_6: # %cond.load1 -; RV64ZVE32F-NEXT: lw a2, -512(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB106_3 -; RV64ZVE32F-NEXT: .LBB106_7: # %cond.load4 -; RV64ZVE32F-NEXT: lw a2, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a1, a1, 8 -; RV64ZVE32F-NEXT: beqz a1, .LBB106_4 -; RV64ZVE32F-NEXT: .LBB106_8: # %cond.load7 -; RV64ZVE32F-NEXT: lw a0, -512(a0) +; RV64ZVE32F-NEXT: addi a1, a0, -512 +; RV64ZVE32F-NEXT: lw a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 +; RV64ZVE32F-NEXT: vlse32.v v8, (a1), zero +; RV64ZVE32F-NEXT: vmv.v.i v0, 5 +; RV64ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64ZVE32F-NEXT: ret - %head = 
insertelement <4 x i1> poison, i1 true, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> - %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison) + %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ret <4 x i32> %v } @@ -13011,404 +12668,198 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV32-LABEL: mgather_strided_unaligned: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmset.m v8 -; RV32-NEXT: vid.v v10 -; RV32-NEXT: vsll.vi v10, v10, 2 -; RV32-NEXT: vadd.vx v10, v10, a0 -; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vid.v v8 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vadd.vx v8, v8, a0 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: # implicit-def: $v8 -; RV32-NEXT: beqz zero, .LBB107_9 -; RV32-NEXT: # %bb.1: # %else -; RV32-NEXT: andi a1, a0, 2 -; RV32-NEXT: bnez a1, .LBB107_10 -; RV32-NEXT: .LBB107_2: # %else2 -; RV32-NEXT: andi a1, a0, 4 -; RV32-NEXT: bnez a1, .LBB107_11 -; RV32-NEXT: .LBB107_3: # %else5 -; RV32-NEXT: andi a1, a0, 8 -; RV32-NEXT: bnez a1, .LBB107_12 -; RV32-NEXT: .LBB107_4: # %else8 -; RV32-NEXT: andi a1, a0, 16 -; RV32-NEXT: bnez a1, .LBB107_13 -; RV32-NEXT: .LBB107_5: # %else11 -; RV32-NEXT: andi a1, a0, 32 -; RV32-NEXT: bnez a1, .LBB107_14 -; RV32-NEXT: .LBB107_6: # %else14 -; RV32-NEXT: andi a1, a0, 64 -; RV32-NEXT: bnez a1, .LBB107_15 -; RV32-NEXT: .LBB107_7: # %else17 -; RV32-NEXT: andi a0, a0, -128 -; RV32-NEXT: bnez a0, .LBB107_16 -; RV32-NEXT: .LBB107_8: # %else20 -; RV32-NEXT: ret -; RV32-NEXT: .LBB107_9: # %cond.load -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.x.s a1, v10 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 -; RV32-NEXT: andi a1, a0, 2 -; RV32-NEXT: beqz a1, .LBB107_2 -; RV32-NEXT: .LBB107_10: # %cond.load1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 1 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 1 -; RV32-NEXT: andi a1, a0, 4 -; RV32-NEXT: beqz a1, .LBB107_3 -; RV32-NEXT: .LBB107_11: # %cond.load4 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 2 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 2 -; RV32-NEXT: andi a1, a0, 8 -; RV32-NEXT: beqz a1, .LBB107_4 -; RV32-NEXT: .LBB107_12: # %cond.load7 +; RV32-NEXT: lbu a1, 0(a0) +; RV32-NEXT: lbu a0, 1(a0) ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 3 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 3 -; RV32-NEXT: andi a1, a0, 16 -; RV32-NEXT: beqz a1, .LBB107_5 -; RV32-NEXT: .LBB107_13: # %cond.load10 -; RV32-NEXT: vsetivli zero, 
1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 4 -; RV32-NEXT: vmv.x.s a1, v12 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 4 -; RV32-NEXT: andi a1, a0, 32 -; RV32-NEXT: beqz a1, .LBB107_6 -; RV32-NEXT: .LBB107_14: # %cond.load13 -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 5 -; RV32-NEXT: vmv.x.s a1, v12 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 5 -; RV32-NEXT: andi a1, a0, 64 -; RV32-NEXT: beqz a1, .LBB107_7 -; RV32-NEXT: .LBB107_15: # %cond.load16 -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 6 -; RV32-NEXT: vmv.x.s a1, v12 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 6 -; RV32-NEXT: andi a0, a0, -128 -; RV32-NEXT: beqz a0, .LBB107_8 -; RV32-NEXT: .LBB107_16: # %cond.load19 -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v10, 7 -; RV32-NEXT: vmv.x.s a0, v10 -; RV32-NEXT: lbu a1, 1(a0) -; RV32-NEXT: lbu a0, 0(a0) +; RV32-NEXT: vslidedown.vi v10, v8, 1 +; RV32-NEXT: vmv.x.s a2, v10 +; RV32-NEXT: lbu a3, 1(a2) +; RV32-NEXT: lbu a2, 0(a2) +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: slli a3, a3, 8 +; RV32-NEXT: or a2, a3, a2 +; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vmv.x.s a1, v10 +; RV32-NEXT: lbu a3, 0(a1) +; RV32-NEXT: lbu a1, 1(a1) +; RV32-NEXT: vslidedown.vi v10, v8, 3 +; RV32-NEXT: vmv.x.s a4, v10 +; RV32-NEXT: lbu a5, 1(a4) +; RV32-NEXT: lbu a4, 0(a4) ; RV32-NEXT: slli a1, a1, 8 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v8, v9, 7 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: slli a5, a5, 8 +; RV32-NEXT: or a4, a5, a4 +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 4 +; RV32-NEXT: vmv.x.s a3, v10 +; RV32-NEXT: lbu a5, 0(a3) +; RV32-NEXT: lbu a3, 1(a3) +; RV32-NEXT: vslidedown.vi v10, v8, 5 +; RV32-NEXT: vmv.x.s a6, v10 +; RV32-NEXT: lbu a7, 1(a6) +; RV32-NEXT: lbu a6, 0(a6) +; RV32-NEXT: slli a3, a3, 8 +; RV32-NEXT: or a3, a3, a5 +; RV32-NEXT: slli a7, a7, 8 +; RV32-NEXT: or a5, a7, a6 +; RV32-NEXT: vslidedown.vi v10, v8, 6 +; RV32-NEXT: vmv.x.s a6, v10 +; RV32-NEXT: lbu a7, 0(a6) +; RV32-NEXT: lbu a6, 1(a6) +; RV32-NEXT: vslidedown.vi v8, v8, 7 +; RV32-NEXT: vmv.x.s t0, v8 +; RV32-NEXT: lbu t1, 1(t0) +; RV32-NEXT: lbu t0, 0(t0) +; RV32-NEXT: slli a6, a6, 8 +; RV32-NEXT: or a6, a6, a7 +; RV32-NEXT: slli t1, t1, 8 +; RV32-NEXT: or a7, t1, t0 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-NEXT: vmv.v.x v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: vslide1down.vx v9, v8, a4 +; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: vslide1down.vx v8, v8, a5 +; RV32-NEXT: vslide1down.vx v8, v8, a6 +; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v8, v8, a7 +; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_strided_unaligned: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 
-; RV64V-NEXT: vmset.m v8 -; RV64V-NEXT: vid.v v12 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vadd.vx v12, v12, a0 -; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV64V-NEXT: vmv.x.s a0, v8 -; RV64V-NEXT: # implicit-def: $v8 -; RV64V-NEXT: beqz zero, .LBB107_11 -; RV64V-NEXT: # %bb.1: # %else -; RV64V-NEXT: andi a1, a0, 2 -; RV64V-NEXT: bnez a1, .LBB107_12 -; RV64V-NEXT: .LBB107_2: # %else2 -; RV64V-NEXT: andi a1, a0, 4 -; RV64V-NEXT: bnez a1, .LBB107_13 -; RV64V-NEXT: .LBB107_3: # %else5 -; RV64V-NEXT: andi a1, a0, 8 -; RV64V-NEXT: beqz a1, .LBB107_5 -; RV64V-NEXT: .LBB107_4: # %cond.load7 -; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v10, v12, 3 -; RV64V-NEXT: vmv.x.s a1, v10 -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 3 -; RV64V-NEXT: .LBB107_5: # %else8 -; RV64V-NEXT: addi sp, sp, -320 -; RV64V-NEXT: .cfi_def_cfa_offset 320 -; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill -; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill +; RV64V-NEXT: addi sp, sp, -128 +; RV64V-NEXT: .cfi_def_cfa_offset 128 +; RV64V-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64V-NEXT: sd s0, 112(sp) # 8-byte Folded Spill ; RV64V-NEXT: .cfi_offset ra, -8 ; RV64V-NEXT: .cfi_offset s0, -16 -; RV64V-NEXT: addi s0, sp, 320 +; RV64V-NEXT: addi s0, sp, 128 ; RV64V-NEXT: .cfi_def_cfa s0, 0 ; RV64V-NEXT: andi sp, sp, -64 -; RV64V-NEXT: andi a1, a0, 16 -; RV64V-NEXT: bnez a1, .LBB107_14 -; RV64V-NEXT: # %bb.6: # %else11 -; RV64V-NEXT: andi a1, a0, 32 -; RV64V-NEXT: bnez a1, .LBB107_15 -; RV64V-NEXT: .LBB107_7: # %else14 -; RV64V-NEXT: andi a1, a0, 64 -; RV64V-NEXT: bnez a1, .LBB107_16 -; RV64V-NEXT: .LBB107_8: # %else17 -; RV64V-NEXT: andi a0, a0, -128 -; RV64V-NEXT: beqz a0, .LBB107_10 -; RV64V-NEXT: .LBB107_9: # %cond.load19 -; RV64V-NEXT: mv a0, sp ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vse64.v v12, (a0) -; RV64V-NEXT: ld a0, 56(sp) -; RV64V-NEXT: lbu a1, 1(a0) -; RV64V-NEXT: lbu a0, 0(a0) -; RV64V-NEXT: slli a1, a1, 8 -; RV64V-NEXT: or a0, a1, a0 -; RV64V-NEXT: vmv.s.x v9, a0 -; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64V-NEXT: vslideup.vi v8, v9, 7 -; RV64V-NEXT: .LBB107_10: # %else20 -; RV64V-NEXT: addi sp, s0, -320 -; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload -; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload -; RV64V-NEXT: addi sp, sp, 320 -; RV64V-NEXT: ret -; RV64V-NEXT: .LBB107_11: # %cond.load -; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64V-NEXT: vmv.x.s a1, v12 -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64V-NEXT: vmv.v.x v8, a1 -; RV64V-NEXT: andi a1, a0, 2 -; RV64V-NEXT: beqz a1, .LBB107_2 -; RV64V-NEXT: .LBB107_12: # %cond.load1 +; RV64V-NEXT: vid.v v8 +; RV64V-NEXT: vsll.vi v8, v8, 2 +; RV64V-NEXT: vadd.vx v8, v8, a0 +; RV64V-NEXT: vmv.x.s a0, v8 +; RV64V-NEXT: lbu a1, 0(a0) +; RV64V-NEXT: lbu a0, 1(a0) ; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64V-NEXT: vslidedown.vi v9, v12, 1 -; RV64V-NEXT: vmv.x.s a1, v9 -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 1 -; RV64V-NEXT: andi a1, a0, 4 -; 
RV64V-NEXT: beqz a1, .LBB107_3 -; RV64V-NEXT: .LBB107_13: # %cond.load4 +; RV64V-NEXT: vslidedown.vi v12, v8, 1 +; RV64V-NEXT: vmv.x.s a2, v12 +; RV64V-NEXT: lbu a3, 1(a2) +; RV64V-NEXT: lbu a2, 0(a2) +; RV64V-NEXT: slli a0, a0, 8 +; RV64V-NEXT: or a0, a0, a1 +; RV64V-NEXT: slli a1, a3, 8 +; RV64V-NEXT: or a1, a1, a2 ; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v10, v12, 2 -; RV64V-NEXT: vmv.x.s a1, v10 -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 2 -; RV64V-NEXT: andi a1, a0, 8 -; RV64V-NEXT: bnez a1, .LBB107_4 -; RV64V-NEXT: j .LBB107_5 -; RV64V-NEXT: .LBB107_14: # %cond.load10 -; RV64V-NEXT: addi a1, sp, 192 -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vse64.v v12, (a1) -; RV64V-NEXT: ld a1, 224(sp) -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 4 -; RV64V-NEXT: andi a1, a0, 32 -; RV64V-NEXT: beqz a1, .LBB107_7 -; RV64V-NEXT: .LBB107_15: # %cond.load13 -; RV64V-NEXT: addi a1, sp, 128 -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vse64.v v12, (a1) -; RV64V-NEXT: ld a1, 168(sp) -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 5 -; RV64V-NEXT: andi a1, a0, 64 -; RV64V-NEXT: beqz a1, .LBB107_8 -; RV64V-NEXT: .LBB107_16: # %cond.load16 -; RV64V-NEXT: addi a1, sp, 64 -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vse64.v v12, (a1) -; RV64V-NEXT: ld a1, 112(sp) -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) +; RV64V-NEXT: vslidedown.vi v12, v8, 2 +; RV64V-NEXT: vmv.x.s a2, v12 +; RV64V-NEXT: lbu a3, 0(a2) +; RV64V-NEXT: lbu a2, 1(a2) +; RV64V-NEXT: vslidedown.vi v12, v8, 3 +; RV64V-NEXT: vmv.x.s a4, v12 +; RV64V-NEXT: lbu a5, 0(a4) +; RV64V-NEXT: lbu a4, 1(a4) +; RV64V-NEXT: mv a6, sp +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64V-NEXT: vse64.v v8, (a6) +; RV64V-NEXT: ld a6, 32(sp) ; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 6 -; RV64V-NEXT: andi a0, a0, -128 -; RV64V-NEXT: bnez a0, .LBB107_9 -; RV64V-NEXT: j .LBB107_10 +; RV64V-NEXT: or a2, a2, a3 +; RV64V-NEXT: slli a4, a4, 8 +; RV64V-NEXT: lbu a3, 1(a6) +; RV64V-NEXT: ld a7, 40(sp) +; RV64V-NEXT: lbu a6, 0(a6) +; RV64V-NEXT: or a4, a4, a5 +; RV64V-NEXT: slli a3, a3, 8 +; RV64V-NEXT: lbu a5, 1(a7) +; RV64V-NEXT: or a3, a3, a6 +; RV64V-NEXT: lbu a6, 0(a7) +; RV64V-NEXT: ld a7, 48(sp) +; RV64V-NEXT: slli a5, a5, 8 +; RV64V-NEXT: ld t0, 56(sp) +; RV64V-NEXT: or a5, a5, a6 +; RV64V-NEXT: lbu a6, 1(a7) +; RV64V-NEXT: lbu a7, 0(a7) +; RV64V-NEXT: lbu t1, 1(t0) +; RV64V-NEXT: lbu t0, 0(t0) +; RV64V-NEXT: slli a6, a6, 8 +; RV64V-NEXT: or a6, a6, a7 +; RV64V-NEXT: slli t1, t1, 8 +; RV64V-NEXT: or a7, t1, t0 +; RV64V-NEXT: vmv.v.x v8, a0 +; RV64V-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-NEXT: vslide1down.vx v9, v8, a4 +; RV64V-NEXT: vmv.v.x v8, a3 +; RV64V-NEXT: vslide1down.vx v8, v8, a5 +; RV64V-NEXT: vslide1down.vx v8, v8, a6 +; 
RV64V-NEXT: vmv.v.i v0, 15 +; RV64V-NEXT: vslide1down.vx v8, v8, a7 +; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t +; RV64V-NEXT: addi sp, s0, -128 +; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64V-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64V-NEXT: addi sp, sp, 128 +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_strided_unaligned: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB107_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_10 -; RV64ZVE32F-NEXT: .LBB107_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_11 -; RV64ZVE32F-NEXT: .LBB107_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_12 -; RV64ZVE32F-NEXT: .LBB107_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_13 -; RV64ZVE32F-NEXT: .LBB107_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_14 -; RV64ZVE32F-NEXT: .LBB107_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_15 -; RV64ZVE32F-NEXT: .LBB107_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB107_16 -; RV64ZVE32F-NEXT: .LBB107_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB107_9: # %cond.load -; RV64ZVE32F-NEXT: lbu a2, 1(a0) -; RV64ZVE32F-NEXT: lbu a3, 0(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.v.x v8, a2 -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_2 -; RV64ZVE32F-NEXT: .LBB107_10: # %cond.load1 -; RV64ZVE32F-NEXT: lbu a2, 5(a0) -; RV64ZVE32F-NEXT: lbu a3, 4(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_3 -; RV64ZVE32F-NEXT: .LBB107_11: # %cond.load4 +; RV64ZVE32F-NEXT: lbu a1, 1(a0) +; RV64ZVE32F-NEXT: lbu a2, 0(a0) +; RV64ZVE32F-NEXT: lbu a3, 5(a0) +; RV64ZVE32F-NEXT: lbu a4, 4(a0) +; RV64ZVE32F-NEXT: slli a1, a1, 8 +; RV64ZVE32F-NEXT: or a1, a1, a2 +; RV64ZVE32F-NEXT: slli a3, a3, 8 +; RV64ZVE32F-NEXT: or a3, a3, a4 ; RV64ZVE32F-NEXT: lbu a2, 9(a0) -; RV64ZVE32F-NEXT: lbu a3, 8(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_4 -; RV64ZVE32F-NEXT: .LBB107_12: # %cond.load7 -; RV64ZVE32F-NEXT: lbu a2, 13(a0) -; RV64ZVE32F-NEXT: lbu a3, 12(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_5 -; RV64ZVE32F-NEXT: .LBB107_13: # %cond.load10 -; RV64ZVE32F-NEXT: lbu a2, 17(a0) -; RV64ZVE32F-NEXT: lbu a3, 16(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: 
vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_6 -; RV64ZVE32F-NEXT: .LBB107_14: # %cond.load13 -; RV64ZVE32F-NEXT: lbu a2, 21(a0) -; RV64ZVE32F-NEXT: lbu a3, 20(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_7 -; RV64ZVE32F-NEXT: .LBB107_15: # %cond.load16 -; RV64ZVE32F-NEXT: lbu a2, 25(a0) -; RV64ZVE32F-NEXT: lbu a3, 24(a0) +; RV64ZVE32F-NEXT: lbu a4, 8(a0) +; RV64ZVE32F-NEXT: lbu a5, 13(a0) +; RV64ZVE32F-NEXT: lbu a6, 12(a0) ; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB107_8 -; RV64ZVE32F-NEXT: .LBB107_16: # %cond.load19 -; RV64ZVE32F-NEXT: lbu a1, 29(a0) +; RV64ZVE32F-NEXT: or a2, a2, a4 +; RV64ZVE32F-NEXT: slli a5, a5, 8 +; RV64ZVE32F-NEXT: or a4, a5, a6 +; RV64ZVE32F-NEXT: lbu a5, 17(a0) +; RV64ZVE32F-NEXT: lbu a6, 16(a0) +; RV64ZVE32F-NEXT: lbu a7, 21(a0) +; RV64ZVE32F-NEXT: lbu t0, 20(a0) +; RV64ZVE32F-NEXT: slli a5, a5, 8 +; RV64ZVE32F-NEXT: or a5, a5, a6 +; RV64ZVE32F-NEXT: slli a7, a7, 8 +; RV64ZVE32F-NEXT: or a6, a7, t0 +; RV64ZVE32F-NEXT: lbu a7, 25(a0) +; RV64ZVE32F-NEXT: lbu t0, 24(a0) +; RV64ZVE32F-NEXT: lbu t1, 29(a0) ; RV64ZVE32F-NEXT: lbu a0, 28(a0) -; RV64ZVE32F-NEXT: slli a1, a1, 8 -; RV64ZVE32F-NEXT: or a0, a1, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: slli a7, a7, 8 +; RV64ZVE32F-NEXT: or a7, a7, t0 +; RV64ZVE32F-NEXT: slli t1, t1, 8 +; RV64ZVE32F-NEXT: or a0, t1, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 +; RV64ZVE32F-NEXT: vmv.v.x v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13430,91 +12881,27 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB108_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_10 -; RV64ZVE32F-NEXT: .LBB108_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_11 -; RV64ZVE32F-NEXT: .LBB108_3: # %else5 -; 
RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_12 -; RV64ZVE32F-NEXT: .LBB108_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_13 -; RV64ZVE32F-NEXT: .LBB108_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_14 -; RV64ZVE32F-NEXT: .LBB108_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_15 -; RV64ZVE32F-NEXT: .LBB108_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB108_16 -; RV64ZVE32F-NEXT: .LBB108_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB108_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_2 -; RV64ZVE32F-NEXT: .LBB108_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_3 -; RV64ZVE32F-NEXT: .LBB108_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_4 -; RV64ZVE32F-NEXT: .LBB108_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_5 -; RV64ZVE32F-NEXT: .LBB108_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_6 -; RV64ZVE32F-NEXT: .LBB108_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_7 -; RV64ZVE32F-NEXT: .LBB108_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 24(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB108_8 -; RV64ZVE32F-NEXT: .LBB108_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 26(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 10(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 24(a0) +; RV64ZVE32F-NEXT: lh a6, 26(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 16 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = 
shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13538,92 +12925,28 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB109_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_10 -; RV64ZVE32F-NEXT: .LBB109_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_11 -; RV64ZVE32F-NEXT: .LBB109_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_12 -; RV64ZVE32F-NEXT: .LBB109_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_13 -; RV64ZVE32F-NEXT: .LBB109_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_14 -; RV64ZVE32F-NEXT: .LBB109_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_15 -; RV64ZVE32F-NEXT: .LBB109_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB109_16 -; RV64ZVE32F-NEXT: .LBB109_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB109_9: # %cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 4 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_2 -; RV64ZVE32F-NEXT: .LBB109_10: # %cond.load1 +; RV64ZVE32F-NEXT: addi a1, a0, 4 ; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_3 -; RV64ZVE32F-NEXT: .LBB109_11: # %cond.load4 -; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_4 -; RV64ZVE32F-NEXT: .LBB109_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 14(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_5 -; RV64ZVE32F-NEXT: .LBB109_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_6 -; RV64ZVE32F-NEXT: .LBB109_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_7 -; RV64ZVE32F-NEXT: .LBB109_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 28(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, 
v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB109_8 -; RV64ZVE32F-NEXT: .LBB109_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 30(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 12(a0) +; RV64ZVE32F-NEXT: lh a4, 14(a0) +; RV64ZVE32F-NEXT: lh a5, 22(a0) +; RV64ZVE32F-NEXT: lh a6, 28(a0) +; RV64ZVE32F-NEXT: lh a7, 30(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero +; RV64ZVE32F-NEXT: addi a0, a0, 20 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13647,201 +12970,73 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB110_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_10 -; RV64ZVE32F-NEXT: .LBB110_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_11 -; RV64ZVE32F-NEXT: .LBB110_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_12 -; RV64ZVE32F-NEXT: .LBB110_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_13 -; RV64ZVE32F-NEXT: .LBB110_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_14 -; RV64ZVE32F-NEXT: .LBB110_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_15 -; RV64ZVE32F-NEXT: .LBB110_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB110_16 -; RV64ZVE32F-NEXT: .LBB110_8: # %else20 +; RV64ZVE32F-NEXT: addi a1, a0, 28 +; RV64ZVE32F-NEXT: lh a2, 30(a0) +; RV64ZVE32F-NEXT: lh a3, 24(a0) +; RV64ZVE32F-NEXT: lh a4, 26(a0) +; RV64ZVE32F-NEXT: lh a5, 22(a0) +; RV64ZVE32F-NEXT: lh a6, 16(a0) +; RV64ZVE32F-NEXT: lh a7, 18(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero +; RV64ZVE32F-NEXT: addi a0, a0, 20 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB110_9: # 
%cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 28 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_2 -; RV64ZVE32F-NEXT: .LBB110_10: # %cond.load1 + %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) + ret <8 x i16> %v +} + +; TODO: Recognize as strided load with SEW=32 +define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { +; RV32-LABEL: mgather_reverse_strided_2xSEW: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, a0, 28 +; RV32-NEXT: li a1, -8 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vlse32.v v8, (a0), a1 +; RV32-NEXT: ret +; +; RV64V-LABEL: mgather_reverse_strided_2xSEW: +; RV64V: # %bb.0: +; RV64V-NEXT: addi a0, a0, 28 +; RV64V-NEXT: li a1, -8 +; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64V-NEXT: vlse32.v v8, (a0), a1 +; RV64V-NEXT: ret +; +; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: addi a1, a0, 28 ; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_3 -; RV64ZVE32F-NEXT: .LBB110_11: # %cond.load4 -; RV64ZVE32F-NEXT: lh a2, 24(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_4 -; RV64ZVE32F-NEXT: .LBB110_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 26(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_5 -; RV64ZVE32F-NEXT: .LBB110_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_6 -; RV64ZVE32F-NEXT: .LBB110_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_7 -; RV64ZVE32F-NEXT: .LBB110_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB110_8 -; RV64ZVE32F-NEXT: .LBB110_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 -; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) - ret <8 x i16> %v -} - -; TODO: Recognize as strided load with SEW=32 -define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { -; RV32-LABEL: mgather_reverse_strided_2xSEW: -; 
RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, 28 -; RV32-NEXT: li a1, -8 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vlse32.v v8, (a0), a1 -; RV32-NEXT: ret -; -; RV64V-LABEL: mgather_reverse_strided_2xSEW: -; RV64V: # %bb.0: -; RV64V-NEXT: addi a0, a0, 28 -; RV64V-NEXT: li a1, -8 -; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64V-NEXT: vlse32.v v8, (a0), a1 -; RV64V-NEXT: ret -; -; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW: -; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB111_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_10 -; RV64ZVE32F-NEXT: .LBB111_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_11 -; RV64ZVE32F-NEXT: .LBB111_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_12 -; RV64ZVE32F-NEXT: .LBB111_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_13 -; RV64ZVE32F-NEXT: .LBB111_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_14 -; RV64ZVE32F-NEXT: .LBB111_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_15 -; RV64ZVE32F-NEXT: .LBB111_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB111_16 -; RV64ZVE32F-NEXT: .LBB111_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB111_9: # %cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 28 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_2 -; RV64ZVE32F-NEXT: .LBB111_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_3 -; RV64ZVE32F-NEXT: .LBB111_11: # %cond.load4 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_4 -; RV64ZVE32F-NEXT: .LBB111_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_5 -; RV64ZVE32F-NEXT: .LBB111_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_6 -; RV64ZVE32F-NEXT: .LBB111_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 14(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_7 -; RV64ZVE32F-NEXT: .LBB111_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB111_8 -; RV64ZVE32F-NEXT: 
.LBB111_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 20(a0) +; RV64ZVE32F-NEXT: lh a4, 22(a0) +; RV64ZVE32F-NEXT: lh a5, 14(a0) +; RV64ZVE32F-NEXT: lh a6, 4(a0) +; RV64ZVE32F-NEXT: lh a7, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero +; RV64ZVE32F-NEXT: addi a0, a0, 12 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13864,91 +13059,27 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB112_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_10 -; RV64ZVE32F-NEXT: .LBB112_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_11 -; RV64ZVE32F-NEXT: .LBB112_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_12 -; RV64ZVE32F-NEXT: .LBB112_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_13 -; RV64ZVE32F-NEXT: .LBB112_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_14 -; RV64ZVE32F-NEXT: .LBB112_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_15 -; RV64ZVE32F-NEXT: .LBB112_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB112_16 -; RV64ZVE32F-NEXT: .LBB112_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB112_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_2 -; RV64ZVE32F-NEXT: .LBB112_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_3 -; RV64ZVE32F-NEXT: .LBB112_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_4 -; RV64ZVE32F-NEXT: .LBB112_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; 
RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_5 -; RV64ZVE32F-NEXT: .LBB112_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_6 -; RV64ZVE32F-NEXT: .LBB112_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_7 -; RV64ZVE32F-NEXT: .LBB112_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB112_8 -; RV64ZVE32F-NEXT: .LBB112_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 18(a0) +; RV64ZVE32F-NEXT: lh a4, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 4(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13974,91 +13105,27 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB113_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_10 -; RV64ZVE32F-NEXT: .LBB113_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_11 -; RV64ZVE32F-NEXT: .LBB113_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_12 -; RV64ZVE32F-NEXT: .LBB113_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_13 -; RV64ZVE32F-NEXT: .LBB113_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_14 -; RV64ZVE32F-NEXT: .LBB113_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_15 -; RV64ZVE32F-NEXT: .LBB113_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB113_16 -; 
RV64ZVE32F-NEXT: .LBB113_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB113_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_2 -; RV64ZVE32F-NEXT: .LBB113_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_3 -; RV64ZVE32F-NEXT: .LBB113_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_4 -; RV64ZVE32F-NEXT: .LBB113_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_5 -; RV64ZVE32F-NEXT: .LBB113_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_6 -; RV64ZVE32F-NEXT: .LBB113_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_7 -; RV64ZVE32F-NEXT: .LBB113_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB113_8 -; RV64ZVE32F-NEXT: .LBB113_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 20(a0) +; RV64ZVE32F-NEXT: lh a4, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 4(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14085,92 +13152,27 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2: ; RV64ZVE32F: # 
%bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB114_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_10 -; RV64ZVE32F-NEXT: .LBB114_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_11 -; RV64ZVE32F-NEXT: .LBB114_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_12 -; RV64ZVE32F-NEXT: .LBB114_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_13 -; RV64ZVE32F-NEXT: .LBB114_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_14 -; RV64ZVE32F-NEXT: .LBB114_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_15 -; RV64ZVE32F-NEXT: .LBB114_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB114_16 -; RV64ZVE32F-NEXT: .LBB114_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB114_9: # %cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 2 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_2 -; RV64ZVE32F-NEXT: .LBB114_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_3 -; RV64ZVE32F-NEXT: .LBB114_11: # %cond.load4 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_4 -; RV64ZVE32F-NEXT: .LBB114_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_5 -; RV64ZVE32F-NEXT: .LBB114_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_6 -; RV64ZVE32F-NEXT: .LBB114_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_7 -; RV64ZVE32F-NEXT: .LBB114_15: # %cond.load16 +; RV64ZVE32F-NEXT: addi a1, a0, 2 ; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB114_8 -; RV64ZVE32F-NEXT: .LBB114_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 18(a0) +; RV64ZVE32F-NEXT: lh a4, 20(a0) +; RV64ZVE32F-NEXT: lh a5, 10(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero +; RV64ZVE32F-NEXT: 
addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14200,91 +13202,27 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_4xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB115_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_10 -; RV64ZVE32F-NEXT: .LBB115_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_11 -; RV64ZVE32F-NEXT: .LBB115_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_12 -; RV64ZVE32F-NEXT: .LBB115_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_13 -; RV64ZVE32F-NEXT: .LBB115_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_14 -; RV64ZVE32F-NEXT: .LBB115_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_15 -; RV64ZVE32F-NEXT: .LBB115_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB115_16 -; RV64ZVE32F-NEXT: .LBB115_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB115_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_2 -; RV64ZVE32F-NEXT: .LBB115_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_3 -; RV64ZVE32F-NEXT: .LBB115_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_4 -; RV64ZVE32F-NEXT: .LBB115_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_5 -; RV64ZVE32F-NEXT: .LBB115_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_6 -; RV64ZVE32F-NEXT: .LBB115_14: # 
%cond.load13 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_7 -; RV64ZVE32F-NEXT: .LBB115_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB115_8 -; RV64ZVE32F-NEXT: .LBB115_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 6(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 20(a0) +; RV64ZVE32F-NEXT: lh a6, 22(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 16 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14311,91 +13249,27 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_4xSEW_partial_align: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB116_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_10 -; RV64ZVE32F-NEXT: .LBB116_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_11 -; RV64ZVE32F-NEXT: .LBB116_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_12 -; RV64ZVE32F-NEXT: .LBB116_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_13 -; RV64ZVE32F-NEXT: .LBB116_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_14 -; RV64ZVE32F-NEXT: .LBB116_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_15 -; RV64ZVE32F-NEXT: .LBB116_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB116_16 -; RV64ZVE32F-NEXT: .LBB116_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB116_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_2 -; RV64ZVE32F-NEXT: .LBB116_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; 
RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_3 -; RV64ZVE32F-NEXT: .LBB116_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_4 -; RV64ZVE32F-NEXT: .LBB116_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_5 -; RV64ZVE32F-NEXT: .LBB116_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_6 -; RV64ZVE32F-NEXT: .LBB116_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_7 -; RV64ZVE32F-NEXT: .LBB116_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB116_8 -; RV64ZVE32F-NEXT: .LBB116_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 6(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 20(a0) +; RV64ZVE32F-NEXT: lh a6, 22(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 16 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14407,10 +13281,8 @@ define <8 x i16> @mgather_shuffle_reverse(ptr %base) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v8, (a0), a1 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> 
poison) ret <8 x i16> %v } @@ -14433,92 +13305,27 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_shuffle_rotate: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB118_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_10 -; RV64ZVE32F-NEXT: .LBB118_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_11 -; RV64ZVE32F-NEXT: .LBB118_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_12 -; RV64ZVE32F-NEXT: .LBB118_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_13 -; RV64ZVE32F-NEXT: .LBB118_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_14 -; RV64ZVE32F-NEXT: .LBB118_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_15 -; RV64ZVE32F-NEXT: .LBB118_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB118_16 -; RV64ZVE32F-NEXT: .LBB118_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB118_9: # %cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_2 -; RV64ZVE32F-NEXT: .LBB118_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_3 -; RV64ZVE32F-NEXT: .LBB118_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 10(a0) ; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_4 -; RV64ZVE32F-NEXT: .LBB118_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 14(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_5 -; RV64ZVE32F-NEXT: .LBB118_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_6 -; RV64ZVE32F-NEXT: .LBB118_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_7 -; RV64ZVE32F-NEXT: .LBB118_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB118_8 -; RV64ZVE32F-NEXT: .LBB118_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 14(a0) +; RV64ZVE32F-NEXT: lh a4, 2(a0) +; RV64ZVE32F-NEXT: 
lh a5, 4(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14545,91 +13352,27 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB119_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_10 -; RV64ZVE32F-NEXT: .LBB119_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_11 -; RV64ZVE32F-NEXT: .LBB119_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_12 -; RV64ZVE32F-NEXT: .LBB119_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_13 -; RV64ZVE32F-NEXT: .LBB119_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_14 -; RV64ZVE32F-NEXT: .LBB119_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_15 -; RV64ZVE32F-NEXT: .LBB119_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB119_16 -; RV64ZVE32F-NEXT: .LBB119_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB119_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_2 -; RV64ZVE32F-NEXT: .LBB119_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_3 -; RV64ZVE32F-NEXT: .LBB119_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 4(a0) ; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_4 -; RV64ZVE32F-NEXT: .LBB119_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_5 -; RV64ZVE32F-NEXT: .LBB119_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x 
v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_6 -; RV64ZVE32F-NEXT: .LBB119_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_7 -; RV64ZVE32F-NEXT: .LBB119_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB119_8 -; RV64ZVE32F-NEXT: .LBB119_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 14(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 2(a0) +; RV64ZVE32F-NEXT: lh a4, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 12(a0) +; RV64ZVE32F-NEXT: lh a6, 14(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -15076,7 +13819,7 @@ define <32 x i64> @mgather_strided_split(ptr %base) { ; RV64ZVE32F-NEXT: addi sp, sp, 144 ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, <32 x i64> - %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> shufflevector(<32 x i1> insertelement(<32 x i1> poison, i1 true, i32 0), <32 x i1> poison, <32 x i32> zeroinitializer), <32 x i64> poison) + %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> splat (i1 true), <32 x i64> poison) ret <32 x i64> %x } @@ -15119,7 +13862,7 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr i32, ptr %base, <4 x i64> - %x = call <4 x i32> @llvm.masked.gather.v4i32.v32p0(<4 x ptr> %ptrs, i32 8, <4 x i1> shufflevector(<4 x i1> insertelement(<4 x i1> poison, i1 true, i32 0), <4 x i1> poison, <4 x i32> zeroinitializer), <4 x i32> poison) + %x = call <4 x i32> @llvm.masked.gather.v4i32.v32p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 true), <4 x i32> poison) ret <4 x i32> %x } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index b2ff471455631..aa815e18ac101 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -336,49 +336,19 @@ define void 
@mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i8: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB6_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB6_6 -; RV64ZVE32F-NEXT: .LBB6_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB6_7 -; RV64ZVE32F-NEXT: .LBB6_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB6_8 -; RV64ZVE32F-NEXT: .LBB6_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB6_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vse8.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB6_2 -; RV64ZVE32F-NEXT: .LBB6_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vse8.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse8.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB6_3 -; RV64ZVE32F-NEXT: .LBB6_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vse8.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse8.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB6_4 -; RV64ZVE32F-NEXT: .LBB6_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vse8.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV64ZVE32F-NEXT: vse8.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1)) ret void } @@ -883,49 +853,19 @@ define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB15_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB15_6 -; RV64ZVE32F-NEXT: .LBB15_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB15_7 -; RV64ZVE32F-NEXT: .LBB15_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB15_8 -; RV64ZVE32F-NEXT: .LBB15_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB15_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB15_2 -; RV64ZVE32F-NEXT: .LBB15_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: 
vse16.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB15_3 -; RV64ZVE32F-NEXT: .LBB15_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB15_4 -; RV64ZVE32F-NEXT: .LBB15_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void } @@ -1788,49 +1728,19 @@ define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB26_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB26_6 -; RV64ZVE32F-NEXT: .LBB26_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB26_7 -; RV64ZVE32F-NEXT: .LBB26_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB26_8 -; RV64ZVE32F-NEXT: .LBB26_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB26_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB26_2 -; RV64ZVE32F-NEXT: .LBB26_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB26_3 -; RV64ZVE32F-NEXT: .LBB26_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse32.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB26_4 -; RV64ZVE32F-NEXT: .LBB26_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void } @@ -3163,50 +3073,22 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { ; RV32ZVE32F-NEXT: lw a2, 24(a0) ; RV32ZVE32F-NEXT: lw a3, 20(a0) 
; RV32ZVE32F-NEXT: lw a4, 16(a0) -; RV32ZVE32F-NEXT: lw a7, 12(a0) +; RV32ZVE32F-NEXT: lw a5, 12(a0) ; RV32ZVE32F-NEXT: lw a6, 8(a0) -; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmset.m v9 -; RV32ZVE32F-NEXT: vmv.x.s a5, v9 -; RV32ZVE32F-NEXT: beqz zero, .LBB39_5 -; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a5, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB39_6 -; RV32ZVE32F-NEXT: .LBB39_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a5, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB39_7 -; RV32ZVE32F-NEXT: .LBB39_3: # %else4 -; RV32ZVE32F-NEXT: andi a5, a5, 8 -; RV32ZVE32F-NEXT: bnez a5, .LBB39_8 -; RV32ZVE32F-NEXT: .LBB39_4: # %else6 -; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB39_5: # %cond.store -; RV32ZVE32F-NEXT: lw t0, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s t1, v8 -; RV32ZVE32F-NEXT: sw t0, 4(t1) -; RV32ZVE32F-NEXT: sw a0, 0(t1) -; RV32ZVE32F-NEXT: andi a0, a5, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB39_2 -; RV32ZVE32F-NEXT: .LBB39_6: # %cond.store1 +; RV32ZVE32F-NEXT: lw a7, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s t0, v8 +; RV32ZVE32F-NEXT: sw a7, 0(t0) +; RV32ZVE32F-NEXT: sw a0, 4(t0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 -; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: sw a6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a5, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB39_3 -; RV32ZVE32F-NEXT: .LBB39_7: # %cond.store3 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: sw a5, 4(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a3, 4(a0) -; RV32ZVE32F-NEXT: andi a5, a5, 8 -; RV32ZVE32F-NEXT: beqz a5, .LBB39_4 -; RV32ZVE32F-NEXT: .LBB39_8: # %cond.store5 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a2, 0(a0) @@ -3216,46 +3098,19 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a2, 24(a1) -; RV64ZVE32F-NEXT: ld a4, 16(a1) -; RV64ZVE32F-NEXT: ld a7, 8(a1) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a5, 16(a0) -; RV64ZVE32F-NEXT: ld t0, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a6, v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB39_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a6, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB39_6 -; RV64ZVE32F-NEXT: .LBB39_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a6, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB39_7 -; RV64ZVE32F-NEXT: .LBB39_3: # %else4 -; RV64ZVE32F-NEXT: andi a0, a6, 8 -; RV64ZVE32F-NEXT: bnez a0, .LBB39_8 -; RV64ZVE32F-NEXT: .LBB39_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB39_5: # %cond.store +; RV64ZVE32F-NEXT: ld a3, 16(a1) +; RV64ZVE32F-NEXT: ld a4, 8(a1) ; RV64ZVE32F-NEXT: ld a1, 0(a1) -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: sd a0, 0(a1) -; RV64ZVE32F-NEXT: andi a0, a6, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB39_2 -; RV64ZVE32F-NEXT: .LBB39_6: # %cond.store1 -; RV64ZVE32F-NEXT: sd t0, 0(a7) -; RV64ZVE32F-NEXT: andi a0, a6, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB39_3 -; RV64ZVE32F-NEXT: .LBB39_7: # %cond.store3 -; RV64ZVE32F-NEXT: sd a5, 0(a4) -; RV64ZVE32F-NEXT: andi a0, a6, 8 -; RV64ZVE32F-NEXT: 
beqz a0, .LBB39_4 -; RV64ZVE32F-NEXT: .LBB39_8: # %cond.store5 -; RV64ZVE32F-NEXT: sd a3, 0(a2) +; RV64ZVE32F-NEXT: ld a5, 0(a0) +; RV64ZVE32F-NEXT: ld a6, 8(a0) +; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) +; RV64ZVE32F-NEXT: sd a5, 0(a1) +; RV64ZVE32F-NEXT: sd a6, 0(a4) +; RV64ZVE32F-NEXT: sd a7, 0(a3) +; RV64ZVE32F-NEXT: sd a0, 0(a2) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1)) ret void } @@ -6168,49 +6023,19 @@ define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4f16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB55_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB55_6 -; RV64ZVE32F-NEXT: .LBB55_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB55_7 -; RV64ZVE32F-NEXT: .LBB55_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB55_8 -; RV64ZVE32F-NEXT: .LBB55_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB55_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB55_2 -; RV64ZVE32F-NEXT: .LBB55_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB55_3 -; RV64ZVE32F-NEXT: .LBB55_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB55_4 -; RV64ZVE32F-NEXT: .LBB55_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void } @@ -7020,49 +6845,19 @@ define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB65_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; 
RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB65_6 -; RV64ZVE32F-NEXT: .LBB65_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB65_7 -; RV64ZVE32F-NEXT: .LBB65_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB65_8 -; RV64ZVE32F-NEXT: .LBB65_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB65_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB65_2 -; RV64ZVE32F-NEXT: .LBB65_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB65_3 -; RV64ZVE32F-NEXT: .LBB65_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse32.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB65_4 -; RV64ZVE32F-NEXT: .LBB65_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void } @@ -8368,43 +8163,15 @@ define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) { ; ; RV32ZVE32F-LABEL: mscatter_truemask_v4f64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmset.m v9 -; RV32ZVE32F-NEXT: vmv.x.s a0, v9 -; RV32ZVE32F-NEXT: beqz zero, .LBB78_5 -; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB78_6 -; RV32ZVE32F-NEXT: .LBB78_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB78_7 -; RV32ZVE32F-NEXT: .LBB78_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a0, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB78_8 -; RV32ZVE32F-NEXT: .LBB78_4: # %else6 -; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB78_5: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB78_2 -; RV32ZVE32F-NEXT: .LBB78_6: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v9 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB78_3 -; RV32ZVE32F-NEXT: .LBB78_7: # %cond.store3 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a0, v9 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v9 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB78_4 
-; RV32ZVE32F-NEXT: .LBB78_8: # %cond.store5 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a0, v9 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) @@ -8412,43 +8179,16 @@ define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4f64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB78_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB78_6 -; RV64ZVE32F-NEXT: .LBB78_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB78_7 -; RV64ZVE32F-NEXT: .LBB78_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB78_8 -; RV64ZVE32F-NEXT: .LBB78_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB78_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: fsd fa0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB78_2 -; RV64ZVE32F-NEXT: .LBB78_6: # %cond.store1 -; RV64ZVE32F-NEXT: fsd fa1, 0(a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB78_3 -; RV64ZVE32F-NEXT: .LBB78_7: # %cond.store3 -; RV64ZVE32F-NEXT: fsd fa2, 0(a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB78_4 -; RV64ZVE32F-NEXT: .LBB78_8: # %cond.store5 -; RV64ZVE32F-NEXT: fsd fa3, 0(a1) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 8(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) +; RV64ZVE32F-NEXT: fsd fa0, 0(a1) +; RV64ZVE32F-NEXT: fsd fa1, 0(a2) +; RV64ZVE32F-NEXT: fsd fa2, 0(a3) +; RV64ZVE32F-NEXT: fsd fa3, 0(a0) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1)) ret void } @@ -11344,10 +11084,8 @@ define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true)) ret void } @@ -11358,10 +11096,8 @@ define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 
true)) ret void } @@ -11373,10 +11109,8 @@ define void @mscatter_shuffle_reverse(<8 x i16> %val, ptr %base) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsse16.v v8, (a0), a1 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true)) ret void } @@ -11399,89 +11133,31 @@ define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) { ; ; RV64ZVE32F-LABEL: mscatter_shuffle_rotate: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB96_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_10 -; RV64ZVE32F-NEXT: .LBB96_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_11 -; RV64ZVE32F-NEXT: .LBB96_3: # %else4 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_12 -; RV64ZVE32F-NEXT: .LBB96_4: # %else6 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_13 -; RV64ZVE32F-NEXT: .LBB96_5: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_14 -; RV64ZVE32F-NEXT: .LBB96_6: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_15 -; RV64ZVE32F-NEXT: .LBB96_7: # %else12 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB96_16 -; RV64ZVE32F-NEXT: .LBB96_8: # %else14 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB96_9: # %cond.store -; RV64ZVE32F-NEXT: addi a2, a0, 8 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_2 -; RV64ZVE32F-NEXT: .LBB96_10: # %cond.store1 -; RV64ZVE32F-NEXT: addi a2, a0, 10 +; RV64ZVE32F-NEXT: addi a1, a0, 6 +; RV64ZVE32F-NEXT: addi a2, a0, 4 +; RV64ZVE32F-NEXT: addi a3, a0, 2 +; RV64ZVE32F-NEXT: addi a4, a0, 14 +; RV64ZVE32F-NEXT: addi a5, a0, 12 +; RV64ZVE32F-NEXT: addi a6, a0, 10 +; RV64ZVE32F-NEXT: addi a7, a0, 8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v8, (a7) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_3 -; RV64ZVE32F-NEXT: .LBB96_11: # %cond.store3 -; RV64ZVE32F-NEXT: addi a2, a0, 12 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a6) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_4 -; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store5 -; RV64ZVE32F-NEXT: addi a2, a0, 14 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a5) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_5 -; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store7 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a4) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: vse16.v v9, (a0) -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; 
RV64ZVE32F-NEXT: beqz a2, .LBB96_6 -; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store9 -; RV64ZVE32F-NEXT: addi a2, a0, 2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_7 -; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11 -; RV64ZVE32F-NEXT: addi a2, a0, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB96_8 -; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13 -; RV64ZVE32F-NEXT: addi a0, a0, 6 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true)) ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index 45215480166e5..2c62cbd583d00 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -1349,16 +1349,16 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-LABEL: vadd_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1390,24 +1390,22 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vadd_vi_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB109_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: addi a1, a0, -16 @@ 
-1435,11 +1433,7 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v16, v16, -1 ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 3042d9dd1cbbe..3db44e87109bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -1022,16 +1022,16 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-LABEL: vmax_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmax.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1064,8 +1064,6 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmax.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.smax.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.smax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index 36a9e6d42fec3..c97c2232715f5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -1021,16 +1021,16 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-LABEL: vmaxu_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1063,8 +1063,6 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmaxu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - 
%elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.umax.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.umax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index 8a6dccd76e9b8..eaa19110a2a28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -1022,16 +1022,16 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-LABEL: vmin_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmin.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1064,8 +1064,6 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmin.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.smin.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.smin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index a56514c70bb07..48175e5b905ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -1021,16 +1021,16 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-LABEL: vminu_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vminu.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1063,8 +1063,6 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vminu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.umin.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.umin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 
%evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index 348b301ef2555..291629de6dcfa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -1362,16 +1362,16 @@ define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-LABEL: vsadd_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1403,24 +1403,22 @@ define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vsadd_vi_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB109_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v8, v8, v24 ; RV32-NEXT: addi a1, a0, -16 @@ -1448,11 +1446,7 @@ define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v16, v16, -1 ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll index 6584817466719..d38ee1148e894 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -1358,16 +1358,16 @@ define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV32-LABEL: vsaddu_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: 
vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1399,24 +1399,22 @@ define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vsaddu_vi_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB109_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v8, v8, v24 ; RV32-NEXT: addi a1, a0, -16 @@ -1444,11 +1442,7 @@ define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v16, v16, -1 ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 586fef71796e6..2caa2ff41a7d9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -1402,16 +1402,16 @@ define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-LABEL: vssub_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1444,24 +1444,22 @@ define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; 
RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vssub.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vssub_vi_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB109_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v8, v8, v24 ; RV32-NEXT: addi a1, a0, -16 @@ -1490,11 +1488,7 @@ define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v16, v16, a2 ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll index 5374e44bf9e82..6313f31bc1a61 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -1397,16 +1397,16 @@ define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV32-LABEL: vssubu_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1439,24 +1439,22 @@ define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vssubu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: 
vssubu_vi_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB109_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v8, v8, v24 ; RV32-NEXT: addi a1, a0, -16 @@ -1485,11 +1483,7 @@ define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v16, v16, a2 ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll index c61d83256c706..b78b8663eac90 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll @@ -616,8 +616,6 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -625,6 +623,8 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -705,9 +705,7 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll index bdf82f8b71355..69c76152910e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll @@ -616,8 +616,6 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -625,6 +623,8 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli 
a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -705,9 +705,7 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 9ab7fdb82fd00..8bc2334282653 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -544,8 +544,6 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -553,6 +551,8 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -599,9 +599,7 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index fc5213b91e61e..f934127f978dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -497,8 +497,6 @@ define @vp_rint_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -506,6 +504,8 @@ define @vp_rint_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -548,9 +548,7 @@ define @vp_rint_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 
c4e472acca6ed..eb4994914fad9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -545,8 +545,6 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -554,6 +552,8 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -600,9 +600,7 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index 47edb4e645020..f366a2922d079 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -545,8 +545,6 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -554,6 +552,8 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -600,9 +600,7 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 9a67eb2b53b2f..79c940bdf089a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -545,8 +545,6 @@ define @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll index 4e60077e5db9b..939a45e15c103 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll 
+++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll @@ -301,8 +301,6 @@ define @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -310,6 +308,8 @@ define @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -338,9 +338,7 @@ define @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll index 73a59932774d6..df2bc523cd7a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -269,8 +269,6 @@ define @vfabs_vv_nxv32f16_unmasked( %va ; ; ZVFHMIN-LABEL: vfabs_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -278,6 +276,8 @@ define @vfabs_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -296,9 +296,7 @@ define @vfabs_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 0c6a9792d7d27..c69a7bc5cece4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -625,8 +625,6 @@ define @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -634,6 +632,8 @@ define @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: 
vsetvli a2, zero, e16, m4, ta, ma @@ -662,9 +662,7 @@ define @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv32f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv32f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -730,8 +728,6 @@ define @vfadd_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -739,6 +735,8 @@ define @vfadd_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -760,9 +758,7 @@ define @vfadd_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index 0775a180d5aeb..3ad17e85570a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -571,8 +571,6 @@ define @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -580,6 +578,8 @@ define @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -608,9 +608,7 @@ define @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv32f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv32f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -676,8 +674,6 @@ define @vfdiv_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -685,6 +681,8 @@ define @vfdiv_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; 
ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -706,9 +704,7 @@ define @vfdiv_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll index 2c814016bc4b7..7556b3ace5c6c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll @@ -301,8 +301,6 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -310,6 +308,8 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -338,9 +338,7 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll index b830d31637dc6..755c665376128 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll @@ -301,8 +301,6 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -310,6 +308,8 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -338,9 +338,7 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll index 
ee7a7816c5fc6..30d5919238cfa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll @@ -571,8 +571,6 @@ define @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -580,6 +578,8 @@ define @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -608,9 +608,7 @@ define @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv32f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv32f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -676,8 +674,6 @@ define @vfmul_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -685,6 +681,8 @@ define @vfmul_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -706,9 +704,7 @@ define @vfmul_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll index 47a832af15e2c..1db5fa1720a27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll @@ -269,8 +269,6 @@ define @vfneg_vv_nxv32f16_unmasked( %va ; ; ZVFHMIN-LABEL: vfneg_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -278,6 +276,8 @@ define @vfneg_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -296,9 +296,7 @@ define @vfneg_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = 
insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll index f1b7b003f539a..d6caad15e40a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -269,8 +269,6 @@ define @vfsqrt_vv_nxv32f16_unmasked( %v ; ; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -278,6 +276,8 @@ define @vfsqrt_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -296,9 +296,7 @@ define @vfsqrt_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index ca436e29c9de5..2eae18d7cc493 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -571,8 +571,6 @@ define @vfsub_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -580,6 +578,8 @@ define @vfsub_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -608,9 +608,7 @@ define @vfsub_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv32f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv32f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -676,8 +674,6 @@ define @vfsub_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -685,6 +681,8 @@ define @vfsub_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: 
vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -706,9 +704,7 @@ define @vfsub_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v }
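The IR change repeated throughout these hunks is purely syntactic: the two-instruction all-true-mask idiom (insertelement of `true` into a poison vector, then a zeroinitializer shufflevector) is replaced by the `splat` vector-constant shorthand accepted by recent LLVM. The sketch below is illustrative only and is not part of the patch; the function names and the choice of a small fixed-length masked gather are placeholders picked for the example, and both functions parse to calls with the same all-true mask.

; Illustrative sketch, not from the patch: old splat-mask idiom vs. the
; splat constant shorthand used by the updated tests.
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)

define <4 x i32> @gather_with_splat_idiom(<4 x ptr> %ptrs, <4 x i32> %passthru) {
  ; Old spelling: materialize an all-true <4 x i1> mask element by element.
  %head = insertelement <4 x i1> poison, i1 true, i32 0
  %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> %passthru)
  ret <4 x i32> %v
}

define <4 x i32> @gather_with_splat_constant(<4 x ptr> %ptrs, <4 x i32> %passthru) {
  ; New spelling: the splat constant expresses the same all-true mask inline.
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %passthru)
  ret <4 x i32> %v
}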