32 changes: 1 addition & 31 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -41,24 +41,6 @@ multiclass VPatUSLoadStoreSDNode<ValueType type,
(store_instr reg_class:$rs2, GPR:$rs1, avl, log2sew)>;
}

multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type,
int log2sew,
LMULInfo vlmul,
VReg reg_class,
int sew = !shl(1, log2sew)> {
defvar load_instr =
!cast<Instruction>("VL"#!substr(vlmul.MX, 1)#"RE"#sew#"_V");
defvar store_instr =
!cast<Instruction>("VS"#!substr(vlmul.MX, 1)#"R_V");

// Load
def : Pat<(type (load GPR:$rs1)),
(load_instr GPR:$rs1)>;
// Store
def : Pat<(store type:$rs2, GPR:$rs1),
(store_instr reg_class:$rs2, GPR:$rs1)>;
}

multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m> {
defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#m.BX);
defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#m.BX);
@@ -895,23 +877,11 @@ multiclass VPatAVGADD_VV_VX_RM<SDNode vop, int vxrm, string suffix = ""> {
//===----------------------------------------------------------------------===//

// 7.4. Vector Unit-Stride Instructions
foreach vti = !listconcat(FractionalGroupIntegerVectors,
FractionalGroupFloatVectors,
FractionalGroupBFloatVectors) in
foreach vti = AllVectors in
let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.AVL, vti.RegClass>;
foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VBF16M1, VF16M1, VF32M1, VF64M1] in
let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors, GroupBFloatVectors) in
let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
foreach mti = AllMasks in
let Predicates = [HasVInstructions] in
defm : VPatUSLoadStoreMaskSDNode<mti>;
54 changes: 54 additions & 0 deletions llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -59,6 +59,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {

private:
bool convertToVLMAX(MachineInstr &MI) const;
bool convertToWholeRegister(MachineInstr &MI) const;
bool convertToUnmasked(MachineInstr &MI) const;
bool convertVMergeToVMv(MachineInstr &MI) const;

@@ -155,6 +156,58 @@ bool RISCVVectorPeephole::isAllOnesMask(const MachineInstr *MaskDef) const {
}
}

/// Convert unit-strided unmasked loads and stores to whole-register equivalents
/// to avoid the dependency on $vl and $vtype.
///
/// %x = PseudoVLE8_V_M1 %passthru, %ptr, %vlmax, policy
/// PseudoVSE8_V_M1 %v, %ptr, %vlmax
///
/// ->
///
/// %x = VL1RE8_V %ptr
/// VS1R_V %v, %ptr
bool RISCVVectorPeephole::convertToWholeRegister(MachineInstr &MI) const {
#define CASE_WHOLE_REGISTER_LMUL_SEW(lmul, sew) \
case RISCV::PseudoVLE##sew##_V_M##lmul: \
NewOpc = RISCV::VL##lmul##RE##sew##_V; \
break; \
case RISCV::PseudoVSE##sew##_V_M##lmul: \
NewOpc = RISCV::VS##lmul##R_V; \
break;
#define CASE_WHOLE_REGISTER_LMUL(lmul) \
CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 8) \
CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 16) \
CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 32) \
CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 64)

unsigned NewOpc;
switch (MI.getOpcode()) {
CASE_WHOLE_REGISTER_LMUL(1)
CASE_WHOLE_REGISTER_LMUL(2)
CASE_WHOLE_REGISTER_LMUL(4)
CASE_WHOLE_REGISTER_LMUL(8)
default:
return false;
}

MachineOperand &VLOp = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
if (!VLOp.isImm() || VLOp.getImm() != RISCV::VLMaxSentinel)
return false;

// Stores don't have a policy op
Contributor

Invert this sentence? Something like: "Pseudos for Unit-Stride Loads have a policy op; we should remove it, as Whole Register Loads don't have one."
I was confused by this at first glance, as the thought "why should we do this if stores have no policy op" came to me.

Contributor Author

Yeah, this is a bit confusing. I think I put that there because unit-strided stores might not have a policy op, hence why I'm only checking for the policy op and not the VL or SEW. Will reword.
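Roughly, the operand lists in play look like this, mirroring the doc comment above (the exact layouts come from each pseudo's MCInstrDesc, so the operand names here are just illustrative):

  %x = PseudoVLE8_V_M1 %passthru, %ptr, %avl, log2sew, policy
  PseudoVSE8_V_M1 %val, %ptr, %avl, log2sew
  %x = VL1RE8_V %ptr
  VS1R_V %val, %ptr

The store pseudo has neither a passthru nor a policy operand, so only the policy removal is guarded by hasVecPolicyOp (and the passthru removal by isFirstDefTiedToFirstUse), while the VL and SEW operands are removed unconditionally.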

if (RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags))
MI.removeOperand(RISCVII::getVecPolicyOpNum(MI.getDesc()));
MI.removeOperand(RISCVII::getSEWOpNum(MI.getDesc()));
MI.removeOperand(RISCVII::getVLOpNum(MI.getDesc()));
// Stores don't have a passthru
Contributor

Ditto.

if (RISCVII::isFirstDefTiedToFirstUse(MI.getDesc()))
MI.removeOperand(1);

MI.setDesc(TII->get(NewOpc));

return true;
}

// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
@@ -281,6 +334,7 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr &MI : MBB) {
Changed |= convertToVLMAX(MI);
Changed |= convertToUnmasked(MI);
Changed |= convertToWholeRegister(MI);
Changed |= convertVMergeToVMv(MI);
}
}
11 changes: 7 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
Contributor Author

These functions are marked with optnone so the machine SSA optimisation passes aren't run, including RISCVVectorPeephole.

@@ -20,7 +20,8 @@ define dso_local void @lots_args(i32 signext %x0, i32 signext %x1, <vscale x 16
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: sub a0, s0, a0
; CHECK-NEXT: addi a0, a0, -64
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: sw a2, -36(s0)
; CHECK-NEXT: sw a3, -40(s0)
; CHECK-NEXT: sw a4, -44(s0)
@@ -85,7 +86,8 @@ define dso_local signext i32 @main() #0 {
; CHECK-NEXT: slli s1, s1, 3
; CHECK-NEXT: sub s1, s0, s1
; CHECK-NEXT: addi s1, s1, -112
; CHECK-NEXT: vs8r.v v8, (s1)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vse32.v v8, (s1)
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: sw a0, -76(s0)
; CHECK-NEXT: sw a0, -80(s0)
@@ -99,7 +101,7 @@ define dso_local signext i32 @main() #0 {
; CHECK-NEXT: sw a0, -112(s0)
; CHECK-NEXT: lw a0, -76(s0)
; CHECK-NEXT: lw a1, -80(s0)
; CHECK-NEXT: vl8re32.v v8, (s1)
; CHECK-NEXT: vle32.v v8, (s1)
; CHECK-NEXT: lw a2, -84(s0)
; CHECK-NEXT: lw a3, -88(s0)
; CHECK-NEXT: lw a4, -92(s0)
@@ -115,7 +117,8 @@ define dso_local signext i32 @main() #0 {
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: lw a0, -76(s0)
; CHECK-NEXT: lw a1, -80(s0)
; CHECK-NEXT: vl8re32.v v8, (s1)
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (s1)
; CHECK-NEXT: lw a2, -84(s0)
; CHECK-NEXT: lw a3, -88(s0)
; CHECK-NEXT: lw a4, -92(s0)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
@@ -17,7 +17,7 @@ define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
; CHECK-NEXT: $v0 = COPY [[COPY1]]
; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size from %ir.p, align 8)
; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store (<vscale x 1 x s64>) into %ir.p)
; CHECK-NEXT: PseudoVSE32_V_M1 killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]], -1, 5 /* e32 */ :: (store (<vscale x 1 x s64>) into %ir.p)
Contributor Author

This test stops after isel, so this is expected since RISCVVectorPeephole isn't run.

; CHECK-NEXT: PseudoRET
%a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
%b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
@@ -36,7 +36,7 @@ define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
; CHECK-NEXT: $v0 = COPY [[COPY1]]
; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 1 /* ta, mu */ :: (load unknown-size from %ir.p, align 8)
; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store (<vscale x 1 x s64>) into %ir.p)
; CHECK-NEXT: PseudoVSE32_V_M1 killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]], -1, 5 /* e32 */ :: (store (<vscale x 1 x s64>) into %ir.p)
; CHECK-NEXT: PseudoRET
%a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
%b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
11 changes: 11 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -536,3 +536,14 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
store <vscale x 1 x double> %hi, ptr %out
ret <vscale x 16 x double> %lo
}

define <vscale x 8 x i8> @vpload_all_active_nxv8i8(ptr %ptr) {
; CHECK-LABEL: vpload_all_active_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1r.v v8, (a0)
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale()
%evl = mul i32 %vscale, 8
%load = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x i8> %load
}
11 changes: 11 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -459,3 +459,14 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
call void @llvm.vp.store.nxv17f64.p0(<vscale x 17 x double> %val, ptr %ptr, <vscale x 17 x i1> %m, i32 %evl)
ret void
}

define void @vpstore_all_active_nxv8i8(<vscale x 8 x i8> %val, ptr %ptr) {
; CHECK-LABEL: vpstore_all_active_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale()
%evl = mul i32 %vscale, 8
call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> %val, ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret void
}
40 changes: 8 additions & 32 deletions llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -487,42 +487,18 @@ define <vscale x 8 x double> @vfmerge_nzv_nxv8f64(<vscale x 8 x double> %va, <vs
define <vscale x 16 x double> @vselect_combine_regression(<vscale x 16 x i64> %va, <vscale x 16 x double> %vb) {
; CHECK-LABEL: vselect_combine_regression:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v24, v16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vl8re64.v v8, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vmseq.vi v24, v16, 0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vi v0, v16, 0
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: vmseq.vi v7, v24, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vle64.v v16, (a1), v0.t
; CHECK-NEXT: ret
%cond = icmp eq <vscale x 16 x i64> %va, zeroinitializer
%sel = select <vscale x 16 x i1> %cond, <vscale x 16 x double> %vb, <vscale x 16 x double> zeroinitializer