Skip to content

Commit f39a3be

Browse files
committed
[VPlan] Introduce and use m_SpecificSInt, rebase updates
1 parent 290b2ff commit f39a3be

File tree

4 files changed

+49
-35
lines changed

4 files changed

+49
-35
lines changed

llvm/include/llvm/ADT/APInt.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,18 @@ class [[nodiscard]] APInt {
561561
return I1.zext(I2.getBitWidth()) == I2;
562562
}
563563

564+
/// Determine if two APInts have the same value, after sign-extending
565+
/// the narrower one (if needed) so that the bit-widths match.
566+
static bool isSameSignedValue(const APInt &I1, const APInt &I2) {
567+
if (I1.getBitWidth() == I2.getBitWidth())
568+
return I1 == I2;
569+
// I1 is wider: sign-extend I2 to I1's width before comparing.
570+
if (I1.getBitWidth() > I2.getBitWidth())
571+
return I1 == I2.sext(I1.getBitWidth());
572+
// Otherwise I2 is wider: sign-extend I1 to I2's width.
573+
return I1.sext(I2.getBitWidth()) == I2;
574+
}
575+
564576
/// Overload to compute a hash_code for an APInt value.
565577
LLVM_ABI friend hash_code hash_value(const APInt &Arg);
566578

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,9 @@ struct deferredval_ty {
9595
/// whichever value m_VPValue(X) populated.
9696
inline deferredval_ty m_Deferred(VPValue *const &V) { return V; }
9797

98-
/// Match an integer constant or vector of constants if Pred::isValue returns
99-
/// true for the APInt. \p BitWidth optionally specifies the bitwidth the
100-
/// matched constant must have. If it is 0, the matched constant can have any
101-
/// bitwidth.
98+
/// Match an integer constant if Pred::isValue returns true for the APInt. \p
99+
/// BitWidth optionally specifies the bitwidth the matched constant must have.
100+
/// If it is 0, the matched constant can have any bitwidth.
102101
template <typename Pred, unsigned BitWidth = 0> struct int_pred_ty {
103102
Pred P;
104103

@@ -123,15 +122,18 @@ template <typename Pred, unsigned BitWidth = 0> struct int_pred_ty {
123122
}
124123
};
125124

126-
/// Match a specified integer value or vector of all elements of that
127-
/// value. \p BitWidth optionally specifies the bitwidth the matched constant
128-
/// must have. If it is 0, the matched constant can have any bitwidth.
125+
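/// Match a specified integer value. If IsSigned is true, the value is compared
/// with APInt::isSameSignedValue (the narrower operand is sign-extended);
/// otherwise it is compared with APInt::isSameValue.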
129126
struct is_specific_int {
130127
APInt Val;
128+
bool IsSigned;
131129

132-
is_specific_int(APInt Val) : Val(std::move(Val)) {}
130+
is_specific_int(APInt Val, bool IsSigned = false)
131+
: Val(std::move(Val)), IsSigned(IsSigned) {}
133132

134-
bool isValue(const APInt &C) const { return APInt::isSameValue(Val, C); }
133+
bool isValue(const APInt &C) const {
134+
return IsSigned ? APInt::isSameSignedValue(Val, C)
135+
: APInt::isSameValue(Val, C);
136+
}
135137
};
136138

137139
template <unsigned Bitwidth = 0>
@@ -141,6 +143,11 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
141143
return specific_intval<0>(is_specific_int(APInt(64, V)));
142144
}
143145

146+
/// Match the signed integer constant \p V. \p V is materialized as a 64-bit
/// signed APInt and the predicate is built with IsSigned set, so candidates
/// are compared using APInt::isSameSignedValue.
inline specific_intval<0> m_SpecificSInt(int64_t V) {
147+
return specific_intval<0>(
148+
is_specific_int(APInt(64, V, /*isSigned=*/true), /*IsSigned=*/true));
149+
}
150+
144151
/// Match the integer constant 0 through specific_intval<1>.
/// NOTE(review): the template argument 1 presumably restricts the match to
/// 1-bit constants (i.e. boolean false) — confirm against specific_intval.
inline specific_intval<1> m_False() {
145152
return specific_intval<1>(is_specific_int(APInt(64, 0)));
146153
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2933,11 +2933,11 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
29332933
VEPR->getGEPNoWrapFlags(), &EVL, VEPR->getDebugLoc());
29342934
};
29352935

2936-
auto m_VecEndPtrVF = [&Plan](VPValue *&Addr) { // NOLINT
2936+
auto m_VecEndPtrVF = [&Plan](VPValue *&Addr, int64_t Stride) { // NOLINT
29372937
return m_VecEndPtr(
29382938
m_VPValue(Addr),
29392939
m_c_Mul(
2940-
m_VPValue(),
2940+
m_SpecificSInt(Stride),
29412941
m_Sub(m_ZExtOrTruncOrSelf(m_Specific(&Plan->getVF())), m_One())));
29422942
};
29432943

@@ -2951,7 +2951,10 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
29512951
if (match(&CurRecipe, m_Reverse(m_VPValue(ReversedVal))) &&
29522952
match(ReversedVal,
29532953
m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
2954-
match(EndPtr, m_VecEndPtrVF(Addr)) &&
2954+
isa<VPVectorEndPointerRecipe>(EndPtr) &&
2955+
match(EndPtr,
2956+
m_VecEndPtrVF(
2957+
Addr, cast<VPVectorEndPointerRecipe>(EndPtr)->getStride())) &&
29552958
cast<VPWidenLoadRecipe>(ReversedVal)->isReverse()) {
29562959
auto *LoadR = new VPWidenLoadEVLRecipe(
29572960
*cast<VPWidenLoadRecipe>(ReversedVal), AdjustEndPtr(EndPtr), EVL, Mask);
@@ -2971,7 +2974,10 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
29712974
if (match(&CurRecipe,
29722975
m_MaskedStore(m_VPValue(EndPtr), m_Reverse(m_VPValue(ReversedVal)),
29732976
m_RemoveMask(HeaderMask, Mask))) &&
2974-
match(EndPtr, m_VecEndPtrVF(Addr)) &&
2977+
isa<VPVectorEndPointerRecipe>(EndPtr) &&
2978+
match(EndPtr,
2979+
m_VecEndPtrVF(
2980+
Addr, cast<VPVectorEndPointerRecipe>(EndPtr)->getStride())) &&
29752981
cast<VPWidenStoreRecipe>(CurRecipe).isReverse()) {
29762982
auto *NewReverse = new VPWidenIntrinsicRecipe(
29772983
Intrinsic::experimental_vp_reverse,
@@ -3550,10 +3556,9 @@ void VPlanTransforms::createInterleaveGroups(
35503556
// the pointer operand of the interleaved access is supposed to be uniform.
35513557
if (IG->isReverse()) {
35523558
B.setInsertPoint(InsertPos);
3553-
auto *ReversePtr = B.createVectorEndPointerRecipe(
3559+
Addr = B.createVectorEndPointerRecipe(
35543560
Addr, getLoadStoreType(IRInsertPos), -(int64_t)IG->getFactor(), NW,
35553561
&Plan.getVF(), InsertPos->getDebugLoc());
3556-
Addr = ReversePtr;
35573562
}
35583563
auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues,
35593564
InsertPos->getMask(), NeedsMaskForGaps,

llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,13 +1008,11 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr %
10081008
; IC1VF4-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1)
10091009
; IC1VF4-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
10101010
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
1011-
; IC1VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
1012-
; IC1VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
1011+
; IC1VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -3
10131012
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
10141013
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
10151014
; IC1VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
1016-
; IC1VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
1017-
; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -3
1015+
; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
10181016
; IC1VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
10191017
; IC1VF4-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD1]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
10201018
; IC1VF4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i8> [[REVERSE]], [[REVERSE2]]
@@ -1068,14 +1066,10 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr %
10681066
; IC4VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -1)
10691067
; IC4VF4-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -1)
10701068
; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
1071-
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
1072-
; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -3
1073-
; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -4
1074-
; IC4VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 -3
1075-
; IC4VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -8
1076-
; IC4VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 -3
1077-
; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -12
1078-
; IC4VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 -3
1069+
; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
1070+
; IC4VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -7
1071+
; IC4VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -11
1072+
; IC4VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -15
10791073
; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
10801074
; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
10811075
; IC4VF4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
@@ -1085,14 +1079,10 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr %
10851079
; IC4VF4-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
10861080
; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
10871081
; IC4VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
1088-
; IC4VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 0
1089-
; IC4VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 -3
1090-
; IC4VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -4
1091-
; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 -3
1092-
; IC4VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -8
1093-
; IC4VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -3
1094-
; IC4VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -12
1095-
; IC4VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP21]], i64 -3
1082+
; IC4VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3
1083+
; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -7
1084+
; IC4VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -11
1085+
; IC4VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -15
10961086
; IC4VF4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i8>, ptr [[TMP16]], align 1
10971087
; IC4VF4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
10981088
; IC4VF4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1

0 commit comments

Comments
 (0)