diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 44d4d92d4a7e2..332f2dc5f9b34 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -78,6 +78,11 @@ class VPBuilder { setInsertPoint(TheBB, IP); } + VPlan &getPlan() const { + assert(getInsertBlock() && "Expected insert point to be set"); + return *getInsertBlock()->getPlan(); + } + /// Clear the insertion point: created instructions will not be inserted into /// a block. void clearInsertionPoint() { @@ -280,7 +285,7 @@ class VPBuilder { } VPValue *createElementCount(Type *Ty, ElementCount EC) { - VPlan &Plan = *getInsertBlock()->getPlan(); + VPlan &Plan = getPlan(); VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue()); if (EC.isScalable()) { VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty); @@ -353,6 +358,13 @@ class VPBuilder { FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL)); } + /// Create and insert a VectorEndPointerRecipe: requires insert-point to be + /// set. + VPVectorEndPointerRecipe * + createVectorEndPointerRecipe(VPValue *Ptr, Type *SourceElementType, + int64_t Stride, GEPNoWrapFlags GEPFlags, + VPValue *VF, DebugLoc DbgLoc); + VPExpandSCEVRecipe *createExpandSCEV(const SCEV *Expr) { return tryInsertInstruction(new VPExpandSCEVRecipe(Expr)); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index abac45b265d10..1e571f6a5be09 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7731,16 +7731,16 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI, CM.foldTailByMasking() || !GEP ? GEPNoWrapFlags::none() : GEP->getNoWrapFlags().withoutNoUnsignedWrap(); - VectorPtr = new VPVectorEndPointerRecipe( - Ptr, &Plan.getVF(), getLoadStoreType(I), - /*Stride*/ -1, Flags, VPI->getDebugLoc()); + VectorPtr = Builder.createVectorEndPointerRecipe( + Ptr, getLoadStoreType(I), + /*Stride=*/-1, Flags, &Plan.getVF(), VPI->getDebugLoc()); } else { VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), GEP ? GEP->getNoWrapFlags() : GEPNoWrapFlags::none(), VPI->getDebugLoc()); + Builder.insert(VectorPtr); } - Builder.insert(VectorPtr); Ptr = VectorPtr; } @@ -8093,6 +8093,30 @@ VPRecipeBuilder::tryToCreateWidenNonPhiRecipe(VPSingleDefRecipe *R, return tryToWiden(VPI); } +VPVectorEndPointerRecipe * +VPBuilder::createVectorEndPointerRecipe(VPValue *Ptr, Type *SourceElementTy, + int64_t Stride, GEPNoWrapFlags GEPFlags, + VPValue *VF, DebugLoc DbgLoc) { + // Offset for Part 0 = Stride * (VF - 1). 
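+  // A worked example (illustration only, not part of the patch): a reversed
+  // access with Stride = -1 and a fixed VF of 4 gets a part-0 offset of
+  // -1 * (4 - 1) = -3, i.e. a pointer to the last lane of the first part.
+  // Unrolling later extends this offset by Stride * VF for each subsequent
+  // part (see UnrollState::unrollRecipeByUF in VPlanUnroll.cpp).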
+ VPlan &Plan = getPlan(); + VPTypeAnalysis TypeInfo(Plan); + const DataLayout &DL = + Plan.getScalarHeader()->getIRBasicBlock()->getDataLayout(); + Type *IndexTy = DL.getIndexType(TypeInfo.inferScalarType(Ptr)); + Type *VFTy = TypeInfo.inferScalarType(VF); + VPValue *VFCast = + createScalarZExtOrTrunc(VF, IndexTy, VFTy, DebugLoc::getUnknown()); + VPInstruction *VFMinusOne = createOverflowingOp( + Instruction::Sub, {VFCast, Plan.getConstantInt(IndexTy, 1u)}, + {true, true}); + VPInstruction *StridexVFMinusOne = createOverflowingOp( + Instruction::Mul, + {VFMinusOne, Plan.getConstantInt(IndexTy, Stride, /*IsSigned=*/true)}); + auto *VEPR = tryInsertInstruction(new VPVectorEndPointerRecipe( + Ptr, StridexVFMinusOne, SourceElementTy, Stride, GEPFlags, DbgLoc)); + return VEPR; +} + void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF) { if (ElementCount::isKnownGT(MinVF, MaxVF)) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fd8d959c65df7..525f516452758 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1994,8 +1994,7 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { /// A recipe to compute a pointer to the last element of each part of a widened /// memory access for widened memory accesses of SourceElementTy. Used for /// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. -class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags, - public VPUnrollPartAccessor<2> { +class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags { Type *SourceElementTy; /// The constant stride of the pointer computed by this recipe, expressed in @@ -2003,9 +2002,9 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags, int64_t Stride; public: - VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, + VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *Offset, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL) - : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF}, + : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, Offset}, GEPFlags, DL), SourceElementTy(SourceElementTy), Stride(Stride) { assert(Stride < 0 && "Stride must be negative"); @@ -2014,8 +2013,8 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags, VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC) Type *getSourceElementType() const { return SourceElementTy; } - VPValue *getVFValue() { return getOperand(1); } - const VPValue *getVFValue() const { return getOperand(1); } + int64_t getStride() const { return Stride; } + VPValue *getOffset() const { return getOperand(1); } void execute(VPTransformState &State) override; @@ -2041,8 +2040,8 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags, } VPVectorEndPointerRecipe *clone() override { - return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), - getSourceElementType(), Stride, + return new VPVectorEndPointerRecipe(getOperand(0), getOffset(), + getSourceElementType(), getStride(), getGEPNoWrapFlags(), getDebugLoc()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 9428070b02f1a..72343e811aa55 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -95,10 +95,9 @@ struct deferredval_ty { /// whichever value m_VPValue(X) populated. 
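/// For example, m_c_Mul(m_VPValue(X), m_Deferred(X)) matches a multiplication
/// of a value by itself, with X bound when the first operand matches.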
inline deferredval_ty m_Deferred(VPValue *const &V) { return V; } -/// Match an integer constant or vector of constants if Pred::isValue returns -/// true for the APInt. \p BitWidth optionally specifies the bitwidth the -/// matched constant must have. If it is 0, the matched constant can have any -/// bitwidth. +/// Match an integer constant if Pred::isValue returns true for the APInt. \p +/// BitWidth optionally specifies the bitwidth the matched constant must have. +/// If it is 0, the matched constant can have any bitwidth. template struct int_pred_ty { Pred P; @@ -119,15 +118,17 @@ template struct int_pred_ty { } }; -/// Match a specified integer value or vector of all elements of that -/// value. \p BitWidth optionally specifies the bitwidth the matched constant -/// must have. If it is 0, the matched constant can have any bitwidth. +/// Match a specified signed or unsigned integer value. struct is_specific_int { APInt Val; + bool IsSigned; - is_specific_int(APInt Val) : Val(std::move(Val)) {} + is_specific_int(APInt Val, bool IsSigned = false) + : Val(std::move(Val)), IsSigned(IsSigned) {} - bool isValue(const APInt &C) const { return APInt::isSameValue(Val, C); } + bool isValue(const APInt &C) const { + return APInt::isSameValue(Val, C, IsSigned); + } }; template @@ -137,6 +138,11 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) { return specific_intval<0>(is_specific_int(APInt(64, V))); } +inline specific_intval<0> m_SpecificSInt(int64_t V) { + return specific_intval<0>( + is_specific_int(APInt(64, V, /*isSigned=*/true), /*IsSigned=*/true)); +} + inline specific_intval<1> m_False() { return specific_intval<1>(is_specific_int(APInt(64, 0))); } @@ -549,6 +555,15 @@ m_ZExtOrSelf(const Op0_t &Op0) { return m_CombineOr(m_ZExt(Op0), Op0); } +template +inline match_combine_or< + match_combine_or, + AllRecipe_match>, + Op0_t> +m_ZExtOrTruncOrSelf(const Op0_t &Op0) { + return m_CombineOr(m_CombineOr(m_ZExt(Op0), m_Trunc(Op0)), Op0); +} + template inline AllRecipe_match m_Binary(const Op0_t &Op0, const Op1_t &Op1) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 6fb4225f32800..bed28f870a127 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2594,29 +2594,10 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent, void VPVectorEndPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; - unsigned CurrentPart = getUnrollPart(*this); - const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this)); - - // The wide store needs to start at the last vector element. 
- Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); - if (IndexTy != RunTimeVF->getType()) - RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy); - // NumElt = Stride * CurrentPart * RunTimeVF - Value *NumElt = Builder.CreateMul( - ConstantInt::getSigned(IndexTy, Stride * (int64_t)CurrentPart), - RunTimeVF); - // LastLane = Stride * (RunTimeVF - 1) - Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1)); - if (Stride != 1) - LastLane = - Builder.CreateMul(ConstantInt::getSigned(IndexTy, Stride), LastLane); - Value *Ptr = State.get(getOperand(0), VPLane(0)); - Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, NumElt, "", + Value *Ptr = State.get(getOperand(0), true); + Value *Offset = State.get(getOffset(), true); + Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Offset, "", getGEPNoWrapFlags()); - ResultPtr = Builder.CreateGEP(getSourceElementType(), ResultPtr, LastLane, "", - getGEPNoWrapFlags()); - State.set(this, ResultPtr, /*IsScalar*/ true); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 57fb29c5ec618..9715ddab34c77 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2994,10 +2994,19 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, /// Adjust any end pointers so that they point to the end of EVL lanes not VF. auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) { - auto *EVLEndPtr = cast(EndPtr)->clone(); - EVLEndPtr->insertBefore(&CurRecipe); - EVLEndPtr->setOperand(1, &EVL); - return EVLEndPtr; + auto *VEPR = cast(EndPtr); + VPBuilder Builder(&CurRecipe); + return Builder.createVectorEndPointerRecipe( + VEPR->getOperand(0), VEPR->getSourceElementType(), VEPR->getStride(), + VEPR->getGEPNoWrapFlags(), &EVL, VEPR->getDebugLoc()); + }; + + auto m_VecEndPtrVF = [&Plan](VPValue *&Addr, int64_t Stride) { // NOLINT + return m_VecEndPtr( + m_VPValue(Addr), + m_c_Mul( + m_SpecificSInt(Stride), + m_Sub(m_ZExtOrTruncOrSelf(m_Specific(&Plan->getVF())), m_One()))); }; if (match(&CurRecipe, @@ -3010,7 +3019,10 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, if (match(&CurRecipe, m_Reverse(m_VPValue(ReversedVal))) && match(ReversedVal, m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) && - match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) && + isa(EndPtr) && + match(EndPtr, + m_VecEndPtrVF( + Addr, cast(EndPtr)->getStride())) && cast(ReversedVal)->isReverse()) { auto *LoadR = new VPWidenLoadEVLRecipe( *cast(ReversedVal), AdjustEndPtr(EndPtr), EVL, Mask); @@ -3030,7 +3042,10 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_Reverse(m_VPValue(ReversedVal)), m_RemoveMask(HeaderMask, Mask))) && - match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) && + isa(EndPtr) && + match(EndPtr, + m_VecEndPtrVF( + Addr, cast(EndPtr)->getStride())) && cast(CurRecipe).isReverse()) { auto *NewReverse = new VPWidenIntrinsicRecipe( Intrinsic::experimental_vp_reverse, @@ -3121,10 +3136,10 @@ static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) { VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); - assert(all_of(Plan.getVF().users(), - IsaPred) && - "User of VF that we can't transform to EVL."); + assert( + all_of(Plan.getVF().users(), IsaPred) && + "User of VF that we can't transform to 
EVL."); Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) { return isa(U); }); @@ -3608,6 +3623,7 @@ void VPlanTransforms::createInterleaveGroups( // Get or create the start address for the interleave group. VPValue *Addr = Start->getAddr(); VPRecipeBase *AddrDef = Addr->getDefiningRecipe(); + VPBuilder B(InsertPos); if (AddrDef && !VPDT.properlyDominates(AddrDef, InsertPos)) { // We cannot re-use the address of member zero because it does not // dominate the insert position. Instead, use the address of the insert @@ -3623,7 +3639,6 @@ void VPlanTransforms::createInterleaveGroups( IG->getIndex(IRInsertPos), /*IsSigned=*/true); VPValue *OffsetVPV = Plan.getConstantInt(-Offset); - VPBuilder B(InsertPos); Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW); } // If the group is reverse, adjust the index to refer to the last vector @@ -3631,11 +3646,10 @@ void VPlanTransforms::createInterleaveGroups( // lane, rather than directly getting the pointer for lane VF - 1, because // the pointer operand of the interleaved access is supposed to be uniform. if (IG->isReverse()) { - auto *ReversePtr = new VPVectorEndPointerRecipe( - Addr, &Plan.getVF(), getLoadStoreType(IRInsertPos), - -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc()); - ReversePtr->insertBefore(InsertPos); - Addr = ReversePtr; + B.setInsertPoint(InsertPos); + Addr = B.createVectorEndPointerRecipe( + Addr, getLoadStoreType(IRInsertPos), -(int64_t)IG->getFactor(), NW, + &Plan.getVF(), InsertPos->getDebugLoc()); } auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps, diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 75b8164a33a7f..2af60b0be533d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -342,6 +342,25 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { Copy->addOperand(VFxPart); continue; } + if (auto *VEPR = dyn_cast(&R)) { + VPBuilder Builder(VEPR); + VPValue *PrevOffset = + cast(getValueForPart(VEPR, Part - 1)) + ->getOffset(); + Type *IndexTy = TypeInfo.inferScalarType(PrevOffset); + Type *VFTy = TypeInfo.inferScalarType(&Plan.getVF()); + VPValue *VF = Builder.createScalarZExtOrTrunc( + &Plan.getVF(), IndexTy, VFTy, DebugLoc::getUnknown()); + // Offset = PrevOffset + Stride * VF. + VPValue *VFxStride = Builder.createOverflowingOp( + Instruction::Mul, {VF, Plan.getConstantInt(IndexTy, VEPR->getStride(), + /*IsSigned=*/true)}); + VPValue *Offset = Builder.createOverflowingOp(Instruction::Add, + {PrevOffset, VFxStride}); + Copy->setOperand(0, VEPR->getOperand(0)); + Copy->setOperand(1, Offset); + continue; + } if (auto *Red = dyn_cast(&R)) { auto *Phi = dyn_cast(R.getOperand(0)); if (Phi && Phi->isOrdered()) { @@ -361,13 +380,10 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { // Add operand indicating the part to generate code for, to recipes still // requiring it. 
- if (isa(Copy) || + if (isa(Copy) || match(Copy, m_VPInstruction())) Copy->addOperand(getConstantInt(Part)); - - if (isa(R)) - Copy->setOperand(0, R.getOperand(0)); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 9098b9ce8562d..001eb07dde2f3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -162,7 +162,18 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { } return true; }; - return all_of(EVL.users(), [this, &VerifyEVLUse](VPUser *U) { + auto VerifyEVLUseInVecEndPtr = [&EVL](auto &VEPRs) { + if (all_of(VEPRs, [&EVL](VPUser *U) { + auto *VEPR = cast(U); + return match(VEPR->getOffset(), + m_c_Mul(m_SpecificSInt(VEPR->getStride()), + m_Sub(m_Specific(&EVL), m_One()))); + })) + return true; + errs() << "Expected VectorEndPointer with EVL operand\n"; + return false; + }; + return all_of(EVL.users(), [&](VPUser *U) { return TypeSwitch(U) .Case([&](const VPWidenIntrinsicRecipe *S) { return VerifyEVLUse(*S, S->getNumOperands() - 1); @@ -177,16 +188,29 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { } return VerifyEVLUse(*R, 2); }) - .Case( + .Case( [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) .Case( [&](const VPInstructionWithType *S) { return VerifyEVLUse(*S, 0); }) .Case([&](const VPInstruction *I) { if (I->getOpcode() == Instruction::PHI || - I->getOpcode() == Instruction::ICmp || - I->getOpcode() == Instruction::Sub) + I->getOpcode() == Instruction::ICmp) return VerifyEVLUse(*I, 1); + if (I->getOpcode() == Instruction::Sub) { + // If Sub has a single user that's a SingleDefRecipe (which is + // expected to be a Mul), filter its users, in turn, to get + // VectorEndPointerRecipes, and verify that all the offsets match + // (EVL - 1) * Stride.
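+ // For example, with Stride = -1 the accepted offset chain is:
+ //   %sub = sub %evl, 1
+ //   %mul = mul -1, %sub            ; i.e. (EVL - 1) * Stride
+ //   vector-end-pointer %ptr, %mul
+ // (VPlan-style pseudo-IR, shown here for illustration only.)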
+ auto *VPI = + dyn_cast_if_present(I->getSingleUser()); + if (VPI) { + auto VEPRs = make_filter_range(VPI->users(), + IsaPred); + if (!VEPRs.empty()) + return VerifyEVLUseInVecEndPtr(VEPRs); + } + return VerifyEVLUse(*I, 1); + } switch (I->getOpcode()) { case Instruction::Add: break; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index ce23149c5cca7..dde1b8bc607d3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -359,6 +359,8 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i64 2, [[TMP5]] ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i32 1023), [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32 @@ -371,8 +373,6 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i64 2, [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP8]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) @@ -383,9 +383,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[REVERSE]], [[VEC_IND]] ; CHECK-NEXT: [[TMP13:%.*]] = sub nsw [[REVERSE1]], [[VEC_IND]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP18:%.*]] = sub nsw i64 2, [[TMP15]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP8]] ; CHECK-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP12]]) ; CHECK-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP13]]) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[REVERSE2]], [[REVERSE3]]) @@ -1550,6 +1548,8 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub nsw i64 4, [[TMP6]] ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i32 1023), [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32 @@ -1562,8 +1562,6 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; 
CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_XYZT:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = sub nsw i64 4, [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 [[TMP9]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP10]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[WIDE_VEC]]) @@ -1580,9 +1578,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK-NEXT: [[TMP19:%.*]] = mul nsw [[REVERSE4]], [[VEC_IND]] ; CHECK-NEXT: [[TMP20:%.*]] = shl nuw nsw [[REVERSE5]], [[VEC_IND]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP25:%.*]] = sub nsw i64 4, [[TMP22]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP9]] ; CHECK-NEXT: [[REVERSE6:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP17]]) ; CHECK-NEXT: [[REVERSE7:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP18]]) ; CHECK-NEXT: [[REVERSE8:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP19]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index ba6bd8f36f5e4..9ee593c658c0f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -24,28 +24,22 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP4]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 1, [[TMP5]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[N]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP5]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[TMP5]] -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP5]] +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[TMP12]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP22]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP15]], align 8 ; CHECK-NEXT: [[TMP16:%.*]] = fadd [[WIDE_LOAD]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP17:%.*]] = fadd [[WIDE_LOAD1]], splat (double 1.000000e+00) -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP5]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]] 
-; CHECK-NEXT: [[TMP21:%.*]] = sub i64 0, [[TMP5]] -; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP5]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]] ; CHECK-NEXT: store [[TMP16]], ptr [[TMP20]], align 8 ; CHECK-NEXT: store [[TMP17]], ptr [[TMP24]], align 8 @@ -101,28 +95,22 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP7]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP8]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[N]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP8]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP8]] +; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP15]], [[TMP8]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP16]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP25]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP17]], align 8 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP18]], align 8 ; CHECK-NEXT: [[TMP19:%.*]] = add [[WIDE_LOAD]], splat (i64 1) ; CHECK-NEXT: [[TMP20:%.*]] = add [[WIDE_LOAD3]], splat (i64 1) -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP8]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = sub i64 0, [[TMP8]] -; CHECK-NEXT: [[TMP25:%.*]] = sub i64 1, [[TMP8]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP26]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[TMP26]], i64 [[TMP25]] ; CHECK-NEXT: store [[TMP19]], ptr [[TMP23]], align 8 ; CHECK-NEXT: store [[TMP20]], ptr [[TMP27]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll index 054a2706cb232..bf165d9ccaf01 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll @@ -34,14 +34,12 @@ define void @vector_reverse_f64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds 
double, ptr [[TMP3]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 -7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 -7 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <8 x double> [[WIDE_LOAD]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fadd <8 x double> [[REVERSE]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 -7 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 -7 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: store <8 x double> [[REVERSE3]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -97,14 +95,12 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -7 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i64> [[WIDE_LOAD]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i64> [[REVERSE]], splat (i64 1) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 -7 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -7 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: store <8 x i64> [[REVERSE3]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -153,6 +149,8 @@ define i32 @reverse_store_with_partial_reduction(ptr noalias %dst, ptr noalias % ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP10:%.*]] = sub nuw nsw i64 [[TMP4]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP10]], -1 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -170,30 +168,18 @@ define i32 @reverse_store_with_partial_reduction(ptr noalias %dst, ptr noalias % ; CHECK-NEXT: [[PARTIAL_REDUCE6]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv8i32( [[VEC_PHI3]], [[TMP8]]) ; CHECK-NEXT: [[PARTIAL_REDUCE7]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv8i32( [[VEC_PHI4]], [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP4]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i16, 
ptr [[TMP13]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP4]] -; CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP4]], 1 -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[TMP18]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 -2, [[TMP4]] -; CHECK-NEXT: [[TMP21:%.*]] = sub i64 [[TMP4]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 -1, [[TMP21]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP4]], -1 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP20]], [[TMP12]] +; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP25]], [[TMP12]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP20]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[TMP23]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = mul i64 -3, [[TMP4]] -; CHECK-NEXT: [[TMP26:%.*]] = sub i64 [[TMP4]], 1 -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP26]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP13]] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP25]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[TMP28]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP15]] ; CHECK-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv8i16( [[BROADCAST_SPLAT]]) -; CHECK-NEXT: store [[REVERSE]], ptr [[TMP14]], align 2 +; CHECK-NEXT: store [[REVERSE]], ptr [[TMP23]], align 2 ; CHECK-NEXT: store [[REVERSE]], ptr [[TMP19]], align 2 -; CHECK-NEXT: store [[REVERSE]], ptr [[TMP24]], align 2 +; CHECK-NEXT: store [[REVERSE]], ptr [[TMP28]], align 2 ; CHECK-NEXT: store [[REVERSE]], ptr [[TMP29]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -227,8 +213,7 @@ define i32 @reverse_store_with_partial_reduction(ptr noalias %dst, ptr noalias % ; CHECK-NEXT: [[TMP35:%.*]] = sext <4 x i16> [[BROADCAST_SPLAT15]] to <4 x i32> ; CHECK-NEXT: [[PARTIAL_REDUCE16]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v4i32(<2 x i32> [[VEC_PHI13]], <4 x i32> [[TMP35]]) ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i16, ptr [[DST]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i16, ptr [[TMP36]], i64 0 -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i16, ptr [[TMP37]], i64 -3 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i16, ptr [[TMP36]], i64 -3 ; CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLAT15]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: store <4 x i16> [[REVERSE17]], ptr [[TMP38]], align 2 ; CHECK-NEXT: [[INDEX_NEXT18]] = add nuw i64 [[INDEX12]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index d0dfc8e6fbd2f..a2e5edd3127c5 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -61,8 +61,7 @@ define void @test_stride-1_4i32(ptr readonly %data, ptr noalias nocapture %dst, ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[INDEX]], -1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
i32, ptr [[TMP4]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> splat (i32 5), [[REVERSE]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index 9ea95658818fe..42a9bcc62a114 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -254,8 +254,7 @@ define void @strides_different_direction(ptr noalias nocapture %A, ptr noalias n ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[N:%.*]], [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index 850947addf9c4..5c494af1289ef 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -261,22 +261,14 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]] -; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0 -; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3 -; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4 -; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 -; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8 -; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3 -; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12 -; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3 -; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16 -; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3 -; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20 -; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3 -; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24 -; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3 -; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -28 -; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = 
getelementptr inbounds float, ptr [[TMP26]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -7 +; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -11 +; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -15 +; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -19 +; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -23 +; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -27 +; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -31 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP61]], align 4 @@ -340,8 +332,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = sext i32 [[TMP100]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]] -; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0 -; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -1 +; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 -1 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x float>, ptr [[TMP104]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <2 x float> [[WIDE_LOAD23]], <2 x float> poison, <2 x i32> ; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], splat (float 1.000000e+00) @@ -384,22 +375,14 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]] -; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0 -; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3 -; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4 -; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 -; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8 -; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3 -; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12 -; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3 -; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16 -; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3 -; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20 -; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3 -; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24 -; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3 -; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr 
[[TMP48]], i64 -28 -; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -7 +; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -11 +; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -15 +; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -19 +; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -23 +; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -27 +; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -31 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP61]], align 4 @@ -463,8 +446,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = sext i32 [[TMP100]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]] -; VF-FOUR-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0 -; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 -3 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, ptr [[TMP104]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x float> [[WIDE_LOAD23]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], splat (float 1.000000e+00) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dbg-tail-folding-by-evl.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dbg-tail-folding-by-evl.ll index 1af6ba9f78695..3870aa83d5346 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dbg-tail-folding-by-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dbg-tail-folding-by-evl.ll @@ -30,13 +30,10 @@ define void @reverse_store(ptr %a, i64 %n) !dbg !3 { ; CHECK-NEXT: [[TMP8:%.*]] = extractelement [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[TMP8]], !dbg [[DBG7:![0-9]+]] ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.experimental.vp.reverse.nxv2i64( [[TMP7]], splat (i1 true), i32 [[TMP4]]), !dbg [[DBG8:![0-9]+]] -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP4]] to i64, !dbg [[DBG8]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP11]], !dbg [[DBG8]] -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], 1, !dbg [[DBG8]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 -1, [[TMP13]], !dbg [[DBG8]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[TMP9]], i64 [[TMP12]], !dbg [[DBG8]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP15]], i64 [[TMP14]], !dbg [[DBG8]] -; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP10]], ptr align 8 [[TMP16]], splat (i1 true), i32 [[TMP4]]), !dbg [[DBG8]] +; CHECK-NEXT: [[TMP11:%.*]] = sub nuw nsw i64 [[TMP5]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], -1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP9]], i64 [[TMP12]], !dbg [[DBG8]] +; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( 
[[TMP10]], ptr align 8 [[TMP13]], splat (i1 true), i32 [[TMP4]]), !dbg [[DBG8]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw [[VEC_IND]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/predicated-reverse-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/predicated-reverse-store.ll index b016ff85a7142..fa247b1a042a3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/predicated-reverse-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/predicated-reverse-store.ll @@ -19,16 +19,13 @@ define void @reverse_predicated_store(i1 %c, ptr %dst, i64 %n) #0 { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 [[IV_NEXT]] ; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( zeroinitializer, splat (i1 true), i32 [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 -1, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[ARRAYIDX]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = sub nuw nsw i64 [[TMP4]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], -1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[ARRAYIDX]], i64 [[TMP7]] ; CHECK-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP1]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[TMP12]], ptr align 4 [[TMP9]], [[VP_REVERSE_MASK]], i32 [[TMP1]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP4]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index e47f3167e58cc..220410e993765 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -29,25 +29,17 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]] -; RV64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP24]] -; RV64-NEXT: [[TMP10:%.*]] = sub i64 [[TMP24]], 1 -; RV64-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP10]] -; RV64-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]] -; RV64-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 [[TMP11]] +; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64 +; RV64-NEXT: [[TMP4:%.*]] = sub nuw nsw i64 [[TMP22]], 1 +; RV64-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], -1 +; RV64-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP5]] ; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], splat (i1 true), 
i32 [[TMP19]]) ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP19]]) ; RV64-NEXT: [[TMP14:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] ; RV64-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP14]], splat (i1 true), i32 [[TMP19]]) -; RV64-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP16]] -; RV64-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], 1 -; RV64-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP17]] -; RV64-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP25]] -; RV64-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 [[TMP18]] +; RV64-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP5]] ; RV64-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP21]], splat (i1 true), i32 [[TMP19]]) -; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64 ; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]] ; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] ; RV64-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -70,21 +62,15 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]] -; RV32-NEXT: [[TMP10:%.*]] = mul i32 0, [[TMP9]] -; RV32-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], 1 -; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP11]] -; RV32-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i32 [[TMP10]] -; RV32-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 [[TMP12]] +; RV32-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP9]], 1 +; RV32-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], -1 +; RV32-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP8]], i32 [[TMP4]] ; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[TMP15:%.*]] = add [[REVERSE]], splat (i32 1) ; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] ; RV32-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP15]], splat (i1 true), i32 [[TMP9]]) -; RV32-NEXT: [[TMP17:%.*]] = mul i32 0, [[TMP9]] -; RV32-NEXT: [[TMP19:%.*]] = sub i32 [[TMP9]], 1 -; RV32-NEXT: [[TMP20:%.*]] = mul i32 -1, [[TMP19]] -; RV32-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP17]] -; RV32-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP18]], i32 [[TMP20]] +; RV32-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP4]] ; RV32-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP22]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[TMP23:%.*]] = zext i32 [[TMP9]] to i64 ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP23]], [[INDEX]] @@ -110,39 +96,27 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] ; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]] +; RV64-UF2-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 [[TMP12]], -1 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64-UF2: 
[[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]] -; RV64-UF2-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP11]] -; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP10]] -; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP12]] -; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP16]] +; RV64-UF2-NEXT: [[TMP10:%.*]] = mul i64 [[TMP5]], -1 +; RV64-UF2-NEXT: [[TMP11:%.*]] = add i64 [[TMP15]], [[TMP10]] ; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP15]] -; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP17]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 4 ; RV64-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP19]], align 4 ; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD]]) ; RV64-UF2-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD1]]) ; RV64-UF2-NEXT: [[TMP20:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-UF2-NEXT: [[TMP21:%.*]] = add [[REVERSE2]], splat (i32 1) ; RV64-UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] -; RV64-UF2-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP24:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP25:%.*]] = mul i64 -1, [[TMP24]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[TMP25]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 -1, [[TMP29]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP28]] -; RV64-UF2-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i64 [[TMP30]] +; RV64-UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP11]] ; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP20]]) ; RV64-UF2-NEXT: [[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP21]]) ; RV64-UF2-NEXT: store [[REVERSE3]], ptr [[TMP27]], align 4 @@ -214,25 +188,17 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[TMP21:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 ; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP22]] -; RV64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP28:%.*]] = mul i64 0, [[TMP24]] -; RV64-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], 1 -; RV64-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP25]] -; RV64-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP23]], i64 [[TMP28]] -; RV64-NEXT: [[TMP27:%.*]] = getelementptr i32, 
ptr [[TMP38]], i64 [[TMP26]] +; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64 +; RV64-NEXT: [[TMP17:%.*]] = sub nuw nsw i64 [[TMP36]], 1 +; RV64-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], -1 +; RV64-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP23]], i64 [[TMP18]] ; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP27]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[TMP29:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]] ; RV64-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP29]], splat (i1 true), i32 [[TMP20]]) -; RV64-NEXT: [[TMP39:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP31:%.*]] = mul i64 0, [[TMP39]] -; RV64-NEXT: [[TMP32:%.*]] = sub i64 [[TMP39]], 1 -; RV64-NEXT: [[TMP33:%.*]] = mul i64 -1, [[TMP32]] -; RV64-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP30]], i64 [[TMP31]] -; RV64-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i64 [[TMP33]] +; RV64-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP30]], i64 [[TMP18]] ; RV64-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP35]], splat (i1 true), i32 [[TMP20]]) -; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64 ; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP36]], [[INDEX]] ; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP36]] ; RV64-NEXT: [[TMP37:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -277,21 +243,15 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32-NEXT: [[TMP13:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 ; RV32-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]] -; RV32-NEXT: [[TMP17:%.*]] = mul i32 0, [[TMP16]] -; RV32-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], 1 -; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP18]] -; RV32-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP15]], i32 [[TMP17]] -; RV32-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP20]], i32 [[TMP19]] +; RV32-NEXT: [[TMP9:%.*]] = sub nuw nsw i32 [[TMP16]], 1 +; RV32-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], -1 +; RV32-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP15]], i32 [[TMP10]] ; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP28]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[TMP22:%.*]] = add [[REVERSE]], splat (i32 1) ; RV32-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; RV32-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP22]], splat (i1 true), i32 [[TMP16]]) -; RV32-NEXT: [[TMP21:%.*]] = mul i32 0, [[TMP16]] -; RV32-NEXT: [[TMP26:%.*]] = sub i32 [[TMP16]], 1 -; RV32-NEXT: [[TMP27:%.*]] = mul i32 -1, [[TMP26]] -; RV32-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i32 [[TMP21]] -; RV32-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i32 [[TMP27]] +; RV32-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP23]], i32 [[TMP10]] ; RV32-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP25]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[TMP29:%.*]] = zext i32 [[TMP16]] to i64 ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP29]], [[INDEX]] @@ -346,6 +306,8 @@ define void 
@vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]] ; RV64-UF2-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP20]] to i32 ; RV64-UF2-NEXT: [[TMP21:%.*]] = sub i32 [[N]], [[DOTCAST]] +; RV64-UF2-NEXT: [[TMP26:%.*]] = sub nuw nsw i64 [[TMP18]], 1 +; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 [[TMP26]], -1 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64-UF2: [[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -354,33 +316,19 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP22:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 ; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP18]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP26]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP25]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[TMP27]] -; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 -1, [[TMP18]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP32:%.*]] = mul i64 -1, [[TMP31]] +; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 [[TMP18]], -1 +; RV64-UF2-NEXT: [[TMP25:%.*]] = add i64 [[TMP30]], [[TMP27]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP30]] -; RV64-UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[TMP32]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP29]], align 4 +; RV64-UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP25]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP33]], align 4 ; RV64-UF2-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP34]], align 4 ; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD]]) ; RV64-UF2-NEXT: [[REVERSE5:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD4]]) ; RV64-UF2-NEXT: [[TMP35:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-UF2-NEXT: [[TMP36:%.*]] = add [[REVERSE5]], splat (i32 1) ; RV64-UF2-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP38:%.*]] = mul i64 0, [[TMP18]] -; RV64-UF2-NEXT: [[TMP39:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP40:%.*]] = mul i64 -1, [[TMP39]] -; RV64-UF2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[TMP38]] -; RV64-UF2-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i64 [[TMP40]] -; RV64-UF2-NEXT: [[TMP43:%.*]] = mul i64 -1, [[TMP18]] -; RV64-UF2-NEXT: [[TMP44:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP45:%.*]] = mul i64 -1, [[TMP44]] -; RV64-UF2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[TMP43]] -; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[TMP46]], i64 [[TMP45]] +; RV64-UF2-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[TMP30]] +; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[TMP25]] ; RV64-UF2-NEXT: [[REVERSE6:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP35]]) ; RV64-UF2-NEXT: [[REVERSE7:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP36]]) ; RV64-UF2-NEXT: store [[REVERSE6]], ptr [[TMP42]], align 4 @@ -465,25 +413,17 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, 
ptr nocaptur ; RV64-NEXT: [[TMP21:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 ; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP22]] -; RV64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP28:%.*]] = mul i64 0, [[TMP24]] -; RV64-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], 1 -; RV64-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP25]] -; RV64-NEXT: [[TMP38:%.*]] = getelementptr float, ptr [[TMP23]], i64 [[TMP28]] -; RV64-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP38]], i64 [[TMP26]] +; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64 +; RV64-NEXT: [[TMP17:%.*]] = sub nuw nsw i64 [[TMP36]], 1 +; RV64-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], -1 +; RV64-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP23]], i64 [[TMP18]] ; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP27]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[TMP29:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP22]] ; RV64-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP29]], splat (i1 true), i32 [[TMP20]]) -; RV64-NEXT: [[TMP39:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP31:%.*]] = mul i64 0, [[TMP39]] -; RV64-NEXT: [[TMP32:%.*]] = sub i64 [[TMP39]], 1 -; RV64-NEXT: [[TMP33:%.*]] = mul i64 -1, [[TMP32]] -; RV64-NEXT: [[TMP34:%.*]] = getelementptr float, ptr [[TMP30]], i64 [[TMP31]] -; RV64-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[TMP34]], i64 [[TMP33]] +; RV64-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[TMP30]], i64 [[TMP18]] ; RV64-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP35]], splat (i1 true), i32 [[TMP20]]) -; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64 ; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP36]], [[INDEX]] ; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP36]] ; RV64-NEXT: [[TMP37:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -528,21 +468,15 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32-NEXT: [[TMP13:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 ; RV32-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]] -; RV32-NEXT: [[TMP17:%.*]] = mul i32 0, [[TMP16]] -; RV32-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], 1 -; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP18]] -; RV32-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[TMP15]], i32 [[TMP17]] -; RV32-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP20]], i32 [[TMP19]] +; RV32-NEXT: [[TMP9:%.*]] = sub nuw nsw i32 [[TMP16]], 1 +; RV32-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], -1 +; RV32-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP15]], i32 [[TMP10]] ; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP28]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[TMP22:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV32-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] ; RV32-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP22]], splat (i1 true), i32 [[TMP16]]) -; RV32-NEXT: [[TMP21:%.*]] = mul i32 0, [[TMP16]] -; RV32-NEXT: 
[[TMP26:%.*]] = sub i32 [[TMP16]], 1 -; RV32-NEXT: [[TMP27:%.*]] = mul i32 -1, [[TMP26]] -; RV32-NEXT: [[TMP24:%.*]] = getelementptr float, ptr [[TMP23]], i32 [[TMP21]] -; RV32-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP24]], i32 [[TMP27]] +; RV32-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 [[TMP10]] ; RV32-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP25]], splat (i1 true), i32 [[TMP16]]) ; RV32-NEXT: [[TMP29:%.*]] = zext i32 [[TMP16]] to i64 ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP29]], [[INDEX]] @@ -597,6 +531,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]] ; RV64-UF2-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP20]] to i32 ; RV64-UF2-NEXT: [[TMP21:%.*]] = sub i32 [[N]], [[DOTCAST]] +; RV64-UF2-NEXT: [[TMP26:%.*]] = sub nuw nsw i64 [[TMP18]], 1 +; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 [[TMP26]], -1 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64-UF2: [[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -605,33 +541,19 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP22:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 ; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP18]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP26]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[TMP25]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP27]] -; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 -1, [[TMP18]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP32:%.*]] = mul i64 -1, [[TMP31]] +; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 [[TMP18]], -1 +; RV64-UF2-NEXT: [[TMP25:%.*]] = add i64 [[TMP30]], [[TMP27]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[TMP30]] -; RV64-UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP33]], i64 [[TMP32]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP29]], align 4 +; RV64-UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[TMP25]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP33]], align 4 ; RV64-UF2-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP34]], align 4 ; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD]]) ; RV64-UF2-NEXT: [[REVERSE5:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD4]]) ; RV64-UF2-NEXT: [[TMP35:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-UF2-NEXT: [[TMP36:%.*]] = fadd [[REVERSE5]], splat (float 1.000000e+00) ; RV64-UF2-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP38:%.*]] = mul i64 0, [[TMP18]] -; RV64-UF2-NEXT: [[TMP39:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP40:%.*]] = mul i64 -1, [[TMP39]] -; RV64-UF2-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 [[TMP38]] -; RV64-UF2-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, ptr [[TMP41]], i64 [[TMP40]] -; RV64-UF2-NEXT: [[TMP43:%.*]] = mul i64 -1, [[TMP18]] -; RV64-UF2-NEXT: [[TMP44:%.*]] = sub i64 [[TMP18]], 1 -; RV64-UF2-NEXT: [[TMP45:%.*]] = mul i64 -1, [[TMP44]] -; RV64-UF2-NEXT: 
[[TMP46:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 [[TMP43]] -; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP46]], i64 [[TMP45]] +; RV64-UF2-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 [[TMP30]] +; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP37]], i64 [[TMP25]] ; RV64-UF2-NEXT: [[REVERSE6:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP35]]) ; RV64-UF2-NEXT: [[REVERSE7:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP36]]) ; RV64-UF2-NEXT: store [[REVERSE6]], ptr [[TMP42]], align 4 @@ -692,25 +614,17 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]] -; RV64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP24]] -; RV64-NEXT: [[TMP10:%.*]] = sub i64 [[TMP24]], 1 -; RV64-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP10]] -; RV64-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP9]] -; RV64-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i64 [[TMP11]] +; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64 +; RV64-NEXT: [[TMP4:%.*]] = sub nuw nsw i64 [[TMP22]], 1 +; RV64-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], -1 +; RV64-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP5]] ; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP19]]) ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP19]]) ; RV64-NEXT: [[TMP14:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] ; RV64-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP14]], splat (i1 true), i32 [[TMP19]]) -; RV64-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP16]] -; RV64-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], 1 -; RV64-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP17]] -; RV64-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[TMP15]], i64 [[TMP25]] -; RV64-NEXT: [[TMP21:%.*]] = getelementptr float, ptr [[TMP20]], i64 [[TMP18]] +; RV64-NEXT: [[TMP21:%.*]] = getelementptr float, ptr [[TMP15]], i64 [[TMP5]] ; RV64-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP21]], splat (i1 true), i32 [[TMP19]]) -; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64 ; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]] ; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] ; RV64-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -733,21 +647,15 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]] -; RV32-NEXT: [[TMP10:%.*]] = mul i32 0, [[TMP9]] -; RV32-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], 1 -; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP11]] -; RV32-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP8]], i32 [[TMP10]] -; RV32-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP13]], i32 [[TMP12]] +; RV32-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP9]], 1 +; RV32-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], -1 +; RV32-NEXT: 
[[TMP14:%.*]] = getelementptr float, ptr [[TMP8]], i32 [[TMP4]] ; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[TMP15:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] ; RV32-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP15]], splat (i1 true), i32 [[TMP9]]) -; RV32-NEXT: [[TMP17:%.*]] = mul i32 0, [[TMP9]] -; RV32-NEXT: [[TMP19:%.*]] = sub i32 [[TMP9]], 1 -; RV32-NEXT: [[TMP20:%.*]] = mul i32 -1, [[TMP19]] -; RV32-NEXT: [[TMP18:%.*]] = getelementptr float, ptr [[TMP16]], i32 [[TMP17]] -; RV32-NEXT: [[TMP22:%.*]] = getelementptr float, ptr [[TMP18]], i32 [[TMP20]] +; RV32-NEXT: [[TMP22:%.*]] = getelementptr float, ptr [[TMP16]], i32 [[TMP4]] ; RV32-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP22]], splat (i1 true), i32 [[TMP9]]) ; RV32-NEXT: [[TMP23:%.*]] = zext i32 [[TMP9]] to i64 ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP23]], [[INDEX]] @@ -773,39 +681,27 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] ; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] ; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]] +; RV64-UF2-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 [[TMP12]], -1 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64-UF2: [[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]] -; RV64-UF2-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP11]] -; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP10]] -; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP12]] -; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP16]] +; RV64-UF2-NEXT: [[TMP10:%.*]] = mul i64 [[TMP5]], -1 +; RV64-UF2-NEXT: [[TMP11:%.*]] = add i64 [[TMP15]], [[TMP10]] ; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP15]] -; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP17]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 4 ; RV64-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP19]], align 4 ; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD]]) ; RV64-UF2-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD1]]) ; RV64-UF2-NEXT: [[TMP20:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-UF2-NEXT: [[TMP21:%.*]] = fadd [[REVERSE2]], splat (float 1.000000e+00) ; RV64-UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] -; RV64-UF2-NEXT: [[TMP23:%.*]] = 
mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP24:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP25:%.*]] = mul i64 -1, [[TMP24]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[TMP25]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP30:%.*]] = mul i64 -1, [[TMP29]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP28]] -; RV64-UF2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP31]], i64 [[TMP30]] +; RV64-UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP11]] ; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP20]]) ; RV64-UF2-NEXT: [[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP21]]) ; RV64-UF2-NEXT: store [[REVERSE3]], ptr [[TMP27]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index b0371ff239365..58ec6450a67b9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -20,24 +20,16 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -1 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] -; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP18]] -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[TMP18]], 1 -; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 -1, [[TMP11]] -; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]] -; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP10]] +; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub nuw nsw i64 [[TMP20]], 1 +; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP4]], -1 +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP6]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] ; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP19]] -; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 [[TMP19]], 1 -; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP23]] -; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]] -; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP6]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 
[[TMP20]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] ; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -60,25 +52,19 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] ; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32 +; NO-VP-NEXT: [[TMP9:%.*]] = sub nuw nsw i64 [[TMP3]], 1 +; NO-VP-NEXT: [[TMP12:%.*]] = mul i64 [[TMP9]], -1 ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] ; NO-VP-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] ; NO-VP-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], -1 -; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP3]] -; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP3]], 1 -; NO-VP-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP11]] -; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP10]] +; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]] ; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP12]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4 ; NO-VP-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) ; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP3]] -; NO-VP-NEXT: [[TMP17:%.*]] = sub i64 [[TMP3]], 1 -; NO-VP-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP17]] -; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP16]] -; NO-VP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP18]] +; NO-VP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP12]] ; NO-VP-NEXT: [[REVERSE1:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[REVERSE]]) ; NO-VP-NEXT: store <vscale x 4 x i32> [[REVERSE1]], ptr [[TMP20]], align 4 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] @@ -142,27 +128,19 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP26]] -; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 [[TMP26]], 1 -; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP15]] -; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]] -; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]] +; IF-EVL-NEXT: [[TMP15:%.*]] = sub nuw nsw i64 [[TMP26]], 1 +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP15]], -1 +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP7]] ; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> 
[[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP27]] -; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP27]], 1 -; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 -1, [[TMP30]] -; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]] -; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]] +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP7]] ; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP14]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP25]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP26]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP26]] ; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: middle.block: @@ -183,6 +161,8 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL1:%.*]], [[N_VEC]] ; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32 +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[TMP14:%.*]] = mul i64 [[TMP11]], -1 ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] @@ -192,21 +172,13 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[OFFSET_IDX1]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[TMP10:%.*]] = icmp slt [[WIDE_LOAD]], splat (i32 100) -; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP3]] -; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 -; NO-VP-NEXT: [[TMP14:%.*]] = mul i64 -1, [[TMP13]] -; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP11]], i64 [[TMP12]] +; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP8]] ; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP14]] ; NO-VP-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4i1( [[TMP10]]) ; NO-VP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP16]], [[REVERSE]], poison) ; NO-VP-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_MASKED_LOAD]]) ; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP18:%.*]] = mul i64 0, [[TMP3]] -; NO-VP-NEXT: [[TMP19:%.*]] = sub i64 [[TMP3]], 1 -; NO-VP-NEXT: [[TMP20:%.*]] = mul i64 -1, [[TMP19]] -; NO-VP-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP18]] -; NO-VP-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP20]] +; NO-VP-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP14]] ; NO-VP-NEXT: [[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4i32( [[REVERSE2]]) ; NO-VP-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i1( [[TMP10]]) ; 
NO-VP-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[REVERSE4]], ptr align 4 [[TMP22]], [[REVERSE3]]) @@ -282,38 +254,25 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) -; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] +; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]] -; IF-EVL-NEXT: [[TMP29:%.*]] = sub i64 [[TMP9]], 1 -; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP29]] -; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP10]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 [[TMP9]], 1 +; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 [[TMP3]], -1 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP11]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B:%.*]], [[VP_REVERSE]] ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv16i8.nxv16p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[OFFSET_IDX]] +; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]] -; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP16]], 1 -; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP30]] -; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP17]] -; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 [[TMP18]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP11]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_REVERSE1]], ptr align 1 [[TMP20]], splat (i1 true), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[OFFSET_IDX]] -; IF-EVL-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP22]] -; IF-EVL-NEXT: [[TMP31:%.*]] = sub i64 [[TMP22]], 1 -; IF-EVL-NEXT: [[TMP24:%.*]] = mul i64 -1, [[TMP31]] -; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 [[TMP23]] -; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 [[TMP24]] +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[TMP10]] +; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP16]], i64 [[TMP11]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_REVERSE1]], ptr align 1 [[TMP26]], splat (i1 true), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP27]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw 
i64 [[AVL]], [[TMP27]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: @@ -330,20 +289,17 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] ; NO-VP-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[INDEX]] ; NO-VP-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 -; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -15 +; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -15 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; NO-VP-NEXT: [[REVERSE:%.*]] = shufflevector <16 x i8> [[WIDE_LOAD]], <16 x i8> poison, <16 x i32> ; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], <16 x i8> [[REVERSE]] ; NO-VP-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> align 1 [[TMP3]], <16 x i1> splat (i1 true), <16 x i8> poison) ; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 0 -; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i64 -15 +; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP4]], i64 -15 ; NO-VP-NEXT: [[REVERSE1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_GATHER]], <16 x i8> poison, <16 x i32> ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP6]], align 1 ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 0 -; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -15 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -15 ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP9]], align 1 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll index e523de9de7a26..b75623186362d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll @@ -24,14 +24,11 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 1, [[OFFSET_IDX]] ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP12]] ; CHECK-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv2i64( zeroinitializer, splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 -1, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[ARRAYIDX13]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i64 [[TMP7]] -; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[REVERSE]], ptr align 8 [[TMP9]], splat (i1 true), i32 [[TMP1]]) ; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = sub nuw nsw 
i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], -1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[ARRAYIDX13]], i64 [[TMP7]] +; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[REVERSE]], ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]]) ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll index 8b6ce538474c7..9cb2b8d842b59 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll @@ -33,13 +33,19 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[SCALAR_STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<[[IDX_PROM:%.+]]> = zext ir<[[IDX]]> ; CHECK-NEXT: CLONE ir<[[ARRAY_IDX_B:%.+]]> = getelementptr inbounds ir<[[B:%.+]]>, ir<[[IDX_PROM]]> -; CHECK-NEXT: vp<[[VEC_END_PTR_B:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_B]]>, vp<[[EVL]]> +; CHECK-NEXT: EMIT-SCALAR vp<[[EVLEXT:%.+]]> = zext vp<[[EVL]]> to i64 +; CHECK-NEXT: EMIT vp<[[EVLSUB:%.+]]> = sub nuw nsw vp<[[EVLEXT]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[EVLMUL:%.+]]> = mul vp<[[EVLSUB]]>, ir<-1> +; CHECK-NEXT: vp<[[VEC_END_PTR_B:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_B]]>, vp<[[EVLMUL]]> ; CHECK-NEXT: WIDEN ir<[[LOAD_B:%.+]]> = vp.load vp<[[VEC_END_PTR_B]]>, vp<[[EVL]]> ; CHECK-NEXT: WIDEN-INTRINSIC vp<[[VAL_B:%.+]]> = call llvm.experimental.vp.reverse(ir<[[LOAD_B]]>, ir<true>, vp<[[EVL]]>) ; CHECK-NEXT: WIDEN ir<[[ADD_RESULT:%.+]]> = add vp<[[VAL_B]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAY_IDX_A:%.+]]> = getelementptr inbounds ir<[[A:%.+]]>, ir<[[IDX_PROM]]> ; CHECK-NEXT: WIDEN-INTRINSIC vp<[[STORE_VAL:%.+]]> = call llvm.experimental.vp.reverse(ir<[[ADD_RESULT]]>, ir<true>, vp<[[EVL]]>) -; CHECK-NEXT: vp<[[VEC_END_PTR_A:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_A]]>, vp<[[EVL]]> +; CHECK-NEXT: EMIT-SCALAR vp<[[EVLEXT2:%.+]]> = zext vp<[[EVL]]> to i64 +; CHECK-NEXT: EMIT vp<[[EVLSUB2:%.+]]> = sub nuw nsw vp<[[EVLEXT2]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[EVLMUL2:%.+]]> = mul vp<[[EVLSUB2]]>, ir<-1> +; CHECK-NEXT: vp<[[VEC_END_PTR_A:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_A]]>, vp<[[EVLMUL2]]> ; CHECK-NEXT: WIDEN vp.store vp<[[VEC_END_PTR_A]]>, vp<[[STORE_VAL]]>, vp<[[EVL]]> ; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[EVL]]>, vp<[[EVL_PHI]]> ; CHECK-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[EVL]]> diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll index f0ea63c498a40..df802392a50f5 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll @@ -27,8 +27,7 @@ define void @induction_phi_and_branch_cost(ptr %end, ptr %start.1, ptr %start.2) ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], -4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -3 +; CHECK-NEXT: 
[[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 -3 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll index 1f33d63d2073c..32695ea02f8fd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll @@ -23,8 +23,7 @@ define i1 @fn(ptr %nno) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i32, ptr [[NNO]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i64 -3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP23]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP6]], <4 x i1> [[REVERSE]], <4 x i32> poison) ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> diff --git a/llvm/test/Transforms/LoopVectorize/X86/end-pointer-signed.ll b/llvm/test/Transforms/LoopVectorize/X86/end-pointer-signed.ll index dad6c069344bd..831f41dce85a5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/end-pointer-signed.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/end-pointer-signed.ll @@ -22,10 +22,8 @@ define void @test(ptr %dest, i32 %n) "target-cpu"="pentium4" { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[N]], [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DEST]], i32 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 -1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP3]], i32 -2 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 -1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP3]], i32 -1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP3]], i32 -3 ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 1808e80a97060..4c4c8d4a448a5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1117,18 +1117,14 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 -; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -3 -; AVX2-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds i32, ptr [[TMP1]], i64 -4 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -3 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -12 -; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3 -; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META22:![0-9]+]] -; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META22]] -; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22]] -; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -3 +; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -7 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -11 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -15 +; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META22:![0-9]+]] +; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD6]], <4 x i32> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD8]], <4 x i32> poison, <4 x i32> @@ -1138,22 +1134,18 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[REVERSE9]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i32> [[REVERSE11]], zeroinitializer ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -3 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -4 -; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -3 -; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8 -; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -3 -; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -12 -; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -3 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i64 -3 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -7 +; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i64 -11 +; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -15 ; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP15]], <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope [[META25:![0-9]+]] ; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x i1> [[TMP11]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr 
align 8 [[TMP17]], <4 x i1> [[REVERSE13]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP16]], <4 x i1> [[REVERSE13]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i1> [[TMP12]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP19]], <4 x i1> [[REVERSE15]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP17]], <4 x i1> [[REVERSE15]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP21]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP18]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD14]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD16]], <4 x double> poison, <4 x i32> @@ -1163,22 +1155,18 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE23]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0 -; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -3 -; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -4 -; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -3 -; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8 -; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -3 -; AVX2-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -12 -; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -3 +; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP27]], i64 -3 +; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP27]], i64 -7 +; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP27]], i64 -11 +; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP27]], i64 -15 ; AVX2-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE27:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP28]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE27]], ptr align 8 [[TMP30]], <4 x i1> [[REVERSE13]]), !alias.scope [[META27]], !noalias [[META29]] 
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE25]], ptr align 8 [[TMP32]], <4 x i1> [[REVERSE15]]), !alias.scope [[META27]], !noalias [[META29]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP34]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP19]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE27]], ptr align 8 [[TMP20]], <4 x i1> [[REVERSE13]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE25]], ptr align 8 [[TMP21]], <4 x i1> [[REVERSE15]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP22]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias [[META29]] ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; AVX2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -1208,18 +1196,14 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 -; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -7 -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -7 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -7 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -24 -; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -7 -; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META34:![0-9]+]] -; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META34]] -; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META34]] -; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -7 +; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -15 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -23 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -31 +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META34:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: 
[[REVERSE7:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD6]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE9:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD7]], <8 x i32> poison, <8 x i32> @@ -1229,22 +1213,18 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512-NEXT: [[TMP12:%.*]] = icmp sgt <8 x i32> [[REVERSE9]], zeroinitializer ; AVX512-NEXT: [[TMP13:%.*]] = icmp sgt <8 x i32> [[REVERSE11]], zeroinitializer ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -7 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8 -; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -7 -; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -16 -; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -7 -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -24 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -7 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i64 -7 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -15 +; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i64 -23 +; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -31 ; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP15]], <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope [[META37:![0-9]+]] ; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP17]], <8 x i1> [[REVERSE13]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP16]], <8 x i1> [[REVERSE13]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE15:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP19]], <8 x i1> [[REVERSE15]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP17]], <8 x i1> [[REVERSE15]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP13]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP21]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP18]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD14]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD16]], <8 x double> poison, <8 x i32> @@ -1254,22 +1234,18 @@ define void @foo6(ptr nocapture readonly %in, ptr 
nocapture %out, i32 %size, ptr ; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE23]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0 -; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -7 -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8 -; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -7 -; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -16 -; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -7 -; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -24 -; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -7 +; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP27]], i64 -7 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP27]], i64 -15 +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP27]], i64 -23 +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP27]], i64 -31 ; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE27:%.*]] = shufflevector <8 x double> [[TMP25]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE24:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP28]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE27]], ptr align 8 [[TMP30]], <8 x i1> [[REVERSE13]]), !alias.scope [[META39]], !noalias [[META41]] -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE25]], ptr align 8 [[TMP32]], <8 x i1> [[REVERSE15]]), !alias.scope [[META39]], !noalias [[META41]] -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP34]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP19]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE27]], ptr align 8 [[TMP20]], <8 x i1> [[REVERSE13]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE25]], ptr align 8 [[TMP21]], <8 x i1> [[REVERSE15]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP22]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; AVX512-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll index 1026177370912..8919ed60823ea 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll +++ 
b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll @@ -31,10 +31,8 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP26]], i64 0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 -3 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP26]], i64 -4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP14]], i64 -3 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP26]], i64 -3 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP26]], i64 -7 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> @@ -116,8 +114,7 @@ define i1 @test_exit_compare_other_users() #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 79, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [1 x i8], ptr @gg, i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -24 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -7 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -31 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 @@ -136,8 +133,7 @@ define i1 @test_exit_compare_other_users() #0 { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 79, [[INDEX1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [1 x i8], ptr @gg, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 76 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index c756a54ec6d2b..272d9868c98d0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -30,8 +30,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP6]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 8 [[TMP8]], <4 x i1> [[REVERSE]]) ; CHECK-NEXT: [[INDEX_NEXT]]
= add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 6c63b823b7666..456cb9f832f24 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -130,8 +130,7 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[REVERSE]] @@ -177,8 +176,7 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) { ; INTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; INTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; INTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 +; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -3 ; INTER-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8 ; INTER-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; INTER-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[REVERSE]] @@ -465,8 +463,7 @@ define i32 @interleaved_access_reverse(ptr %p, i64 %n) { ; INTER-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; INTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[OFFSET_IDX]], i32 0 -; INTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -6 +; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -6 ; INTER-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 8 ; INTER-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; INTER-NEXT: [[VEC_PHI:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index 018578b15d4b9..320c69cb0c8f8 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -1648,8 +1648,7 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 0 -;
CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll index dee377d61ba30..65242f6705a25 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll @@ -81,8 +81,7 @@ define i32 @induction_trunc_wrapflags(ptr %p) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 70, i8 69, i8 68, i8 67>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 326, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[VEC_IND]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: store <4 x i8> [[REVERSE]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll index 86dfd70ea754f..1c987c84aa40b 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll @@ -19,10 +19,8 @@ define void @i65_induction_with_negative_step(ptr %dst) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[VEC_IND]], <4 x i64> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -7 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: store <4 x i64> [[REVERSE]], ptr [[TMP6]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll index 309fcece3f7ee..6c57294add0b7 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-gep-nowrap-flags.ll @@ -15,8 +15,7 @@ define void @nusw_preservation(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1023, i32 1022, i32 1021, i32 1020>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw
[[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 -6 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP0]], i64 -6 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> @@ -25,8 +24,7 @@ define void @nusw_preservation(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nusw i32, ptr [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr nusw i32, ptr [[TMP6]], i64 -6 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr nusw i32, ptr [[TMP5]], i64 -6 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> @@ -77,8 +75,7 @@ define void @inbounds_preservation(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1023, i32 1022, i32 1021, i32 1020>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -6 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 -6 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> @@ -87,8 +84,7 @@ define void @inbounds_preservation(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -6 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -6 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> @@ -139,8 +135,7 @@ define void @nuw_drop(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1023, i32 1022, i32 1021, i32 1020>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i64 0 -;
CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 -6 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 -6 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> @@ -149,8 +144,7 @@ define void @nuw_drop(ptr noalias %A, ptr %B) { ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 -6 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i64 -6 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index 2a19ebdd77219..f804e74db100a 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -16,8 +16,7 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) @@ -52,14 +51,10 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 -; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 -; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4 -; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3 -; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -8 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12 -; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 +; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3 +; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -7 +; IC4VF4-NEXT: [[TMP6:%.*]] =
getelementptr inbounds i64, ptr [[TMP0]], i64 -11 +; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -15 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 @@ -172,8 +167,7 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 -3 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]] @@ -498,8 +492,7 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i64 -3 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC1VF4-NEXT: [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]] @@ -822,8 +815,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) @@ -858,14 +850,10 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 -; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 -; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4 -; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3 -; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64,
ptr [[TMP0]], i64 -8 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12 -; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 +; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3 +; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -7 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -11 +; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -15 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 @@ -1020,13 +1008,11 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr % ; IC1VF4-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1) ; IC1VF4-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0 ; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]] -; IC1VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -; IC1VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3 +; IC1VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC1VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]] -; IC1VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 -; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -3 +; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1 ; IC1VF4-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD1]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC1VF4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i8> [[REVERSE]], [[REVERSE2]] @@ -1080,14 +1066,10 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr % ; IC4VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -1) ; IC4VF4-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -1) ; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]] -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -3 -; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -4 -; IC4VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 -3 -; IC4VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -8 -; IC4VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 -3 -; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -12 -; IC4VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 -3 +; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 +; IC4VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -7 +; IC4VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -11 +; IC4VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -15 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1 ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 ;
IC4VF4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1 @@ -1097,14 +1079,10 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr % ; IC4VF4-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC4VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]] -; IC4VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 0 -; IC4VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 -3 -; IC4VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -4 -; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 -3 -; IC4VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -8 -; IC4VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -3 -; IC4VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -12 -; IC4VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP21]], i64 -3 +; IC4VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3 +; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -7 +; IC4VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -11 +; IC4VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -15 ; IC4VF4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i8>, ptr [[TMP16]], align 1 ; IC4VF4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 ; IC4VF4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1 @@ -1270,13 +1248,11 @@ define i64 @select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 ; IC1VF4-NEXT: [[TMP3:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -1) ; IC1VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; IC1VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; IC1VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 0 -; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 -3 +; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC1VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] -; IC1VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 0 -; IC1VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i64 -3 +; IC1VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 ; IC1VF4-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD3]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC1VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE]], [[REVERSE4]] @@ -1339,13 +1315,11 @@ define i64 @select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 ; IC4VF4-NEXT: [[TMP3:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -1) ; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; IC4VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 0 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 -3 +; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3
; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] -; IC4VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 0 -; IC4VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i64 -3 +; IC4VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 -3 ; IC4VF4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 ; IC4VF4-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD3]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; IC4VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE]], [[REVERSE4]] diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 62ffd2eb1a6fc..8fc506758e35b 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1289,8 +1289,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 3 -; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i64 0 -; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i64 -1 +; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP0]], i64 -1 ; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2 ; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1 ; VEC-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP5]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 84e4aebd9425a..e20a6673a2b16 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -296,14 +296,12 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 -1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP6]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 @@ -388,8 +386,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP1:%.*]] =
extractelement <2 x i64> [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0> ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) @@ -544,8 +541,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 511, i64 510>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 511, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 314476bea54df..9114dd86e6d44 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -173,8 +173,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[N]] ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 0 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) @@ -203,8 +202,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], [[N]] ; CHECK-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 0 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x float> [[WIDE_LOAD9]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], splat (float 1.000000e+00) diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 995c2016339a4..fe0c839f3f030 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -38,8 +38,7 @@ define void @a(ptr readnone %b) { ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <4 x i8> [[REVERSE]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index 33b3d263e634a..98da110a44e8c 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -56,8 +56,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_CONTINUE3]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 0 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i64 -1 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 -1 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -119,8 +118,7 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 -1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i64 -1 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll index 557c7e570766c..1490e4d15249c 100644 --- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll @@ -36,8 +36,7 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[IV_LCSSA]], [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[SRC_2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i64 -1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP5]], i64
-1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x ptr> [[WIDE_LOAD]], <2 x ptr> poison, <2 x i32> <i32 1, i32 0> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[REVERSE]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll index 71c75e52d4050..51d2b825dc914 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll @@ -14,8 +14,7 @@ define i32 @preserve_inbounds(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] @@ -60,8 +59,7 @@ define i32 @preserve_nusw(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i64 -3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] @@ -106,8 +104,7 @@ define i32 @drop_nuw(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 -3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP1]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index 2eb79c405e528..f72afc8a3a391 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -20,10 +20,8 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 -; CHECK-NEXT: [[TMP4:%.*]] =
getelementptr inbounds i32, ptr [[TMP5]], i64 -4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -7 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> @@ -74,10 +72,8 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i128 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -7 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> @@ -134,10 +130,8 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]] ; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -3 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -7 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> @@ -221,10 +215,8 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -3 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -7 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: store <4 x i32>
[[REVERSE]], ptr [[TMP9]], align 4 @@ -274,10 +266,8 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -3 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -7 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll index 5a1844ac450e7..28a643e05339a 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll @@ -39,12 +39,10 @@ define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], -8 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START_2_DIFF]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i64 -3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP15]], i64 -3 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 -3 ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i64> [[REVERSE]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: store <4 x i64> [[REVERSE4]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll index af272955abbd2..dd7a8a87a921b 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -99,8 +99,7 @@ define void @diff_memcheck_known_false_for_vf_4(ptr %B, ptr %A, ptr %end) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], -8 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 -3 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP8]], align 8 ; CHECK-NEXT:
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll index 5be2b09a504c0..8ef9e872a530e 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll @@ -996,22 +996,20 @@ define void @decreasing_inner_iv(ptr nocapture noundef %dst, ptr nocapture nound ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[TMP0]], [[INDEX]] ; CHECK-NEXT: [[TMP21:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP16]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 0 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 -3 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META46:![0-9]+]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 -3 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4, !alias.scope [[META46:![0-9]+]] ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[TMP25:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP17]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 -3 -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4, !alias.scope [[META49:![0-9]+]], !noalias [[META46]] +; CHECK-NEXT: [[TMP24:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP17]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 -3 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP26]], align 4, !alias.scope [[META49:![0-9]+]], !noalias [[META46]] ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[TMP29:%.*]] = add nsw <4 x i32> [[REVERSE4]], [[REVERSE]] -; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i32> [[TMP29]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: store <4 x i32> [[REVERSE5]], ptr [[TMP28]], align 4, !alias.scope [[META49]], !noalias [[META46]] +; CHECK-NEXT: [[TMP27:%.*]] = add nsw <4 x i32> [[REVERSE4]], [[REVERSE]] +; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i32> [[TMP27]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: store <4 x i32> [[REVERSE5]], ptr [[TMP26]], align 4, !alias.scope [[META49]], !noalias [[META46]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP51:![0-9]+]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP51:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP15]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] @@ -1020,13 +1018,13 @@ define void @decreasing_inner_iv(ptr nocapture noundef %dst, ptr nocapture nound ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[INNER_IV:%.*]]
= phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ] -; CHECK-NEXT: [[TMP31:%.*]] = add nsw i64 [[INNER_IV]], [[TMP16]] -; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP31]] -; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4 -; CHECK-NEXT: [[TMP33:%.*]] = add nsw i64 [[INNER_IV]], [[TMP17]] -; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP33]] -; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8_US]], align 4 -; CHECK-NEXT: [[ADD9_US:%.*]] = add nsw i32 [[TMP34]], [[TMP32]] +; CHECK-NEXT: [[TMP29:%.*]] = add nsw i64 [[INNER_IV]], [[TMP16]] +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = add nsw i64 [[INNER_IV]], [[TMP17]] +; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX8_US]], align 4 +; CHECK-NEXT: [[ADD9_US:%.*]] = add nsw i32 [[TMP32]], [[TMP30]] ; CHECK-NEXT: store i32 [[ADD9_US]], ptr [[ARRAYIDX8_US]], align 4 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], -1 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp sgt i64 [[INNER_IV]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index c193a1229a8f9..350769d4d3cf3 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -563,14 +563,10 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_INTERIM:%.*]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4 -; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3 -; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8 -; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12 -; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i64 -3 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -3 +; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -7 +; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -11 +; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -15 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1 @@ -580,14 +576,10 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD3]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; VF4IC4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -;
VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 0 -; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i64 -3 -; VF4IC4-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP41]], i64 -3 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3 -; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -12 -; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3 +; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -3 +; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -7 +; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -11 +; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -15 ; VF4IC4-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i8>, ptr [[TMP46]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1 @@ -799,14 +791,10 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_INTERIM:%.*]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0 -; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4 -; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3 -; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8 -; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 -; VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12 -; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 -3 +; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -3 +; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -7 +; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -11 +; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -15 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1 @@ -816,14 +804,10 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD3]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; VF4IC4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 0 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 -3 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 -3 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3
-; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -12
-; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3
+; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -3
+; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -7
+; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -11
+; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -15
 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1
 ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
 ; VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 82caebc9aa1b7..19973c9f2eea6 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -1549,13 +1549,11 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
 ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY_INTERIM:%.*]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -3
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i8> [[REVERSE]], [[REVERSE3]]
diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
index e10cb2794aadb..4a4675f71af9b 100644
--- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
@@ -153,8 +153,7 @@ define void @test2(ptr %dst) {
 ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
 ; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4294967295
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 -1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -1
 ; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP11]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 2f063166b053c..f369b00307750 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -1144,6 +1144,8 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector.ph:
 ; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + vp<[[VEC_TC]]> * ir<-1>
+; CHECK-NEXT: EMIT vp<[[SUB:%.+]]> = sub nuw nsw vp<[[VF]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[MUL:%.+]]> = mul vp<[[SUB]]>, ir<-1>
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
@@ -1153,10 +1155,10 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1>
 ; CHECK-NEXT: EMIT vp<[[PTR_IV:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
 ; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%ptr.iv.next>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%ptr.iv.next>, vp<[[MUL]]>
 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: EMIT vp<%9> = reverse ir<%l>
-; CHECK-NEXT: WIDEN ir<%c.1> = icmp ne vp<%9>, ir<0>
+; CHECK-NEXT: EMIT vp<[[REV:%.+]]> = reverse ir<%l>
+; CHECK-NEXT: WIDEN ir<%c.1> = icmp ne vp<[[REV]]>, ir<0>
 ; CHECK-NEXT: Successor(s): pred.store
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store: {