@@ -421,7 +421,14 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
421421
422422 // Hook up the new basic block to its predecessors.
423423 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
424- VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
424+ auto *VPRB = dyn_cast<VPRegionBlock>(PredVPBlock);
425+
426+ // The exiting block that leads to this block might be an early exit from
427+ // a loop region.
428+ VPBasicBlock *PredVPBB = VPRB && VPRB->getEarlyExit () == this
429+ ? cast<VPBasicBlock>(VPRB->getEarlyExiting ())
430+ : PredVPBlock->getExitingBasicBlock ();
431+
425432 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
426433 BasicBlock *PredBB = CFG.VPBB2IRBB [PredVPBB];
427434
@@ -443,6 +450,11 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
443450 // Set each forward successor here when it is created, excluding
444451 // backedges. A backward successor is set when the branch is created.
445452 unsigned idx = PredVPSuccessors.front () == this ? 0 : 1 ;
453+ VPRegionBlock *PredParentRegion =
454+ dyn_cast_or_null<VPRegionBlock>(PredVPBB->getParent ()); // may be null
455+ if (PredParentRegion && PredParentRegion->getEarlyExiting () == PredVPBB) {
456+ idx = 1 - idx; // early-exiting predecessor: use the other successor slot
457+ }
446458 assert (!TermBr->getSuccessor (idx) &&
447459 " Trying to reset an existing successor block." );
448460 TermBr->setSuccessor (idx, NewBB);
@@ -499,6 +511,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
499511 !((SingleHPred = getSingleHierarchicalPredecessor ()) &&
500512 SingleHPred->getExitingBasicBlock () == PrevVPBB &&
501513 PrevVPBB->getSingleHierarchicalSuccessor () &&
514+ PrevVPBB != getEnclosingLoopRegion ()->getEarlyExiting () &&
502515 (SingleHPred->getParent () == getEnclosingLoopRegion () &&
503516 !IsLoopRegion (SingleHPred))) && /* B */
504517 !(Replica && getPredecessors ().empty ())) { /* C */
@@ -517,7 +530,8 @@ void VPBasicBlock::execute(VPTransformState *State) {
517530 UnreachableInst *Terminator = State->Builder .CreateUnreachable ();
518531 // Register NewBB in its loop. In innermost loops it's the same for all
519532 // BBs.
520- if (State->CurrentVectorLoop )
533+ if (State->CurrentVectorLoop &&
534+ this != getEnclosingLoopRegion ()->getEarlyExit ())
521535 State->CurrentVectorLoop ->addBasicBlockToLoop (NewBB, *State->LI );
522536 State->Builder .SetInsertPoint (Terminator);
523537 State->CFG .PrevBB = NewBB;
@@ -635,7 +649,11 @@ const VPRecipeBase *VPBasicBlock::getTerminator() const {
635649}
636650
// Reports whether this block is an exiting block of its parent region: either
// the region's regular exiting block or, with this change, its early exiting
// block. A block that has no parent region is never considered exiting.
637651bool VPBasicBlock::isExiting () const {
638- return getParent () && getParent ()->getExitingBasicBlock () == this ;
652+ const VPRegionBlock *VPRB = getParent ();
653+ if (!VPRB)
654+ return false ;
655+ return VPRB->getExitingBasicBlock () == this ||
656+ VPRB->getEarlyExiting () == this ;
639657}
640658
641659#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -876,13 +894,15 @@ static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) {
876894VPlanPtr VPlan::createInitialVPlan (Type *InductionTy,
877895 PredicatedScalarEvolution &PSE,
878896 bool RequiresScalarEpilogueCheck,
879- bool TailFolded, Loop *TheLoop) {
897+ bool TailFolded, Loop *TheLoop,
898+ BasicBlock *EarlyExitingBB,
899+ BasicBlock *EarlyExitBB) {
880900 VPIRBasicBlock *Entry = createVPIRBasicBlockFor (TheLoop->getLoopPreheader ());
881901 VPBasicBlock *VecPreheader = new VPBasicBlock (" vector.ph" );
882902 auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
883903
884904 // Create SCEV and VPValue for the trip count.
885- const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount ();
905+ const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount ();
886906 assert (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && " Invalid loop count" );
887907 ScalarEvolution &SE = *PSE.getSE ();
888908 const SCEV *TripCount =
@@ -902,6 +922,13 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
902922 VPBasicBlock *MiddleVPBB = new VPBasicBlock (" middle.block" );
903923 VPBlockUtils::insertBlockAfter (MiddleVPBB, TopRegion);
904924
925+ if (EarlyExitingBB) {
926+ VPBasicBlock *EarlyExitVPBB = new VPBasicBlock (" vector.early.exit" );
927+ TopRegion->setEarlyExit (EarlyExitVPBB);
928+ VPBlockUtils::connectBlocks (TopRegion, EarlyExitVPBB);
929+ TopRegion->setOrigEarlyExit (EarlyExitBB);
930+ }
931+
905932 VPBasicBlock *ScalarPH = new VPBasicBlock (" scalar.ph" );
906933 if (!RequiresScalarEpilogueCheck) {
907934 VPBlockUtils::connectBlocks (MiddleVPBB, ScalarPH);
@@ -916,7 +943,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
916943 // 2) If we require a scalar epilogue, there is no conditional branch as
917944 // we unconditionally branch to the scalar preheader. Do nothing.
918945 // 3) Otherwise, construct a runtime check.
919- BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock ();
946+ BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock ();
920947 auto *VPExitBlock = createVPIRBasicBlockFor (IRExitBlock);
921948 // The connection order corresponds to the operands of the conditional branch.
922949 VPBlockUtils::insertBlockAfter (VPExitBlock, MiddleVPBB);
@@ -992,7 +1019,8 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
9921019// / VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
9931020// / have a single predecessor, which is rewired to the new VPIRBasicBlock. All
9941021// / successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
995- static void replaceVPBBWithIRVPBB (VPBasicBlock *VPBB, BasicBlock *IRBB) {
1022+ static VPIRBasicBlock *replaceVPBBWithIRVPBB (VPBasicBlock *VPBB,
1023+ BasicBlock *IRBB) {
9961024 VPIRBasicBlock *IRVPBB = createVPIRBasicBlockFor (IRBB);
9971025 for (auto &R : make_early_inc_range (*VPBB)) {
9981026 assert (!R.isPhi () && " Tried to move phi recipe to end of block" );
@@ -1006,6 +1034,7 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
10061034 VPBlockUtils::disconnectBlocks (VPBB, Succ);
10071035 }
10081036 delete VPBB;
1037+ return IRVPBB;
10091038}
10101039
10111040// / Generate the code inside the preheader and body of the vectorized loop.
@@ -1029,7 +1058,7 @@ void VPlan::execute(VPTransformState *State) {
10291058 // VPlan execution rather than earlier during VPlan construction.
10301059 BasicBlock *MiddleBB = State->CFG .ExitBB ;
10311060 VPBasicBlock *MiddleVPBB =
1032- cast<VPBasicBlock>(getVectorLoopRegion ()->getSingleSuccessor () );
1061+ cast<VPBasicBlock>(getVectorLoopRegion ()->getSuccessors ()[ 0 ] );
10331062 // Find the VPBB for the scalar preheader, relying on the current structure
10341063 // when creating the middle block and its successors: if there's a single
10351064 // predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1043,7 +1072,14 @@ void VPlan::execute(VPTransformState *State) {
10431072 assert (!isa<VPIRBasicBlock>(ScalarPhVPBB) &&
10441073 " scalar preheader cannot be wrapped already" );
10451074 replaceVPBBWithIRVPBB (ScalarPhVPBB, ScalarPh);
1046- replaceVPBBWithIRVPBB (MiddleVPBB, MiddleBB);
1075+ MiddleVPBB = replaceVPBBWithIRVPBB (MiddleVPBB, MiddleBB);
1076+
1077+ // Ensure the middle block is still the first successor.
1078+ for (auto *Succ : getVectorLoopRegion ()->getSuccessors ())
1079+ if (Succ == MiddleVPBB) {
1080+ getVectorLoopRegion ()->moveSuccessorToFront (MiddleVPBB);
1081+ break ;
1082+ }
10471083
10481084 // Disconnect the middle block from its single successor (the scalar loop
10491085 // header) in both the CFG and DT. The branch will be recreated during VPlan
@@ -1104,6 +1140,20 @@ void VPlan::execute(VPTransformState *State) {
11041140 cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
11051141 }
11061142
1143+ // Patch up early exiting vector block to jump to the original scalar loop's
1144+ // early exit block.
1145+ if (getVectorLoopRegion ()->getEarlyExit ()) {
1146+ VPBasicBlock *EarlyExitVPBB =
1147+ cast<VPBasicBlock>(getVectorLoopRegion ()->getEarlyExit ());
1148+ BasicBlock *VectorEarlyExitBB = State->CFG .VPBB2IRBB [EarlyExitVPBB];
1149+ BasicBlock *OrigEarlyExitBB = getVectorLoopRegion ()->getOrigEarlyExit ();
1150+ BranchInst *BI = BranchInst::Create (OrigEarlyExitBB);
1151+ BI->insertBefore (VectorEarlyExitBB->getTerminator ());
1152+ VectorEarlyExitBB->getTerminator ()->eraseFromParent ();
1153+ State->CFG .DTU .applyUpdates (
1154+ {{DominatorTree::Insert, VectorEarlyExitBB, OrigEarlyExitBB}});
1155+ }
1156+
11071157 State->CFG .DTU .flush ();
11081158 assert (State->CFG .DTU .getDomTree ().verify (
11091159 DominatorTree::VerificationLevel::Fast) &&
@@ -1212,9 +1262,10 @@ LLVM_DUMP_METHOD
12121262void VPlan::dump () const { print (dbgs ()); }
12131263#endif
12141264
// Records a new VPLiveOut for PN fed by V. With this change the LiveOuts map
// is keyed by the (PN, IncomingBlock) pair rather than by PN alone, and the
// assert requires that no entry exists yet for that pair.
// NOTE(review): the assert message still says "for PN" although the key is
// now the pair -- confirm whether the wording should be updated.
1215- void VPlan::addLiveOut (PHINode *PN, VPValue *V) {
1216- assert (LiveOuts.count (PN) == 0 && " an exit value for PN already exists" );
1217- LiveOuts.insert ({PN, new VPLiveOut (PN, V)});
1265+ void VPlan::addLiveOut (PHINode *PN, VPValue *V, VPBasicBlock *IncomingBlock) {
1266+ auto Key = std::pair<PHINode *, VPBasicBlock *>(PN, IncomingBlock);
1267+ assert (LiveOuts.count (Key) == 0 && " an exit value for PN already exists" );
1268+ LiveOuts.insert ({Key, new VPLiveOut (PN, V)});
12181269}
12191270
12201271static void remapOperands (VPBlockBase *Entry, VPBlockBase *NewEntry,
@@ -1285,8 +1336,9 @@ VPlan *VPlan::duplicate() {
12851336 remapOperands (Entry, NewEntry, Old2NewVPValues);
12861337
12871338 // Clone live-outs.
1288- for (const auto &[_, LO] : LiveOuts)
1289- NewPlan->addLiveOut (LO->getPhi (), Old2NewVPValues[LO->getOperand (0 )]);
1339+ for (const auto &[Key, LO] : LiveOuts)
1340+ NewPlan->addLiveOut (LO->getPhi (), Old2NewVPValues[LO->getOperand (0 )],
1341+ Key.second );
12901342
12911343 // Initialize remaining fields of cloned VPlan.
12921344 NewPlan->VFs = VFs;
0 commit comments