Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 49 additions & 35 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,6 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
"Instruction shouldn't have been visited.");

if (auto *Br = dyn_cast<BranchInst>(Inst)) {
if (TheLoop->getLoopLatch() == BB ||
any_of(successors(BB),
[this](BasicBlock *Succ) { return !TheLoop->contains(Succ); }))
continue;

// Conditional branch instructions are represented using BranchOnCond
// recipes.
if (Br->isConditional()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: seems a bit clearer to early continue?

Suggested change
if (Br->isConditional()) {
if (!Br->isConditional())
continue;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, ignore, we continue also after handling a conditional branch.

Expand Down Expand Up @@ -251,6 +246,8 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
BB2VPBB[Entry->getIRBasicBlock()] = Entry;
for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;

// 1. Scan the body of the loop in a topological order to visit each basic
// block after having visited its predecessor basic blocks. Create a VPBB for
Expand All @@ -276,7 +273,6 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
setVPBBPredsFromBB(VPBB, BB);

Expand Down Expand Up @@ -307,24 +303,12 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);

// Don't connect any blocks outside the current loop except the latches for
// inner loops.
// TODO: Also connect exit blocks during initial VPlan construction.
if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) {
if (!LoopForBB->contains(IRSucc0)) {
VPBB->setOneSuccessor(Successor1);
continue;
}
if (!LoopForBB->contains(IRSucc1)) {
VPBB->setOneSuccessor(Successor0);
continue;
}
}

VPBB->setTwoSuccessors(Successor0, Successor1);
}

for (auto *EB : Plan->getExitBlocks())
setVPBBPredsFromBB(EB, EB->getIRBasicBlock());

// 2. The whole CFG has been built at this point so all the input Values must
// have a VPlan counterpart. Fix VPlan header phi by adding their
// corresponding VPlan operands.
Expand Down Expand Up @@ -425,21 +409,31 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
assert(LatchVPBB->getNumSuccessors() <= 1 &&
"Latch has more than one successor");
if (Succ)
VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
assert(Succ && "Latch expected to be left with a single successor");

// Use a temporary placeholder between LatchVPBB and its successor, to
// preserve the original predecessor/successor order of the blocks.
auto *PlaceHolder = Plan.createVPBasicBlock("Region place holder");
VPBlockUtils::insertOnEdge(LatchVPBB, Succ, PlaceHolder);
VPBlockUtils::disconnectBlocks(LatchVPBB, PlaceHolder);
VPBlockUtils::connectBlocks(PreheaderVPBB, PlaceHolder);

auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
false /*isReplicator*/);
// All VPBB's reachable shallowly from HeaderVPB belong to top level loop,
// because VPlan is expected to end at top level latch disconnected above.
// All VPBB's reachable shallowly from HeaderVPB belong to the current region,
// except the exit blocks reachable via non-latch exiting blocks,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// except the exit blocks reachable via non-latch exiting blocks,
// except the exit blocks reachable via non-latch exiting blocks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed thanks!

SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan.getExitBlocks().begin(),
Plan.getExitBlocks().end());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan.getExitBlocks().begin(),
Plan.getExitBlocks().end());

?

for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
VPBB->setParent(R);
if (!ExitBlocks.contains(VPBB))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can now reach exit blocks, contrary to above comment, via early exits?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, comment updated, thanks!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm2, createLoopRegion() is called after prepareForVectorization(), which according to the changes below should have removed all early-exit edges, so is this check (if VPBB is an exit block) needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not reachable in the latest version, removed thanks!

VPBB->setParent(R);

VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PlaceHolder introduced earlier may now be used instead as follows

Suggested change
VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks

if (Succ)
VPBlockUtils::connectBlocks(R, Succ);
VPBlockUtils::insertOnEdge(PlaceHolder, Succ, R);

// Remove placeholder block.
VPBlockUtils::disconnectBlocks(R, PlaceHolder);
VPBlockUtils::disconnectBlocks(PlaceHolder, R);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inserting R first after Preheader (would it be clearer to explicitly insert R on the edge from Preheader to PlaceHolder instead?) and then inserting R again on the edge between PlaceHolder and Succ seems a bit confusing, as it creates bidirectional edges between R and PlaceHolder which are then removed. It's like inserting R both before PlaceHolder and after it, but both of these insertions require R to be disconnected. In essence, we want to replace PlaceHolder with R.
Another alternative is to have an empty region as a placeholder rather than an empty basic block, and then move/copy R's entry and exit blocks into it:

  auto *PlaceHolder = Plan.createVPRegionBlock("", false /*isReplicator*/);

followed by

  PlaceHolder.setEntry(R.getEntry());
  PlaceHolder.setExit(R.getExit());

essentially turning R to be the temporary block and PlaceHolder to be R, constructed outside in rather than inside out?

Copy link
Collaborator

@ayalz ayalz May 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps it suffices to make this a bit clearer by slightly reordering (would Exit be a better name for Succ?):

// Have R replace PlaceHolder as successor of Preheader.
BlockUtils::insertOnEdge(PreheaderBlock, PlaceHolder, R);
BlockUtils::disconnectBlocks(R, PlaceHolder);
// Have R replace PlaceHolder as predecessor of Exit.
BlockUtils::insertOnEdge(PlaceHolder, Exit, R);
BlockUtils::disconnectBlocks(PlaceHolder, R);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, there's no need to construct a placeholder region; we can simply create an empty main region first and then set its entry/exiting blocks after adjusting the CFG. Updated, thanks!

}

// Add the necessary canonical IV and branch recipes required to control the
Expand Down Expand Up @@ -491,12 +485,33 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());

VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
LatchVPB->swapSuccessors();
// Canonical LatchVPB has header block as last successor. If it has another
// successor, the latter is an exit block - insert middle block on its edge.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I'm a bit confused by the word "latter" here. Does "latter" refer to "another successor"? Perhaps it's easier just to say that if the latchvpb is not canonical the early exit block(s) come first, with the (canonical?) exit to the middle block being last? If I've understood the layout correctly.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LatchVPB is canonical - as a result of calling canonicalHeaderAndLatch() above. Being a canonical latch means LatchVPB has header block as its last successor, and this property is maintained. If LatchVPB has another successor, in addition to header, this other successor (appears first and) is an exit block. In this case middle block is inserted on the edge from LatchVPB to its first exit block successor. Should "the latter" be replaced with "this other successor"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to say

If it has another successor, this successor is an exit block

// Otherwise, add middle block as another successor retaining header as last.
if (LatchVPB->getNumSuccessors() == 2) {
VPBlockBase *LatchExitVPB = LatchVPB->getSuccessors()[0];
VPBlockUtils::insertOnEdge(LatchVPB, LatchExitVPB, MiddleVPBB);
} else {
VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
LatchVPB->swapSuccessors();
}

addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);

// Disconnect all edges to exit blocks other than from the middle block.
// TODO: VPlans with early exits should be explicitly converted to a form only
// exiting via the latch here, including adjusting the exit condition, instead
// of simplify disconnecting the edges and adjusting the VPlan later.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// TODO: VPlans with early exits should be explicitly converted to a form only
// exiting via the latch here, including adjusting the exit condition, instead
// of simplify disconnecting the edges and adjusting the VPlan later.
// TODO: VPlans with early exits should be explicitly converted to a form
// exiting only via the latch here, including adjusting the exit condition,
// instead of simply disconnecting the edges and adjusting the VPlan later.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks!

for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
if (Pred == MiddleVPBB)
continue;
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks a bit odd at first, as it seems to be undoing what you did above with VPBlockUtils::insertOnEdge(LatchVPB, LatchExitVPB, MiddleVPBB). I presume that's because the vector.early.exit VPBB sits between the latch block and the original IR early exit block?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The edge from MiddleVPBB to LatchExitVPB is retained here explicitly via the early-continue exclusion. The edges removed here are early exits from a non-latch Pred block to an early.exit block. Block vector.early.exit is introduced by handleUncountableEarlyExit(), which currently takes place later.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Above with insertOnEdge we only handle the edge exiting from the latch. The exit block via the latch will now be connected to the middle block.

The loop here disconnects all early exits, and they will be handled later in one of two ways: either by requiring at least one scalar iteration, in which case nothing more needs to be done, or by introducing the early-exit control flow to reach the early exit via the additional middle block. In the latter case, the VPlan is incomplete/incorrect until that later step runs.

To avoid this, we should directly handle the uncountable early exits here, which is done in #138393. This way, we do not need to rely on IR references in handleUncountableEarlyExit and the VPlan remains complete/correct throughout.

}
}

// Create SCEV and VPValue for the trip count.
// We use the symbolic max backedge-taken-count, which works also when
// vectorizing loops with uncountable early exits.
Expand All @@ -523,6 +538,8 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
// 3) Otherwise, construct a runtime check.

if (!RequiresScalarEpilogueCheck) {
if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
// The exit blocks are unreachable, remove their recipes to make sure no
// users remain that may pessimize transforms.
Expand All @@ -534,9 +551,6 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
}

// The connection order corresponds to the operands of the conditional branch.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"// with middle block already connected to exit block."?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added thanks

BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
auto *VPExitBlock = Plan.getExitBlock(IRExitBlock);
VPBlockUtils::connectBlocks(MiddleVPBB, VPExitBlock);
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);

auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ define void @foo(i64 %n) {
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
; CHECK-NEXT: Successor(s): outer.header
; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec>
; CHECK-NEXT: Successor(s): ir-bb<exit>, outer.header
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
entry:
br label %outer.header
Expand Down
Loading