Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
DenseSet<BasicBlock *> DeadBlocks;
// PHI nodes we have visited before.
DenseSet<Instruction *> VisitedPHIs;
// PHI nodes forming a strongly connected component.
DenseSet<PHINode *> StronglyConnectedPHIs;
// PHI nodes we have visited once without successfully constant folding them.
// Once the InstCostVisitor has processed all the specialization arguments,
// it should be possible to determine whether those PHIs can be folded
Expand Down Expand Up @@ -217,6 +219,8 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
Cost estimateSwitchInst(SwitchInst &I);
Cost estimateBranchInst(BranchInst &I);

void discoverStronglyConnectedComponent(PHINode *PN, unsigned Depth);

Constant *visitInstruction(Instruction &I) { return nullptr; }
Constant *visitPHINode(PHINode &I);
Constant *visitFreezeInst(FreezeInst &I);
Expand Down
133 changes: 107 additions & 26 deletions llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,15 @@ static cl::opt<unsigned> MaxClones(
"The maximum number of clones allowed for a single function "
"specialization"));

static cl::opt<unsigned> MaxDiscoveryDepth(
"funcspec-max-discovery-depth", cl::init(10), cl::Hidden,
cl::desc("The maximum recursion depth allowed when searching for strongly "
"connected phis"));

static cl::opt<unsigned> MaxIncomingPhiValues(
"funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc(
"The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));
"funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden,
cl::desc("The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));

static cl::opt<unsigned> MaxBlockPredecessors(
"funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc(
Expand All @@ -64,9 +69,9 @@ static cl::opt<unsigned> MinCodeSizeSavings(
"much percent of the original function size"));

static cl::opt<unsigned> MinLatencySavings(
"funcspec-min-latency-savings", cl::init(70), cl::Hidden, cl::desc(
"Reject specializations whose latency savings are less than this"
"much percent of the original function size"));
"funcspec-min-latency-savings", cl::init(45), cl::Hidden,
cl::desc("Reject specializations whose latency savings are less than this"
"much percent of the original function size"));

static cl::opt<unsigned> MinInliningBonus(
"funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
Expand Down Expand Up @@ -262,30 +267,86 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
return estimateBasicBlocks(WorkList);
}

void InstCostVisitor::discoverStronglyConnectedComponent(PHINode *PN,
unsigned Depth) {
if (Depth > MaxDiscoveryDepth)
return;

if (PN->getNumIncomingValues() > MaxIncomingPhiValues)
return;

if (!StronglyConnectedPHIs.insert(PN).second)
return;

for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
Value *V = PN->getIncomingValue(I);
if (auto *Phi = dyn_cast<PHINode>(V)) {
if (Phi == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
continue;
discoverStronglyConnectedComponent(Phi, Depth + 1);
}
}
}

Constant *InstCostVisitor::visitPHINode(PHINode &I) {
if (I.getNumIncomingValues() > MaxIncomingPhiValues)
return nullptr;

bool Inserted = VisitedPHIs.insert(&I).second;
Constant *Const = nullptr;
SmallVector<PHINode *, 8> UnknownIncomingValues;

for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
Value *V = I.getIncomingValue(Idx);
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
continue;
Constant *C = findConstantFor(V, KnownConstants);
if (!C) {
if (Inserted)
PendingPHIs.push_back(&I);
return nullptr;
auto CanConstantFoldPhi = [&](PHINode *PN) -> bool {
UnknownIncomingValues.clear();

for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
Value *V = PN->getIncomingValue(I);

// Disregard self-references and dead incoming values.
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
continue;

if (Constant *C = findConstantFor(V, KnownConstants)) {
if (!Const)
Const = C;
// Not all incoming values are the same constant. Bail immediately.
else if (C != Const)
return false;
} else if (auto *Phi = dyn_cast<PHINode>(V)) {
// It's not a strongly connected phi. Collect it and bail at the end.
if (!StronglyConnectedPHIs.contains(Phi))
UnknownIncomingValues.push_back(Phi);
} else {
// We can't reason about anything else.
return false;
}
}
return UnknownIncomingValues.empty();
};

if (CanConstantFoldPhi(&I))
return Const;

if (Inserted) {
// First time we are seeing this phi. We'll retry later, after all
// the constant arguments have been propagated. Bail for now.
PendingPHIs.push_back(&I);
return nullptr;
}

for (PHINode *Phi : UnknownIncomingValues)
discoverStronglyConnectedComponent(Phi, 1);

bool CannotConstantFoldPhi = false;
for (PHINode *Phi : StronglyConnectedPHIs) {
if (!CanConstantFoldPhi(Phi)) {
CannotConstantFoldPhi = true;
break;
}
if (!Const)
Const = C;
else if (C != Const)
return nullptr;
}
return Const;
StronglyConnectedPHIs.clear();
return CannotConstantFoldPhi ? nullptr : Const;
}

Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
Expand Down Expand Up @@ -809,20 +870,40 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
unsigned FuncGrowth) -> bool {
// No check required.
if (ForceSpecialization)
if (ForceSpecialization) {
LLVM_DEBUG(dbgs() << "Force is on\n");
return true;
}
// Minimum inlining bonus.
if (Score > MinInliningBonus * FuncSize / 100)
if (Score > MinInliningBonus * FuncSize / 100) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Min inliningbous: Score = " << Score
<< " > " << MinInliningBonus * FuncSize / 100 << "\n");
return true;
}
// Minimum codesize savings.
if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Min CodeSize Saving: CodeSize = "
<< B.CodeSize << " > "
<< MinCodeSizeSavings * FuncSize / 100 << "\n");
return false;
}
// Minimum latency savings.
if (B.Latency < MinLatencySavings * FuncSize / 100)
if (B.Latency < MinLatencySavings * FuncSize / 100) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Min Latency Saving: Latency = "
<< B.Latency << " > " << MinLatencySavings * FuncSize / 100
<< "\n");
return false;
}
// Maximum codesize growth.
if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
if (FuncGrowth / FuncSize > MaxCodeSizeGrowth) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Max Func Growth: CodeSize = "
<< FuncGrowth / FuncSize << " > "
<< MaxCodeSizeGrowth << "\n");
return false;
}
return true;
};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s | FileCheck %s --check-prefix=FUNCSPEC
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -funcspec-max-discovery-depth=5 -S < %s | FileCheck %s --check-prefix=NOFUNCSPEC

define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
; FUNCSPEC-LABEL: define i64 @bar(
; FUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; FUNCSPEC-NEXT: entry:
; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG1:![0-9]+]]
; FUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; FUNCSPEC-NEXT: ret i64 [[ADD]]
;
; NOFUNCSPEC-LABEL: define i64 @bar(
; NOFUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; NOFUNCSPEC-NEXT: entry:
; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0]]
; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; NOFUNCSPEC-NEXT: ret i64 [[ADD]]
;
entry:
%f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%f2 = call i64 @foo(i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%add = add i64 %f1, %f2
ret i64 %add
}

define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
entry:
br i1 %c1, label %l1, label %l9

l1:
%phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ]
%add = add i64 %phi1, 1
%div = sdiv i64 %add, 2
br i1 %c2, label %l1_5, label %exit

l1_5:
br i1 %c3, label %l1_75, label %l6

l1_75:
br i1 %c4, label %l2, label %l3

l2:
%phi2 = phi i64 [ %phi1, %l1_75 ], [ %phi3, %l3 ]
br label %l1

l3:
%phi3 = phi i64 [ %phi1, %l1_75 ], [ %phi4, %l4 ]
br label %l2

l4:
%phi4 = phi i64 [ %phi5, %l5 ], [ %phi6, %l6 ]
br i1 %c5, label %l3, label %l6

l5:
%phi5 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br label %l4

l6:
%phi6 = phi i64 [ %phi4, %l4 ], [ %phi1, %l1_5 ]
br i1 %c6, label %l4, label %l6_5

l6_5:
br i1 %c7, label %l5, label %l8

l7:
%phi7 = phi i64 [ %phi9, %l9 ], [ %phi8, %l8 ]
br i1 %c8, label %l5, label %l8

l8:
%phi8 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br i1 %c9, label %l7, label %l9

l9:
%phi9 = phi i64 [ %n, %entry ], [ %phi8, %l8 ]
%sub = sub i64 %phi9, 1
%mul = mul i64 %sub, 2
br i1 %c10, label %l7, label %exit

exit:
%res = phi i64 [ %div, %l1 ], [ %mul, %l9]
ret i64 %res
}