Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
Cost estimateSwitchInst(SwitchInst &I);
Cost estimateBranchInst(BranchInst &I);

/// Recursively collects into \p PhiNodes the PHI nodes reachable from \p PN
/// by following incoming values that are themselves PHI nodes.
/// \p Depth is the current recursion depth; the definition stops descending
/// once it exceeds the -funcspec-max-discovery-depth option.
void discoverTransitivelyIncomngValues(DenseSet<PHINode *> &PhiNodes,
PHINode *PN, unsigned Depth);

Constant *visitInstruction(Instruction &I) { return nullptr; }
Constant *visitPHINode(PHINode &I);
Constant *visitFreezeInst(FreezeInst &I);
Expand Down
207 changes: 183 additions & 24 deletions llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,15 @@ static cl::opt<unsigned> MaxClones(
"The maximum number of clones allowed for a single function "
"specialization"));

// Upper bound on how deep the PHI discovery recursion may go before giving up.
static cl::opt<unsigned>
    MaxDiscoveryDepth("funcspec-max-discovery-depth", cl::init(10), cl::Hidden,
                      cl::desc("The maximum recursion depth allowed when "
                               "searching for strongly connected phis"));

static cl::opt<unsigned> MaxIncomingPhiValues(
"funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc(
"The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));
"funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden,
cl::desc("The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));

static cl::opt<unsigned> MaxBlockPredecessors(
"funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc(
Expand All @@ -64,9 +69,9 @@ static cl::opt<unsigned> MinCodeSizeSavings(
"much percent of the original function size"));

static cl::opt<unsigned> MinLatencySavings(
"funcspec-min-latency-savings", cl::init(70), cl::Hidden, cl::desc(
"Reject specializations whose latency savings are less than this"
"much percent of the original function size"));
"funcspec-min-latency-savings", cl::init(45), cl::Hidden,
cl::desc("Reject specializations whose latency savings are less than this"
"much percent of the original function size"));

static cl::opt<unsigned> MinInliningBonus(
"funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
Expand Down Expand Up @@ -262,30 +267,163 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
return estimateBasicBlocks(WorkList);
}

// This function finds candidate PHINodes that are part of a chain or graph of
// PHINodes that all link to each other. That means that, if the original input
// to the chain is a constant, all the other values are also that constant.
//
// The caller of this function will later check that no other nodes are involved
// that are non-constant, and discard it from the possible conversions.
//
// For example:
//
// %a = load %0
// %c = phi [%a, %d]
// %d = phi [%e, %c]
// %e = phi [%c, %f]
// %f = phi [%j, %h]
// %j = phi [%h, %j]
// %h = phi [%g, %c]
//
// This is only showing the PHINodes, not the branches that choose the
// different paths.
//
// A depth limit is used to avoid extreme recursion.
// A max number of incoming phi values ensures that expensive searches
// are avoided.
/// Depth-first walk that records in \p PHINodes every PHI node reachable from
/// \p PN through PHI-typed incoming values.
///
/// The walk silently stops descending (leaving \p PHINodes as it is) when
/// \p Depth exceeds MaxDiscoveryDepth or when \p PN has more incoming values
/// than MaxIncomingPhiValues. Self-references and values arriving from blocks
/// in DeadBlocks are not followed.
void InstCostVisitor::discoverTransitivelyIncomngValues(
    DenseSet<PHINode *> &PHINodes, PHINode *PN, unsigned Depth) {
  // Guard 1: recursion budget exhausted.
  if (Depth > MaxDiscoveryDepth) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Discover PHI nodes too deep ("
                      << Depth << ">" << MaxDiscoveryDepth << ")\n");
    return;
  }

  // Guard 2: this PHI fans in from too many places to be worth exploring.
  const unsigned NumIncoming = PN->getNumIncomingValues();
  if (NumIncoming > MaxIncomingPhiValues) {
    LLVM_DEBUG(
        dbgs() << "FnSpecialization: Discover PHI nodes has too many values ("
               << NumIncoming << ">" << MaxIncomingPhiValues << ")\n");
    return;
  }

  // Guard 3: a failed insertion means the node was visited before, which is
  // also what terminates the walk on cyclic PHI graphs.
  const bool FirstVisit = PHINodes.insert(PN).second;
  if (!FirstVisit)
    return;

  for (unsigned Idx = 0; Idx < NumIncoming; ++Idx) {
    auto *IncomingPhi = dyn_cast<PHINode>(PN->getIncomingValue(Idx));

    // Only PHI operands extend the graph; a PHI feeding itself adds nothing.
    if (!IncomingPhi || IncomingPhi == PN)
      continue;

    // Edges coming from dead blocks are ignored.
    if (DeadBlocks.contains(PN->getIncomingBlock(Idx)))
      continue;

    discoverTransitivelyIncomngValues(PHINodes, IncomingPhi, Depth + 1);
  }
}

Constant *InstCostVisitor::visitPHINode(PHINode &I) {
if (I.getNumIncomingValues() > MaxIncomingPhiValues)
return nullptr;

// PHI nodes
DenseSet<PHINode *> TransitivePHIs;

bool Inserted = VisitedPHIs.insert(&I).second;
Constant *Const = nullptr;
SmallVector<PHINode *, 8> UnknownIncomingValues;

auto canConstantFoldPhiTrivially = [&](PHINode *PN) -> Constant * {
Constant *Const = nullptr;

for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
Value *V = I.getIncomingValue(Idx);
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
UnknownIncomingValues.clear();
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
Value *V = PN->getIncomingValue(I);

// Disregard self-references and dead incoming values.
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
continue;

if (Constant *C = findConstantFor(V, KnownConstants)) {
if (!Const)
Const = C;
// Not all incoming values are the same constant. Bail immediately.
if (C != Const)
return nullptr;
continue;
Constant *C = findConstantFor(V, KnownConstants);
if (!C) {
if (Inserted)
PendingPHIs.push_back(&I);
}
if (auto *Phi = dyn_cast<PHINode>(V)) {
UnknownIncomingValues.push_back(Phi);
continue;
}

// We can't reason about anything else.
return nullptr;
}
if (!Const)
Const = C;
else if (C != Const)
return UnknownIncomingValues.empty() ? Const : nullptr;
};

if (Constant *Const = canConstantFoldPhiTrivially(&I))
return Const;

if (Inserted) {
// First time we are seeing this phi. We'll retry later, after all
// the constant arguments have been propagated. Bail for now.
PendingPHIs.push_back(&I);
return nullptr;
}

// Try to see if we can collect a nest of transitive phis. Bail if
// it's too complex.
for (PHINode *Phi : UnknownIncomingValues)
discoverTransitivelyIncomngValues(TransitivePHIs, Phi, 1);


// A nested set of PHINodes can be constantfolded if:
// - It has a constant input.
// - It is always the SAME constant.
auto canConstantFoldNestedPhi = [&](PHINode *PN) -> Constant * {
Constant *Const = nullptr;

for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
Value *V = PN->getIncomingValue(I);

// Disregard self-references and dead incoming values.
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
continue;

if (Constant *C = findConstantFor(V, KnownConstants)) {
if (!Const)
Const = C;
// Not all incoming values are the same constant. Bail immediately.
if (C != Const)
return nullptr;
continue;
}
if (auto *Phi = dyn_cast<PHINode>(V)) {
// It's not a Transitive phi. Bail out.
if (!TransitivePHIs.contains(Phi))
return nullptr;
continue;
}

// We can't reason about anything else.
return nullptr;
}
return Const;
};

// All TransitivePHIs have to be the SAME constant.
Constant *Retval = nullptr;
for (PHINode *Phi : TransitivePHIs) {
if (Constant *Const = canConstantFoldNestedPhi(Phi)) {
if (!Retval)
Retval = Const;
else if (Retval != Const)
return nullptr;
}
}
return Const;

return Retval;
}

Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
Expand Down Expand Up @@ -809,20 +947,41 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
// Decides whether a candidate specialization should be kept.
//
// The checks run in this order and the first one that fires wins:
//   1. ForceSpecialization set                     -> accept.
//   2. Score above MinInliningBonus% of FuncSize   -> accept.
//   3. B.CodeSize below MinCodeSizeSavings% of it  -> reject.
//   4. B.Latency below MinLatencySavings% of it    -> reject.
//   5. FuncGrowth / FuncSize above MaxCodeSizeGrowth -> reject.
//   6. Otherwise                                   -> accept.
//
// The "percent" thresholds are computed as Threshold * FuncSize / 100.
// FuncGrowth and FuncSize are both unsigned, so the growth ratio in step 5 is
// a truncating integer division.
//
// NOTE(review): each condition below appears twice back to back (an unbraced
// line followed by a braced one). This looks like pre-/post-change lines of a
// diff shown together; confirm against the real file before relying on it.
auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
unsigned FuncGrowth) -> bool {
// No check required.
if (ForceSpecialization)
if (ForceSpecialization) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Force is on\n");
return true;
}
// Minimum inlining bonus.
if (Score > MinInliningBonus * FuncSize / 100)
if (Score > MinInliningBonus * FuncSize / 100) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Sufficient inlining bonus (" << Score
<< " > " << MinInliningBonus * FuncSize / 100 << ")\n");
return true;
}
// Minimum codesize savings.
if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Insufficient CodeSize Savings ("
<< B.CodeSize << " < "
<< MinCodeSizeSavings * FuncSize / 100 << ")\n");
return false;
}
// Minimum latency savings.
if (B.Latency < MinLatencySavings * FuncSize / 100)
if (B.Latency < MinLatencySavings * FuncSize / 100) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Insufficient Latency Savings ("
<< B.Latency << " < " << MinLatencySavings * FuncSize / 100
<< ")\n");
return false;
}
// Maximum codesize growth.
if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
if (FuncGrowth / FuncSize > MaxCodeSizeGrowth) {
LLVM_DEBUG(dbgs()
<< "FnSpecialization: Function Growth exceeds threshold ("
<< FuncGrowth / FuncSize << " > " << MaxCodeSizeGrowth
<< ")\n");
return false;
}
return true;
};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s | FileCheck %s --check-prefix=FUNCSPEC
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -funcspec-max-discovery-depth=5 -S < %s | FileCheck %s --check-prefix=NOFUNCSPEC

; Test driver: calls @foo twice with two different literal constants (3 and 4)
; as the first argument, forwarding the same ten i1 conditions to both calls.
; Under the first run line (default discovery depth) both calls are expected
; to be redirected to specialized clones of @foo. Under the second run line
; (-funcspec-max-discovery-depth=5) the calls are expected to keep targeting
; the original @foo.
define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
; FUNCSPEC-LABEL: define i64 @bar(
; FUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; FUNCSPEC-NEXT: entry:
; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG1:![0-9]+]]
; FUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; FUNCSPEC-NEXT: ret i64 [[ADD]]
;
; NOFUNCSPEC-LABEL: define i64 @bar(
; NOFUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; NOFUNCSPEC-NEXT: entry:
; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0]]
; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; NOFUNCSPEC-NEXT: ret i64 [[ADD]]
;
entry:
%f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%f2 = call i64 @foo(i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%add = add i64 %f1, %f2
ret i64 %add
}

; Specialization candidate. %n enters the function only through %phi1 and
; %phi9; every other incoming value of %phi1..%phi9 is another phi in the same
; group, so the nine phis form a web whose sole non-phi input is %n.
; The ten i1 arguments only steer the branches between the blocks holding the
; phis. The result is computed from %phi1 (add/sdiv in %l1) or from %phi9
; (sub/mul in %l9).
define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
entry:
br i1 %c1, label %l1, label %l9

; First entry point of %n into the phi web; the other input loops back via %l2.
l1:
%phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ]
%add = add i64 %phi1, 1
%div = sdiv i64 %add, 2
br i1 %c2, label %l1_5, label %exit

l1_5:
br i1 %c3, label %l1_75, label %l6

l1_75:
br i1 %c4, label %l2, label %l3

l2:
%phi2 = phi i64 [ %phi1, %l1_75 ], [ %phi3, %l3 ]
br label %l1

l3:
%phi3 = phi i64 [ %phi1, %l1_75 ], [ %phi4, %l4 ]
br label %l2

l4:
%phi4 = phi i64 [ %phi5, %l5 ], [ %phi6, %l6 ]
br i1 %c5, label %l3, label %l6

l5:
%phi5 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br label %l4

l6:
%phi6 = phi i64 [ %phi4, %l4 ], [ %phi1, %l1_5 ]
br i1 %c6, label %l4, label %l6_5

l6_5:
br i1 %c7, label %l5, label %l8

l7:
%phi7 = phi i64 [ %phi9, %l9 ], [ %phi8, %l8 ]
br i1 %c8, label %l5, label %l8

l8:
%phi8 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br i1 %c9, label %l7, label %l9

; Second entry point of %n into the phi web.
l9:
%phi9 = phi i64 [ %n, %entry ], [ %phi8, %l8 ]
%sub = sub i64 %phi9, 1
%mul = mul i64 %sub, 2
br i1 %c10, label %l7, label %exit

exit:
%res = phi i64 [ %div, %l1 ], [ %mul, %l9]
ret i64 %res
}