Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

Commit 0170b8d

Browse files
aartbik authored and commit-bot@chromium.org committed
Revert "[vm/compiler] Use loop framework for AOT inline heuristics"
This reverts commit daae20d.

Reason for revert:
The kernel-precomp bots are seeing this error:

    ../../runtime/vm/object.h: 3134: error: Handle check failed: saw 2249186640 expected Function

Not sure what that is yet, but reverting to get bots green again while I investigate.

Original change's description:
> [vm/compiler] Use loop framework for AOT inline heuristics
>
> Rationale:
> Without proper execution counters, the inline AOT inliner
> marks every call site "cold", effectively disabling inlining
> altogether. This change introduces loop-based static heuristic
> that assumes statements nested inside loops are executed more
> frequently. This results in more inlining.
>
> Note:
> Conservative version is used for now which yields
> more performance without increasing code size too much.
> There is still a lot of performance left at the table
> which we could exploit if we fine tune heuristics
> regarding code size.
>
> Bug:
> dart-lang/sdk#34473
> dart-lang/sdk#32167
>
>
> Change-Id: I86ba60f93bdab363cd22ab6bdbcf6688f2042fea
> Reviewed-on: https://dart-review.googlesource.com/c/81187
> Commit-Queue: Aart Bik <[email protected]>
> Reviewed-by: Alexander Markov <[email protected]>

[email protected],[email protected],[email protected]

Change-Id: If5ca82966966ebef4ec0b4e921515d23f6bd492b
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://dart-review.googlesource.com/c/81335
Reviewed-by: Aart Bik <[email protected]>
Commit-Queue: Aart Bik <[email protected]>
1 parent 9aff930 commit 0170b8d

3 files changed

Lines changed: 15 additions & 68 deletions

File tree

runtime/vm/compiler/backend/il.cc

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1600,10 +1600,6 @@ bool BlockEntryInstr::IsLoopHeader() const {
16001600
return loop_info_ != nullptr && loop_info_->header() == this;
16011601
}
16021602

1603-
intptr_t BlockEntryInstr::NestingDepth() const {
1604-
return loop_info_ == nullptr ? 0 : loop_info_->NestingDepth();
1605-
}
1606-
16071603
// Helper to mutate the graph during inlining. This block should be
16081604
// replaced with new_block as a predecessor of all of this block's
16091605
// successors. For each successor, the predecessors will be reordered

runtime/vm/compiler/backend/il.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1360,7 +1360,6 @@ class BlockEntryInstr : public Instruction {
13601360
LoopInfo* loop_info() const { return loop_info_; }
13611361
void set_loop_info(LoopInfo* loop_info) { loop_info_ = loop_info; }
13621362
bool IsLoopHeader() const;
1363-
intptr_t NestingDepth() const;
13641363

13651364
virtual BlockEntryInstr* GetBlock() { return this; }
13661365

runtime/vm/compiler/backend/inliner.cc

Lines changed: 15 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,7 @@ struct InlinedInfo {
254254
// A collection of call sites to consider for inlining.
255255
class CallSites : public ValueObject {
256256
public:
257-
explicit CallSites(intptr_t threshold)
257+
explicit CallSites(FlowGraph* flow_graph, intptr_t threshold)
258258
: inlining_depth_threshold_(threshold),
259259
static_calls_(),
260260
closure_calls_(),
@@ -264,29 +264,18 @@ class CallSites : public ValueObject {
264264
PolymorphicInstanceCallInstr* call;
265265
double ratio;
266266
const FlowGraph* caller_graph;
267-
intptr_t nesting_depth;
268267
InstanceCallInfo(PolymorphicInstanceCallInstr* call_arg,
269-
FlowGraph* flow_graph,
270-
intptr_t depth)
271-
: call(call_arg),
272-
ratio(0.0),
273-
caller_graph(flow_graph),
274-
nesting_depth(depth) {}
268+
FlowGraph* flow_graph)
269+
: call(call_arg), ratio(0.0), caller_graph(flow_graph) {}
275270
const Function& caller() const { return caller_graph->function(); }
276271
};
277272

278273
struct StaticCallInfo {
279274
StaticCallInstr* call;
280275
double ratio;
281276
FlowGraph* caller_graph;
282-
intptr_t nesting_depth;
283-
StaticCallInfo(StaticCallInstr* value,
284-
FlowGraph* flow_graph,
285-
intptr_t depth)
286-
: call(value),
287-
ratio(0.0),
288-
caller_graph(flow_graph),
289-
nesting_depth(depth) {}
277+
StaticCallInfo(StaticCallInstr* value, FlowGraph* flow_graph)
278+
: call(value), ratio(0.0), caller_graph(flow_graph) {}
290279
const Function& caller() const { return caller_graph->function(); }
291280
};
292281

@@ -326,32 +315,6 @@ class CallSites : public ValueObject {
326315
instance_calls_.Clear();
327316
}
328317

329-
// Heuristic that maps the loop nesting depth to a static estimate of number
330-
// of times code at that depth is executed (code at each higher nesting
331-
// depth is assumed to execute 10x more often up to depth 3).
332-
static intptr_t AotCallCountApproximation(intptr_t nesting_depth) {
333-
switch (nesting_depth) {
334-
case 0:
335-
// Note that we use value 0, and not 1, i.e. any straightline code
336-
// outside a loop is assumed to be very cold. With value 1, inlining
337-
// inside loops is still favored over inlining inside straightline
338-
// code, but for a method without loops, *all* call sites are inlined
339-
// (potentially more performance, at the expense of larger code size).
340-
// TODO(ajcbik): use 1 and fine tune other heuristics
341-
return 0;
342-
case 1:
343-
return 10;
344-
case 2:
345-
return 100;
346-
default:
347-
return 1000;
348-
}
349-
}
350-
351-
// Computes the ratio for each call site in a method, defined as the
352-
// number of times a call site is executed over the maximum number of
353-
// times any call site is executed in the method. JIT uses actual call
354-
// counts whereas AOT uses a static estimate based on nesting depth.
355318
void ComputeCallSiteRatio(intptr_t static_call_start_ix,
356319
intptr_t instance_call_start_ix) {
357320
const intptr_t num_static_calls =
@@ -362,26 +325,21 @@ class CallSites : public ValueObject {
362325
intptr_t max_count = 0;
363326
GrowableArray<intptr_t> instance_call_counts(num_instance_calls);
364327
for (intptr_t i = 0; i < num_instance_calls; ++i) {
365-
const InstanceCallInfo& info =
366-
instance_calls_[i + instance_call_start_ix];
367-
intptr_t aggregate_count =
368-
FLAG_precompiled_mode ? AotCallCountApproximation(info.nesting_depth)
369-
: info.call->CallCount();
328+
const intptr_t aggregate_count =
329+
instance_calls_[i + instance_call_start_ix].call->CallCount();
370330
instance_call_counts.Add(aggregate_count);
371331
if (aggregate_count > max_count) max_count = aggregate_count;
372332
}
373333

374334
GrowableArray<intptr_t> static_call_counts(num_static_calls);
375335
for (intptr_t i = 0; i < num_static_calls; ++i) {
376-
const StaticCallInfo& info = static_calls_[i + static_call_start_ix];
377336
intptr_t aggregate_count =
378-
FLAG_precompiled_mode ? AotCallCountApproximation(info.nesting_depth)
379-
: info.call->CallCount();
337+
static_calls_[i + static_call_start_ix].call->CallCount();
380338
static_call_counts.Add(aggregate_count);
381339
if (aggregate_count > max_count) max_count = aggregate_count;
382340
}
383341

384-
// Note that max_count can be 0 if none of the calls was executed.
342+
// max_count can be 0 if none of the calls was executed.
385343
for (intptr_t i = 0; i < num_instance_calls; ++i) {
386344
const double ratio =
387345
(max_count == 0)
@@ -446,26 +404,20 @@ class CallSites : public ValueObject {
446404
const bool inline_only_recognized_methods =
447405
(depth == inlining_depth_threshold_);
448406

449-
// In AOT, compute loop hierarchy.
450-
if (FLAG_precompiled_mode) {
451-
graph->GetLoopHierarchy();
452-
}
453-
454407
const intptr_t instance_call_start_ix = instance_calls_.length();
455408
const intptr_t static_call_start_ix = static_calls_.length();
456409
for (BlockIterator block_it = graph->postorder_iterator(); !block_it.Done();
457410
block_it.Advance()) {
458-
BlockEntryInstr* entry = block_it.Current();
459-
const intptr_t depth = entry->NestingDepth();
460-
for (ForwardInstructionIterator it(entry); !it.Done(); it.Advance()) {
411+
for (ForwardInstructionIterator it(block_it.Current()); !it.Done();
412+
it.Advance()) {
461413
Instruction* current = it.Current();
462414
if (current->IsPolymorphicInstanceCall()) {
463415
PolymorphicInstanceCallInstr* instance_call =
464416
current->AsPolymorphicInstanceCall();
465417
if (!inline_only_recognized_methods ||
466418
instance_call->IsSureToCallSingleRecognizedTarget() ||
467419
instance_call->HasOnlyDispatcherOrImplicitAccessorTargets()) {
468-
instance_calls_.Add(InstanceCallInfo(instance_call, graph, depth));
420+
instance_calls_.Add(InstanceCallInfo(instance_call, graph));
469421
} else {
470422
// Method not inlined because inlining too deep and method
471423
// not recognized.
@@ -481,7 +433,7 @@ class CallSites : public ValueObject {
481433
if (!inline_only_recognized_methods ||
482434
static_call->function().IsRecognized() ||
483435
static_call->function().IsDispatcherOrImplicitAccessor()) {
484-
static_calls_.Add(StaticCallInfo(static_call, graph, depth));
436+
static_calls_.Add(StaticCallInfo(static_call, graph));
485437
} else {
486438
// Method not inlined because inlining too deep and method
487439
// not recognized.
@@ -799,8 +751,8 @@ class CallSiteInliner : public ValueObject {
799751
return;
800752
}
801753
// Create two call site collections to swap between.
802-
CallSites sites1(inlining_depth_threshold_);
803-
CallSites sites2(inlining_depth_threshold_);
754+
CallSites sites1(caller_graph_, inlining_depth_threshold_);
755+
CallSites sites2(caller_graph_, inlining_depth_threshold_);
804756
CallSites* call_sites_temp = NULL;
805757
collected_call_sites_ = &sites1;
806758
inlining_call_sites_ = &sites2;

0 commit comments

Comments
 (0)