Skip to content

Commit ad30479

Browse files
authored
[mono][interp] Remove no_inlining functionality for dead bblocks (#110468)
Many methods in the BCL, especially those related to hardware intrinsics, contain a lot of code that is detected as dead during compilation. On Mono, inlining happens during IL import, and many optimizations run as later passes. This exposed an issue where inlining produces a lot of dead-code bloat, which the later optimization passes then have to process. A simple solution for this problem was to track jump counts for each bblock (#97514); these counts are initialized when bblocks are first created, before the IL import stage. A small set of IL-import-level optimizations was then added in order to reduce the jump target count of each bblock. As we continued importing IL, if we reached a bblock with 0 jump targets, we would disable inlining into it in order to reduce code bloat. Disabling code emission altogether was too challenging. Another limitation of this approach was that we would fail to detect dead code if it was part of a loop. The results were good, however: memory usage in `System.Numerics.Tensor.Tests` dropped from 6GB to 600MB. To address an unrelated issue, the order in which we generate bblocks was redesigned in order to account for bblock stack state initialization in weird control flow scenarios (#108731). This was achieved by deferring IL import into bblocks that were not yet reached from other live bblocks. A side effect of this is that we no longer generate code at all in unreachable bblocks, which completely supersedes the previous approach and addresses both of its problems: inlining into dead code that is part of a loop, and generating IR for dead IL. In the previously mentioned test suite, this further reduced memory usage to 300MB. Remnants of the now-unnecessary `no_inlining` approach still lingered in the code, causing inlining to be disabled in some reachable code. This triggered a significant performance regression, which this PR addresses.
1 parent 5f41aab commit ad30479

File tree

2 files changed

+7
-44
lines changed

2 files changed

+7
-44
lines changed

src/mono/mono/mini/interp/transform.c

Lines changed: 7 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -760,8 +760,6 @@ handle_branch (TransformData *td, int long_op, int offset)
760760
init_bb_stack_state (td, target_bb);
761761

762762
if (long_op != MINT_CALL_HANDLER) {
763-
if (td->cbb->no_inlining)
764-
target_bb->jump_targets--;
765763
// We don't link finally blocks into the cfg (or other handler blocks for that matter)
766764
interp_link_bblocks (td, td->cbb, target_bb);
767765
}
@@ -803,8 +801,6 @@ one_arg_branch(TransformData *td, int mint_op, int offset, int inst_size)
803801
return FALSE;
804802
} else {
805803
// branch condition always false, it is a NOP
806-
int target = GPTRDIFF_TO_INT (td->ip + offset + inst_size - td->il_code);
807-
td->offset_to_bb [target]->jump_targets--;
808804
return TRUE;
809805
}
810806
} else {
@@ -901,8 +897,6 @@ two_arg_branch(TransformData *td, int mint_op, int offset, int inst_size)
901897
return FALSE;
902898
} else {
903899
// branch condition always false, it is a NOP
904-
int target = GPTRDIFF_TO_INT (td->ip + offset + inst_size - td->il_code);
905-
td->offset_to_bb [target]->jump_targets--;
906900
return TRUE;
907901
}
908902
} else {
@@ -2884,9 +2878,6 @@ interp_method_check_inlining (TransformData *td, MonoMethod *method, MonoMethodS
28842878
if (td->disable_inlining)
28852879
return FALSE;
28862880

2887-
if (td->cbb->no_inlining)
2888-
return FALSE;
2889-
28902881
// Exception handlers are always uncommon, with the exception of finally.
28912882
int inner_clause = td->clause_indexes [td->current_il_offset];
28922883
if (inner_clause != -1 && td->header->clauses [inner_clause].flags != MONO_EXCEPTION_CLAUSE_FINALLY)
@@ -4151,7 +4142,6 @@ get_basic_blocks (TransformData *td, MonoMethodHeader *header, gboolean make_lis
41514142
unsigned char *target;
41524143
ptrdiff_t cli_addr;
41534144
const MonoOpcode *opcode;
4154-
InterpBasicBlock *bb;
41554145

41564146
td->offset_to_bb = (InterpBasicBlock**)mono_mempool_alloc0 (td->mempool, (unsigned int)(sizeof (InterpBasicBlock*) * (end - start + 1)));
41574147
get_bb (td, start, make_list);
@@ -4160,21 +4150,18 @@ get_basic_blocks (TransformData *td, MonoMethodHeader *header, gboolean make_lis
41604150
MonoExceptionClause *c = header->clauses + i;
41614151
if (start + c->try_offset > end || start + c->try_offset + c->try_len > end)
41624152
return FALSE;
4163-
bb = get_bb (td, start + c->try_offset, make_list);
4164-
bb->jump_targets++;
4153+
get_bb (td, start + c->try_offset, make_list);
41654154
mono_bitset_set (il_targets, c->try_offset);
41664155
mono_bitset_set (il_targets, c->try_offset + c->try_len);
41674156
if (start + c->handler_offset > end || start + c->handler_offset + c->handler_len > end)
41684157
return FALSE;
4169-
bb = get_bb (td, start + c->handler_offset, make_list);
4170-
bb->jump_targets++;
4158+
get_bb (td, start + c->handler_offset, make_list);
41714159
mono_bitset_set (il_targets, c->handler_offset);
41724160
mono_bitset_set (il_targets, c->handler_offset + c->handler_len);
41734161
if (c->flags == MONO_EXCEPTION_CLAUSE_FILTER) {
41744162
if (start + c->data.filter_offset > end)
41754163
return FALSE;
4176-
bb = get_bb (td, start + c->data.filter_offset, make_list);
4177-
bb->jump_targets++;
4164+
get_bb (td, start + c->data.filter_offset, make_list);
41784165
mono_bitset_set (il_targets, c->data.filter_offset);
41794166
}
41804167
}
@@ -4207,8 +4194,7 @@ get_basic_blocks (TransformData *td, MonoMethodHeader *header, gboolean make_lis
42074194
target = start + cli_addr + 2 + (signed char)ip [1];
42084195
if (target > end)
42094196
return FALSE;
4210-
bb = get_bb (td, target, make_list);
4211-
bb->jump_targets++;
4197+
get_bb (td, target, make_list);
42124198
ip += 2;
42134199
get_bb (td, ip, make_list);
42144200
mono_bitset_set (il_targets, GPTRDIFF_TO_UINT32 (target - start));
@@ -4217,8 +4203,7 @@ get_basic_blocks (TransformData *td, MonoMethodHeader *header, gboolean make_lis
42174203
target = start + cli_addr + 5 + (gint32)read32 (ip + 1);
42184204
if (target > end)
42194205
return FALSE;
4220-
bb = get_bb (td, target, make_list);
4221-
bb->jump_targets++;
4206+
get_bb (td, target, make_list);
42224207
ip += 5;
42234208
get_bb (td, ip, make_list);
42244209
mono_bitset_set (il_targets, GPTRDIFF_TO_UINT32 (target - start));
@@ -4231,15 +4216,13 @@ get_basic_blocks (TransformData *td, MonoMethodHeader *header, gboolean make_lis
42314216
target = start + cli_addr;
42324217
if (target > end)
42334218
return FALSE;
4234-
bb = get_bb (td, target, make_list);
4235-
bb->jump_targets++;
4219+
get_bb (td, target, make_list);
42364220
mono_bitset_set (il_targets, GPTRDIFF_TO_UINT32 (target - start));
42374221
for (j = 0; j < n; ++j) {
42384222
target = start + cli_addr + (gint32)read32 (ip);
42394223
if (target > end)
42404224
return FALSE;
4241-
bb = get_bb (td, target, make_list);
4242-
bb->jump_targets++;
4225+
get_bb (td, target, make_list);
42434226
ip += 4;
42444227
mono_bitset_set (il_targets, GPTRDIFF_TO_UINT32 (target - start));
42454228
}
@@ -5446,13 +5429,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
54465429

54475430
/* We are starting a new basic block. Change cbb and link them together */
54485431
if (link_bblocks) {
5449-
if (!new_bb->jump_targets && td->cbb->no_inlining) {
5450-
// This is a bblock that is not branched to and falls through from
5451-
// a dead predecessor. It means it is dead.
5452-
new_bb->no_inlining = TRUE;
5453-
if (td->verbose_level)
5454-
g_print ("Disable inlining in BB%d\n", new_bb->index);
5455-
}
54565432
/*
54575433
* By default we link cbb with the new starting bblock, unless the previous
54585434
* instruction is an unconditional branch (BR, LEAVE, ENDFINALLY)
@@ -5472,16 +5448,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
54725448
}
54735449
// link_bblocks remains true, which is the default
54745450
} else {
5475-
if (!new_bb->jump_targets) {
5476-
// This is a bblock that is not branched to and it is not linked to the
5477-
// predecessor. It means it is dead.
5478-
new_bb->no_inlining = TRUE;
5479-
if (td->verbose_level)
5480-
g_print ("Disable inlining in BB%d\n", new_bb->index);
5481-
} else {
5482-
g_assert (new_bb->jump_targets > 0);
5483-
}
5484-
54855451
if (new_bb->stack_height >= 0) {
54865452
// This is relevant only for copying the vars associated with the values on the stack
54875453
memcpy (td->stack, new_bb->stack_state, new_bb->stack_height * sizeof(td->stack [0]));

src/mono/mono/mini/interp/transform.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,6 @@ struct _InterpBasicBlock {
147147
StackInfo *stack_state;
148148

149149
int index;
150-
int jump_targets;
151150

152151
InterpBasicBlock *try_bblock;
153152

@@ -160,8 +159,6 @@ struct _InterpBasicBlock {
160159
// This block has special semantics and it shouldn't be optimized away
161160
guint preserve : 1;
162161
guint dead: 1;
163-
// This bblock is detectead early as being dead, we don't inline into it
164-
guint no_inlining: 1;
165162
// If patchpoint is set we will store mapping information between native offset and bblock index within
166163
// InterpMethod. In the unoptimized method we will map from native offset to the bb_index while in the
167164
// optimized method we will map the bb_index to the corresponding native offset.

0 commit comments

Comments
 (0)