From e286df14e03c76f91106870a56f092bdf9c18e46 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Sun, 5 Jun 2022 23:22:41 +0300 Subject: [PATCH 1/7] [mono][interp] Don't allocate some vars as execution stack It is not really true and it serves no purpose here. --- src/mono/mono/mini/interp/transform.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 1a0ff8ca9fdc72..ea08aa160459b4 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -2783,7 +2783,7 @@ interp_inline_newobj (TransformData *td, MonoMethod *target_method, MonoMethodSi else vtsize = MINT_STACK_SLOT_SIZE; - dreg = create_interp_stack_local (td, stack_type [ret_mt], klass, vtsize); + dreg = create_interp_local (td, get_type_from_stack (stack_type [ret_mt], klass)); // For valuetypes, we need to control the lifetime of the valuetype. // MINT_NEWOBJ_VT_INLINED takes the address of this reg and we should keep @@ -2791,7 +2791,7 @@ interp_inline_newobj (TransformData *td, MonoMethod *target_method, MonoMethodSi interp_add_ins (td, MINT_DEF); interp_ins_set_dreg (td->last_ins, dreg); } else { - dreg = create_interp_stack_local (td, stack_type [ret_mt], klass, MINT_STACK_SLOT_SIZE); + dreg = create_interp_local (td, get_type_from_stack (stack_type [ret_mt], klass)); } // Allocate `this` pointer @@ -5537,7 +5537,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, td->sp -= csignature->param_count; // First arg is dummy var, it is null when passed to the ctor - call_args [0] = create_interp_stack_local (td, stack_type [ret_mt], NULL, MINT_STACK_SLOT_SIZE); + call_args [0] = create_interp_local (td, get_type_from_stack (stack_type [ret_mt], NULL)); for (int i = 0; i < csignature->param_count; i++) { call_args [i + 1] = td->sp [i].local; } From 2e937d7d355fc9466a31113a6900fa2307315ace Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: 
Mon, 6 Jun 2022 00:04:50 +0300 Subject: [PATCH 2/7] [mono][interp] Print code before any optimizations take place Fix bitrotten mono_interp_print_td_code. Don't print IL_SEQ_POINT opcodes since they are too noisy. --- src/mono/mono/mini/interp/transform.c | 26 +++++++++++++------------- src/mono/mono/mini/interp/transform.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index ea08aa160459b4..9dd287a11eea27 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -1490,12 +1490,15 @@ dump_interp_inst (InterpInst *ins) g_string_free (str, TRUE); } -static G_GNUC_UNUSED void +static void dump_interp_bb (InterpBasicBlock *bb) { g_print ("BB%d:\n", bb->index); - for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) - dump_interp_inst (ins); + for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { + // Avoid some noise + if (ins->opcode != MINT_NOP && ins->opcode != MINT_IL_SEQ_POINT) + dump_interp_inst (ins); + } } @@ -1521,15 +1524,9 @@ mono_interp_print_code (InterpMethod *imethod) void mono_interp_print_td_code (TransformData *td) { - InterpInst *ins = td->first_ins; - - char *name = mono_method_full_name (td->method, TRUE); - g_print ("IR for \"%s\"\n", name); - g_free (name); - while (ins) { - dump_interp_inst (ins); - ins = ins->next; - } + g_print ("Unoptimized IR:\n"); + for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) + dump_interp_bb (bb); } @@ -8333,7 +8330,7 @@ interp_cprop (TransformData *td) gint32 *sregs = &ins->sregs [0]; gint32 dreg = ins->dreg; - if (td->verbose_level && ins->opcode != MINT_NOP) + if (td->verbose_level && ins->opcode != MINT_NOP && ins->opcode != MINT_IL_SEQ_POINT) dump_interp_inst (ins); for (int i = 0; i < num_sregs; i++) { @@ -9515,6 +9512,9 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG if 
(td->has_localloc) interp_fix_localloc_ret (td); + if (td->verbose_level) + mono_interp_print_td_code (td); + if (td->optimized) interp_optimize_code (td); diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index 6e3599af486b72..2656c51f303762 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -177,7 +177,7 @@ typedef struct const unsigned char *il_code; const unsigned char *ip; const unsigned char *in_start; - InterpInst *last_ins, *first_ins; + InterpInst *last_ins; int code_size; int *in_offsets; int current_il_offset; From 3eadb34bfb7039b9c13b7746458377fd50c7dfc8 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Mon, 13 Jun 2022 10:22:22 +0300 Subject: [PATCH 3/7] [mono][interp] Use td->optimized directly in more places --- src/mono/mono/mini/interp/transform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 9dd287a11eea27..291f5b442660bb 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -762,9 +762,9 @@ handle_branch (TransformData *td, int long_op, int offset) if (offset < 0 && td->sp == td->stack && !td->inlined_method) { // Backwards branch inside unoptimized method where the IL stack is empty // This is candidate for a patchpoint - if (!td->rtm->optimized) + if (!td->optimized) target_bb->emit_patchpoint = TRUE; - if (mono_interp_tiering_enabled () && !target_bb->patchpoint_data && td->rtm->optimized) { + if (mono_interp_tiering_enabled () && !target_bb->patchpoint_data && td->optimized) { // The optimized imethod will store mapping from bb index to native offset so it // can resume execution in the optimized method, once we tier up in patchpoint td->patchpoint_data_n++; @@ -4304,7 +4304,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, if (td->verbose_level) { char *tmp = 
mono_disasm_code (NULL, method, td->ip, end); char *name = mono_method_full_name (method, TRUE); - g_print ("Method %s, optimized %d, original code:\n", name, rtm->optimized); + g_print ("Method %s, optimized %d, original code:\n", name, td->optimized); g_print ("%s\n", tmp); g_free (tmp); g_free (name); @@ -7718,7 +7718,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in static int add_patchpoint_data (TransformData *td, int patchpoint_data_index, int native_offset, int key) { - if (td->rtm->optimized) { + if (td->optimized) { td->patchpoint_data [patchpoint_data_index++] = key; td->patchpoint_data [patchpoint_data_index++] = native_offset; } else { From ad01ce9f62a7f5aeb886a8b090053d286a2e90f9 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 14 Jun 2022 13:46:52 +0300 Subject: [PATCH 4/7] [mono][interp] Add dummy MINT_LDNULL instruction We were pushing local that wasn't defined by any instruction, potentially confusing the var offset allocator. --- src/mono/mono/mini/interp/transform.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 291f5b442660bb..d4e3e05b38b59d 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -6812,6 +6812,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, if (jit_icall_id == MONO_JIT_ICALL_mono_threads_attach_coop) { rtm->needs_thread_attach = 1; + // Add dummy return value + interp_add_ins (td, MINT_LDNULL); + interp_ins_set_dreg (td->last_ins, dreg); } else if (jit_icall_id == MONO_JIT_ICALL_mono_threads_detach_coop) { g_assert (rtm->needs_thread_attach); } else { @@ -6974,7 +6977,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, if (info->sig->ret->type != MONO_TYPE_VOID) { // Push a dummy coop gc var + interp_add_ins (td, MINT_LDNULL); push_simple_type (td, STACK_TYPE_I); + td->last_ins->dreg = 
td->sp [-1].local; interp_add_ins (td, MINT_MONO_ENABLE_GCTRANS); } else { // Pop the unused gc var From 46473b77be657b61d2fc804135ac13f1a5c65fbf Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Wed, 8 Jun 2022 10:21:10 +0300 Subject: [PATCH 5/7] [mono][interp] Add fast offset allocator, to be used by unoptimized code This is the old offset allocation scheme that we were using originally, before the var offset allocator was added. Vars have the same offset as they would have in IL code, based on their position on the execution stack at the moment when they are pushed. Whenever we push/pop on the execution stack we keep track of the used stack size. Every var pushed on the execution stack will therefore have this stack_offset remembered. Once the entire IL code is traversed and we have all the global locals allocated, the real offset of the execution stack locals can be determined. It is computed as the originally determined stack_offset added with the offset of the execution stack start (size of the global locals space). With this offset allocator, calls no longer need to store all the call args sregs and the return_offset is always the same as call_args_offset. This is because all vars are directly placed in the right position and no optimizations can move them around. The offset of the return value will therefore be also the offset where all the args are placed. The limitation with this way of allocating offsets is that we run into the same problems with opcodes that don't have typical stack usage (use values, pop them, store result). This happens with newobj opcodes. The opcode receives the params, and then it needs to call a ctor with these same params and a newly allocated this object. Since we can't use a var offset allocation pass to compute the offset ideally, the newobj opcodes in the case of unoptimized code must move these params around on the stack, in order to make room for `this`. 
--- src/mono/mono/mini/interp/interp.c | 56 ++++++ src/mono/mono/mini/interp/mintops.def | 2 + src/mono/mono/mini/interp/transform.c | 277 +++++++++++++++++++------- src/mono/mono/mini/interp/transform.h | 11 +- 4 files changed, 274 insertions(+), 72 deletions(-) diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index f3fe3cf66db1e0..f43100eb788055 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -5374,6 +5374,19 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; ip += 4; goto call; } + MINT_IN_CASE(MINT_NEWOBJ_STRING_UNOPT) { + // Same as MINT_NEWOBJ_STRING but copy params into right place on stack + cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]]; + return_offset = ip [1]; + call_args_offset = ip [1]; + + int param_size = ip [3]; + if (param_size) + memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size); + LOCAL_VAR (call_args_offset, gpointer) = NULL; + ip += 4; + goto call; + } MINT_IN_CASE(MINT_NEWOBJ) { MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [4]]; INIT_VTABLE (vtable); @@ -5474,6 +5487,49 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; ip += 4; goto call; } + MINT_IN_CASE(MINT_NEWOBJ_SLOW_UNOPT) { + call_args_offset = ip [1]; + guint16 param_size = ip [3]; + guint16 ret_size = ip [4]; + gpointer this_ptr; + + // Should only be called in unoptimized code. This opcode moves the params around + // to compensate for the lack of use of a proper offset allocator in unoptimized code. 
+ gboolean is_vt = ret_size != 0; + if (!is_vt) + ret_size = MINT_STACK_SLOT_SIZE; + + cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]]; + + MonoClass *newobj_class = cmethod->method->klass; + + // We allocate space on the stack for return value and for this pointer, that is passed to ctor + if (param_size) + memmove (locals + call_args_offset + ret_size + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size); + + if (is_vt) { + this_ptr = locals + call_args_offset; + memset (this_ptr, 0, ret_size); + call_args_offset += ret_size; + } else { + // FIXME push/pop LMF + MonoVTable *vtable = mono_class_vtable_checked (newobj_class, error); + if (!is_ok (error) || !mono_runtime_class_init_full (vtable, error)) { + MonoException *exc = interp_error_convert_to_exception (frame, error, ip); + g_assert (exc); + THROW_EX (exc, ip); + } + error_init_reuse (error); + this_ptr = mono_object_new_checked (newobj_class, error); + mono_interp_error_cleanup (error); // FIXME: do not swallow the error + LOCAL_VAR (call_args_offset, gpointer) = this_ptr; // return value + call_args_offset += MINT_STACK_SLOT_SIZE; + } + LOCAL_VAR (call_args_offset, gpointer) = this_ptr; + return_offset = call_args_offset; // unused, prevent warning + ip += 5; + goto call; + } MINT_IN_CASE(MINT_INTRINS_SPAN_CTOR) { gpointer ptr = LOCAL_VAR (ip [2], gpointer); int len = LOCAL_VAR (ip [3], gint32); diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def index 3f67b609bf8bc5..b237b7a32d0780 100644 --- a/src/mono/mono/mini/interp/mintops.def +++ b/src/mono/mono/mini/interp/mintops.def @@ -338,6 +338,8 @@ OPDEF(MINT_JMP, "jmp", 2, 0, 0, MintOpMethodToken) OPDEF(MINT_ENDFILTER, "endfilter", 2, 0, 1, MintOpNoArgs) +OPDEF(MINT_NEWOBJ_SLOW_UNOPT, "newobj_slow_unopt", 5, 1, 0, MintOpMethodToken) +OPDEF(MINT_NEWOBJ_STRING_UNOPT, "newobj_string_unopt", 4, 1, 0, MintOpMethodToken) OPDEF(MINT_NEWOBJ_SLOW, "newobj_slow", 4, 1, 1, MintOpMethodToken) 
OPDEF(MINT_NEWOBJ_ARRAY, "newobj_array", 5, 1, 1, MintOpMethodToken) OPDEF(MINT_NEWOBJ_STRING, "newobj_string", 4, 1, 1, MintOpMethodToken) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index d4e3e05b38b59d..2f749b619a50c1 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -273,11 +273,17 @@ interp_prev_ins (InterpInst *ins) stack_size, n, (td)->ip - (td)->il_code); \ } while (0) +#define ENSURE_STACK_SIZE(td, size) \ + do { \ + if ((size) > td->max_stack_size) \ + td->max_stack_size = size; \ + } while (0) + #define ENSURE_I4(td, sp_off) \ do { \ - if ((td)->sp [-sp_off].type == STACK_TYPE_I8) { \ + if ((td)->sp [-(sp_off)].type == STACK_TYPE_I8) { \ /* Same representation in memory, nothing to do */ \ - (td)->sp [-sp_off].type = STACK_TYPE_I4; \ + (td)->sp [-(sp_off)].type = STACK_TYPE_I4; \ } \ } while (0) @@ -407,12 +413,27 @@ create_interp_local_explicit (TransformData *td, MonoType *type, int size) } static int -create_interp_stack_local (TransformData *td, int type, MonoClass *k, int type_size) +get_tos_offset (TransformData *td) +{ + if (td->sp == td->stack) + return 0; + else + return td->sp [-1].offset + td->sp [-1].size; +} + +// Create a local for sp +static void +create_interp_stack_local (TransformData *td, StackInfo *sp, int type_size) { - int local = create_interp_local_explicit (td, get_type_from_stack (type, k), type_size); + int local = create_interp_local_explicit (td, get_type_from_stack (sp->type, sp->klass), type_size); td->locals [local].flags |= INTERP_LOCAL_FLAG_EXECUTION_STACK; - return local; + if (!td->optimized) { + td->locals [local].stack_offset = sp->offset; + // Additional space that is allocated for the frame, when we don't run the var offset allocator + ENSURE_STACK_SIZE(td, sp->offset + sp->size); + } + sp->local = local; } static void @@ -433,8 +454,9 @@ push_type_explicit (TransformData *td, int type, MonoClass *k, int type_size) 
td->sp->type = GINT_TO_UINT8 (type); td->sp->klass = k; td->sp->flags = 0; - td->sp->local = create_interp_stack_local (td, type, k, type_size); + td->sp->offset = get_tos_offset (td); td->sp->size = ALIGN_TO (type_size, MINT_STACK_SLOT_SIZE); + create_interp_stack_local (td, td->sp, type_size); td->sp++; } @@ -475,7 +497,7 @@ static void set_type_and_local (TransformData *td, StackInfo *sp, MonoClass *klass, int type) { SET_TYPE (sp, type, klass); - sp->local = create_interp_stack_local (td, type, NULL, MINT_STACK_SLOT_SIZE); + create_interp_stack_local (td, sp, MINT_STACK_SLOT_SIZE); } static void @@ -702,6 +724,10 @@ get_mov_for_type (int mt, gboolean needs_sext) static void fixup_newbb_stack_locals (TransformData *td, InterpBasicBlock *newbb) { + // If not optimized, it is enough for vars to have same offset on the stack. It is not + // mandatory for sregs and dregs to match. + if (!td->optimized) + return; if (newbb->stack_height <= 0) return; @@ -2938,6 +2964,9 @@ get_virt_method_slot (MonoMethod *method) static int* create_call_args (TransformData *td, int num_args) { + // We don't need to know the sregs for calls in unoptimized code + if (!td->optimized) + return NULL; int *call_args = (int*) mono_mempool_alloc (td->mempool, (num_args + 1) * sizeof (int)); for (int i = 0; i < num_args; i++) call_args [i] = td->sp [i].local; @@ -2987,11 +3016,29 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target calli = FALSE; native = FALSE; // The function pointer is passed last, but the wrapper expects it as first argument - // Switch the arguments - StackInfo sp_fp = td->sp [-1]; - StackInfo *start = &td->sp [-csignature->param_count - 1]; - memmove (start + 1, start, csignature->param_count * sizeof (StackInfo)); - *start = sp_fp; + // Switch the arguments. + // When the var offset allocator is not used, in unoptimized code, we have to manually + // push the values into the correct order. 
In optimized code, we just need to know what + // local is the execution stack position during compilation, so we can just do a memmove + // of the StackInfo + if (td->optimized) { + StackInfo sp_fp = td->sp [-1]; + StackInfo *start = &td->sp [-csignature->param_count - 1]; + memmove (start + 1, start, csignature->param_count * sizeof (StackInfo)); + *start = sp_fp; + } else { + int *arg_locals = mono_mempool_alloc0 (td->mempool, sizeof (int) * csignature->param_count); + int fp_local = create_interp_local (td, m_class_get_byval_arg (mono_defaults.int_class)); + // Pop everything into locals. Push after into correct order + store_local (td, fp_local); + for (int i = csignature->param_count - 1; i >= 0; i--) { + arg_locals [i] = create_interp_local (td, csignature->params [i]); + store_local (td, arg_locals [i]); + } + load_local (td, fp_local); + for (int i = 0; i < csignature->param_count; i++) + load_local (td, arg_locals [i]); + } // The method we are calling has a different signature csignature = mono_method_signature_internal (target_method); @@ -5530,24 +5577,36 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, td->last_ins->flags |= INTERP_INST_FLAG_CALL; td->last_ins->info.call_args = call_args; } else if (klass == mono_defaults.string_class) { - int *call_args = (int*)mono_mempool_alloc (td->mempool, (csignature->param_count + 2) * sizeof (int)); - td->sp -= csignature->param_count; + if (!td->optimized) { + int tos_offset = get_tos_offset (td); + td->sp -= csignature->param_count; + guint32 params_stack_size = tos_offset - get_tos_offset (td); + + interp_add_ins (td, MINT_NEWOBJ_STRING_UNOPT); + td->last_ins->data [0] = get_data_item_index (td, mono_interp_get_imethod (m)); + td->last_ins->data [1] = params_stack_size; + push_type (td, stack_type [ret_mt], klass); + interp_ins_set_dreg (td->last_ins, td->sp [-1].local); + } else { + int *call_args = (int*)mono_mempool_alloc (td->mempool, (csignature->param_count + 2) * 
sizeof (int)); + td->sp -= csignature->param_count; - // First arg is dummy var, it is null when passed to the ctor - call_args [0] = create_interp_local (td, get_type_from_stack (stack_type [ret_mt], NULL)); - for (int i = 0; i < csignature->param_count; i++) { - call_args [i + 1] = td->sp [i].local; - } - call_args [csignature->param_count + 1] = -1; + // First arg is dummy var, it is null when passed to the ctor + call_args [0] = create_interp_local (td, get_type_from_stack (stack_type [ret_mt], NULL)); + for (int i = 0; i < csignature->param_count; i++) { + call_args [i + 1] = td->sp [i].local; + } + call_args [csignature->param_count + 1] = -1; - interp_add_ins (td, MINT_NEWOBJ_STRING); - td->last_ins->data [0] = get_data_item_index_imethod (td, mono_interp_get_imethod (m)); - push_type (td, stack_type [ret_mt], klass); + interp_add_ins (td, MINT_NEWOBJ_STRING); + td->last_ins->data [0] = get_data_item_index_imethod (td, mono_interp_get_imethod (m)); + push_type (td, stack_type [ret_mt], klass); - interp_ins_set_dreg (td->last_ins, td->sp [-1].local); - interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG); - td->last_ins->flags |= INTERP_INST_FLAG_CALL; - td->last_ins->info.call_args = call_args; + interp_ins_set_dreg (td->last_ins, td->sp [-1].local); + interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG); + td->last_ins->flags |= INTERP_INST_FLAG_CALL; + td->last_ins->info.call_args = call_args; + } } else if (m_class_get_image (klass) == mono_defaults.corlib && (!strcmp (m_class_get_name (m->klass), "Span`1") || !strcmp (m_class_get_name (m->klass), "ReadOnlySpan`1")) && @@ -5561,6 +5620,31 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local); push_type_vt (td, klass, mono_class_value_size (klass, NULL)); interp_ins_set_dreg (td->last_ins, td->sp [-1].local); + } else if (!td->optimized) { + int tos = get_tos_offset (td); + td->sp -= 
csignature->param_count; + int param_size = tos - get_tos_offset (td); + + interp_add_ins (td, MINT_NEWOBJ_SLOW_UNOPT); + td->last_ins->data [0] = get_data_item_index_imethod (td, mono_interp_get_imethod (m)); + td->last_ins->data [1] = param_size; + + gboolean is_vt = m_class_is_valuetype (klass); + if (is_vt) { + int vtsize = mono_class_value_size (klass, NULL); + vtsize = ALIGN_TO (vtsize, MINT_STACK_SLOT_SIZE); + td->last_ins->data [2] = vtsize; + ENSURE_STACK_SIZE(td, (int)(tos + vtsize + MINT_STACK_SLOT_SIZE)); + if (ret_mt == MINT_TYPE_VT) + push_type_vt (td, klass, vtsize); + else + push_type (td, stack_type [ret_mt], klass); + } else { + td->last_ins->data [2] = 0; + ENSURE_STACK_SIZE(td, (int)(tos + 2 * MINT_STACK_SLOT_SIZE)); + push_type (td, stack_type [ret_mt], klass); + } + interp_ins_set_dreg (td->last_ins, td->sp [-1].local); } else { td->sp -= csignature->param_count; @@ -7472,6 +7556,23 @@ handle_relocations (TransformData *td) } } +static void +alloc_unopt_global_local (TransformData *td, int local, gpointer data) +{ + // Execution stack locals are resolved when we emit the instruction in the code stream, + // once all global locals have their offset resolved + if (td->locals [local].flags & INTERP_LOCAL_FLAG_EXECUTION_STACK) + return; + // Check if already resolved + if (td->locals [local].offset != -1) + return; + + int offset = td->total_locals_size; + int size = td->locals [local].size; + td->locals [local].offset = offset; + td->total_locals_size = ALIGN_TO (offset + size, MINT_STACK_SLOT_SIZE); +} + static int get_inst_length (InterpInst *ins) { @@ -7485,6 +7586,34 @@ get_inst_length (InterpInst *ins) return mono_interp_oplen [ins->opcode]; } +static void +foreach_local_var (TransformData *td, InterpInst *ins, gpointer data, void (*callback)(TransformData*, int, gpointer)) +{ + int opcode = ins->opcode; + if (mono_interp_op_sregs [opcode]) { + for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) { + int sreg = ins->sregs [i]; + + if 
(sreg == MINT_CALL_ARGS_SREG) { + int *call_args = ins->info.call_args; + if (call_args) { + int var = *call_args; + while (var != -1) { + callback (td, var, data); + call_args++; + var = *call_args; + } + } + } else { + callback (td, sreg, data); + } + } + } + + if (mono_interp_op_dregs [opcode]) + callback (td, ins->dreg, data); +} + static int compute_native_offset_estimates (TransformData *td) { @@ -7502,6 +7631,8 @@ compute_native_offset_estimates (TransformData *td) if (MINT_IS_NOP (opcode)) continue; noe += get_inst_length (ins); + if (!td->optimized) + foreach_local_var (td, ins, NULL, alloc_unopt_global_local); } } return noe; @@ -7542,6 +7673,27 @@ get_short_brop (int opcode) return opcode; } +static int +get_local_offset (TransformData *td, int local) +{ + if (td->locals [local].offset != -1) + return td->locals [local].offset; + + // FIXME Some vars might end up with uninitialized offset because they are not declared at all in the code. + // This can happen if the bblock declaring the var gets removed, while other unreachable bblocks, that access + // the var are also not removed. This limitation is due to bblock removal using IN count for removing a bblock, + // which doesn't account for cycles. 
+ if (td->optimized) + return -1; + + // If we use the optimized offset allocator, all locals should have had their offsets already allocated + g_assert (!td->optimized); + // The only remaining locals to allocate are the ones from the execution stack + g_assert (td->locals [local].flags & INTERP_LOCAL_FLAG_EXECUTION_STACK); + + td->locals [local].offset = td->total_locals_size + td->locals [local].stack_offset; + return td->locals [local].offset; +} static guint16* emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *ins) @@ -7567,7 +7719,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in *ip++ = opcode; if (opcode == MINT_SWITCH) { int labels = READ32 (&ins->data [0]); - *ip++ = GINT_TO_UINT16 (td->locals [ins->sregs [0]].offset); + *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [0])); // Write number of switch labels *ip++ = ins->data [0]; *ip++ = ins->data [1]; @@ -7585,7 +7737,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in const int br_offset = GPTRDIFF_TO_INT (start_ip - td->new_code); gboolean has_imm = opcode >= MINT_BEQ_I4_IMM_SP && opcode <= MINT_BLT_UN_I8_IMM_SP; for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) - *ip++ = GINT_TO_UINT16 (td->locals [ins->sregs [i]].offset); + *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i])); if (has_imm) *ip++ = ins->data [0]; @@ -7653,8 +7805,8 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in guint16 mt = ins->data [1]; guint16 fsize = ins->data [2]; - int dest_off = td->locals [ins->dreg].offset; - int src_off = td->locals [ins->sregs [0]].offset + foff; + int dest_off = get_local_offset (td, ins->dreg); + int src_off = get_local_offset (td, ins->sregs [0]) + foff; if (mt == MINT_TYPE_VT || fsize) opcode = MINT_MOV_VT; else @@ -7694,21 +7846,29 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in // actually vars. 
Resolve their offset int num_vars = mono_interp_oplen [opcode] - 1; for (int i = 0; i < num_vars; i++) - *ip++ = GINT_TO_UINT16 (td->locals [ins->data [i]].offset); + *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->data [i])); } else { if (mono_interp_op_dregs [opcode]) - *ip++ = GINT_TO_UINT16 (td->locals [ins->dreg].offset); + *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->dreg)); if (mono_interp_op_sregs [opcode]) { for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) { - if (ins->sregs [i] == MINT_CALL_ARGS_SREG) - *ip++ = GINT_TO_UINT16 (td->locals [ins->info.call_args [0]].offset); - else - *ip++ = GINT_TO_UINT16 (td->locals [ins->sregs [i]].offset); + if (ins->sregs [i] == MINT_CALL_ARGS_SREG) { + int offset; + // In the unoptimized case the return and the start of the param area are always at the + // same offset. Use the dreg offset so we don't need to rely on existing call_args. + if (td->optimized) + offset = get_local_offset (td, ins->info.call_args [0]); + else + offset = get_local_offset (td, ins->dreg); + *ip++ = GINT_TO_UINT16 (offset); + } else { + *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i])); + } } } else if (opcode == MINT_LDLOCA_S) { // This opcode receives a local but it is not viewed as a sreg since we don't load the value - *ip++ = GINT_TO_UINT16 (td->locals [ins->sregs [0]].offset); + *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [0])); } int left = get_inst_length (ins) - GPTRDIFF_TO_INT(ip - start_ip); @@ -8258,34 +8418,6 @@ cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, LocalValue *local_de } } -static void -foreach_local_var (TransformData *td, InterpInst *ins, gpointer data, void (*callback)(TransformData*, int, gpointer)) -{ - int opcode = ins->opcode; - if (mono_interp_op_sregs [opcode]) { - for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) { - int sreg = ins->sregs [i]; - - if (sreg == MINT_CALL_ARGS_SREG) { - int *call_args = ins->info.call_args; - if (call_args) { - int var 
= *call_args; - while (var != -1) { - callback (td, var, data); - call_args++; - var = *call_args; - } - } - } else { - callback (td, sreg, data); - } - } - } - - if (mono_interp_op_dregs [opcode]) - callback (td, ins->dreg, data); -} - static void clear_local_defs (TransformData *td, int var, void *data) { @@ -9520,10 +9652,10 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG if (td->verbose_level) mono_interp_print_td_code (td); - if (td->optimized) + if (td->optimized) { interp_optimize_code (td); - - interp_alloc_offsets (td); + interp_alloc_offsets (td); + } generate_compacted_code (td); @@ -9578,8 +9710,11 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG if (c->flags & MONO_EXCEPTION_CLAUSE_FILTER) c->data.filter_offset = get_native_offset (td, c->data.filter_offset); } - rtm->alloca_size = td->total_locals_size; - rtm->locals_size = td->param_area_offset; + // When optimized (using the var offset allocator), total_locals_size contains also the param area. + // When unoptimized, the param area is stored in the same order, within the IL execution stack. + g_assert (!td->optimized || !td->max_stack_size); + rtm->alloca_size = td->total_locals_size + td->max_stack_size; + rtm->locals_size = td->optimized ? td->param_area_offset : td->total_locals_size; rtm->data_items = (gpointer*)mono_mem_manager_alloc0 (td->mem_manager, td->n_data_items * sizeof (td->data_items [0])); memcpy (rtm->data_items, td->data_items, td->n_data_items * sizeof (td->data_items [0])); diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index 2656c51f303762..7b5526738f055e 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -34,6 +34,8 @@ typedef struct * the stack a new local is created. */ int local; + /* The offset from the execution stack start where this is stored. 
Used by the fast offset allocator */ + int offset; /* Saves how much stack this is using. It is a multiple of MINT_VT_ALIGNMENT */ int size; } StackInfo; @@ -155,7 +157,13 @@ typedef struct { int indirects; int offset; int size; - int live_start, live_end; + union { + // live_start and live_end are used by the offset allocator for optimized code + int live_start; + // used only by the fast offset allocator, which only works for unoptimized code + int stack_offset; + }; + int live_end; // index of first basic block where this var is used int bb_index; union { @@ -190,6 +198,7 @@ typedef struct unsigned int stack_capacity; gint32 param_area_offset; gint32 total_locals_size; + gint32 max_stack_size; InterpLocal *locals; int *local_ref_count; unsigned int il_locals_offset; From 79570b0022298f1fa73b162ab457511a354d744d Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Sat, 18 Jun 2022 21:08:14 +0300 Subject: [PATCH 6/7] [mono][interp] Add dreg to all calls in unoptimized code All calls need to have a dreg (a dummy one if it is void call), in order for unoptimized offset allocator to determine the offset of the call. In unoptimized code, the offset of the first argument is always the same as the offset of the return, if any. 
--- src/mono/mono/mini/interp/transform.c | 53 +++++++++++++++++++++------ 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 2f749b619a50c1..4c8712353b3ded 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -1240,16 +1240,22 @@ interp_generate_mae_throw (TransformData *td, MonoMethod *method, MonoMethod *ta td->last_ins->data [0] = get_data_item_index (td, target_method); td->sp -= 2; - int *call_args = (int*)mono_mempool_alloc (td->mempool, 3 * sizeof (int)); - call_args [0] = td->sp [0].local; - call_args [1] = td->sp [1].local; - call_args [2] = -1; interp_add_ins (td, MINT_ICALL_PP_V); interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG); td->last_ins->data [0] = get_data_item_index (td, (gpointer)info->func); - td->last_ins->info.call_args = call_args; td->last_ins->flags |= INTERP_INST_FLAG_CALL; + if (td->optimized) { + int *call_args = (int*)mono_mempool_alloc (td->mempool, 3 * sizeof (int)); + call_args [0] = td->sp [0].local; + call_args [1] = td->sp [1].local; + call_args [2] = -1; + td->last_ins->info.call_args = call_args; + } else { + // Unoptimized code needs every call to have a dreg for offset allocation, + // even if call is void + td->last_ins->dreg = td->sp [0].local; + } } static void @@ -1262,6 +1268,11 @@ interp_generate_void_throw (TransformData *td, MonoJitICallId icall_id) td->last_ins->data [0] = get_data_item_index (td, (gpointer)info->func); td->last_ins->info.call_args = NULL; td->last_ins->flags |= INTERP_INST_FLAG_CALL; + if (!td->optimized) { + push_simple_type (td, STACK_TYPE_I4); + td->sp--; + td->last_ins->dreg = td->sp [0].local; + } } static void @@ -1277,15 +1288,21 @@ interp_generate_ipe_throw_with_msg (TransformData *td, MonoError *error_msg) td->last_ins->data [0] = get_data_item_index (td, msg); td->sp -= 1; - int *call_args = (int*)mono_mempool_alloc (td->mempool, 2 * sizeof 
(int)); - call_args [0] = td->sp [0].local; - call_args [1] = -1; interp_add_ins (td, MINT_ICALL_P_V); interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG); td->last_ins->data [0] = get_data_item_index (td, (gpointer)info->func); - td->last_ins->info.call_args = call_args; td->last_ins->flags |= INTERP_INST_FLAG_CALL; + if (td->optimized) { + int *call_args = (int*)mono_mempool_alloc (td->mempool, 2 * sizeof (int)); + call_args [0] = td->sp [0].local; + call_args [1] = -1; + td->last_ins->info.call_args = call_args; + } else { + // Unoptimized code needs every call to have a dreg for offset allocation, + // even if call is void + td->last_ins->dreg = td->sp [0].local; + } } static int @@ -3179,8 +3196,6 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target td->sp -= num_args; guint32 params_stack_size = get_stack_size (td->sp, num_args); - int *call_args = create_call_args (td, num_args); - if (is_virtual) { interp_add_ins (td, MINT_CKNULL); interp_ins_set_sreg (td->last_ins, td->sp->local); @@ -3196,7 +3211,16 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target td->last_ins->data [0] = get_data_item_index_imethod (td, mono_interp_get_imethod (target_method)); td->last_ins->data [1] = GUINT32_TO_UINT16 (params_stack_size); td->last_ins->flags |= INTERP_INST_FLAG_CALL; - td->last_ins->info.call_args = call_args; + + if (td->optimized) { + int *call_args = create_call_args (td, num_args); + td->last_ins->info.call_args = call_args; + } else { + // Dummy dreg + push_simple_type (td, STACK_TYPE_I4); + interp_ins_set_dreg (td->last_ins, td->sp [-1].local); + td->sp--; + } int in_offset = GPTRDIFF_TO_INT (td->ip - td->il_code); if (interp_ip_in_cbb (td, in_offset + 5)) @@ -6892,6 +6916,11 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, mt = mint_type (info->sig->ret); push_simple_type (td, stack_type [mt]); dreg = td->sp [-1].local; + } else if (!td->optimized) { + // 
Dummy dreg + push_simple_type (td, stack_type [STACK_TYPE_I4]); + dreg = td->sp [-1].local; + td->sp--; } if (jit_icall_id == MONO_JIT_ICALL_mono_threads_attach_coop) { From 7f537bb7194ef19d024c0211104ec7b27e5f96df Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Mon, 25 Jul 2022 19:50:49 +0300 Subject: [PATCH 7/7] [mono][interp] Fix issue with passing of exvars Unoptimized code can't use a global local (like the exvar) directly; it must first be pushed to a new var on the execution stack. Add a mov instruction when we start executing the basic block for a handler. --- src/mono/mono/mini/interp/transform.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 4c8712353b3ded..a4f826e31a9f24 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -4494,6 +4494,20 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, init_bb_stack_state (td, new_bb); } link_bblocks = TRUE; + // Unoptimized code cannot access exception object directly from the exvar, we need + // to push it explicitly on the execution stack + if (!td->optimized) { + int index = td->clause_indexes [in_offset]; + if (index != -1 && new_bb->stack_height == 1 && header->clauses [index].handler_offset == in_offset) { + int exvar = td->clause_vars [index]; + g_assert (td->stack [0].local == exvar); + td->sp--; + push_simple_type (td, STACK_TYPE_O); + interp_add_ins (td, MINT_MOV_P); + interp_ins_set_sreg (td->last_ins, exvar); + interp_ins_set_dreg (td->last_ins, td->sp [-1].local); + } + } } td->offset_to_bb [in_offset] = td->cbb; td->in_start = td->ip; @@ -9643,9 +9657,6 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG if (td->prof_coverage) td->coverage_info = mono_profiler_coverage_alloc (method, header->code_size); - interp_method_compute_offsets (td, rtm, 
mono_method_signature_internal (method), header, error); - goto_if_nok (error, exit); - if (verbose_method_name) { const char *name = verbose_method_name; @@ -9663,6 +9674,9 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG } } + interp_method_compute_offsets (td, rtm, mono_method_signature_internal (method), header, error); + goto_if_nok (error, exit); + td->stack = (StackInfo*)g_malloc0 ((header->max_stack + 1) * sizeof (td->stack [0])); td->stack_capacity = header->max_stack + 1; td->sp = td->stack;