From 977f0c39d1ab7816770a8aeb5812bd0ccad30e69 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Thu, 10 Feb 2022 12:50:22 -0800 Subject: [PATCH] Increase arm32/arm64 maximum instruction group size We require that the maximum number of prolog instructions all fit in one instruction group. Recent changes appear to have increased the number of instructions we are generating the prolog, leading to NOWAY assert on Release builds and test failure on linux-arm64. Bump up the number to avoid this problem, and leave some headroom for possible additional needs. Fixes #64162, #64793. --- src/coreclr/jit/emit.cpp | 5 +++-- src/coreclr/jit/emit.h | 47 +++++++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 4cabb028b115b0..462b9876f97169 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1454,9 +1454,10 @@ void* emitter::emitAllocAnyInstr(size_t sz, emitAttr opsz) assert(IsCodeAligned(emitCurIGsize)); - /* Make sure we have enough space for the new instruction */ + // Make sure we have enough space for the new instruction. + // `igInsCnt` is currently a byte, so we can't have more than 255 instructions in a single insGroup. - if ((emitCurIGfreeNext + sz >= emitCurIGfreeEndp) || emitForceNewIG) + if ((emitCurIGfreeNext + sz >= emitCurIGfreeEndp) || emitForceNewIG || (emitCurIGinsCnt >= 255)) { emitNxtIG(true); } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 66706ecaf488e2..1571fc00cbafa1 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1785,19 +1785,50 @@ class emitter void emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins); void spillIntArgRegsToShadowSlots(); -/************************************************************************/ -/* The logic that creates and keeps track of instruction groups */ -/************************************************************************/ + /************************************************************************/ + /* The logic that creates and keeps track of instruction groups */ + /************************************************************************/ + + // SC_IG_BUFFER_SIZE defines the size, in bytes, of the single, global instruction group buffer. + // When a label is reached, or the buffer is filled, the precise amount of the buffer that was + // used is copied to a newly allocated, precisely sized buffer, and the global buffer is reset + // for use with the next set of instructions (see emitSavIG). If the buffer was filled before + // reaching a label, the next instruction group will be an "overflow", or "extension" group + // (marked with IGF_EXTEND). Thus, the size of the global buffer shouldn't matter (as long as it + // can hold at least one of the largest instruction descriptor forms), since we can always overflow + // to subsequent instruction groups. + // + // The only place where this fixed instruction group size is a problem is in the main function prolog, + // where we only support a single instruction group, and no extension groups. We should really fix that. + // Thus, the buffer size needs to be large enough to hold the maximum number of instructions that + // can possibly be generated into the prolog instruction group. That is difficult to statically determine. + // + // If we do generate an overflow prolog group, we will hit a NOWAY assert and fall back to MinOpts. + // This should reduce the number of instructions generated into the prolog. + // + // Note that OSR prologs require additional code not seen in normal prologs. + // + // Also, note that DEBUG and non-DEBUG builds have different instrDesc sizes, and there are multiple + // sizes of instruction descriptors, so the number of instructions that will fit in the largest + // instruction group depends on the instruction mix as well as DEBUG/non-DEBUG build type. See the + // EMITTER_STATS output for various statistics related to this. + // + CLANG_FORMAT_COMMENT_ANCHOR; #ifdef TARGET_ARMARCH -// The only place where this limited instruction group size is a problem is -// in the prolog, where we only support a single instruction group. We should really fix that. // ARM32 and ARM64 both can require a bigger prolog instruction group. One scenario is where // a function uses all the incoming integer and single-precision floating-point arguments, // and must store them all to the frame on entry. If the frame is very large, we generate -// ugly code like "movw r10, 0x488; add r10, sp; vstr s0, [r10]" for each store, which -// eats up our insGroup buffer. -#define SC_IG_BUFFER_SIZE (100 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) +// ugly code like: +// movw r10, 0x488 +// add r10, sp +// vstr s0, [r10] +// for each store, or, to load arguments into registers: +// movz xip1, #0x6cd0 +// movk xip1, #2 LSL #16 +// ldr w8, [fp, xip1] // [V10 arg10] +// which eats up our insGroup buffer. +#define SC_IG_BUFFER_SIZE (200 * sizeof(emitter::instrDesc)) #else // !TARGET_ARMARCH #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) #endif // !TARGET_ARMARCH