@@ -7787,4 +7787,143 @@ void CodeGen::genArm64EmitterUnitTests()
77877787}
77887788#endif // defined(DEBUG)
77897789
7790+ // ------------------------------------------------------------------------
7791+ // genAllocLclFrame: Probe the stack.
7792+ //
7793+ // Notes:
7794+ // This only does the probing; allocating the frame is done when callee-saved registers are saved.
7795+ // This is done before anything has been pushed. The previous frame might have a large outgoing argument
7796+ // space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
7797+ // not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
7798+ // there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
7799+ // page by default, so we need to be more careful. We do an extra probe if we might not have probed
7800+ // recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
7801+ // on Windows as well just to be consistent, even though it should not be necessary.
7802+ //
7803+ // Arguments:
7804+ // frameSize - the size of the stack frame being allocated.
7805+ // initReg - register to use as a scratch register.
7806+ // pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
7807+ // this call sets 'initReg' to a non-zero value.
7808+ // maskArgRegsLiveIn - incoming argument registers that are currently live.
7809+ //
7810+ // Return value:
7811+ // None
7812+ //
7813+ void CodeGen::genAllocLclFrame (unsigned frameSize, regNumber initReg, bool * pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
7814+ {
7815+ assert (compiler->compGeneratingProlog );
7816+
7817+ if (frameSize == 0 )
7818+ {
7819+ return ;
7820+ }
7821+
7822+ const target_size_t pageSize = compiler->eeGetPageSize ();
7823+
7824+ // What offset from the final SP was the last probe? If we haven't probed almost a complete page, and
7825+ // if the next action on the stack might subtract from SP first, before touching the current SP, then
7826+ // we do one more probe at the very bottom. This can happen if we call a function on arm64 that does
7827+ // a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64,
7828+ // but we don't alter SP.
7829+ target_size_t lastTouchDelta = 0 ;
7830+
7831+ assert (!compiler->info .compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
7832+
7833+ if (frameSize < pageSize)
7834+ {
7835+ lastTouchDelta = frameSize;
7836+ }
7837+ else if (frameSize < compiler->getVeryLargeFrameSize ())
7838+ {
7839+ lastTouchDelta = frameSize;
7840+
7841+ for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
7842+ {
7843+ // Generate:
7844+ // movw initReg, -probeOffset
7845+ // ldr wzr, [sp + initReg]
7846+
7847+ instGen_Set_Reg_To_Imm (EA_PTRSIZE, initReg, -(ssize_t )probeOffset);
7848+ GetEmitter ()->emitIns_R_R_R (INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
7849+ regSet.verifyRegUsed (initReg);
7850+ *pInitRegZeroed = false ; // The initReg does not contain zero
7851+
7852+ lastTouchDelta -= pageSize;
7853+ }
7854+
7855+ assert (lastTouchDelta == frameSize % pageSize);
7856+ compiler->unwindPadding ();
7857+ }
7858+ else
7859+ {
7860+ assert (frameSize >= compiler->getVeryLargeFrameSize ());
7861+
7862+ // Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change
7863+ // until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl
7864+ // the stack afterward (which means the stack pointer needs to be known).
7865+
7866+ regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask () | ~RBM_INT_CALLEE_SAVED);
7867+ availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
7868+ availMask &= ~genRegMask (initReg); // Remove the pre-calculated initReg
7869+
7870+ regNumber rOffset = initReg;
7871+ regNumber rLimit;
7872+ regMaskTP tempMask;
7873+
7874+ // We pick the next lowest register number for rLimit
7875+ noway_assert (availMask != RBM_NONE);
7876+ tempMask = genFindLowestBit (availMask);
7877+ rLimit = genRegNumFromMask (tempMask);
7878+
7879+ // Generate:
7880+ //
7881+ // mov rOffset, -pageSize // On arm, this turns out to be "movw r1, 0xf000; sxth r1, r1".
7882+ // // We could save 4 bytes in the prolog by using "movs r1, 0" at the
7883+ // // runtime expense of running a useless first loop iteration.
7884+ // mov rLimit, -frameSize
7885+ // loop:
7886+ // ldr wzr, [sp + rOffset]
7887+ // sub rOffset, pageSize
7888+ // cmp rLimit, rOffset
7889+ // b.ls loop // If rLimit is lower or same, we need to probe this rOffset. Note
7890+ // // especially that if it is the same, we haven't probed this page.
7891+
7892+ noway_assert ((ssize_t )(int )frameSize == (ssize_t )frameSize); // make sure framesize safely fits within an int
7893+
7894+ instGen_Set_Reg_To_Imm (EA_PTRSIZE, rOffset, -(ssize_t )pageSize);
7895+ instGen_Set_Reg_To_Imm (EA_PTRSIZE, rLimit, -(ssize_t )frameSize);
7896+
7897+ //
7898+ // Can't have a label inside the ReJIT padding area
7899+ //
7900+ genPrologPadForReJit ();
7901+
7902+ // There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic
7903+ // `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions.
7904+
7905+ GetEmitter ()->emitIns_R_R_R (INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, rOffset);
7906+ GetEmitter ()->emitIns_R_R_I (INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
7907+ GetEmitter ()->emitIns_R_R (INS_cmp, EA_PTRSIZE, rLimit, rOffset); // If equal, we need to probe again
7908+ GetEmitter ()->emitIns_J (INS_bls, NULL , -4 );
7909+
7910+ *pInitRegZeroed = false ; // The initReg does not contain zero
7911+
7912+ compiler->unwindPadding ();
7913+
7914+ lastTouchDelta = frameSize % pageSize;
7915+ }
7916+
7917+ if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
7918+ {
7919+ assert (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize);
7920+ instGen_Set_Reg_To_Imm (EA_PTRSIZE, initReg, -(ssize_t )frameSize);
7921+ GetEmitter ()->emitIns_R_R_R (INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
7922+ compiler->unwindPadding ();
7923+
7924+ regSet.verifyRegUsed (initReg);
7925+ *pInitRegZeroed = false ; // The initReg does not contain zero
7926+ }
7927+ }
7928+
77907929#endif // _TARGET_ARM64_
0 commit comments