Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit eae780c

Browse files
authored
Implement stack probing using helpers on win-arm and linux-arm (#27184)
1 parent 857797d commit eae780c

11 files changed

Lines changed: 321 additions & 226 deletions

src/inc/jithelpers.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -368,11 +368,11 @@
368368

369369
JITHELPER(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB)
370370

371-
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
371+
#ifndef _TARGET_ARM64_
372372
JITHELPER(CORINFO_HELP_STACK_PROBE, JIT_StackProbe, CORINFO_HELP_SIG_REG_ONLY)
373-
#else // !_TARGET_X86_ && !_TARGET_AMD64_
373+
#else
374374
JITHELPER(CORINFO_HELP_STACK_PROBE, NULL, CORINFO_HELP_SIG_UNDEF)
375-
#endif // !_TARGET_X86_ && !_TARGET_AMD64_
375+
#endif
376376

377377
#undef JITHELPER
378378
#undef DYNAMICJITHELPER

src/jit/codegenarm.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1764,4 +1764,88 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)
17641764

17651765
#endif // PROFILING_SUPPORTED
17661766

1767+
//------------------------------------------------------------------------
1768+
// genAllocLclFrame: Probe the stack and allocate the local stack frame - subtract from SP.
1769+
//
1770+
// Notes:
1771+
// The first instruction of the prolog is always a push (which touches the lowest address
1772+
// of the stack), either of the LR register or of some argument registers, e.g., in the case of
1773+
// pre-spilling. The LR register is always pushed because we require it to allow for GC return
1774+
// address hijacking (see the comment in CodeGen::genPushCalleeSavedRegisters()). These pushes
1775+
// happen immediately before calling this function, so the SP at the current location has already
1776+
// been touched.
1777+
//
1778+
// Arguments:
1779+
// frameSize - the size of the stack frame being allocated.
1780+
// initReg - register to use as a scratch register.
1781+
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
1782+
// this call sets 'initReg' to a non-zero value.
1783+
// maskArgRegsLiveIn - incoming argument registers that are currently live.
1784+
//
1785+
// Return value:
1786+
// None
1787+
//
1788+
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
1789+
{
// Overview: emits the prolog instructions that lower SP by 'frameSize', using one of three
// strategies selected by frame size:
//   1. frameSize < pageSize                  - a single "sub sp, #frameSize", no explicit probe.
//   2. frameSize < getVeryLargeFrameSize()   - unrolled per-page probe loads, then one subtract.
//   3. otherwise                             - a call to the CORINFO_HELP_STACK_PROBE helper.
// 'maskArgRegsLiveIn' is not referenced by this implementation.
1790+
assert(compiler->compGeneratingProlog);
1791+
1792+
// Nothing to allocate: emit no code and leave *pInitRegZeroed untouched.
if (frameSize == 0)
1793+
{
1794+
return;
1795+
}
1796+
1797+
const target_size_t pageSize = compiler->eeGetPageSize();
1798+
1799+
// initReg is overwritten by the probe sequence below, so when the secret stub parameter is
// published it must not live in initReg.
assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
1800+
1801+
// Strategy 1: the frame is smaller than a page. No probe is emitted here; the function header
// notes explain that the current SP was already touched by the push that precedes this code.
if (frameSize < pageSize)
1802+
{
1803+
GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, frameSize);
1804+
}
1805+
// Strategy 2: at least one page, but below the very-large-frame threshold. Emit one load per
// page at SP - pageSize, SP - 2*pageSize, ... for every multiple of pageSize that does not
// exceed frameSize. SP itself is not changed until all probes have been emitted.
else if (frameSize < compiler->getVeryLargeFrameSize())
1806+
{
1807+
for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
1808+
{
1809+
// Generate:
1810+
// movw initReg, -probeOffset
1811+
// ldr initReg, [SP + initReg]
1812+
1813+
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
1814+
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
1815+
}
1816+
1817+
// The loop ran at least once (frameSize >= pageSize here), so initReg has been used as both the
// offset and the load destination and can no longer be assumed to hold zero.
regSet.verifyRegUsed(initReg);
1818+
*pInitRegZeroed = false; // The initReg does not contain zero
1819+
1820+
// Materialize frameSize in initReg and perform the actual allocation: sub sp, sp, initReg.
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
1821+
compiler->unwindPadding();
1822+
GetEmitter()->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, initReg);
1823+
}
1824+
// Strategy 3: very large frame. Compute the would-be SP (SP - frameSize) into the helper's
// argument register, call the stack probe helper, and only afterwards move that value into SP.
// NOTE(review): presumably the helper touches every page between the current SP and the
// requested address; its implementation is not visible here - confirm against JIT_StackProbe.
else
1825+
{
1826+
assert(frameSize >= compiler->getVeryLargeFrameSize());
1827+
1828+
genInstrWithConstant(INS_sub, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, frameSize,
1829+
INS_FLAGS_DONT_CARE, REG_STACK_PROBE_HELPER_ARG);
1830+
regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);
1831+
genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET);
1832+
compiler->unwindPadding();
1833+
GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG);
1834+
1835+
// If initReg is the helper's argument register, its call-target register, or one of the
// registers the helper trashes, it may have been overwritten, so report it as not zeroed.
if ((genRegMask(initReg) & (RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET |
1836+
RBM_STACK_PROBE_HELPER_TRASH)) != RBM_NONE)
1837+
{
1838+
*pInitRegZeroed = false;
1839+
}
1840+
}
1841+
1842+
// Common tail for all three strategies: report the frameSize-byte SP adjustment for unwinding.
compiler->unwindAllocStack(frameSize);
1843+
#ifdef USING_SCOPE_INFO
1844+
// Scope info tracks the stack level only when locals are not addressed off a frame pointer.
if (!doubleAlignOrFramePointerUsed())
1845+
{
1846+
psiAdjustStackLevel(frameSize);
1847+
}
1848+
#endif // USING_SCOPE_INFO
1849+
}
1850+
17671851
#endif // _TARGET_ARM_

src/jit/codegenarm64.cpp

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7787,4 +7787,143 @@ void CodeGen::genArm64EmitterUnitTests()
77877787
}
77887788
#endif // defined(DEBUG)
77897789

7790+
//------------------------------------------------------------------------
7791+
// genAllocLclFrame: Probe the stack.
7792+
//
7793+
// Notes:
7794+
// This only does the probing; allocating the frame is done when callee-saved registers are saved.
7795+
// This is done before anything has been pushed. The previous frame might have a large outgoing argument
7796+
// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
7797+
// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
7798+
// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
7799+
// page by default, so we need to be more careful. We do an extra probe if we might not have probed
7800+
// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
7801+
// on Windows as well just to be consistent, even though it should not be necessary.
7802+
//
7803+
// Arguments:
7804+
// frameSize - the size of the stack frame being allocated.
7805+
// initReg - register to use as a scratch register.
7806+
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
7807+
// this call sets 'initReg' to a non-zero value.
7808+
// maskArgRegsLiveIn - incoming argument registers that are currently live.
7809+
//
7810+
// Return value:
7811+
// None
7812+
//
7813+
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
7814+
{
// Overview: emits prolog instructions that probe (load from) the stack below SP for a frame of
// 'frameSize' bytes. Every probe is a load into REG_ZR addressed relative to SP; this function
// never writes SP. Three cases are distinguished by frame size:
//   1. frameSize < pageSize                  - no per-page probe.
//   2. frameSize < getVeryLargeFrameSize()   - unrolled per-page probe loads.
//   3. otherwise                             - a probe loop using initReg plus one more scratch register.
// In all cases a final probe at SP - frameSize may be added (see the end of the function).
7815+
assert(compiler->compGeneratingProlog);
7816+
7817+
// Nothing to probe: emit no code and leave *pInitRegZeroed untouched.
if (frameSize == 0)
7818+
{
7819+
return;
7820+
}
7821+
7822+
const target_size_t pageSize = compiler->eeGetPageSize();
7823+
7824+
// What offset from the final SP was the last probe? If we haven't probed almost a complete page, and
7825+
// if the next action on the stack might subtract from SP first, before touching the current SP, then
7826+
// we do one more probe at the very bottom. This can happen if we call a function on arm64 that does
7827+
// a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64,
7828+
// but we don't alter SP.
7829+
target_size_t lastTouchDelta = 0;
7830+
7831+
// initReg is overwritten by the probe sequences below, so when the secret stub parameter is
// published it must not live in initReg.
assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
7832+
7833+
// Case 1: frame smaller than a page. No probe is emitted in this branch, so the distance from
// the last touched address to the final SP is the whole frame.
if (frameSize < pageSize)
7834+
{
7835+
lastTouchDelta = frameSize;
7836+
}
7837+
// Case 2: at least one page, but below the very-large-frame threshold. Emit one load per page at
// SP - pageSize, SP - 2*pageSize, ... for every multiple of pageSize that does not exceed frameSize.
else if (frameSize < compiler->getVeryLargeFrameSize())
7838+
{
7839+
// Start from the full frame and subtract one page per emitted probe; what remains is the gap
// between the lowest probe and the final SP.
lastTouchDelta = frameSize;
7840+
7841+
for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
7842+
{
7843+
// Generate:
7844+
// movw initReg, -probeOffset
7845+
// ldr wzr, [sp + initReg]
7846+
7847+
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
7848+
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
7849+
regSet.verifyRegUsed(initReg);
7850+
*pInitRegZeroed = false; // The initReg does not contain zero
7851+
7852+
lastTouchDelta -= pageSize;
7853+
}
7854+
7855+
// One page was subtracted per probe, so the remainder must be the sub-page tail of the frame.
assert(lastTouchDelta == frameSize % pageSize);
7856+
compiler->unwindPadding();
7857+
}
7858+
// Case 3: very large frame. Emit a compact run-time loop instead of unrolling one probe per page.
else
7859+
{
7860+
assert(frameSize >= compiler->getVeryLargeFrameSize());
7861+
7862+
// Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change
7863+
// until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl
7864+
// the stack afterward (which means the stack pointer needs to be known).
7865+
7866+
// Candidates for the second scratch register: integer registers that are either already recorded
// as modified or are not callee-saved, minus the live incoming argument registers and initReg.
regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
7867+
availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
7868+
availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
7869+
7870+
// rOffset (the running negative offset from SP) reuses initReg; rLimit is chosen from availMask.
regNumber rOffset = initReg;
7871+
regNumber rLimit;
7872+
regMaskTP tempMask;
7873+
7874+
// We pick the next lowest register number for rLimit
7875+
noway_assert(availMask != RBM_NONE);
7876+
tempMask = genFindLowestBit(availMask);
7877+
rLimit = genRegNumFromMask(tempMask);
7878+
7879+
// Generate:
7880+
//
7881+
// mov rOffset, -pageSize // On arm, this turns out to be "movw r1, 0xf000; sxth r1, r1".
7882+
// // We could save 4 bytes in the prolog by using "movs r1, 0" at the
7883+
// // runtime expense of running a useless first loop iteration.
7884+
// mov rLimit, -frameSize
7885+
// loop:
7886+
// ldr wzr, [sp + rOffset]
7887+
// sub rOffset, pageSize
7888+
// cmp rLimit, rOffset
7889+
// b.ls loop // If rLimit is lower or same, we need to probe this rOffset. Note
7890+
// // especially that if it is the same, we haven't probed this page.
7891+
//
// NOTE(review): the encoding remark on the first line uses arm32 mnemonics and register names
// (movw/sxth/movs, r1); confirm whether it still applies to the arm64 sequence emitted here.
7892+
noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
7893+
7894+
instGen_Set_Reg_To_Imm(EA_PTRSIZE, rOffset, -(ssize_t)pageSize);
7895+
instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(ssize_t)frameSize);
7896+
7897+
//
7898+
// Can't have a label inside the ReJIT padding area
7899+
//
7900+
genPrologPadForReJit();
7901+
7902+
// There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic
7903+
// `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions.
7904+
7905+
// Loop body: probe at SP + rOffset, step rOffset down by one page, compare against the limit,
// and branch back (by instruction count rather than to a label) while more pages remain.
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, rOffset);
7906+
GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
7907+
GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rLimit, rOffset); // If equal, we need to probe again
7908+
GetEmitter()->emitIns_J(INS_bls, NULL, -4);
7909+
7910+
*pInitRegZeroed = false; // The initReg does not contain zero
7911+
7912+
compiler->unwindPadding();
7913+
7914+
// The loop probes every whole page, leaving only the sub-page tail of the frame unprobed.
lastTouchDelta = frameSize % pageSize;
7915+
}
7916+
7917+
// Extra probe: if the gap between the lowest probed address and the final SP, plus the
// STACK_PROBE_BOUNDARY_THRESHOLD_BYTES margin, is larger than a page, emit one more load at
// SP - frameSize (the address the final SP will have) so that a page cannot be skipped.
if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
7918+
{
7919+
assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize);
7920+
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)frameSize);
7921+
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
7922+
compiler->unwindPadding();
7923+
7924+
regSet.verifyRegUsed(initReg);
7925+
*pInitRegZeroed = false; // The initReg does not contain zero
7926+
}
7927+
}
7928+
77907929
#endif // _TARGET_ARM64_

0 commit comments

Comments
 (0)