Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2647,16 +2647,38 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.compJitAlignLoopMaxCodeSize = DEFAULT_MAX_LOOPSIZE_FOR_ALIGN;
#endif

#ifdef TARGET_XARCH
if (opts.compJitAlignLoopAdaptive)
{
// For adaptive alignment, padding limit is equal to the max instruction encoding
// size which is 15 bytes. Hence (32 >> 1) - 1 = 15 bytes.
opts.compJitAlignPaddingLimit = (opts.compJitAlignLoopBoundary >> 1) - 1;
}
else
{
// For non-adaptive alignment, padding limit is 1 less than the alignment boundary
// specified.
opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary - 1;
}
#elif TARGET_ARM64
if (opts.compJitAlignLoopAdaptive)
{
// For adaptive alignment, padding limit is half of the alignment boundary (no "- 1"
// adjustment because all instructions are 4 bytes long). Hence (32 >> 1) = 16 bytes.
opts.compJitAlignPaddingLimit = (opts.compJitAlignLoopBoundary >> 1);
}
else
{
// For non-adaptive, padding limit is same as specified by the alignment.
opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary;
}
#endif

assert(isPow2(opts.compJitAlignLoopBoundary));
#ifdef TARGET_ARM64
// The minimum encoding size for Arm64 is 4 bytes.
assert(opts.compJitAlignLoopBoundary >= 4);
#endif

#if REGEN_SHORTCUTS || REGEN_CALLPAT
// We never want to have debugging enabled when regenerating GC encoding patterns
Expand Down
142 changes: 138 additions & 4 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4796,6 +4796,81 @@ void emitter::emitJumpDistBind()

#if FEATURE_LOOP_ALIGN

//-----------------------------------------------------------------------------
//
// The next instruction will be a loop head entry point
// So insert an alignment instruction here to ensure that
// we can properly align the code.
//
void emitter::emitLoopAlign(unsigned short paddingBytes)
{
/* Insert a pseudo-instruction to ensure that we align
the next instruction properly */
instrDescAlign* id = emitNewInstrAlign();

#if defined(TARGET_XARCH)
assert(paddingBytes <= MAX_ENCODED_SIZE);
id->idCodeSize(paddingBytes);
#elif defined(TARGET_ARM64)
assert(paddingBytes == INSTR_ENCODED_SIZE);
#endif

id->idaIG = emitCurIG;

/* Append this instruction to this IG's alignment list */
id->idaNext = emitCurIGAlignList;

emitCurIGsize += paddingBytes;

dispIns(id);
emitCurIGAlignList = id;
}

//-----------------------------------------------------------------------------
//
// The next instruction will be a loop head entry point
// So insert alignment instruction(s) here to ensure that
// we can properly align the code.
//
// This emits more than one `INS_align` instruction depending on the
// alignmentBoundary parameter.
//
void emitter::emitLongLoopAlign(unsigned short alignmentBoundary)
{
#if defined(TARGET_XARCH)
unsigned short nPaddingBytes = alignmentBoundary - 1;
unsigned short nAlignInstr = (nPaddingBytes + (MAX_ENCODED_SIZE - 1)) / MAX_ENCODED_SIZE;
unsigned short insAlignCount = nPaddingBytes / MAX_ENCODED_SIZE;
unsigned short lastInsAlignSize = nPaddingBytes % MAX_ENCODED_SIZE;
unsigned short paddingBytes = MAX_ENCODED_SIZE;
#elif defined(TARGET_ARM64)
unsigned short nAlignInstr = alignmentBoundary / INSTR_ENCODED_SIZE;
unsigned short insAlignCount = nAlignInstr;
unsigned short paddingBytes = INSTR_ENCODED_SIZE;
#endif

unsigned short instrDescSize = nAlignInstr * sizeof(instrDescAlign);

// Ensure that all align instructions fall in same IG.
if (emitCurIGfreeNext + instrDescSize >= emitCurIGfreeEndp)
{
emitForceNewIG = true;
}

/* Insert a pseudo-instruction to ensure that we align
the next instruction properly */

while (insAlignCount)
{
emitLoopAlign(paddingBytes);
insAlignCount--;
}

#if defined(TARGET_XARCH)
emitLoopAlign(lastInsAlignSize);
#endif
}

//-----------------------------------------------------------------------------
// emitLoopAlignment: Insert an align instruction at the end of emitCurIG and
// mark it as IGF_LOOP_ALIGN to indicate that next IG is a
Expand All @@ -4805,6 +4880,9 @@ void emitter::emitLoopAlignment()
{
unsigned short paddingBytes;

#if defined(TARGET_XARCH)
// For xarch, each align instruction can be maximum of MAX_ENCODED_SIZE bytes and if
// more padding is needed, multiple MAX_ENCODED_SIZE bytes instructions are added.
if ((emitComp->opts.compJitAlignLoopBoundary > 16) && (!emitComp->opts.compJitAlignLoopAdaptive))
{
paddingBytes = emitComp->opts.compJitAlignLoopBoundary;
Expand All @@ -4815,6 +4893,19 @@ void emitter::emitLoopAlignment()
paddingBytes = MAX_ENCODED_SIZE;
emitLoopAlign(paddingBytes);
}
#elif defined(TARGET_ARM64)
// For Arm64, each align instruction is 4-bytes long because of fixed-length encoding.
// The padding added will always be a multiple of 4 bytes.
if (emitComp->opts.compJitAlignLoopAdaptive)
{
paddingBytes = emitComp->opts.compJitAlignLoopBoundary >> 1;
}
else
{
paddingBytes = emitComp->opts.compJitAlignLoopBoundary;
}
emitLongLoopAlign(paddingBytes);
#endif

// Mark this IG as needing alignment so that during emission we can check the instruction count
// heuristics of all IGs that follow this IG and participate in a loop.
Expand Down Expand Up @@ -5042,6 +5133,7 @@ void emitter::emitSetLoopBackEdge(BasicBlock* loopTopBlock)
{
assert(!markedLastLoop);
assert(alignInstr->idaIG->isLoopAlign());

alignInstr->idaIG->igFlags &= ~IGF_LOOP_ALIGN;
markedLastLoop = true;
JITDUMP("** Skip alignment for aligned loop IG%02u ~ IG%02u because it encloses the current loop "
Expand All @@ -5054,6 +5146,20 @@ void emitter::emitSetLoopBackEdge(BasicBlock* loopTopBlock)
break;
}

#if defined(TARGET_XARCH)
if (!emitComp->opts.compJitAlignLoopAdaptive)
#endif
{
// If there are multiple align instructions, skip the align instructions after
// the first align instruction and fast forward to the next IG
insGroup* alignIG = alignInstr->idaIG;
while ((alignInstr != nullptr) && (alignInstr->idaNext != nullptr) &&
(alignInstr->idaNext->idaIG == alignIG))
{
alignInstr = alignInstr->idaNext;
}
}

alignInstr = alignInstr->idaNext;
}

Expand Down Expand Up @@ -5126,26 +5232,45 @@ void emitter::emitLoopAlignAdjustments()
alignIG->igFlags &= ~IGF_LOOP_ALIGN;
}

#ifdef TARGET_XARCH
if (emitComp->opts.compJitAlignLoopAdaptive)
{
assert(actualPaddingNeeded < MAX_ENCODED_SIZE);
alignInstr->idCodeSize(actualPaddingNeeded);
}
else
#endif
{
unsigned paddingToAdj = actualPaddingNeeded;

#ifdef DEBUG
#if defined(TARGET_XARCH)
int instrAdjusted =
(emitComp->opts.compJitAlignLoopBoundary + (MAX_ENCODED_SIZE - 1)) / MAX_ENCODED_SIZE;
#endif
#elif defined(TARGET_ARM64)
unsigned short instrAdjusted = (emitComp->opts.compJitAlignLoopBoundary >> 1) / INSTR_ENCODED_SIZE;
if (!emitComp->opts.compJitAlignLoopAdaptive)
{
instrAdjusted = emitComp->opts.compJitAlignLoopBoundary / INSTR_ENCODED_SIZE;
}
#endif // TARGET_XARCH & TARGET_ARM64
#endif // DEBUG
// Adjust the padding amount in all align instructions in this IG
instrDescAlign *alignInstrToAdj = alignInstr, *prevAlignInstr = nullptr;
for (; alignInstrToAdj != nullptr && alignInstrToAdj->idaIG == alignInstr->idaIG;
alignInstrToAdj = alignInstrToAdj->idaNext)
{

#if defined(TARGET_XARCH)
unsigned newPadding = min(paddingToAdj, MAX_ENCODED_SIZE);
alignInstrToAdj->idCodeSize(newPadding);
#elif defined(TARGET_ARM64)
unsigned newPadding = min(paddingToAdj, INSTR_ENCODED_SIZE);
if (newPadding == 0)
{
alignInstrToAdj->idInsOpt(INS_OPTS_NONE);
}
#endif
paddingToAdj -= newPadding;
prevAlignInstr = alignInstrToAdj;
#ifdef DEBUG
Expand Down Expand Up @@ -5191,7 +5316,7 @@ void emitter::emitLoopAlignAdjustments()
}

//-----------------------------------------------------------------------------
// emitCalculatePaddingForLoopAlignment: Calculate the padding to insert at the
// emitCalculatePaddingForLoopAlignment: Calculate the padding amount to insert at the
// end of 'ig' so the loop that starts after 'ig' is aligned.
//
// Arguments:
Expand Down Expand Up @@ -5268,16 +5393,25 @@ unsigned emitter::emitCalculatePaddingForLoopAlignment(insGroup* ig, size_t offs
if (emitComp->opts.compJitAlignLoopAdaptive)
{
// adaptive loop alignment
unsigned nMaxPaddingBytes = (1 << (maxLoopBlocksAllowed - minBlocksNeededForLoop + 1)) - 1;
unsigned nPaddingBytes = (-(int)(size_t)offset) & (alignmentBoundary - 1);
unsigned nMaxPaddingBytes = (1 << (maxLoopBlocksAllowed - minBlocksNeededForLoop + 1));
#ifdef TARGET_XARCH
// Max padding for adaptive alignment has alignmentBoundary of 32 bytes with
// max padding limit of 15 bytes ((alignmentBoundary >> 1) - 1)
nMaxPaddingBytes -= 1;
#endif
unsigned nPaddingBytes = (-(int)(size_t)offset) & (alignmentBoundary - 1);

// Check if the alignment exceeds maxPadding limit
if (nPaddingBytes > nMaxPaddingBytes)
{
#ifdef TARGET_XARCH
// Cannot align to 32B, so try to align to 16B boundary.
// Only applicable for xarch. For arm64, it is recommended to align
// at 32B only.
alignmentBoundary >>= 1;
nMaxPaddingBytes = 1 << (maxLoopBlocksAllowed - minBlocksNeededForLoop + 1);
nPaddingBytes = (-(int)(size_t)offset) & (alignmentBoundary - 1);
#endif

// Check if the loop is already at new alignment boundary
if (nPaddingBytes == 0)
Expand Down
24 changes: 23 additions & 1 deletion src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@ class emitter
instruction _idIns : 10;
#define MAX_ENCODED_SIZE 15
#elif defined(TARGET_ARM64)
#define INSTR_ENCODED_SIZE 4
static_assert_no_msg(INS_count <= 512);
instruction _idIns : 9;
#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64))
Expand Down Expand Up @@ -890,6 +891,12 @@ class emitter
}

#elif defined(TARGET_ARM64)

// Reports whether this descriptor is an INS_align instruction whose
// instruction option is INS_OPTS_NONE.
inline bool idIsEmptyAlign() const
{
    if (idIns() != INS_align)
    {
        return false;
    }

    return idInsOpt() == INS_OPTS_NONE;
}

unsigned idCodeSize() const
{
int size = 4;
Expand All @@ -913,6 +920,12 @@ class emitter
size = 8;
}
break;
case IF_SN_0A:
if (idIsEmptyAlign())
{
size = 0;
}
break;
default:
break;
}
Expand Down Expand Up @@ -1371,7 +1384,11 @@ class emitter
instrDescAlign* idaNext; // next align in the group/method
insGroup* idaIG; // containing group
};
#endif

void emitLoopAlign(unsigned short paddingBytes);
void emitLongLoopAlign(unsigned short alignmentBoundary);

#endif // FEATURE_LOOP_ALIGN

#if !defined(TARGET_ARM64) // This shouldn't be needed for ARM32, either, but I don't want to touch the ARM32 JIT.
struct instrDescLbl : instrDescJmp
Expand Down Expand Up @@ -2569,6 +2586,11 @@ inline emitter::instrDescAlign* emitter::emitNewInstrAlign()
{
// Allocate an align descriptor and tag it with the INS_align instruction.
instrDescAlign* newInstr = emitAllocInstrAlign();
newInstr->idIns(INS_align);

#ifdef TARGET_ARM64
// On arm64 the descriptor additionally gets the IF_SN_0A format and the
// INS_OPTS_ALIGN option.
newInstr->idInsFmt(IF_SN_0A);
newInstr->idInsOpt(INS_OPTS_ALIGN);
#endif
return newInstr;
}
#endif
Expand Down
Loading