@@ -4796,6 +4796,81 @@ void emitter::emitJumpDistBind()
47964796
47974797#if FEATURE_LOOP_ALIGN
47984798
4799+ // -----------------------------------------------------------------------------
4800+ //
4801+ // The next instruction will be a loop head entry point
4802+ // So insert an alignment instruction here to ensure that
4803+ // we can properly align the code.
4804+ //
4805+ void emitter::emitLoopAlign (unsigned short paddingBytes)
4806+ {
4807+ /* Insert a pseudo-instruction to ensure that we align
4808+ the next instruction properly */
4809+ instrDescAlign* id = emitNewInstrAlign ();
4810+
4811+ #if defined(TARGET_XARCH)
4812+ assert (paddingBytes <= MAX_ENCODED_SIZE);
4813+ id->idCodeSize (paddingBytes);
4814+ #elif defined(TARGET_ARM64)
4815+ assert (paddingBytes == INSTR_ENCODED_SIZE);
4816+ #endif
4817+
4818+ id->idaIG = emitCurIG;
4819+
4820+ /* Append this instruction to this IG's alignment list */
4821+ id->idaNext = emitCurIGAlignList;
4822+
4823+ emitCurIGsize += paddingBytes;
4824+
4825+ dispIns (id);
4826+ emitCurIGAlignList = id;
4827+ }
4828+
4829+ // -----------------------------------------------------------------------------
4830+ //
4831+ // The next instruction will be a loop head entry point
4832+ // So insert alignment instruction(s) here to ensure that
4833+ // we can properly align the code.
4834+ //
4835+ // This emits more than one `INS_align` instruction depending on the
4836+ // alignmentBoundary parameter.
4837+ //
4838+ void emitter::emitLongLoopAlign (unsigned short alignmentBoundary)
4839+ {
4840+ #if defined(TARGET_XARCH)
4841+ unsigned short nPaddingBytes = alignmentBoundary - 1 ;
4842+ unsigned short nAlignInstr = (nPaddingBytes + (MAX_ENCODED_SIZE - 1 )) / MAX_ENCODED_SIZE;
4843+ unsigned short insAlignCount = nPaddingBytes / MAX_ENCODED_SIZE;
4844+ unsigned short lastInsAlignSize = nPaddingBytes % MAX_ENCODED_SIZE;
4845+ unsigned short paddingBytes = MAX_ENCODED_SIZE;
4846+ #elif defined(TARGET_ARM64)
4847+ unsigned short nAlignInstr = alignmentBoundary / INSTR_ENCODED_SIZE;
4848+ unsigned short insAlignCount = nAlignInstr;
4849+ unsigned short paddingBytes = INSTR_ENCODED_SIZE;
4850+ #endif
4851+
4852+ unsigned short instrDescSize = nAlignInstr * sizeof (instrDescAlign);
4853+
4854+ // Ensure that all align instructions fall in same IG.
4855+ if (emitCurIGfreeNext + instrDescSize >= emitCurIGfreeEndp)
4856+ {
4857+ emitForceNewIG = true ;
4858+ }
4859+
4860+ /* Insert a pseudo-instruction to ensure that we align
4861+ the next instruction properly */
4862+
4863+ while (insAlignCount)
4864+ {
4865+ emitLoopAlign (paddingBytes);
4866+ insAlignCount--;
4867+ }
4868+
4869+ #if defined(TARGET_XARCH)
4870+ emitLoopAlign (lastInsAlignSize);
4871+ #endif
4872+ }
4873+
47994874// -----------------------------------------------------------------------------
48004875// emitLoopAlignment: Insert an align instruction at the end of emitCurIG and
48014876// mark it as IGF_LOOP_ALIGN to indicate that next IG is a
@@ -4805,6 +4880,9 @@ void emitter::emitLoopAlignment()
48054880{
48064881 unsigned short paddingBytes;
48074882
4883+ #if defined(TARGET_XARCH)
4884+ // For xarch, each align instruction can be maximum of MAX_ENCODED_SIZE bytes and if
4885+ // more padding is needed, multiple MAX_ENCODED_SIZE bytes instructions are added.
48084886 if ((emitComp->opts .compJitAlignLoopBoundary > 16 ) && (!emitComp->opts .compJitAlignLoopAdaptive ))
48094887 {
48104888 paddingBytes = emitComp->opts .compJitAlignLoopBoundary ;
@@ -4815,6 +4893,19 @@ void emitter::emitLoopAlignment()
48154893 paddingBytes = MAX_ENCODED_SIZE;
48164894 emitLoopAlign (paddingBytes);
48174895 }
4896+ #elif defined(TARGET_ARM64)
4897+ // For Arm64, each align instruction is 4-bytes long because of fixed-length encoding.
4898+ // The padding added will always be a multiple of 4 bytes.
4899+ if (emitComp->opts .compJitAlignLoopAdaptive )
4900+ {
4901+ paddingBytes = emitComp->opts .compJitAlignLoopBoundary >> 1 ;
4902+ }
4903+ else
4904+ {
4905+ paddingBytes = emitComp->opts .compJitAlignLoopBoundary ;
4906+ }
4907+ emitLongLoopAlign (paddingBytes);
4908+ #endif
48184909
48194910 // Mark this IG as need alignment so during emitter we can check the instruction count heuristics of
48204911 // all IGs that follows this IG and participate in a loop.
@@ -5042,6 +5133,7 @@ void emitter::emitSetLoopBackEdge(BasicBlock* loopTopBlock)
50425133 {
50435134 assert (!markedLastLoop);
50445135 assert (alignInstr->idaIG ->isLoopAlign ());
5136+
50455137 alignInstr->idaIG ->igFlags &= ~IGF_LOOP_ALIGN;
50465138 markedLastLoop = true ;
50475139 JITDUMP (" ** Skip alignment for aligned loop IG%02u ~ IG%02u because it encloses the current loop "
@@ -5054,6 +5146,20 @@ void emitter::emitSetLoopBackEdge(BasicBlock* loopTopBlock)
50545146 break ;
50555147 }
50565148
5149+ #if defined(TARGET_XARCH)
5150+ if (!emitComp->opts .compJitAlignLoopAdaptive )
5151+ #endif
5152+ {
5153+ // If there are multiple align instructions, skip the align instructions after
5154+ // the first align instruction and fast forward to the next IG
5155+ insGroup* alignIG = alignInstr->idaIG ;
5156+ while ((alignInstr != nullptr ) && (alignInstr->idaNext != nullptr ) &&
5157+ (alignInstr->idaNext ->idaIG == alignIG))
5158+ {
5159+ alignInstr = alignInstr->idaNext ;
5160+ }
5161+ }
5162+
50575163 alignInstr = alignInstr->idaNext ;
50585164 }
50595165
@@ -5126,26 +5232,45 @@ void emitter::emitLoopAlignAdjustments()
51265232 alignIG->igFlags &= ~IGF_LOOP_ALIGN;
51275233 }
51285234
5235+ #ifdef TARGET_XARCH
51295236 if (emitComp->opts .compJitAlignLoopAdaptive )
51305237 {
51315238 assert (actualPaddingNeeded < MAX_ENCODED_SIZE);
51325239 alignInstr->idCodeSize (actualPaddingNeeded);
51335240 }
51345241 else
5242+ #endif
51355243 {
51365244 unsigned paddingToAdj = actualPaddingNeeded;
51375245
51385246#ifdef DEBUG
5247+ #if defined(TARGET_XARCH)
51395248 int instrAdjusted =
51405249 (emitComp->opts .compJitAlignLoopBoundary + (MAX_ENCODED_SIZE - 1 )) / MAX_ENCODED_SIZE;
5141- #endif
5250+ #elif defined(TARGET_ARM64)
5251+ unsigned short instrAdjusted = (emitComp->opts .compJitAlignLoopBoundary >> 1 ) / INSTR_ENCODED_SIZE;
5252+ if (!emitComp->opts .compJitAlignLoopAdaptive )
5253+ {
5254+ instrAdjusted = emitComp->opts .compJitAlignLoopBoundary / INSTR_ENCODED_SIZE;
5255+ }
5256+ #endif // TARGET_XARCH & TARGET_ARM64
5257+ #endif // DEBUG
51425258 // Adjust the padding amount in all align instructions in this IG
51435259 instrDescAlign *alignInstrToAdj = alignInstr, *prevAlignInstr = nullptr ;
51445260 for (; alignInstrToAdj != nullptr && alignInstrToAdj->idaIG == alignInstr->idaIG ;
51455261 alignInstrToAdj = alignInstrToAdj->idaNext )
51465262 {
5263+
5264+ #if defined(TARGET_XARCH)
51475265 unsigned newPadding = min (paddingToAdj, MAX_ENCODED_SIZE);
51485266 alignInstrToAdj->idCodeSize (newPadding);
5267+ #elif defined(TARGET_ARM64)
5268+ unsigned newPadding = min (paddingToAdj, INSTR_ENCODED_SIZE);
5269+ if (newPadding == 0 )
5270+ {
5271+ alignInstrToAdj->idInsOpt (INS_OPTS_NONE);
5272+ }
5273+ #endif
51495274 paddingToAdj -= newPadding;
51505275 prevAlignInstr = alignInstrToAdj;
51515276#ifdef DEBUG
@@ -5191,7 +5316,7 @@ void emitter::emitLoopAlignAdjustments()
51915316}
51925317
51935318// -----------------------------------------------------------------------------
5194- // emitCalculatePaddingForLoopAlignment: Calculate the padding to insert at the
5319+ // emitCalculatePaddingForLoopAlignment: Calculate the padding amount to insert at the
51955320// end of 'ig' so the loop that starts after 'ig' is aligned.
51965321//
51975322// Arguments:
@@ -5268,16 +5393,25 @@ unsigned emitter::emitCalculatePaddingForLoopAlignment(insGroup* ig, size_t offs
52685393 if (emitComp->opts .compJitAlignLoopAdaptive )
52695394 {
52705395 // adaptive loop alignment
5271- unsigned nMaxPaddingBytes = (1 << (maxLoopBlocksAllowed - minBlocksNeededForLoop + 1 )) - 1 ;
5272- unsigned nPaddingBytes = (-(int )(size_t )offset) & (alignmentBoundary - 1 );
5396+ unsigned nMaxPaddingBytes = (1 << (maxLoopBlocksAllowed - minBlocksNeededForLoop + 1 ));
5397+ #ifdef TARGET_XARCH
5398+ // Max padding for adaptive alignment has alignmentBoundary of 32 bytes with
5399+ // max padding limit of 15 bytes ((alignmentBoundary >> 1) - 1)
5400+ nMaxPaddingBytes -= 1 ;
5401+ #endif
5402+ unsigned nPaddingBytes = (-(int )(size_t )offset) & (alignmentBoundary - 1 );
52735403
52745404 // Check if the alignment exceeds maxPadding limit
52755405 if (nPaddingBytes > nMaxPaddingBytes)
52765406 {
5407+ #ifdef TARGET_XARCH
52775408 // Cannot align to 32B, so try to align to 16B boundary.
5409+ // Only applicable for xarch. For arm64, it is recommended to align
5410+ // at 32B only.
52785411 alignmentBoundary >>= 1 ;
52795412 nMaxPaddingBytes = 1 << (maxLoopBlocksAllowed - minBlocksNeededForLoop + 1 );
52805413 nPaddingBytes = (-(int )(size_t )offset) & (alignmentBoundary - 1 );
5414+ #endif
52815415
52825416 // Check if the loop is already at new alignment boundary
52835417 if (nPaddingBytes == 0 )
0 commit comments