24 commits
2b00468
Enable ARM (32-bit) deb/rpm package generation
Dec 9, 2025
e648f54
Fix #74020: Optimize consecutive shifts in JIT Lowering
Dec 14, 2025
4aa8c26
Enhance shift optimization: Handle LSH, Overshift, and Mixed shifts
Dec 15, 2025
cca8115
Fix signed/unsigned comparison warning in LowerShift
Dec 15, 2025
f8f91a3
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Dec 15, 2025
6e6dbfa
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Dec 15, 2025
a1c8cb4
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Dec 15, 2025
f6b8ebd
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Dec 16, 2025
e007bbe
Fix mixed shift types logic and build/analysis warnings
Dec 17, 2025
c6eae44
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Dec 17, 2025
5ef1f97
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Dec 18, 2025
d842ec1
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Dec 18, 2025
8c2c205
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Dec 22, 2025
b2a98d0
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Jan 2, 2026
7f01503
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Jan 26, 2026
2f8851e
Remove accidental changes to Directory.Build.props
Jan 27, 2026
5bd93c6
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Jan 27, 2026
d12a9ae
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Jan 27, 2026
2f6f21f
Fix NativeAOT regression: Correctly handle MixedOp (RSH over RSZ) ove…
Jan 27, 2026
e2579d0
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Jan 27, 2026
abb1684
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Jan 28, 2026
7537d9e
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Jan 29, 2026
99b56bf
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Jan 29, 2026
227fdb7
Merge branch 'main' into fix/74020-jit-shift-opt
csa7mdm Jan 31, 2026
142 changes: 142 additions & 0 deletions src/coreclr/jit/lower.cpp
@@ -8668,6 +8668,148 @@ void Lowering::LowerShift(GenTreeOp* shift)
shift->gtOp2->ClearContained();
}

if (comp->opts.OptimizationEnabled() && shift->OperIs(GT_LSH, GT_RSH, GT_RSZ) && shift->gtGetOp2()->IsCnsIntOrI())
{
GenTree* op1 = shift->gtGetOp1();
ssize_t c2 = shift->gtGetOp2()->AsIntCon()->IconValue();
unsigned bitWidth = genTypeSize(shift->TypeGet()) * 8;
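// For example, on a 32-bit value, (x >>> 3) >>> 4 folds to x >>> 7 and
// (x << 2) << 5 folds to x << 7, saving a shift instruction and a constant.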

// Case 1: (shift (shift x c1) c2)
// We can combine if:
// 1. The operations match (LSH/LSH, RSH/RSH, RSZ/RSZ), or
// 2. An RSH is applied over an RSZ: with c1 > 0 the inner RSZ has already
// cleared the sign bit, so the outer RSH behaves exactly like an RSZ.
bool sameOp = op1->OperIs(shift->OperGet());
bool mixedOp = shift->OperIs(GT_RSH) && op1->OperIs(GT_RSZ);

if ((sameOp || mixedOp) && op1->gtGetOp2()->IsCnsIntOrI() && !op1->IsMultiRegNode())
{
ssize_t c1 = op1->gtGetOp2()->AsIntCon()->IconValue();

// Only optimize if types match (simplifies width checks)
if (op1->TypeGet() == shift->TypeGet())
{
// Compute the sum as ssize_t: shift counts are small in practice, but
// conceptually c1 + c2 can exceed the bit width (and must not overflow).
ssize_t combined = c1 + c2;

if ((c1 > 0) && (c2 > 0))
{
if (combined < (ssize_t)bitWidth)
{
JITDUMP("Optimizing consecutive shifts: (x %s %d) %s %d -> x %s %d\n",
GenTree::OpName(op1->OperGet()), (int)c1, GenTree::OpName(shift->OperGet()), (int)c2,
GenTree::OpName(shift->OperGet()), (int)combined);
// If we had RSH(RSZ), result is RSZ.
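// e.g. (x >>> 5) >> 3 becomes x >>> 8: the RSZ left the sign bit clear,
// so the arithmetic shift cannot smear sign bits back in.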
if (mixedOp)
{
shift->SetOper(GT_RSZ);
}

shift->gtGetOp2()->AsIntCon()->SetIconValue(combined);
shift->gtOp1 = op1->gtGetOp1();
op1->gtGetOp1()->ClearContained();
BlockRange().Remove(op1->gtGetOp2());
BlockRange().Remove(op1);
}
else
{
// Overshift Case
JITDUMP("Optimizing overshift: (x %s %d) %s %d\n", GenTree::OpName(op1->OperGet()), (int)c1,
GenTree::OpName(shift->OperGet()), (int)c2);

if (shift->OperIs(GT_RSH) && !mixedOp)
{
// RSH saturates to sign bit (shift by bitWidth - 1)
// (x >> 30) >> 30 -> x >> 31 (for 32-bit)
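// After shifting by bitWidth - 1 every bit is a copy of the sign bit,
// so any further arithmetic shift leaves the value unchanged.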
JITDUMP(" -> x >> %d\n", bitWidth - 1);

shift->gtGetOp2()->AsIntCon()->SetIconValue(static_cast<ssize_t>(bitWidth) - 1);
shift->gtOp1 = op1->gtGetOp1();
op1->gtGetOp1()->ClearContained();
BlockRange().Remove(op1->gtGetOp2());
BlockRange().Remove(op1);
}
else
{
// LSH, RSZ, and mixed RSH-over-RSZ all produce 0 on overshift:
// (x << 30) << 2 -> 0
// (x >>> 30) >>> 2 -> 0
JITDUMP(" -> 0\n");

GenTree* zero = comp->gtNewZeroConNode(shift->TypeGet());
BlockRange().InsertAfter(shift, zero);

LIR::Use use;
if (BlockRange().TryGetUse(shift, &use))
{
use.ReplaceWith(zero);
}
else
{
zero->SetUnusedValue();
}
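// If the shift's result had no use, the zero must be explicitly flagged
// unused so LIR liveness checks remain satisfied.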

// Remove the outer shift and its count constant; the LIR::Use check
// above has already rewired (or flagged) the result.
BlockRange().Remove(shift->gtGetOp2());
BlockRange().Remove(shift);

// 'op1' stays in place: it is now disconnected from 'shift', and
// liveness/DCE will remove it if nothing else uses it.
}
}
}
}
}
// Case 2: (shift (cast (shift x c1)) c2)
// Optimization for: RSZ(CAST(RSZ(x, c1)), c2) -> CAST(RSZ(x, c1 + c2))
else if (shift->OperIs(GT_RSZ) && op1->OperIs(GT_CAST) && !op1->gtOverflow() && !op1->IsMultiRegNode())
{
GenTree* cast = op1;
GenTree* innerShift = cast->gtGetOp1();

// Only optimize when the cast widens or preserves width; a narrowing cast
// discards high bits between the two shifts, so combining can change results.
// Widening example: (long)(intVar >>> 30) >>> 2
// Combining would give (long)(intVar >>> 32), an overshift of the 32-bit
// inner value; the (c1 + c2) < innerBitWidth guard below rejects it.
// Narrowing example: (short)(intVar >>> 16) >>> 1
// The truncating cast (and any subsequent sign extension) interacts with
// the combined shift in subtle ways, so narrowing casts are excluded.
bool isNarrowing = genTypeSize(cast->TypeGet()) < genTypeSize(innerShift->TypeGet());
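// A safe widening example: (ulong)(u >>> 3) >>> 4 -> (ulong)(u >>> 7); the
// zero-extending cast preserves every bit the combined shift produces.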

if (!isNarrowing && innerShift->OperIs(GT_RSZ) && innerShift->gtGetOp2()->IsCnsIntOrI() && !innerShift->IsMultiRegNode())
{
ssize_t c1 = innerShift->gtGetOp2()->AsIntCon()->IconValue();
unsigned innerBitWidth = genTypeSize(innerShift->TypeGet()) * 8;

if ((c1 > 0) && (c2 > 0) && ((c1 + c2) < (ssize_t)innerBitWidth))
{
JITDUMP("Optimizing distinct type shifts: (cast (x >> %d)) >> %d -> cast (x >> %d)\n", (int)c1,
(int)c2, (int)(c1 + c2));

innerShift->gtGetOp2()->AsIntCon()->SetIconValue(c1 + c2);

// Replace uses of 'shift' with 'cast', bypassing 'shift'
LIR::Use use;
if (BlockRange().TryGetUse(shift, &use))
{
use.ReplaceWith(cast);
}
else
{
// No remaining use: flag 'cast' unused, mirroring the zero-node path above
cast->SetUnusedValue();
}

// Remove the 'c2' constant and turn the now-redundant 'shift' into a NOP
BlockRange().Remove(shift->gtGetOp2());
shift->gtBashToNOP();
}
}
}
}

ContainCheckShiftRotate(shift);

#ifdef TARGET_ARM64