This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit d5a4ade

Reimplement compare flags reuse using SETCC/JCC

Parent: 2db9bf2
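In short: rather than tagging flag-setting nodes with GTF_ZSF_SET up front and special-casing the compare in codegen, lowering now rewrites a compare-against-zero into an explicit GT_SETCC or GT_JCC node that consumes the flags its operand already set. A minimal C++ sketch of the kind of source pattern that benefits (illustrative only; the names are not from the commit):

void handleZero(); // hypothetical callee, declared only for this sketch

void process(unsigned x, unsigned y)
{
    if (x + y == 0) // the add sets ZF itself; EQ + JTRUE collapse to GT_JCC(EQ)
    {
        handleZero();
    }
}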

File tree

8 files changed (+84 -164 lines):
  src/jit/codegenarm64.cpp
  src/jit/codegenxarch.cpp
  src/jit/compiler.cpp
  src/jit/gentree.h
  src/jit/lower.cpp
  src/jit/lower.h
  src/jit/lowerarmarch.cpp
  src/jit/lowerxarch.cpp

src/jit/codegenarm64.cpp

Lines changed: 0 additions & 34 deletions
@@ -3298,40 +3298,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
     assert(!op1->isUsedFromMemory());
     assert(!op2->isUsedFromMemory());
 
-    // Case of op1 == 0 or op1 != 0:
-    // Optimize generation of 'test' instruction if op1 sets flags.
-    //
-    // This behavior is designed to match the unexpected behavior
-    // of XARCH genCompareInt();
-    //
-    // TODO-Cleanup Review GTF_USE_FLAGS usage
-    // https://github.com/dotnet/coreclr/issues/14093
-    if ((tree->gtFlags & GTF_USE_FLAGS) != 0)
-    {
-        // op1 must set flags
-        assert(op1->gtSetFlags());
-
-        // Must be compare against zero.
-        assert(!tree->OperIs(GT_TEST_EQ, GT_TEST_NE));
-        assert(op2->IsIntegralConst(0));
-        assert(op2->isContained());
-
-        // Just consume the operands
-        genConsumeOperands(tree);
-
-        // No need to generate compare instruction since
-        // op1 sets flags
-
-        // Are we evaluating this into a register?
-        if (targetReg != REG_NA)
-        {
-            genSetRegToCond(targetReg, tree);
-            genProduceReg(tree);
-        }
-
-        return;
-    }
-
     genConsumeOperands(tree);
 
     emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));

src/jit/codegenxarch.cpp

Lines changed: 0 additions & 39 deletions
@@ -6243,45 +6243,6 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
     var_types op2Type   = op2->TypeGet();
     regNumber targetReg = tree->gtRegNum;
 
-    // Case of op1 == 0 or op1 != 0:
-    // Optimize generation of 'test' instruction if op1 sets flags.
-    //
-    // TODO-Cleanup Review GTF_USE_FLAGS usage
-    // https://github.com/dotnet/coreclr/issues/14093
-    //
-    // Note that if LSRA has inserted any GT_RELOAD/GT_COPY before
-    // op1, it will not modify the flags set by codegen of op1.
-    // Similarly op1 could also be reg-optional at its use and
-    // it was spilled after producing its result in a register.
-    // Spill code too will not modify the flags set by op1.
-    GenTree* realOp1 = op1->gtSkipReloadOrCopy();
-    if ((tree->gtFlags & GTF_USE_FLAGS) != 0)
-    {
-        // op1 must set ZF and SF flags
-        assert(realOp1->gtSetZSFlags());
-        assert(realOp1->gtSetFlags());
-
-        // Must be (in)equality against zero.
-        assert(tree->OperIs(GT_EQ, GT_NE));
-        assert(op2->IsIntegralConst(0));
-        assert(op2->isContained());
-
-        // Just consume the operands
-        genConsumeOperands(tree);
-
-        // No need to generate test instruction since
-        // op1 sets flags
-
-        // Are we evaluating this into a register?
-        if (targetReg != REG_NA)
-        {
-            genSetRegToCond(targetReg, tree);
-            genProduceReg(tree);
-        }
-
-        return;
-    }
-
     genConsumeOperands(tree);
 
     assert(!op1->isContainedIntOrIImmed());
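Both deleted blocks were the codegen halves of the old peephole: when lowering had set GTF_USE_FLAGS on the compare, codegen skipped the cmp/test and read the flags left by op1 (the XARCH version also had to look through GT_RELOAD/GT_COPY, since register copies and spill code leave the flags intact). With the relop rewritten to GT_SETCC/GT_JCC during lowering, these special cases disappear. A hedged sketch of the value-producing shape this handled (illustrative names, not from the commit):

// An (in)equality against zero materialized into a register: the 'add'
// already set ZF/SF, so no 'test' is needed before the setcc.
bool sumIsZero(unsigned x, unsigned y)
{
    unsigned sum = x + y; // flag-setting op, marked GTF_SET_FLAGS by lowering
    return sum == 0;      // the relop itself becomes GT_SETCC(EQ)
}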

src/jit/compiler.cpp

Lines changed: 1 addition & 1 deletion
@@ -9781,11 +9781,11 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree)
         {
             chars += printf("[SPILLED_OP2]");
         }
-#endif
         if (tree->gtFlags & GTF_ZSF_SET)
         {
             chars += printf("[ZSF_SET]");
         }
+#endif
 #if FEATURE_SET_FLAGS
         if (tree->gtFlags & GTF_SET_FLAGS)
         {

src/jit/gentree.h

Lines changed: 3 additions & 1 deletion
@@ -856,11 +856,11 @@ struct GenTree
 #ifdef LEGACY_BACKEND
 #define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled
 #define GTF_SPILLED_OP2  0x00000200 // op2 has been spilled
+#define GTF_ZSF_SET      0x00000400 // the zero(ZF) and sign(SF) flags set to the operand
 #else  // !LEGACY_BACKEND
 #define GTF_NOREG_AT_USE 0x00000100 // tree node is in memory at the point of use
 #endif // !LEGACY_BACKEND
 
-#define GTF_ZSF_SET   0x00000400 // the zero(ZF) and sign(SF) flags set to the operand
 #define GTF_SET_FLAGS 0x00000800 // Requires that codegen for this node set the flags. Use gtSetFlags() to check this flag.
 #define GTF_USE_FLAGS 0x00001000 // Indicates that this node uses the flags bits.

@@ -2126,12 +2126,14 @@ struct GenTree
     bool gtSetFlags() const;
     bool gtRequestSetFlags();
 
+#ifdef LEGACY_BACKEND
     // Returns true if the codegen of this tree node
     // sets ZF and SF flags.
     bool gtSetZSFlags() const
     {
         return (gtFlags & GTF_ZSF_SET) != 0;
     }
+#endif
 
 #ifdef DEBUG
     bool gtIsValid64RsltMul();

src/jit/lower.cpp

Lines changed: 78 additions & 27 deletions
@@ -142,16 +142,6 @@ GenTree* Lowering::LowerNode(GenTree* node)
             ContainCheckBinary(node->AsOp());
             break;
 
-#ifdef _TARGET_XARCH_
-        case GT_NEG:
-            // Codegen of this tree node sets ZF and SF flags.
-            if (!varTypeIsFloating(node))
-            {
-                node->gtFlags |= GTF_ZSF_SET;
-            }
-            break;
-#endif // _TARGET_XARCH_
-
         case GT_MUL:
         case GT_MULHI:
 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)

@@ -189,8 +179,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
         case GT_TEST_NE:
         case GT_CMP:
         case GT_JCMP:
-            LowerCompare(node);
-            break;
+            return LowerCompare(node);
 
         case GT_JTRUE:
             ContainCheckJTrue(node->AsOp());

@@ -2148,6 +2137,9 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
 // Arguments:
 //    cmp - the compare node
 //
+// Return Value:
+//    The next node to lower.
+//
 // Notes:
 //    - Decomposes long comparisons that feed a GT_JTRUE (32 bit specific).
 //    - Decomposes long comparisons that produce a value (X86 specific).

@@ -2156,8 +2148,11 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
 //    - Transform cmp(and(x, y), 0) into test(x, y) (XARCH/Arm64 specific but could
 //      be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added).
 //    - Transform TEST(x, LSH(1, y)) into BT(x, y) (XARCH specific)
+//    - Transform RELOP(OP, 0) into SETCC(OP) or JCC(OP) if OP can set the
+//      condition flags appropriately (XARCH/ARM64 specific but could be extended
+//      to ARM32 as well if ARM32 codegen supports GTF_SET_FLAGS).
 
-void Lowering::LowerCompare(GenTree* cmp)
+GenTree* Lowering::LowerCompare(GenTree* cmp)
 {
 #ifndef _TARGET_64BIT_
     if (cmp->gtGetOp1()->TypeGet() == TYP_LONG)

@@ -2363,7 +2358,7 @@ void Lowering::LowerCompare(GenTree* cmp)
             cmp->AsCC()->gtCondition = condition;
         }
 
-        return;
+        return cmp->gtNext;
     }
 #endif
 
@@ -2567,11 +2562,10 @@ void Lowering::LowerCompare(GenTree* cmp)
             }
         }
     }
-#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
 
-#ifdef _TARGET_XARCH_
     if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE))
     {
+#ifdef _TARGET_XARCH_
         //
         // Transform TEST_EQ|NE(x, LSH(1, y)) into BT(x, y) when possible. Using BT
         // results in smaller and faster code. It also doesn't have special register

@@ -2614,10 +2608,76 @@ void Lowering::LowerCompare(GenTree* cmp)
 
             cc->gtFlags |= GTF_USE_FLAGS | GTF_UNSIGNED;
 
-            return;
+            return cmp->gtNext;
+        }
+#endif // _TARGET_XARCH_
+    }
+#ifdef _TARGET_XARCH_
+    else if (cmp->OperIs(GT_EQ, GT_NE))
+#else // _TARGET_ARM64_
+    else
+#endif
+    {
+        GenTree* op1 = cmp->gtGetOp1();
+        GenTree* op2 = cmp->gtGetOp2();
+
+        // TODO-CQ: right now the below peep is inexpensive and gets the benefit in most
+        // cases because in majority of cases op1, op2 and cmp would be in that order in
+        // execution. In general we should be able to check that all the nodes that come
+        // after op1 do not modify the flags so that it is safe to avoid generating a
+        // test instruction.
+
+        if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) &&
+#ifdef _TARGET_XARCH_
+            op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG))
+#else // _TARGET_ARM64_
+            op1->OperIs(GT_AND, GT_ADD, GT_SUB))
+#endif
+        {
+            op1->gtFlags |= GTF_SET_FLAGS;
+            op1->SetUnusedValue();
+
+            BlockRange().Remove(op2);
+
+            GenTree*   next = cmp->gtNext;
+            GenTree*   cc;
+            genTreeOps ccOp;
+            LIR::Use   cmpUse;
+
+            // Fast check for the common case - relop used by a JTRUE that immediately follows it.
+            if ((next != nullptr) && next->OperIs(GT_JTRUE) && (next->gtGetOp1() == cmp))
+            {
+                cc   = next;
+                ccOp = GT_JCC;
+                next = nullptr;
+                BlockRange().Remove(cmp);
+            }
+            else if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE))
+            {
+                cc   = cmpUse.User();
+                ccOp = GT_JCC;
+                next = nullptr;
+                BlockRange().Remove(cmp);
+            }
+            else // The relop is not used by a JTRUE or it is not used at all.
+            {
+                // Transform the relop node into a SETCC. If it's not used we could remove
+                // it completely but that means doing more work to handle a rare case.
+                cc   = cmp;
+                ccOp = GT_SETCC;
+            }
+
+            genTreeOps condition = cmp->OperGet();
+            cc->ChangeOper(ccOp);
+            cc->AsCC()->gtCondition = condition;
+            cc->gtFlags |= GTF_USE_FLAGS | (cmp->gtFlags & GTF_UNSIGNED);
+
+            return next;
         }
     }
+#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
 
+#ifdef _TARGET_XARCH_
     if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet())
     {
         if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet()))

@@ -2635,6 +2695,7 @@ void Lowering::LowerCompare(GenTree* cmp)
     }
 #endif // _TARGET_XARCH_
     ContainCheckCompare(cmp->AsOp());
+    return cmp->gtNext;
 }
 
 // Lower "jmp <method>" tail call to insert PInvoke method epilog if required.

@@ -5377,16 +5438,6 @@ void Lowering::ContainCheckNode(GenTree* node)
             ContainCheckBinary(node->AsOp());
             break;
 
-#ifdef _TARGET_XARCH_
-        case GT_NEG:
-            // Codegen of this tree node sets ZF and SF flags.
-            if (!varTypeIsFloating(node))
-            {
-                node->gtFlags |= GTF_ZSF_SET;
-            }
-            break;
-#endif // _TARGET_XARCH_
-
 #if defined(_TARGET_X86_)
         case GT_MUL_LONG:
 #endif
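The notes above also cover the TEST-to-BT rewrite that now shares this code path. A hedged example of a source shape that can trigger it (illustrative function; the shift count is masked only to keep the C++ well-defined):

// Testing a variable bit: TEST_EQ|NE(x, LSH(1, y)) is rewritten to BT(x, y)
// on XARCH, and BT leaves the tested bit in the carry flag for the
// following SETCC/JCC to consume.
bool bitIsSet(unsigned x, unsigned y)
{
    return (x & (1u << (y & 31u))) != 0;
}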

src/jit/lower.h

Lines changed: 1 addition & 1 deletion
@@ -137,7 +137,7 @@ class Lowering : public Phase
     // Call Lowering
     // ------------------------------
     void LowerCall(GenTree* call);
-    void LowerCompare(GenTree* tree);
+    GenTree* LowerCompare(GenTree* tree);
     void LowerJmpMethod(GenTree* jmp);
     void LowerRet(GenTree* ret);
     GenTree* LowerDelegateInvoke(GenTreeCall* call);
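LowerCompare returns GenTree* now because the SETCC/JCC transform can remove the compare and its zero operand from the LIR range, so the caller must resume from a node that is still linked. A sketch of the driving-loop shape this enables (hypothetical and simplified, not the commit's actual driver):

// Each Lower* routine reports the next node to visit, so deletions and
// replacements made inside it are skipped over safely.
for (GenTree* node = block.FirstNode(); node != nullptr;) // 'block' is illustrative
{
    node = LowerNode(node); // may return node->gtNext or a later node
}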

src/jit/lowerarmarch.cpp

Lines changed: 1 addition & 23 deletions
@@ -700,29 +700,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
     GenTreePtr op1 = cmp->gtOp.gtOp1;
     GenTreePtr op2 = cmp->gtOp.gtOp2;
 
-    // If op1 codegen can set flags op2 is an immediate 0
-    // we don't need to generate cmp instruction,
-    // provided we don't have another GenTree node between op1
-    // and cmp that could potentially modify flags.
-    //
-    // TODO-CQ: right now the below peep is inexpensive and
-    // gets the benefit in most of cases because in majority
-    // of cases op1, op2 and cmp would be in that order in
-    // execution. In general we should be able to check that all
-    // the nodes that come after op1 in execution order do not
-    // modify the flags so that it is safe to avoid generating a
-    // test instruction. Such a check requires that on each
-    // GenTree node we need to set the info whether its codegen
-    // will modify flags.
-    if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) &&
-        !cmp->OperIs(GT_TEST_EQ, GT_TEST_NE) && op1->OperIs(GT_ADD, GT_AND, GT_SUB))
-    {
-        assert(!op1->gtSetFlags());
-        op1->gtFlags |= GTF_SET_FLAGS;
-        cmp->gtFlags |= GTF_USE_FLAGS;
-    }
-
-    if (!varTypeIsFloating(cmp) && op2->IsCnsIntOrI() && ((cmp->gtFlags & GTF_USE_FLAGS) == 0))
+    if (!varTypeIsFloating(cmp) && op2->IsCnsIntOrI())
     {
         LIR::Use cmpUse;
         bool     useJCMP = false;
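With the peephole centralized in LowerCompare, the ARM64 containment check no longer needs a GTF_USE_FLAGS guard: a flags-reusing compare has already been rewritten by the time ContainCheckCompare runs. Note that the ARM64 operator list (AND, ADD, SUB) is narrower than XARCH's, matching the instructions that have flag-setting encodings (ands/adds/subs). An illustrative source pattern (names are not from the commit):

// On ARM64 the subtract can be emitted as 'subs', which sets the flags,
// so comparing the difference against zero needs no separate cmp.
bool equalViaSub(unsigned a, unsigned b)
{
    return (a - b) == 0; // SUB marked GTF_SET_FLAGS; the relop becomes SETCC(EQ)
}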

src/jit/lowerxarch.cpp

Lines changed: 0 additions & 38 deletions
@@ -1843,41 +1843,6 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
         {
             MakeSrcContained(cmp, op1);
         }
-        // If op1 codegen sets ZF and SF flags and ==/!= against
-        // zero, we don't need to generate test instruction,
-        // provided we don't have another GenTree node between op1
-        // and cmp that could potentially modify flags.
-        //
-        // TODO-CQ: right now the below peep is inexpensive and
-        // gets the benefit in most of cases because in majority
-        // of cases op1, op2 and cmp would be in that order in
-        // execution. In general we should be able to check that all
-        // the nodes that come after op1 in execution order do not
-        // modify the flags so that it is safe to avoid generating a
-        // test instruction. Such a check requires that on each
-        // GenTree node we need to set the info whether its codegen
-        // will modify flags.
-        //
-        // TODO-CQ: We can optimize compare against zero in the
-        // following cases by generating the branch as indicated
-        // against each case.
-        //  1) unsigned compare
-        //        < 0  - always FALSE
-        //       <= 0  - ZF=1 and jne
-        //        > 0  - ZF=0 and je
-        //       >= 0  - always TRUE
-        //
-        //  2) signed compare
-        //        < 0  - SF=1 and js
-        //       >= 0  - SF=0 and jns
-        else if (cmp->OperIs(GT_EQ, GT_NE) && op1->gtSetZSFlags() && op2->IsIntegralConst(0) &&
-                 (op1->gtNext == op2) && (op2->gtNext == cmp))
-        {
-            // Require codegen of op1 to set the flags.
-            assert(!op1->gtSetFlags());
-            op1->gtFlags |= GTF_SET_FLAGS;
-            cmp->gtFlags |= GTF_USE_FLAGS;
-        }
         else
         {
             SetRegOptional(op1);

@@ -2050,9 +2015,6 @@ void Lowering::ContainCheckBinary(GenTreeOp* node)
         return;
     }
 
-    // Codegen of these tree nodes sets ZF and SF flags.
-    node->gtFlags |= GTF_ZSF_SET;
-
     // We're not marking a constant hanging on the left of an add
     // as containable so we assign it to a register having CQ impact.
     // TODO-XArch-CQ: Detect this case and support both generating a single instruction