From 7fbe973b38cb41798c725378b37f83311df300b2 Mon Sep 17 00:00:00 2001 From: Anthony Canino Date: Fri, 15 Oct 2021 11:07:23 -0700 Subject: [PATCH 1/2] XARCH: Remove redudant tests for GT_LT/GT_GE relops. We can now optimize cases such as `(x + y < 0)` or `for (int x = v; x >= 0; x--)` using the flag tracking logic during the emit stage. Notably, cases that would generate... ``` add reg0, reg1 test reg0, reg0 jge LABEL ``` now transform to ``` add reg0, reg1 jns LABEL ``` This transform is valid for signed GE and signed LT only. --- src/coreclr/jit/codegenlinear.cpp | 10 ++++ src/coreclr/jit/codegenxarch.cpp | 5 ++ src/coreclr/jit/emitxarch.cpp | 83 +++++++++++++++++++++++++++++++ src/coreclr/jit/emitxarch.h | 2 + src/coreclr/jit/gentree.h | 9 ++++ 5 files changed, 109 insertions(+) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index e0f48954c65cd3..f99078d1748e46 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2618,6 +2618,16 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) condition = GenCondition(GenCondition::P); } + + if (relop->MarkedForSignJumpOpt()) + { + // If relop was previously marked for a signed jump check optimization because of SF flag + // reuse, replace jge/jl with jns/js. + + assert(relop->OperGet() == GT_LT || relop->OperGet() == GT_GE); + condition = (relop->OperGet() == GT_LT) ? GenCondition(GenCondition::S) : GenCondition(GenCondition::NS); + } + #endif inst_JCC(condition, compiler->compCurBB->bbJumpDest); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 128de7c9fcd5b3..cb9a92cbaccbc0 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6232,6 +6232,11 @@ void CodeGen::genCompareInt(GenTree* treeNode) { JITDUMP("Not emitting compare due to flags being already set\n"); } + else if (canReuseFlags && emit->AreFlagsSetForSignJumpOpt(op1->GetRegNum(), emitTypeSize(type), tree)) + { + JITDUMP("Not emitting compare due to sign being already set, follow up instr will transform jump\n"); + tree->gtFlags |= GTF_RELOP_SJUMP_OPT; + } else { emit->emitInsBinary(ins, emitTypeSize(type), op1, op2); diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index e07984760e018e..fd24d8571397ec 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -164,6 +164,21 @@ bool emitter::DoesWriteZeroFlag(instruction ins) return (CodeGenInterface::instInfo[ins] & Writes_ZF) != 0; } +//------------------------------------------------------------------------ +// DoesWriteSignFlag: check if the instruction writes the +// SF flag. +// +// Arguments: +// ins - instruction to test +// +// Return Value: +// true if instruction writes the SF flag, false otherwise. +// +bool emitter::DoesWriteSignFlag(instruction ins) +{ + return (CodeGenInterface::instInfo[ins] & Writes_SF) != 0; +} + //------------------------------------------------------------------------ // DoesResetOverflowAndCarryFlags: check if the instruction resets the // OF and CF flag to 0. @@ -393,6 +408,74 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr return false; } +//------------------------------------------------------------------------ +// AreFlagsSetToForSignJumpOpt: checks if the previous instruction set the SF if the tree +// node qualifies for a jg/jle to jns/js optimization +// +// Arguments: +// reg - register of interest +// opSize - size of register +// relop - relational tree node +// +// Return Value: +// true if the tree node qualifies for the jg/jle to jns/js optimization +// false if not, or if we can't safely determine +// +// Notes: +// Currently only looks back one instruction. +bool emitter::AreFlagsSetForSignJumpOpt(regNumber reg, emitAttr opSize, GenTree* relop) +{ + assert(reg != REG_NA); + + // Don't look back across IG boundaries (possible control flow) + if (emitCurIGinsCnt == 0 && ((emitCurIG->igFlags & IGF_EXTEND) == 0)) + { + return false; + } + + instrDesc* id = emitLastIns; + instruction lastIns = id->idIns(); + insFormat fmt = id->idInsFmt(); + + // make sure op1 is a reg + switch (fmt) + { + case IF_RWR_CNS: + case IF_RRW_CNS: + case IF_RRW_SHF: + case IF_RWR_RRD: + case IF_RRW_RRD: + case IF_RWR_MRD: + case IF_RWR_SRD: + case IF_RRW_SRD: + case IF_RWR_ARD: + case IF_RRW_ARD: + case IF_RWR: + case IF_RRD: + case IF_RRW: + break; + default: + return false; + } + + if (id->idReg1() != reg) + { + return false; + } + + // If we have a GT_GE/GT_LT which generates an jge/jl, and the previous instruction + // sets the SF, we can omit a test instruction and check for jns/js. + if ((relop->OperGet() == GT_GE || relop->OperGet() == GT_LT) && !GenCondition::FromRelop(relop).IsUnsigned()) + { + if (DoesWriteSignFlag(lastIns) && IsFlagsAlwaysModified(id)) + { + return id->idOpSize() == opSize; + } + } + + return false; +} + //------------------------------------------------------------------------ // IsDstSrcImmAvxInstruction: Checks if the instruction has a "reg, reg/mem, imm" or // "reg/mem, reg, imm" form for the legacy, VEX, and EVEX diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 8217d088417cc1..9177aa293fcc8d 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -116,6 +116,7 @@ static bool IsJmpInstruction(instruction ins); bool AreUpper32BitsZero(regNumber reg); bool AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps treeOps); +bool AreFlagsSetForSignJumpOpt(regNumber reg, emitAttr opSize, GenTree* tree); bool hasRexPrefix(code_t code) { @@ -190,6 +191,7 @@ void SetContains256bitAVX(bool value) bool IsDstDstSrcAVXInstruction(instruction ins); bool IsDstSrcSrcAVXInstruction(instruction ins); bool DoesWriteZeroFlag(instruction ins); +bool DoesWriteSignFlag(instruction ins); bool DoesResetOverflowAndCarryFlags(instruction ins); bool IsFlagsAlwaysModified(instrDesc* id); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 091c2464e69502..e8fdab24eb8360 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -530,6 +530,8 @@ enum GenTreeFlags : unsigned int GTF_RELOP_QMARK = 0x20000000, // GT_ -- the node is the condition for ?: GTF_RELOP_ZTT = 0x08000000, // GT_ -- Loop test cloned for converting while-loops into do-while // with explicit "loop test" in the header block. + GTF_RELOP_SJUMP_OPT = 0x04000000, // GT_ -- Swap signed jl/jge with js/jns during emitter, reuses flags + // from previous instruction. GTF_JCMP_EQ = 0x80000000, // GTF_JCMP_EQ -- Branch on equal rather than not equal GTF_JCMP_TST = 0x40000000, // GTF_JCMP_TST -- Use bit test instruction rather than compare against zero instruction @@ -3027,6 +3029,13 @@ struct GenTreeOp : public GenTreeUnOp { } #endif + + // True if this relop is marked for a transform during the emitter + // phase, e.g., jge => jns + bool MarkedForSignJumpOpt() const + { + return (gtFlags & GTF_RELOP_SJUMP_OPT) != 0; + } }; struct GenTreeVal : public GenTree From 19b3a91c77dbff6b4d091243009b8a112e8721f8 Mon Sep 17 00:00:00 2001 From: Anthony Canino Date: Tue, 9 Nov 2021 13:18:52 -0500 Subject: [PATCH 2/2] Add a few asserts related to flag reuse optimizations. --- src/coreclr/jit/codegenxarch.cpp | 2 ++ src/coreclr/jit/emitxarch.cpp | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index cb9a92cbaccbc0..44759f12354fdc 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6227,6 +6227,8 @@ void CodeGen::genCompareInt(GenTree* treeNode) assert(genTypeSize(type) <= genTypeSize(TYP_I_IMPL)); // TYP_UINT and TYP_ULONG should not appear here, only small types can be unsigned assert(!varTypeIsUnsigned(type) || varTypeIsSmall(type)); + // Sign jump optimization should only be set the following check + assert((tree->gtFlags & GTF_RELOP_SJUMP_OPT) == 0); if (canReuseFlags && emit->AreFlagsSetToZeroCmp(op1->GetRegNum(), emitTypeSize(type), tree->OperGet())) { diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index fd24d8571397ec..60227fd06c60b4 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -353,6 +353,11 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr { assert(reg != REG_NA); + if (!emitComp->opts.OptimizationEnabled()) + { + return false; + } + // Don't look back across IG boundaries (possible control flow) if (emitCurIGinsCnt == 0 && ((emitCurIG->igFlags & IGF_EXTEND) == 0)) { @@ -427,6 +432,11 @@ bool emitter::AreFlagsSetForSignJumpOpt(regNumber reg, emitAttr opSize, GenTree* { assert(reg != REG_NA); + if (!emitComp->opts.OptimizationEnabled()) + { + return false; + } + // Don't look back across IG boundaries (possible control flow) if (emitCurIGinsCnt == 0 && ((emitCurIG->igFlags & IGF_EXTEND) == 0)) {