-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Implement DivRem intrinsic for X86 #66551
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 21 commits
00da1f4
d679cca
4eba5c5
6f8fbbf
997d8a5
c0c3dd6
c08cee7
1804350
46c3f78
2dbd2e9
1b6d09b
3ef3600
acaf211
728440d
4eace5d
3182ed8
48c12a4
be5b33c
d868194
ff65608
b31f896
0bd1327
51959c0
8f1d9a2
f4aece2
75dda8d
43049a9
d485b61
aeb114a
aa57f26
9345e57
65c0af6
2bfa332
7b41fd6
df922de
924fc42
1e7ff05
62e6c59
a1b4802
d547337
8199acb
ed12edb
532a8fd
281c1b0
97630a2
491f82e
7487dfc
7a862b2
0fc11c9
8ab6023
8d67890
0d85848
1fd2b74
18c9787
b63dea3
1b0e670
7631033
c0be00a
83192b1
b29e1b4
82c38a3
61fabe2
eea804c
ebe8781
9dc8522
0093b79
8260134
320270a
a98dbde
06f0460
1b3e851
9f192ac
537f678
ca32b24
e1b5fb3
6589700
2a90b4b
aad300a
eb3272c
39ae98d
f8473ec
e00a8c1
9e28279
2d4d8d9
b5e3dd3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1582,27 +1582,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, | |
| } | ||
| } | ||
|
|
||
| GenTree* loadIntrinsic = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); | ||
| GenTreeHWIntrinsic* loadIntrinsic = | ||
| gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); | ||
| // This operation contains an implicit indirection | ||
| // it could point into the global heap or | ||
| // it could throw a null reference exception. | ||
| // | ||
| loadIntrinsic->gtFlags |= (GTF_GLOB_REF | GTF_EXCEPT); | ||
|
||
|
|
||
| assert(HWIntrinsicInfo::IsMultiReg(intrinsic)); | ||
|
|
||
| const unsigned lclNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg intrinsic")); | ||
| impAssignTempGen(lclNum, loadIntrinsic, sig->retTypeSigClass, (unsigned)CHECK_SPILL_ALL); | ||
|
|
||
| LclVarDsc* varDsc = lvaGetDesc(lclNum); | ||
| // The following excludes the fields of the local from having SSA. | ||
| varDsc->lvIsMultiRegRet = true; | ||
|
|
||
| GenTreeLclVar* lclVar = gtNewLclvNode(lclNum, varDsc->lvType); | ||
| lclVar->SetDoNotCSE(); | ||
| lclVar->SetMultiReg(); | ||
|
|
||
| retNode = lclVar; | ||
| retNode = gtNewLclvForMultiRegIntrinsicNode(loadIntrinsic, sig); | ||
| break; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -491,8 +491,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, | |
| { | ||
| case InstructionSet_Vector256: | ||
| case InstructionSet_Vector128: | ||
| case InstructionSet_X86Base: | ||
| return impBaseIntrinsic(intrinsic, clsHnd, method, sig, simdBaseJitType, retType, simdSize); | ||
| case InstructionSet_X86Base: | ||
| case InstructionSet_X86Base_X64: | ||
| return impX86BaseIntrinsic(intrinsic, method, sig, simdBaseJitType); | ||
|
||
| case InstructionSet_SSE: | ||
| return impSSEIntrinsic(intrinsic, method, sig); | ||
| case InstructionSet_SSE2: | ||
|
|
@@ -550,13 +552,8 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, | |
| return nullptr; | ||
| } | ||
|
|
||
| var_types simdBaseType = TYP_UNKNOWN; | ||
|
|
||
| if (intrinsic != NI_X86Base_Pause) | ||
| { | ||
| simdBaseType = JitType2PreciseVarType(simdBaseJitType); | ||
| assert(varTypeIsArithmetic(simdBaseType)); | ||
| } | ||
| var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); | ||
| assert(varTypeIsArithmetic(simdBaseType)); | ||
|
|
||
| switch (intrinsic) | ||
| { | ||
|
|
@@ -2194,16 +2191,62 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, | |
| break; | ||
| } | ||
|
|
||
| default: | ||
| { | ||
| return nullptr; | ||
| } | ||
| } | ||
|
|
||
| return retNode; | ||
| } | ||
|
|
||
| GenTree* Compiler::impX86BaseIntrinsic(NamedIntrinsic intrinsic, | ||
| CORINFO_METHOD_HANDLE method, | ||
| CORINFO_SIG_INFO* sig, | ||
| CorInfoType simdBaseJitType) | ||
| { | ||
| GenTree* retNode = nullptr; | ||
| GenTree* op1 = nullptr; | ||
| GenTree* op2 = nullptr; | ||
| GenTree* op3 = nullptr; | ||
| GenTree* op4 = nullptr; | ||
|
|
||
| var_types retType = JITtype2varType(sig->retType); | ||
|
|
||
| switch (intrinsic) | ||
| { | ||
|
|
||
| case NI_X86Base_Pause: | ||
| { | ||
| assert(sig->numArgs == 0); | ||
| assert(JITtype2varType(sig->retType) == TYP_VOID); | ||
| assert(simdSize == 0); | ||
| assert(retType == TYP_VOID); | ||
| assert(simdBaseJitType == CORINFO_TYPE_UNDEF); | ||
|
|
||
| retNode = gtNewScalarHWIntrinsicNode(TYP_VOID, intrinsic); | ||
| break; | ||
| } | ||
|
|
||
| case NI_X86Base_DivRem: | ||
| case NI_X86Base_X64_DivRem: | ||
| { | ||
| assert(sig->numArgs == 3); | ||
| assert(HWIntrinsicInfo::IsMultiReg(intrinsic)); | ||
| assert(retType == TYP_STRUCT); | ||
| assert(simdBaseJitType != CORINFO_TYPE_UNDEF); | ||
|
|
||
| op3 = impPopStack().val; | ||
| op2 = impPopStack().val; | ||
| op1 = impPopStack().val; | ||
|
|
||
| GenTreeHWIntrinsic* divRemIntrinsic = gtNewScalarHWIntrinsicNode(retType, op1, op2, op3, intrinsic); | ||
|
|
||
| // Store the type from signature into SIMD base type for convenience | ||
| divRemIntrinsic->SetSimdBaseJitType(simdBaseJitType); | ||
|
|
||
| retNode = gtNewLclvForMultiRegIntrinsicNode(divRemIntrinsic, sig); | ||
| break; | ||
| } | ||
|
|
||
| default: | ||
| { | ||
| return nullptr; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6749,7 +6749,7 @@ bool Lowering::NodesAreEquivalentLeaves(GenTree* tree1, GenTree* tree2) | |
| bool Lowering::CheckMultiRegLclVar(GenTreeLclVar* lclNode, const ReturnTypeDesc* retTypeDesc) | ||
|
||
| { | ||
| bool canEnregister = false; | ||
| #if FEATURE_MULTIREG_RET | ||
| #if FEATURE_MULTIREG_RET || FEATURE_HW_INTRINSICS | ||
huoyaoyuan marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| LclVarDsc* varDsc = comp->lvaGetDesc(lclNode->GetLclNum()); | ||
| if ((comp->lvaEnregMultiRegVars) && varDsc->lvPromoted) | ||
| { | ||
|
|
@@ -6776,10 +6776,10 @@ bool Lowering::CheckMultiRegLclVar(GenTreeLclVar* lclNode, const ReturnTypeDesc* | |
| } | ||
| } | ||
| #ifdef TARGET_XARCH | ||
| // For local stores on XARCH we only handle mismatched src/dest register count for | ||
| // calls of SIMD type. If the source was another lclVar similarly promoted, we would | ||
| // For local stores on XARCH we can't handle another lclVar source. | ||
| // If the source was another lclVar similarly promoted, we would | ||
| // have broken it into multiple stores. | ||
| if (lclNode->OperIs(GT_STORE_LCL_VAR) && !lclNode->gtGetOp1()->OperIs(GT_CALL)) | ||
| if (lclNode->OperIs(GT_STORE_LCL_VAR) && lclNode->gtGetOp1()->OperIs(GT_LCL_VAR)) | ||
|
||
| { | ||
| canEnregister = false; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2015,7 +2015,23 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou | |
| } | ||
|
|
||
| int srcCount = 0; | ||
| int dstCount = intrinsicTree->IsValue() ? 1 : 0; | ||
| int dstCount; | ||
|
|
||
| if (intrinsicTree->IsValue()) | ||
| { | ||
| if (HWIntrinsicInfo::IsMultiReg(intrinsicId)) | ||
| { | ||
| dstCount = HWIntrinsicInfo::GetMultiRegCount(intrinsicId); | ||
| } | ||
| else | ||
| { | ||
| dstCount = 1; | ||
| } | ||
| } | ||
| else | ||
| { | ||
| dstCount = 0; | ||
| } | ||
|
|
||
| regMaskTP dstCandidates = RBM_NONE; | ||
|
|
||
|
|
@@ -2193,6 +2209,26 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou | |
| } | ||
| #endif // TARGET_X86 | ||
|
|
||
| case NI_X86Base_DivRem: | ||
| case NI_X86Base_X64_DivRem: | ||
| { | ||
| assert(numArgs == 3); | ||
| assert(dstCount == 2); | ||
| assert(isRMW); | ||
|
|
||
| // DIV implicitly takes op1 (lower) in EAX and op2 (upper) in EDX | ||
| srcCount += BuildOperandUses(op1, RBM_EAX); | ||
| srcCount += BuildOperandUses(op2, RBM_EDX); | ||
| srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); | ||
|
||
|
|
||
| // The results are placed in EAX and EDX | ||
| BuildDef(intrinsicTree, RBM_EAX, 0); | ||
| BuildDef(intrinsicTree, RBM_EDX, 1); | ||
|
|
||
| buildUses = false; | ||
| break; | ||
| } | ||
|
|
||
| case NI_BMI2_MultiplyNoFlags: | ||
| case NI_BMI2_X64_MultiplyNoFlags: | ||
| { | ||
|
|
@@ -2463,7 +2499,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou | |
| } | ||
| else | ||
| { | ||
| assert(dstCount == 0); | ||
| // Currently dstCount = 2 is only used for DivRem, which has special constraints and is handled above | ||
| assert((dstCount == 0) || (dstCount == 2)); | ||
|
||
| } | ||
|
|
||
| *pDstCount = dstCount; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.