diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index f0a5386bb30b63..b80c42875f3bbb 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -2958,8 +2958,8 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree)
     ValueNumPair vnPair = tree->gtVNPair;
     ValueNum     vnCns  = vnStore->VNConservativeNormalValue(vnPair);

-    // Check if node evaluates to a constant or Vector.Zero.
-    if (!vnStore->IsVNConstant(vnCns) && !vnStore->IsVNVectorZero(vnCns))
+    // Check if node evaluates to a constant
+    if (!vnStore->IsVNConstant(vnCns))
     {
         return nullptr;
     }
@@ -3118,23 +3118,52 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree)
         }
         break;

-#if FEATURE_HW_INTRINSICS
+#if FEATURE_SIMD
         case TYP_SIMD8:
+        {
+            simd8_t value = vnStore->ConstantValue<simd8_t>(vnCns);
+
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+            vecCon->gtSimd8Val    = value;
+
+            conValTree = vecCon;
+            break;
+        }
+
         case TYP_SIMD12:
+        {
+            simd12_t value = vnStore->ConstantValue<simd12_t>(vnCns);
+
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+            vecCon->gtSimd12Val   = value;
+
+            conValTree = vecCon;
+            break;
+        }
+
         case TYP_SIMD16:
+        {
+            simd16_t value = vnStore->ConstantValue<simd16_t>(vnCns);
+
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+            vecCon->gtSimd16Val   = value;
+
+            conValTree = vecCon;
+            break;
+        }
+
         case TYP_SIMD32:
         {
-            assert(vnStore->IsVNVectorZero(vnCns));
-            VNSimdTypeInfo vnInfo = vnStore->GetVectorZeroSimdTypeOfVN(vnCns);
+            simd32_t value = vnStore->ConstantValue<simd32_t>(vnCns);

-            assert(vnInfo.m_simdBaseJitType != CORINFO_TYPE_UNDEF);
-            assert(vnInfo.m_simdSize != 0);
-            assert(getSIMDTypeForSize(vnInfo.m_simdSize) == vnStore->TypeOfVN(vnCns));
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+            vecCon->gtSimd32Val   = value;

-            conValTree = gtNewSimdZeroNode(tree->TypeGet(), vnInfo.m_simdBaseJitType, vnInfo.m_simdSize, true);
+            conValTree = vecCon;
+            break;
         }
         break;
-#endif
+#endif // FEATURE_SIMD

         case TYP_BYREF:
             // Do not support const byref optimization.
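All four new SIMD widths above follow the same replacement pattern; only the payload type changes. A condensed sketch of that pattern, using the same JIT members the hunk touches (TYP_SIMD16 shown, other widths analogous; not compilable outside the JIT):

```cpp
// Sketch: fold a tree whose conservative VN is a vector constant into GT_CNS_VEC.
ValueNum vnCns = vnStore->VNConservativeNormalValue(tree->gtVNPair);

if (vnStore->IsVNConstant(vnCns) && (tree->TypeGet() == TYP_SIMD16))
{
    // Pull the raw 128-bit payload out of the value-number store.
    simd16_t value = vnStore->ConstantValue<simd16_t>(vnCns);

    // CORINFO_TYPE_FLOAT reads as a placeholder base type here: the VN only
    // records the bit pattern, not the element type of the vector.
    GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
    vecCon->gtSimd16Val   = value;

    conValTree = vecCon;
}
```

Previously only a Vector.Zero value number could be rewritten here (via gtNewSimdZeroNode); with GT_CNS_VEC, any constant vector VN can be materialized.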
@@ -5608,8 +5637,7 @@ struct VNAssertionPropVisitorInfo
 //
 GenTree* Compiler::optExtractSideEffListFromConst(GenTree* tree)
 {
-    assert(vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair)) ||
-           vnStore->IsVNVectorZero(vnStore->VNConservativeNormalValue(tree->gtVNPair)));
+    assert(vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair)));

     GenTree* sideEffList = nullptr;

diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis
index d3eee9af9dcaaa..5873fca508786b 100644
--- a/src/coreclr/jit/clrjit.natvis
+++ b/src/coreclr/jit/clrjit.natvis
@@ -50,6 +50,9 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u
     <DisplayString>CNS_STR</DisplayString>
   </Type>

+  <Type Name="GenTreeVecCon">
+    <DisplayString>CNS_VEC</DisplayString>
+  </Type>

   <Type Name="GenTreeLngCon">
     <DisplayString>{gtTreeID, d}: [LngCon={((GenTreeLngCon*)this)->gtLconVal, l}]</DisplayString>
diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp
index f3674d33660a30..3a8c00bafaa184 100644
--- a/src/coreclr/jit/codegenarm.cpp
+++ b/src/coreclr/jit/codegenarm.cpp
@@ -287,6 +287,11 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
         }
         break;

+        case GT_CNS_VEC:
+        {
+            unreached();
+        }
+
         default:
             unreached();
     }
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index d03ac91781f5fc..5fb190f95d0a14 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -2314,6 +2314,77 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
         }
         break;

+        case GT_CNS_VEC:
+        {
+            GenTreeVecCon* vecCon = tree->AsVecCon();
+
+            emitter* emit = GetEmitter();
+            emitAttr attr = emitTypeSize(targetType);
+
+            switch (tree->TypeGet())
+            {
+#if defined(FEATURE_SIMD)
+                case TYP_LONG:
+                case TYP_DOUBLE:
+                case TYP_SIMD8:
+                {
+                    // TODO-1stClassStructs: do not retype SIMD nodes
+
+                    if (vecCon->IsAllBitsSet())
+                    {
+                        emit->emitIns_R_I(INS_mvni, attr, targetReg, 0, INS_OPTS_2S);
+                    }
+                    else if (vecCon->IsZero())
+                    {
+                        emit->emitIns_R_I(INS_movi, attr, targetReg, 0, INS_OPTS_2S);
+                    }
+                    else
+                    {
+                        // Get a temp integer register to compute long address.
+                        regNumber addrReg = tree->GetSingleTempReg();
+
+                        simd8_t              constValue = vecCon->gtSimd8Val;
+                        CORINFO_FIELD_HANDLE hnd        = emit->emitSimd8Const(constValue);
+
+                        emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0);
+                    }
+                    break;
+                }
+
+                case TYP_SIMD12:
+                case TYP_SIMD16:
+                {
+                    if (vecCon->IsAllBitsSet())
+                    {
+                        emit->emitIns_R_I(INS_mvni, attr, targetReg, 0, INS_OPTS_4S);
+                    }
+                    else if (vecCon->IsZero())
+                    {
+                        emit->emitIns_R_I(INS_movi, attr, targetReg, 0, INS_OPTS_4S);
+                    }
+                    else
+                    {
+                        // Get a temp integer register to compute long address.
+ regNumber addrReg = tree->GetSingleTempReg(); + + simd16_t constValue = vecCon->gtSimd16Val; + CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(constValue); + + emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0); + } + break; + } +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + + break; + } + default: unreached(); } @@ -2548,10 +2619,18 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) } else if (data->isContained()) { - assert(data->OperIs(GT_BITCAST)); - const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1(); - assert(!bitcastSrc->isContained()); - dataReg = bitcastSrc->GetRegNum(); + if (data->IsCnsVec()) + { + assert(data->AsVecCon()->IsZero()); + dataReg = REG_ZR; + } + else + { + assert(data->OperIs(GT_BITCAST)); + const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1(); + assert(!bitcastSrc->isContained()); + dataReg = bitcastSrc->GetRegNum(); + } } else { @@ -2629,7 +2708,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) if (data->isContained()) { // This is only possible for a zero-init or bitcast. - const bool zeroInit = (data->IsIntegralConst(0) || data->IsSIMDZero()); + const bool zeroInit = (data->IsIntegralConst(0) || data->IsVectorZero()); assert(zeroInit || data->OperIs(GT_BITCAST)); if (zeroInit && varTypeIsSIMD(targetType)) @@ -4249,7 +4328,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) assert(!op1->isContained()); assert(op1Type == op2Type); - if (op2->IsFPZero()) + if (op2->IsFloatPositiveZero()) { assert(op2->isContained()); emit->emitIns_R_F(INS_fcmp, cmpSize, op1->GetRegNum(), 0.0); @@ -5088,7 +5167,7 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) if (op1->isContained()) { // This is only possible for a zero-init. - assert(op1->IsIntegralConst(0) || op1->IsSIMDZero()); + assert(op1->IsIntegralConst(0) || op1->IsVectorZero()); // store lower 8 bytes GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, REG_ZR, varNum, offs); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index f1426fc23487b8..3ed9a59bde5590 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -188,6 +188,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_CNS_INT: case GT_CNS_DBL: + case GT_CNS_VEC: genSetRegToConst(targetReg, targetType, treeNode); genProduceReg(treeNode); break; diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 6212a529fa471c..d89217cdcdab4e 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1660,7 +1660,7 @@ void CodeGen::genConsumeRegs(GenTree* tree) #ifdef FEATURE_SIMD // (In)Equality operation that produces bool result, when compared // against Vector zero, marks its Vector Zero operand as contained. - assert(tree->OperIsLeaf() || tree->IsSIMDZero() || tree->IsVectorZero()); + assert(tree->OperIsLeaf() || tree->IsVectorZero()); #else assert(tree->OperIsLeaf()); #endif diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index bab2c05407b748..97c884f0e4104f 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -451,8 +451,8 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, /*********************************************************************************** * * Generate code to set a register 'targetReg' of type 'targetType' to the constant - * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call - * genProduceReg() on the target register. 
+ * specified by the constant (GT_CNS_INT, GT_CNS_DBL, or GT_CNS_VEC) in 'tree'. This + * does not call genProduceReg() on the target register. */ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree) { @@ -507,6 +507,78 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre } break; + case GT_CNS_VEC: + { + GenTreeVecCon* vecCon = tree->AsVecCon(); + + emitter* emit = GetEmitter(); + emitAttr attr = emitTypeSize(targetType); + + if (vecCon->IsAllBitsSet()) + { +#if defined(FEATURE_SIMD) + emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg); +#else + emit->emitIns_R_R(INS_pcmpeqd, attr, targetReg, targetReg); +#endif // FEATURE_SIMD + break; + } + + if (vecCon->IsZero()) + { +#if defined(FEATURE_SIMD) + emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg); +#else + emit->emitIns_R_R(INS_xorps, attr, targetReg, targetReg); +#endif // FEATURE_SIMD + break; + } + + switch (tree->TypeGet()) + { +#if defined(FEATURE_SIMD) + case TYP_LONG: + case TYP_DOUBLE: + case TYP_SIMD8: + { + // TODO-1stClassStructs: do not retype SIMD nodes + + simd8_t constValue = vecCon->gtSimd8Val; + CORINFO_FIELD_HANDLE hnd = emit->emitSimd8Const(constValue); + + emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); + break; + } + + case TYP_SIMD12: + case TYP_SIMD16: + { + simd16_t constValue = vecCon->gtSimd16Val; + CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(constValue); + + emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); + break; + } + + case TYP_SIMD32: + { + simd32_t constValue = vecCon->gtSimd32Val; + CORINFO_FIELD_HANDLE hnd = emit->emitSimd32Const(constValue); + + emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); + break; + } +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + + break; + } + default: unreached(); } @@ -1495,6 +1567,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genProduceReg(treeNode); break; + case GT_CNS_VEC: + genSetRegToConst(targetReg, targetType, treeNode); + genProduceReg(treeNode); + break; + case GT_NOT: case GT_NEG: genCodeForNegNot(treeNode); @@ -4858,7 +4935,8 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) // zero in the target register, because an xor is smaller than a copy. Note that we could // potentially handle this in the register allocator, but we can't always catch it there // because the target may not have a register allocated for it yet. 
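A note on the IsFPZero to IsFloatPositiveZero rename used in this hunk (and in the arm64 genCodeForCompare change above): only +0.0 is the all-zero bit pattern that an xor-zeroed register actually holds, so only +0.0 may reuse such a register; -0.0 has its sign bit set and must still be loaded as a real constant. A minimal standalone illustration of the distinction (plain C++, not JIT code):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    double positiveZero = 0.0;
    double negativeZero = -0.0; // compares equal to +0.0, but different bits

    uint64_t pzBits;
    uint64_t nzBits;
    std::memcpy(&pzBits, &positiveZero, sizeof(double));
    std::memcpy(&nzBits, &negativeZero, sizeof(double));

    assert(pzBits == 0);                     // +0.0: all bits clear, xor-reusable
    assert(nzBits == 0x8000000000000000ull); // -0.0: sign bit set, needs a load
    return 0;
}
```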
- if (op1->isUsedFromReg() && (op1->GetRegNum() != targetReg) && (op1->IsIntegralConst(0) || op1->IsFPZero())) + if (op1->isUsedFromReg() && (op1->GetRegNum() != targetReg) && + (op1->IsIntegralConst(0) || op1->IsFloatPositiveZero())) { op1->SetRegNum(REG_NA); op1->ResetReuseRegVal(); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 34bb62a0daafe5..65e34301f6d8f8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2283,16 +2283,18 @@ class Compiler GenTree* gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle); + GenTreeVecCon* gtNewVconNode(var_types type, CorInfoType simdBaseJitType); + + GenTree* gtNewAllBitsSetConNode(var_types type); + GenTree* gtNewAllBitsSetConNode(var_types type, CorInfoType simdBaseJitType); + GenTree* gtNewZeroConNode(var_types type); + GenTree* gtNewZeroConNode(var_types type, CorInfoType simdBaseJitType); GenTree* gtNewOneConNode(var_types type); GenTreeLclVar* gtNewStoreLclVar(unsigned dstLclNum, GenTree* src); -#ifdef FEATURE_SIMD - GenTree* gtNewSIMDVectorZero(var_types simdType, CorInfoType simdBaseJitType, unsigned simdSize); -#endif - GenTree* gtNewBlkOpNode(GenTree* dst, GenTree* srcOrFillVal, bool isVolatile, bool isCopyBlock); GenTree* gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg); @@ -2557,11 +2559,6 @@ class Compiler unsigned simdSize, bool isSimdAsHWIntrinsic); - GenTree* gtNewSimdZeroNode(var_types type, - CorInfoType simdBaseJitType, - unsigned simdSize, - bool isSimdAsHWIntrinsic); - GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID); GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID); GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, @@ -8196,9 +8193,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX SIMDHandlesCache* m_simdHandleCache; - // Get an appropriate "zero" for the given type and class handle. - GenTree* gtGetSIMDZero(var_types simdType, CorInfoType simdBaseJitType, CORINFO_CLASS_HANDLE simdHandle); - // Get the handle for a SIMD type. CORINFO_CLASS_HANDLE gtGetStructHandleForSIMD(var_types simdType, CorInfoType simdBaseJitType) { @@ -8279,7 +8273,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX clsHnd = gtGetStructHandleForHWSIMD(simdType, simdBaseJitType); } - assert(clsHnd != NO_CLASS_HANDLE); return clsHnd; } #endif // FEATURE_HW_INTRINSICS @@ -10664,6 +10657,7 @@ class GenTreeVisitor case GT_CNS_LNG: case GT_CNS_DBL: case GT_CNS_STR: + case GT_CNS_VEC: case GT_MEMORYBARRIER: case GT_JMP: case GT_JCC: diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index c790e2bbbd9d90..bda0cc6d73b533 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4202,6 +4202,7 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_CNS_LNG: case GT_CNS_DBL: case GT_CNS_STR: + case GT_CNS_VEC: case GT_MEMORYBARRIER: case GT_JMP: case GT_JCC: diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 9516058fdfc768..e94c560b9fd536 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -7294,6 +7294,90 @@ CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(double constValue, emitAttr attr return emitComp->eeFindJitDataOffs(cnum); } +//------------------------------------------------------------------------ +// emitSimd8Const: Create a simd8 data section constant. 
+// +// Arguments: +// constValue - constant value +// +// Return Value: +// A field handle representing the data offset to access the constant. +// +CORINFO_FIELD_HANDLE emitter::emitSimd8Const(simd8_t constValue) +{ + // Access to inline data is 'abstracted' by a special type of static member + // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference + // to constant data, not a real static field. + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(FEATURE_SIMD) + unsigned cnsSize = 8; + unsigned cnsAlign = cnsSize; + +#ifdef TARGET_XARCH + if (emitComp->compCodeOpt() == Compiler::SMALL_CODE) + { + cnsAlign = dataSection::MIN_DATA_ALIGN; + } +#endif // TARGET_XARCH + + UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD8); + return emitComp->eeFindJitDataOffs(cnum); +#else + unreached(); +#endif // !FEATURE_SIMD +} + +CORINFO_FIELD_HANDLE emitter::emitSimd16Const(simd16_t constValue) +{ + // Access to inline data is 'abstracted' by a special type of static member + // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference + // to constant data, not a real static field. + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(FEATURE_SIMD) + unsigned cnsSize = 16; + unsigned cnsAlign = cnsSize; + +#ifdef TARGET_XARCH + if (emitComp->compCodeOpt() == Compiler::SMALL_CODE) + { + cnsAlign = dataSection::MIN_DATA_ALIGN; + } +#endif // TARGET_XARCH + + UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD16); + return emitComp->eeFindJitDataOffs(cnum); +#else + unreached(); +#endif // !FEATURE_SIMD +} + +CORINFO_FIELD_HANDLE emitter::emitSimd32Const(simd32_t constValue) +{ + // Access to inline data is 'abstracted' by a special type of static member + // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference + // to constant data, not a real static field. + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(FEATURE_SIMD) + unsigned cnsSize = 32; + unsigned cnsAlign = cnsSize; + +#ifdef TARGET_XARCH + if (emitComp->compCodeOpt() == Compiler::SMALL_CODE) + { + cnsAlign = dataSection::MIN_DATA_ALIGN; + } +#endif // TARGET_XARCH + + UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD32); + return emitComp->eeFindJitDataOffs(cnum); +#else + unreached(); +#endif // !FEATURE_SIMD +} + /***************************************************************************** * * Output the given data section at the specified address. 
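The three helpers above are deliberately parallel: each places the constant in the read-only data section at its natural alignment (relaxed to MIN_DATA_ALIGN under SMALL_CODE on xarch, trading alignment padding for size) and returns a pseudo field handle. A hypothetical consolidation, not part of this change, shows the shared shape, assuming simd8_t/simd16_t/simd32_t are exactly 8/16/32 bytes:

```cpp
// Sketch only: one template covering emitSimd{8,16,32}Const.
template <typename TSimd>
CORINFO_FIELD_HANDLE emitter::emitSimdConst(TSimd constValue, var_types simdType)
{
    unsigned cnsSize  = sizeof(TSimd); // 8, 16, or 32
    unsigned cnsAlign = cnsSize;       // natural alignment keeps vector loads cheap

#ifdef TARGET_XARCH
    if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
    {
        cnsAlign = dataSection::MIN_DATA_ALIGN; // favor data density over alignment
    }
#endif // TARGET_XARCH

    UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, simdType);
    return emitComp->eeFindJitDataOffs(cnum);
}
```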
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index b93fded455f32b..18d57d13790794 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -1928,6 +1928,9 @@ class emitter
 private:
     CORINFO_FIELD_HANDLE emitFltOrDblConst(double constValue, emitAttr attr);
+    CORINFO_FIELD_HANDLE emitSimd8Const(simd8_t constValue);
+    CORINFO_FIELD_HANDLE emitSimd16Const(simd16_t constValue);
+    CORINFO_FIELD_HANDLE emitSimd32Const(simd32_t constValue);
     regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src);
     regNumber emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2);
     void emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem);
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index eff18c5bea05c6..e69c30956a5756 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -1213,22 +1213,12 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
                     pushedStack.PushConstant();
                     // TODO: check if it's a loop condition - we unroll such loops.
                     break;
-                case NI_Vector256_get_Zero:
-                case NI_Vector256_get_AllBitsSet:
-                    foldableIntrinsic = true;
-                    pushedStack.PushUnknown();
-                    break;
 #elif defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS)
                 case NI_Vector64_get_Count:
                 case NI_Vector128_get_Count:
                     foldableIntrinsic = true;
                     pushedStack.PushConstant();
                     break;
-                case NI_Vector128_get_Zero:
-                case NI_Vector128_get_AllBitsSet:
-                    foldableIntrinsic = true;
-                    pushedStack.PushUnknown();
-                    break;
 #endif

                 default:
diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp
index ae8119b51e0308..fe0746aa26d72e 100644
--- a/src/coreclr/jit/fginline.cpp
+++ b/src/coreclr/jit/fginline.cpp
@@ -310,8 +310,8 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call)
  */
 GenTree* Compiler::fgGetStructAsStructPtr(GenTree* tree)
 {
-    noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) || tree->OperIsSIMD() ||
-                 tree->OperIsHWIntrinsic());
+    noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) ||
+                 tree->OperIsSIMDorHWintrinsic() || tree->IsCnsVec());

     // GT_CALL,     cannot get address of call.
     // GT_MKREFANY, inlining should've been aborted due to mkrefany opcode.
// GT_RET_EXPR, cannot happen after fgUpdateInlineReturnExpressionPlaceHolder diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ea690b31eef2ce..fd02c38963dde4 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -252,6 +252,7 @@ void GenTree::InitNodeSize() } GenTree::s_gtNodeSizes[GT_CALL] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_CNS_VEC] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_CAST] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_FTN_ADDR] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_BOX] = TREE_NODE_SZ_LARGE; @@ -300,6 +301,7 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeLngCon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeVecCon) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeLclVarCommon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL); @@ -2352,6 +2354,15 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) } break; + case GT_CNS_VEC: + { + if (GenTreeVecCon::Equals(op1->AsVecCon(), op2->AsVecCon())) + { + return true; + } + break; + } + #if 0 // TODO-CQ: Enable this in the future case GT_CNS_LNG: @@ -2815,6 +2826,57 @@ unsigned Compiler::gtHashValue(GenTree* tree) add = tree->AsStrCon()->gtSconCPX; break; + case GT_CNS_VEC: + { + GenTreeVecCon* vecCon = tree->AsVecCon(); + add = 0; + + switch (vecCon->TypeGet()) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD32: + { + add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[7]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[6]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[5]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[4]); + FALLTHROUGH; + } + + case TYP_SIMD16: + { + add = genTreeHashAdd(ulo32(add), vecCon->gtSimd16Val.u32[3]); + FALLTHROUGH; + } + + case TYP_SIMD12: + { + add = genTreeHashAdd(ulo32(add), vecCon->gtSimd12Val.u32[2]); + FALLTHROUGH; + } + + case TYP_SIMD8: + case TYP_DOUBLE: + case TYP_LONG: + { + // TODO-1stClassStructs: do not retype SIMD nodes + add = genTreeHashAdd(ulo32(add), vecCon->gtSimd8Val.u32[1]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimd8Val.u32[0]); + break; + } +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + + add = genTreeHashAdd(ulo32(add), vecCon->GetSimdBaseType()); + add = genTreeHashAdd(ulo32(add), vecCon->GetSimdSize()); + break; + } + case GT_JMP: add = tree->AsVal()->gtVal1; break; @@ -4515,8 +4577,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) #if defined(TARGET_XARCH) /* We use fldz and fld1 to load 0.0 and 1.0, but all other */ /* floating point constants are loaded using an indirection */ - if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) || - (*((__int64*)&(tree->AsDblCon()->gtDconVal)) == I64(0x3ff0000000000000))) + if (tree->IsFloatPositiveZero()) { costEx = 1; costSz = 1; @@ -4540,8 +4601,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 2 + 8; } #elif defined(TARGET_ARM64) - if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) || - emitter::emitIns_valid_imm_for_fmov(tree->AsDblCon()->gtDconVal)) + if (tree->IsFloatPositiveZero() || emitter::emitIns_valid_imm_for_fmov(tree->AsDblCon()->gtDconVal)) { costEx = 1; costSz = 1; @@ -4561,6 +4621,14 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) } 
break; + case GT_CNS_VEC: + { + costEx = IND_COST_EX; + costSz = 4; + level = 0; + break; + } + case GT_LCL_VAR: level = 1; if (gtIsLikelyRegVar(tree)) @@ -5844,6 +5912,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) case GT_CNS_LNG: case GT_CNS_DBL: case GT_CNS_STR: + case GT_CNS_VEC: case GT_MEMORYBARRIER: case GT_JMP: case GT_JCC: @@ -6870,6 +6939,50 @@ GenTree* Compiler::gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle) return node; } +GenTreeVecCon* Compiler::gtNewVconNode(var_types type, CorInfoType simdBaseJitType) +{ + GenTreeVecCon* vecCon = new (this, GT_CNS_VEC) GenTreeVecCon(type, simdBaseJitType, genTypeSize(type)); + return vecCon; +} + +GenTree* Compiler::gtNewAllBitsSetConNode(var_types type) +{ + GenTree* allBitsSet; + + switch (type) + { + case TYP_INT: + allBitsSet = gtNewIconNode(-1); + break; + + case TYP_LONG: + allBitsSet = gtNewLconNode(-1); + break; + + default: + noway_assert(!"Bad type in gtNewAllBitsSetConNode"); + allBitsSet = nullptr; + break; + } + + return allBitsSet; +} + +GenTree* Compiler::gtNewAllBitsSetConNode(var_types type, CorInfoType simdBaseJitType) +{ + assert(varTypeIsSIMD(type)); + assert(simdBaseJitType != CORINFO_TYPE_UNDEF); + + GenTreeVecCon* vecCon = gtNewVconNode(type, simdBaseJitType); + + vecCon->gtSimd32Val.i64[0] = -1; + vecCon->gtSimd32Val.i64[1] = -1; + vecCon->gtSimd32Val.i64[2] = -1; + vecCon->gtSimd32Val.i64[3] = -1; + + return vecCon; +} + GenTree* Compiler::gtNewZeroConNode(var_types type) { GenTree* zero; @@ -6908,6 +7021,16 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) return zero; } +GenTree* Compiler::gtNewZeroConNode(var_types type, CorInfoType simdBaseJitType) +{ + assert(varTypeIsSIMD(type)); + assert(simdBaseJitType != CORINFO_TYPE_UNDEF); + + GenTreeVecCon* vecCon = gtNewVconNode(type, simdBaseJitType); + vecCon->gtSimd32Val = {}; + return vecCon; +} + GenTree* Compiler::gtNewOneConNode(var_types type) { GenTree* one; @@ -6946,23 +7069,6 @@ GenTreeLclVar* Compiler::gtNewStoreLclVar(unsigned dstLclNum, GenTree* src) return store; } -#ifdef FEATURE_SIMD -//--------------------------------------------------------------------- -// gtNewSIMDVectorZero: create a GT_SIMD node for Vector.Zero -// -// Arguments: -// simdType - simd vector type -// simdBaseJitType - element type of vector -// simdSize - size of vector in bytes -GenTree* Compiler::gtNewSIMDVectorZero(var_types simdType, CorInfoType simdBaseJitType, unsigned simdSize) -{ - var_types simdBaseType = genActualType(JitType2PreciseVarType(simdBaseJitType)); - GenTree* initVal = gtNewZeroConNode(simdBaseType); - initVal->gtType = simdBaseType; - return gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType, simdSize); -} -#endif // FEATURE_SIMD - GenTreeCall* Compiler::gtNewIndCallNode(GenTree* addr, var_types type, const DebugInfo& di) { return gtNewCallNode(CT_INDIRECT, (CORINFO_METHOD_HANDLE)addr, type, di); @@ -7900,6 +8006,20 @@ GenTree* Compiler::gtClone(GenTree* tree, bool complexOK) copy = gtNewLconNode(tree->AsLngCon()->gtLconVal); break; + case GT_CNS_DBL: + { + copy = gtNewDconNode(tree->AsDblCon()->gtDconVal, tree->TypeGet()); + break; + } + + case GT_CNS_VEC: + { + GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), tree->AsVecCon()->GetSimdBaseJitType()); + vecCon->gtSimd32Val = tree->AsVecCon()->gtSimd32Val; + copy = vecCon; + break; + } + case GT_LCL_VAR: copy = gtNewLclvNode(tree->AsLclVarCommon()->GetLclNum(), tree->TypeGet() DEBUGARG(tree->AsLclVar()->gtLclILoffs)); @@ -8063,14 +8183,23 @@ GenTree* 
Compiler::gtCloneExpr(
                 goto DONE;

             case GT_CNS_DBL:
-                copy         = gtNewDconNode(tree->AsDblCon()->gtDconVal);
-                copy->gtType = tree->gtType; // keep the same type
+            {
+                copy = gtNewDconNode(tree->AsDblCon()->gtDconVal, tree->TypeGet());
                 goto DONE;
+            }

             case GT_CNS_STR:
                 copy = gtNewSconNode(tree->AsStrCon()->gtSconCPX, tree->AsStrCon()->gtScpHnd);
                 goto DONE;

+            case GT_CNS_VEC:
+            {
+                GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), tree->AsVecCon()->GetSimdBaseJitType());
+                vecCon->gtSimd32Val   = tree->AsVecCon()->gtSimd32Val;
+                copy = vecCon;
+                goto DONE;
+            }
+
             case GT_LCL_VAR:

                 if (tree->AsLclVarCommon()->GetLclNum() == varNum)
@@ -9028,6 +9157,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
         case GT_CNS_LNG:
         case GT_CNS_DBL:
         case GT_CNS_STR:
+        case GT_CNS_VEC:
         case GT_MEMORYBARRIER:
         case GT_JMP:
         case GT_JCC:
@@ -11002,9 +11132,60 @@ void Compiler::gtDispConst(GenTree* tree)
                 printf(" %#.17g", tree->AsDblCon()->gtDconVal);
             }
             break;
+
         case GT_CNS_STR:
             printf("<string constant>");
             break;
+
+        case GT_CNS_VEC:
+        {
+            GenTreeVecCon* vecCon = tree->AsVecCon();
+
+            switch (vecCon->TypeGet())
+            {
+#if defined(FEATURE_SIMD)
+                case TYP_LONG:
+                case TYP_DOUBLE:
+                case TYP_SIMD8:
+                {
+                    // TODO-1stClassStructs: do not retype SIMD nodes
+                    simd8_t simdVal = vecCon->gtSimd8Val;
+                    printf("<0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1]);
+                    break;
+                }
+
+                case TYP_SIMD12:
+                {
+                    simd12_t simdVal = vecCon->gtSimd12Val;
+                    printf("<0x%08x, 0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1], simdVal.u32[2]);
+                    break;
+                }
+
+                case TYP_SIMD16:
+                {
+                    simd16_t simdVal = vecCon->gtSimd16Val;
+                    printf("<0x%08x, 0x%08x, 0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1], simdVal.u32[2],
+                           simdVal.u32[3]);
+                    break;
+                }
+
+                case TYP_SIMD32:
+                {
+                    simd32_t simdVal = vecCon->gtSimd32Val;
+                    printf("<0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx>", simdVal.u64[0], simdVal.u64[1],
+                           simdVal.u64[2], simdVal.u64[3]);
+                    break;
+                }
+#endif // FEATURE_SIMD
+
+                default:
+                {
+                    unreached();
+                }
+            }
+            break;
+        }
+
         default:
             assert(!"unexpected constant node");
     }
@@ -15025,13 +15206,14 @@ GenTree* Compiler::gtNewTempAssign(
     if (dstTyp == TYP_UNDEF)
     {
         varDsc->lvType = dstTyp = genActualType(valTyp);
+    }
+
 #if FEATURE_SIMD
-        if (varTypeIsSIMD(dstTyp))
-        {
-            varDsc->lvSIMDType = 1;
-        }
-#endif
+    if (varTypeIsSIMD(dstTyp))
+    {
+        varDsc->lvSIMDType = 1;
     }
+#endif

 #ifdef DEBUG
     // Make sure the actual types match.
@@ -16665,6 +16847,221 @@ bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
 }
 #endif // TARGET_X86

+#if defined(FEATURE_HW_INTRINSICS)
+//----------------------------------------------------------------------------------------------
+// IsHWIntrinsicCreateConstant: Determines if a HWIntrinsic node represents a vector constant
+//
+// Arguments:
+//    node      - The node to check
+//    simd32Val - The vector constant being constructed
+//
+// Returns:
+//    true if node represents a constant; otherwise, false
+bool GenTreeVecCon::IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val)
+{
+    var_types simdType     = node->TypeGet();
+    var_types simdBaseType = node->GetSimdBaseType();
+    unsigned  simdSize     = node->GetSimdSize();
+
+    size_t argCnt    = node->GetOperandCount();
+    size_t cnsArgCnt = 0;
+
+    switch (node->GetHWIntrinsicId())
+    {
+        case NI_Vector128_Create:
+#if defined(TARGET_XARCH)
+        case NI_Vector128_CreateScalarUnsafe:
+        case NI_Vector256_Create:
+        case NI_Vector256_CreateScalarUnsafe:
+#elif defined(TARGET_ARM64)
+        case NI_Vector64_Create:
+#endif
+        {
+            // These intrinsics are meant to set the same value to every element.
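+            // For example, Vector128.Create(1.0f) broadcasts 1.0f into every lane,
+            // so a single constant operand is replicated below before being counted.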
+            if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, simd32Val, simdBaseType))
+            {
+                // Now assign the rest of the arguments.
+                for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
+                {
+                    HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
+                }
+
+                cnsArgCnt = 1;
+            }
+            else
+            {
+                for (unsigned i = 1; i <= argCnt; i++)
+                {
+                    if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, simd32Val, simdBaseType))
+                    {
+                        cnsArgCnt++;
+                    }
+                }
+            }
+
+            assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType))));
+            return argCnt == cnsArgCnt;
+        }
+
+        default:
+        {
+            return false;
+        }
+    }
+}
+
+//----------------------------------------------------------------------------------------------
+// HandleArgForHWIntrinsicCreate: Processes an argument for the GenTreeVecCon::IsHWIntrinsicCreateConstant method
+//
+// Arguments:
+//     arg       - The argument to process
+//     argIdx    - The index of the argument being processed
+//     simd32Val - The vector constant being constructed
+//     baseType  - The base type of the vector constant
+//
+// Returns:
+//     true if arg was a constant; otherwise, false
+bool GenTreeVecCon::HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, simd32_t& simd32Val, var_types baseType)
+{
+    switch (baseType)
+    {
+        case TYP_BYTE:
+        case TYP_UBYTE:
+        {
+            if (arg->IsCnsIntOrI())
+            {
+                simd32Val.i8[argIdx] = static_cast<int8_t>(arg->AsIntCon()->gtIconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                assert(simd32Val.i8[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_SHORT:
+        case TYP_USHORT:
+        {
+            if (arg->IsCnsIntOrI())
+            {
+                simd32Val.i16[argIdx] = static_cast<int16_t>(arg->AsIntCon()->gtIconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                assert(simd32Val.i16[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_INT:
+        case TYP_UINT:
+        {
+            if (arg->IsCnsIntOrI())
+            {
+                simd32Val.i32[argIdx] = static_cast<int32_t>(arg->AsIntCon()->gtIconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                assert(simd32Val.i32[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_LONG:
+        case TYP_ULONG:
+        {
+#if defined(TARGET_64BIT)
+            if (arg->IsCnsIntOrI())
+            {
+                simd32Val.i64[argIdx] = static_cast<int64_t>(arg->AsIntCon()->gtIconVal);
+                return true;
+            }
+#else
+            if (arg->OperIsLong() && arg->AsOp()->gtOp1->IsCnsIntOrI() && arg->AsOp()->gtOp2->IsCnsIntOrI())
+            {
+                // 32-bit targets will decompose GT_CNS_LNG into two GT_CNS_INT
+                // We need to reconstruct the 64-bit value in order to handle this
+
+                INT64 gtLconVal = arg->AsOp()->gtOp2->AsIntCon()->gtIconVal;
+                gtLconVal <<= 32;
+                gtLconVal |= arg->AsOp()->gtOp1->AsIntCon()->gtIconVal;
+
+                simd32Val.i64[argIdx] = gtLconVal;
+                return true;
+            }
+#endif // TARGET_64BIT
+            else
+            {
+                // We expect the constant to have been already zeroed
+                assert(simd32Val.i64[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_FLOAT:
+        {
+            if (arg->IsCnsFltOrDbl())
+            {
+                simd32Val.f32[argIdx] = static_cast<float>(arg->AsDblCon()->gtDconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                // We check against the i32, rather than f32, to account for -0.0
+                assert(simd32Val.i32[argIdx] == 0);
+            }
+            break;
+        }
+
+        case TYP_DOUBLE:
+        {
+            if (arg->IsCnsFltOrDbl())
+            {
+                simd32Val.f64[argIdx] = static_cast<double>(arg->AsDblCon()->gtDconVal);
+                return true;
+            }
+            else
+            {
+                // We expect the constant to have been already zeroed
+                // We check against the i64, rather than f64, to account for -0.0
+                assert(simd32Val.i64[argIdx] == 0);
+            }
+            break;
+ } + + default: + { + unreached(); + } + } + + return false; +} +#endif // FEATURE_HW_INTRINSICS + +//---------------------------------------------------------------------------------------------- +// GetSimdBaseType: Gets the var_type for the SimdBaseJitType of a GenTreeVecCon node +// +// Returns: +// the var_type for the SimdBaseJitType of a GenTreeVecCon +var_types GenTreeVecCon::GetSimdBaseType() const +{ + CorInfoType simdBaseJitType = GetSimdBaseJitType(); + + if (simdBaseJitType == CORINFO_TYPE_UNDEF) + { + return TYP_UNKNOWN; + } + return JitType2PreciseVarType(simdBaseJitType); +} + //------------------------------------------------------------------------ // IsFieldAddr: Is "this" a static or class field address? // @@ -16800,247 +17197,6 @@ bool Compiler::gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_ return fieldTyp != TYP_REF; } -#ifdef FEATURE_SIMD -//------------------------------------------------------------------------ -// gtGetSIMDZero: Get a zero value of the appropriate SIMD type. -// -// Arguments: -// var_types - The simdType -// simdBaseJitType - The SIMD base JIT type we need -// simdHandle - The handle for the SIMD type -// -// Return Value: -// A node generating the appropriate Zero, if we are able to discern it, -// otherwise null (note that this shouldn't happen, but callers should -// be tolerant of this case). - -GenTree* Compiler::gtGetSIMDZero(var_types simdType, CorInfoType simdBaseJitType, CORINFO_CLASS_HANDLE simdHandle) -{ - bool found = false; - bool isHWSIMD = true; - noway_assert(m_simdHandleCache != nullptr); - - // First, determine whether this is Vector. - if (simdType == getSIMDVectorType()) - { - switch (simdBaseJitType) - { - case CORINFO_TYPE_FLOAT: - found = (simdHandle == m_simdHandleCache->SIMDFloatHandle); - break; - case CORINFO_TYPE_DOUBLE: - found = (simdHandle == m_simdHandleCache->SIMDDoubleHandle); - break; - case CORINFO_TYPE_INT: - found = (simdHandle == m_simdHandleCache->SIMDIntHandle); - break; - case CORINFO_TYPE_USHORT: - found = (simdHandle == m_simdHandleCache->SIMDUShortHandle); - break; - case CORINFO_TYPE_UBYTE: - found = (simdHandle == m_simdHandleCache->SIMDUByteHandle); - break; - case CORINFO_TYPE_SHORT: - found = (simdHandle == m_simdHandleCache->SIMDShortHandle); - break; - case CORINFO_TYPE_BYTE: - found = (simdHandle == m_simdHandleCache->SIMDByteHandle); - break; - case CORINFO_TYPE_LONG: - found = (simdHandle == m_simdHandleCache->SIMDLongHandle); - break; - case CORINFO_TYPE_UINT: - found = (simdHandle == m_simdHandleCache->SIMDUIntHandle); - break; - case CORINFO_TYPE_ULONG: - found = (simdHandle == m_simdHandleCache->SIMDULongHandle); - break; - case CORINFO_TYPE_NATIVEINT: - found = (simdHandle == m_simdHandleCache->SIMDNIntHandle); - break; - case CORINFO_TYPE_NATIVEUINT: - found = (simdHandle == m_simdHandleCache->SIMDNUIntHandle); - break; - default: - break; - } - if (found) - { - isHWSIMD = false; - } - } - - if (!found) - { - // We must still have isHWSIMD set to true, and the only non-HW types left are the fixed types. 
- switch (simdType) - { - case TYP_SIMD8: - switch (simdBaseJitType) - { - case CORINFO_TYPE_FLOAT: - if (simdHandle == m_simdHandleCache->SIMDVector2Handle) - { - isHWSIMD = false; - } -#if defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS) - else - { - assert(simdHandle == m_simdHandleCache->Vector64FloatHandle); - } - break; - case CORINFO_TYPE_INT: - assert(simdHandle == m_simdHandleCache->Vector64IntHandle); - break; - case CORINFO_TYPE_USHORT: - assert(simdHandle == m_simdHandleCache->Vector64UShortHandle); - break; - case CORINFO_TYPE_UBYTE: - assert(simdHandle == m_simdHandleCache->Vector64UByteHandle); - break; - case CORINFO_TYPE_SHORT: - assert(simdHandle == m_simdHandleCache->Vector64ShortHandle); - break; - case CORINFO_TYPE_BYTE: - assert(simdHandle == m_simdHandleCache->Vector64ByteHandle); - break; - case CORINFO_TYPE_UINT: - assert(simdHandle == m_simdHandleCache->Vector64UIntHandle); -#endif // defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS) - break; - default: - break; - } - break; - - case TYP_SIMD12: - assert((simdBaseJitType == CORINFO_TYPE_FLOAT) && (simdHandle == m_simdHandleCache->SIMDVector3Handle)); - isHWSIMD = false; - break; - - case TYP_SIMD16: - switch (simdBaseJitType) - { - case CORINFO_TYPE_FLOAT: - if (simdHandle == m_simdHandleCache->SIMDVector4Handle) - { - isHWSIMD = false; - } -#if defined(FEATURE_HW_INTRINSICS) - else - { - assert(simdHandle == m_simdHandleCache->Vector128FloatHandle); - } - break; - case CORINFO_TYPE_DOUBLE: - assert(simdHandle == m_simdHandleCache->Vector128DoubleHandle); - break; - case CORINFO_TYPE_INT: - assert(simdHandle == m_simdHandleCache->Vector128IntHandle); - break; - case CORINFO_TYPE_USHORT: - assert(simdHandle == m_simdHandleCache->Vector128UShortHandle); - break; - case CORINFO_TYPE_UBYTE: - assert(simdHandle == m_simdHandleCache->Vector128UByteHandle); - break; - case CORINFO_TYPE_SHORT: - assert(simdHandle == m_simdHandleCache->Vector128ShortHandle); - break; - case CORINFO_TYPE_BYTE: - assert(simdHandle == m_simdHandleCache->Vector128ByteHandle); - break; - case CORINFO_TYPE_LONG: - assert(simdHandle == m_simdHandleCache->Vector128LongHandle); - break; - case CORINFO_TYPE_UINT: - assert(simdHandle == m_simdHandleCache->Vector128UIntHandle); - break; - case CORINFO_TYPE_ULONG: - assert(simdHandle == m_simdHandleCache->Vector128ULongHandle); - break; - case CORINFO_TYPE_NATIVEINT: - assert(simdHandle == m_simdHandleCache->Vector128NIntHandle); - break; - case CORINFO_TYPE_NATIVEUINT: - assert(simdHandle == m_simdHandleCache->Vector128NUIntHandle); - break; -#endif // defined(FEATURE_HW_INTRINSICS) - - default: - break; - } - break; - -#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) - case TYP_SIMD32: - switch (simdBaseJitType) - { - case CORINFO_TYPE_FLOAT: - assert(simdHandle == m_simdHandleCache->Vector256FloatHandle); - break; - case CORINFO_TYPE_DOUBLE: - assert(simdHandle == m_simdHandleCache->Vector256DoubleHandle); - break; - case CORINFO_TYPE_INT: - assert(simdHandle == m_simdHandleCache->Vector256IntHandle); - break; - case CORINFO_TYPE_USHORT: - assert(simdHandle == m_simdHandleCache->Vector256UShortHandle); - break; - case CORINFO_TYPE_UBYTE: - assert(simdHandle == m_simdHandleCache->Vector256UByteHandle); - break; - case CORINFO_TYPE_SHORT: - assert(simdHandle == m_simdHandleCache->Vector256ShortHandle); - break; - case CORINFO_TYPE_BYTE: - assert(simdHandle == m_simdHandleCache->Vector256ByteHandle); - break; - case CORINFO_TYPE_LONG: - assert(simdHandle == 
m_simdHandleCache->Vector256LongHandle); - break; - case CORINFO_TYPE_UINT: - assert(simdHandle == m_simdHandleCache->Vector256UIntHandle); - break; - case CORINFO_TYPE_ULONG: - assert(simdHandle == m_simdHandleCache->Vector256ULongHandle); - break; - case CORINFO_TYPE_NATIVEINT: - assert(simdHandle == m_simdHandleCache->Vector256NIntHandle); - break; - case CORINFO_TYPE_NATIVEUINT: - assert(simdHandle == m_simdHandleCache->Vector256NUIntHandle); - break; - default: - break; - } - break; -#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS - default: - break; - } - } - - unsigned size = genTypeSize(simdType); - if (isHWSIMD) - { -#if defined(FEATURE_HW_INTRINSICS) - return gtNewSimdZeroNode(simdType, simdBaseJitType, size, /* isSimdAsHWIntrinsic */ false); -#else - JITDUMP("Coudn't find the matching SIMD type for %s<%s> in gtGetSIMDZero\n", varTypeName(simdType), - varTypeName(JitType2PreciseVarType(simdBaseJitType))); - - return nullptr; -#endif // FEATURE_HW_INTRINSICS - } - else - { - return gtNewSIMDVectorZero(simdType, simdBaseJitType, size); - } -} -#endif // FEATURE_SIMD - CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree) { CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE; @@ -17158,7 +17314,20 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree) } break; #endif + case GT_CNS_VEC: + { +#if defined(FEATURE_HW_INTRINSICS) + structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsVecCon()->GetSimdBaseJitType()); +#endif // FEATURE_HW_INTRINSICS + +#if defined(FEATURE_SIMD) + if (structHnd == NO_CLASS_HANDLE) + { + structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsVecCon()->GetSimdBaseJitType()); + } +#endif // FEATURE_SIMD break; + } } // TODO-1stClassStructs: add a check that `structHnd != NO_CLASS_HANDLE`, // nowadays it won't work because the right part of an ASG could have struct type without a handle @@ -18357,20 +18526,6 @@ bool GenTree::isContainableHWIntrinsic() const return true; } - default: - { - return false; - } - } -#elif TARGET_ARM64 - switch (AsHWIntrinsic()->GetHWIntrinsicId()) - { - case NI_Vector64_get_Zero: - case NI_Vector128_get_Zero: - { - return true; - } - default: { return false; @@ -18575,7 +18730,7 @@ GenTree* Compiler::gtNewSimdAbsNode( else { GenTree* tmp; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); GenTree* op1Dup1; op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, @@ -18586,11 +18741,11 @@ GenTree* Compiler::gtNewSimdAbsNode( nullptr DEBUGARG("Clone op1 for vector abs")); // op1 = op1 < Zero - tmp = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + tmp = gtNewZeroConNode(type, simdBaseJitType); op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, tmp, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); // tmp = Zero - op1Dup1 - tmp = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + tmp = gtNewZeroConNode(type, simdBaseJitType); tmp = gtNewSimdBinOpNode(GT_SUB, type, tmp, op1Dup1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); // result = ConditionalSelect(op1, tmp, op1Dup2) @@ -18646,7 +18801,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, } NamedIntrinsic intrinsic = NI_Illegal; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); switch (op) { @@ -19377,7 
+19532,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, assert(varTypeIsArithmetic(simdBaseType)); NamedIntrinsic intrinsic = NI_Illegal; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); switch (op) { @@ -20025,8 +20180,6 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op, // We want to generate a comparison along the lines of // GT_XX(op1, op2).As() == Vector128.AllBitsSet - NamedIntrinsic getAllBitsSet = NI_Illegal; - if (simdSize == 32) { // TODO-XArch-CQ: It's a non-trivial amount of work to support these @@ -20034,14 +20187,11 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op, // other things, inverting the comparison and potentially support for a // new Avx.TestNotZ intrinsic to ensure the codegen remains efficient. assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - - intrinsic = NI_Vector256_op_Equality; - getAllBitsSet = NI_Vector256_get_AllBitsSet; + intrinsic = NI_Vector256_op_Equality; } else { - intrinsic = NI_Vector128_op_Equality; - getAllBitsSet = NI_Vector128_get_AllBitsSet; + intrinsic = NI_Vector128_op_Equality; } op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, @@ -20058,7 +20208,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op, simdBaseJitType = CORINFO_TYPE_LONG; } - op2 = gtNewSimdHWIntrinsicNode(simdType, getAllBitsSet, simdBaseJitType, simdSize); + op2 = gtNewAllBitsSetConNode(simdType, simdBaseJitType); break; } #elif defined(TARGET_ARM64) @@ -20076,17 +20226,13 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op, // We want to generate a comparison along the lines of // GT_XX(op1, op2).As() == Vector128.AllBitsSet - NamedIntrinsic getAllBitsSet = NI_Illegal; - if (simdSize == 8) { - intrinsic = NI_Vector64_op_Equality; - getAllBitsSet = NI_Vector64_get_AllBitsSet; + intrinsic = NI_Vector64_op_Equality; } else { - intrinsic = NI_Vector128_op_Equality; - getAllBitsSet = NI_Vector128_get_AllBitsSet; + intrinsic = NI_Vector128_op_Equality; } op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, @@ -20103,7 +20249,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op, simdBaseJitType = CORINFO_TYPE_LONG; } - op2 = gtNewSimdHWIntrinsicNode(simdType, getAllBitsSet, simdBaseJitType, simdSize); + op2 = gtNewAllBitsSetConNode(simdType, simdBaseJitType); break; } #else @@ -20186,7 +20332,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op, simdBaseJitType = CORINFO_TYPE_LONG; } - op2 = gtNewSimdZeroNode(simdType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); + op2 = gtNewZeroConNode(simdType, simdBaseJitType); break; } @@ -20231,7 +20377,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op, simdBaseJitType = CORINFO_TYPE_LONG; } - op2 = gtNewSimdZeroNode(simdType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); + op2 = gtNewZeroConNode(simdType, simdBaseJitType); break; } @@ -20506,7 +20652,7 @@ GenTree* Compiler::gtNewSimdMaxNode(var_types type, assert(varTypeIsArithmetic(simdBaseType)); NamedIntrinsic intrinsic = NI_Illegal; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); #if defined(TARGET_XARCH) if (simdSize == 32) @@ -20690,7 +20836,7 @@ GenTree* Compiler::gtNewSimdMinNode(var_types type, assert(varTypeIsArithmetic(simdBaseType)); NamedIntrinsic intrinsic 
= NI_Illegal; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); #if defined(TARGET_XARCH) if (simdSize == 32) @@ -20895,7 +21041,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, // code formatting, its too long to reasonably display here. CorInfoType opBaseJitType = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_SHORT : CORINFO_TYPE_USHORT; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic); tmp1 = gtNewSimdHWIntrinsicNode(type, gtNewIconNode(0x00FF), NI_Vector256_Create, opBaseJitType, simdSize, isSimdAsHWIntrinsic); @@ -20936,7 +21082,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, // return Avx2.Permute4x64(tmp4.AsUInt64(), SHUFFLE_WYZX).As(); CorInfoType opBaseJitType = (simdBaseType == TYP_SHORT) ? CORINFO_TYPE_INT : CORINFO_TYPE_UINT; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic); tmp1 = gtNewSimdHWIntrinsicNode(type, gtNewIconNode(0x0000FFFF), NI_Vector256_Create, opBaseJitType, simdSize, isSimdAsHWIntrinsic); @@ -20976,7 +21122,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, // return Avx2.Permute4x64(tmp3.AsUInt64(), SHUFFLE_WYZX).AsUInt32(); CorInfoType opBaseJitType = (simdBaseType == TYP_INT) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic); GenTree* op1Dup; op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, @@ -21047,7 +21193,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, // return Sse2.PackUnsignedSaturate(tmp1, tmp2).As(); CorInfoType opBaseJitType = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_SHORT : CORINFO_TYPE_USHORT; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic); tmp1 = gtNewSimdHWIntrinsicNode(type, gtNewIconNode(0x00FF), NI_Vector128_Create, opBaseJitType, simdSize, isSimdAsHWIntrinsic); @@ -21074,7 +21220,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, // ... CorInfoType opBaseJitType = (simdBaseType == TYP_SHORT) ? CORINFO_TYPE_INT : CORINFO_TYPE_UINT; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic); if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) { @@ -21133,7 +21279,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); - clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType); + clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); GenTree* tmp1Dup; tmp1 = impCloneExpr(tmp1, &tmp1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, @@ -21168,7 +21314,7 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, // return Sse2.UnpackLow(tmp1, tmp2).As(); CorInfoType opBaseJitType = (simdBaseType == TYP_INT) ? 
CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic); GenTree* op1Dup; op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, @@ -21305,7 +21451,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, { // AllBitsSet represents indices that are always "out of range" which means zero should be // selected for every element. We can special-case this down to just returning a zero node - return gtNewSimdZeroNode(type, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); + return gtNewZeroConNode(type, simdBaseJitType); } if (op2->IsVectorZero()) @@ -21321,16 +21467,16 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, size_t elementCount = simdSize / elementSize; #if defined(TARGET_XARCH) - uint8_t control = 0; - bool crossLane = false; - bool needsZero = varTypeIsSmallInt(simdBaseType); - uint64_t value = 0; - uint8_t vecCns[32] = {}; - uint8_t mskCns[32] = {}; + uint8_t control = 0; + bool crossLane = false; + bool needsZero = varTypeIsSmallInt(simdBaseType); + uint64_t value = 0; + simd32_t vecCns = {}; + simd32_t mskCns = {}; for (size_t index = 0; index < elementCount; index++) { - value = op2->GetIntegralVectorConstElement(index); + value = op2->GetIntegralVectorConstElement(index, simdBaseType); if (value < elementCount) { @@ -21363,12 +21509,12 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, for (uint32_t i = 0; i < elementSize; i++) { - vecCns[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i); + vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i); // When Ssse3 is not supported, we need to adjust the constant to be AllBitsSet // so that we can emit a ConditionalSelect(op2, retNode, zeroNode). - mskCns[(index * elementSize) + i] = 0xFF; + mskCns.u8[(index * elementSize) + i] = 0xFF; } } else @@ -21382,12 +21528,12 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, for (uint32_t i = 0; i < elementSize; i++) { - vecCns[(index * elementSize) + i] = 0xFF; + vecCns.u8[(index * elementSize) + i] = 0xFF; // When Ssse3 is not supported, we need to adjust the constant to be Zero // so that we can emit a ConditionalSelect(op2, retNode, zeroNode). 
- mskCns[(index * elementSize) + i] = 0x00; + mskCns.u8[(index * elementSize) + i] = 0x00; } } } @@ -21424,33 +21570,19 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, GenTree* op1Lower = gtNewSimdHWIntrinsicNode(type, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); - IntrinsicNodeBuilder nodeBuilder1(getAllocator(CMK_ASTNode), 16); - - for (uint32_t i = 0; i < 16; i++) - { - nodeBuilder1.AddOperand(i, gtNewIconNode(vecCns[i])); - } + op2 = gtNewVconNode(TYP_SIMD16, simdBaseJitType); + op2->AsVecCon()->gtSimd16Val = vecCns.v128[0]; - op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder1), NI_Vector128_Create, simdBaseJitType, 16, - isSimdAsHWIntrinsic); - - op1Lower = gtNewSimdHWIntrinsicNode(type, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16, + op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16, isSimdAsHWIntrinsic); GenTree* op1Upper = gtNewSimdHWIntrinsicNode(type, op1Dup, gtNewIconNode(1), NI_AVX_ExtractVector128, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); - IntrinsicNodeBuilder nodeBuilder2(getAllocator(CMK_ASTNode), 16); - - for (uint32_t i = 0; i < 16; i++) - { - nodeBuilder2.AddOperand(i, gtNewIconNode(vecCns[16 + i])); - } - - op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder2), NI_Vector128_Create, simdBaseJitType, 16, - isSimdAsHWIntrinsic); + op2 = gtNewVconNode(TYP_SIMD16, simdBaseJitType); + op2->AsVecCon()->gtSimd16Val = vecCns.v128[1]; - op1Upper = gtNewSimdHWIntrinsicNode(type, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16, + op1Upper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16, isSimdAsHWIntrinsic); return gtNewSimdHWIntrinsicNode(type, op1Lower, op1Upper, gtNewIconNode(1), NI_AVX_InsertVector128, @@ -21459,18 +21591,13 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, if (elementSize == 4) { - IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), elementCount); - for (uint32_t i = 0; i < elementCount; i++) { - uint8_t value = (uint8_t)(vecCns[i * elementSize] / elementSize); - nodeBuilder.AddOperand(i, gtNewIconNode(value)); + vecCns.u32[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize); } - CorInfoType indicesJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT; - - op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector256_Create, indicesJitType, simdSize, - isSimdAsHWIntrinsic); + op2 = gtNewVconNode(type, simdBaseJitType); + op2->AsVecCon()->gtSimd32Val = vecCns; // swap the operands to match the encoding requirements retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize, @@ -21491,15 +21618,8 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, { simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; - IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize); - - for (uint32_t i = 0; i < simdSize; i++) - { - nodeBuilder.AddOperand(i, gtNewIconNode(vecCns[i])); - } - - op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector128_Create, simdBaseJitType, simdSize, - isSimdAsHWIntrinsic); + op2 = gtNewVconNode(type, simdBaseJitType); + op2->AsVecCon()->gtSimd16Val = vecCns.v128[0]; return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); @@ -21544,69 +21664,53 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, { assert(!compIsaSupportedDebugOnly(InstructionSet_SSSE3)); - IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize); - - for (uint32_t i = 0; i < simdSize; i++) - { - nodeBuilder.AddOperand(i, gtNewIconNode(mskCns[i])); - } + op2 = gtNewVconNode(type, simdBaseJitType); + op2->AsVecCon()->gtSimd16Val = mskCns.v128[0]; - op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector128_Create, simdBaseJitType, simdSize, - isSimdAsHWIntrinsic); - - GenTree* zero = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + GenTree* zero = gtNewZeroConNode(type, simdBaseJitType); retNode = gtNewSimdCndSelNode(type, op2, retNode, zero, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } return retNode; #elif defined(TARGET_ARM64) - uint64_t value = 0; - uint8_t vecCns[16] = {}; + uint64_t value = 0; + simd16_t vecCns = {}; for (size_t index = 0; index < elementCount; index++) { - value = op2->GetIntegralVectorConstElement(index); + value = op2->GetIntegralVectorConstElement(index, simdBaseType); if (value < elementCount) { for (uint32_t i = 0; i < elementSize; i++) { - vecCns[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i); + vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i); } } else { for (uint32_t i = 0; i < elementSize; i++) { - vecCns[(index * elementSize) + i] = 0xFF; + vecCns.u8[(index * elementSize) + i] = 0xFF; } } } - NamedIntrinsic createIntrinsic = NI_Vector64_Create; NamedIntrinsic lookupIntrinsic = NI_AdvSimd_VectorTableLookup; if (simdSize == 16) { - createIntrinsic = NI_Vector128_Create; lookupIntrinsic = NI_AdvSimd_Arm64_VectorTableLookup; op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } - IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize); - - for (uint32_t i = 0; i < simdSize; i++) - { - nodeBuilder.AddOperand(i, gtNewIconNode(vecCns[i])); - } - // VectorTableLookup is only valid on byte/sbyte simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; - op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), createIntrinsic, simdBaseJitType, simdSize, - isSimdAsHWIntrinsic); + op2 = gtNewVconNode(type, simdBaseJitType); + op2->AsVecCon()->gtSimd16Val = vecCns; return gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); #else @@ -21677,7 +21781,7 @@ GenTree* Compiler::gtNewSimdSumNode( NamedIntrinsic intrinsic = NI_Illegal; GenTree* tmp = nullptr; - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(simdType, simdBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(simdType, simdBaseJitType, isSimdAsHWIntrinsic); #if defined(TARGET_XARCH) assert(!varTypeIsByte(simdBaseType) && !varTypeIsLong(simdBaseType)); @@ -21840,7 +21944,7 @@ GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps op, assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); } - op2 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + op2 = gtNewZeroConNode(type, simdBaseJitType); // Zero - op1 return gtNewSimdBinOpNode(GT_SUB, type, op2, op1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); @@ -21849,11 +21953,7 @@ GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps op, case GT_NOT: { assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX)); - - intrinsic = (simdSize == 32) ? NI_Vector256_get_AllBitsSet : NI_Vector128_get_AllBitsSet; - op2 = gtNewSimdHWIntrinsicNode(type, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); - - // op1 ^ AllBitsSet + op2 = gtNewAllBitsSetConNode(type, simdBaseJitType); return gtNewSimdBinOpNode(GT_XOR, type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } #elif defined(TARGET_ARM64) @@ -21879,7 +21979,7 @@ GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps op, else { // Zero - op1 - op2 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + op2 = gtNewZeroConNode(type, simdBaseJitType); return gtNewSimdBinOpNode(GT_SUB, type, op2, op1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } } @@ -22006,11 +22106,11 @@ GenTree* Compiler::gtNewSimdWidenLowerNode( } else { - tmp1 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + tmp1 = gtNewZeroConNode(type, simdBaseJitType); if (varTypeIsSigned(simdBaseType)) { - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); GenTree* op1Dup; op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, @@ -22133,7 +22233,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode( else if (varTypeIsFloating(simdBaseType)) { assert(simdBaseType == TYP_FLOAT); - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); GenTree* op1Dup; op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, @@ -22183,11 +22283,11 @@ GenTree* Compiler::gtNewSimdWidenUpperNode( } else { - tmp1 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + tmp1 = gtNewZeroConNode(type, simdBaseJitType); if (varTypeIsSigned(simdBaseType)) { - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); GenTree* op1Dup; op1 = 
impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, @@ -22246,7 +22346,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode( assert(intrinsic != NI_Illegal); tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); - zero = gtNewSimdZeroNode(TYP_SIMD16, simdBaseJitType, 16, isSimdAsHWIntrinsic); + zero = gtNewZeroConNode(TYP_SIMD16, simdBaseJitType); tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128, simdBaseJitType, 16, isSimdAsHWIntrinsic); return gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_GetLower, simdBaseJitType, simdSize, @@ -22341,32 +22441,6 @@ GenTree* Compiler::gtNewSimdWithElementNode(var_types type, return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } -GenTree* Compiler::gtNewSimdZeroNode(var_types type, - CorInfoType simdBaseJitType, - unsigned simdSize, - bool isSimdAsHWIntrinsic) -{ - assert(IsBaselineSimdIsaSupportedDebugOnly()); - - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - - var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); - assert(varTypeIsArithmetic(simdBaseType)); - - NamedIntrinsic intrinsic = NI_Illegal; - -#if defined(TARGET_XARCH) - intrinsic = (simdSize == 32) ? NI_Vector256_get_Zero : NI_Vector128_get_Zero; -#elif defined(TARGET_ARM64) - intrinsic = (simdSize > 8) ? NI_Vector128_get_Zero : NI_Vector64_get_Zero; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - return gtNewSimdHWIntrinsicNode(type, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); -} - GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID) { return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index a813c2b9300876..34ef15e8ebd0e0 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -534,7 +534,7 @@ enum GenTreeFlags : unsigned int GTF_IND_FLAGS = GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | GTF_IND_UNALIGNED | GTF_IND_INVARIANT | GTF_IND_NONNULL | GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP #if defined(TARGET_XARCH) - | GTF_IND_DONT_EXTEND + | GTF_IND_DONT_EXTEND #endif // TARGET_XARCH , @@ -931,7 +931,12 @@ struct GenTree bool isContainedFltOrDblImmed() const { - return isContained() && (OperGet() == GT_CNS_DBL); + return isContained() && OperIs(GT_CNS_DBL); + } + + bool isContainedVecImmed() const + { + return isContained() && OperIs(GT_CNS_VEC); } bool isLclField() const @@ -950,7 +955,7 @@ struct GenTree bool isUsedFromMemory() const { - return ((isContained() && (isMemoryOp() || (OperGet() == GT_LCL_VAR) || (OperGet() == GT_CNS_DBL))) || + return ((isContained() && (isMemoryOp() || OperIs(GT_LCL_VAR, GT_CNS_DBL, GT_CNS_VEC))) || isUsedFromSpillTemp()); } @@ -1097,8 +1102,8 @@ struct GenTree if (gtType == TYP_VOID) { // These are the only operators which can produce either VOID or non-VOID results. 
- assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsSIMD() || - OperIsHWIntrinsic()); + assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsSimdOrHWintrinsic() || + IsCnsVec()); return false; } @@ -1155,8 +1160,8 @@ struct GenTree static bool OperIsConst(genTreeOps gtOper) { - static_assert_no_msg(AreContiguous(GT_CNS_INT, GT_CNS_LNG, GT_CNS_DBL, GT_CNS_STR)); - return (GT_CNS_INT <= gtOper) && (gtOper <= GT_CNS_STR); + static_assert_no_msg(AreContiguous(GT_CNS_INT, GT_CNS_LNG, GT_CNS_DBL, GT_CNS_STR, GT_CNS_VEC)); + return (GT_CNS_INT <= gtOper) && (gtOper <= GT_CNS_VEC); } bool OperIsConst() const @@ -1726,16 +1731,13 @@ struct GenTree bool IsValidCallArgument(); #endif // DEBUG - inline bool IsFPZero() const; inline bool IsIntegralConst(ssize_t constVal) const; - inline bool IsIntegralConstVector(ssize_t constVal) const; - inline bool IsSIMDZero() const; inline bool IsFloatPositiveZero() const; inline bool IsVectorZero() const; inline bool IsVectorAllBitsSet() const; inline bool IsVectorConst(); - inline uint64_t GetIntegralVectorConstElement(size_t index); + inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType); inline bool IsBoxedValue(); @@ -2092,6 +2094,8 @@ struct GenTree inline bool IsCnsNonZeroFltOrDbl() const; + inline bool IsCnsVec() const; + bool IsIconHandle() const { return (gtOper == GT_CNS_INT) && ((gtFlags & GTF_ICON_HDL_MASK) != 0); @@ -3250,6 +3254,197 @@ struct GenTreeStrCon : public GenTree #endif }; +// GenTreeVecCon -- vector constant (GT_CNS_VEC) +// +struct GenTreeVecCon : public GenTree +{ + union { + simd8_t gtSimd8Val; + simd12_t gtSimd12Val; + simd16_t gtSimd16Val; + simd32_t gtSimd32Val; + }; + +private: + // TODO-1stClassStructs: Tracking the size and base type should be unnecessary since the + // size should be `gtType` and the handle should be looked up at callsites where required + + unsigned char gtSimdBaseJitType; // SIMD vector base JIT type + unsigned char gtSimdSize; // SIMD vector size in bytes + +public: + CorInfoType GetSimdBaseJitType() const + { + return (CorInfoType)gtSimdBaseJitType; + } + + void SetSimdBaseJitType(CorInfoType simdBaseJitType) + { + gtSimdBaseJitType = (unsigned char)simdBaseJitType; + assert(gtSimdBaseJitType == simdBaseJitType); + } + + var_types GetSimdBaseType() const; + + unsigned char GetSimdSize() const + { + return gtSimdSize; + } + + void SetSimdSize(unsigned simdSize) + { + gtSimdSize = (unsigned char)simdSize; + assert(gtSimdSize == simdSize); + } + +#if defined(FEATURE_HW_INTRINSICS) + static bool IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val); + + static bool HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, simd32_t& simd32Val, var_types baseType); +#endif // FEATURE_HW_INTRINSICS + + bool IsAllBitsSet() const + { + switch (gtType) + { +#if defined(FEATURE_SIMD) + case TYP_LONG: + case TYP_DOUBLE: + case TYP_SIMD8: + { + // TODO-1stClassStructs: do not retype SIMD nodes + return (gtSimd8Val.u64[0] == 0xFFFFFFFFFFFFFFFF); + } + + case TYP_SIMD12: + { + return (gtSimd12Val.u32[0] == 0xFFFFFFFF) && (gtSimd12Val.u32[1] == 0xFFFFFFFF) && + (gtSimd12Val.u32[2] == 0xFFFFFFFF); + } + + case TYP_SIMD16: + { + return (gtSimd16Val.u64[0] == 0xFFFFFFFFFFFFFFFF) && (gtSimd16Val.u64[1] == 0xFFFFFFFFFFFFFFFF); + } + + case TYP_SIMD32: + { + return (gtSimd32Val.u64[0] == 0xFFFFFFFFFFFFFFFF) && (gtSimd32Val.u64[1] == 0xFFFFFFFFFFFFFFFF) && + (gtSimd32Val.u64[2] == 0xFFFFFFFFFFFFFFFF) && 
(gtSimd32Val.u64[3] == 0xFFFFFFFFFFFFFFFF); + } +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + static bool Equals(const GenTreeVecCon* left, const GenTreeVecCon* right) + { + var_types gtType = left->TypeGet(); + + if (gtType != right->TypeGet()) + { + return false; + } + + switch (gtType) + { +#if defined(FEATURE_SIMD) + case TYP_LONG: + case TYP_DOUBLE: + case TYP_SIMD8: + { + // TODO-1stClassStructs: do not retype SIMD nodes + return (left->gtSimd8Val.u64[0] == right->gtSimd8Val.u64[0]); + } + + case TYP_SIMD12: + { + return (left->gtSimd12Val.u32[0] == right->gtSimd12Val.u32[0]) && + (left->gtSimd12Val.u32[1] == right->gtSimd12Val.u32[1]) && + (left->gtSimd12Val.u32[2] == right->gtSimd12Val.u32[2]); + } + + case TYP_SIMD16: + { + return (left->gtSimd16Val.u64[0] == right->gtSimd16Val.u64[0]) && + (left->gtSimd16Val.u64[1] == right->gtSimd16Val.u64[1]); + } + + case TYP_SIMD32: + { + return (left->gtSimd32Val.u64[0] == right->gtSimd32Val.u64[0]) && + (left->gtSimd32Val.u64[1] == right->gtSimd32Val.u64[1]) && + (left->gtSimd32Val.u64[2] == right->gtSimd32Val.u64[2]) && + (left->gtSimd32Val.u64[3] == right->gtSimd32Val.u64[3]); + } +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + bool IsZero() const + { + switch (gtType) + { +#if defined(FEATURE_SIMD) + case TYP_LONG: + case TYP_DOUBLE: + case TYP_SIMD8: + { + // TODO-1stClassStructs: do not retype SIMD nodes + return (gtSimd8Val.u64[0] == 0x0000000000000000); + } + + case TYP_SIMD12: + { + return (gtSimd12Val.u32[0] == 0x00000000) && (gtSimd12Val.u32[1] == 0x00000000) && + (gtSimd12Val.u32[2] == 0x00000000); + } + + case TYP_SIMD16: + { + return (gtSimd16Val.u64[0] == 0x0000000000000000) && (gtSimd16Val.u64[1] == 0x0000000000000000); + } + + case TYP_SIMD32: + { + return (gtSimd32Val.u64[0] == 0x0000000000000000) && (gtSimd32Val.u64[1] == 0x0000000000000000) && + (gtSimd32Val.u64[2] == 0x0000000000000000) && (gtSimd32Val.u64[3] == 0x0000000000000000); + } +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + GenTreeVecCon(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) + : GenTree(GT_CNS_VEC, type) + , gtSimdBaseJitType((unsigned char)simdBaseJitType) + , gtSimdSize((unsigned char)simdSize) + { + assert(varTypeIsSIMD(type)); + assert(gtSimdBaseJitType == simdBaseJitType); + assert(gtSimdSize == simdSize); + } + +#if DEBUGGABLE_GENTREE + GenTreeVecCon() : GenTree() + { + } +#endif +}; + // Common supertype of LCL_VAR, LCL_FLD, REG_VAR, PHI_ARG // This inherits from UnOp because lclvar stores are Unops struct GenTreeLclVarCommon : public GenTreeUnOp @@ -8065,7 +8260,7 @@ inline bool GenTree::OperIsInitBlkOp() { src = AsBlk()->Data()->gtSkipReloadOrCopy(); } - return src->OperIsInitVal() || src->OperIsConst(); + return src->OperIsInitVal() || src->IsIntegralConst(); } inline bool GenTree::OperIsCopyBlkOp() @@ -8073,21 +8268,6 @@ inline bool GenTree::OperIsCopyBlkOp() return OperIsBlkOp() && !OperIsInitBlkOp(); } -//------------------------------------------------------------------------ -// IsFPZero: Checks whether this is a floating point constant with value 0.0 -// -// Return Value: -// Returns true iff the tree is an GT_CNS_DBL, with value of 0.0. 
-
-inline bool GenTree::IsFPZero() const
-{
-    if ((gtOper == GT_CNS_DBL) && (AsDblCon()->gtDconVal == 0.0))
-    {
-        return true;
-    }
-    return false;
-}
-
 //------------------------------------------------------------------------
 // IsIntegralConst: Checks whether this is a constant node with the given value
 //
@@ -8118,84 +8298,6 @@ inline bool GenTree::IsIntegralConst(ssize_t constVal) const
     return false;
 }

-//-------------------------------------------------------------------
-// IsIntegralConstVector: returns true if this is an SIMD vector
-// with all its elements equal to an integral constant.
-//
-// Arguments:
-//     constVal - const value of vector element
-//
-// Returns:
-//     True if this represents an integral const SIMD vector.
-//
-inline bool GenTree::IsIntegralConstVector(ssize_t constVal) const
-{
-#ifdef FEATURE_SIMD
-    // SIMDIntrinsicInit intrinsic with a const value as initializer
-    // represents a const vector.
-    if ((gtOper == GT_SIMD) && (AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInit) &&
-        AsSIMD()->Op(1)->IsIntegralConst(constVal))
-    {
-        assert(varTypeIsIntegral(AsSIMD()->GetSimdBaseType()));
-        assert(AsSIMD()->GetOperandCount() == 1);
-        return true;
-    }
-#endif // FEATURE_SIMD
-
-#ifdef FEATURE_HW_INTRINSICS
-    if (gtOper == GT_HWINTRINSIC)
-    {
-        const GenTreeHWIntrinsic* node = AsHWIntrinsic();
-
-        if (!varTypeIsIntegral(node->GetSimdBaseType()))
-        {
-            // Can't be an integral constant
-            return false;
-        }
-
-        NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
-
-        if ((node->GetOperandCount() == 0) && (constVal == 0))
-        {
-#if defined(TARGET_XARCH)
-            return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
-#elif defined(TARGET_ARM64)
-            return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
-#endif // !TARGET_XARCH && !TARGET_ARM64
-        }
-        else if ((node->GetOperandCount() == 1) && node->Op(1)->IsIntegralConst(constVal))
-        {
-#if defined(TARGET_XARCH)
-            return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create);
-#elif defined(TARGET_ARM64)
-            return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create);
-#endif // !TARGET_XARCH && !TARGET_ARM64
-        }
-    }
-#endif // FEATURE_HW_INTRINSICS
-
-    return false;
-}
-
-//-------------------------------------------------------------------
-// IsSIMDZero: returns true if this is an SIMD vector with all its
-// elements equal to zero.
-//
-// Returns:
-//     True if this represents an integral const SIMD vector.
-//
-inline bool GenTree::IsSIMDZero() const
-{
-#ifdef FEATURE_SIMD
-    if ((gtOper == GT_SIMD) && (AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInit))
-    {
-        return (AsSIMD()->Op(1)->IsIntegralConst(0) || AsSIMD()->Op(1)->IsFPZero());
-    }
-#endif
-
-    return false;
-}
-
 //-------------------------------------------------------------------
 // IsFloatPositiveZero: returns true if this is exactly a const float value of positive zero (+0.0)
 //
@@ -8218,56 +8320,30 @@ inline bool GenTree::IsFloatPositiveZero() const
 }

 //-------------------------------------------------------------------
-// IsVectorZero: returns true if this node is a HWIntrinsic that is Vector*_get_Zero.
+// IsVectorZero: returns true if this node is a vector constant with all bits zero.
 //
 // Returns:
-//    True if this represents a HWIntrinsic node that is Vector*_get_Zero.
+// True if this node is a vector constant with all bits zero // -// TODO: We already have IsSIMDZero() and IsIntegralConstVector(0), -// however, IsSIMDZero() does not cover hardware intrinsics, and IsIntegralConstVector(0) does not cover floating -// point. In order to not risk adverse behaviour by modifying those, this function 'IsVectorZero' was introduced. -// At some point, it makes sense to normalize this logic to be a single function call rather than have several -// separate ones; preferably this one. inline bool GenTree::IsVectorZero() const { -#ifdef FEATURE_HW_INTRINSICS - if (gtOper == GT_HWINTRINSIC) - { - const GenTreeHWIntrinsic* node = AsHWIntrinsic(); - const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - -#if defined(TARGET_XARCH) - return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero); -#elif defined(TARGET_ARM64) - return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero); -#endif // !TARGET_XARCH && !TARGET_ARM64 - } -#endif // FEATURE_HW_INTRINSICS - - return false; + return IsCnsVec() && AsVecCon()->IsZero(); } //------------------------------------------------------------------- -// IsVectorAllBitsSet: returns true if this node is a HWIntrinsic that is Vector*_get_AllBitsSet. +// IsVectorAllBitsSet: returns true if this node is a vector constant with all bits set. // // Returns: -// True if this represents a HWIntrinsic node that is Vector*_get_AllBitsSet. +// True if this node is a vector constant with all bits set // inline bool GenTree::IsVectorAllBitsSet() const { -#ifdef FEATURE_HW_INTRINSICS - if (gtOper == GT_HWINTRINSIC) +#ifdef FEATURE_SIMD + if (OperIs(GT_CNS_VEC)) { - const GenTreeHWIntrinsic* node = AsHWIntrinsic(); - const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - -#if defined(TARGET_XARCH) - return (intrinsicId == NI_Vector128_get_AllBitsSet) || (intrinsicId == NI_Vector256_get_AllBitsSet); -#elif defined(TARGET_ARM64) - return (intrinsicId == NI_Vector64_get_AllBitsSet) || (intrinsicId == NI_Vector128_get_AllBitsSet); -#endif // !TARGET_XARCH && !TARGET_ARM64 + return AsVecCon()->IsAllBitsSet(); } -#endif // FEATURE_HW_INTRINSICS +#endif // FEATURE_SIMD return false; } @@ -8280,43 +8356,12 @@ inline bool GenTree::IsVectorAllBitsSet() const // inline bool GenTree::IsVectorConst() { -#ifdef FEATURE_HW_INTRINSICS - if (gtOper == GT_HWINTRINSIC) +#ifdef FEATURE_SIMD + if (OperIs(GT_CNS_VEC)) { - const GenTreeHWIntrinsic* node = AsHWIntrinsic(); - const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - -#if defined(TARGET_XARCH) - if ((intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create)) - { - for (GenTree* arg : Operands()) - { - if (!arg->IsIntegralConst() && !arg->IsCnsFltOrDbl()) - { - return false; - } - } - - return true; - } -#elif defined(TARGET_ARM64) - if ((intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create)) - { - for (GenTree* arg : Operands()) - { - if (!arg->IsIntegralConst() && !arg->IsCnsFltOrDbl()) - { - return false; - } - } - - return true; - } -#endif // !TARGET_XARCH && !TARGET_ARM64 - - return IsVectorZero() || IsVectorAllBitsSet(); + return true; } -#endif // FEATURE_HW_INTRINSICS +#endif // FEATURE_SIMD return false; } @@ -8327,67 +8372,60 @@ inline bool GenTree::IsVectorConst() // Returns: // The value of a given element in an integral vector constant // -inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index) +inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index, 
var_types simdBaseType) { #ifdef FEATURE_HW_INTRINSICS - if (gtOper == GT_HWINTRINSIC) + if (IsCnsVec()) { - const GenTreeHWIntrinsic* node = AsHWIntrinsic(); - const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - size_t operandsCount = node->GetOperandCount(); - - CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); - var_types simdBaseType = node->GetSimdBaseType(); + const GenTreeVecCon* node = AsVecCon(); -#if defined(TARGET_XARCH) - if ((intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create)) - { - return (uint64_t)node->Op(index + 1)->AsIntConCommon()->IntegralValue(); - } -#elif defined(TARGET_ARM64) - if ((intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create)) + switch (simdBaseType) { - return (uint64_t)node->Op(index + 1)->AsIntConCommon()->IntegralValue(); - } -#endif // !TARGET_XARCH && !TARGET_ARM64 + case TYP_BYTE: + { + return node->gtSimd32Val.i8[index]; + } - if (IsVectorZero()) - { - return 0; - } + case TYP_UBYTE: + { + return node->gtSimd32Val.u8[index]; + } - if (IsVectorAllBitsSet()) - { - switch (simdBaseType) + case TYP_SHORT: { - case TYP_BYTE: - case TYP_UBYTE: - { - return 0xFF; - } + return node->gtSimd32Val.i16[index]; + } - case TYP_SHORT: - case TYP_USHORT: - { - return 0xFFFF; - } + case TYP_USHORT: + { + return node->gtSimd32Val.u16[index]; + } - case TYP_INT: - case TYP_UINT: - { - return 0xFFFFFFFF; - } + case TYP_INT: + case TYP_FLOAT: + { + return node->gtSimd32Val.i32[index]; + } - case TYP_LONG: - case TYP_ULONG: - { - return 0xFFFFFFFFFFFFFFFF; - } + case TYP_UINT: + { + return node->gtSimd32Val.u32[index]; + } - default: - { - unreached(); - } + case TYP_LONG: + case TYP_DOUBLE: + { + return node->gtSimd32Val.i64[index]; + } + + case TYP_ULONG: + { + return node->gtSimd32Val.u64[index]; + } + + default: + { + unreached(); } } } @@ -9009,12 +9047,12 @@ inline bool GenTree::IsIntCnsFitsInI32() inline bool GenTree::IsCnsFltOrDbl() const { - return OperGet() == GT_CNS_DBL; + return OperIs(GT_CNS_DBL); } inline bool GenTree::IsCnsNonZeroFltOrDbl() const { - if (OperGet() == GT_CNS_DBL) + if (IsCnsFltOrDbl()) { double constValue = AsDblCon()->gtDconVal; return *(__int64*)&constValue != 0; @@ -9023,6 +9061,11 @@ inline bool GenTree::IsCnsNonZeroFltOrDbl() const return false; } +inline bool GenTree::IsCnsVec() const +{ + return OperIs(GT_CNS_VEC); +} + inline bool GenTree::IsHelperCall() { return OperGet() == GT_CALL && AsCall()->gtCallType == CT_HELPER; diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 0322afcb626167..abc8faef2d7918 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -46,6 +46,7 @@ GTNODE(CNS_INT , GenTreeIntCon ,0,GTK_LEAF) GTNODE(CNS_LNG , GenTreeLngCon ,0,GTK_LEAF) GTNODE(CNS_DBL , GenTreeDblCon ,0,GTK_LEAF) GTNODE(CNS_STR , GenTreeStrCon ,0,GTK_LEAF) +GTNODE(CNS_VEC , GenTreeVecCon ,0,GTK_LEAF) //----------------------------------------------------------------------------- // Unary operators (1 operand): diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h index d5fa40b9909bde..1c4c554a0526be 100644 --- a/src/coreclr/jit/gtstructs.h +++ b/src/coreclr/jit/gtstructs.h @@ -60,6 +60,7 @@ GTSTRUCT_1(IntCon , GT_CNS_INT) GTSTRUCT_1(LngCon , GT_CNS_LNG) GTSTRUCT_1(DblCon , GT_CNS_DBL) GTSTRUCT_1(StrCon , GT_CNS_STR) +GTSTRUCT_1(VecCon , GT_CNS_VEC) GTSTRUCT_N(LclVarCommon, GT_LCL_VAR, GT_LCL_FLD, GT_PHI_ARG, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR) GTSTRUCT_3(LclVar , GT_LCL_VAR, GT_LCL_VAR_ADDR, 
GT_STORE_LCL_VAR)
GTSTRUCT_3(LclFld , GT_LCL_FLD, GT_STORE_LCL_FLD, GT_LCL_FLD_ADDR)
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 0d379f631328e3..edb2786459d9e9 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -553,8 +553,190 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
        case NI_Vector64_Create:
        case NI_Vector128_Create:
        {
-           // We shouldn't handle this as an intrinsic if the
-           // respective ISAs have been disabled by the user.
+           uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
+           assert((sig->numArgs == 1) || (sig->numArgs == simdLength));
+
+           bool isConstant = true;
+
+           if (varTypeIsFloating(simdBaseType))
+           {
+               for (uint32_t index = 0; index < sig->numArgs; index++)
+               {
+                   GenTree* arg = impStackTop(index).val;
+
+                   if (!arg->IsCnsFltOrDbl())
+                   {
+                       isConstant = false;
+                       break;
+                   }
+               }
+           }
+           else
+           {
+               assert(varTypeIsIntegral(simdBaseType));
+
+               for (uint32_t index = 0; index < sig->numArgs; index++)
+               {
+                   GenTree* arg = impStackTop(index).val;
+
+                   if (!arg->IsIntegralConst())
+                   {
+                       isConstant = false;
+                       break;
+                   }
+               }
+           }
+
+           if (isConstant)
+           {
+               // Some of the below code assumes 8 or 16 byte SIMD types
+               assert((simdSize == 8) || (simdSize == 16));
+
+               // For create intrinsics that take 1 operand, we broadcast the value.
+               //
+               // This happens even for CreateScalarUnsafe since the upper bits are
+               // considered non-deterministic and we can therefore set them to anything.
+               //
+               // We do this as it simplifies the logic and allows certain code paths to
+               // have better codegen, such as for 0, AllBitsSet, or certain small constants
+
+               GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType);
+
+               switch (simdBaseType)
+               {
+                   case TYP_BYTE:
+                   case TYP_UBYTE:
+                   {
+                       uint8_t cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<uint8_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                           vecCon->gtSimd16Val.u8[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < simdLength - 1; index++)
+                           {
+                               vecCon->gtSimd16Val.u8[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_SHORT:
+                   case TYP_USHORT:
+                   {
+                       uint16_t cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<uint16_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                           vecCon->gtSimd16Val.u16[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < (simdLength - 1); index++)
+                           {
+                               vecCon->gtSimd16Val.u16[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_INT:
+                   case TYP_UINT:
+                   {
+                       uint32_t cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<uint32_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                           vecCon->gtSimd16Val.u32[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < (simdLength - 1); index++)
+                           {
+                               vecCon->gtSimd16Val.u32[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_LONG:
+                   case TYP_ULONG:
+                   {
+                       uint64_t cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<uint64_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                           vecCon->gtSimd16Val.u64[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < (simdLength - 1); index++)
+                           {
+                               vecCon->gtSimd16Val.u64[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_FLOAT:
+                   {
+                       float cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<float>(impPopStack().val->AsDblCon()->gtDconVal);
+                           vecCon->gtSimd16Val.f32[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < (simdLength - 1); index++)
+                           {
+                               vecCon->gtSimd16Val.f32[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_DOUBLE:
+                   {
+                       double cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<double>(impPopStack().val->AsDblCon()->gtDconVal);
+                           vecCon->gtSimd16Val.f64[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < (simdLength - 1); index++)
+                           {
+                               vecCon->gtSimd16Val.f64[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   default:
+                   {
+                       unreached();
+                   }
+               }
+
+               retNode = vecCon;
+               break;
+           }

            IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs);
@@ -791,7 +973,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
                                           /* isSimdAsHWIntrinsic */ false);
            op1 = gtNewCastNode(TYP_INT, op1, /* isUnsigned */ true, TYP_INT);

-           GenTree* zero = gtNewSimdZeroNode(simdType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+           GenTree* zero = gtNewZeroConNode(simdType, simdBaseJitType);
            ssize_t index = 8 / genTypeSize(simdBaseType);

            op2 = gtNewSimdHWIntrinsicNode(simdType, op2, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
@@ -857,10 +1039,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
        case NI_Vector64_get_AllBitsSet:
        case NI_Vector128_get_AllBitsSet:
        {
-           assert(!sig->hasThis());
-           assert(numArgs == 0);
-
-           retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize);
+           assert(sig->numArgs == 0);
+           retNode = gtNewAllBitsSetConNode(retType, simdBaseJitType);
            break;
        }
@@ -868,7 +1048,7 @@
        case NI_Vector64_get_Zero:
        case NI_Vector128_get_Zero:
        {
            assert(sig->numArgs == 0);
-           retNode = gtNewSimdZeroNode(retType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+           retNode = gtNewZeroConNode(retType, simdBaseJitType);
            break;
        }
@@ -892,7 +1072,7 @@
            // AdvSimd.ExtractVector128(vector, Vector128.Zero, 8 / sizeof(T)).GetLower();
            assert(numArgs == 1);
            op1 = impPopStack().val;
-           GenTree* zero = gtNewSimdHWIntrinsicNode(retType, NI_Vector128_get_Zero, simdBaseJitType, simdSize);
+           GenTree* zero = gtNewZeroConNode(retType, simdBaseJitType);
            ssize_t index = 8 / genTypeSize(simdBaseType);

            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
@@ -1403,15 +1583,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
                break;
            }

-           size_t elementSize = genTypeSize(simdBaseType);
-           size_t elementCount = simdSize / elementSize;
-
-           if (genTypeSize(indices->AsHWIntrinsic()->GetSimdBaseType()) != elementSize)
-           {
-               // TODO-ARM64-CQ: Handling reinterpreted vector constants is a bit more complex
-               break;
-           }
-
            if (sig->numArgs == 2)
            {
                op2 = impSIMDPopStack(retType);
diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index 2bf4d7cf817683..add3a61d122236 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -790,17 +790,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
            break;
        }
-       // mvni doesn't support the range of element types, so hard code the 'opts' value.
- case NI_Vector64_get_Zero: - case NI_Vector64_get_AllBitsSet: - GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_2S); - break; - - case NI_Vector128_get_Zero: - case NI_Vector128_get_AllBitsSet: - GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_4S); - break; - case NI_AdvSimd_DuplicateToVector64: case NI_AdvSimd_DuplicateToVector128: case NI_AdvSimd_Arm64_DuplicateToVector64: diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 42b20cea7e2beb..c78c436a2fcac6 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1103,38 +1103,6 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Vector128_get_Zero: - case NI_Vector256_get_Zero: - { - emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg); - break; - } - - case NI_Vector128_get_AllBitsSet: - if (varTypeIsFloating(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - // The following corresponds to vcmptrueps pseudo-op and not available without VEX prefix. - emit->emitIns_SIMD_R_R_R_I(ins, attr, targetReg, targetReg, targetReg, 15); - } - else - { - emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg); - } - break; - - case NI_Vector256_get_AllBitsSet: - if (varTypeIsIntegral(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg); - } - else - { - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX)); - // The following corresponds to vcmptrueps pseudo-op. - emit->emitIns_SIMD_R_R_R_I(INS_cmpps, attr, targetReg, targetReg, targetReg, 15); - } - break; - default: { unreached(); diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 6defcc25ce4493..090255492fea23 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -48,9 +48,9 @@ HARDWARE_INTRINSIC(Vector64, EqualsAll, HARDWARE_INTRINSIC(Vector64, EqualsAny, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, ExtractMostSignificantBits, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, Floor, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Vector64, get_Count, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) 
-HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Vector64, GetElement, 8, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, GreaterThan, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, GreaterThanAll, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) @@ -147,9 +147,9 @@ HARDWARE_INTRINSIC(Vector128, EqualsAll, HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, GetLower, 16, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, 
HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, GetUpper, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 994570fc91b633..8b5f283f02042f 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -65,9 +65,9 @@ HARDWARE_INTRINSIC(Vector128, EqualsAll, HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) @@ -162,9 +162,9 @@ 
HARDWARE_INTRINSIC(Vector256, EqualsAll, HARDWARE_INTRINSIC(Vector256, EqualsAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, ExtractMostSignificantBits, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Vector256, get_Count, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, GreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index d48db6661c9412..7b2155f9b487c7 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -897,7 +897,194 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_Create: case NI_Vector256_Create: + case NI_Vector128_CreateScalarUnsafe: + case NI_Vector256_CreateScalarUnsafe: { + uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); + assert((sig->numArgs == 1) || (sig->numArgs == 
simdLength));
+
+           bool isConstant = true;
+
+           if (varTypeIsFloating(simdBaseType))
+           {
+               for (uint32_t index = 0; index < sig->numArgs; index++)
+               {
+                   GenTree* arg = impStackTop(index).val;
+
+                   if (!arg->IsCnsFltOrDbl())
+                   {
+                       isConstant = false;
+                       break;
+                   }
+               }
+           }
+           else
+           {
+               assert(varTypeIsIntegral(simdBaseType));
+
+               for (uint32_t index = 0; index < sig->numArgs; index++)
+               {
+                   GenTree* arg = impStackTop(index).val;
+
+                   if (!arg->IsIntegralConst())
+                   {
+                       isConstant = false;
+                       break;
+                   }
+               }
+           }
+
+           if (isConstant)
+           {
+               // Some of the below code assumes 16 or 32 byte SIMD types
+               assert((simdSize == 16) || (simdSize == 32));
+
+               // For create intrinsics that take 1 operand, we broadcast the value.
+               //
+               // This happens even for CreateScalarUnsafe since the upper bits are
+               // considered non-deterministic and we can therefore set them to anything.
+               //
+               // We do this as it simplifies the logic and allows certain code paths to
+               // have better codegen, such as for 0, AllBitsSet, or certain small constants
+
+               GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType);
+
+               switch (simdBaseType)
+               {
+                   case TYP_BYTE:
+                   case TYP_UBYTE:
+                   {
+                       uint8_t cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<uint8_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                           vecCon->gtSimd32Val.u8[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < simdLength - 1; index++)
+                           {
+                               vecCon->gtSimd32Val.u8[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_SHORT:
+                   case TYP_USHORT:
+                   {
+                       uint16_t cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<uint16_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                           vecCon->gtSimd32Val.u16[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < (simdLength - 1); index++)
+                           {
+                               vecCon->gtSimd32Val.u16[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_INT:
+                   case TYP_UINT:
+                   {
+                       uint32_t cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<uint32_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                           vecCon->gtSimd32Val.u32[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < (simdLength - 1); index++)
+                           {
+                               vecCon->gtSimd32Val.u32[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_LONG:
+                   case TYP_ULONG:
+                   {
+                       uint64_t cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<uint64_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+                           vecCon->gtSimd32Val.u64[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < (simdLength - 1); index++)
+                           {
+                               vecCon->gtSimd32Val.u64[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_FLOAT:
+                   {
+                       float cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<float>(impPopStack().val->AsDblCon()->gtDconVal);
+                           vecCon->gtSimd32Val.f32[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index < (simdLength - 1); index++)
+                           {
+                               vecCon->gtSimd32Val.f32[index] = cnsVal;
+                           }
+                       }
+                       break;
+                   }
+
+                   case TYP_DOUBLE:
+                   {
+                       double cnsVal = 0;
+
+                       for (uint32_t index = 0; index < sig->numArgs; index++)
+                       {
+                           cnsVal = static_cast<double>(impPopStack().val->AsDblCon()->gtDconVal);
+                           vecCon->gtSimd32Val.f64[simdLength - 1 - index] = cnsVal;
+                       }
+
+                       if (sig->numArgs == 1)
+                       {
+                           for (uint32_t index = 0; index <
(simdLength - 1); index++) + { + vecCon->gtSimd32Val.f64[index] = cnsVal; + } + } + break; + } + + default: + { + unreached(); + } + } + + retNode = vecCon; + break; + } + #if defined(TARGET_X86) if (varTypeIsLong(simdBaseType)) { @@ -919,26 +1106,6 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_CreateScalarUnsafe: - case NI_Vector256_CreateScalarUnsafe: - { - assert(sig->numArgs == 1); - -#ifdef TARGET_X86 - if (varTypeIsLong(simdBaseType)) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. - break; - } -#endif // TARGET_X86 - - op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_Divide: case NI_Vector256_Divide: case NI_Vector128_op_Division: @@ -1217,7 +1384,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, case NI_Vector256_get_AllBitsSet: { assert(sig->numArgs == 0); - retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize); + retNode = gtNewAllBitsSetConNode(retType, simdBaseJitType); break; } @@ -1236,7 +1403,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, case NI_Vector256_get_Zero: { assert(sig->numArgs == 0); - retNode = gtNewSimdZeroNode(retType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); + retNode = gtNewZeroConNode(retType, simdBaseJitType); break; } @@ -1895,6 +2062,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, case NI_Vector256_Shuffle: { assert((sig->numArgs == 2) || (sig->numArgs == 3)); + assert((simdSize == 16) || (simdSize == 32)); GenTree* indices = impStackTop(0).val; @@ -1907,12 +2075,6 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, size_t elementSize = genTypeSize(simdBaseType); size_t elementCount = simdSize / elementSize; - if (genTypeSize(indices->AsHWIntrinsic()->GetSimdBaseType()) != elementSize) - { - // TODO-XARCH-CQ: Handling reinterpreted vector constants is a bit more complex - break; - } - if (simdSize == 32) { if (!compExactlyDependsOn(InstructionSet_AVX2)) @@ -1927,7 +2089,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, for (size_t index = 0; index < elementCount; index++) { - uint64_t value = indices->GetIntegralVectorConstElement(index); + uint64_t value = indices->GetIntegralVectorConstElement(index, simdBaseType); if (value >= elementCount) { diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 12700f4d4ad03c..a9059ba25b7849 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -334,6 +334,7 @@ void Compiler::impSaveStackState(SavedStack* savePtr, bool copy) case GT_CNS_LNG: case GT_CNS_DBL: case GT_CNS_STR: + case GT_CNS_VEC: case GT_LCL_VAR: table->val = gtCloneExpr(tree); break; @@ -1216,7 +1217,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr, #endif // FEATURE_HW_INTRINSICS { assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_FIELD, GT_IND, GT_OBJ, GT_CALL, GT_MKREFANY, GT_RET_EXPR, - GT_COMMA) || + GT_COMMA, GT_CNS_VEC) || ((src->TypeGet() != TYP_STRUCT) && src->OperIsSIMD())); } #endif // DEBUG @@ -1588,7 +1589,7 @@ GenTree* Compiler::impGetStructAddr(GenTree* structVal, return (structVal->AsObj()->Addr()); } else if (oper == GT_CALL || oper == GT_RET_EXPR || oper == GT_OBJ || oper == GT_MKREFANY || - structVal->OperIsSimdOrHWintrinsic()) + structVal->OperIsSimdOrHWintrinsic() || structVal->IsCnsVec()) { 
unsigned tmpNum = lvaGrabTemp(true DEBUGARG("struct address for call/obj")); @@ -1781,6 +1782,12 @@ GenTree* Compiler::impNormStructVal(GenTree* structVal, alreadyNormalized = true; break; + case GT_CNS_VEC: + { + assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType)); + break; + } + #ifdef FEATURE_SIMD case GT_SIMD: assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType)); @@ -1820,7 +1827,7 @@ GenTree* Compiler::impNormStructVal(GenTree* structVal, } #ifdef FEATURE_SIMD - if (blockNode->OperIsSimdOrHWintrinsic()) + if (blockNode->OperIsSimdOrHWintrinsic() || blockNode->IsCnsVec()) { parent->AsOp()->gtOp2 = impNormStructVal(blockNode, structHnd, curLevel, forceNormalization); alreadyNormalized = true; diff --git a/src/coreclr/jit/importer_vectorization.cpp b/src/coreclr/jit/importer_vectorization.cpp index 7d1b0abe5476d3..a3f2142b1f7273 100644 --- a/src/coreclr/jit/importer_vectorization.cpp +++ b/src/coreclr/jit/importer_vectorization.cpp @@ -161,7 +161,6 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( int simdSize; var_types simdType; - NamedIntrinsic niZero; NamedIntrinsic niEquals; GenTree* cnsVec1 = nullptr; @@ -192,7 +191,6 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( simdSize = 32; simdType = TYP_SIMD32; - niZero = NI_Vector256_get_Zero; niEquals = NI_Vector256_op_Equality; // Special case: use a single vector for Length == 16 @@ -217,7 +215,6 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( simdSize = 16; simdType = TYP_SIMD16; - niZero = NI_Vector128_get_Zero; niEquals = NI_Vector128_op_Equality; // Special case: use a single vector for Length == 8 @@ -239,7 +236,7 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( return nullptr; } - GenTree* zero = gtNewSimdHWIntrinsicNode(simdType, niZero, baseType, simdSize); + GenTree* zero = gtNewZeroConNode(simdType, baseType); GenTree* offset1 = gtNewIconNode(dataOffset, TYP_I_IMPL); GenTree* offset2 = gtNewIconNode(dataOffset + len * sizeof(USHORT) - simdSize, TYP_I_IMPL); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 67ae437f03b750..4a3ab3e41baff8 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -838,6 +838,41 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) assert(op->isContainedIntOrIImmed()); return OperandDesc(op->AsIntCon()->IconValue(), op->AsIntCon()->ImmedValNeedsReloc(compiler)); + case GT_CNS_VEC: + { + switch (op->TypeGet()) + { +#if defined(FEATURE_SIMD) + case TYP_LONG: + case TYP_DOUBLE: + case TYP_SIMD8: + { + // TODO-1stClassStructs: do not retype SIMD nodes + simd8_t constValue = op->AsVecCon()->gtSimd8Val; + return OperandDesc(emit->emitSimd8Const(constValue)); + } + + case TYP_SIMD12: + case TYP_SIMD16: + { + simd16_t constValue = op->AsVecCon()->gtSimd16Val; + return OperandDesc(emit->emitSimd16Const(constValue)); + } + + case TYP_SIMD32: + { + simd32_t constValue = op->AsVecCon()->gtSimd32Val; + return OperandDesc(emit->emitSimd32Const(constValue)); + } +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + default: unreached(); } diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index 7f9c11a2197a8d..c540c484cfff08 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -2022,6 +2022,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR case GT_CNS_LNG: case GT_CNS_DBL: case GT_CNS_STR: + case GT_CNS_VEC: case GT_CLS_VAR_ADDR: case GT_PHYSREG: // These are all side-effect-free leaf nodes. 
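[Illustrative aside, not part of the patch: the importer folding above and the GenTreeVecCon helpers in gentree.h share one mechanism. A vector constant is just raw lanes in a union; the importer writes lanes from the top down because impPopStack() returns operands last-first, broadcasts when a single operand is supplied, and IsZero/IsAllBitsSet then reduce to plain 64-bit lane compares. The sketch below shows this behavior in isolation; Simd16 is a hypothetical stand-in for the JIT's simd16_t, and FoldCreateU32 is an invented name, not a function from this change.]

// sketch.cpp - standalone illustration only; compiles as C++11.
#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the JIT's simd16_t union (simd.h carries more
// lane views: i8/u8 .. f64). Reading one member after writing another is the
// same union punning the JIT itself relies on for these payloads.
union Simd16
{
    uint8_t  u8[16];
    uint32_t u32[4];
    uint64_t u64[2];
};

// Mirrors GenTreeVecCon::IsZero / IsAllBitsSet: only raw bit patterns are
// compared, so integer and floating lanes take the same path.
bool IsZero(const Simd16& v)
{
    return (v.u64[0] == 0) && (v.u64[1] == 0);
}

bool IsAllBitsSet(const Simd16& v)
{
    return (v.u64[0] == 0xFFFFFFFFFFFFFFFF) && (v.u64[1] == 0xFFFFFFFFFFFFFFFF);
}

// Mirrors the importer's folding of Vector128.Create over uint lanes:
// operands come off the IL stack last-first, so lanes fill from the top down;
// a single operand is then broadcast to the remaining lanes (this also covers
// CreateScalarUnsafe, whose upper lanes are unspecified).
Simd16 FoldCreateU32(const uint32_t* args, unsigned numArgs)
{
    const unsigned simdLength = 4;
    Simd16         vec        = {};
    uint32_t       cnsVal     = 0;

    for (unsigned index = 0; index < numArgs; index++)
    {
        cnsVal = args[numArgs - 1 - index]; // simulated impPopStack()
        vec.u32[simdLength - 1 - index] = cnsVal;
    }

    if (numArgs == 1)
    {
        for (unsigned index = 0; index < simdLength - 1; index++)
        {
            vec.u32[index] = cnsVal; // broadcast
        }
    }
    return vec;
}

int main()
{
    uint32_t elems[] = {1, 2, 3, 4};
    Simd16   v       = FoldCreateU32(elems, 4);
    printf("lanes: %u %u %u %u\n", v.u32[0], v.u32[1], v.u32[2], v.u32[3]); // lanes: 1 2 3 4

    uint32_t zero[] = {0};
    printf("IsZero: %d\n", IsZero(FoldCreateU32(zero, 1))); // IsZero: 1

    uint32_t ones[] = {0xFFFFFFFF};
    printf("IsAllBitsSet: %d\n", IsAllBitsSet(FoldCreateU32(ones, 1))); // IsAllBitsSet: 1
    return 0;
}

The same fill-and-broadcast loop appears in both importers above; only the payload width differs (gtSimd16Val on arm64, gtSimd32Val on xarch).
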
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index ac9c93860484b0..edeee01e466f00 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -318,8 +318,7 @@ GenTree* Lowering::LowerNode(GenTree* node) #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: - LowerHWIntrinsic(node->AsHWIntrinsic()); - break; + return LowerHWIntrinsic(node->AsHWIntrinsic()); #endif // FEATURE_HW_INTRINSICS case GT_LCL_FLD: @@ -3502,28 +3501,21 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore) else if (src->OperIs(GT_CNS_INT)) { assert(src->IsIntegralConst(0) && "expected an INIT_VAL for non-zero init."); + #ifdef FEATURE_SIMD if (varTypeIsSIMD(lclRegType)) { - CorInfoType simdBaseJitType = comp->getBaseJitTypeOfSIMDLocal(lclStore); - if (simdBaseJitType == CORINFO_TYPE_UNDEF) - { - // Lie about the type if we don't know/have it. - simdBaseJitType = CORINFO_TYPE_FLOAT; - } - GenTreeSIMD* simdTree = - comp->gtNewSIMDNode(lclRegType, src, SIMDIntrinsicInit, simdBaseJitType, varDsc->lvExactSize); - BlockRange().InsertAfter(src, simdTree); - LowerSIMD(simdTree); - src = simdTree; - lclStore->gtOp1 = src; - convertToStoreObj = false; + GenTree* zeroCon = comp->gtNewZeroConNode(lclRegType, CORINFO_TYPE_FLOAT); + + BlockRange().InsertAfter(src, zeroCon); + BlockRange().Remove(src); + + src = zeroCon; + lclStore->gtOp1 = src; } - else #endif // FEATURE_SIMD - { - convertToStoreObj = false; - } + + convertToStoreObj = false; } else if (src->OperIs(GT_LCL_VAR)) { diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index c8e7219e8bda7e..7e6acf6f03009b 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -337,17 +337,17 @@ class Lowering final : public Phase void LowerSIMD(GenTreeSIMD* simdNode); #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS - void LowerHWIntrinsic(GenTreeHWIntrinsic* node); + GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node); void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition); + GenTree* LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp); void LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node); - void LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp); - void LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node); - void LowerHWIntrinsicDot(GenTreeHWIntrinsic* node); + GenTree* LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node); + GenTree* LowerHWIntrinsicDot(GenTreeHWIntrinsic* node); #if defined(TARGET_XARCH) void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node); void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node); void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node); - void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node); + GenTree* LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node); GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode); GenTree* TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode); GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode); @@ -358,176 +358,6 @@ class Lowering final : public Phase GenTree* LowerModPow2(GenTree* node); GenTree* LowerAddForPossibleContainment(GenTreeOp* node); #endif // !TARGET_XARCH && !TARGET_ARM64 - - union VectorConstant { - int8_t i8[32]; - uint8_t u8[32]; - int16_t i16[16]; - uint16_t u16[16]; - int32_t i32[8]; - uint32_t u32[8]; - int64_t i64[4]; - uint64_t u64[4]; - float f32[8]; - double f64[4]; - }; - - //---------------------------------------------------------------------------------------------- - // VectorConstantIsBroadcastedI64: Check N i64 elements in a 
constant vector for equality - // - // Arguments: - // vecCns - Constant vector - // count - Amount of i64 components to compare - // - // Returns: - // true if N i64 elements of the given vector are equal - static bool VectorConstantIsBroadcastedI64(VectorConstant& vecCns, int count) - { - assert(count >= 1 && count <= 4); - for (int i = 1; i < count; i++) - { - if (vecCns.i64[i] != vecCns.i64[0]) - { - return false; - } - } - return true; - } - - //---------------------------------------------------------------------------------------------- - // ProcessArgForHWIntrinsicCreate: Processes an argument for the Lowering::LowerHWIntrinsicCreate method - // - // Arguments: - // arg - The argument to process - // argIdx - The index of the argument being processed - // vecCns - The vector constant being constructed - // baseType - The base type of the vector constant - // - // Returns: - // true if arg was a constant; otherwise, false - static bool HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, VectorConstant& vecCns, var_types baseType) - { - switch (baseType) - { - case TYP_BYTE: - case TYP_UBYTE: - { - if (arg->IsCnsIntOrI()) - { - vecCns.i8[argIdx] = static_cast(arg->AsIntCon()->gtIconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - assert(vecCns.i8[argIdx] == 0); - } - break; - } - - case TYP_SHORT: - case TYP_USHORT: - { - if (arg->IsCnsIntOrI()) - { - vecCns.i16[argIdx] = static_cast(arg->AsIntCon()->gtIconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - assert(vecCns.i16[argIdx] == 0); - } - break; - } - - case TYP_INT: - case TYP_UINT: - { - if (arg->IsCnsIntOrI()) - { - vecCns.i32[argIdx] = static_cast(arg->AsIntCon()->gtIconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - assert(vecCns.i32[argIdx] == 0); - } - break; - } - - case TYP_LONG: - case TYP_ULONG: - { -#if defined(TARGET_64BIT) - if (arg->IsCnsIntOrI()) - { - vecCns.i64[argIdx] = static_cast(arg->AsIntCon()->gtIconVal); - return true; - } -#else - if (arg->OperIsLong() && arg->AsOp()->gtOp1->IsCnsIntOrI() && arg->AsOp()->gtOp2->IsCnsIntOrI()) - { - // 32-bit targets will decompose GT_CNS_LNG into two GT_CNS_INT - // We need to reconstruct the 64-bit value in order to handle this - - INT64 gtLconVal = arg->AsOp()->gtOp2->AsIntCon()->gtIconVal; - gtLconVal <<= 32; - gtLconVal |= arg->AsOp()->gtOp1->AsIntCon()->gtIconVal; - - vecCns.i64[argIdx] = gtLconVal; - return true; - } -#endif // TARGET_64BIT - else - { - // We expect the VectorConstant to have been already zeroed - assert(vecCns.i64[argIdx] == 0); - } - break; - } - - case TYP_FLOAT: - { - if (arg->IsCnsFltOrDbl()) - { - vecCns.f32[argIdx] = static_cast(arg->AsDblCon()->gtDconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - // We check against the i32, rather than f32, to account for -0.0 - assert(vecCns.i32[argIdx] == 0); - } - break; - } - - case TYP_DOUBLE: - { - if (arg->IsCnsFltOrDbl()) - { - vecCns.f64[argIdx] = static_cast(arg->AsDblCon()->gtDconVal); - return true; - } - else - { - // We expect the VectorConstant to have been already zeroed - // We check against the i64, rather than f64, to account for -0.0 - assert(vecCns.i64[argIdx] == 0); - } - break; - } - - default: - { - unreached(); - } - } - - return false; - } #endif // FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- diff 
--git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 02d90fe2eff414..5ee0c27767ee1c 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -53,7 +53,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const if (!varTypeIsFloating(parentNode->TypeGet())) { #ifdef TARGET_ARM64 - if (parentNode->OperIsCompare() && childNode->IsFPZero()) + if (parentNode->OperIsCompare() && childNode->IsFloatPositiveZero()) { // Contain 0.0 constant in fcmp on arm64 // TODO: Enable for arm too (vcmp) @@ -917,7 +917,7 @@ void Lowering::LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node) // Arguments: // node - The hardware intrinsic node. // -void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) +GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { assert(node->TypeGet() != TYP_SIMD32); @@ -940,31 +940,25 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // that the node is modified to either not be a HWIntrinsic node or that it is no longer // the same intrinsic as when it came in. - LowerHWIntrinsicCreate(node); - assert(!node->OperIsHWIntrinsic() || (node->GetHWIntrinsicId() != intrinsicId)); - LowerNode(node); - return; + return LowerHWIntrinsicCreate(node); } case NI_Vector64_Dot: case NI_Vector128_Dot: { - LowerHWIntrinsicDot(node); - return; + return LowerHWIntrinsicDot(node); } case NI_Vector64_op_Equality: case NI_Vector128_op_Equality: { - LowerHWIntrinsicCmpOp(node, GT_EQ); - return; + return LowerHWIntrinsicCmpOp(node, GT_EQ); } case NI_Vector64_op_Inequality: case NI_Vector128_op_Inequality: { - LowerHWIntrinsicCmpOp(node, GT_NE); - return; + return LowerHWIntrinsicCmpOp(node, GT_NE); } case NI_AdvSimd_FusedMultiplyAddScalar: @@ -976,6 +970,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } ContainCheckHWIntrinsic(node); + return node->gtNext; } //---------------------------------------------------------------------------------------------- @@ -1051,7 +1046,7 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) // node - The hardware intrinsic node. // cmpOp - The comparison operation, currently must be GT_EQ or GT_NE // -void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); @@ -1121,7 +1116,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) node->gtType = TYP_VOID; node->ClearUnusedValue(); LowerNode(node); - return; + return node->gtNext; } NamedIntrinsic cmpIntrinsic; @@ -1211,6 +1206,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) node->ClearUnusedValue(); LowerNode(node); + return node->gtNext; } //---------------------------------------------------------------------------------------------- @@ -1226,14 +1222,14 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) // Arguments: // node - The hardware intrinsic node. 
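+//
+// Return Value:
+//    The next node to lower.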
// -void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) +GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); var_types simdType = node->TypeGet(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); - VectorConstant vecCns = {}; + simd32_t simd32Val = {}; if ((simdSize == 8) && (simdType == TYP_DOUBLE)) { @@ -1246,85 +1242,46 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); - size_t argCnt = node->GetOperandCount(); - size_t cnsArgCnt = 0; - - // These intrinsics are meant to set the same value to every element. - if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, vecCns, simdBaseType)) - { - // Now assign the rest of the arguments. - for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) - { - HandleArgForHWIntrinsicCreate(node->Op(1), i, vecCns, simdBaseType); - } - - cnsArgCnt = 1; - } - else - { - for (unsigned i = 1; i <= argCnt; i++) - { - if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, vecCns, simdBaseType)) - { - cnsArgCnt++; - } - } - } - assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType)))); + bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val); + size_t argCnt = node->GetOperandCount(); // Check if we have a cast that we can remove. Note that "IsValidConstForMovImm" // will reset Op(1) if it finds such a cast, so we do not need to handle it here. // TODO-Casts: why are casts from constants checked for here? - if ((argCnt == cnsArgCnt) && (argCnt == 1) && IsValidConstForMovImm(node)) + if (isConstant && (argCnt == 1) && IsValidConstForMovImm(node)) { - // Set the cnsArgCnt to zero so we get lowered to a DuplicateToVector + // Set isConstant to false so we get lowered to a DuplicateToVector // intrinsic, which will itself mark the node as contained. - cnsArgCnt = 0; + isConstant = false; } - if (argCnt == cnsArgCnt) + if (isConstant) { + assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16)); + for (GenTree* arg : node->Operands()) { BlockRange().Remove(arg); } - assert((simdSize == 8) || (simdSize == 16)); + GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType, simdBaseJitType); - if (VectorConstantIsBroadcastedI64(vecCns, simdSize / 8)) - { - // If we are a single constant or if all parts are the same, we might be able to optimize - // this even further for certain values, such as Zero or AllBitsSet. + vecCon->gtSimd32Val = simd32Val; + BlockRange().InsertBefore(node, vecCon); - if (vecCns.i64[0] == 0) - { - node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_get_Zero : NI_Vector128_get_Zero); - return; - } - else if (vecCns.i64[0] == -1) - { - node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_get_AllBitsSet : NI_Vector128_get_AllBitsSet); - return; - } + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(vecCon); + } + else + { + vecCon->SetUnusedValue(); } - unsigned cnsSize = (simdSize == 12) ? 
16 : simdSize; - unsigned cnsAlign = cnsSize; - var_types dataType = Compiler::getSIMDTypeForSize(simdSize); - - UNATIVE_OFFSET cnum = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType); - CORINFO_FIELD_HANDLE hnd = comp->eeFindJitDataOffs(cnum); - GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(TYP_I_IMPL, hnd); - BlockRange().InsertBefore(node, clsVarAddr); - - node->ChangeOper(GT_IND); - node->AsOp()->gtOp1 = clsVarAddr; - - // TODO-ARM64-CQ: We should be able to modify at least the paths that use Insert to trivially support partial - // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just - // insert the non-constant values which should still allow some gains. + BlockRange().Remove(node); - return; + return LowerNode(vecCon); } else if (argCnt == 1) { @@ -1349,7 +1306,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) node->ChangeHWIntrinsicId((simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64 : NI_AdvSimd_DuplicateToVector128); } - return; + + return LowerNode(node); } // We have the following (where simd is simd8 or simd16): @@ -1411,6 +1369,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertBefore(opN, idx); node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp1, idx, opN); + + return LowerNode(node); } //---------------------------------------------------------------------------------------------- @@ -1419,7 +1379,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // Arguments: // node - The hardware intrinsic node. // -void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) +GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); @@ -1697,8 +1657,9 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // return tmp2.ToScalar(); node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar, tmp2); + LowerNode(node); - return; + return node->gtNext; } #endif // FEATURE_HW_INTRINSICS @@ -1962,16 +1923,12 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); #ifdef FEATURE_SIMD - if (varTypeIsSIMD(storeLoc)) + if (storeLoc->TypeIs(TYP_SIMD8, TYP_SIMD12)) { // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR. - if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister) + if ((op1->IsIntegralConst(0) || op1->IsVectorZero()) && varDsc->lvDoNotEnregister) { MakeSrcContained(storeLoc, op1); - if (op1->IsSIMDZero()) - { - MakeSrcContained(op1, op1->AsSIMD()->Op(1)); - } } return; } diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 78ac528ba4c644..7a2cde0c4f5fbe 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -733,17 +733,13 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); #ifdef FEATURE_SIMD - if (varTypeIsSIMD(storeLoc)) + if (storeLoc->TypeIs(TYP_SIMD8, TYP_SIMD12)) { // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR. 
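+        // (op1->IsVectorZero() matches the new CNS_VEC zero form directly,
+        //  which has no operand of its own left to contain)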
- if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister) + if ((op1->IsIntegralConst(0) || op1->IsVectorZero()) && varDsc->lvDoNotEnregister) { // For an InitBlk we want op1 to be contained MakeSrcContained(storeLoc, op1); - if (op1->IsSIMDZero()) - { - MakeSrcContained(op1, op1->gtGetOp1()); - } } return; } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index e47878a09a7b20..46990d9f6dbc64 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -918,7 +918,7 @@ void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node) // Arguments: // node - The hardware intrinsic node. // -void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) +GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { if (node->TypeGet() == TYP_SIMD12) { @@ -948,17 +948,13 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // it into 2x Vector128.Create intrinsics which themselves are also lowered into other // intrinsics that are not Vector*.Create - LowerHWIntrinsicCreate(node); - assert(!node->OperIsHWIntrinsic() || (node->GetHWIntrinsicId() != intrinsicId)); - LowerNode(node); - return; + return LowerHWIntrinsicCreate(node); } case NI_Vector128_Dot: case NI_Vector256_Dot: { - LowerHWIntrinsicDot(node); - return; + return LowerHWIntrinsicDot(node); } case NI_Vector128_GetElement: @@ -976,30 +972,28 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // However, certain types may not have a direct equivalent // in which case we specially handle them directly as GetElement // and want to do the relevant containment checks. - break; + ContainCheckHWIntrinsic(node); } - return; + + return node->gtNext; } case NI_Vector128_WithElement: case NI_Vector256_WithElement: { - LowerHWIntrinsicWithElement(node); - return; + return LowerHWIntrinsicWithElement(node); } case NI_Vector128_op_Equality: case NI_Vector256_op_Equality: { - LowerHWIntrinsicCmpOp(node, GT_EQ); - return; + return LowerHWIntrinsicCmpOp(node, GT_EQ); } case NI_Vector128_op_Inequality: case NI_Vector256_op_Inequality: { - LowerHWIntrinsicCmpOp(node, GT_NE); - return; + return LowerHWIntrinsicCmpOp(node, GT_NE); } case NI_Vector128_ToScalar: @@ -1217,6 +1211,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } ContainCheckHWIntrinsic(node); + return node->gtNext; } //---------------------------------------------------------------------------------------------- @@ -1226,7 +1221,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // node - The hardware intrinsic node. // cmpOp - The comparison operation, currently must be GT_EQ or GT_NE // -void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); @@ -1253,7 +1248,8 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE; - if (op2->IsIntegralConstVector(0) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + if (!varTypeIsFloating(simdBaseType) && op2->IsVectorZero() && + comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) { // On SSE4.1 or higher we can optimize comparisons against zero to // just use PTEST. 
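            // (PTEST sets ZF when the bitwise AND of its operands is all-zero,
            // so 'v == Zero' becomes PTEST v, v plus a check of the flags.)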
We can't support it for floating-point, however, @@ -1262,14 +1258,6 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) node->Op(1) = op1; BlockRange().Remove(op2); - if (op2->AsMultiOp()->GetOperandCount() == 1) - { - // Some zero vectors are Create/Initialization nodes with a constant zero operand - // We should also remove this to avoid dead code - assert(op2->AsMultiOp()->Op(1)->IsIntegralConst(0)); - BlockRange().Remove(op2->AsMultiOp()->Op(1)); - } - LIR::Use op1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(op1Use); op1 = node->Op(1); @@ -1291,7 +1279,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) LowerHWIntrinsicCC(node, NI_SSE41_PTEST, cmpCnd); } - return; + return node->gtNext; } NamedIntrinsic cmpIntrinsic; @@ -1457,7 +1445,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) node->gtType = TYP_VOID; node->ClearUnusedValue(); - LowerNode(node); + return LowerNode(node); } //---------------------------------------------------------------------------------------------- @@ -1605,14 +1593,14 @@ void Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) // Arguments: // node - The hardware intrinsic node. // -void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) +GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); var_types simdType = node->gtType; CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); - VectorConstant vecCns = {}; + simd32_t simd32Val = {}; if ((simdSize == 8) && (simdType == TYP_DOUBLE)) { @@ -1634,34 +1622,13 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTree* tmp2 = nullptr; GenTree* tmp3 = nullptr; - size_t argCnt = node->GetOperandCount(); - size_t cnsArgCnt = 0; + bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val); + size_t argCnt = node->GetOperandCount(); - // These intrinsics are meant to set the same value to every element. - if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, vecCns, simdBaseType)) - { - // Now assign the rest of the arguments. - for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) - { - HandleArgForHWIntrinsicCreate(node->Op(1), i, vecCns, simdBaseType); - } - - cnsArgCnt = 1; - } - else + if (isConstant) { - for (unsigned i = 1; i <= argCnt; i++) - { - if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, vecCns, simdBaseType)) - { - cnsArgCnt++; - } - } - } - assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType)))); + assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16) || (simdSize == 32)); - if (argCnt == cnsArgCnt) - { for (GenTree* arg : node->Operands()) { #if !defined(TARGET_64BIT) @@ -1674,43 +1641,24 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().Remove(arg); } - assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16) || (simdSize == 32)); + GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType, simdBaseJitType); - if (((simdSize == 16) || (simdSize == 32)) && VectorConstantIsBroadcastedI64(vecCns, simdSize / 8)) - { - // If we are a single constant or if all parts are the same, we might be able to optimize - // this even further for certain values, such as Zero or AllBitsSet. 
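+            // Copy the folded constant payload into the new CNS_VEC node; the Zero
+            // and AllBitsSet shapes are no longer special-cased here but recognized
+            // later, at containment and register-allocation time.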
+ vecCon->gtSimd32Val = simd32Val; + BlockRange().InsertBefore(node, vecCon); - if (vecCns.i64[0] == 0) - { - node->ResetHWIntrinsicId((simdSize == 16) ? NI_Vector128_get_Zero : NI_Vector256_get_Zero); - return; - } - else if (vecCns.i64[0] == -1) - { - node->ResetHWIntrinsicId((simdSize == 16) ? NI_Vector128_get_AllBitsSet : NI_Vector256_get_AllBitsSet); - return; - } + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(vecCon); + } + else + { + vecCon->SetUnusedValue(); } - unsigned cnsSize = (simdSize != 12) ? simdSize : 16; - unsigned cnsAlign = - (comp->compCodeOpt() != Compiler::SMALL_CODE) ? cnsSize : emitter::dataSection::MIN_DATA_ALIGN; - var_types dataType = Compiler::getSIMDTypeForSize(simdSize); - - UNATIVE_OFFSET cnum = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType); - CORINFO_FIELD_HANDLE hnd = comp->eeFindJitDataOffs(cnum); - GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(TYP_I_IMPL, hnd); - BlockRange().InsertBefore(node, clsVarAddr); - - node->ChangeOper(GT_IND); - node->AsOp()->gtOp1 = clsVarAddr; - - // TODO-XARCH-CQ: We should be able to modify at least the paths that use Insert to trivially support partial - // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just - // insert the non-constant values which should still allow some gains. + BlockRange().Remove(node); - return; + return LowerNode(vecCon); } else if (argCnt == 1) { @@ -1738,7 +1686,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(tmp1); node->ResetHWIntrinsicId(NI_AVX2_BroadcastScalarToVector256, tmp1); - return; + + return LowerNode(node); } assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -1766,9 +1715,10 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector128_Create, simdBaseJitType, 16); BlockRange().InsertAfter(op1, tmp1); - LowerNode(tmp1); node->Op(1) = tmp1; + LowerNode(tmp1); + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); tmp1 = node->Op(1); @@ -1779,13 +1729,14 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp3 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, tmp2, NI_Vector128_ToVector256Unsafe, simdBaseJitType, 16); BlockRange().InsertAfter(tmp2, tmp3); - LowerNode(tmp3); idx = comp->gtNewIconNode(0x01, TYP_INT); BlockRange().InsertAfter(tmp3, idx); node->ResetHWIntrinsicId(NI_AVX_InsertVector128, comp, tmp3, tmp1, idx); - return; + LowerNode(tmp3); + + return LowerNode(node); } // We will be constructing the following parts: @@ -1813,7 +1764,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // return Avx2.BroadcastScalarToVector128(tmp1); node->ChangeHWIntrinsicId(NI_AVX2_BroadcastScalarToVector128, tmp1); - return; + + return LowerNode(node); } switch (simdBaseType) @@ -1825,7 +1777,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) { // We will be constructing the following parts: // ... 
- // tmp2 = HWINTRINSIC simd16 ubyte get_Zero + // tmp2 = CNS_VEC simd16 0 // /--* tmp1 simd16 // +--* tmp2 simd16 // node = * HWINTRINSIC simd16 ubyte Shuffle @@ -1835,8 +1787,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = Vector128.Zero; // return Ssse3.Shuffle(tmp1, tmp2); - tmp2 = - comp->gtNewSimdHWIntrinsicNode(simdType, NI_Vector128_get_Zero, CORINFO_TYPE_UBYTE, simdSize); + tmp2 = comp->gtNewZeroConNode(simdType, simdBaseJitType); BlockRange().InsertAfter(tmp1, tmp2); LowerNode(tmp2); @@ -2084,7 +2035,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } } - return; + return LowerNode(node); } GenTree* op2 = node->Op(2); @@ -2137,19 +2088,21 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(), halfArgCnt, NI_Vector128_Create, simdBaseJitType, 16); BlockRange().InsertAfter(node->Op(halfArgCnt), lo); - LowerNode(lo); GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(halfArgCnt), halfArgCnt, NI_Vector128_Create, simdBaseJitType, 16); BlockRange().InsertAfter(node->Op(argCnt), hi); - LowerNode(hi); idx = comp->gtNewIconNode(0x01, TYP_INT); BlockRange().InsertAfter(hi, idx); assert(argCnt >= 3); node->ResetHWIntrinsicId(NI_AVX_InsertVector128, comp, lo, hi, idx); - return; + + LowerNode(lo); + LowerNode(hi); + + return LowerNode(node); } // We will be constructing the following parts: @@ -2564,6 +2517,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) unreached(); } } + + return LowerNode(node); } //---------------------------------------------------------------------------------------------- @@ -2815,7 +2770,7 @@ void Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) // Arguments: // node - The hardware intrinsic node. // -void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) +GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); var_types simdType = node->TypeGet(); @@ -3151,6 +3106,8 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) assert(node != result); LowerNode(node); } + + return node->gtNext; } //---------------------------------------------------------------------------------------------- @@ -3159,7 +3116,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // Arguments: // node - The hardware intrinsic node. 
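+//
+// Return Value:
+//    The next node to lower.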
// -void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) +GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); @@ -3266,8 +3223,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) node->SetSimdSize(16); node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); - LowerNode(node); - return; + + return LowerNode(node); } case TYP_DOUBLE: @@ -3355,8 +3312,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) LowerNode(tmp3); node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); - LowerNode(node); - return; + + return LowerNode(node); } multiply = NI_SSE_Multiply; @@ -3396,8 +3353,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) LowerNode(tmp3); node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); - LowerNode(node); - return; + + return LowerNode(node); } multiply = NI_SSE2_Multiply; @@ -3464,10 +3421,11 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, cns0, cns1, cns2, cns3, NI_Vector128_Create, CORINFO_TYPE_INT, 16); BlockRange().InsertAfter(cns3, tmp1); - LowerNode(tmp1); op1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, tmp1, NI_SSE_And, simdBaseJitType, simdSize); BlockRange().InsertAfter(tmp1, op1); + + LowerNode(tmp1); LowerNode(op1); } } @@ -3751,7 +3709,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // return tmp1.ToScalar(); node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp1); - LowerNode(node); + + return LowerNode(node); } //---------------------------------------------------------------------------------------------- @@ -5036,16 +4995,6 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const if (varTypeIsSIMD(storeLoc)) { assert(!op1->IsCnsIntOrI()); - if (storeLoc->TypeIs(TYP_SIMD12) && op1->IsSIMDZero() && varDsc->lvDoNotEnregister) - { - // For a SIMD12 store we can zero from integer registers more easily. - MakeSrcContained(storeLoc, op1); - GenTree* constNode = op1->gtGetOp1(); - assert(constNode->OperIsConst()); - constNode->ClearContained(); - constNode->gtType = TYP_INT; - constNode->SetOper(GT_CNS_INT); - } return; } #endif // FEATURE_SIMD @@ -5568,7 +5517,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) } else #endif // !TARGET_64BIT - if (op1->IsFPZero() || op1->IsIntegralConst(0) || + if (op1->IsFloatPositiveZero() || op1->IsIntegralConst(0) || (varTypeIsIntegral(simdNode->GetSimdBaseType()) && op1->IsIntegralConst(-1))) { MakeSrcContained(simdNode, op1); @@ -6071,6 +6020,11 @@ bool Lowering::TryGetContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode // canBeContained = true; } + else if (node->IsCnsVec()) + { + GenTreeVecCon* vecCon = node->AsVecCon(); + canBeContained = !vecCon->IsAllBitsSet() && !vecCon->IsZero(); + } } return canBeContained; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index d0961b2785d9d8..21134d2dab42a5 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -168,6 +168,29 @@ int LinearScan::BuildNode(GenTree* tree) } break; + case GT_CNS_VEC: + { + GenTreeVecCon* vecCon = tree->AsVecCon(); + + if (vecCon->IsAllBitsSet() || vecCon->IsZero()) + { + // Directly encode constant to instructions. 
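+                // (no load from memory is involved, hence no internal
+                //  address register is required)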
+ } + else + { + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } + + srcCount = 0; + assert(dstCount == 1); + + RefPosition* def = BuildDef(tree); + def->getInterval()->isConstant = true; + break; + } + case GT_BOX: case GT_COMMA: case GT_QMARK: diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index e97b7497766287..9c29ca4c1dc235 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3481,7 +3481,7 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) // First, define internal registers. #ifdef FEATURE_SIMD - if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12)) + if (varTypeIsSIMD(storeLoc) && !op1->IsVectorZero() && (storeLoc->TypeGet() == TYP_SIMD12)) { // Need an additional register to extract upper 4 bytes of Vector3, // it has to be float for x86. @@ -3541,20 +3541,7 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) #endif // !TARGET_64BIT else if (op1->isContained()) { -#ifdef TARGET_XARCH - if (varTypeIsSIMD(storeLoc)) - { - // This is the zero-init case, and we need a register to hold the zero. - // (On Arm64 we can just store REG_ZR.) - assert(op1->IsSIMDZero()); - singleUseRef = BuildUse(op1->gtGetOp1()); - srcCount = 1; - } - else -#endif - { - srcCount = 0; - } + srcCount = 0; } else { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a438def20a339a..59e4a5f49c764e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -935,10 +935,20 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) #if defined(FEATURE_SIMD) && defined(TARGET_ARM64) else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet())) { + GenTree* nodeToCheck = argx; + + if (nodeToCheck->OperIs(GT_OBJ)) + { + nodeToCheck = nodeToCheck->AsObj()->gtOp1; + + if (nodeToCheck->OperIs(GT_ADDR)) + { + nodeToCheck = nodeToCheck->AsOp()->gtOp1; + } + } + // SIMD types do not need the optimization below due to their sizes - if (argx->OperIsSimdOrHWintrinsic() || - (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) && - argx->AsObj()->gtOp1->AsOp()->gtOp1->OperIsSimdOrHWintrinsic())) + if (nodeToCheck->OperIsSimdOrHWintrinsic() || nodeToCheck->IsCnsVec()) { SetNeedsTemp(&arg); } @@ -8933,7 +8943,7 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) return nullptr; } - if (src->IsCall() || src->OperIsSIMD()) + if (src->IsCall() || src->OperIsSimdOrHWintrinsic() || src->IsCnsVec()) { // Can't take ADDR from these nodes, let fgMorphCopyBlock handle it, #11413. 
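+        // (a CNS_VEC is a pure value node with no location, so its
+        //  address cannot be taken either)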
return nullptr; @@ -9185,7 +9195,7 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) noway_assert(src->IsIntegralConst(0)); noway_assert(destVarDsc != nullptr); - src = gtNewSIMDNode(asgType, src, SIMDIntrinsicInit, destVarDsc->GetSimdBaseJitType(), size); + src = gtNewZeroConNode(asgType, CORINFO_TYPE_FLOAT); } else #endif @@ -9771,6 +9781,13 @@ GenTree* Compiler::getSIMDStructFromField(GenTree* tree, *simdBaseJitTypeOut = simdNode->GetSimdBaseJitType(); } #endif // FEATURE_HW_INTRINSICS + else if (obj->IsCnsVec()) + { + ret = obj; + GenTreeVecCon* vecCon = obj->AsVecCon(); + *simdSizeOut = vecCon->GetSimdSize(); + *simdBaseJitTypeOut = vecCon->GetSimdBaseJitType(); + } } } if (ret != nullptr) @@ -12611,56 +12628,20 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) return node; } - switch (node->GetHWIntrinsicId()) + simd32_t simd32Val = {}; + + if (GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val)) { - case NI_Vector128_Create: -#if defined(TARGET_XARCH) - case NI_Vector256_Create: -#elif defined(TARGET_ARM64) - case NI_Vector64_Create: -#endif - { - bool hwAllArgsAreConstZero = true; - for (GenTree* arg : node->Operands()) - { - if (!arg->IsIntegralConst(0) && !arg->IsFloatPositiveZero()) - { - hwAllArgsAreConstZero = false; - break; - } - } + GenTreeVecCon* vecCon = gtNewVconNode(node->TypeGet(), node->GetSimdBaseJitType()); - if (hwAllArgsAreConstZero) - { - switch (node->GetHWIntrinsicId()) - { - case NI_Vector128_Create: - { - node->ResetHWIntrinsicId(NI_Vector128_get_Zero); - break; - } -#if defined(TARGET_XARCH) - case NI_Vector256_Create: - { - node->ResetHWIntrinsicId(NI_Vector256_get_Zero); - break; - } -#elif defined(TARGET_ARM64) - case NI_Vector64_Create: - { - node->ResetHWIntrinsicId(NI_Vector64_get_Zero); - break; - } -#endif - default: - unreached(); - } - } - break; + for (GenTree* arg : node->Operands()) + { + DEBUG_DESTROY_NODE(arg); } - default: - break; + vecCon->gtSimd32Val = simd32Val; + INDEBUG(vecCon->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return vecCon; } return node; @@ -13677,7 +13658,7 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) GenTree* op2 = hw->Op(2); if (!gtIsActiveCSE_Candidate(hw)) { - if (op2->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op2)) + if (op2->IsVectorZero() && !gtIsActiveCSE_Candidate(op2)) { DEBUG_DESTROY_NODE(hw); DEBUG_DESTROY_NODE(op2); diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index d2b195540db117..539a86b04961a8 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -340,15 +340,10 @@ void MorphInitBlockHelper::MorphStructCases() m_result->gtFlags |= (m_dst->gtFlags & GTF_ALL_EFFECT); #if FEATURE_SIMD - if (varTypeIsSIMD(m_asg) && (m_dst == m_dstLclNode)) + if (varTypeIsSIMD(m_asg) && (m_dst == m_dstLclNode) && m_src->IsIntegralConst(0)) { - // For a SIMD local init we need to call SIMDIntrinsic init. - // We need this block becuase morph does not create SIMD init for promoted lclVars. 
- assert(m_src->IsIntegralConst(0) || m_src->IsFPZero()); assert(m_dstVarDsc != nullptr); - const var_types asgType = m_asg->TypeGet(); - CorInfoType simdBaseJitType = m_dstVarDsc->GetSimdBaseJitType(); - m_src = m_comp->gtNewSIMDNode(asgType, m_src, SIMDIntrinsicInit, simdBaseJitType, m_blockSize); + m_src = m_comp->gtNewZeroConNode(m_asg->TypeGet(), CORINFO_TYPE_FLOAT); m_result->AsOp()->gtOp2 = m_src; } #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index ab5d14166978e1..af04a6b11fcc3d 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -3577,6 +3577,7 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) case GT_CNS_INT: case GT_CNS_DBL: case GT_CNS_STR: + case GT_CNS_VEC: return true; // We reach here only when CSE_CONSTS is enabled case GT_ARR_ELEM: diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index a850aa53061485..27462c0a44e902 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -134,17 +134,22 @@ void Rationalizer::RewriteSIMDIndir(LIR::Use& use) addr->gtType = simdType; use.ReplaceWith(addr); } - else if (addr->OperIs(GT_ADDR) && addr->AsUnOp()->gtGetOp1()->OperIsSimdOrHWintrinsic()) + else if (addr->OperIs(GT_ADDR)) { - // If we have IND(ADDR(SIMD)) then we can keep only the SIMD node. - // This is a special tree created by impNormStructVal to preserve the class layout - // needed by call morphing on an OBJ node. This information is no longer needed at - // this point (and the address of a SIMD node can't be obtained anyway). + GenTree* location = addr->AsUnOp()->gtGetOp1(); - BlockRange().Remove(indir); - BlockRange().Remove(addr); + if (location->OperIsSimdOrHWintrinsic() || location->IsCnsVec()) + { + // If we have IND(ADDR(SIMD)) then we can keep only the SIMD node. + // This is a special tree created by impNormStructVal to preserve the class layout + // needed by call morphing on an OBJ node. This information is no longer needed at + // this point (and the address of a SIMD node can't be obtained anyway). + + BlockRange().Remove(indir); + BlockRange().Remove(addr); - use.ReplaceWith(addr->AsUnOp()->gtGetOp1()); + use.ReplaceWith(addr->AsUnOp()->gtGetOp1()); + } } #endif // FEATURE_SIMD } @@ -376,20 +381,35 @@ void Rationalizer::RewriteAssignment(LIR::Use& use) { if (location->OperIs(GT_LCL_VAR)) { - var_types simdType = location->TypeGet(); - GenTree* initVal = assignment->AsOp()->gtOp2; + var_types simdType = location->TypeGet(); + GenTree* initVal = assignment->AsOp()->gtOp2; + CorInfoType simdBaseJitType = comp->getBaseJitTypeOfSIMDLocal(location); if (simdBaseJitType == CORINFO_TYPE_UNDEF) { // Lie about the type if we don't know/have it. 
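+                    // (harmless for the zero-constant path below: an all-zero
+                    //  vector has the same bit pattern for every base type)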
simdBaseJitType = CORINFO_TYPE_FLOAT; } - GenTreeSIMD* simdTree = - comp->gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType, genTypeSize(simdType)); - assignment->gtOp2 = simdTree; - value = simdTree; - BlockRange().InsertAfter(initVal, simdTree); + if (initVal->IsIntegralConst(0)) + { + GenTree* zeroCon = comp->gtNewZeroConNode(simdType, simdBaseJitType); + + assignment->gtOp2 = zeroCon; + value = zeroCon; + + BlockRange().InsertAfter(initVal, zeroCon); + BlockRange().Remove(initVal); + } + else + { + GenTreeSIMD* simdTree = comp->gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType, + genTypeSize(simdType)); + assignment->gtOp2 = simdTree; + value = simdTree; + + BlockRange().InsertAfter(initVal, simdTree); + } } } #endif // FEATURE_SIMD @@ -733,6 +753,22 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge } #endif // FEATURE_HW_INTRINSICS +#if defined(FEATURE_SIMD) + case GT_CNS_VEC: + { + GenTreeVecCon* vecCon = node->AsVecCon(); + + // TODO-1stClassStructs: do not retype SIMD nodes + + if ((vecCon->TypeIs(TYP_I_IMPL)) && (vecCon->GetSimdSize() == TARGET_POINTER_SIZE)) + { + assert(genTypeSize(vecCon->GetSimdBaseType()) == 4); + vecCon->gtType = TYP_SIMD8; + } + break; + } +#endif // FEATURE_SIMD + default: // Check that we don't have nodes not allowed in HIR here. assert((node->DebugOperKind() & DBK_NOTHIR) == 0); diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 8388f1ef3bc617..f59c09b72a7c04 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -36,6 +36,112 @@ enum SIMDLevel #endif }; +struct simd8_t +{ + union { + float f32[2]; + double f64[1]; + int8_t i8[8]; + int16_t i16[4]; + int32_t i32[2]; + int64_t i64[1]; + uint8_t u8[8]; + uint16_t u16[4]; + uint32_t u32[2]; + uint64_t u64[1]; + }; + + bool operator==(const simd8_t& other) const + { + return (u64[0] == other.u64[0]); + } + + bool operator!=(const simd8_t& other) const + { + return (u64[0] != other.u64[0]); + } +}; + +struct simd12_t +{ + union { + float f32[3]; + int8_t i8[12]; + int16_t i16[6]; + int32_t i32[3]; + uint8_t u8[12]; + uint16_t u16[6]; + uint32_t u32[3]; + }; + + bool operator==(const simd12_t& other) const + { + return (u32[0] == other.u32[0]) && (u32[1] == other.u32[1]) && (u32[2] == other.u32[2]); + } + + bool operator!=(const simd12_t& other) const + { + return (u32[0] != other.u32[0]) || (u32[1] != other.u32[1]) || (u32[2] != other.u32[2]); + } +}; + +struct simd16_t +{ + union { + float f32[4]; + double f64[2]; + int8_t i8[16]; + int16_t i16[8]; + int32_t i32[4]; + int64_t i64[2]; + uint8_t u8[16]; + uint16_t u16[8]; + uint32_t u32[4]; + uint64_t u64[2]; + simd8_t v64[2]; + }; + + bool operator==(const simd16_t& other) const + { + return (u64[0] == other.u64[0]) && (u64[1] == other.u64[1]); + } + + bool operator!=(const simd16_t& other) const + { + return (u64[0] != other.u64[0]) || (u64[1] != other.u64[1]); + } +}; + +struct simd32_t +{ + union { + float f32[8]; + double f64[4]; + int8_t i8[32]; + int16_t i16[16]; + int32_t i32[8]; + int64_t i64[4]; + uint8_t u8[32]; + uint16_t u16[16]; + uint32_t u32[8]; + uint64_t u64[4]; + simd8_t v64[4]; + simd16_t v128[2]; + }; + + bool operator==(const simd32_t& other) const + { + return (u64[0] == other.u64[0]) && (u64[1] == other.u64[1]) && (u64[2] == other.u64[2]) && + (u64[3] == other.u64[3]); + } + + bool operator!=(const simd32_t& other) const + { + return (u64[0] != other.u64[0]) || (u64[1] != other.u64[1]) || (u64[2] != other.u64[2]) || + (u64[3] != 
other.u64[3]); + } +}; + #ifdef FEATURE_SIMD #ifdef DEBUG diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index ab12655bc8afc8..9dee43ea9596b8 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -538,36 +538,86 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (intrinsic) { #if defined(TARGET_XARCH) + case NI_VectorT128_get_AllBitsSet: + case NI_VectorT256_get_AllBitsSet: + { + return gtNewAllBitsSetConNode(retType, simdBaseJitType); + } + + case NI_VectorT128_get_Count: + case NI_VectorT256_get_Count: + { + GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT); + countNode->gtFlags |= GTF_ICON_SIMD_COUNT; + return countNode; + } + case NI_Vector2_get_One: case NI_Vector3_get_One: case NI_Vector4_get_One: case NI_VectorT128_get_One: case NI_VectorT256_get_One: { + GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType); + uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); + switch (simdBaseType) { case TYP_BYTE: case TYP_UBYTE: + { + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd32Val.u8[index] = 1; + } + break; + } + case TYP_SHORT: case TYP_USHORT: + { + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd32Val.u16[index] = 1; + } + break; + } + case TYP_INT: case TYP_UINT: { - op1 = gtNewIconNode(1, TYP_INT); + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd32Val.u32[index] = 1; + } break; } case TYP_LONG: case TYP_ULONG: { - op1 = gtNewLconNode(1); + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd32Val.u64[index] = 1; + } break; } case TYP_FLOAT: + { + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd32Val.f32[index] = 1.0f; + } + break; + } + case TYP_DOUBLE: { - op1 = gtNewDconNode(1.0, simdBaseType); + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd32Val.f64[index] = 1.0; + } break; } @@ -577,47 +627,95 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } } - return gtNewSimdCreateBroadcastNode(retType, op1, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ true); + return vecCon; + } + + case NI_Vector2_get_Zero: + case NI_Vector3_get_Zero: + case NI_Vector4_get_Zero: + case NI_VectorT128_get_Zero: + case NI_VectorT256_get_Zero: + { + return gtNewZeroConNode(retType, simdBaseJitType); + } +#elif defined(TARGET_ARM64) + case NI_VectorT128_get_AllBitsSet: + { + return gtNewAllBitsSetConNode(retType, simdBaseJitType); } case NI_VectorT128_get_Count: - case NI_VectorT256_get_Count: { GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT); countNode->gtFlags |= GTF_ICON_SIMD_COUNT; return countNode; } -#elif defined(TARGET_ARM64) + case NI_Vector2_get_One: case NI_Vector3_get_One: case NI_Vector4_get_One: case NI_VectorT128_get_One: { + GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType); + uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); + switch (simdBaseType) { case TYP_BYTE: case TYP_UBYTE: + { + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd16Val.u8[index] = 1; + } + break; + } + case TYP_SHORT: case TYP_USHORT: + { + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd16Val.u16[index] = 1; + } + break; + } + case TYP_INT: case TYP_UINT: { - op1 = gtNewIconNode(1, TYP_INT); + for (uint32_t index = 
0; index < simdLength; index++) + { + vecCon->gtSimd16Val.u32[index] = 1; + } break; } case TYP_LONG: case TYP_ULONG: { - op1 = gtNewLconNode(1); + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd16Val.u64[index] = 1; + } break; } case TYP_FLOAT: + { + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd16Val.f32[index] = 1.0f; + } + break; + } + case TYP_DOUBLE: { - op1 = gtNewDconNode(1.0, simdBaseType); + for (uint32_t index = 0; index < simdLength; index++) + { + vecCon->gtSimd16Val.f64[index] = 1.0; + } break; } @@ -627,15 +725,15 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } } - return gtNewSimdCreateBroadcastNode(retType, op1, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ true); + return vecCon; } - case NI_VectorT128_get_Count: + case NI_Vector2_get_Zero: + case NI_Vector3_get_Zero: + case NI_Vector4_get_Zero: + case NI_VectorT128_get_Zero: { - GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT); - countNode->gtFlags |= GTF_ICON_SIMD_COUNT; - return countNode; + return gtNewZeroConNode(retType, simdBaseJitType); } #else #error Unsupported platform diff --git a/src/coreclr/jit/simdashwintrinsiclistarm64.h b/src/coreclr/jit/simdashwintrinsiclistarm64.h index 848889b59dedc8..78ca6ab6e072cc 100644 --- a/src/coreclr/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/jit/simdashwintrinsiclistarm64.h @@ -41,7 +41,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, Abs, SIMD_AS_HWINTRINSIC_NM(Vector2, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -61,7 +61,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero , NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -81,7 +81,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -112,11 +112,11 @@ SIMD_AS_HWINTRINSIC_NM(VectorT128, CreateBroadcast, ".ctor", SIMD_AS_HWINTRINSIC_ID(VectorT128, Dot, 2, {NI_Vector128_Dot, NI_Vector128_Dot, NI_Vector128_Dot, NI_Vector128_Dot, NI_Vector128_Dot, NI_Vector128_Dot, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Vector128_Dot}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, 
NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Floor, NI_AdvSimd_Arm64_Floor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Item, 2, {NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg) SIMD_AS_HWINTRINSIC_ID(VectorT128, get_One, 0, {NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, 
NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/jit/simdashwintrinsiclistxarch.h b/src/coreclr/jit/simdashwintrinsiclistxarch.h index e6f3134185b0c4..d676bbe80f2001 100644 --- a/src/coreclr/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/jit/simdashwintrinsiclistxarch.h @@ -41,7 +41,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, Abs, SIMD_AS_HWINTRINSIC_NM(Vector2, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -61,7 +61,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
 SIMD_AS_HWINTRINSIC_ID(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
@@ -81,7 +81,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, Abs,
 SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod)
 SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
@@ -112,11 +112,11 @@ SIMD_AS_HWINTRINSIC_NM(VectorT128, CreateBroadcast, ".ctor",
 SIMD_AS_HWINTRINSIC_ID(VectorT128, Dot, 2, {NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Vector128_Dot, NI_VectorT128_Dot, NI_VectorT128_Dot, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Vector128_Dot}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Floor, NI_SSE41_Floor}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Item, 2, {NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, get_One, 0, {NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None)
@@ -162,11 +162,11 @@ SIMD_AS_HWINTRINSIC_NM(VectorT256, CreateBroadcast, ".ctor",
 SIMD_AS_HWINTRINSIC_ID(VectorT256, Dot, 2, {NI_Illegal, NI_Illegal, NI_Vector256_Dot, NI_Vector256_Dot, NI_Vector256_Dot, NI_Vector256_Dot, NI_Illegal, NI_Illegal, NI_Vector256_Dot, NI_Vector256_Dot}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Floor, NI_AVX_Floor}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT256, get_AllBitsSet, 0, {NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256, get_AllBitsSet, 0, {NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Count, 0, {NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Item, 2, {NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg)
 SIMD_AS_HWINTRINSIC_ID(VectorT256, get_One, 0, {NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Zero, 0, {NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Zero, 0, {NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None)
diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp
index 5e0942d1711335..b6f406d2b1d9d2 100644
--- a/src/coreclr/jit/simdcodegenxarch.cpp
+++ b/src/coreclr/jit/simdcodegenxarch.cpp
@@ -442,7 +442,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
 #endif // !defined(TARGET_64BIT)
     if (op1->isContained())
     {
-        if (op1->IsIntegralConst(0) || op1->IsFPZero())
+        if (op1->IsIntegralConst(0) || op1->IsFloatPositiveZero())
         {
             genSIMDZero(targetType, baseType, targetReg);
         }
@@ -943,22 +943,7 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
     unsigned varNum = lclVar->GetLclNum();
     assert(varNum < compiler->lvaCount);
 
-    regNumber tmpReg = treeNode->GetSingleTempReg();
-
-    GenTree* op1 = lclVar->gtOp1;
-    if (op1->isContained())
-    {
-        // This is only possible for a zero-init.
-        assert(op1->IsIntegralConst(0) || op1->IsSIMDZero());
-        genSIMDZero(TYP_SIMD16, op1->AsSIMD()->GetSimdBaseType(), tmpReg);
-
-        // store lower 8 bytes
-        GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, tmpReg, varNum, offs);
-
-        // Store upper 4 bytes
-        GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8);
-
-        return;
-    }
+    GenTree* op1 = lclVar->gtOp1;
 
     assert(!op1->isContained());
     regNumber operandReg = genConsumeReg(op1);
@@ -966,11 +951,18 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
     // store lower 8 bytes
     GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs);
 
-    // Extract upper 4-bytes from operandReg
-    GetEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
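+    // When op1 is a zero vector, operandReg already holds zero in every lane, so
+    // it can supply the upper 4 bytes directly and the pshufd extract is skipped.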
+    if (!op1->IsVectorZero())
+    {
+        regNumber tmpReg = treeNode->GetSingleTempReg();
+
+        // Extract upper 4-bytes from operandReg
+        GetEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
+
+        operandReg = tmpReg;
+    }
 
     // Store upper 4 bytes
-    GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8);
+    GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, operandReg, varNum, offs + 8);
 }
 
 //-----------------------------------------------------------------------------
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index 8dec7213cd3551..47a311ef2e14df 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -442,6 +442,12 @@ ValueNumStore::ValueNumStore(Compiler* comp, CompAllocator alloc)
     , m_floatCnsMap(nullptr)
     , m_doubleCnsMap(nullptr)
     , m_byrefCnsMap(nullptr)
+#if defined(FEATURE_SIMD)
+    , m_simd8CnsMap(nullptr)
+    , m_simd12CnsMap(nullptr)
+    , m_simd16CnsMap(nullptr)
+    , m_simd32CnsMap(nullptr)
+#endif // FEATURE_SIMD
     , m_VNFunc0Map(nullptr)
     , m_VNFunc1Map(nullptr)
     , m_VNFunc2Map(nullptr)
@@ -1672,6 +1678,33 @@ ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_type
             // Since this value is always the same, we represent it as a static.
             m_defs = &s_specialRefConsts[0];
             break; // Nothing to do.
+
+#if defined(FEATURE_SIMD)
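+        // Each SIMD width gets its own constant chunk: the defs array stores the
+        // simdN_t payloads by value, just like the scalar constant chunks above.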
+        case TYP_SIMD8:
+        {
+            m_defs = new (alloc) Alloc<TYP_SIMD8>::Type[ChunkSize];
+            break;
+        }
+
+        case TYP_SIMD12:
+        {
+            m_defs = new (alloc) Alloc<TYP_SIMD12>::Type[ChunkSize];
+            break;
+        }
+
+        case TYP_SIMD16:
+        {
+            m_defs = new (alloc) Alloc<TYP_SIMD16>::Type[ChunkSize];
+            break;
+        }
+
+        case TYP_SIMD32:
+        {
+            m_defs = new (alloc) Alloc<TYP_SIMD32>::Type[ChunkSize];
+            break;
+        }
+#endif // FEATURE_SIMD
+
         default:
             assert(false); // Should not reach here.
     }
@@ -1807,6 +1840,28 @@ ValueNum ValueNumStore::VNForByrefCon(target_size_t cnsVal)
     return VnForConst(cnsVal, GetByrefCnsMap(), TYP_BYREF);
 }
 
+#if defined(FEATURE_SIMD)
+ValueNum ValueNumStore::VNForSimd8Con(simd8_t cnsVal)
+{
+    return VnForConst(cnsVal, GetSimd8CnsMap(), TYP_SIMD8);
+}
+
+ValueNum ValueNumStore::VNForSimd12Con(simd12_t cnsVal)
+{
+    return VnForConst(cnsVal, GetSimd12CnsMap(), TYP_SIMD12);
+}
+
+ValueNum ValueNumStore::VNForSimd16Con(simd16_t cnsVal)
+{
+    return VnForConst(cnsVal, GetSimd16CnsMap(), TYP_SIMD16);
+}
+
+ValueNum ValueNumStore::VNForSimd32Con(simd32_t cnsVal)
+{
+    return VnForConst(cnsVal, GetSimd32CnsMap(), TYP_SIMD32);
+}
+#endif // FEATURE_SIMD
+
 ValueNum ValueNumStore::VNForCastOper(var_types castToType, bool srcIsUnsigned)
 {
     assert(castToType != TYP_STRUCT);
@@ -1889,15 +1944,24 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ)
 
 #ifdef FEATURE_SIMD
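+        // A SIMD zero is now simply the all-bits-zero simdN_t constant, so no base
+        // type has to be invented (this replaces the old VNF_SIMD_Init encoding).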
         case TYP_SIMD8:
+        {
+            return VNForSimd8Con({});
+        }
+
         case TYP_SIMD12:
+        {
+            return VNForSimd12Con({});
+        }
+
         case TYP_SIMD16:
+        {
+            return VNForSimd16Con({});
+        }
+
         case TYP_SIMD32:
-            // We do not have the base type - a "fake" one will have to do. Note that we cannot
-            // use the HWIntrinsic "get_Zero" VNFunc here. This is because they only represent
-            // "fully zeroed" vectors, and here we may be loading one from memory, leaving upper
-            // bits undefined. So using "SIMD_Init" is "the next best thing", so to speak, and
-            // TYP_FLOAT is one of the more popular base types, so that's why we use it here.
-            return VNForFunc(typ, VNF_SIMD_Init, VNForFloatCon(0), VNForSimdType(genTypeSize(typ), CORINFO_TYPE_FLOAT));
+        {
+            return VNForSimd32Con({});
+        }
 #endif // FEATURE_SIMD
 
         // These should be unreached.
@@ -2939,6 +3003,48 @@ float ValueNumStore::GetConstantSingle(ValueNum argVN)
     return ConstantValue<float>(argVN);
 }
 
+#if defined(FEATURE_SIMD)
+// Given a simd8 constant value number return its value as a simd8.
+//
+simd8_t ValueNumStore::GetConstantSimd8(ValueNum argVN)
+{
+    assert(IsVNConstant(argVN));
+    assert(TypeOfVN(argVN) == TYP_SIMD8);
+
+    return ConstantValue<simd8_t>(argVN);
+}
+
+// Given a simd12 constant value number return its value as a simd12.
+//
+simd12_t ValueNumStore::GetConstantSimd12(ValueNum argVN)
+{
+    assert(IsVNConstant(argVN));
+    assert(TypeOfVN(argVN) == TYP_SIMD12);
+
+    return ConstantValue<simd12_t>(argVN);
+}
+
+// Given a simd16 constant value number return its value as a simd16.
+//
+simd16_t ValueNumStore::GetConstantSimd16(ValueNum argVN)
+{
+    assert(IsVNConstant(argVN));
+    assert(TypeOfVN(argVN) == TYP_SIMD16);
+
+    return ConstantValue<simd16_t>(argVN);
+}
+
+// Given a simd32 constant value number return its value as a simd32.
+//
+simd32_t ValueNumStore::GetConstantSimd32(ValueNum argVN)
+{
+    assert(IsVNConstant(argVN));
+    assert(TypeOfVN(argVN) == TYP_SIMD32);
+
+    return ConstantValue<simd32_t>(argVN);
+}
+#endif // FEATURE_SIMD
+
 // Compute the proper value number when the VNFunc has all constant arguments
 // This essentially performs constant folding at value numbering time
 //
@@ -4994,114 +5100,6 @@ bool ValueNumStore::IsVNConstant(ValueNum vn)
     }
 }
 
-//------------------------------------------------------------------------
-// IsVNVectorZero: Checks if the value number is a Vector*_get_Zero.
-//
-// Arguments:
-//    vn - The value number.
-//
-// Return Value:
-//    true - The value number is a Vector*_get_Zero.
-//    false - The value number is not a Vector*_get_Zero.
-bool ValueNumStore::IsVNVectorZero(ValueNum vn)
-{
-#ifdef FEATURE_SIMD
-    VNSimdTypeInfo vnInfo = GetVectorZeroSimdTypeOfVN(vn);
-    // Check the size to see if we got a valid SIMD type.
-    // '0' means it is not valid.
-    if (vnInfo.m_simdSize != 0)
-    {
-        return true;
-    }
-#endif
-    return false;
-}
-
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// GetSimdTypeOfVN: Returns the SIMD type information based on the given value number.
-//
-// Arguments:
-//    vn - The value number.
-//
-// Return Value:
-//    Returns VNSimdTypeInfo(0, CORINFO_TYPE_UNDEF) if the given value number has not been given a SIMD type.
-VNSimdTypeInfo ValueNumStore::GetSimdTypeOfVN(ValueNum vn)
-{
-    VNSimdTypeInfo vnInfo;
-
-    // The SIMD type is encoded as a function,
-    // even though it is not actually a function.
-    VNFuncApp simdType;
-    if (GetVNFunc(vn, &simdType) && simdType.m_func == VNF_SimdType)
-    {
-        assert(simdType.m_arity == 2);
-        vnInfo.m_simdSize        = GetConstantInt32(simdType.m_args[0]);
-        vnInfo.m_simdBaseJitType = (CorInfoType)GetConstantInt32(simdType.m_args[1]);
-        return vnInfo;
-    }
-
-    vnInfo.m_simdSize        = 0;
-    vnInfo.m_simdBaseJitType = CORINFO_TYPE_UNDEF;
-    return vnInfo;
-}
-
-//------------------------------------------------------------------------
-// GetVectorZeroSimdTypeOfVN: Returns the SIMD type information based on the given value number
-//                            if it's Vector*_get_Zero.
-//
-// Arguments:
-//    vn - The value number.
-//
-// Return Value:
-//    Returns VNSimdTypeInfo(0, CORINFO_TYPE_UNDEF) if the given value number has not been given a SIMD type
-//    for a Vector*_get_Zero value number.
-//
-// REVIEW: Vector*_get_Zero nodes in VN currently encode their SIMD type for
-//         conservative reasons. In the future, it might be possible not do this
-//         on most platforms since Vector*_get_Zero's base type does not matter.
-VNSimdTypeInfo ValueNumStore::GetVectorZeroSimdTypeOfVN(ValueNum vn)
-{
-#ifdef FEATURE_HW_INTRINSICS
-    // REVIEW: This will only return true if Vector*_get_Zero encodes
-    //         its base type as an argument. On XARCH there may be
-    //         scenarios where Vector*_get_Zero will not encode its base type;
-    //         therefore, returning false here.
-    //         Vector*_get_Zero does not have any arguments,
-    //         but its SIMD type is encoded as an argument.
-    VNFuncApp funcApp;
-    if (GetVNFunc(vn, &funcApp) && funcApp.m_arity == 1)
-    {
-        switch (funcApp.m_func)
-        {
-            case VNF_HWI_Vector128_get_Zero:
-#if defined(TARGET_XARCH)
-            case VNF_HWI_Vector256_get_Zero:
-#elif defined(TARGET_ARM64)
-            case VNF_HWI_Vector64_get_Zero:
-#endif
-            {
-                return GetSimdTypeOfVN(funcApp.m_args[0]);
-            }
-
-            default:
-            {
-                VNSimdTypeInfo vnInfo;
-                vnInfo.m_simdSize        = 0;
-                vnInfo.m_simdBaseJitType = CORINFO_TYPE_UNDEF;
-                return vnInfo;
-            }
-        }
-    }
-#endif
-
-    VNSimdTypeInfo vnInfo;
-    vnInfo.m_simdSize        = 0;
-    vnInfo.m_simdBaseJitType = CORINFO_TYPE_UNDEF;
-    return vnInfo;
-}
-#endif // FEATURE_SIMD
-
 bool ValueNumStore::IsVNInt32Constant(ValueNum vn)
 {
     if (!IsVNConstant(vn))
@@ -6464,17 +6462,34 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr)
 
 #ifdef FEATURE_SIMD
             case TYP_SIMD8:
+            {
+                simd8_t cnsVal = GetConstantSimd8(vn);
+                printf("Simd8Cns[0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1]);
+                break;
+            }
+
            case TYP_SIMD12:
+            {
+                simd12_t cnsVal = GetConstantSimd12(vn);
+                printf("Simd12Cns[0x%08x, 0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1], cnsVal.u32[2]);
+                break;
+            }
+
            case TYP_SIMD16:
+            {
+                simd16_t cnsVal = GetConstantSimd16(vn);
+                printf("Simd16Cns[0x%08x, 0x%08x, 0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1], cnsVal.u32[2],
+                       cnsVal.u32[3]);
+                break;
+            }
+
            case TYP_SIMD32:
             {
-                // Only the zero constant is currently allowed for SIMD types
-                //
-                INT64 val = ConstantValue<INT64>(vn);
-                assert(val == 0);
-                printf(" 0");
+                simd32_t cnsVal = GetConstantSimd32(vn);
+                printf("Simd32Cns[0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx]", cnsVal.u64[0], cnsVal.u64[1],
+                       cnsVal.u64[2], cnsVal.u64[3]);
+                break;
             }
-            break;
 #endif // FEATURE_SIMD
 
             // These should be unreached.
@@ -7944,6 +7959,16 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree)
                 tree->gtVNPair.SetBoth(
                     vnStore->VNForHandle(ssize_t(tree->AsIntConCommon()->IconValue()), tree->GetIconHandleFlag()));
             }
+#ifdef FEATURE_SIMD
+            else if (tree->IsCnsVec())
+            {
+                // TODO-1stClassStructs: do not retype SIMD nodes
+                assert(varTypeIsLong(typ));
+
+                simd8_t simd8Val = tree->AsVecCon()->gtSimd8Val;
+                tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val));
+            }
+#endif // FEATURE_SIMD
             else if ((typ == TYP_LONG) || (typ == TYP_ULONG))
             {
                 tree->gtVNPair.SetBoth(vnStore->VNForLongCon(INT64(tree->AsIntConCommon()->LngValue())));
@@ -7956,28 +7981,101 @@
 
 #ifdef FEATURE_SIMD
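+        // A SIMD-typed constant is now either a GT_CNS_VEC or, until those are
+        // cleaned up, a SIMD-typed CNS_INT zero; both become simdN_t constant VNs.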
         case TYP_SIMD8:
+        {
+            simd8_t simd8Val;
+
+            // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+            if (tree->IsIntegralConst(0))
+            {
+                simd8Val = {};
+            }
+            else
+            {
+                simd8Val = tree->AsVecCon()->gtSimd8Val;
+            }
+
+            tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val));
+            break;
+        }
+
         case TYP_SIMD12:
+        {
+            simd12_t simd12Val;
+
+            // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+            if (tree->IsIntegralConst(0))
+            {
+                simd12Val = {};
+            }
+            else
+            {
+                simd12Val = tree->AsVecCon()->gtSimd12Val;
+            }
+
+            tree->gtVNPair.SetBoth(vnStore->VNForSimd12Con(simd12Val));
+            break;
+        }
+
         case TYP_SIMD16:
+        {
+            simd16_t simd16Val;
+
+            // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+            if (tree->IsIntegralConst(0))
+            {
+                simd16Val = {};
+            }
+            else
+            {
+                simd16Val = tree->AsVecCon()->gtSimd16Val;
+            }
+
+            tree->gtVNPair.SetBoth(vnStore->VNForSimd16Con(simd16Val));
+            break;
+        }
+
         case TYP_SIMD32:
+        {
+            simd32_t simd32Val;
 
-#ifdef TARGET_64BIT
-            // Only the zero constant is currently allowed for SIMD types
-            //
-            assert(tree->AsIntConCommon()->LngValue() == 0);
-            tree->gtVNPair.SetBoth(vnStore->VNForLongCon(tree->AsIntConCommon()->LngValue()));
-#else // 32BIT
-            assert(tree->AsIntConCommon()->IconValue() == 0);
-            tree->gtVNPair.SetBoth(vnStore->VNForIntCon(int(tree->AsIntConCommon()->IconValue())));
-#endif
+            // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+            if (tree->IsIntegralConst(0))
+            {
+                simd32Val = {};
+            }
+            else
+            {
+                simd32Val = tree->AsVecCon()->gtSimd32Val;
+            }
+
+            tree->gtVNPair.SetBoth(vnStore->VNForSimd32Con(simd32Val));
             break;
+        }
 #endif // FEATURE_SIMD
 
         case TYP_FLOAT:
+        {
             tree->gtVNPair.SetBoth(vnStore->VNForFloatCon((float)tree->AsDblCon()->gtDconVal));
             break;
+        }
+
         case TYP_DOUBLE:
-            tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->AsDblCon()->gtDconVal));
+        {
+#ifdef FEATURE_SIMD
+            if (tree->IsCnsVec())
+            {
+                // TODO-1stClassStructs: do not retype SIMD nodes
+                simd8_t simd8Val = tree->AsVecCon()->gtSimd8Val;
+                tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val));
+            }
+            else
+#endif // FEATURE_SIMD
+            {
+                tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->AsDblCon()->gtDconVal));
+            }
             break;
+        }
+
         case TYP_REF:
             if (tree->AsIntConCommon()->IconValue() == 0)
             {
diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h
index 734011625c0768..22755856066a7d 100644
--- a/src/coreclr/jit/valuenum.h
+++ b/src/coreclr/jit/valuenum.h
@@ -347,6 +347,13 @@ class ValueNumStore
     double GetConstantDouble(ValueNum argVN);
     float GetConstantSingle(ValueNum argVN);
 
+#if defined(FEATURE_SIMD)
+    simd8_t GetConstantSimd8(ValueNum argVN);
+    simd12_t GetConstantSimd12(ValueNum argVN);
+    simd16_t GetConstantSimd16(ValueNum argVN);
+    simd32_t GetConstantSimd32(ValueNum argVN);
+#endif // FEATURE_SIMD
+
     // Assumes that all the ValueNum arguments of each of these functions have been shown to represent constants.
     // Assumes that "vnf" is a operator of the appropriate arity (unary for the first, binary for the second).
     // Assume that "CanEvalForConstantArgs(vnf)" is true.
@@ -419,6 +426,13 @@ class ValueNumStore
     ValueNum VNForDoubleCon(double cnsVal);
     ValueNum VNForByrefCon(target_size_t byrefVal);
 
+#if defined(FEATURE_SIMD)
+    ValueNum VNForSimd8Con(simd8_t cnsVal);
+    ValueNum VNForSimd12Con(simd12_t cnsVal);
+    ValueNum VNForSimd16Con(simd16_t cnsVal);
+    ValueNum VNForSimd32Con(simd32_t cnsVal);
+#endif // FEATURE_SIMD
+
 #ifdef TARGET_64BIT
     ValueNum VNForPtrSizeIntCon(INT64 cnsVal)
     {
@@ -748,14 +762,6 @@ class ValueNumStore
     // Returns true iff the VN represents a (non-handle) constant.
     bool IsVNConstant(ValueNum vn);
 
-    bool IsVNVectorZero(ValueNum vn);
-
-#ifdef FEATURE_SIMD
-    VNSimdTypeInfo GetSimdTypeOfVN(ValueNum vn);
-
-    VNSimdTypeInfo GetVectorZeroSimdTypeOfVN(ValueNum vn);
-#endif
-
     // Returns true iff the VN represents an integer constant.
     bool IsVNInt32Constant(ValueNum vn);
 
@@ -1390,6 +1396,133 @@ class ValueNumStore
         return m_byrefCnsMap;
     }
 
+#if defined(FEATURE_SIMD)
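+    // Key functions for pooling SIMD constants in a VNMap: keys are compared
+    // lane-for-lane and hashed by XOR-folding their 32-bit elements.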
+    struct Simd8PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd8_t>
+    {
+        static bool Equals(simd8_t x, simd8_t y)
+        {
+            return x == y;
+        }
+
+        static unsigned GetHashCode(const simd8_t val)
+        {
+            unsigned hash = 0;
+
+            hash = static_cast<unsigned>(hash ^ val.u32[0]);
+            hash = static_cast<unsigned>(hash ^ val.u32[1]);
+
+            return hash;
+        }
+    };
+
+    typedef VNMap<simd8_t, Simd8PrimitiveKeyFuncs> Simd8ToValueNumMap;
+    Simd8ToValueNumMap* m_simd8CnsMap;
+    Simd8ToValueNumMap* GetSimd8CnsMap()
+    {
+        if (m_simd8CnsMap == nullptr)
+        {
+            m_simd8CnsMap = new (m_alloc) Simd8ToValueNumMap(m_alloc);
+        }
+        return m_simd8CnsMap;
+    }
+
+    struct Simd12PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd12_t>
+    {
+        static bool Equals(simd12_t x, simd12_t y)
+        {
+            return x == y;
+        }
+
+        static unsigned GetHashCode(const simd12_t val)
+        {
+            unsigned hash = 0;
+
+            hash = static_cast<unsigned>(hash ^ val.u32[0]);
+            hash = static_cast<unsigned>(hash ^ val.u32[1]);
+            hash = static_cast<unsigned>(hash ^ val.u32[2]);
+
+            return hash;
+        }
+    };
+
+    typedef VNMap<simd12_t, Simd12PrimitiveKeyFuncs> Simd12ToValueNumMap;
+    Simd12ToValueNumMap* m_simd12CnsMap;
+    Simd12ToValueNumMap* GetSimd12CnsMap()
+    {
+        if (m_simd12CnsMap == nullptr)
+        {
+            m_simd12CnsMap = new (m_alloc) Simd12ToValueNumMap(m_alloc);
+        }
+        return m_simd12CnsMap;
+    }
+
+    struct Simd16PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd16_t>
+    {
+        static bool Equals(simd16_t x, simd16_t y)
+        {
+            return x == y;
+        }
+
+        static unsigned GetHashCode(const simd16_t val)
+        {
+            unsigned hash = 0;
+
+            hash = static_cast<unsigned>(hash ^ val.u32[0]);
+            hash = static_cast<unsigned>(hash ^ val.u32[1]);
+            hash = static_cast<unsigned>(hash ^ val.u32[2]);
+            hash = static_cast<unsigned>(hash ^ val.u32[3]);
+
+            return hash;
+        }
+    };
+
+    typedef VNMap<simd16_t, Simd16PrimitiveKeyFuncs> Simd16ToValueNumMap;
+    Simd16ToValueNumMap* m_simd16CnsMap;
+    Simd16ToValueNumMap* GetSimd16CnsMap()
+    {
+        if (m_simd16CnsMap == nullptr)
+        {
+            m_simd16CnsMap = new (m_alloc) Simd16ToValueNumMap(m_alloc);
+        }
+        return m_simd16CnsMap;
+    }
+
+    struct Simd32PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd32_t>
+    {
+        static bool Equals(simd32_t x, simd32_t y)
+        {
+            return x == y;
+        }
+
+        static unsigned GetHashCode(const simd32_t val)
+        {
+            unsigned hash = 0;
+
+            hash = static_cast<unsigned>(hash ^ val.u32[0]);
+            hash = static_cast<unsigned>(hash ^ val.u32[1]);
+            hash = static_cast<unsigned>(hash ^ val.u32[2]);
+            hash = static_cast<unsigned>(hash ^ val.u32[3]);
+            hash = static_cast<unsigned>(hash ^ val.u32[4]);
+            hash = static_cast<unsigned>(hash ^ val.u32[5]);
+            hash = static_cast<unsigned>(hash ^ val.u32[6]);
+            hash = static_cast<unsigned>(hash ^ val.u32[7]);
+
+            return hash;
+        }
+    };
+
+    typedef VNMap<simd32_t, Simd32PrimitiveKeyFuncs> Simd32ToValueNumMap;
+    Simd32ToValueNumMap* m_simd32CnsMap;
+    Simd32ToValueNumMap* GetSimd32CnsMap()
+    {
+        if (m_simd32CnsMap == nullptr)
+        {
+            m_simd32CnsMap = new (m_alloc) Simd32ToValueNumMap(m_alloc);
+        }
+        return m_simd32CnsMap;
+    }
+#endif // FEATURE_SIMD
+
 template <size_t NumArgs>
 struct VNDefFuncAppKeyFuncs : public JitKeyFuncsDefEquals<VNDefFuncApp<NumArgs>>
 {
@@ -1508,6 +1641,34 @@ struct ValueNumStore::VarTypConv<TYP_DOUBLE>
     typedef INT64 Type;
     typedef double Lang;
 };
+
+#if defined(FEATURE_SIMD)
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD8>
+{
+    typedef simd8_t Type;
+    typedef simd8_t Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD12>
+{
+    typedef simd12_t Type;
+    typedef simd12_t Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD16>
+{
+    typedef simd16_t Type;
+    typedef simd16_t Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD32>
+{
+    typedef simd32_t Type;
+    typedef simd32_t Lang;
+};
+#endif // FEATURE_SIMD
+
 template <>
 struct ValueNumStore::VarTypConv<TYP_BYREF>
 {
@@ -1545,6 +1706,92 @@ FORCEINLINE T ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset)
     }
 }
 
+#if defined(FEATURE_SIMD)
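+// Explicit specializations for the SIMD types: their constants are stored as raw
+// simdN_t payloads, so they are read back without any Type/Lang coercion.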
+template <>
+FORCEINLINE simd8_t ValueNumStore::SafeGetConstantValue<simd8_t>(Chunk* c, unsigned offset)
+{
+    assert(c->m_typ == TYP_SIMD8);
+    return reinterpret_cast<VarTypConv<TYP_SIMD8>::Lang*>(c->m_defs)[offset];
+}
+
+template <>
+FORCEINLINE simd12_t ValueNumStore::SafeGetConstantValue<simd12_t>(Chunk* c, unsigned offset)
+{
+    assert(c->m_typ == TYP_SIMD12);
+    return reinterpret_cast<VarTypConv<TYP_SIMD12>::Lang*>(c->m_defs)[offset];
+}
+
+template <>
+FORCEINLINE simd16_t ValueNumStore::SafeGetConstantValue<simd16_t>(Chunk* c, unsigned offset)
+{
+    assert(c->m_typ == TYP_SIMD16);
+    return reinterpret_cast<VarTypConv<TYP_SIMD16>::Lang*>(c->m_defs)[offset];
+}
+
+template <>
+FORCEINLINE simd32_t ValueNumStore::SafeGetConstantValue<simd32_t>(Chunk* c, unsigned offset)
+{
+    assert(c->m_typ == TYP_SIMD32);
+    return reinterpret_cast<VarTypConv<TYP_SIMD32>::Lang*>(c->m_defs)[offset];
+}
+
+template <>
+FORCEINLINE simd8_t ValueNumStore::ConstantValueInternal<simd8_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+    Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+    assert(c->m_attribs == CEA_Const);
+
+    unsigned offset = ChunkOffset(vn);
+
+    assert(c->m_typ == TYP_SIMD8);
+    assert(!coerce);
+
+    return SafeGetConstantValue<simd8_t>(c, offset);
+}
+
+template <>
+FORCEINLINE simd12_t ValueNumStore::ConstantValueInternal<simd12_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+    Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+    assert(c->m_attribs == CEA_Const);
+
+    unsigned offset = ChunkOffset(vn);
+
+    assert(c->m_typ == TYP_SIMD12);
+    assert(!coerce);
+
+    return SafeGetConstantValue<simd12_t>(c, offset);
+}
+
+template <>
+FORCEINLINE simd16_t ValueNumStore::ConstantValueInternal<simd16_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+    Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+    assert(c->m_attribs == CEA_Const);
+
+    unsigned offset = ChunkOffset(vn);
+
+    assert(c->m_typ == TYP_SIMD16);
+    assert(!coerce);
+
+    return SafeGetConstantValue<simd16_t>(c, offset);
+}
+
+template <>
+FORCEINLINE simd32_t ValueNumStore::ConstantValueInternal<simd32_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+    Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+    assert(c->m_attribs == CEA_Const);
+
+    unsigned offset = ChunkOffset(vn);
+
+    assert(c->m_typ == TYP_SIMD32);
+    assert(!coerce);
+
+    return SafeGetConstantValue<simd32_t>(c, offset);
+}
+#endif // FEATURE_SIMD
+
 // Inline functions.
 
 // static