Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 28 additions & 31 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19544,7 +19544,8 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
assert(varTypeIsArithmetic(simdBaseType));

assert(op1 != nullptr);
assert(op1->TypeIs(type, simdBaseType, genActualType(simdBaseType)));
assert(op1->TypeIs(type, simdBaseType, genActualType(simdBaseType)) ||
(op1->TypeIs(TYP_SIMD12) && type == TYP_SIMD16));

assert(op2 != nullptr);

Expand All @@ -19554,7 +19555,8 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
}
else
{
assert(op2->TypeIs(type, simdBaseType, genActualType(simdBaseType)));
assert(op2->TypeIs(type, simdBaseType, genActualType(simdBaseType)) ||
(op2->TypeIs(TYP_SIMD12) && type == TYP_SIMD16));
}

NamedIntrinsic intrinsic = NI_Illegal;
Expand Down Expand Up @@ -22425,11 +22427,9 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type,

GenTree* vecCon2 = gtCloneExpr(vecCon1);

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp2 = gtNewSimdHWIntrinsicNode(type, op2, vecCon2, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE,
tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE,
simdSize, isSimdAsHWIntrinsic);

CorInfoType permuteBaseJitType = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
Expand Down Expand Up @@ -22468,11 +22468,9 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type,

GenTree* vecCon2 = gtCloneExpr(vecCon1);

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp2 = gtNewSimdHWIntrinsicNode(type, op2, vecCon2, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE41_PackUnsignedSaturate, CORINFO_TYPE_USHORT,
tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, CORINFO_TYPE_USHORT,
simdSize, isSimdAsHWIntrinsic);

CorInfoType permuteBaseJitType = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
Expand Down Expand Up @@ -22576,10 +22574,8 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type,

GenTree* vecCon2 = gtCloneExpr(vecCon1);

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp2 = gtNewSimdHWIntrinsicNode(type, op2, vecCon2, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);

return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE,
simdSize, isSimdAsHWIntrinsic);
Expand Down Expand Up @@ -22618,10 +22614,10 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type,

GenTree* vecCon2 = gtCloneExpr(vecCon1);

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp2 = gtNewSimdHWIntrinsicNode(type, op2, vecCon2, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp1 =
gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp2 =
gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);

return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE41_PackUnsignedSaturate,
CORINFO_TYPE_USHORT, simdSize, isSimdAsHWIntrinsic);
Expand Down Expand Up @@ -22928,16 +22924,16 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type,
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;

GenTree* op1Dup = fgMakeMultiUse(&op1, clsHnd);
GenTree* op1Lower = gtNewSimdHWIntrinsicNode(type, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
GenTree* op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector256_GetLower, simdBaseJitType,
simdSize, isSimdAsHWIntrinsic);

op2 = gtNewVconNode(TYP_SIMD16);
op2->AsVecCon()->gtSimd16Val = vecCns.v128[0];

op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
isSimdAsHWIntrinsic);

GenTree* op1Upper = gtNewSimdHWIntrinsicNode(type, op1Dup, gtNewIconNode(1), NI_AVX_ExtractVector128,
GenTree* op1Upper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Dup, gtNewIconNode(1), NI_AVX_ExtractVector128,
simdBaseJitType, simdSize, isSimdAsHWIntrinsic);

op2 = gtNewVconNode(TYP_SIMD16);
Expand Down Expand Up @@ -23346,12 +23342,12 @@ GenTree* Compiler::gtNewSimdSumNode(
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(0x01, TYP_INT), NI_AVX_ExtractVector128,
simdBaseJitType, simdSize, isSimdAsHWIntrinsic);

tmp = gtNewSimdHWIntrinsicNode(simdType, tmp, NI_Vector256_GetLower, simdBaseJitType, simdSize,
tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp, NI_Vector256_GetLower, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, tmp, intrinsic, simdBaseJitType, 16, isSimdAsHWIntrinsic);
}

return gtNewSimdHWIntrinsicNode(type, op1, NI_Vector128_ToScalar, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
return gtNewSimdHWIntrinsicNode(type, op1, NI_Vector128_ToScalar, simdBaseJitType, 16, isSimdAsHWIntrinsic);
#elif defined(TARGET_ARM64)
switch (simdBaseType)
{
Expand Down Expand Up @@ -23544,8 +23540,8 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(!varTypeIsIntegral(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));

tmp1 =
gtNewSimdHWIntrinsicNode(type, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);

switch (simdBaseType)
{
Expand Down Expand Up @@ -23673,7 +23669,8 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(

if (simdSize == 8)
{
tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_GetLower, simdBaseJitType, 16, isSimdAsHWIntrinsic);
tmp1 =
gtNewSimdHWIntrinsicNode(TYP_SIMD8, tmp1, NI_Vector128_GetLower, simdBaseJitType, 16, isSimdAsHWIntrinsic);
}

return tmp1;
Expand Down Expand Up @@ -23706,8 +23703,8 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(!varTypeIsIntegral(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(1), NI_AVX_ExtractVector128, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(1), NI_AVX_ExtractVector128, simdBaseJitType,
simdSize, isSimdAsHWIntrinsic);

switch (simdBaseType)
{
Expand Down Expand Up @@ -23860,7 +23857,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(
zero = gtNewZeroConNode(TYP_SIMD16);
tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
simdBaseJitType, 16, isSimdAsHWIntrinsic);
return gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_GetLower, simdBaseJitType, simdSize,
return gtNewSimdHWIntrinsicNode(TYP_SIMD8, tmp1, NI_Vector128_GetLower, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
}
#else
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
assert(retType == TYP_SIMD8);

op1 = impSIMDPopStack(TYP_SIMD16);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_GetLower, simdBaseJitType, simdSize);
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_Vector128_GetLower, simdBaseJitType, simdSize);
break;
}

Expand Down Expand Up @@ -1056,7 +1056,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

if (varTypeIsByte(simdBaseType) && (simdSize == 16))
{
CORINFO_CLASS_HANDLE simdClsHnd = gtGetStructHandleForSimdOrHW(simdType, simdBaseJitType);
CORINFO_CLASS_HANDLE simdClsHnd = gtGetStructHandleForSimdOrHW(TYP_SIMD16, simdBaseJitType);

op1 = impCloneExpr(op1, &op2, simdClsHnd, CHECK_SPILL_ALL,
nullptr DEBUGARG("Clone op1 for vector extractmostsignificantbits"));
Expand All @@ -1069,10 +1069,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
/* isSimdAsHWIntrinsic */ false);
op1 = gtNewCastNode(TYP_INT, op1, /* isUnsigned */ true, TYP_INT);

GenTree* zero = gtNewZeroConNode(simdType);
GenTree* zero = gtNewZeroConNode(TYP_SIMD16);
ssize_t index = 8 / genTypeSize(simdBaseType);

op2 = gtNewSimdHWIntrinsicNode(simdType, op2, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op2, NI_Vector128_GetLower, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ false);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1382,7 +1382,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
simdVal.u64[3] = 0x8080808080808080;

shuffleIntrinsic = NI_AVX2_Shuffle;
moveMaskIntrinsic = NI_AVX2_MoveMask;
moveMaskIntrinsic = NI_SSE2_MoveMask;
}
else if (compOpportunisticallyDependsOn(InstructionSet_SSSE3))
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,13 +349,13 @@ class Lowering final : public Phase
GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);
GenTree* LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp);
void LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicDot(GenTreeHWIntrinsic* node);
#if defined(TARGET_XARCH)
void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
GenTree* TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode);
Expand Down
Loading