Skip to content
29 changes: 27 additions & 2 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20219,8 +20219,33 @@ GenTree* Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, In
// TODO-1stClassStructs: We currently do not reuse an existing lclVar
// if it is a struct, because it requires some additional handling.

if (!varTypeIsStruct(lclTyp) && !argInfo.argHasSideEff && !argInfo.argHasGlobRef &&
!argInfo.argHasCallerLocalRef)
bool substitute = false;
switch (argNode->OperGet())
{
#ifdef FEATURE_HW_INTRINSICS
case GT_HWINTRINSIC:
{
// Enable for all parameterless (=invariant) hw intrinsics such as
// Vector128<>.Empty and Vector256<>.AllBitSets. We might consider
// doing that for Vector.Create(cns) as well.
if ((argNode->gtGetOp1() == nullptr) && (argNode->gtGetOp2() == nullptr))
{
substitute = true;
}
break;
}
#endif

// TODO: Enable substitution for CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE (typeof(T))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have an existing issue for this TODO?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's why I want to merge it - to start experimenting with it 🙂 the issue is #40381

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I just wanted to make sure we had some github issue corresponding to the TODO, so its not just a comment we'll forget about

// but in order to benefit from that, we need to move various "typeof + IsValueType"
// optimizations from importer to morph.

default:
break;
}

if (substitute || (!varTypeIsStruct(lclTyp) && !argInfo.argHasSideEff && !argInfo.argHasGlobRef &&
!argInfo.argHasCallerLocalRef))
{
/* Get a *LARGE* LCL_VAR node */
op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp) DEBUGARG(lclNum));
Expand Down
49 changes: 49 additions & 0 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14290,6 +14290,55 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
}
break;

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
case GT_HWINTRINSIC:
{
GenTreeHWIntrinsic* hw = tree->AsHWIntrinsic();
switch (hw->gtHWIntrinsicId)
{
case NI_SSE_Xor:
case NI_SSE2_Xor:
case NI_AVX_Xor:
case NI_AVX2_Xor:
{
// Optimize Xor(x, Vector_.Zero) to just x
GenTree* op1 = hw->gtGetOp1();
GenTree* op2 = hw->gtGetOp2();

// Is node - Vector_.Zero ?
auto isHwZero = [](GenTree* node) -> bool {
if (node->OperIs(GT_HWINTRINSIC))
{
NamedIntrinsic ni = node->AsHWIntrinsic()->gtHWIntrinsicId;
return (ni == NI_Vector128_get_Zero) || (ni == NI_Vector256_get_Zero);
}
return false;
};

if (isHwZero(op1))
{
DEBUG_DESTROY_NODE(tree);
DEBUG_DESTROY_NODE(op1);
INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
return op2;
}
if (isHwZero(op2))
{
DEBUG_DESTROY_NODE(tree);
DEBUG_DESTROY_NODE(op2);
INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
return op1;
}
break;
}

default:
break;
}
break;
}
#endif // defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)

default:
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ public bool Equals(Vector128<T> other)
return Sse.MoveMask(result) == 0b1111; // We have one bit per element
}

if (Sse41.IsSupported && (typeof(T) != typeof(double)))
{
Debug.Assert(typeof(T) != typeof(float));

// xor + testz is slightly better for integer types
Vector128<byte> xored = Sse2.Xor(this.AsByte(), other.AsByte());
return Sse41.TestZ(xored, xored);
}

if (Sse2.IsSupported)
{
if (typeof(T) == typeof(double))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,9 @@ public bool Equals(Vector256<T> other)
// bytes are exactly the same.

Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)));
Vector256<byte> result = Avx2.CompareEqual(this.AsByte(), other.AsByte());
return Avx2.MoveMask(result) == unchecked((int)(0b1111_1111_1111_1111_1111_1111_1111_1111)); // We have one bit per element

Vector256<byte> xored = Avx2.Xor(this.AsByte(), other.AsByte());
return Avx.TestZ(xored, xored);
}

return SoftwareFallback(in this, other);
Expand Down