diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 2fe7d20f118e8b..aa6091520a8226 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -20388,8 +20388,33 @@ GenTree* Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, In // TODO-1stClassStructs: We currently do not reuse an existing lclVar // if it is a struct, because it requires some additional handling. - if (!varTypeIsStruct(lclTyp) && !argInfo.argHasSideEff && !argInfo.argHasGlobRef && - !argInfo.argHasCallerLocalRef) + bool substitute = false; + switch (argNode->OperGet()) + { +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + { + // Enable for all parameterless (=invariant) hw intrinsics such as + // Vector128<>.Zero and Vector256<>.AllBitSets. We might consider + // doing that for Vector.Create(cns) as well. + if ((argNode->gtGetOp1() == nullptr) && (argNode->gtGetOp2() == nullptr)) + { + substitute = true; + } + break; + } +#endif + + // TODO: Enable substitution for CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE (typeof(T)) + // but in order to benefit from that, we need to move various "typeof + IsValueType" + // optimizations from importer to morph. + + default: + break; + } + + if (substitute || (!varTypeIsStruct(lclTyp) && !argInfo.argHasSideEff && !argInfo.argHasGlobRef && + !argInfo.argHasCallerLocalRef)) { /* Get a *LARGE* LCL_VAR node */ op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp) DEBUGARG(lclNum)); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 79f0e20bba643e..95a8e2cb43d727 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14242,6 +14242,47 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) } break; +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + case GT_HWINTRINSIC: + { + GenTreeHWIntrinsic* hw = tree->AsHWIntrinsic(); + switch (hw->gtHWIntrinsicId) + { + case NI_SSE_Xor: + case NI_SSE2_Xor: + case NI_AVX_Xor: + case NI_AVX2_Xor: + { + // Transform XOR(X, 0) to X for vectors + GenTree* op1 = hw->gtGetOp1(); + GenTree* op2 = hw->gtGetOp2(); + if (!gtIsActiveCSE_Candidate(tree)) + { + if (op1->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op1)) + { + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op1); + INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op2; + } + if (op2->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op2)) + { + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op2); + INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return op1; + } + } + break; + } + + default: + break; + } + break; + } +#endif // defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + default: break; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index 65a4bb0fd2077c..e1ebac344cab0f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -123,6 +123,12 @@ public bool Equals(Vector128 other) Vector128 result = Sse2.CompareEqual(this.AsDouble(), other.AsDouble()); return Sse2.MoveMask(result) == 0b11; // We have one bit per element } + else if (Sse41.IsSupported) + { + // xor + testz is slightly better for integer types + Vector128 xored = Sse2.Xor(this.AsByte(), other.AsByte()); + return Sse41.TestZ(xored, xored); + } else { // Unlike float/double, there are no special values to consider diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs index e336034617785b..74b56a03b4db0e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs @@ -134,8 +134,9 @@ public bool Equals(Vector256 other) // bytes are exactly the same. Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))); - Vector256 result = Avx2.CompareEqual(this.AsByte(), other.AsByte()); - return Avx2.MoveMask(result) == unchecked((int)(0b1111_1111_1111_1111_1111_1111_1111_1111)); // We have one bit per element + + Vector256 xored = Avx2.Xor(this.AsByte(), other.AsByte()); + return Avx.TestZ(xored, xored); } return SoftwareFallback(in this, other);