Skip to content

Commit ca1553d

Browse files
Revert product changes for find_end vectorization (microsoft#5041)
1 parent 27973ad commit ca1553d

2 files changed

Lines changed: 0 additions & 313 deletions

File tree

stl/inc/algorithm

Lines changed: 0 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -59,11 +59,6 @@ const void* __stdcall __std_find_last_trivial_2(const void* _First, const void*
5959
const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept;
6060
const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept;
6161

62-
const void* __stdcall __std_find_end_1(
63-
const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept;
64-
const void* __stdcall __std_find_end_2(
65-
const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept;
66-
6762
__declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept;
6863
__declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept;
6964
__declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* _First, const void* _Last) noexcept;
@@ -194,19 +189,6 @@ _Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val
194189
}
195190
}
196191

197-
template <class _Ty1, class _Ty2>
198-
_Ty1* _Find_end_vectorized(
199-
_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, const size_t _Count2) noexcept {
200-
_STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2));
201-
if constexpr (sizeof(_Ty1) == 1) {
202-
return const_cast<_Ty1*>(static_cast<const _Ty1*>(::__std_find_end_1(_First1, _Last1, _First2, _Count2)));
203-
} else if constexpr (sizeof(_Ty1) == 2) {
204-
return const_cast<_Ty1*>(static_cast<const _Ty1*>(::__std_find_end_2(_First1, _Last1, _First2, _Count2)));
205-
} else {
206-
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
207-
}
208-
}
209-
210192
template <class _Ty, class _TVal1, class _TVal2>
211193
__declspec(noalias) void _Replace_vectorized(
212194
_Ty* const _First, _Ty* const _Last, const _TVal1 _Old_val, const _TVal2 _New_val) noexcept {
@@ -3212,26 +3194,6 @@ _NODISCARD _CONSTEXPR20 _FwdIt1 find_end(
32123194
if constexpr (_Is_ranges_random_iter_v<_FwdIt1> && _Is_ranges_random_iter_v<_FwdIt2>) {
32133195
const _Iter_diff_t<_FwdIt2> _Count2 = _ULast2 - _UFirst2;
32143196
if (_Count2 > 0 && _Count2 <= _ULast1 - _UFirst1) {
3215-
#if _USE_STD_VECTOR_ALGORITHMS
3216-
if constexpr (_Vector_alg_in_search_is_safe<decltype(_UFirst1), decltype(_UFirst2), _Pr>) {
3217-
if (!_STD _Is_constant_evaluated()) {
3218-
const auto _Ptr1 = _STD _To_address(_UFirst1);
3219-
3220-
const auto _Ptr_res1 = _STD _Find_end_vectorized(
3221-
_Ptr1, _STD _To_address(_ULast1), _STD _To_address(_UFirst2), static_cast<size_t>(_Count2));
3222-
3223-
if constexpr (is_pointer_v<decltype(_UFirst1)>) {
3224-
_UFirst1 = _Ptr_res1;
3225-
} else {
3226-
_UFirst1 += _Ptr_res1 - _Ptr1;
3227-
}
3228-
3229-
_STD _Seek_wrapped(_First1, _UFirst1);
3230-
return _First1;
3231-
}
3232-
}
3233-
#endif // _USE_STD_VECTOR_ALGORITHMS
3234-
32353197
for (auto _UCandidate = _ULast1 - static_cast<_Iter_diff_t<_FwdIt1>>(_Count2);; --_UCandidate) {
32363198
if (_STD _Equal_rev_pred_unchecked(_UCandidate, _UFirst2, _ULast2, _STD _Pass_fn(_Pred))) {
32373199
_STD _Seek_wrapped(_First1, _UCandidate);
@@ -3335,34 +3297,6 @@ namespace ranges {
33353297

33363298
if (_Count2 > 0 && _Count2 <= _Count1) {
33373299
const auto _Count2_as1 = static_cast<iter_difference_t<_It1>>(_Count2);
3338-
#if _USE_STD_VECTOR_ALGORITHMS
3339-
if constexpr (_Vector_alg_in_search_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity>
3340-
&& is_same_v<_Pj2, identity>) {
3341-
if (!_STD is_constant_evaluated()) {
3342-
const auto _Ptr1 = _STD to_address(_First1);
3343-
const auto _Ptr2 = _STD to_address(_First2);
3344-
const auto _Ptr_last1 = _Ptr1 + _Count1;
3345-
3346-
const auto _Ptr_res1 =
3347-
_STD _Find_end_vectorized(_Ptr1, _Ptr_last1, _Ptr2, static_cast<size_t>(_Count2));
3348-
3349-
if constexpr (is_pointer_v<_It1>) {
3350-
if (_Ptr_res1 != _Ptr_last1) {
3351-
return {_Ptr_res1, _Ptr_res1 + _Count2};
3352-
} else {
3353-
return {_Ptr_res1, _Ptr_res1};
3354-
}
3355-
} else {
3356-
_First1 += _Ptr_res1 - _Ptr1;
3357-
if (_Ptr_res1 != _Ptr_last1) {
3358-
return {_First1, _First1 + _Count2_as1};
3359-
} else {
3360-
return {_First1, _First1};
3361-
}
3362-
}
3363-
}
3364-
}
3365-
#endif // _USE_STD_VECTOR_ALGORITHMS
33663300

33673301
for (auto _Candidate = _First1 + (_Count1 - _Count2_as1);; --_Candidate) {
33683302
auto _Match_and_mid1 =

stl/src/vector_algorithms.cpp

Lines changed: 0 additions & 247 deletions
Original file line number | Diff line number | Diff line change
@@ -3633,243 +3633,6 @@ namespace {
36333633
return _Last1;
36343634
}
36353635
}
3636-
3637-
template <class _Traits, class _Ty>
3638-
const void* __stdcall __std_find_end_impl(
3639-
const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept {
3640-
if (_Count2 == 0) {
3641-
return _Last1;
3642-
}
3643-
3644-
if (_Count2 == 1) {
3645-
return __std_find_last_trivial_impl<_Traits>(_First1, _Last1, *static_cast<const _Ty*>(_First2));
3646-
}
3647-
3648-
const size_t _Size_bytes_1 = _Byte_length(_First1, _Last1);
3649-
const size_t _Size_bytes_2 = _Count2 * sizeof(_Ty);
3650-
3651-
if (_Size_bytes_1 < _Size_bytes_2) {
3652-
return _Last1;
3653-
}
3654-
3655-
#ifndef _M_ARM64EC
3656-
if (_Use_sse42() && _Size_bytes_1 >= 16) {
3657-
constexpr int _Op = (sizeof(_Ty) == 1 ? _SIDD_UBYTE_OPS : _SIDD_UWORD_OPS) | _SIDD_CMP_EQUAL_ORDERED;
3658-
constexpr int _Part_size_el = sizeof(_Ty) == 1 ? 16 : 8;
3659-
3660-
static constexpr int8_t _Low_part_mask[] = {//
3661-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, //
3662-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3663-
3664-
if (_Size_bytes_2 <= 16) {
3665-
const int _Size_el_2 = static_cast<int>(_Count2);
3666-
constexpr unsigned int _Whole_mask = (1 << _Part_size_el) - 1;
3667-
const unsigned int _Needle_fit_mask = (1 << (_Part_size_el - _Size_el_2 + 1)) - 1;
3668-
const unsigned int _Needle_unfit_mask = _Whole_mask ^ _Needle_fit_mask;
3669-
3670-
const void* _Stop1 = _First1;
3671-
_Advance_bytes(_Stop1, _Size_bytes_1 & 0xF);
3672-
3673-
alignas(16) uint8_t _Tmp2[16];
3674-
memcpy(_Tmp2, _First2, _Size_bytes_2);
3675-
const __m128i _Data2 = _mm_load_si128(reinterpret_cast<const __m128i*>(_Tmp2));
3676-
3677-
const void* _Mid1 = _Last1;
3678-
_Rewind_bytes(_Mid1, 16);
3679-
3680-
const auto _Check_fit = [&_Mid1, _Needle_fit_mask](const unsigned int _Match) noexcept {
3681-
const unsigned int _Fit_match = _Match & _Needle_fit_mask;
3682-
if (_Fit_match != 0) {
3683-
unsigned long _Match_last_pos;
3684-
3685-
// CodeQL [SM02313] Result is always initialized: we just tested that _Fit_match is non-zero.
3686-
_BitScanReverse(&_Match_last_pos, _Fit_match);
3687-
3688-
_Advance_bytes(_Mid1, _Match_last_pos * sizeof(_Ty));
3689-
return true;
3690-
}
3691-
3692-
return false;
3693-
};
3694-
3695-
#pragma warning(push)
3696-
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
3697-
const auto _Check_unfit = [=, &_Mid1](const unsigned int _Match) noexcept {
3698-
long _Unfit_match = _Match & _Needle_unfit_mask;
3699-
while (_Unfit_match != 0) {
3700-
const void* _Tmp1 = _Mid1;
3701-
unsigned long _Match_last_pos;
3702-
3703-
// CodeQL [SM02313] Result is always initialized: we just tested that _Unfit_match is non-zero.
3704-
_BitScanReverse(&_Match_last_pos, _Unfit_match);
3705-
3706-
_Advance_bytes(_Tmp1, _Match_last_pos * sizeof(_Ty));
3707-
3708-
const __m128i _Match_data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Tmp1));
3709-
const __m128i _Cmp_result = _mm_xor_si128(_Data2, _Match_data);
3710-
const __m128i _Data_mask =
3711-
_mm_loadu_si128(reinterpret_cast<const __m128i*>(_Low_part_mask + 16 - _Size_bytes_2));
3712-
3713-
if (_mm_testz_si128(_Cmp_result, _Data_mask)) {
3714-
_Mid1 = _Tmp1;
3715-
return true;
3716-
}
3717-
3718-
_bittestandreset(&_Unfit_match, _Match_last_pos);
3719-
}
3720-
3721-
return false;
3722-
};
3723-
#pragma warning(pop)
3724-
3725-
// TRANSITION, DevCom-10689455, the code below could test with _mm_cmpestrc,
3726-
// if it has been fused with _mm_cmpestrm.
3727-
3728-
// The very last part, for any match needle should fit, otherwise false match
3729-
__m128i _Data1_last = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Mid1));
3730-
const auto _Match_last = _mm_cmpestrm(_Data2, _Size_el_2, _Data1_last, _Part_size_el, _Op);
3731-
const unsigned int _Match_last_val = _mm_cvtsi128_si32(_Match_last);
3732-
if (_Check_fit(_Match_last_val)) {
3733-
return _Mid1;
3734-
}
3735-
3736-
// The middle part, fit and unfit needle
3737-
while (_Mid1 != _Stop1) {
3738-
_Rewind_bytes(_Mid1, 16);
3739-
const __m128i _Data1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Mid1));
3740-
const auto _Match = _mm_cmpestrm(_Data2, _Size_el_2, _Data1, _Part_size_el, _Op);
3741-
const unsigned int _Match_val = _mm_cvtsi128_si32(_Match);
3742-
if (_Match_val != 0 && (_Check_unfit(_Match_val) || _Check_fit(_Match_val))) {
3743-
return _Mid1;
3744-
}
3745-
}
3746-
3747-
// The first part, fit and unfit needle, mask out already processed positions
3748-
if (const size_t _Tail_bytes_1 = _Size_bytes_1 & 0xF; _Tail_bytes_1 != 0) {
3749-
_Mid1 = _First1;
3750-
const __m128i _Data1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Mid1));
3751-
const auto _Match = _mm_cmpestrm(_Data2, _Size_el_2, _Data1, _Part_size_el, _Op);
3752-
const unsigned int _Match_val = _mm_cvtsi128_si32(_Match) & ((1 << _Tail_bytes_1) - 1);
3753-
if (_Match_val != 0 && (_Check_unfit(_Match_val) || _Check_fit(_Match_val))) {
3754-
return _Mid1;
3755-
}
3756-
}
3757-
3758-
return _Last1;
3759-
} else {
3760-
const __m128i _Data2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_First2));
3761-
3762-
const void* _Tail2 = _First2;
3763-
_Advance_bytes(_Tail2, 16);
3764-
3765-
const void* _Mid1 = _Last1;
3766-
_Rewind_bytes(_Mid1, _Size_bytes_2);
3767-
3768-
const size_t _Size_diff_bytes = _Size_bytes_1 - _Size_bytes_2;
3769-
const void* _Stop1 = _First1;
3770-
_Advance_bytes(_Stop1, _Size_diff_bytes & 0xF);
3771-
3772-
#pragma warning(push)
3773-
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
3774-
const auto _Check = [=, &_Mid1](long _Match) noexcept {
3775-
while (_Match != 0) {
3776-
const void* _Tmp1 = _Mid1;
3777-
unsigned long _Match_last_pos;
3778-
3779-
// CodeQL [SM02313] Result is always initialized: we just tested that _Match is non-zero.
3780-
_BitScanReverse(&_Match_last_pos, _Match);
3781-
3782-
bool _Match_1st_16 = true;
3783-
3784-
if (_Match_last_pos != 0) {
3785-
_Advance_bytes(_Tmp1, _Match_last_pos * sizeof(_Ty));
3786-
3787-
const __m128i _Match_data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Tmp1));
3788-
const __m128i _Cmp_result = _mm_xor_si128(_Data2, _Match_data);
3789-
3790-
if (!_mm_testz_si128(_Cmp_result, _Cmp_result)) {
3791-
_Match_1st_16 = false;
3792-
}
3793-
}
3794-
3795-
if (_Match_1st_16) {
3796-
const void* _Tail1 = _Tmp1;
3797-
_Advance_bytes(_Tail1, 16);
3798-
3799-
if (memcmp(_Tail1, _Tail2, _Size_bytes_2 - 16) == 0) {
3800-
_Mid1 = _Tmp1;
3801-
return true;
3802-
}
3803-
}
3804-
3805-
_bittestandreset(&_Match, _Match_last_pos);
3806-
}
3807-
3808-
return false;
3809-
};
3810-
#pragma warning(pop)
3811-
// TRANSITION, DevCom-10689455, the code below could test with _mm_cmpestrc,
3812-
// if it has been fused with _mm_cmpestrm.
3813-
3814-
// The main part, match all characters
3815-
for (;;) {
3816-
const __m128i _Data1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Mid1));
3817-
const auto _Match = _mm_cmpestrm(_Data2, _Part_size_el, _Data1, _Part_size_el, _Op);
3818-
const unsigned int _Match_val = _mm_cvtsi128_si32(_Match);
3819-
if (_Match_val != 0 && _Check(_Match_val)) {
3820-
return _Mid1;
3821-
}
3822-
3823-
if (_Mid1 == _Stop1) {
3824-
break;
3825-
}
3826-
3827-
_Rewind_bytes(_Mid1, 16);
3828-
}
3829-
3830-
// The first part, mask out already processed positions
3831-
if (const size_t _Tail_bytes_1 = _Size_diff_bytes & 0xF; _Tail_bytes_1 != 0) {
3832-
_Mid1 = _First1;
3833-
const __m128i _Data1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Mid1));
3834-
const auto _Match = _mm_cmpestrm(_Data2, _Part_size_el, _Data1, _Part_size_el, _Op);
3835-
const unsigned int _Match_val = _mm_cvtsi128_si32(_Match) & ((1 << _Tail_bytes_1) - 1);
3836-
if (_Match_val != 0 && _Check(_Match_val)) {
3837-
return _Mid1;
3838-
}
3839-
}
3840-
3841-
return _Last1;
3842-
}
3843-
} else
3844-
#endif // !defined(_M_ARM64EC)
3845-
{
3846-
auto _Ptr1 = static_cast<const _Ty*>(_Last1) - _Count2;
3847-
const auto _Ptr2 = static_cast<const _Ty*>(_First2);
3848-
3849-
for (;;) {
3850-
if (*_Ptr1 == *_Ptr2) {
3851-
bool _Equal = true;
3852-
3853-
for (size_t _Idx = 1; _Idx != _Count2; ++_Idx) {
3854-
if (_Ptr1[_Idx] != _Ptr2[_Idx]) {
3855-
_Equal = false;
3856-
break;
3857-
}
3858-
}
3859-
3860-
if (_Equal) {
3861-
return _Ptr1;
3862-
}
3863-
}
3864-
3865-
if (_Ptr1 == _First1) {
3866-
return _Last1;
3867-
}
3868-
3869-
--_Ptr1;
3870-
}
3871-
}
3872-
}
38733636
} // unnamed namespace
38743637

38753638
extern "C" {
@@ -3994,16 +3757,6 @@ const void* __stdcall __std_search_2(
39943757
return __std_search_impl<_Find_traits_2, uint16_t>(_First1, _Last1, _First2, _Count2);
39953758
}
39963759

3997-
const void* __stdcall __std_find_end_1(
3998-
const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept {
3999-
return __std_find_end_impl<_Find_traits_1, uint8_t>(_First1, _Last1, _First2, _Count2);
4000-
}
4001-
4002-
const void* __stdcall __std_find_end_2(
4003-
const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept {
4004-
return __std_find_end_impl<_Find_traits_2, uint16_t>(_First1, _Last1, _First2, _Count2);
4005-
}
4006-
40073760
__declspec(noalias) size_t __stdcall __std_mismatch_1(
40083761
const void* const _First1, const void* const _First2, const size_t _Count) noexcept {
40093762
return __std_mismatch_impl<_Find_traits_1, uint8_t>(_First1, _First2, _Count);

0 commit comments

Comments
 (0)