@@ -3633,243 +3633,6 @@ namespace {
36333633 return _Last1;
36343634 }
36353635 }
3636-
3637- template <class _Traits , class _Ty >
3638- const void * __stdcall __std_find_end_impl (
3639- const void * const _First1, const void * const _Last1, const void * const _First2, const size_t _Count2) noexcept {
3640- if (_Count2 == 0 ) {
3641- return _Last1;
3642- }
3643-
3644- if (_Count2 == 1 ) {
3645- return __std_find_last_trivial_impl<_Traits>(_First1, _Last1, *static_cast <const _Ty*>(_First2));
3646- }
3647-
3648- const size_t _Size_bytes_1 = _Byte_length (_First1, _Last1);
3649- const size_t _Size_bytes_2 = _Count2 * sizeof (_Ty);
3650-
3651- if (_Size_bytes_1 < _Size_bytes_2) {
3652- return _Last1;
3653- }
3654-
3655- #ifndef _M_ARM64EC
3656- if (_Use_sse42 () && _Size_bytes_1 >= 16 ) {
3657- constexpr int _Op = (sizeof (_Ty) == 1 ? _SIDD_UBYTE_OPS : _SIDD_UWORD_OPS) | _SIDD_CMP_EQUAL_ORDERED;
3658- constexpr int _Part_size_el = sizeof (_Ty) == 1 ? 16 : 8 ;
3659-
3660- static constexpr int8_t _Low_part_mask[] = {//
3661- -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //
3662- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
3663-
3664- if (_Size_bytes_2 <= 16 ) {
3665- const int _Size_el_2 = static_cast <int >(_Count2);
3666- constexpr unsigned int _Whole_mask = (1 << _Part_size_el) - 1 ;
3667- const unsigned int _Needle_fit_mask = (1 << (_Part_size_el - _Size_el_2 + 1 )) - 1 ;
3668- const unsigned int _Needle_unfit_mask = _Whole_mask ^ _Needle_fit_mask;
3669-
3670- const void * _Stop1 = _First1;
3671- _Advance_bytes (_Stop1, _Size_bytes_1 & 0xF );
3672-
3673- alignas (16 ) uint8_t _Tmp2[16 ];
3674- memcpy (_Tmp2, _First2, _Size_bytes_2);
3675- const __m128i _Data2 = _mm_load_si128 (reinterpret_cast <const __m128i*>(_Tmp2));
3676-
3677- const void * _Mid1 = _Last1;
3678- _Rewind_bytes (_Mid1, 16 );
3679-
3680- const auto _Check_fit = [&_Mid1, _Needle_fit_mask](const unsigned int _Match) noexcept {
3681- const unsigned int _Fit_match = _Match & _Needle_fit_mask;
3682- if (_Fit_match != 0 ) {
3683- unsigned long _Match_last_pos;
3684-
3685- // CodeQL [SM02313] Result is always initialized: we just tested that _Fit_match is non-zero.
3686- _BitScanReverse (&_Match_last_pos, _Fit_match);
3687-
3688- _Advance_bytes (_Mid1, _Match_last_pos * sizeof (_Ty));
3689- return true ;
3690- }
3691-
3692- return false ;
3693- };
3694-
3695- #pragma warning(push)
3696- #pragma warning(disable : 4324) // structure was padded due to alignment specifier
3697- const auto _Check_unfit = [=, &_Mid1](const unsigned int _Match) noexcept {
3698- long _Unfit_match = _Match & _Needle_unfit_mask;
3699- while (_Unfit_match != 0 ) {
3700- const void * _Tmp1 = _Mid1;
3701- unsigned long _Match_last_pos;
3702-
3703- // CodeQL [SM02313] Result is always initialized: we just tested that _Unfit_match is non-zero.
3704- _BitScanReverse (&_Match_last_pos, _Unfit_match);
3705-
3706- _Advance_bytes (_Tmp1, _Match_last_pos * sizeof (_Ty));
3707-
3708- const __m128i _Match_data = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Tmp1));
3709- const __m128i _Cmp_result = _mm_xor_si128 (_Data2, _Match_data);
3710- const __m128i _Data_mask =
3711- _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Low_part_mask + 16 - _Size_bytes_2));
3712-
3713- if (_mm_testz_si128 (_Cmp_result, _Data_mask)) {
3714- _Mid1 = _Tmp1;
3715- return true ;
3716- }
3717-
3718- _bittestandreset (&_Unfit_match, _Match_last_pos);
3719- }
3720-
3721- return false ;
3722- };
3723- #pragma warning(pop)
3724-
3725- // TRANSITION, DevCom-10689455, the code below could test with _mm_cmpestrc,
3726- // if it has been fused with _mm_cmpestrm.
3727-
3728- // The very last part, for any match needle should fit, otherwise false match
3729- __m128i _Data1_last = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Mid1));
3730- const auto _Match_last = _mm_cmpestrm (_Data2, _Size_el_2, _Data1_last, _Part_size_el, _Op);
3731- const unsigned int _Match_last_val = _mm_cvtsi128_si32 (_Match_last);
3732- if (_Check_fit (_Match_last_val)) {
3733- return _Mid1;
3734- }
3735-
3736- // The middle part, fit and unfit needle
3737- while (_Mid1 != _Stop1) {
3738- _Rewind_bytes (_Mid1, 16 );
3739- const __m128i _Data1 = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Mid1));
3740- const auto _Match = _mm_cmpestrm (_Data2, _Size_el_2, _Data1, _Part_size_el, _Op);
3741- const unsigned int _Match_val = _mm_cvtsi128_si32 (_Match);
3742- if (_Match_val != 0 && (_Check_unfit (_Match_val) || _Check_fit (_Match_val))) {
3743- return _Mid1;
3744- }
3745- }
3746-
3747- // The first part, fit and unfit needle, mask out already processed positions
3748- if (const size_t _Tail_bytes_1 = _Size_bytes_1 & 0xF ; _Tail_bytes_1 != 0 ) {
3749- _Mid1 = _First1;
3750- const __m128i _Data1 = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Mid1));
3751- const auto _Match = _mm_cmpestrm (_Data2, _Size_el_2, _Data1, _Part_size_el, _Op);
3752- const unsigned int _Match_val = _mm_cvtsi128_si32 (_Match) & ((1 << _Tail_bytes_1) - 1 );
3753- if (_Match_val != 0 && (_Check_unfit (_Match_val) || _Check_fit (_Match_val))) {
3754- return _Mid1;
3755- }
3756- }
3757-
3758- return _Last1;
3759- } else {
3760- const __m128i _Data2 = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_First2));
3761-
3762- const void * _Tail2 = _First2;
3763- _Advance_bytes (_Tail2, 16 );
3764-
3765- const void * _Mid1 = _Last1;
3766- _Rewind_bytes (_Mid1, _Size_bytes_2);
3767-
3768- const size_t _Size_diff_bytes = _Size_bytes_1 - _Size_bytes_2;
3769- const void * _Stop1 = _First1;
3770- _Advance_bytes (_Stop1, _Size_diff_bytes & 0xF );
3771-
3772- #pragma warning(push)
3773- #pragma warning(disable : 4324) // structure was padded due to alignment specifier
3774- const auto _Check = [=, &_Mid1](long _Match) noexcept {
3775- while (_Match != 0 ) {
3776- const void * _Tmp1 = _Mid1;
3777- unsigned long _Match_last_pos;
3778-
3779- // CodeQL [SM02313] Result is always initialized: we just tested that _Match is non-zero.
3780- _BitScanReverse (&_Match_last_pos, _Match);
3781-
3782- bool _Match_1st_16 = true ;
3783-
3784- if (_Match_last_pos != 0 ) {
3785- _Advance_bytes (_Tmp1, _Match_last_pos * sizeof (_Ty));
3786-
3787- const __m128i _Match_data = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Tmp1));
3788- const __m128i _Cmp_result = _mm_xor_si128 (_Data2, _Match_data);
3789-
3790- if (!_mm_testz_si128 (_Cmp_result, _Cmp_result)) {
3791- _Match_1st_16 = false ;
3792- }
3793- }
3794-
3795- if (_Match_1st_16) {
3796- const void * _Tail1 = _Tmp1;
3797- _Advance_bytes (_Tail1, 16 );
3798-
3799- if (memcmp (_Tail1, _Tail2, _Size_bytes_2 - 16 ) == 0 ) {
3800- _Mid1 = _Tmp1;
3801- return true ;
3802- }
3803- }
3804-
3805- _bittestandreset (&_Match, _Match_last_pos);
3806- }
3807-
3808- return false ;
3809- };
3810- #pragma warning(pop)
3811- // TRANSITION, DevCom-10689455, the code below could test with _mm_cmpestrc,
3812- // if it has been fused with _mm_cmpestrm.
3813-
3814- // The main part, match all characters
3815- for (;;) {
3816- const __m128i _Data1 = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Mid1));
3817- const auto _Match = _mm_cmpestrm (_Data2, _Part_size_el, _Data1, _Part_size_el, _Op);
3818- const unsigned int _Match_val = _mm_cvtsi128_si32 (_Match);
3819- if (_Match_val != 0 && _Check (_Match_val)) {
3820- return _Mid1;
3821- }
3822-
3823- if (_Mid1 == _Stop1) {
3824- break ;
3825- }
3826-
3827- _Rewind_bytes (_Mid1, 16 );
3828- }
3829-
3830- // The first part, mask out already processed positions
3831- if (const size_t _Tail_bytes_1 = _Size_diff_bytes & 0xF ; _Tail_bytes_1 != 0 ) {
3832- _Mid1 = _First1;
3833- const __m128i _Data1 = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Mid1));
3834- const auto _Match = _mm_cmpestrm (_Data2, _Part_size_el, _Data1, _Part_size_el, _Op);
3835- const unsigned int _Match_val = _mm_cvtsi128_si32 (_Match) & ((1 << _Tail_bytes_1) - 1 );
3836- if (_Match_val != 0 && _Check (_Match_val)) {
3837- return _Mid1;
3838- }
3839- }
3840-
3841- return _Last1;
3842- }
3843- } else
3844- #endif // !defined(_M_ARM64EC)
3845- {
3846- auto _Ptr1 = static_cast <const _Ty*>(_Last1) - _Count2;
3847- const auto _Ptr2 = static_cast <const _Ty*>(_First2);
3848-
3849- for (;;) {
3850- if (*_Ptr1 == *_Ptr2) {
3851- bool _Equal = true ;
3852-
3853- for (size_t _Idx = 1 ; _Idx != _Count2; ++_Idx) {
3854- if (_Ptr1[_Idx] != _Ptr2[_Idx]) {
3855- _Equal = false ;
3856- break ;
3857- }
3858- }
3859-
3860- if (_Equal) {
3861- return _Ptr1;
3862- }
3863- }
3864-
3865- if (_Ptr1 == _First1) {
3866- return _Last1;
3867- }
3868-
3869- --_Ptr1;
3870- }
3871- }
3872- }
38733636} // unnamed namespace
38743637
38753638extern " C" {
@@ -3994,16 +3757,6 @@ const void* __stdcall __std_search_2(
39943757 return __std_search_impl<_Find_traits_2, uint16_t >(_First1, _Last1, _First2, _Count2);
39953758}
39963759
3997- const void * __stdcall __std_find_end_1 (
3998- const void * const _First1, const void * const _Last1, const void * const _First2, const size_t _Count2) noexcept {
3999- return __std_find_end_impl<_Find_traits_1, uint8_t >(_First1, _Last1, _First2, _Count2);
4000- }
4001-
4002- const void * __stdcall __std_find_end_2 (
4003- const void * const _First1, const void * const _Last1, const void * const _First2, const size_t _Count2) noexcept {
4004- return __std_find_end_impl<_Find_traits_2, uint16_t >(_First1, _Last1, _First2, _Count2);
4005- }
4006-
40073760__declspec (noalias) size_t __stdcall __std_mismatch_1(
40083761 const void * const _First1, const void * const _First2, const size_t _Count) noexcept {
40093762 return __std_mismatch_impl<_Find_traits_1, uint8_t >(_First1, _First2, _Count);
0 commit comments