@@ -489,8 +489,8 @@ public static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint buff
489489 // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
490490 // this method is running.
491491
492- return ( Sse2 . IsSupported )
493- ? GetIndexOfFirstNonAsciiChar_Sse2 ( pBuffer , bufferLength )
492+ return ( BitConverter . IsLittleEndian && ( Sse2 . IsSupported || AdvSimd . Arm64 . IsSupported ) )
493+ ? GetIndexOfFirstNonAsciiChar_Sse2OrArm64 ( pBuffer , bufferLength )
494494 : GetIndexOfFirstNonAsciiChar_Default ( pBuffer , bufferLength ) ;
495495 }
496496
@@ -630,9 +630,9 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Default(char* pBuffer, n
630630 goto Finish ;
631631 }
632632
633- private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2 ( char * pBuffer , nuint bufferLength /* in chars */ )
633+ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2OrArm64 ( char * pBuffer , nuint bufferLength /* in chars */ )
634634 {
635- // This method contains logic optimized for both SSE2 and SSE41 . Much of the logic in this method
635+ // This method contains logic optimized for SSE2, SSE41 and ARM64 . Much of the logic in this method
636636 // will be elided by JIT once we determine which specific ISAs we support.
637637
638638 // Quick check for empty inputs.
@@ -647,9 +647,10 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
647647 uint SizeOfVector128InBytes = ( uint ) Unsafe . SizeOf < Vector128 < byte > > ( ) ;
648648 uint SizeOfVector128InChars = SizeOfVector128InBytes / sizeof ( char ) ;
649649
650- Debug . Assert ( Sse2 . IsSupported , "Should've been checked by caller ." ) ;
651- Debug . Assert ( BitConverter . IsLittleEndian , "SSE2 assumes little-endian." ) ;
650+ Debug . Assert ( Sse2 . IsSupported || AdvSimd . Arm64 . IsSupported , "Sse2 or AdvSimd64 required ." ) ;
651+ Debug . Assert ( BitConverter . IsLittleEndian , "This SSE2/Arm64 implementation assumes little-endian." ) ;
652652
653+ Vector128 < byte > bitmask = Vector128 . Create ( ( ushort ) 0x1001 ) . AsByte ( ) ;
653654 Vector128 < ushort > firstVector , secondVector ;
654655 uint currentMask ;
655656 char * pOriginalBuffer = pBuffer ;
@@ -673,13 +674,35 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
673674
674675 // Read the first vector unaligned.
675676
676- firstVector = Sse2 . LoadVector128 ( ( ushort * ) pBuffer ) ; // unaligned load
677+ if ( Sse2 . IsSupported )
678+ {
679+ firstVector = Sse2 . LoadVector128 ( ( ushort * ) pBuffer ) ; // unaligned load
680+ }
681+ else if ( AdvSimd . Arm64 . IsSupported )
682+ {
683+ firstVector = AdvSimd . LoadVector128 ( ( ushort * ) pBuffer ) ; // unaligned load
684+ }
685+ else
686+ {
687+ throw new PlatformNotSupportedException ( ) ;
688+ }
677689
678690 // The operation below forces the 0x8000 bit of each WORD to be set iff the WORD element
679- // has value >= 0x0800 (non-ASCII). Then we'll treat the vector as a BYTE vector in order
691+ // has value >= 0x0080 (non-ASCII). Then we'll treat the vector as a BYTE vector in order
680692 // to extract the mask. Reminder: the 0x0080 bit of each WORD should be ignored.
681693
682- currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) ) ;
694+ if ( Sse2 . IsSupported )
695+ {
696+ currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) ) ;
697+ }
698+ else if ( AdvSimd . Arm64 . IsSupported )
699+ {
700+ currentMask = Unicode . Utf16Utility . GetNonAsciiBytes ( AdvSimd . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) , bitmask ) ;
701+ }
702+ else
703+ {
704+ throw new PlatformNotSupportedException ( ) ;
705+ }
683706
684707 if ( ( currentMask & NonAsciiDataSeenMask ) != 0 )
685708 {
@@ -725,9 +748,23 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
725748
726749 do
727750 {
728- firstVector = Sse2 . LoadAlignedVector128 ( ( ushort * ) pBuffer ) ;
729- secondVector = Sse2 . LoadAlignedVector128 ( ( ushort * ) pBuffer + SizeOfVector128InChars ) ;
730- Vector128 < ushort > combinedVector = Sse2 . Or ( firstVector , secondVector ) ;
751+ Vector128 < ushort > combinedVector ;
752+ if ( Sse2 . IsSupported )
753+ {
754+ firstVector = Sse2 . LoadAlignedVector128 ( ( ushort * ) pBuffer ) ;
755+ secondVector = Sse2 . LoadAlignedVector128 ( ( ushort * ) pBuffer + SizeOfVector128InChars ) ;
756+ combinedVector = Sse2 . Or ( firstVector , secondVector ) ;
757+ }
758+ else if ( AdvSimd . Arm64 . IsSupported )
759+ {
760+ firstVector = AdvSimd . LoadVector128 ( ( ushort * ) pBuffer ) ;
761+ secondVector = AdvSimd . LoadVector128 ( ( ushort * ) pBuffer + SizeOfVector128InChars ) ;
762+ combinedVector = AdvSimd . Or ( firstVector , secondVector ) ;
763+ }
764+ else
765+ {
766+ throw new PlatformNotSupportedException ( ) ;
767+ }
731768
732769 if ( Sse41 . IsSupported )
733770 {
@@ -738,7 +775,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
738775 goto FoundNonAsciiDataInFirstOrSecondVector ;
739776 }
740777 }
741- else
778+ else if ( Sse2 . IsSupported )
742779 {
743780 // See comment earlier in the method for an explanation of how the below logic works.
744781 currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . AddSaturate ( combinedVector , asciiMaskForAddSaturate ) . AsByte ( ) ) ;
@@ -747,6 +784,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
747784 goto FoundNonAsciiDataInFirstOrSecondVector ;
748785 }
749786 }
787+ else if ( AdvSimd . Arm64 . IsSupported )
788+ {
789+ currentMask = Unicode . Utf16Utility . GetNonAsciiBytes ( AdvSimd . AddSaturate ( combinedVector , asciiMaskForAddSaturate ) . AsByte ( ) , bitmask ) ;
790+ if ( ( currentMask & NonAsciiDataSeenMask ) != 0 )
791+ {
792+ goto FoundNonAsciiDataInFirstOrSecondVector ;
793+ }
794+ }
795+ else
796+ {
797+ throw new PlatformNotSupportedException ( ) ;
798+ }
750799
751800 pBuffer += 2 * SizeOfVector128InChars ;
752801 } while ( pBuffer <= pFinalVectorReadPos ) ;
@@ -770,7 +819,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
770819 // At least one full vector's worth of data remains, so we can safely read it.
771820 // Remember, at this point pBuffer is still aligned.
772821
773- firstVector = Sse2 . LoadAlignedVector128 ( ( ushort * ) pBuffer ) ;
822+ if ( Sse2 . IsSupported )
823+ {
824+ firstVector = Sse2 . LoadAlignedVector128 ( ( ushort * ) pBuffer ) ;
825+ }
826+ else if ( AdvSimd . Arm64 . IsSupported )
827+ {
828+ firstVector = AdvSimd . LoadVector128 ( ( ushort * ) pBuffer ) ;
829+ }
830+ else
831+ {
832+ throw new PlatformNotSupportedException ( ) ;
833+ }
774834
775835 if ( Sse41 . IsSupported )
776836 {
@@ -781,7 +841,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
781841 goto FoundNonAsciiDataInFirstVector ;
782842 }
783843 }
784- else
844+ else if ( Sse2 . IsSupported )
785845 {
786846 // See comment earlier in the method for an explanation of how the below logic works.
787847 currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) ) ;
@@ -790,6 +850,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
790850 goto FoundNonAsciiDataInCurrentMask ;
791851 }
792852 }
853+ else if ( AdvSimd . Arm64 . IsSupported )
854+ {
855+ currentMask = Unicode . Utf16Utility . GetNonAsciiBytes ( AdvSimd . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) , bitmask ) ;
856+ if ( ( currentMask & NonAsciiDataSeenMask ) != 0 )
857+ {
858+ goto FoundNonAsciiDataInCurrentMask ;
859+ }
860+ }
861+ else
862+ {
863+ throw new PlatformNotSupportedException ( ) ;
864+ }
793865
794866 IncrementCurrentOffsetBeforeFinalUnalignedVectorRead :
795867
@@ -803,7 +875,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
803875 // We need to adjust the pointer because we're re-reading data.
804876
805877 pBuffer = ( char * ) ( ( byte * ) pBuffer + ( bufferLength & ( SizeOfVector128InBytes - 1 ) ) - SizeOfVector128InBytes ) ;
806- firstVector = Sse2 . LoadVector128 ( ( ushort * ) pBuffer ) ; // unaligned load
878+ if ( Sse2 . IsSupported )
879+ {
880+ firstVector = Sse2 . LoadVector128 ( ( ushort * ) pBuffer ) ; // unaligned load
881+ }
882+ else if ( AdvSimd . Arm64 . IsSupported )
883+ {
884+ firstVector = AdvSimd . LoadVector128 ( ( ushort * ) pBuffer ) ; // unaligned load
885+ }
886+ else
887+ {
888+ throw new PlatformNotSupportedException ( ) ;
889+ }
807890
808891 if ( Sse41 . IsSupported )
809892 {
@@ -814,7 +897,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
814897 goto FoundNonAsciiDataInFirstVector ;
815898 }
816899 }
817- else
900+ else if ( Sse2 . IsSupported )
818901 {
819902 // See comment earlier in the method for an explanation of how the below logic works.
820903 currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) ) ;
@@ -823,6 +906,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
823906 goto FoundNonAsciiDataInCurrentMask ;
824907 }
825908 }
909+ else if ( AdvSimd . Arm64 . IsSupported )
910+ {
911+ currentMask = Unicode . Utf16Utility . GetNonAsciiBytes ( AdvSimd . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) , bitmask ) ;
912+ if ( ( currentMask & NonAsciiDataSeenMask ) != 0 )
913+ {
914+ goto FoundNonAsciiDataInCurrentMask ;
915+ }
916+ }
917+ else
918+ {
919+ throw new PlatformNotSupportedException ( ) ;
920+ }
826921
827922 pBuffer += SizeOfVector128InChars ;
828923 }
@@ -846,14 +941,26 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
846941 goto FoundNonAsciiDataInFirstVector ;
847942 }
848943 }
849- else
944+ else if ( Sse2 . IsSupported )
850945 {
851946 currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) ) ;
852947 if ( ( currentMask & NonAsciiDataSeenMask ) != 0 )
853948 {
854949 goto FoundNonAsciiDataInCurrentMask ;
855950 }
856951 }
952+ else if ( AdvSimd . Arm64 . IsSupported )
953+ {
954+ currentMask = Unicode . Utf16Utility . GetNonAsciiBytes ( AdvSimd . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) , bitmask ) ;
955+ if ( ( currentMask & NonAsciiDataSeenMask ) != 0 )
956+ {
957+ goto FoundNonAsciiDataInCurrentMask ;
958+ }
959+ }
960+ else
961+ {
962+ throw new PlatformNotSupportedException ( ) ;
963+ }
857964
858965 // Wasn't the first vector; must be the second.
859966
@@ -863,7 +970,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
863970 FoundNonAsciiDataInFirstVector :
864971
865972 // See comment earlier in the method for an explanation of how the below logic works.
866- currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) ) ;
973+ if ( Sse2 . IsSupported )
974+ {
975+ currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) ) ;
976+ }
977+ else if ( AdvSimd . Arm64 . IsSupported )
978+ {
979+ currentMask = Unicode . Utf16Utility . GetNonAsciiBytes ( AdvSimd . AddSaturate ( firstVector , asciiMaskForAddSaturate ) . AsByte ( ) , bitmask ) ;
980+ }
981+ else
982+ {
983+ throw new PlatformNotSupportedException ( ) ;
984+ }
867985
868986 FoundNonAsciiDataInCurrentMask :
869987
0 commit comments