66using System . Numerics ;
77using System . Runtime . CompilerServices ;
88using System . Runtime . Intrinsics ;
9+ using System . Runtime . Intrinsics . Arm ;
910using System . Runtime . Intrinsics . X86 ;
1011
1112#if SYSTEM_PRIVATE_CORELIB
@@ -882,7 +883,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
882883 // is not enabled.
883884
884885 Unsafe . SkipInit ( out Vector128 < short > nonAsciiUtf16DataMask ) ;
885- if ( Sse41 . X64 . IsSupported )
886+ if ( Sse41 . X64 . IsSupported || ( AdvSimd . Arm64 . IsSupported && BitConverter . IsLittleEndian ) )
886887 {
887888 nonAsciiUtf16DataMask = Vector128 . Create ( unchecked ( ( short ) 0xFF80 ) ) ; // mask of non-ASCII bits in a UTF-16 char
888889 }
@@ -940,10 +941,8 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
940941 uint inputCharsRemaining = ( uint ) ( pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer ) + 2 ;
941942 uint minElementsRemaining = ( uint ) Math . Min ( inputCharsRemaining , outputBytesRemaining ) ;
942943
943- if ( Sse41 . X64 . IsSupported )
944+ if ( Sse41 . X64 . IsSupported || ( AdvSimd . Arm64 . IsSupported && BitConverter . IsLittleEndian ) )
944945 {
945- Debug . Assert ( BitConverter . IsLittleEndian , "SSE41 requires little-endian." ) ;
946-
947946 // Try reading and writing 8 elements per iteration.
948947 uint maxIters = minElementsRemaining / 8 ;
949948 ulong possibleNonAsciiQWord ;
@@ -952,14 +951,30 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
952951 for ( i = 0 ; ( uint ) i < maxIters ; i ++ )
953952 {
954953 utf16Data = Unsafe . ReadUnaligned < Vector128 < short > > ( pInputBuffer ) ;
955- if ( ! Sse41 . TestZ ( utf16Data , nonAsciiUtf16DataMask ) )
954+
955+ if ( AdvSimd . IsSupported )
956956 {
957- goto LoopTerminatedDueToNonAsciiDataInVectorLocal ;
958- }
957+ Vector128 < short > isUtf16DataNonAscii = AdvSimd . CompareTest ( utf16Data , nonAsciiUtf16DataMask ) ;
958+ bool hasNonAsciiDataInVector = AdvSimd . Arm64 . MinPairwise ( isUtf16DataNonAscii , isUtf16DataNonAscii ) . AsUInt64 ( ) . ToScalar ( ) != 0 ;
959959
960- // narrow and write
960+ if ( hasNonAsciiDataInVector )
961+ {
962+ goto LoopTerminatedDueToNonAsciiDataInVectorLocal ;
963+ }
961964
962- Sse2 . StoreScalar ( ( ulong * ) pOutputBuffer /* unaligned */ , Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt64 ( ) ) ;
965+ Vector64 < byte > lower = AdvSimd . ExtractNarrowingSaturateUnsignedLower ( utf16Data ) ;
966+ AdvSimd . Store ( pOutputBuffer , lower ) ;
967+ }
968+ else
969+ {
970+ if ( ! Sse41 . TestZ ( utf16Data , nonAsciiUtf16DataMask ) )
971+ {
972+ goto LoopTerminatedDueToNonAsciiDataInVectorLocal ;
973+ }
974+
975+ // narrow and write
976+ Sse2 . StoreScalar ( ( ulong * ) pOutputBuffer /* unaligned */ , Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt64 ( ) ) ;
977+ }
963978
964979 pInputBuffer += 8 ;
965980 pOutputBuffer += 8 ;
@@ -978,7 +993,16 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
978993 }
979994
980995 utf16Data = Vector128 . CreateScalarUnsafe ( possibleNonAsciiQWord ) . AsInt16 ( ) ;
981- Unsafe . WriteUnaligned < uint > ( pOutputBuffer , Sse2 . ConvertToUInt32 ( Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt32 ( ) ) ) ;
996+
997+ if ( AdvSimd . IsSupported )
998+ {
999+ Vector64 < byte > lower = AdvSimd . ExtractNarrowingSaturateUnsignedLower ( utf16Data ) ;
1000+ AdvSimd . StoreSelectedScalar ( ( uint * ) pOutputBuffer , lower . AsUInt32 ( ) , 0 ) ;
1001+ }
1002+ else
1003+ {
1004+ Unsafe . WriteUnaligned < uint > ( pOutputBuffer , Sse2 . ConvertToUInt32 ( Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt32 ( ) ) ) ;
1005+ }
9821006
9831007 pInputBuffer += 4 ;
9841008 pOutputBuffer += 4 ;
@@ -990,7 +1014,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
9901014 LoopTerminatedDueToNonAsciiDataInVectorLocal :
9911015
9921016 outputBytesRemaining -= 8 * i ;
993- possibleNonAsciiQWord = Sse2 . X64 . ConvertToUInt64 ( utf16Data . AsUInt64 ( ) ) ;
1017+
1018+ if ( Sse2 . X64 . IsSupported )
1019+ {
1020+ possibleNonAsciiQWord = Sse2 . X64 . ConvertToUInt64 ( utf16Data . AsUInt64 ( ) ) ;
1021+ }
1022+ else
1023+ {
1024+ possibleNonAsciiQWord = utf16Data . AsUInt64 ( ) . ToScalar ( ) ;
1025+ }
9941026
9951027 // Temporarily set 'possibleNonAsciiQWord' to be the low 64 bits of the vector,
9961028 // then check whether it's all-ASCII. If so, narrow and write to the destination
@@ -1000,7 +1032,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
10001032
10011033 if ( Utf16Utility . AllCharsInUInt64AreAscii ( possibleNonAsciiQWord ) ) // all chars in first QWORD are ASCII
10021034 {
1003- Unsafe . WriteUnaligned < uint > ( pOutputBuffer , Sse2 . ConvertToUInt32 ( Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt32 ( ) ) ) ;
1035+ if ( AdvSimd . IsSupported )
1036+ {
1037+ Vector64 < byte > lower = AdvSimd . ExtractNarrowingSaturateUnsignedLower ( utf16Data ) ;
1038+ AdvSimd . StoreSelectedScalar ( ( uint * ) pOutputBuffer , lower . AsUInt32 ( ) , 0 ) ;
1039+ }
1040+ else
1041+ {
1042+ Unsafe . WriteUnaligned < uint > ( pOutputBuffer , Sse2 . ConvertToUInt32 ( Sse2 . PackUnsignedSaturate ( utf16Data , utf16Data ) . AsUInt32 ( ) ) ) ;
1043+ }
10041044 pInputBuffer += 4 ;
10051045 pOutputBuffer += 4 ;
10061046 outputBytesRemaining -= 4 ;
0 commit comments