Skip to content

Commit 9d88a94

Browse files
authored
AdvSimd support for System.Text.Unicode.Utf8Utility.GetPointerToFirstInvalidByte (#38653)
* AdvSimd support for System.Text.Unicode.Utf8Utility.GetPointerToFirstInvalidByte * Move comment to the top, add shims. * Little endian checks * Use custom MoveMask method for AdvSimd * Address suggestions to improve the AdvSimdMoveMask method * Define initialMask outside MoveMask method * UInt64 in Arm64MoveMask * Add unit test case to verify intrinsics improvement * Avoid casting to smaller integer type * Typo and comment * Use ShiftRightArithmetic instead of CompareEqual + And. Remove test case causing other unit tests to fail. * Use AddPairwise version of GetNotAsciiBytes * Add missing shims causing Linux build to fail * Simplify GetNonAsciiBytes to only one AddPairwise call, shorter bitmask * Respect data type returned by masking method * Address suggestions - assert trailingzerocount and bring back uint mask * Trailing zeroes in AdvSimd need to be divided by 4, and total number should not be larger than 16 * Avoid declaring static field which causes PNSE in Utf8String.Experimental (S.P.Corelib code is used for being NetStandard) * Prefer using nuint for BitConverter.TrailingZeroCount
1 parent 7324741 commit 9d88a94

File tree

1 file changed

+47
-17
lines changed

1 file changed

+47
-17
lines changed

src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
using System.Diagnostics;
55
using System.Numerics;
66
using System.Runtime.CompilerServices;
7+
using System.Runtime.Intrinsics;
8+
using System.Runtime.Intrinsics.Arm;
79
using System.Runtime.Intrinsics.X86;
810

911
#if SYSTEM_PRIVATE_CORELIB
@@ -117,22 +119,35 @@ internal static unsafe partial class Utf8Utility
117119
// the alignment check consumes at most a single DWORD.)
118120

119121
byte* pInputBufferFinalPosAtWhichCanSafelyLoop = pFinalPosWhereCanReadDWordFromInputBuffer - 3 * sizeof(uint); // can safely read 4 DWORDs here
120-
uint mask;
122+
nuint trailingZeroCount;
123+
124+
Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
125+
Vector128.Create((ushort)0x1001).AsByte() :
126+
Vector128.Create((ushort)0x0110).AsByte();
121127

122128
do
123129
{
124-
if (Sse2.IsSupported)
130+
// pInputBuffer is 32-bit aligned but not necessarily 128-bit aligned, so we're
131+
// going to perform an unaligned load. We don't necessarily care about aligning
132+
// this because we pessimistically assume we'll encounter non-ASCII data at some
133+
// point in the not-too-distant future (otherwise we would've stayed entirely
134+
// within the all-ASCII vectorized code at the entry to this method).
135+
if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)
125136
{
126-
// pInputBuffer is 32-bit aligned but not necessary 128-bit aligned, so we're
127-
// going to perform an unaligned load. We don't necessarily care about aligning
128-
// this because we pessimistically assume we'll encounter non-ASCII data at some
129-
// point in the not-too-distant future (otherwise we would've stayed entirely
130-
// within the all-ASCII vectorized code at the entry to this method).
131-
132-
mask = (uint)Sse2.MoveMask(Sse2.LoadVector128((byte*)pInputBuffer));
137+
ulong mask = GetNonAsciiBytes(AdvSimd.LoadVector128(pInputBuffer), bitMask128);
138+
if (mask != 0)
139+
{
140+
trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask) >> 2;
141+
goto LoopTerminatedEarlyDueToNonAsciiData;
142+
}
143+
}
144+
else if (Sse2.IsSupported)
145+
{
146+
uint mask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pInputBuffer));
133147
if (mask != 0)
134148
{
135-
goto Sse2LoopTerminatedEarlyDueToNonAsciiData;
149+
trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask);
150+
goto LoopTerminatedEarlyDueToNonAsciiData;
136151
}
137152
}
138153
else
@@ -153,19 +168,20 @@ internal static unsafe partial class Utf8Utility
153168

154169
continue; // need to perform a bounds check because we might be running out of data
155170

156-
Sse2LoopTerminatedEarlyDueToNonAsciiData:
171+
LoopTerminatedEarlyDueToNonAsciiData:
172+
// x86 can only be little endian, while ARM can be big or little endian
173+
// so if we reached this label we need to check that one of the two supported combinations applies
174+
Debug.Assert((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported);
157175

158-
Debug.Assert(BitConverter.IsLittleEndian);
159-
Debug.Assert(Sse2.IsSupported);
160176

161177
// The 'mask' value will have a 0 bit for each ASCII byte we saw and a 1 bit
162-
// for each non-ASCII byte we saw. We can count the number of ASCII bytes,
178+
// for each non-ASCII byte we saw. trailingZeroCount holds the number of leading ASCII bytes; we
163179
// bump our input counter by that amount, and resume processing from the
164180
// "the first byte is no longer ASCII" portion of the main loop.
181+
// trailingZeroCount indexes a byte within a 16-byte vector, so it must always be less than 16.
182+
Debug.Assert(trailingZeroCount < 16);
165183

166-
Debug.Assert(mask != 0);
167-
168-
pInputBuffer += BitOperations.TrailingZeroCount(mask);
184+
pInputBuffer += trailingZeroCount;
169185
if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
170186
{
171187
goto ProcessRemainingBytesSlow;
@@ -719,5 +735,19 @@ internal static unsafe partial class Utf8Utility
719735
scalarCountAdjustment = tempScalarCountAdjustment;
720736
return pInputBuffer;
721737
}
738+
739+
/// <summary>
/// Builds a 64-bit mask describing which bytes of <paramref name="value"/> have their
/// most significant bit set (i.e. are non-ASCII), using ARM64 AdvSimd instructions.
/// </summary>
/// <param name="value">The 16 input bytes to inspect.</param>
/// <param name="bitMask128">
/// Per-byte bit selector that is ANDed with the result of the high-bit test.
/// The caller visible in this file passes alternating 0x01 / 0x10 bytes on little-endian targets.
/// </param>
/// <returns>
/// The low 64 bits of the pairwise-added vector. With the caller's selector this is one
/// 4-bit group per input byte, which is why the caller shifts the trailing zero count
/// right by 2 to obtain a byte index. Zero means no byte had its high bit set.
/// </returns>
/// <exception cref="PlatformNotSupportedException">
/// AdvSimd.Arm64 is not supported, or the platform is not little endian.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
740+
private static ulong GetNonAsciiBytes(Vector128<byte> value, Vector128<byte> bitMask128)
741+
{
742+
// This helper is only implemented for little-endian ARM64; reject every other configuration.
if (!AdvSimd.Arm64.IsSupported || !BitConverter.IsLittleEndian)
743+
{
744+
throw new PlatformNotSupportedException();
745+
}
746+
747+
// An arithmetic shift right by 7 on signed bytes replicates the sign bit across the byte:
// 0xFF when the high bit was set, 0x00 otherwise.
Vector128<byte> mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte();
748+
// Keep only the selector bit(s) for those bytes whose high bit was set.
Vector128<byte> extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitMask128);
749+
// Adding adjacent byte pairs folds two input bytes into one output byte. Passing the same
// vector for both operands means the low 8 result bytes already cover all 16 input bytes.
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
750+
// Reinterpret the vector as two ulongs and return element 0 (the low 64 bits).
return extractedBits.AsUInt64().ToScalar();
751+
}
722752
}
723753
}

0 commit comments

Comments
 (0)