Skip to content

Commit 358e28a

Browse files
Begin using the xplat hardware intrinsics in BitArray (#63722)
* Change the BitArray(bool[]) constructor to use the xplat intrinsics
* Change the And, Or, Xor, and Not methods to use the xplat intrinsics
1 parent 3a77a6d commit 358e28a

File tree

1 file changed

+67
-187
lines changed
  • src/libraries/System.Collections/src/System/Collections

1 file changed

+67
-187
lines changed

src/libraries/System.Collections/src/System/Collections/BitArray.cs

Lines changed: 67 additions & 187 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,11 @@
44
using System.Buffers.Binary;
55
using System.Diagnostics;
66
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
78
using System.Runtime.Intrinsics;
89
using System.Runtime.Intrinsics.X86;
910
using System.Runtime.Intrinsics.Arm;
11+
using Internal.Runtime.CompilerServices;
1012

1113
namespace System.Collections
1214
{
@@ -145,81 +147,32 @@ public unsafe BitArray(bool[] values)
145147
// (true for any non-zero values, false for 0) - any values between 2-255 will be interpreted as false.
146148
// Instead, we compare with zeroes (== false), then negate the result to ensure compatibility.
147149

148-
if (Avx2.IsSupported)
150+
ref byte value = ref Unsafe.As<bool, byte>(ref MemoryMarshal.GetArrayDataReference<bool>(values));
151+
152+
if (Vector256.IsHardwareAccelerated)
149153
{
150-
// JIT does not support code hoisting for SIMD yet
151-
Vector256<byte> zero = Vector256<byte>.Zero;
152-
fixed (bool* ptr = values)
154+
for (; (i + Vector256ByteCount) <= (uint)values.Length; i += Vector256ByteCount)
153155
{
154-
for (; (i + Vector256ByteCount) <= (uint)values.Length; i += Vector256ByteCount)
155-
{
156-
Vector256<byte> vector = Avx.LoadVector256((byte*)ptr + i);
157-
Vector256<byte> isFalse = Avx2.CompareEqual(vector, zero);
158-
int result = Avx2.MoveMask(isFalse);
159-
m_array[i / 32u] = ~result;
160-
}
156+
Vector256<byte> vector = Vector256.LoadUnsafe(ref value, i);
157+
Vector256<byte> isFalse = Vector256.Equals(vector, Vector256<byte>.Zero);
158+
159+
uint result = isFalse.ExtractMostSignificantBits();
160+
m_array[i / 32u] = (int)(~result);
161161
}
162162
}
163-
else if (Sse2.IsSupported)
163+
else if (Vector128.IsHardwareAccelerated)
164164
{
165-
// JIT does not support code hoisting for SIMD yet
166-
Vector128<byte> zero = Vector128<byte>.Zero;
167-
fixed (bool* ptr = values)
165+
for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u)
168166
{
169-
for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u)
170-
{
171-
Vector128<byte> lowerVector = Sse2.LoadVector128((byte*)ptr + i);
172-
Vector128<byte> lowerIsFalse = Sse2.CompareEqual(lowerVector, zero);
173-
int lowerPackedIsFalse = Sse2.MoveMask(lowerIsFalse);
167+
Vector128<byte> lowerVector = Vector128.LoadUnsafe(ref value, i);
168+
Vector128<byte> lowerIsFalse = Vector128.Equals(lowerVector, Vector128<byte>.Zero);
169+
uint lowerResult = lowerIsFalse.ExtractMostSignificantBits();
174170

175-
Vector128<byte> upperVector = Sse2.LoadVector128((byte*)ptr + i + Vector128<byte>.Count);
176-
Vector128<byte> upperIsFalse = Sse2.CompareEqual(upperVector, zero);
177-
int upperPackedIsFalse = Sse2.MoveMask(upperIsFalse);
171+
Vector128<byte> upperVector = Vector128.LoadUnsafe(ref value, i + Vector128ByteCount);
172+
Vector128<byte> upperIsFalse = Vector128.Equals(upperVector, Vector128<byte>.Zero);
173+
uint upperResult = upperIsFalse.ExtractMostSignificantBits();
178174

179-
m_array[i / 32u] = ~((upperPackedIsFalse << 16) | lowerPackedIsFalse);
180-
}
181-
}
182-
}
183-
else if (AdvSimd.Arm64.IsSupported)
184-
{
185-
// JIT does not support code hoisting for SIMD yet
186-
// However comparison against zero can be replaced to cmeq against zero (vceqzq_s8)
187-
// See dotnet/runtime#33972 for details
188-
Vector128<byte> zero = Vector128<byte>.Zero;
189-
Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
190-
Vector128.Create(0x80402010_08040201).AsByte() :
191-
Vector128.Create(0x01020408_10204080).AsByte();
192-
193-
fixed (bool* ptr = values)
194-
{
195-
for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u)
196-
{
197-
// Same logic as SSE2 path, however we lack MoveMask (equivalent) instruction
198-
// As a workaround, mask out the relevant bit after comparison
199-
// and combine by ORing all of them together (In this case, adding all of them does the same thing)
200-
Vector128<byte> lowerVector = AdvSimd.LoadVector128((byte*)ptr + i);
201-
Vector128<byte> lowerIsFalse = AdvSimd.CompareEqual(lowerVector, zero);
202-
Vector128<byte> bitsExtracted1 = AdvSimd.And(lowerIsFalse, bitMask128);
203-
bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1);
204-
bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1);
205-
bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1);
206-
Vector128<short> lowerPackedIsFalse = bitsExtracted1.AsInt16();
207-
208-
Vector128<byte> upperVector = AdvSimd.LoadVector128((byte*)ptr + i + Vector128<byte>.Count);
209-
Vector128<byte> upperIsFalse = AdvSimd.CompareEqual(upperVector, zero);
210-
Vector128<byte> bitsExtracted2 = AdvSimd.And(upperIsFalse, bitMask128);
211-
bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2);
212-
bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2);
213-
bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2);
214-
Vector128<short> upperPackedIsFalse = bitsExtracted2.AsInt16();
215-
216-
int result = AdvSimd.Arm64.ZipLow(lowerPackedIsFalse, upperPackedIsFalse).AsInt32().ToScalar();
217-
if (!BitConverter.IsLittleEndian)
218-
{
219-
result = BinaryPrimitives.ReverseEndianness(result);
220-
}
221-
m_array[i / 32u] = ~result;
222-
}
175+
m_array[i / 32u] = (int)(~((upperResult << 16) | lowerResult));
223176
}
224177
}
225178

@@ -400,43 +353,24 @@ public unsafe BitArray And(BitArray value)
400353
}
401354

402355
uint i = 0;
403-
if (Avx2.IsSupported)
404-
{
405-
fixed (int* leftPtr = thisArray)
406-
fixed (int* rightPtr = valueArray)
407-
{
408-
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
409-
{
410-
Vector256<int> leftVec = Avx.LoadVector256(leftPtr + i);
411-
Vector256<int> rightVec = Avx.LoadVector256(rightPtr + i);
412-
Avx.Store(leftPtr + i, Avx2.And(leftVec, rightVec));
413-
}
414-
}
415-
}
416-
else if (Sse2.IsSupported)
356+
357+
ref int left = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
358+
ref int right = ref MemoryMarshal.GetArrayDataReference<int>(valueArray);
359+
360+
if (Vector256.IsHardwareAccelerated)
417361
{
418-
fixed (int* leftPtr = thisArray)
419-
fixed (int* rightPtr = valueArray)
362+
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
420363
{
421-
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
422-
{
423-
Vector128<int> leftVec = Sse2.LoadVector128(leftPtr + i);
424-
Vector128<int> rightVec = Sse2.LoadVector128(rightPtr + i);
425-
Sse2.Store(leftPtr + i, Sse2.And(leftVec, rightVec));
426-
}
364+
Vector256<int> result = Vector256.LoadUnsafe(ref left, i) & Vector256.LoadUnsafe(ref right, i);
365+
result.StoreUnsafe(ref left, i);
427366
}
428367
}
429-
else if (AdvSimd.IsSupported)
368+
else if (Vector128.IsHardwareAccelerated)
430369
{
431-
fixed (int* leftPtr = thisArray)
432-
fixed (int* rightPtr = valueArray)
370+
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
433371
{
434-
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
435-
{
436-
Vector128<int> leftVec = AdvSimd.LoadVector128(leftPtr + i);
437-
Vector128<int> rightVec = AdvSimd.LoadVector128(rightPtr + i);
438-
AdvSimd.Store(leftPtr + i, AdvSimd.And(leftVec, rightVec));
439-
}
372+
Vector128<int> result = Vector128.LoadUnsafe(ref left, i) & Vector128.LoadUnsafe(ref right, i);
373+
result.StoreUnsafe(ref left, i);
440374
}
441375
}
442376

@@ -486,43 +420,24 @@ public unsafe BitArray Or(BitArray value)
486420
}
487421

488422
uint i = 0;
489-
if (Avx2.IsSupported)
490-
{
491-
fixed (int* leftPtr = thisArray)
492-
fixed (int* rightPtr = valueArray)
493-
{
494-
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
495-
{
496-
Vector256<int> leftVec = Avx.LoadVector256(leftPtr + i);
497-
Vector256<int> rightVec = Avx.LoadVector256(rightPtr + i);
498-
Avx.Store(leftPtr + i, Avx2.Or(leftVec, rightVec));
499-
}
500-
}
501-
}
502-
else if (Sse2.IsSupported)
423+
424+
ref int left = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
425+
ref int right = ref MemoryMarshal.GetArrayDataReference<int>(valueArray);
426+
427+
if (Vector256.IsHardwareAccelerated)
503428
{
504-
fixed (int* leftPtr = thisArray)
505-
fixed (int* rightPtr = valueArray)
429+
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
506430
{
507-
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
508-
{
509-
Vector128<int> leftVec = Sse2.LoadVector128(leftPtr + i);
510-
Vector128<int> rightVec = Sse2.LoadVector128(rightPtr + i);
511-
Sse2.Store(leftPtr + i, Sse2.Or(leftVec, rightVec));
512-
}
431+
Vector256<int> result = Vector256.LoadUnsafe(ref left, i) | Vector256.LoadUnsafe(ref right, i);
432+
result.StoreUnsafe(ref left, i);
513433
}
514434
}
515-
else if (AdvSimd.IsSupported)
435+
else if (Vector128.IsHardwareAccelerated)
516436
{
517-
fixed (int* leftPtr = thisArray)
518-
fixed (int* rightPtr = valueArray)
437+
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
519438
{
520-
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
521-
{
522-
Vector128<int> leftVec = AdvSimd.LoadVector128(leftPtr + i);
523-
Vector128<int> rightVec = AdvSimd.LoadVector128(rightPtr + i);
524-
AdvSimd.Store(leftPtr + i, AdvSimd.Or(leftVec, rightVec));
525-
}
439+
Vector128<int> result = Vector128.LoadUnsafe(ref left, i) | Vector128.LoadUnsafe(ref right, i);
440+
result.StoreUnsafe(ref left, i);
526441
}
527442
}
528443

@@ -572,43 +487,24 @@ public unsafe BitArray Xor(BitArray value)
572487
}
573488

574489
uint i = 0;
575-
if (Avx2.IsSupported)
490+
491+
ref int left = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
492+
ref int right = ref MemoryMarshal.GetArrayDataReference<int>(valueArray);
493+
494+
if (Vector256.IsHardwareAccelerated)
576495
{
577-
fixed (int* leftPtr = m_array)
578-
fixed (int* rightPtr = value.m_array)
496+
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
579497
{
580-
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
581-
{
582-
Vector256<int> leftVec = Avx.LoadVector256(leftPtr + i);
583-
Vector256<int> rightVec = Avx.LoadVector256(rightPtr + i);
584-
Avx.Store(leftPtr + i, Avx2.Xor(leftVec, rightVec));
585-
}
498+
Vector256<int> result = Vector256.LoadUnsafe(ref left, i) ^ Vector256.LoadUnsafe(ref right, i);
499+
result.StoreUnsafe(ref left, i);
586500
}
587501
}
588-
else if (Sse2.IsSupported)
502+
else if (Vector128.IsHardwareAccelerated)
589503
{
590-
fixed (int* leftPtr = thisArray)
591-
fixed (int* rightPtr = valueArray)
504+
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
592505
{
593-
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
594-
{
595-
Vector128<int> leftVec = Sse2.LoadVector128(leftPtr + i);
596-
Vector128<int> rightVec = Sse2.LoadVector128(rightPtr + i);
597-
Sse2.Store(leftPtr + i, Sse2.Xor(leftVec, rightVec));
598-
}
599-
}
600-
}
601-
else if (AdvSimd.IsSupported)
602-
{
603-
fixed (int* leftPtr = thisArray)
604-
fixed (int* rightPtr = valueArray)
605-
{
606-
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
607-
{
608-
Vector128<int> leftVec = AdvSimd.LoadVector128(leftPtr + i);
609-
Vector128<int> rightVec = AdvSimd.LoadVector128(rightPtr + i);
610-
AdvSimd.Store(leftPtr + i, AdvSimd.Xor(leftVec, rightVec));
611-
}
506+
Vector128<int> result = Vector128.LoadUnsafe(ref left, i) ^ Vector128.LoadUnsafe(ref right, i);
507+
result.StoreUnsafe(ref left, i);
612508
}
613509
}
614510

@@ -650,39 +546,23 @@ public unsafe BitArray Not()
650546
}
651547

652548
uint i = 0;
653-
if (Avx2.IsSupported)
654-
{
655-
Vector256<int> ones = Vector256.Create(-1);
656-
fixed (int* ptr = thisArray)
657-
{
658-
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
659-
{
660-
Vector256<int> vec = Avx.LoadVector256(ptr + i);
661-
Avx.Store(ptr + i, Avx2.Xor(vec, ones));
662-
}
663-
}
664-
}
665-
else if (Sse2.IsSupported)
549+
550+
ref int value = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
551+
552+
if (Vector256.IsHardwareAccelerated)
666553
{
667-
Vector128<int> ones = Vector128.Create(-1);
668-
fixed (int* ptr = thisArray)
554+
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
669555
{
670-
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
671-
{
672-
Vector128<int> vec = Sse2.LoadVector128(ptr + i);
673-
Sse2.Store(ptr + i, Sse2.Xor(vec, ones));
674-
}
556+
Vector256<int> result = ~Vector256.LoadUnsafe(ref value, i);
557+
result.StoreUnsafe(ref value, i);
675558
}
676559
}
677-
else if (AdvSimd.IsSupported)
560+
else if (Vector128.IsHardwareAccelerated)
678561
{
679-
fixed (int* leftPtr = thisArray)
562+
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
680563
{
681-
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
682-
{
683-
Vector128<int> leftVec = AdvSimd.LoadVector128(leftPtr + i);
684-
AdvSimd.Store(leftPtr + i, AdvSimd.Not(leftVec));
685-
}
564+
Vector128<int> result = ~Vector128.LoadUnsafe(ref value, i);
565+
result.StoreUnsafe(ref value, i);
686566
}
687567
}
688568

0 commit comments

Comments
 (0)