 using System.Buffers.Binary;
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 using System.Runtime.Intrinsics.Arm;
+using Internal.Runtime.CompilerServices;

 namespace System.Collections
 {
@@ -145,81 +147,32 @@ public unsafe BitArray(bool[] values) |
             // (true for any non-zero values, false for 0) - any values between 2-255 will be interpreted as false.
             // Instead, We compare with zeroes (== false) then negate the result to ensure compatibility.

-            if (Avx2.IsSupported)
+            ref byte value = ref Unsafe.As<bool, byte>(ref MemoryMarshal.GetArrayDataReference<bool>(values));
+
+            if (Vector256.IsHardwareAccelerated)
             {
-                // JIT does not support code hoisting for SIMD yet
-                Vector256<byte> zero = Vector256<byte>.Zero;
-                fixed (bool* ptr = values)
+                for (; (i + Vector256ByteCount) <= (uint)values.Length; i += Vector256ByteCount)
                 {
-                    for (; (i + Vector256ByteCount) <= (uint)values.Length; i += Vector256ByteCount)
-                    {
-                        Vector256<byte> vector = Avx.LoadVector256((byte*)ptr + i);
-                        Vector256<byte> isFalse = Avx2.CompareEqual(vector, zero);
-                        int result = Avx2.MoveMask(isFalse);
-                        m_array[i / 32u] = ~result;
-                    }
+                    Vector256<byte> vector = Vector256.LoadUnsafe(ref value, i);
+                    Vector256<byte> isFalse = Vector256.Equals(vector, Vector256<byte>.Zero);
+
+                    uint result = isFalse.ExtractMostSignificantBits();
+                    m_array[i / 32u] = (int)(~result);
                 }
             }
-            else if (Sse2.IsSupported)
+            else if (Vector128.IsHardwareAccelerated)
             {
-                // JIT does not support code hoisting for SIMD yet
-                Vector128<byte> zero = Vector128<byte>.Zero;
-                fixed (bool* ptr = values)
+                for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u)
                 {
-                    for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u)
-                    {
-                        Vector128<byte> lowerVector = Sse2.LoadVector128((byte*)ptr + i);
-                        Vector128<byte> lowerIsFalse = Sse2.CompareEqual(lowerVector, zero);
-                        int lowerPackedIsFalse = Sse2.MoveMask(lowerIsFalse);
+                    Vector128<byte> lowerVector = Vector128.LoadUnsafe(ref value, i);
+                    Vector128<byte> lowerIsFalse = Vector128.Equals(lowerVector, Vector128<byte>.Zero);
+                    uint lowerResult = lowerIsFalse.ExtractMostSignificantBits();

-                        Vector128<byte> upperVector = Sse2.LoadVector128((byte*)ptr + i + Vector128<byte>.Count);
-                        Vector128<byte> upperIsFalse = Sse2.CompareEqual(upperVector, zero);
-                        int upperPackedIsFalse = Sse2.MoveMask(upperIsFalse);
+                    Vector128<byte> upperVector = Vector128.LoadUnsafe(ref value, i + Vector128ByteCount);
+                    Vector128<byte> upperIsFalse = Vector128.Equals(upperVector, Vector128<byte>.Zero);
+                    uint upperResult = upperIsFalse.ExtractMostSignificantBits();

-                        m_array[i / 32u] = ~((upperPackedIsFalse << 16) | lowerPackedIsFalse);
-                    }
-                }
-            }
-            else if (AdvSimd.Arm64.IsSupported)
-            {
-                // JIT does not support code hoisting for SIMD yet
-                // However comparison against zero can be replaced to cmeq against zero (vceqzq_s8)
-                // See dotnet/runtime#33972 for details
-                Vector128<byte> zero = Vector128<byte>.Zero;
-                Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
-                                                Vector128.Create(0x80402010_08040201).AsByte() :
-                                                Vector128.Create(0x01020408_10204080).AsByte();
-
-                fixed (bool* ptr = values)
-                {
-                    for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u)
-                    {
-                        // Same logic as SSE2 path, however we lack MoveMask (equivalent) instruction
-                        // As a workaround, mask out the relevant bit after comparison
-                        // and combine by ORing all of them together (In this case, adding all of them does the same thing)
-                        Vector128<byte> lowerVector = AdvSimd.LoadVector128((byte*)ptr + i);
-                        Vector128<byte> lowerIsFalse = AdvSimd.CompareEqual(lowerVector, zero);
-                        Vector128<byte> bitsExtracted1 = AdvSimd.And(lowerIsFalse, bitMask128);
-                        bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1);
-                        bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1);
-                        bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1);
-                        Vector128<short> lowerPackedIsFalse = bitsExtracted1.AsInt16();
-
-                        Vector128<byte> upperVector = AdvSimd.LoadVector128((byte*)ptr + i + Vector128<byte>.Count);
-                        Vector128<byte> upperIsFalse = AdvSimd.CompareEqual(upperVector, zero);
-                        Vector128<byte> bitsExtracted2 = AdvSimd.And(upperIsFalse, bitMask128);
-                        bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2);
-                        bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2);
-                        bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2);
-                        Vector128<short> upperPackedIsFalse = bitsExtracted2.AsInt16();
-
-                        int result = AdvSimd.Arm64.ZipLow(lowerPackedIsFalse, upperPackedIsFalse).AsInt32().ToScalar();
-                        if (!BitConverter.IsLittleEndian)
-                        {
-                            result = BinaryPrimitives.ReverseEndianness(result);
-                        }
-                        m_array[i / 32u] = ~result;
-                    }
+                    m_array[i / 32u] = (int)(~((upperResult << 16) | lowerResult));
                 }
             }

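Note on the constructor hunk above: Vector256.Equals yields an all-ones byte wherever the input byte is zero (false), and ExtractMostSignificantBits packs those 32 sign bits into a uint, taking over the role of the retired Avx2.MoveMask (and of the pairwise-add workaround on ARM); negating the mask marks the true entries. Below is a minimal sketch of what one 256-bit iteration computes, assuming .NET 7's cross-platform vector APIs; the helper name PackBools32 is illustrative, not part of the PR.

    using System;
    using System.Runtime.InteropServices;
    using System.Runtime.Intrinsics;

    static class BitArraySketch
    {
        // Packs 32 bools into one int: bit k is set iff values[k] is true.
        internal static int PackBools32(ReadOnlySpan<bool> values)
        {
            // Reinterpret 32 bools as 32 bytes and load them into one vector.
            Vector256<byte> vector = Vector256.Create<byte>(MemoryMarshal.AsBytes(values.Slice(0, 32)));
            // All-ones byte where the input byte is zero (i.e. false).
            Vector256<byte> isFalse = Vector256.Equals(vector, Vector256<byte>.Zero);
            // One mask bit per byte (its MSB), then invert so true inputs become 1 bits.
            return (int)~isFalse.ExtractMostSignificantBits();
        }
    }

For instance, with only values[0] and values[31] true, PackBools32 returns unchecked((int)0x80000001u).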
@@ -400,43 +353,24 @@ public unsafe BitArray And(BitArray value) |
             }

             uint i = 0;
-            if (Avx2.IsSupported)
-            {
-                fixed (int* leftPtr = thisArray)
-                fixed (int* rightPtr = valueArray)
-                {
-                    for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
-                    {
-                        Vector256<int> leftVec = Avx.LoadVector256(leftPtr + i);
-                        Vector256<int> rightVec = Avx.LoadVector256(rightPtr + i);
-                        Avx.Store(leftPtr + i, Avx2.And(leftVec, rightVec));
-                    }
-                }
-            }
-            else if (Sse2.IsSupported)
+
+            ref int left = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
+            ref int right = ref MemoryMarshal.GetArrayDataReference<int>(valueArray);
+
+            if (Vector256.IsHardwareAccelerated)
             {
-                fixed (int* leftPtr = thisArray)
-                fixed (int* rightPtr = valueArray)
+                for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
                 {
-                    for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
-                    {
-                        Vector128<int> leftVec = Sse2.LoadVector128(leftPtr + i);
-                        Vector128<int> rightVec = Sse2.LoadVector128(rightPtr + i);
-                        Sse2.Store(leftPtr + i, Sse2.And(leftVec, rightVec));
-                    }
+                    Vector256<int> result = Vector256.LoadUnsafe(ref left, i) & Vector256.LoadUnsafe(ref right, i);
+                    result.StoreUnsafe(ref left, i);
                 }
             }
-            else if (AdvSimd.IsSupported)
+            else if (Vector128.IsHardwareAccelerated)
             {
-                fixed (int* leftPtr = thisArray)
-                fixed (int* rightPtr = valueArray)
+                for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
                 {
-                    for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
-                    {
-                        Vector128<int> leftVec = AdvSimd.LoadVector128(leftPtr + i);
-                        Vector128<int> rightVec = AdvSimd.LoadVector128(rightPtr + i);
-                        AdvSimd.Store(leftPtr + i, AdvSimd.And(leftVec, rightVec));
-                    }
+                    Vector128<int> result = Vector128.LoadUnsafe(ref left, i) & Vector128.LoadUnsafe(ref right, i);
+                    result.StoreUnsafe(ref left, i);
                 }
             }

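The And hunk shows the general shape of the change: Vector128.LoadUnsafe(ref left, i) reads at an element offset from a managed reference, so no fixed pinning is needed, and a single Vector256/Vector128 pair of paths replaces the three per-ISA branches, with the JIT lowering to AVX, SSE, or AdvSimd instructions as available. Here is a self-contained sketch of the same pattern, including the scalar tail that in BitArray runs after the hunk; AndInPlace is a hypothetical helper, not part of the PR.

    using System;
    using System.Runtime.InteropServices;
    using System.Runtime.Intrinsics;

    static class AndSketch
    {
        // dst[i] &= src[i] for all i, vectorized where hardware allows.
        internal static void AndInPlace(int[] dst, int[] src)
        {
            if (src.Length < dst.Length)
                throw new ArgumentException("src must be at least as long as dst", nameof(src));

            ref int left = ref MemoryMarshal.GetArrayDataReference(dst);
            ref int right = ref MemoryMarshal.GetArrayDataReference(src);

            uint i = 0;
            if (Vector128.IsHardwareAccelerated && dst.Length >= Vector128<int>.Count)
            {
                // LoadUnsafe reads relative to a managed ref; no fixed pinning required.
                for (; i <= (uint)dst.Length - (uint)Vector128<int>.Count; i += (uint)Vector128<int>.Count)
                {
                    Vector128<int> result = Vector128.LoadUnsafe(ref left, i) & Vector128.LoadUnsafe(ref right, i);
                    result.StoreUnsafe(ref left, i);
                }
            }

            // Scalar tail for the remaining elements.
            for (; i < (uint)dst.Length; i++)
                dst[i] &= src[i];
        }
    }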
@@ -486,43 +420,24 @@ public unsafe BitArray Or(BitArray value) |
             }

             uint i = 0;
-            if (Avx2.IsSupported)
-            {
-                fixed (int* leftPtr = thisArray)
-                fixed (int* rightPtr = valueArray)
-                {
-                    for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
-                    {
-                        Vector256<int> leftVec = Avx.LoadVector256(leftPtr + i);
-                        Vector256<int> rightVec = Avx.LoadVector256(rightPtr + i);
-                        Avx.Store(leftPtr + i, Avx2.Or(leftVec, rightVec));
-                    }
-                }
-            }
-            else if (Sse2.IsSupported)
+
+            ref int left = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
+            ref int right = ref MemoryMarshal.GetArrayDataReference<int>(valueArray);
+
+            if (Vector256.IsHardwareAccelerated)
             {
-                fixed (int* leftPtr = thisArray)
-                fixed (int* rightPtr = valueArray)
+                for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
                 {
-                    for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
-                    {
-                        Vector128<int> leftVec = Sse2.LoadVector128(leftPtr + i);
-                        Vector128<int> rightVec = Sse2.LoadVector128(rightPtr + i);
-                        Sse2.Store(leftPtr + i, Sse2.Or(leftVec, rightVec));
-                    }
+                    Vector256<int> result = Vector256.LoadUnsafe(ref left, i) | Vector256.LoadUnsafe(ref right, i);
+                    result.StoreUnsafe(ref left, i);
                 }
             }
-            else if (AdvSimd.IsSupported)
+            else if (Vector128.IsHardwareAccelerated)
             {
-                fixed (int* leftPtr = thisArray)
-                fixed (int* rightPtr = valueArray)
+                for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
                 {
-                    for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
-                    {
-                        Vector128<int> leftVec = AdvSimd.LoadVector128(leftPtr + i);
-                        Vector128<int> rightVec = AdvSimd.LoadVector128(rightPtr + i);
-                        AdvSimd.Store(leftPtr + i, AdvSimd.Or(leftVec, rightVec));
-                    }
+                    Vector128<int> result = Vector128.LoadUnsafe(ref left, i) | Vector128.LoadUnsafe(ref right, i);
+                    result.StoreUnsafe(ref left, i);
                 }
             }

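A note on the loop bound that the And, Or, and Xor hunks share: i < (uint)count - (Vector128IntCount - 1u) stops as soon as a full vector no longer fits. Worked through with illustrative numbers (and assuming, as the unchanged surrounding code appears to guarantee, that counts smaller than one vector never reach this loop, since the unsigned subtraction would otherwise wrap around):

    // count = 10 ints, Vector128IntCount = 4:
    //   bound = 10u - 3u = 7u
    //   i = 0 -> ints 0..3,  i = 4 -> ints 4..7,  i = 8 is not < 7 -> stop
    // The remaining 2 ints fall to the scalar remainder that follows the hunk.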
@@ -572,43 +487,24 @@ public unsafe BitArray Xor(BitArray value) |
             }

             uint i = 0;
-            if (Avx2.IsSupported)
+
+            ref int left = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
+            ref int right = ref MemoryMarshal.GetArrayDataReference<int>(valueArray);
+
+            if (Vector256.IsHardwareAccelerated)
             {
-                fixed (int* leftPtr = m_array)
-                fixed (int* rightPtr = value.m_array)
+                for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
                 {
-                    for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
-                    {
-                        Vector256<int> leftVec = Avx.LoadVector256(leftPtr + i);
-                        Vector256<int> rightVec = Avx.LoadVector256(rightPtr + i);
-                        Avx.Store(leftPtr + i, Avx2.Xor(leftVec, rightVec));
-                    }
+                    Vector256<int> result = Vector256.LoadUnsafe(ref left, i) ^ Vector256.LoadUnsafe(ref right, i);
+                    result.StoreUnsafe(ref left, i);
                 }
             }
-            else if (Sse2.IsSupported)
+            else if (Vector128.IsHardwareAccelerated)
             {
-                fixed (int* leftPtr = thisArray)
-                fixed (int* rightPtr = valueArray)
+                for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
                 {
-                    for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
-                    {
-                        Vector128<int> leftVec = Sse2.LoadVector128(leftPtr + i);
-                        Vector128<int> rightVec = Sse2.LoadVector128(rightPtr + i);
-                        Sse2.Store(leftPtr + i, Sse2.Xor(leftVec, rightVec));
-                    }
-                }
-            }
-            else if (AdvSimd.IsSupported)
-            {
-                fixed (int* leftPtr = thisArray)
-                fixed (int* rightPtr = valueArray)
-                {
-                    for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
-                    {
-                        Vector128<int> leftVec = AdvSimd.LoadVector128(leftPtr + i);
-                        Vector128<int> rightVec = AdvSimd.LoadVector128(rightPtr + i);
-                        AdvSimd.Store(leftPtr + i, AdvSimd.Xor(leftVec, rightVec));
-                    }
+                    Vector128<int> result = Vector128.LoadUnsafe(ref left, i) ^ Vector128.LoadUnsafe(ref right, i);
+                    result.StoreUnsafe(ref left, i);
                 }
             }

@@ -650,39 +546,23 @@ public unsafe BitArray Not() |
             }

             uint i = 0;
-            if (Avx2.IsSupported)
-            {
-                Vector256<int> ones = Vector256.Create(-1);
-                fixed (int* ptr = thisArray)
-                {
-                    for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
-                    {
-                        Vector256<int> vec = Avx.LoadVector256(ptr + i);
-                        Avx.Store(ptr + i, Avx2.Xor(vec, ones));
-                    }
-                }
-            }
-            else if (Sse2.IsSupported)
+
+            ref int value = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
+
+            if (Vector256.IsHardwareAccelerated)
             {
-                Vector128<int> ones = Vector128.Create(-1);
-                fixed (int* ptr = thisArray)
+                for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
                 {
-                    for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
-                    {
-                        Vector128<int> vec = Sse2.LoadVector128(ptr + i);
-                        Sse2.Store(ptr + i, Sse2.Xor(vec, ones));
-                    }
+                    Vector256<int> result = ~Vector256.LoadUnsafe(ref value, i);
+                    result.StoreUnsafe(ref value, i);
                 }
             }
-            else if (AdvSimd.IsSupported)
+            else if (Vector128.IsHardwareAccelerated)
             {
-                fixed (int* leftPtr = thisArray)
+                for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
                 {
-                    for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
-                    {
-                        Vector128<int> leftVec = AdvSimd.LoadVector128(leftPtr + i);
-                        AdvSimd.Store(leftPtr + i, AdvSimd.Not(leftVec));
-                    }
+                    Vector128<int> result = ~Vector128.LoadUnsafe(ref value, i);
+                    result.StoreUnsafe(ref value, i);
                 }
             }

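In the Not hunk, the ~ operator subsumes both retired idioms: x86 has no vector NOT instruction, so the old code XORed against an all-ones vector, while ARM used AdvSimd.Not; the cross-platform operator leaves the choice of lowering to the JIT. A quick equivalence sketch (top-level statements, .NET 7 assumed):

    using System.Diagnostics;
    using System.Runtime.Intrinsics;

    Vector128<int> v = Vector128.Create(0x0F0F0F0F);
    // The new cross-platform spelling...
    Vector128<int> viaOperator = ~v;
    // ...matches the old x86 idiom of XOR against all ones.
    Vector128<int> viaXor = Vector128.Xor(v, Vector128.Create(-1));
    Debug.Assert(viaOperator == viaXor);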