RalfJung
diff --git a/‎crates/core_arch/src/macros.rs‎
Lines changed: 100 additions & 0 deletions b/‎crates/core_arch/src/macros.rs‎
Lines changed: 100 additions & 0 deletions
diff --git a/‎crates/core_arch/src/x86/avx.rs‎
Lines changed: 16 additions & 16 deletions b/‎crates/core_arch/src/x86/avx.rs‎
Lines changed: 16 additions & 16 deletions
@@ -92,3 +92,103 @@ macro_rules! types {
         pub struct $name($($fields)*);
     )*)
 }
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle2 {
+    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
+        const IDX: [u32; 2] = $idx;
+        simd_shuffle2($x, $y, IDX)
+    }}
+}
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle2_param {
+    ($x:expr, $y:expr, <const $imm:ident : $ty:ty> $idx:expr $(,)?) => {{
+        struct ConstParam<const $imm: $ty>;
+        impl<const $imm: $ty> ConstParam<$imm> {
+            const IDX: [u32; 2] = $idx;
+        }
+
+        simd_shuffle2($x, $y, ConstParam::<$imm>::IDX)
+    }}
+}
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle4 {
+    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
+        const IDX: [u32; 4] = $idx;
+        simd_shuffle4($x, $y, IDX)
+    }}
+}
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle4_param {
+    ($x:expr, $y:expr, <const $imm:ident : $ty:ty> $idx:expr $(,)?) => {{
+        struct ConstParam<const $imm: $ty>;
+        impl<const $imm: $ty> ConstParam<$imm> {
+            const IDX: [u32; 4] = $idx;
+        }
+
+        simd_shuffle4($x, $y, ConstParam::<$imm>::IDX)
+    }}
+}
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle8 {
+    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
+        const IDX: [u32; 8] = $idx;
+        simd_shuffle8($x, $y, IDX)
+    }}
+}
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle8_param {
+    ($x:expr, $y:expr, <const $imm:ident : $ty:ty> $idx:expr $(,)?) => {{
+        struct ConstParam<const $imm: $ty>;
+        impl<const $imm: $ty> ConstParam<$imm> {
+            const IDX: [u32; 8] = $idx;
+        }
+
+        simd_shuffle8($x, $y, ConstParam::<$imm>::IDX)
+    }}
+}
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle16 {
+    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
+        const IDX: [u32; 16] = $idx;
+        simd_shuffle16($x, $y, IDX)
+    }}
+}
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle16_param {
+    ($x:expr, $y:expr, <const $imm:ident : $ty:ty> $idx:expr $(,)?) => {{
+        struct ConstParam<const $imm: $ty>;
+        impl<const $imm: $ty> ConstParam<$imm> {
+            const IDX: [u32; 16] = $idx;
+        }
+
+        simd_shuffle16($x, $y, ConstParam::<$imm>::IDX)
+    }}
+}
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle32 {
+    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
+        const IDX: [u32; 32] = $idx;
+        simd_shuffle32($x, $y, IDX)
+    }}
+}
+
+#[allow(unused_macros)]
+macro_rules! simd_shuffle32_param {
+    ($x:expr, $y:expr, <const $imm:ident : $ty:ty> $idx:expr $(,)?) => {{
+        struct ConstParam<const $imm: $ty>;
+        impl<const $imm: $ty> ConstParam<$imm> {
+            const IDX: [u32; 32] = $idx;
+        }
+
+        simd_shuffle32($x, $y, ConstParam::<$imm>::IDX)
+    }}
+}
@@ -1639,7 +1639,7 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
 #[cfg_attr(test, assert_instr(vmovshdup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
-    simd_shuffle8(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
+    simd_shuffle8!(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
 }
 
 /// Duplicate even-indexed single-precision (32-bit) floating-point elements
@@ -1651,7 +1651,7 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vmovsldup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
-    simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
+    simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
 }
 
 /// Duplicate even-indexed double-precision (64-bit) floating-point elements
@@ -1663,7 +1663,7 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vmovddup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
-    simd_shuffle4(a, a, [0, 0, 2, 2])
+    simd_shuffle4!(a, a, [0, 0, 2, 2])
 }
 
 /// Loads 256-bits of integer data from unaligned memory into result.
@@ -1756,7 +1756,7 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vunpckhpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
-    simd_shuffle4(a, b, [1, 5, 3, 7])
+    simd_shuffle4!(a, b, [1, 5, 3, 7])
 }
 
 /// Unpacks and interleave single-precision (32-bit) floating-point elements
@@ -1768,7 +1768,7 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vunpckhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
-    simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
+    simd_shuffle8!(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
 }
 
 /// Unpacks and interleave double-precision (64-bit) floating-point elements
@@ -1780,7 +1780,7 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vunpcklpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
-    simd_shuffle4(a, b, [0, 4, 2, 6])
+    simd_shuffle4!(a, b, [0, 4, 2, 6])
 }
 
 /// Unpacks and interleave single-precision (32-bit) floating-point elements
@@ -1792,7 +1792,7 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vunpcklps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
-    simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
+    simd_shuffle8!(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
 }
 
 /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
@@ -2584,7 +2584,7 @@ pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
-    simd_shuffle2(a, a, [0, 1])
+    simd_shuffle2!(a, a, [0, 1])
 }
 
 /// Casts vector of type __m256i to type __m128i.
@@ -2597,7 +2597,7 @@ pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
     let a = a.as_i64x4();
-    let dst: i64x2 = simd_shuffle2(a, a, [0, 1]);
+    let dst: i64x2 = simd_shuffle2!(a, a, [0, 1]);
     transmute(dst)
 }
 
@@ -2612,7 +2612,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
     // FIXME simd_shuffle8(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
-    simd_shuffle8(a, a, [0, 1, 2, 3, 0, 0, 0, 0])
+    simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 0, 0, 0])
 }
 
 /// Casts vector of type __m128d to type __m256d;
@@ -2626,7 +2626,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
     // FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
-    simd_shuffle4(a, a, [0, 1, 0, 0])
+    simd_shuffle4!(a, a, [0, 1, 0, 0])
 }
 
 /// Casts vector of type __m128i to type __m256i;
@@ -2641,7 +2641,7 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
 pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
     let a = a.as_i64x2();
     // FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
-    let dst: i64x4 = simd_shuffle4(a, a, [0, 1, 0, 0]);
+    let dst: i64x4 = simd_shuffle4!(a, a, [0, 1, 0, 0]);
     transmute(dst)
 }
 
@@ -2656,7 +2656,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
-    simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
+    simd_shuffle8!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
 }
 
 /// Constructs a 256-bit integer vector from a 128-bit integer vector.
@@ -2671,7 +2671,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
     let b = _mm_setzero_si128().as_i64x2();
-    let dst: i64x4 = simd_shuffle4(a.as_i64x2(), b, [0, 1, 2, 3]);
+    let dst: i64x4 = simd_shuffle4!(a.as_i64x2(), b, [0, 1, 2, 3]);
     transmute(dst)
 }
 
@@ -2687,7 +2687,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
-    simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
+    simd_shuffle4!(a, _mm_setzero_pd(), [0, 1, 2, 3])
 }
 
 /// Returns vector of type `__m256` with undefined elements.
@@ -2732,7 +2732,7 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i {
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
-    simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
+    simd_shuffle8!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
 }
 
 /// Sets packed __m256d returned vector with the supplied values.