@@ -1639,7 +1639,7 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
16391639#[ cfg_attr( test, assert_instr( vmovshdup) ) ]
16401640#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
16411641pub unsafe fn _mm256_movehdup_ps ( a : __m256 ) -> __m256 {
1642- simd_shuffle8 ( a, a, [ 1 , 1 , 3 , 3 , 5 , 5 , 7 , 7 ] )
1642+ simd_shuffle8 ! ( a, a, [ 1 , 1 , 3 , 3 , 5 , 5 , 7 , 7 ] )
16431643}
16441644
16451645/// Duplicate even-indexed single-precision (32-bit) floating-point elements
@@ -1651,7 +1651,7 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
16511651#[ cfg_attr( test, assert_instr( vmovsldup) ) ]
16521652#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
16531653pub unsafe fn _mm256_moveldup_ps ( a : __m256 ) -> __m256 {
1654- simd_shuffle8 ( a, a, [ 0 , 0 , 2 , 2 , 4 , 4 , 6 , 6 ] )
1654+ simd_shuffle8 ! ( a, a, [ 0 , 0 , 2 , 2 , 4 , 4 , 6 , 6 ] )
16551655}
16561656
16571657/// Duplicate even-indexed double-precision (64-bit) floating-point elements
@@ -1663,7 +1663,7 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
16631663#[ cfg_attr( test, assert_instr( vmovddup) ) ]
16641664#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
16651665pub unsafe fn _mm256_movedup_pd ( a : __m256d ) -> __m256d {
1666- simd_shuffle4 ( a, a, [ 0 , 0 , 2 , 2 ] )
1666+ simd_shuffle4 ! ( a, a, [ 0 , 0 , 2 , 2 ] )
16671667}
16681668
16691669/// Loads 256-bits of integer data from unaligned memory into result.
@@ -1756,7 +1756,7 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
17561756#[ cfg_attr( test, assert_instr( vunpckhpd) ) ]
17571757#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
17581758pub unsafe fn _mm256_unpackhi_pd ( a : __m256d , b : __m256d ) -> __m256d {
1759- simd_shuffle4 ( a, b, [ 1 , 5 , 3 , 7 ] )
1759+ simd_shuffle4 ! ( a, b, [ 1 , 5 , 3 , 7 ] )
17601760}
17611761
17621762/// Unpacks and interleaves single-precision (32-bit) floating-point elements
@@ -1768,7 +1768,7 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
17681768#[ cfg_attr( test, assert_instr( vunpckhps) ) ]
17691769#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
17701770pub unsafe fn _mm256_unpackhi_ps ( a : __m256 , b : __m256 ) -> __m256 {
1771- simd_shuffle8 ( a, b, [ 2 , 10 , 3 , 11 , 6 , 14 , 7 , 15 ] )
1771+ simd_shuffle8 ! ( a, b, [ 2 , 10 , 3 , 11 , 6 , 14 , 7 , 15 ] )
17721772}
17731773
17741774/// Unpacks and interleaves double-precision (64-bit) floating-point elements
@@ -1780,7 +1780,7 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
17801780#[ cfg_attr( test, assert_instr( vunpcklpd) ) ]
17811781#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
17821782pub unsafe fn _mm256_unpacklo_pd ( a : __m256d , b : __m256d ) -> __m256d {
1783- simd_shuffle4 ( a, b, [ 0 , 4 , 2 , 6 ] )
1783+ simd_shuffle4 ! ( a, b, [ 0 , 4 , 2 , 6 ] )
17841784}
17851785
17861786/// Unpacks and interleaves single-precision (32-bit) floating-point elements
@@ -1792,7 +1792,7 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
17921792#[ cfg_attr( test, assert_instr( vunpcklps) ) ]
17931793#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
17941794pub unsafe fn _mm256_unpacklo_ps ( a : __m256 , b : __m256 ) -> __m256 {
1795- simd_shuffle8 ( a, b, [ 0 , 8 , 1 , 9 , 4 , 12 , 5 , 13 ] )
1795+ simd_shuffle8 ! ( a, b, [ 0 , 8 , 1 , 9 , 4 , 12 , 5 , 13 ] )
17961796}
17971797
17981798/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
@@ -2584,7 +2584,7 @@ pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
25842584// instructions, thus it has zero latency.
25852585#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
25862586pub unsafe fn _mm256_castpd256_pd128 ( a : __m256d ) -> __m128d {
2587- simd_shuffle2 ( a, a, [ 0 , 1 ] )
2587+ simd_shuffle2 ! ( a, a, [ 0 , 1 ] )
25882588}
25892589
25902590/// Casts vector of type __m256i to type __m128i.
@@ -2597,7 +2597,7 @@ pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
25972597#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
25982598pub unsafe fn _mm256_castsi256_si128 ( a : __m256i ) -> __m128i {
25992599 let a = a. as_i64x4 ( ) ;
2600- let dst: i64x2 = simd_shuffle2 ( a, a, [ 0 , 1 ] ) ;
2600+ let dst: i64x2 = simd_shuffle2 ! ( a, a, [ 0 , 1 ] ) ;
26012601 transmute ( dst)
26022602}
26032603
@@ -2612,7 +2612,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
26122612#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
26132613pub unsafe fn _mm256_castps128_ps256 ( a : __m128 ) -> __m256 {
26142614 // FIXME simd_shuffle8(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
2615- simd_shuffle8 ( a, a, [ 0 , 1 , 2 , 3 , 0 , 0 , 0 , 0 ] )
2615+ simd_shuffle8 ! ( a, a, [ 0 , 1 , 2 , 3 , 0 , 0 , 0 , 0 ] )
26162616}
26172617
26182618/// Casts vector of type __m128d to type __m256d;
@@ -2626,7 +2626,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
26262626#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
26272627pub unsafe fn _mm256_castpd128_pd256 ( a : __m128d ) -> __m256d {
26282628 // FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
2629- simd_shuffle4 ( a, a, [ 0 , 1 , 0 , 0 ] )
2629+ simd_shuffle4 ! ( a, a, [ 0 , 1 , 0 , 0 ] )
26302630}
26312631
26322632/// Casts vector of type __m128i to type __m256i;
@@ -2641,7 +2641,7 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
26412641pub unsafe fn _mm256_castsi128_si256 ( a : __m128i ) -> __m256i {
26422642 let a = a. as_i64x2 ( ) ;
26432643 // FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
2644- let dst: i64x4 = simd_shuffle4 ( a, a, [ 0 , 1 , 0 , 0 ] ) ;
2644+ let dst: i64x4 = simd_shuffle4 ! ( a, a, [ 0 , 1 , 0 , 0 ] ) ;
26452645 transmute ( dst)
26462646}
26472647
@@ -2656,7 +2656,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
26562656// instructions, thus it has zero latency.
26572657#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
26582658pub unsafe fn _mm256_zextps128_ps256 ( a : __m128 ) -> __m256 {
2659- simd_shuffle8 ( a, _mm_setzero_ps ( ) , [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
2659+ simd_shuffle8 ! ( a, _mm_setzero_ps( ) , [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
26602660}
26612661
26622662/// Constructs a 256-bit integer vector from a 128-bit integer vector.
@@ -2671,7 +2671,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
26712671#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
26722672pub unsafe fn _mm256_zextsi128_si256 ( a : __m128i ) -> __m256i {
26732673 let b = _mm_setzero_si128 ( ) . as_i64x2 ( ) ;
2674- let dst: i64x4 = simd_shuffle4 ( a. as_i64x2 ( ) , b, [ 0 , 1 , 2 , 3 ] ) ;
2674+ let dst: i64x4 = simd_shuffle4 ! ( a. as_i64x2( ) , b, [ 0 , 1 , 2 , 3 ] ) ;
26752675 transmute ( dst)
26762676}
26772677
@@ -2687,7 +2687,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
26872687// instructions, thus it has zero latency.
26882688#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
26892689pub unsafe fn _mm256_zextpd128_pd256 ( a : __m128d ) -> __m256d {
2690- simd_shuffle4 ( a, _mm_setzero_pd ( ) , [ 0 , 1 , 2 , 3 ] )
2690+ simd_shuffle4 ! ( a, _mm_setzero_pd( ) , [ 0 , 1 , 2 , 3 ] )
26912691}
26922692
26932693/// Returns vector of type `__m256` with undefined elements.
@@ -2732,7 +2732,7 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i {
27322732#[ cfg_attr( test, assert_instr( vinsertf128) ) ]
27332733#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
27342734pub unsafe fn _mm256_set_m128 ( hi : __m128 , lo : __m128 ) -> __m256 {
2735- simd_shuffle8 ( lo, hi, [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
2735+ simd_shuffle8 ! ( lo, hi, [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
27362736}
27372737
27382738/// Sets packed __m256d returned vector with the supplied values.
0 commit comments