Skip to content

Commit 2d198c8

Browse files
committed
Port from packed_simd crate to portable_simd feature (x86_64 part)
1 parent 9217fd2 commit 2d198c8

File tree

6 files changed

+33 additions, -41 deletions (lines changed)

6 files changed

+33 additions, -41 deletions (lines changed)

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ categories = ["text-processing", "encoding", "web-programming", "internationaliz
1515
[features]
1616
default = ["alloc"]
1717
alloc = []
18-
simd-accel = ["packed_simd", "packed_simd/into_bits"]
18+
simd-accel = []
1919
less-slow-kanji-encode = []
2020
less-slow-big5-hanzi-encode = []
2121
less-slow-gb-hanzi-encode = []
@@ -32,7 +32,6 @@ fast-legacy-encode = ["fast-hangul-encode",
3232

3333
[dependencies]
3434
cfg-if = "1.0"
35-
packed_simd = { version = "0.3.9", optional = true }
3635
serde = { version = "1.0", optional = true }
3736

3837
[dev-dependencies]

src/handles.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use crate::simd_funcs::*;
3434
all(target_endian = "little", target_feature = "neon")
3535
)
3636
))]
37-
use packed_simd::u16x8;
37+
use core::simd::u16x8;
3838

3939
use super::DecoderResult;
4040
use super::EncoderResult;

src/lib.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,7 @@
689689
//! for discussion about the UTF-16 family.
690690
691691
#![no_std]
692-
#![cfg_attr(feature = "simd-accel", feature(core_intrinsics))]
692+
#![cfg_attr(feature = "simd-accel", feature(core_intrinsics, portable_simd))]
693693

694694
#[cfg(feature = "alloc")]
695695
#[cfg_attr(test, macro_use)]
@@ -699,17 +699,6 @@ extern crate core;
699699
#[macro_use]
700700
extern crate cfg_if;
701701

702-
#[cfg(all(
703-
feature = "simd-accel",
704-
any(
705-
target_feature = "sse2",
706-
all(target_endian = "little", target_arch = "aarch64"),
707-
all(target_endian = "little", target_feature = "neon")
708-
)
709-
))]
710-
#[macro_use(shuffle)]
711-
extern crate packed_simd;
712-
713702
#[cfg(feature = "serde")]
714703
extern crate serde;
715704

src/mem.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,8 @@ macro_rules! by_unit_check_simd {
248248
cfg_if! {
249249
if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
250250
use crate::simd_funcs::*;
251-
use packed_simd::u8x16;
252-
use packed_simd::u16x8;
251+
use core::simd::u8x16;
252+
use core::simd::u16x8;
253253

254254
const SIMD_ALIGNMENT: usize = 16;
255255

src/simd_funcs.rs

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@
77
// option. This file may not be copied, modified, or distributed
88
// except according to those terms.
99

10-
use packed_simd::u16x8;
11-
use packed_simd::u8x16;
12-
use packed_simd::IntoBits;
10+
use core::simd::cmp::SimdPartialEq;
11+
use core::simd::cmp::SimdPartialOrd;
12+
use core::simd::simd_swizzle;
13+
use core::simd::u16x8;
14+
use core::simd::u8x16;
15+
use core::simd::ToBytes;
1316

1417
// TODO: Migrate unaligned access to stdlib code if/when the RFC
1518
// https://github.com/rust-lang/rfcs/pull/1725 is implemented.
@@ -110,7 +113,7 @@ pub fn simd_byte_swap(s: u16x8) -> u16x8 {
110113

111114
#[inline(always)]
112115
pub fn to_u16_lanes(s: u8x16) -> u16x8 {
113-
s.into_bits()
116+
u16x8::from_ne_bytes(s)
114117
}
115118

116119
cfg_if! {
@@ -122,7 +125,7 @@ cfg_if! {
122125
#[inline(always)]
123126
pub fn mask_ascii(s: u8x16) -> i32 {
124127
unsafe {
125-
_mm_movemask_epi8(s.into_bits())
128+
_mm_movemask_epi8(s.into())
126129
}
127130
}
128131

@@ -137,7 +140,7 @@ cfg_if! {
137140
pub fn simd_is_ascii(s: u8x16) -> bool {
138141
unsafe {
139142
// Safety: We have cfg()d the correct platform
140-
_mm_movemask_epi8(s.into_bits()) == 0
143+
_mm_movemask_epi8(s.into()) == 0
141144
}
142145
}
143146
} else if #[cfg(target_arch = "aarch64")]{
@@ -154,7 +157,7 @@ cfg_if! {
154157
// This optimizes better on ARM than
155158
// the lt formulation.
156159
let highest_ascii = u8x16::splat(0x7F);
157-
!s.gt(highest_ascii).any()
160+
!s.simd_gt(highest_ascii).any()
158161
}
159162
}
160163
}
@@ -167,7 +170,7 @@ cfg_if! {
167170
return true;
168171
}
169172
let above_str_latin1 = u8x16::splat(0xC4);
170-
s.lt(above_str_latin1).all()
173+
s.simd_lt(above_str_latin1).all()
171174
}
172175
} else if #[cfg(target_arch = "aarch64")]{
173176
#[inline(always)]
@@ -181,7 +184,7 @@ cfg_if! {
181184
#[inline(always)]
182185
pub fn simd_is_str_latin1(s: u8x16) -> bool {
183186
let above_str_latin1 = u8x16::splat(0xC4);
184-
s.lt(above_str_latin1).all()
187+
s.simd_lt(above_str_latin1).all()
185188
}
186189
}
187190
}
@@ -207,7 +210,7 @@ cfg_if! {
207210
#[inline(always)]
208211
pub fn simd_is_basic_latin(s: u16x8) -> bool {
209212
let above_ascii = u16x8::splat(0x80);
210-
s.lt(above_ascii).all()
213+
s.simd_lt(above_ascii).all()
211214
}
212215

213216
#[inline(always)]
@@ -216,7 +219,7 @@ cfg_if! {
216219
// seems faster in this case while the above
217220
// function is better the other way round...
218221
let highest_latin1 = u16x8::splat(0xFF);
219-
!s.gt(highest_latin1).any()
222+
!s.simd_gt(highest_latin1).any()
220223
}
221224
}
222225
}
@@ -225,7 +228,7 @@ cfg_if! {
225228
pub fn contains_surrogates(s: u16x8) -> bool {
226229
let mask = u16x8::splat(0xF800);
227230
let surrogate_bits = u16x8::splat(0xD800);
228-
(s & mask).eq(surrogate_bits).any()
231+
(s & mask).simd_eq(surrogate_bits).any()
229232
}
230233

231234
cfg_if! {
@@ -262,7 +265,7 @@ cfg_if! {
262265
macro_rules! in_range16x8 {
263266
($s:ident, $start:expr, $end:expr) => {{
264267
// SIMD sub is wrapping
265-
($s - u16x8::splat($start)).lt(u16x8::splat($end - $start))
268+
($s - u16x8::splat($start)).simd_lt(u16x8::splat($end - $start))
266269
}};
267270
}
268271

@@ -276,7 +279,7 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool {
276279

277280
aarch64_return_false_if_below_hebrew!(s);
278281

279-
let below_hebrew = s.lt(u16x8::splat(0x0590));
282+
let below_hebrew = s.simd_lt(u16x8::splat(0x0590));
280283

281284
non_aarch64_return_false_if_all!(below_hebrew);
282285

@@ -291,26 +294,26 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool {
291294
| in_range16x8!(s, 0xFE70, 0xFEFF)
292295
| in_range16x8!(s, 0xD802, 0xD804)
293296
| in_range16x8!(s, 0xD83A, 0xD83C)
294-
| s.eq(u16x8::splat(0x200F))
295-
| s.eq(u16x8::splat(0x202B))
296-
| s.eq(u16x8::splat(0x202E))
297-
| s.eq(u16x8::splat(0x2067)))
297+
| s.simd_eq(u16x8::splat(0x200F))
298+
| s.simd_eq(u16x8::splat(0x202B))
299+
| s.simd_eq(u16x8::splat(0x202E))
300+
| s.simd_eq(u16x8::splat(0x2067)))
298301
.any()
299302
}
300303

301304
#[inline(always)]
302305
pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) {
303-
let first: u8x16 = shuffle!(
306+
let first: u8x16 = simd_swizzle!(
304307
s,
305308
u8x16::splat(0),
306309
[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
307310
);
308-
let second: u8x16 = shuffle!(
311+
let second: u8x16 = simd_swizzle!(
309312
s,
310313
u8x16::splat(0),
311314
[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
312315
);
313-
(first.into_bits(), second.into_bits())
316+
(u16x8::from_ne_bytes(first), u16x8::from_ne_bytes(second))
314317
}
315318

316319
cfg_if! {
@@ -319,7 +322,7 @@ cfg_if! {
319322
pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
320323
unsafe {
321324
// Safety: We have cfg()d the correct platform
322-
_mm_packus_epi16(a.into_bits(), b.into_bits()).into_bits()
325+
_mm_packus_epi16(a.into(), b.into()).into()
323326
}
324327
}
325328
} else {

src/x_user_defined.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@ use crate::variant::*;
1414
cfg_if! {
1515
if #[cfg(feature = "simd-accel")] {
1616
use simd_funcs::*;
17-
use packed_simd::u16x8;
17+
use core::simd::u16x8;
18+
use core::simd::cmp::SimdPartialOrd;
1819

1920
#[inline(always)]
2021
fn shift_upper(unpacked: u16x8) -> u16x8 {
2122
let highest_ascii = u16x8::splat(0x7F);
22-
unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
23+
unpacked + unpacked.simd_gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
2324
} else {
2425
}
2526
}

0 commit comments

Comments
 (0)