diff --git a/CHANGELOG.md b/CHANGELOG.md index b80b9bcc1e8..928480db0d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ A [separate changelog is kept for rand_core](rand_core/CHANGELOG.md). You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.html) useful. ## [Unreleased] +- Remove `zerocopy` dependency (#1579) - Fix feature `simd_support` for recent nightly rust (#1586) - Add `Alphabetic` distribution. (#1587) - Re-export `rand_core` (#1602) diff --git a/Cargo.toml b/Cargo.toml index c01fcd85e08..3904f2f5579 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,7 @@ alloc = [] os_rng = ["rand_core/os_rng"] # Option (requires nightly Rust): experimental SIMD support -simd_support = ["zerocopy/simd-nightly"] +simd_support = [] # Option (enabled by default): enable StdRng std_rng = ["dep:rand_chacha"] @@ -75,7 +75,6 @@ rand_core = { path = "rand_core", version = "0.9.0", default-features = false } log = { version = "0.4.4", optional = true } serde = { version = "1.0.103", features = ["derive"], optional = true } rand_chacha = { path = "rand_chacha", version = "0.9.0", default-features = false, optional = true } -zerocopy = { version = "0.8.0", default-features = false, features = ["simd"] } [dev-dependencies] rand_pcg = { path = "rand_pcg", version = "0.9.0" } diff --git a/benches/Cargo.toml b/benches/Cargo.toml index a0470ea9597..adb9aadd84b 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -8,7 +8,6 @@ publish = false # Option (requires nightly Rust): experimental SIMD support simd_support = ["rand/simd_support"] - [dependencies] [dev-dependencies] @@ -38,6 +37,10 @@ harness = false name = "shuffle" harness = false +[[bench]] +name = "simd" +harness = false + [[bench]] name = "standard" harness = false diff --git a/benches/benches/simd.rs b/benches/benches/simd.rs new file mode 100644 index 00000000000..f1723245977 --- /dev/null +++ b/benches/benches/simd.rs @@ -0,0 +1,76 @@ +// Copyright 2018-2023 Developers of the 
Rand project. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Generating SIMD / wide types + +#![cfg_attr(feature = "simd_support", feature(portable_simd))] + +use criterion::{criterion_group, criterion_main, Criterion}; + +criterion_group!( + name = benches; + config = Criterion::default(); + targets = simd +); +criterion_main!(benches); + +#[cfg(not(feature = "simd_support"))] +pub fn simd(_: &mut Criterion) {} + +#[cfg(feature = "simd_support")] +pub fn simd(c: &mut Criterion) { + use rand::prelude::*; + use rand_pcg::Pcg64Mcg; + + let mut g = c.benchmark_group("random_simd"); + + g.bench_function("u128", |b| { + let mut rng = Pcg64Mcg::from_rng(&mut rand::rng()); + b.iter(|| rng.random::<u128>()); + }); + + g.bench_function("m128i", |b| { + let mut rng = Pcg64Mcg::from_rng(&mut rand::rng()); + b.iter(|| rng.random::<core::arch::x86_64::__m128i>()); + }); + + g.bench_function("m256i", |b| { + let mut rng = Pcg64Mcg::from_rng(&mut rand::rng()); + b.iter(|| rng.random::<core::arch::x86_64::__m256i>()); + }); + + g.bench_function("m512i", |b| { + let mut rng = Pcg64Mcg::from_rng(&mut rand::rng()); + b.iter(|| rng.random::<core::arch::x86_64::__m512i>()); + }); + + g.bench_function("u64x2", |b| { + let mut rng = Pcg64Mcg::from_rng(&mut rand::rng()); + b.iter(|| rng.random::<core::simd::u64x2>()); + }); + + g.bench_function("u32x4", |b| { + let mut rng = Pcg64Mcg::from_rng(&mut rand::rng()); + b.iter(|| rng.random::<core::simd::u32x4>()); + }); + + g.bench_function("u32x8", |b| { + let mut rng = Pcg64Mcg::from_rng(&mut rand::rng()); + b.iter(|| rng.random::<core::simd::u32x8>()); + }); + + g.bench_function("u16x8", |b| { + let mut rng = Pcg64Mcg::from_rng(&mut rand::rng()); + b.iter(|| rng.random::<core::simd::u16x8>()); + }); + + g.bench_function("u8x16", |b| { + let mut rng = Pcg64Mcg::from_rng(&mut rand::rng()); + b.iter(|| rng.random::<core::simd::u8x16>()); + }); +} diff --git a/rand_core/src/lib.rs b/rand_core/src/lib.rs index d41d0c03329..6c007797806 100644 --- a/rand_core/src/lib.rs +++ 
b/rand_core/src/lib.rs @@ -31,6 +31,7 @@ )] #![deny(missing_docs)] #![deny(missing_debug_implementations)] +#![deny(clippy::undocumented_unsafe_blocks)] #![doc(test(attr(allow(unused_variables), deny(warnings))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![no_std] diff --git a/src/distr/integer.rs b/src/distr/integer.rs index d0040e69e7e..37b2081c471 100644 --- a/src/distr/integer.rs +++ b/src/distr/integer.rs @@ -107,21 +107,50 @@ impl_nzint!(NonZeroI64, NonZeroI64::new); impl_nzint!(NonZeroI128, NonZeroI128::new); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -macro_rules! x86_intrinsic_impl { - ($meta:meta, $($intrinsic:ident),+) => {$( - #[cfg($meta)] - impl Distribution<$intrinsic> for StandardUniform { - #[inline] - fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $intrinsic { - // On proper hardware, this should compile to SIMD instructions - // Verified on x86 Haswell with __m128i, __m256i - let mut buf = [0_u8; core::mem::size_of::<$intrinsic>()]; - rng.fill_bytes(&mut buf); - // x86 is little endian so no need for conversion - zerocopy::transmute!(buf) - } - } - )+}; +impl Distribution<__m128i> for StandardUniform { + #[inline] + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> __m128i { + // NOTE: It's tempting to use the u128 impl here, but confusingly this + // results in different code (return via rdx, r10 instead of rax, rdx + // with u128 impl) and is much slower (+130 time). This version calls + // impls::fill_bytes_via_next but performs well. + + let mut buf = [0_u8; core::mem::size_of::<__m128i>()]; + rng.fill_bytes(&mut buf); + // x86 is little endian so no need for conversion + + // SAFETY: All byte sequences of `buf` represent values of the output type. 
+ unsafe { core::mem::transmute(buf) } + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +impl Distribution<__m256i> for StandardUniform { + #[inline] + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> __m256i { + let mut buf = [0_u8; core::mem::size_of::<__m256i>()]; + rng.fill_bytes(&mut buf); + // x86 is little endian so no need for conversion + + // SAFETY: All byte sequences of `buf` represent values of the output type. + unsafe { core::mem::transmute(buf) } + } +} + +#[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + feature = "simd_support" +))] +impl Distribution<__m512i> for StandardUniform { + #[inline] + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> __m512i { + let mut buf = [0_u8; core::mem::size_of::<__m512i>()]; + rng.fill_bytes(&mut buf); + // x86 is little endian so no need for conversion + + // SAFETY: All byte sequences of `buf` represent values of the output type. + unsafe { core::mem::transmute(buf) } + } } #[cfg(feature = "simd_support")] @@ -148,24 +177,6 @@ macro_rules! simd_impl { #[cfg(feature = "simd_support")] simd_impl!(u8, i8, u16, i16, u32, i32, u64, i64); -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -x86_intrinsic_impl!( - any(target_arch = "x86", target_arch = "x86_64"), - __m128i, - __m256i -); -#[cfg(all( - any(target_arch = "x86", target_arch = "x86_64"), - feature = "simd_support" -))] -x86_intrinsic_impl!( - all( - any(target_arch = "x86", target_arch = "x86_64"), - feature = "simd_support" - ), - __m512i -); - #[cfg(test)] mod tests { use super::*; diff --git a/src/distr/other.rs b/src/distr/other.rs index 0e1fc149be7..47b99323d6b 100644 --- a/src/distr/other.rs +++ b/src/distr/other.rs @@ -118,6 +118,7 @@ impl Distribution<char> for StandardUniform { if n <= 0xDFFF { n -= GAP_SIZE; } + // SAFETY: We ensure above that `n` represents a `char`. 
unsafe { char::from_u32_unchecked(n) } } } @@ -166,9 +167,14 @@ impl Distribution<u8> for Alphabetic { #[cfg(feature = "alloc")] impl SampleString for Alphanumeric { fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) { + // SAFETY: `self` only samples alphanumeric characters, which are valid UTF-8. unsafe { let v = string.as_mut_vec(); - v.extend(self.sample_iter(rng).take(len)); + v.extend( + self.sample_iter(rng) + .take(len) + .inspect(|b| debug_assert!(b.is_ascii_alphanumeric())), + ); } } } diff --git a/src/lib.rs b/src/lib.rs index 5cb71b8bde2..6f2af2fc147 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -59,6 +59,7 @@ clippy::neg_cmp_op_on_partial_ord, clippy::nonminimal_bool )] +#![deny(clippy::undocumented_unsafe_blocks)] #[cfg(feature = "alloc")] extern crate alloc; diff --git a/src/rng.rs b/src/rng.rs index 258c87de273..b0891a97217 100644 --- a/src/rng.rs +++ b/src/rng.rs @@ -12,8 +12,8 @@ use crate::distr::uniform::{SampleRange, SampleUniform}; use crate::distr::{self, Distribution, StandardUniform}; use core::num::Wrapping; +use core::{mem, slice}; use rand_core::RngCore; -use zerocopy::IntoBytes; /// User-level interface for RNGs /// @@ -393,14 +393,36 @@ impl Fill for [u8] { } } +/// Call target for unsafe macros +const unsafe fn __unsafe() {} + +/// Implement `Fill` for given type `$t`. +/// +/// # Safety +/// All bit patterns of `[u8; size_of::<$t>()]` must represent values of `$t`. macro_rules! impl_fill { () => {}; - ($t:ty) => { + ($t:ty) => {{ + // Force caller to wrap with an `unsafe` block + __unsafe(); + impl Fill for [$t] { - #[inline(never)] // in micro benchmarks, this improves performance fn fill<R: Rng + ?Sized>(&mut self, rng: &mut R) { if self.len() > 0 { - rng.fill_bytes(self.as_mut_bytes()); + let size = mem::size_of_val(self); + rng.fill_bytes( + // SAFETY: `self` non-null and valid for reads and writes within its `size` + // bytes. `self` meets the alignment requirements of `&mut [u8]`. + // The contents of `self` are initialized. 
Both `[u8]` and `[$t]` are valid + // for all bit-patterns of their contents (note that the SAFETY requirement + // on callers of this macro). `self` is not borrowed. + unsafe { + slice::from_raw_parts_mut(self.as_mut_ptr() + as *mut u8, + size + ) + } + ); for x in self { *x = x.to_le(); } @@ -409,27 +431,41 @@ macro_rules! impl_fill { } impl Fill for [Wrapping<$t>] { - #[inline(never)] fn fill<R: Rng + ?Sized>(&mut self, rng: &mut R) { if self.len() > 0 { - rng.fill_bytes(self.as_mut_bytes()); + let size = self.len() * mem::size_of::<$t>(); + rng.fill_bytes( + // SAFETY: `self` non-null and valid for reads and writes within its `size` + // bytes. `self` meets the alignment requirements of `&mut [u8]`. + // The contents of `self` are initialized. Both `[u8]` and `[$t]` are valid + // for all bit-patterns of their contents (note that the SAFETY requirement + // on callers of this macro). `self` is not borrowed. + unsafe { + slice::from_raw_parts_mut(self.as_mut_ptr() + as *mut u8, + size + ) + } + ); for x in self { - *x = Wrapping(x.0.to_le()); + *x = Wrapping(x.0.to_le()); } } } - } + }} }; - ($t:ty, $($tt:ty,)*) => { + ($t:ty, $($tt:ty,)*) => {{ impl_fill!($t); // TODO: this could replace above impl once Rust #32463 is fixed // impl_fill!(Wrapping<$t>); impl_fill!($($tt,)*); - } + }} } -impl_fill!(u16, u32, u64, u128,); -impl_fill!(i8, i16, i32, i64, i128,); +// SAFETY: All bit patterns of `[u8; size_of::<$t>()]` represent values of `u*`. +const _: () = unsafe { impl_fill!(u16, u32, u64, u128,) }; +// SAFETY: All bit patterns of `[u8; size_of::<$t>()]` represent values of `i*`. +const _: () = unsafe { impl_fill!(i8, i16, i32, i64, i128,) }; impl<T, const N: usize> Fill for [T; N] where diff --git a/src/seq/iterator.rs b/src/seq/iterator.rs index b10d205676a..a9a9e56155c 100644 --- a/src/seq/iterator.rs +++ b/src/seq/iterator.rs @@ -134,6 +134,10 @@ pub trait IteratorRandom: Iterator + Sized { /// force every element to be created regardless call `.inspect(|e| ())`. 
/// /// [`choose`]: IteratorRandom::choose + // + // Clippy is wrong here: we need to iterate over all entries with the RNG to + // ensure that choosing is *stable*. + #[allow(clippy::double_ended_iterator_last)] fn choose_stable<R>(mut self, rng: &mut R) -> Option<Self::Item> where R: Rng + ?Sized,