diff --git a/CHANGELOG.md b/CHANGELOG.md
index b80b9bcc1e8..928480db0d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ A [separate changelog is kept for rand_core](rand_core/CHANGELOG.md).
 You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.html) useful.
 
 ## [Unreleased]
+- Remove `zerocopy` dependency (#1579)
 - Fix feature `simd_support` for recent nightly rust (#1586)
 - Add `Alphabetic` distribution. (#1587)
 - Re-export `rand_core` (#1602)
diff --git a/Cargo.toml b/Cargo.toml
index c01fcd85e08..3904f2f5579 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,7 +43,7 @@ alloc = []
 os_rng = ["rand_core/os_rng"]
 
 # Option (requires nightly Rust): experimental SIMD support
-simd_support = ["zerocopy/simd-nightly"]
+simd_support = []
 
 # Option (enabled by default): enable StdRng
 std_rng = ["dep:rand_chacha"]
@@ -75,7 +75,6 @@ rand_core = { path = "rand_core", version = "0.9.0", default-features = false }
 log = { version = "0.4.4", optional = true }
 serde = { version = "1.0.103", features = ["derive"], optional = true }
 rand_chacha = { path = "rand_chacha", version = "0.9.0", default-features = false, optional = true }
-zerocopy = { version = "0.8.0", default-features = false, features = ["simd"] }
 
 [dev-dependencies]
 rand_pcg = { path = "rand_pcg", version = "0.9.0" }
diff --git a/benches/Cargo.toml b/benches/Cargo.toml
index a0470ea9597..adb9aadd84b 100644
--- a/benches/Cargo.toml
+++ b/benches/Cargo.toml
@@ -8,7 +8,6 @@ publish = false
 # Option (requires nightly Rust): experimental SIMD support
 simd_support = ["rand/simd_support"]
 
-
 [dependencies]
 
 [dev-dependencies]
@@ -38,6 +37,10 @@ harness = false
 name = "shuffle"
 harness = false
 
+[[bench]]
+name = "simd"
+harness = false
+
 [[bench]]
 name = "standard"
 harness = false
diff --git a/benches/benches/simd.rs b/benches/benches/simd.rs
new file mode 100644
index 00000000000..f1723245977
--- /dev/null
+++ b/benches/benches/simd.rs
@@ -0,0 +1,79 @@
+// Copyright 2018-2023 Developers of the Rand project.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Generating SIMD / wide types
+
+#![cfg_attr(feature = "simd_support", feature(portable_simd))]
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+criterion_group!(
+    name = benches;
+    config = Criterion::default();
+    targets = simd
+);
+criterion_main!(benches);
+
+/// No-op stand-in so the `benches` group still has a `simd` target without `simd_support`.
+#[cfg(not(feature = "simd_support"))]
+pub fn simd(_: &mut Criterion) {}
+
+/// Benchmarks `Rng::random` for `u128`, x86_64 `__m*i` and `core::simd` vector types.
+/// Each benchmark's label names exactly the type it samples.
+#[cfg(feature = "simd_support")]
+pub fn simd(c: &mut Criterion) {
+    use rand::prelude::*;
+    use rand_pcg::Pcg64Mcg;
+
+    let mut g = c.benchmark_group("random_simd");
+
+    g.bench_function("u128", |b| {
+        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
+        b.iter(|| rng.random::<u128>());
+    });
+
+    g.bench_function("m128i", |b| {
+        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
+        b.iter(|| rng.random::<core::arch::x86_64::__m128i>());
+    });
+
+    g.bench_function("m256i", |b| {
+        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
+        b.iter(|| rng.random::<core::arch::x86_64::__m256i>());
+    });
+
+    g.bench_function("m512i", |b| {
+        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
+        b.iter(|| rng.random::<core::arch::x86_64::__m512i>());
+    });
+
+    g.bench_function("u64x2", |b| {
+        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
+        b.iter(|| rng.random::<core::simd::u64x2>());
+    });
+
+    g.bench_function("u32x4", |b| {
+        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
+        b.iter(|| rng.random::<core::simd::u32x4>());
+    });
+
+    g.bench_function("u32x8", |b| {
+        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
+        b.iter(|| rng.random::<core::simd::u32x8>());
+    });
+
+    g.bench_function("u16x8", |b| {
+        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
+        b.iter(|| rng.random::<core::simd::u16x8>());
+    });
+
+    g.bench_function("u8x16", |b| {
+        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
+        b.iter(|| rng.random::<core::simd::u8x16>());
+    });
+}
diff --git a/rand_core/src/lib.rs b/rand_core/src/lib.rs
index d41d0c03329..6c007797806 100644
--- a/rand_core/src/lib.rs
+++ b/rand_core/src/lib.rs
@@ -31,6 +31,7 @@
 )]
 #![deny(missing_docs)]
 #![deny(missing_debug_implementations)]
+#![deny(clippy::undocumented_unsafe_blocks)]
 #![doc(test(attr(allow(unused_variables), deny(warnings))))]
 #![cfg_attr(docsrs, feature(doc_auto_cfg))]
 #![no_std]
diff --git a/src/distr/integer.rs b/src/distr/integer.rs
index d0040e69e7e..37b2081c471 100644
--- a/src/distr/integer.rs
+++ b/src/distr/integer.rs
@@ -107,21 +107,50 @@ impl_nzint!(NonZeroI64, NonZeroI64::new);
 impl_nzint!(NonZeroI128, NonZeroI128::new);
 
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! x86_intrinsic_impl {
-    ($meta:meta, $($intrinsic:ident),+) => {$(
-        #[cfg($meta)]
-        impl Distribution<$intrinsic> for StandardUniform {
-            #[inline]
-            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $intrinsic {
-                // On proper hardware, this should compile to SIMD instructions
-                // Verified on x86 Haswell with __m128i, __m256i
-                let mut buf = [0_u8; core::mem::size_of::<$intrinsic>()];
-                rng.fill_bytes(&mut buf);
-                // x86 is little endian so no need for conversion
-                zerocopy::transmute!(buf)
-            }
-        }
-    )+};
+impl Distribution<__m128i> for StandardUniform {
+    #[inline]
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> __m128i {
+        // NOTE: It's tempting to use the u128 impl here, but confusingly this
+        // results in different code (return via rdx, r10 instead of rax, rdx
+        // with u128 impl) and is much slower (+130 time). This version calls
+        // impls::fill_bytes_via_next but performs well.
+
+        let mut buf = [0_u8; core::mem::size_of::<__m128i>()];
+        rng.fill_bytes(&mut buf);
+        // x86 is little endian so no need for conversion
+
+        // SAFETY: All byte sequences of `buf` represent values of the output type.
+        unsafe { core::mem::transmute(buf) }
+    }
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+impl Distribution<__m256i> for StandardUniform {
+    #[inline]
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> __m256i {
+        let mut buf = [0_u8; core::mem::size_of::<__m256i>()];
+        rng.fill_bytes(&mut buf);
+        // x86 is little endian so no need for conversion
+
+        // SAFETY: All byte sequences of `buf` represent values of the output type.
+        unsafe { core::mem::transmute(buf) }
+    }
+}
+
+#[cfg(all(
+    any(target_arch = "x86", target_arch = "x86_64"),
+    feature = "simd_support"
+))]
+impl Distribution<__m512i> for StandardUniform {
+    #[inline]
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> __m512i {
+        let mut buf = [0_u8; core::mem::size_of::<__m512i>()];
+        rng.fill_bytes(&mut buf);
+        // x86 is little endian so no need for conversion
+
+        // SAFETY: All byte sequences of `buf` represent values of the output type.
+        unsafe { core::mem::transmute(buf) }
+    }
 }
 
 #[cfg(feature = "simd_support")]
@@ -148,24 +177,6 @@ macro_rules! simd_impl {
 #[cfg(feature = "simd_support")]
 simd_impl!(u8, i8, u16, i16, u32, i32, u64, i64);
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-x86_intrinsic_impl!(
-    any(target_arch = "x86", target_arch = "x86_64"),
-    __m128i,
-    __m256i
-);
-#[cfg(all(
-    any(target_arch = "x86", target_arch = "x86_64"),
-    feature = "simd_support"
-))]
-x86_intrinsic_impl!(
-    all(
-        any(target_arch = "x86", target_arch = "x86_64"),
-        feature = "simd_support"
-    ),
-    __m512i
-);
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/distr/other.rs b/src/distr/other.rs
index 0e1fc149be7..47b99323d6b 100644
--- a/src/distr/other.rs
+++ b/src/distr/other.rs
@@ -118,6 +118,7 @@ impl Distribution<char> for StandardUniform {
         if n <= 0xDFFF {
             n -= GAP_SIZE;
         }
+        // SAFETY: We ensure above that `n` represents a `char`.
         unsafe { char::from_u32_unchecked(n) }
     }
 }
@@ -166,9 +167,14 @@ impl Distribution<u8> for Alphabetic {
 #[cfg(feature = "alloc")]
 impl SampleString for Alphanumeric {
     fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) {
+        // SAFETY: `self` only samples alphanumeric characters, which are valid UTF-8.
         unsafe {
             let v = string.as_mut_vec();
-            v.extend(self.sample_iter(rng).take(len));
+            v.extend(
+                self.sample_iter(rng)
+                    .take(len)
+                    .inspect(|b| debug_assert!(b.is_ascii_alphanumeric())),
+            );
         }
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 5cb71b8bde2..6f2af2fc147 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -59,6 +59,7 @@
     clippy::neg_cmp_op_on_partial_ord,
     clippy::nonminimal_bool
 )]
+#![deny(clippy::undocumented_unsafe_blocks)]
 
 #[cfg(feature = "alloc")]
 extern crate alloc;
diff --git a/src/rng.rs b/src/rng.rs
index 258c87de273..b0891a97217 100644
--- a/src/rng.rs
+++ b/src/rng.rs
@@ -12,8 +12,8 @@
 use crate::distr::uniform::{SampleRange, SampleUniform};
 use crate::distr::{self, Distribution, StandardUniform};
 use core::num::Wrapping;
+use core::{mem, slice};
 use rand_core::RngCore;
-use zerocopy::IntoBytes;
 
 /// User-level interface for RNGs
 ///
@@ -393,14 +393,36 @@ impl Fill for [u8] {
     }
 }
 
+/// Call target for unsafe macros
+const unsafe fn __unsafe() {}
+
+/// Implement `Fill` for given type `$t`.
+///
+/// # Safety
+/// All bit patterns of `[u8; size_of::<$t>()]` must represent values of `$t`.
 macro_rules! impl_fill {
     () => {};
-    ($t:ty) => {
+    ($t:ty) => {{
+        // Force caller to wrap with an `unsafe` block
+        __unsafe();
+
         impl Fill for [$t] {
-            #[inline(never)] // in micro benchmarks, this improves performance
             fn fill<R: Rng + ?Sized>(&mut self, rng: &mut R) {
                 if self.len() > 0 {
-                    rng.fill_bytes(self.as_mut_bytes());
+                    let size = mem::size_of_val(self);
+                    rng.fill_bytes(
+                        // SAFETY: `self` non-null and valid for reads and writes within its `size`
+                        // bytes. `self` meets the alignment requirements of `&mut [u8]`.
+                        // The contents of `self` are initialized. Both `[u8]` and `[$t]` are valid
+                        // for all bit-patterns of their contents (note the SAFETY requirement
+                        // on callers of this macro). `self` is not borrowed.
+                        unsafe {
+                            slice::from_raw_parts_mut(self.as_mut_ptr()
+                                as *mut u8,
+                                size
+                            )
+                        }
+                    );
                     for x in self {
                         *x = x.to_le();
                     }
@@ -409,27 +431,41 @@ macro_rules! impl_fill {
         }
 
         impl Fill for [Wrapping<$t>] {
-            #[inline(never)]
             fn fill<R: Rng + ?Sized>(&mut self, rng: &mut R) {
                 if self.len() > 0 {
-                    rng.fill_bytes(self.as_mut_bytes());
+                    let size = self.len() * mem::size_of::<$t>();
+                    rng.fill_bytes(
+                        // SAFETY: `self` non-null and valid for reads and writes within its `size`
+                        // bytes. `self` meets the alignment requirements of `&mut [u8]`.
+                        // The contents of `self` are initialized. Both `[u8]` and `[$t]` are valid
+                        // for all bit-patterns of their contents (note the SAFETY requirement
+                        // on callers of this macro). `self` is not borrowed.
+                        unsafe {
+                            slice::from_raw_parts_mut(self.as_mut_ptr()
+                                as *mut u8,
+                                size
+                            )
+                        }
+                    );
                     for x in self {
-                    *x = Wrapping(x.0.to_le());
+                        *x = Wrapping(x.0.to_le());
                     }
                 }
             }
-        }
+        }}
     };
-    ($t:ty, $($tt:ty,)*) => {
+    ($t:ty, $($tt:ty,)*) => {{
         impl_fill!($t);
         // TODO: this could replace above impl once Rust #32463 is fixed
         // impl_fill!(Wrapping<$t>);
         impl_fill!($($tt,)*);
-    }
+    }}
 }
 
-impl_fill!(u16, u32, u64, u128,);
-impl_fill!(i8, i16, i32, i64, i128,);
+// SAFETY: All bit patterns of `[u8; size_of::<$t>()]` represent values of `u*`.
+const _: () = unsafe { impl_fill!(u16, u32, u64, u128,) };
+// SAFETY: All bit patterns of `[u8; size_of::<$t>()]` represent values of `i*`.
+const _: () = unsafe { impl_fill!(i8, i16, i32, i64, i128,) };
 
 impl<T, const N: usize> Fill for [T; N]
 where
diff --git a/src/seq/iterator.rs b/src/seq/iterator.rs
index b10d205676a..a9a9e56155c 100644
--- a/src/seq/iterator.rs
+++ b/src/seq/iterator.rs
@@ -134,6 +134,10 @@ pub trait IteratorRandom: Iterator + Sized {
     /// force every element to be created regardless call `.inspect(|e| ())`.
     ///
     /// [`choose`]: IteratorRandom::choose
+    //
+    // Clippy is wrong here: we need to iterate over all entries with the RNG to
+    // ensure that choosing is *stable*.
+    #[allow(clippy::double_ended_iterator_last)]
     fn choose_stable<R>(mut self, rng: &mut R) -> Option<Self::Item>
     where
         R: Rng + ?Sized,