Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions rand_core/src/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

use core::intrinsics::transmute;
use core::slice;
use core::cmp::min;
use Rng;

/// Implement `next_u64` via `next_u32`, little-endian order.
Expand Down Expand Up @@ -93,6 +94,82 @@ macro_rules! impl_uint_from_fill {
});
}

// Copy bytes from the words of `$src` into the front of `$dest`.
//
// `$N` is the width in bytes of one word of `$src` (4 for `u32`, 8 for `u64`).
// Evaluates to `(words_consumed, bytes_written)`; a trailing word that is only
// partially copied still counts as consumed.
macro_rules! fill_via_chunks {
    ($src:expr, $dest:expr, $N:expr) => ({
        // Bounded by what `$src` can supply and what `$dest` can accept.
        let byte_len = min($src.len() * $N, $dest.len());
        // Ceiling division: round a partially used word up to a whole word.
        let word_len = (byte_len + $N - 1) / $N;

        // Put every word that will be read into little-endian byte order, so
        // the bytes copied below do not depend on the host's endianness.
        for word in $src[..word_len].iter_mut() {
            *word = word.to_le();
        }

        // SAFETY: the pointer comes from the live slice `$src`, and the length
        // covers exactly its `$src.len()` elements provided `$N` is the size in
        // bytes of one element (true for both callers in this file). `u8` has
        // an alignment of 1 and every byte of an integer is a valid `u8`.
        let raw = unsafe {
            slice::from_raw_parts($src.as_ptr() as *const u8, $src.len() * $N)
        };

        $dest[..byte_len].copy_from_slice(&raw[..byte_len]);

        (word_len, byte_len)
    });
}

/// Implement `fill_bytes` by reading chunks from the output buffer of a
/// block-based RNG.
///
/// The return values are `(consumed_u32, filled_u8)`.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure these names are sufficiently clear without explanation.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will try to write some better documentation.

///
/// `filled_u8` is the number of bytes written to the start of `dest`. It is
/// less than the length of `dest` when `src` does not hold enough words to
/// fill all of it.
/// `consumed_u32` is the number of words consumed from the start of `src`,
/// which is the same as `filled_u8 / 4` rounded up. When `filled_u8` is not a
/// multiple of 4 the last word is only partially copied but still counted, so
/// a caller that advances by `consumed_u32` skips its remaining bytes.
///
/// Note that on big-endian systems values in the output buffer `src` are
/// mutated: `src[0..consumed_u32]` is converted to little-endian before
/// copying, so the bytes written to `dest` are the same on every platform.
///
/// # Example
/// (from `IsaacRng`)
///
/// ```rust,ignore
/// fn fill_bytes(&mut self, dest: &mut [u8]) {
///     let mut read_len = 0;
///     while read_len < dest.len() {
///         if self.index >= self.rsl.len() {
///             self.isaac();
///         }
///
///         let (consumed_u32, filled_u8) =
///             impls::fill_via_u32_chunks(&mut self.rsl[self.index..],
///                                        &mut dest[read_len..]);
///
///         self.index += consumed_u32;
///         read_len += filled_u8;
///     }
/// }
/// ```
pub fn fill_via_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) {
// Each `u32` word supplies 4 bytes of output, hence the chunk width of 4.
fill_via_chunks!(src, dest, 4)
}

/// Implement `fill_bytes` by reading chunks from the output buffer of a
/// block-based RNG.
///
/// The return values are `(consumed_u64, filled_u8)`.
/// `filled_u8` is the number of bytes written to the start of `dest`. It is
/// less than the length of `dest` when `src` does not hold enough words to
/// fill all of it.
/// `consumed_u64` is the number of words consumed from the start of `src`,
/// which is the same as `filled_u8 / 8` rounded up. When `filled_u8` is not a
/// multiple of 8 the last word is only partially copied but still counted, so
/// a caller that advances by `consumed_u64` skips its remaining bytes.
///
/// Note that on big-endian systems values in the output buffer `src` are
/// mutated: `src[0..consumed_u64]` is converted to little-endian before
/// copying, so the bytes written to `dest` are the same on every platform.
///
/// See `fill_via_u32_chunks` for an example.
pub fn fill_via_u64_chunks(src: &mut [u64], dest: &mut [u8]) -> (usize, usize) {
// Each `u64` word supplies 8 bytes of output, hence the chunk width of 8.
fill_via_chunks!(src, dest, 8)
}

/// Implement `next_u32` via `fill_bytes`, little-endian order.
pub fn next_u32_via_fill<R: Rng+?Sized>(rng: &mut R) -> u32 {
impl_uint_from_fill!(rng, u32, 4)
Expand Down
136 changes: 59 additions & 77 deletions src/prng/chacha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,10 @@

//! The ChaCha random number generator.

use core::num::Wrapping as w;
use core::fmt;
use rand_core::impls;
use {Rng, CryptoRng, SeedFromRng, SeedableRng, Error};

#[allow(bad_style)]
type w32 = w<u32>;

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that removing this is actually a win... I mean, now you have `.wrapping_add` in a few places and can't just think "I know this algorithm uses wrapping arithmetic everywhere".

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the commit message for ChaCha I added this note:

This also replaces `core::num::Wrapping` with a few `wrapping_add`s.
There were about 30 conversions to and from `Wrapping`, while there are only
9 wrapping operations.

Because `fill_via_u32_chunks` expects a `[u32]`, converting away from `Wrapping` was just
easier.

I agree that being able to think "I know this algorithm uses wrapping arithmetic everywhere" is an advantage. Not all operations are available on wrapping types though, like `rotate_*`. You could maybe consider this to be a bug in the standard library.

While working with ISAAC, XorShift* and PCG, it happened too many times that I had to ask myself whether I was working with the wrapped or the normal type, and whether an operation was available.

const KEY_WORDS : usize = 8; // 8 words for the 256-bit key
const STATE_WORDS : usize = 16;
const CHACHA_ROUNDS: u32 = 20; // Cryptographically secure from 8 upwards as of this writing
Expand All @@ -32,9 +29,9 @@ const CHACHA_ROUNDS: u32 = 20; // Cryptographically secure from 8 upwards as of
/// Salsa20*](http://cr.yp.to/chacha.html)
#[derive(Clone)]
pub struct ChaChaRng {
buffer: [w32; STATE_WORDS], // Internal buffer of output
state: [w32; STATE_WORDS], // Initial state
index: usize, // Index into state
buffer: [u32; STATE_WORDS], // Internal buffer of output
state: [u32; STATE_WORDS], // Initial state
index: usize, // Index into state
}

// Custom Debug implementation that does not expose the internal state
Expand All @@ -46,10 +43,10 @@ impl fmt::Debug for ChaChaRng {

macro_rules! quarter_round{
($a: expr, $b: expr, $c: expr, $d: expr) => {{
$a = $a + $b; $d = $d ^ $a; $d = w($d.0.rotate_left(16));
$c = $c + $d; $b = $b ^ $c; $b = w($b.0.rotate_left(12));
$a = $a + $b; $d = $d ^ $a; $d = w($d.0.rotate_left( 8));
$c = $c + $d; $b = $b ^ $c; $b = w($b.0.rotate_left( 7));
$a = $a.wrapping_add($b); $d ^= $a; $d = $d.rotate_left(16);
$c = $c.wrapping_add($d); $b ^= $c; $b = $b.rotate_left(12);
$a = $a.wrapping_add($b); $d ^= $a; $d = $d.rotate_left( 8);
$c = $c.wrapping_add($d); $b ^= $c; $b = $b.rotate_left( 7);
}}
}

Expand All @@ -69,15 +66,15 @@ macro_rules! double_round{
}

#[inline]
fn core(output: &mut [w32; STATE_WORDS], input: &[w32; STATE_WORDS]) {
*output = *input;
fn core(new: &mut [u32; STATE_WORDS], input: &[u32; STATE_WORDS]) {
*new = *input;

for _ in 0..CHACHA_ROUNDS / 2 {
double_round!(output);
double_round!(new);
}

for i in 0..STATE_WORDS {
output[i] = output[i] + input[i];
new[i] = new[i].wrapping_add(input[i]);
}
}

Expand All @@ -104,8 +101,8 @@ impl ChaChaRng {
/// - 2419978656
pub fn new_unseeded() -> ChaChaRng {
let mut rng = ChaChaRng {
buffer: [w(0); STATE_WORDS],
state: [w(0); STATE_WORDS],
buffer: [0; STATE_WORDS],
state: [0; STATE_WORDS],
index: STATE_WORDS
};
rng.init(&[0; KEY_WORDS]);
Expand Down Expand Up @@ -133,10 +130,10 @@ impl ChaChaRng {
/// println!("{:?}", ra.next_u32());
/// ```
pub fn set_counter(&mut self, counter_low: u64, counter_high: u64) {
self.state[12] = w((counter_low >> 0) as u32);
self.state[13] = w((counter_low >> 32) as u32);
self.state[14] = w((counter_high >> 0) as u32);
self.state[15] = w((counter_high >> 32) as u32);
self.state[12] = (counter_low >> 0) as u32;
self.state[13] = (counter_low >> 32) as u32;
self.state[14] = (counter_high >> 0) as u32;
self.state[15] = (counter_high >> 32) as u32;
self.index = STATE_WORDS; // force recomputation
}

Expand All @@ -159,19 +156,19 @@ impl ChaChaRng {
/// [1]: Daniel J. Bernstein. [*Extending the Salsa20
/// nonce.*](http://cr.yp.to/papers.html#xsalsa)
fn init(&mut self, key: &[u32; KEY_WORDS]) {
self.state[0] = w(0x61707865);
self.state[1] = w(0x3320646E);
self.state[2] = w(0x79622D32);
self.state[3] = w(0x6B206574);
self.state[0] = 0x61707865;
self.state[1] = 0x3320646E;
self.state[2] = 0x79622D32;
self.state[3] = 0x6B206574;

for i in 0..KEY_WORDS {
self.state[4+i] = w(key[i]);
self.state[4+i] = key[i];
}

self.state[12] = w(0);
self.state[13] = w(0);
self.state[14] = w(0);
self.state[15] = w(0);
self.state[12] = 0;
self.state[13] = 0;
self.state[14] = 0;
self.state[15] = 0;

self.index = STATE_WORDS;
}
Expand All @@ -181,69 +178,54 @@ impl ChaChaRng {
core(&mut self.buffer, &self.state);
self.index = 0;
// update 128-bit counter
self.state[12] = self.state[12] + w(1);
if self.state[12] != w(0) { return };
self.state[13] = self.state[13] + w(1);
if self.state[13] != w(0) { return };
self.state[14] = self.state[14] + w(1);
if self.state[14] != w(0) { return };
self.state[15] = self.state[15] + w(1);
self.state[12] = self.state[12].wrapping_add(1);
if self.state[12] != 0 { return };
self.state[13] = self.state[13].wrapping_add(1);
if self.state[13] != 0 { return };
self.state[14] = self.state[14].wrapping_add(1);
if self.state[14] != 0 { return };
self.state[15] = self.state[15].wrapping_add(1);
}
}

impl Rng for ChaChaRng {
#[inline]
fn next_u32(&mut self) -> u32 {
if self.index == STATE_WORDS {
// Using a local variable for `index`, and checking the size avoids a
// bounds check later on.
let mut index = self.index as usize;
if index >= STATE_WORDS {
self.update();
index = 0;
}

let value = self.buffer[self.index % STATE_WORDS];
let value = self.buffer[index];
self.index += 1;
value.0
value
}

fn next_u64(&mut self) -> u64 {
::rand_core::impls::next_u64_via_u32(self)
impls::next_u64_via_u32(self)
}

#[cfg(feature = "i128_support")]
fn next_u128(&mut self) -> u128 {
::rand_core::impls::next_u128_via_u64(self)
impls::next_u128_via_u64(self)
}

// Custom implementation allowing larger reads from buffer is about 8%
// faster than default implementation in my tests

fn fill_bytes(&mut self, dest: &mut [u8]) {
use core::cmp::min;
use core::intrinsics::{transmute, copy_nonoverlapping};

let mut left = dest;
while left.len() >= 4 {
if self.index == STATE_WORDS {
let mut read_len = 0;
while read_len < dest.len() {
if self.index >= self.buffer.len() {
self.update();
}

let words = min(left.len() / 4, STATE_WORDS - self.index);
let (l, r) = {left}.split_at_mut(4 * words);
left = r;

// convert to LE:
for ref mut x in self.buffer[self.index..self.index+words].iter_mut() {
**x = w((*x).0.to_le());
}

unsafe{ copy_nonoverlapping(
&self.buffer[self.index].0 as *const u32 as *const u8,
l.as_mut_ptr(),
4 * words) };
self.index += words;
}
let n = left.len();
if n > 0 {
let chunk: [u8; 4] = unsafe {
transmute(self.next_u32().to_le())
};
left.copy_from_slice(&chunk[..n]);

let (consumed_u32, filled_u8) =
impls::fill_via_u32_chunks(&mut self.buffer[self.index..],
&mut dest[read_len..]);

self.index += consumed_u32;
read_len += filled_u8;
}
}

Expand Down Expand Up @@ -271,16 +253,16 @@ impl<'a> SeedableRng<&'a [u32]> for ChaChaRng {
/// words are used, the remaining are set to zero.
fn from_seed(seed: &'a [u32]) -> ChaChaRng {
let mut rng = ChaChaRng {
buffer: [w(0); STATE_WORDS],
state: [w(0); STATE_WORDS],
buffer: [0; STATE_WORDS],
state: [0; STATE_WORDS],
index: STATE_WORDS
};
rng.init(&[0u32; KEY_WORDS]);
// set key in place
{
let key = &mut rng.state[4 .. 4+KEY_WORDS];
for (k, s) in key.iter_mut().zip(seed.iter()) {
*k = w(*s);
*k = *s;
}
}
rng
Expand Down
Loading