Skip to content

Commit 272b385

Browse files
author
Paul Dicker
committed
Allow sampling from a closed integer range
These changes make it possible to sample from closed ranges, not only from open. Included is a small optimisation for the modulus operator, and an optimisation for the types i8/u8 and i16/u16.
1 parent 31f964e commit 272b385

1 file changed

Lines changed: 107 additions & 49 deletions

File tree

src/distributions/range2.rs

Lines changed: 107 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,21 @@
99
// except according to those terms.
1010

1111
//! Alternative design for `Range`.
12-
//!
12+
//!
1313
//! TODO: decide whether to replace the old `range` module with this.
1414
//! Advantage: float ranges don't have to store a "zone" parameter.
1515
//! Advantage: custom implementations can store extra parameters.
1616
//! Possible advantage: easier implementations for custom types written in
1717
//! terms of implementations of other types.
1818
//! Disadvantage: complex?
19-
//!
19+
//!
2020
//! This is *almost* like having separate `RangeInt<T>`, `RangeFloat<T>`,
2121
//! etc. (or just `RangeI32`, etc.) types, each implementing `Distribution`,
2222
//! but it adds some magic to support generic `range` and `new_range` methods.
2323
24-
use core::num::Wrapping as w;
25-
24+
use Rand;
2625
use Rng;
27-
use distributions::Distribution;
26+
use distributions::{Distribution, Uniform};
2827
use utils::FloatConversions;
2928

3029
/// Sample values uniformly between two bounds.
@@ -40,7 +39,7 @@ use utils::FloatConversions;
4039
/// including `high`, but this may be very difficult. All the
4140
/// primitive integer types satisfy this property, and the float types
4241
/// normally satisfy it, but rounding may mean `high` can occur.
43-
///
42+
///
4443
/// # Example
4544
///
4645
/// ```rust
@@ -88,13 +87,15 @@ pub trait SampleRange: PartialOrd+Sized {
8887
pub trait RangeImpl {
8988
/// The type sampled by this implementation.
9089
type X: PartialOrd;
91-
90+
9291
/// Construct self.
93-
///
92+
///
9493
/// This should not be called directly. `Range::new` asserts that
9594
/// `low < high` before calling this.
9695
fn new(low: Self::X, high: Self::X) -> Self;
97-
96+
97+
fn new_inclusive(low: Self::X, high: Self::X) -> Self;
98+
9899
/// Sample a value.
99100
fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> Self::X;
100101
}
@@ -108,35 +109,73 @@ pub struct RangeInt<X> {
108109
}
109110

110111
macro_rules! range_int_impl {
111-
($ty:ty, $unsigned:ident) => {
112+
($ty:ty, $signed:ident, $unsigned:ident,
113+
$i_large:ident, $u_large:ident) => {
112114
impl SampleRange for $ty {
113115
type T = RangeInt<$ty>;
114116
}
115-
117+
116118
impl RangeImpl for RangeInt<$ty> {
117-
// we play free and fast with unsigned vs signed here
119+
// We play free and fast with unsigned vs signed here
118120
// (when $ty is signed), but that's fine, since the
119121
// contract of this macro is for $ty and $unsigned to be
120-
// "bit-equal", so casting between them is a no-op & a
121-
// bijection.
122+
// "bit-equal", so casting between them is a no-op.
122123

123124
type X = $ty;
124-
125+
125126
fn new(low: Self::X, high: Self::X) -> Self {
126-
let range = (w(high as $unsigned) - w(low as $unsigned)).0;
127-
let unsigned_max: $unsigned = ::core::$unsigned::MAX;
128-
129-
// We want to calculate type_range % range where type_range is
130-
// pow(2, n_bits($ty)), but we can't represent type_range.
131-
// (type_range - range) % range is equivalent, since we know
132-
// type_range > range. Since range >= 1,
133-
// type_range - range = (unsigned_max - range) + 1.
134-
let ignore_zone = ((unsigned_max - range) + 1) % range;
135-
// We want to sample from the zone
136-
// [0, (type_range - ignore_zone))
137-
// however, ignore_zone may be zero. Instead use a closed range
138-
// from zero to:
139-
let zone = unsigned_max - ignore_zone;
127+
RangeImpl::new_inclusive(low, high - 1)
128+
}
129+
130+
fn new_inclusive(low: Self::X, high: Self::X) -> Self {
131+
// For a closed range the number of possible numbers we should
132+
// generate is `range = (high - low + 1)`. It is not possible to
133+
// end up with a uniform distribution if we map _all_ the random
134+
// integers that can be generated to this range. We have to map
135+
// integers from a `zone` that is a multiple of the range. The
136+
// rest of the integers, that cause a bias, are rejected. The
137+
// sampled number is `zone % range`.
138+
//
139+
// The problem with `range` is that to cover the full range of
140+
// the type, it has to store `unsigned_max + 1`, which can't be
141+
// represented. But if the range covers the full range of the
142+
// type, no modulus is needed. A range of size 0 can't exist, so
143+
// we use that to represent this special case. Wrapping
144+
// arithmetic even makes representing `unsigned_max + 1` as 0
145+
// simple.
146+
//
147+
// We don't calculate zone directly, but first calculate the
148+
// number of integers to reject. To handle `unsigned_max + 1`
149+
// not fitting in the type, we use:
150+
// ints_to_reject = (unsigned_max + 1) % range;
151+
// ints_to_reject = (unsigned_max - range + 1) % range;
152+
//
153+
// The smallest integer prngs generate is u32. That is why for
154+
// small integer sizes (i8/u8 and i16/u16) there is an
155+
// optimisation: don't pick the largest zone that can fit in the
156+
// small type, but pick the largest zone that can fit in an u32.
157+
// This improves the chance to get a random integer that fits in
158+
// the zone to 998 in 1000 in the worst case.
159+
//
160+
// There is a problem however: we can't store such a large range
161+
// in `RangeInt`, that can only hold values of the size of $ty.
162+
// `ints_to_reject` is always less than half the size of the
163+
// small integer. For an u8 it only ever uses 7 bits. This means
164+
// that all but the last 7 bits of `zone` are always 1's (or 15
165+
// in the case of u16). So nothing is lost by trucating `zone`.
166+
167+
let unsigned_max: $u_large = ::core::$u_large::MAX;
168+
169+
let range = (high as $u_large)
170+
.wrapping_sub(low as $u_large)
171+
.wrapping_add(1);
172+
let ints_to_reject =
173+
if range > 0 {
174+
(unsigned_max - range + 1) % range
175+
} else {
176+
0
177+
};
178+
let zone = unsigned_max - ints_to_reject;
140179

141180
RangeInt {
142181
low: low,
@@ -145,36 +184,46 @@ macro_rules! range_int_impl {
145184
zone: zone as $ty
146185
}
147186
}
148-
187+
188+
#[inline(always)]
149189
fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> Self::X {
150-
use $crate::distributions::uniform;
151-
loop {
152-
let v: $unsigned = uniform(rng);
153-
// Reject samples not between 0 and zone:
154-
if v <= self.zone as $unsigned {
155-
// Adjustment sample for range and low value:
156-
return (w(self.low) + w((v % self.range as $unsigned) as $ty)).0;
190+
let range = self.range as $unsigned as $u_large;
191+
if range > 0 {
192+
// Some casting to recover the trucated bits of `zone`:
193+
// First bit-cast to a signed int. Next sign-extend to the
194+
// larger type. Then bit-cast to unsigned.
195+
// For types that already have the right size, all the
196+
// casting is a no-op.
197+
let zone = self.zone as $signed as $i_large as $u_large;
198+
loop {
199+
let v: $u_large = Rand::rand(rng, Uniform);
200+
if v <= zone {
201+
return self.low.wrapping_add((v % range) as $ty);
202+
}
157203
}
204+
} else {
205+
// Sample from the entire integer range.
206+
Rand::rand(rng, Uniform)
158207
}
159208
}
160209
}
161210
}
162211
}
163212

164-
range_int_impl! { i8, u8 }
165-
range_int_impl! { i16, u16 }
166-
range_int_impl! { i32, u32 }
167-
range_int_impl! { i64, u64 }
213+
range_int_impl! { i8, i8, u8, i32, u32 }
214+
range_int_impl! { i16, i16, u16, i32, u32 }
215+
range_int_impl! { i32, i32, u32, i32, u32 }
216+
range_int_impl! { i64, i64, u64, i64, u64 }
168217
#[cfg(feature = "i128_support")]
169-
range_int_impl! { i128, u128 }
170-
range_int_impl! { isize, usize }
171-
range_int_impl! { u8, u8 }
172-
range_int_impl! { u16, u16 }
173-
range_int_impl! { u32, u32 }
174-
range_int_impl! { u64, u64 }
218+
range_int_impl! { i128, i128, u128, u128 }
219+
range_int_impl! { isize, isize, usize, isize, usize }
220+
range_int_impl! { u8, i8, u8, i32, u32 }
221+
range_int_impl! { u16, i16, u16, i32, u32 }
222+
range_int_impl! { u32, i32, u32, i32, u32 }
223+
range_int_impl! { u64, i64, u64, i64, u64 }
175224
#[cfg(feature = "i128_support")]
176-
range_int_impl! { u128, u128 }
177-
range_int_impl! { usize, usize }
225+
range_int_impl! { u128, u128, u128, i128, u128 }
226+
range_int_impl! { usize, isize, usize, isize, usize }
178227

179228
/// Implementation of `RangeImpl` for float types.
180229
#[derive(Clone, Copy, Debug)]
@@ -251,6 +300,12 @@ macro_rules! range_float_impl {
251300
}
252301
}
253302

303+
fn new_inclusive(low: Self::X, high: Self::X) -> Self {
304+
// Same as `new`, because the boundaries of a floats range are
305+
// (at least currently) not exact due to rounding errors.
306+
RangeImpl::new(low, high)
307+
}
308+
254309
fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> Self::X {
255310
let rnd = $next_u(rng);
256311
match *self {
@@ -404,6 +459,9 @@ mod tests {
404459
inner: RangeFloat::<f32>::new(low.x, high.x),
405460
}
406461
}
462+
fn new_inclusive(low: Self::X, high: Self::X) -> Self {
463+
RangeImpl::new(low, high)
464+
}
407465
fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> Self::X {
408466
MyF32 { x: self.inner.sample(rng) }
409467
}

0 commit comments

Comments
 (0)