Allow sampling from a closed integer range

Paul Dicker · Paul Dicker · commit 188307a000b3 · 2017-08-30T08:21:59.000+02:00
diff --git a/src/distributions/range2.rs b/src/distributions/range2.rs
@@ -117,29 +117,49 @@ macro_rules! range_int_impl {
         }
         
         impl RangeImpl for RangeInt<$ty> {
-            // we play free and fast with unsigned vs signed here
+            // We play free and fast with unsigned vs signed here
             // (when $ty is signed), but that's fine, since the
             // contract of this macro is for $ty and $unsigned to be
-            // "bit-equal", so casting between them is a no-op & a
-            // bijection.
+            // "bit-equal", so casting between them is a no-op.
 
             type X = $ty;
             
             fn new(low: Self::X, high: Self::X) -> Self {
-                let range = (w(high as $unsigned) - w(low as $unsigned)).0;
+                new_closed(low, high - 1)
+            }
+
+            fn new_closed(low: Self::X, high: Self::X) -> Self {
+                // For a closed range the number of possible numbers we should
+                // generate is `range = (high - low + 1)`. It is not possible to
+                // end up with a uniform distribution if we map _all_ the random
+                // integers that can be generated to this range. We have to map
+                // integers from a `zone` that is a multiple of the range. The
+                // rest of the integers, that cause a bias, are rejected. The
+                // sampled number is an integer from `zone`, modulo `range`.
+                //
+                // The problem with `range` is that to cover the full range of
+                // the type, it has to store `unsigned_max + 1`, which can't be
+                // represented. But a range of size 0 can't exist, and a
+                // modulus op `unsigned_max + 1` is a no-op. So we treat this as
+                // a special case. Wrapping arithmetic even makes representing
+                // `unsigned_max + 1` as 0 simple.
+                //
+                // We don't calculate zone directly, but first calculate the
+                // number of integers to reject. With a wrinkle to handle
+                // `unsigned_max + 1` not fitting in the type, this is:
+                // ints_to_reject = (unsigned_max + 1) % range;
+                // ints_to_reject = (unsigned_max - range + 1) % range;
+
                 let unsigned_max: $unsigned = ::core::$unsigned::MAX;
 
-                // We want to calculate type_range % range where type_range is
-                // pow(2, n_bits($ty)), but we can't represent type_range.
-                // (type_range - range) % range is equivalent, since we know
-                // type_range > range. Since range >= 1,
-                // type_range - range = (unsigned_max - range) + 1.
-                let ignore_zone = ((unsigned_max - range) + 1) % range;
-                // We want to sample from the zone
-                // [0, (type_range - ignore_zone))
-                // however, ignore_zone may be zero. Instead use a closed range
-                // from zero to:
-                let zone = unsigned_max - ignore_zone;
+                let range = (w(high as $unsigned) - w(low as $unsigned)).0;
+                let ints_to_reject =
+                    if range > 0 {
+                        (unsigned_max - range + 1) % range
+                    } else {
+                        0
+                    };
+                let zone = unsigned_max - ints_to_reject;
 
                 RangeInt {
                     low: low,
@@ -148,16 +168,21 @@ macro_rules! range_int_impl {
                     zone: zone as $ty
                 }
             }
-            
+
             fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> Self::X {
                 use $crate::distributions::uniform;
+                let range = self.range as $unsigned;
                 loop {
                     let v: $unsigned = uniform(rng);
-                    // Reject samples not between 0 and zone:
-                    if v <= self.zone as $unsigned {
-                        // Adjustment sample for range and low value:
-                        return (w(self.low) + w((v % self.range as $unsigned) as $ty)).0;
+                    // The modulus operator is incredibly slow. Even skipping
+                    // it for the small chance `v` falls in the target range
+                    // makes it a few percent faster.
+                    if v <= range || self.range == 0 {
+                        return (w(self.low) + w(v as $ty)).0;
+                    } else if v <= self.zone as $unsigned {
+                        return (w(self.low) + w((v % range) as $ty)).0;
                     }
+                    // Sample does not fall in `zone`, so reject it and retry.
                 }
             }
         }