diff --git a/src/uint/boxed/div.rs b/src/uint/boxed/div.rs
index 4558fff16..9d061fc90 100644
--- a/src/uint/boxed/div.rs
+++ b/src/uint/boxed/div.rs
@@ -382,42 +382,6 @@ impl RemLimb for BoxedUint {
     }
 }
 
-/// Computes `limbs << shift` inplace, where `0 <= shift < Limb::BITS`, returning the carry.
-fn shl_limb_vartime(limbs: &mut [Limb], shift: u32) -> Limb {
-    if shift == 0 {
-        return Limb::ZERO;
-    }
-
-    let lshift = shift;
-    let rshift = Limb::BITS - shift;
-    let limbs_num = limbs.len();
-
-    let carry = limbs[limbs_num - 1] >> rshift;
-    for i in (1..limbs_num).rev() {
-        limbs[i] = (limbs[i] << lshift) | (limbs[i - 1] >> rshift);
-    }
-    limbs[0] <<= lshift;
-
-    carry
-}
-
-/// Computes `limbs >> shift` inplace, where `0 <= shift < Limb::BITS`.
-fn shr_limb_vartime(limbs: &mut [Limb], shift: u32) {
-    if shift == 0 {
-        return;
-    }
-
-    let lshift = Limb::BITS - shift;
-    let rshift = shift;
-
-    let limbs_num = limbs.len();
-
-    for i in 0..limbs_num - 1 {
-        limbs[i] = (limbs[i] >> rshift) | (limbs[i + 1] << lshift);
-    }
-    limbs[limbs_num - 1] >>= rshift;
-}
-
 /// Computes `x` / `y`, returning the quotient in `x` and the remainder in `y`.
 ///
 /// This function operates in variable-time. It will panic if the divisor is zero
@@ -444,44 +408,51 @@ pub(crate) fn div_rem_vartime_in_place(x: &mut [Limb], y: &mut [Limb]) {
     }
 
     let lshift = y[yc - 1].leading_zeros();
+    let rshift = if lshift == 0 { 0 } else { Limb::BITS - lshift };
+    let mut x_hi = Limb::ZERO;
+    let mut carry;
+
+    if lshift != 0 {
+        // Shift divisor such that it has no leading zeros
+        // This means that div2by1 requires no extra shifts, and ensures that the high word >= b/2
+        carry = Limb::ZERO;
+        for i in 0..yc {
+            (y[i], carry) = (Limb((y[i].0 << lshift) | carry.0), Limb(y[i].0 >> rshift));
+        }
 
-    // Shift divisor such that it has no leading zeros
-    // This means that div2by1 requires no extra shifts, and ensures that the high word >= b/2
-    shl_limb_vartime(y, lshift);
-
-    // Shift the dividend to match
-    let mut x_hi = shl_limb_vartime(x, lshift);
+        // Shift the dividend to match
+        carry = Limb::ZERO;
+        for i in 0..xc {
+            (x[i], carry) = (Limb((x[i].0 << lshift) | carry.0), Limb(x[i].0 >> rshift));
+        }
+        x_hi = carry;
+    }
 
     let reciprocal = Reciprocal::new(y[yc - 1].to_nz().expect("zero divisor"));
 
     for xi in (yc - 1..xc).rev() {
         // Divide high dividend words by the high divisor word to estimate the quotient word
-        let mut quo = div3by2(x_hi.0, x[xi].0, x[xi - 1].0, &reciprocal, y[yc - 2].0);
+        let (mut quo, _) = div3by2(x_hi.0, x[xi].0, x[xi - 1].0, &reciprocal, y[yc - 2].0);
 
         // Subtract q*divisor from the dividend
-        let borrow = {
-            let mut carry = Limb::ZERO;
-            let mut borrow = Limb::ZERO;
-            let mut tmp;
-            for i in 0..yc {
-                (tmp, carry) = Limb::ZERO.mac(y[i], Limb(quo), carry);
-                (x[xi + i + 1 - yc], borrow) = x[xi + i + 1 - yc].sbb(tmp, borrow);
-            }
-            (_, borrow) = x_hi.sbb(carry, borrow);
-            borrow
-        };
+        carry = Limb::ZERO;
+        let mut borrow = Limb::ZERO;
+        let mut tmp;
+        for i in 0..yc {
+            (tmp, carry) = Limb::ZERO.mac(y[i], Limb(quo), carry);
+            (x[xi + i + 1 - yc], borrow) = x[xi + i + 1 - yc].sbb(tmp, borrow);
+        }
+        (_, borrow) = x_hi.sbb(carry, borrow);
 
         // If the subtraction borrowed, then decrement q and add back the divisor
         // The probability of this being needed is very low, about 2/(Limb::MAX+1)
-        quo = {
-            let ct_borrow = ConstChoice::from_word_mask(borrow.0);
-            let mut carry = Limb::ZERO;
-            for i in 0..yc {
-                (x[xi + i + 1 - yc], carry) =
-                    x[xi + i + 1 - yc].adc(Limb::select(Limb::ZERO, y[i], ct_borrow), carry);
-            }
-            ct_borrow.select_word(quo, quo.wrapping_sub(1))
-        };
+        let ct_borrow = ConstChoice::from_word_mask(borrow.0);
+        carry = Limb::ZERO;
+        for i in 0..yc {
+            (x[xi + i + 1 - yc], carry) =
+                x[xi + i + 1 - yc].adc(Limb::select(Limb::ZERO, y[i], ct_borrow), carry);
+        }
+        quo = ct_borrow.select_word(quo, quo.saturating_sub(1));
 
         // Store the quotient within dividend and set x_hi to the current highest word
         x_hi = x[xi];
@@ -493,7 +464,12 @@ pub(crate) fn div_rem_vartime_in_place(x: &mut [Limb], y: &mut [Limb]) {
     y[yc - 1] = x_hi;
 
     // Unshift the remainder from the earlier adjustment
-    shr_limb_vartime(y, lshift);
+    if lshift != 0 {
+        carry = Limb::ZERO;
+        for i in (0..yc).rev() {
+            (y[i], carry) = (Limb((y[i].0 >> lshift) | carry.0), Limb(y[i].0 << rshift));
+        }
+    }
 
     // Shift the quotient to the low limbs within dividend
     // let x_size = xc - yc + 1;
diff --git a/src/uint/div.rs b/src/uint/div.rs
index 7e116eb96..14a061993 100644
--- a/src/uint/div.rs
+++ b/src/uint/div.rs
@@ -130,54 +130,6 @@ impl<const LIMBS: usize> Uint<LIMBS> {
         )
     }
 
-    /// Computes `self << shift` where `0 <= shift < Limb::BITS`,
-    /// returning the result and the carry.
-    ///
-    /// Note: assumes that `self` only has `limb_num` lowest non-zero limbs.
-    const fn shl_limb_vartime(&self, shift: u32, limbs_num: usize) -> (Self, Limb) {
-        if shift == 0 {
-            return (*self, Limb::ZERO);
-        }
-
-        let mut limbs = [Limb::ZERO; LIMBS];
-
-        let lshift = shift;
-        let rshift = Limb::BITS - shift;
-
-        let carry = self.limbs[limbs_num - 1].0 >> rshift;
-        let mut i = limbs_num - 1;
-        while i > 0 {
-            limbs[i] = Limb((self.limbs[i].0 << lshift) | (self.limbs[i - 1].0 >> rshift));
-            i -= 1;
-        }
-        limbs[0] = Limb(self.limbs[0].0 << lshift);
-
-        (Uint::<LIMBS>::new(limbs), Limb(carry))
-    }
-
-    /// Computes `self >> shift` where `0 <= shift < Limb::BITS`.
-    ///
-    /// Note: assumes that `self` only has `limb_num` lowest non-zero limbs.
-    const fn shr_limb_vartime(&self, shift: u32, limbs_num: usize) -> Self {
-        if shift == 0 {
-            return *self;
-        }
-
-        let mut limbs = [Limb::ZERO; LIMBS];
-
-        let lshift = Limb::BITS - shift;
-        let rshift = shift;
-
-        let mut i = 0;
-        while i < limbs_num - 1 {
-            limbs[i] = Limb((self.limbs[i].0 >> rshift) | (self.limbs[i + 1].0 << lshift));
-            i += 1;
-        }
-        limbs[limbs_num - 1] = Limb(self.limbs[limbs_num - 1].0 >> rshift);
-
-        Uint::<LIMBS>::new(limbs)
-    }
-
     /// Computes `self` / `rhs`, returns the quotient (q) and the remainder (r)
     ///
     /// This is variable only with respect to `rhs`.
@@ -195,67 +147,81 @@ impl<const LIMBS: usize> Uint<LIMBS> {
         let yc = ((dbits + Limb::BITS - 1) / Limb::BITS) as usize;
 
         // Short circuit for small or extra large divisors
-        if yc == 1 {
-            // If the divisor is a single limb, use limb division
-            let (q, r) = div_rem_limb_with_reciprocal(
-                self,
-                &Reciprocal::new(rhs.0.limbs[0].to_nz().expect("zero divisor")),
-            );
-            return (q, Uint::from_word(r.0));
-        }
-        if yc > LIMBS {
-            // Divisor is greater than dividend. Return zero and the dividend as the
-            // quotient and remainder
-            return (Uint::ZERO, self.resize());
-        }
-
-        // The shift needed to set the MSB of the highest nonzero limb of the divisor.
-        // 2^shift == d in the algorithm above.
-        let shift = (Limb::BITS - (dbits % Limb::BITS)) % Limb::BITS;
+        match yc {
+            1 => {
+                // If the divisor is a single limb, use limb division
+                let (q, r) = div_rem_limb_with_reciprocal(
+                    self,
+                    &Reciprocal::new(rhs.0.limbs[0].to_nz().expect("zero divisor")),
+                );
+                return (q, Uint::from_word(r.0));
+            }
+            yc if yc > LIMBS => {
+                // Divisor is greater than dividend. Return zero and the dividend as the
+                // quotient and remainder
+                return (Uint::ZERO, self.resize());
+            }
+            _ => {}
+        };
 
-        let (x, mut x_hi) = self.shl_limb_vartime(shift, LIMBS);
-        let mut x = x.to_limbs();
-        let (y, _) = rhs.0.shl_limb_vartime(shift, yc);
-        let mut y = y.to_limbs();
+        let lshift = (Limb::BITS - (dbits % Limb::BITS)) % Limb::BITS;
+        let rshift = if lshift == 0 { 0 } else { Limb::BITS - lshift };
+        let mut x = self.to_limbs();
+        let mut x_hi = Limb::ZERO;
+        let mut xi = LIMBS - 1;
+        let mut y = rhs.0.to_limbs();
+        let mut i;
+        let mut carry;
 
-        let reciprocal = Reciprocal::new(y[yc - 1].to_nz().expect("zero divisor"));
+        if lshift != 0 {
+            // Shift divisor such that it has no leading zeros
+            // This means that div2by1 requires no extra shifts, and ensures that the high word >= b/2
+            i = 0;
+            carry = Limb::ZERO;
+            while i < yc {
+                (y[i], carry) = (Limb((y[i].0 << lshift) | carry.0), Limb(y[i].0 >> rshift));
+                i += 1;
+            }
 
-        let mut i;
+            // Shift the dividend to match
+            i = 0;
+            carry = Limb::ZERO;
+            while i < LIMBS {
+                (x[i], carry) = (Limb((x[i].0 << lshift) | carry.0), Limb(x[i].0 >> rshift));
+                i += 1;
+            }
+            x_hi = carry;
+        }
 
-        let mut xi = LIMBS - 1;
+        let reciprocal = Reciprocal::new(y[yc - 1].to_nz().expect("zero divisor"));
 
         loop {
             // Divide high dividend words by the high divisor word to estimate the quotient word
-            let mut quo = div3by2(x_hi.0, x[xi].0, x[xi - 1].0, &reciprocal, y[yc - 2].0);
+            let (mut quo, _) = div3by2(x_hi.0, x[xi].0, x[xi - 1].0, &reciprocal, y[yc - 2].0);
 
             // Subtract q*divisor from the dividend
-            let borrow = {
-                let mut carry = Limb::ZERO;
-                let mut borrow = Limb::ZERO;
-                let mut tmp;
-                i = 0;
-                while i < yc {
-                    (tmp, carry) = Limb::ZERO.mac(y[i], Limb(quo), carry);
-                    (x[xi + i + 1 - yc], borrow) = x[xi + i + 1 - yc].sbb(tmp, borrow);
-                    i += 1;
-                }
-                (_, borrow) = x_hi.sbb(carry, borrow);
-                borrow
-            };
+            carry = Limb::ZERO;
+            let mut borrow = Limb::ZERO;
+            let mut tmp;
+            i = 0;
+            while i < yc {
+                (tmp, carry) = Limb::ZERO.mac(y[i], Limb(quo), carry);
+                (x[xi + i + 1 - yc], borrow) = x[xi + i + 1 - yc].sbb(tmp, borrow);
+                i += 1;
+            }
+            (_, borrow) = x_hi.sbb(carry, borrow);
 
             // If the subtraction borrowed, then decrement q and add back the divisor
             // The probability of this being needed is very low, about 2/(Limb::MAX+1)
-            quo = {
-                let ct_borrow = ConstChoice::from_word_mask(borrow.0);
-                let mut carry = Limb::ZERO;
-                i = 0;
-                while i < yc {
-                    (x[xi + i + 1 - yc], carry) =
-                        x[xi + i + 1 - yc].adc(Limb::select(Limb::ZERO, y[i], ct_borrow), carry);
-                    i += 1;
-                }
-                ct_borrow.select_word(quo, quo.wrapping_sub(1))
-            };
+            let ct_borrow = ConstChoice::from_word_mask(borrow.0);
+            carry = Limb::ZERO;
+            i = 0;
+            while i < yc {
+                (x[xi + i + 1 - yc], carry) =
+                    x[xi + i + 1 - yc].adc(Limb::select(Limb::ZERO, y[i], ct_borrow), carry);
+                i += 1;
+            }
+            quo = ct_borrow.select_word(quo, quo.saturating_sub(1));
 
             // Store the quotient within dividend and set x_hi to the current highest word
             x_hi = x[xi];
@@ -276,7 +242,14 @@ impl<const LIMBS: usize> Uint<LIMBS> {
         y[yc - 1] = x_hi;
 
         // Unshift the remainder from the earlier adjustment
-        let y = Uint::new(y).shr_limb_vartime(shift, yc);
+        if lshift != 0 {
+            i = yc;
+            carry = Limb::ZERO;
+            while i > 0 {
+                i -= 1;
+                (y[i], carry) = (Limb((y[i].0 >> lshift) | carry.0), Limb(y[i].0 << rshift));
+            }
+        }
 
         // Shift the quotient to the low limbs within dividend
         i = 0;
@@ -289,7 +262,7 @@ impl<const LIMBS: usize> Uint<LIMBS> {
             i += 1;
         }
 
-        (Uint::new(x), y)
+        (Uint::new(x), Uint::new(y))
     }
 
     /// Computes `self` % `rhs`, returns the remainder.
@@ -324,63 +297,63 @@ impl<const LIMBS: usize> Uint<LIMBS> {
             return Uint::from_word(r.0);
         }
 
-        // The shift needed to set the MSB of the highest nonzero limb of the divisor.
-        // 2^shift == d in the algorithm above.
-        let shift = (Limb::BITS - (dbits % Limb::BITS)) % Limb::BITS;
+        let lshift = (Limb::BITS - (dbits % Limb::BITS)) % Limb::BITS;
+        let rshift = if lshift == 0 { 0 } else { Limb::BITS - lshift };
+        let mut x = lower_upper.1.to_limbs(); // high limbs
+        let mut x_hi = Limb::ZERO;
+        let mut xi = LIMBS - 1;
+        let mut y = rhs.0.to_limbs();
+        let mut extra_limbs = LIMBS;
+        let mut i;
+        let mut carry;
 
-        let (y, _) = rhs.0.shl_limb_vartime(shift, yc);
-        let y = y.to_limbs();
+        if lshift != 0 {
+            // Shift divisor such that it has no leading zeros
+            // This ensures that the high word >= b/2, and means that div2by1 requires no extra shifts
+            i = 0;
+            carry = Limb::ZERO;
+            while i < yc {
+                (y[i], carry) = (Limb((y[i].0 << lshift) | carry.0), Limb(y[i].0 >> rshift));
+                i += 1;
+            }
 
-        let (x_lo, x_lo_carry) = lower_upper.0.shl_limb_vartime(shift, LIMBS);
-        let (x, mut x_hi) = lower_upper.1.shl_limb_vartime(shift, LIMBS);
-        let mut x = x.to_limbs();
-        if shift > 0 {
-            x[0] = Limb(x[0].0 | x_lo_carry.0);
+            // Shift the dividend to match
+            i = 0;
+            carry = Limb(lower_upper.0.limbs[LIMBS - 1].0 >> rshift);
+            while i < LIMBS {
+                (x[i], carry) = (Limb((x[i].0 << lshift) | carry.0), Limb(x[i].0 >> rshift));
+                i += 1;
+            }
+            x_hi = carry;
         }
 
         let reciprocal = Reciprocal::new(y[yc - 1].to_nz().expect("zero divisor"));
 
-        let mut xi = LIMBS - 1;
-        let mut extra_limbs = LIMBS;
-        let mut i;
-
-        // Note that in the algorithm we only ever need to access the highest `yc` limbs
-        // of the dividend, and since `yc < LIMBS`, we only need to access
-        // the high half of the dividend.
-        //
-        // So we proceed similarly to `div_rem_vartime()` applied to the high half of the dividend,
-        // fetching the limbs from the lower part as we go.
-
         loop {
             // Divide high dividend words by the high divisor word to estimate the quotient word
-            let quo = div3by2(x_hi.0, x[xi].0, x[xi - 1].0, &reciprocal, y[yc - 2].0);
+            let (quo, _) = div3by2(x_hi.0, x[xi].0, x[xi - 1].0, &reciprocal, y[yc - 2].0);
 
             // Subtract q*divisor from the dividend
-            let borrow = {
-                let mut carry = Limb::ZERO;
-                let mut borrow = Limb::ZERO;
-                let mut tmp;
-                i = 0;
-                while i < yc {
-                    (tmp, carry) = Limb::ZERO.mac(y[i], Limb(quo), carry);
-                    (x[xi + i + 1 - yc], borrow) = x[xi + i + 1 - yc].sbb(tmp, borrow);
-                    i += 1;
-                }
-                (_, borrow) = x_hi.sbb(carry, borrow);
-                borrow
-            };
+            carry = Limb::ZERO;
+            let mut borrow = Limb::ZERO;
+            let mut tmp;
+            i = 0;
+            while i < yc {
+                (tmp, carry) = Limb::ZERO.mac(y[i], Limb(quo), carry);
+                (x[xi + i + 1 - yc], borrow) = x[xi + i + 1 - yc].sbb(tmp, borrow);
+                i += 1;
+            }
+            (_, borrow) = x_hi.sbb(carry, borrow);
 
             // If the subtraction borrowed, then add back the divisor
             // The probability of this being needed is very low, about 2/(Limb::MAX+1)
-            {
-                let ct_borrow = ConstChoice::from_word_mask(borrow.0);
-                let mut carry = Limb::ZERO;
-                i = 0;
-                while i < yc {
-                    (x[xi + i + 1 - yc], carry) =
-                        x[xi + i + 1 - yc].adc(Limb::select(Limb::ZERO, y[i], ct_borrow), carry);
-                    i += 1;
-                }
+            let ct_borrow = ConstChoice::from_word_mask(borrow.0);
+            carry = Limb::ZERO;
+            i = 0;
+            while i < yc {
+                (x[xi + i + 1 - yc], carry) =
+                    x[xi + i + 1 - yc].adc(Limb::select(Limb::ZERO, y[i], ct_borrow), carry);
+                i += 1;
             }
 
             // Set x_hi to the current highest word
@@ -394,7 +367,13 @@ impl<const LIMBS: usize> Uint<LIMBS> {
                     x[i] = x[i - 1];
                     i -= 1;
                 }
-                x[0] = x_lo.limbs[extra_limbs];
+                x[0] = lower_upper.0.limbs[extra_limbs];
+                if lshift != 0 {
+                    x[0].0 <<= lshift;
+                    if extra_limbs > 0 {
+                        x[0].0 |= lower_upper.0.limbs[extra_limbs - 1].0 >> rshift;
+                    }
+                }
             } else {
                 if xi == yc - 1 {
                     break;
@@ -404,7 +383,22 @@ impl<const LIMBS: usize> Uint<LIMBS> {
         }
 
         // Unshift the remainder from the earlier adjustment
-        Uint::new(x).shr_limb_vartime(shift, yc)
+        if lshift != 0 {
+            i = yc;
+            carry = Limb::ZERO;
+            while i > 0 {
+                i -= 1;
+                (x[i], carry) = (Limb((x[i].0 >> lshift) | carry.0), Limb(x[i].0 << rshift));
+            }
+        }
+        // Clear upper limbs
+        i = LIMBS - 1;
+        while i >= yc {
+            x[i] = Limb::ZERO;
+            i -= 1;
+        }
+
+        Uint::new(x)
     }
 
     /// Computes `self` % 2^k. Faster than reduce since its a power of 2.
diff --git a/src/uint/div_limb.rs b/src/uint/div_limb.rs
index 775bac646..595518b8d 100644
--- a/src/uint/div_limb.rs
+++ b/src/uint/div_limb.rs
@@ -145,27 +145,23 @@ pub(crate) const fn div2by1(u1: Word, u0: Word, reciprocal: &Reciprocal) -> (Wor
     (q1, r)
 }
 
-/// Given two long integers `u = (..., u0, u1, u2)` and `v = (..., v0, v1)`
-/// (where `u2` and `v1` are the most significant limbs), where `floor(u / v) <= Limb::MAX`,
-/// calculates `q` such that `q - 1 <= floor(u / v) <= q`.
-/// In place of `v1` takes its reciprocal, and assumes that `v` was already pre-shifted
-/// so that v1 has its most significant bit set (that is, the reciprocal's `shift` is 0).
+/// Calculate the quotient and the remainder of the division of a 3-word
+/// dividend `u`, with a precalculated leading-word reciprocal `v` and a second
+/// divisor word `v0`. The dividend and the lower divisor word must be left-shifted
+/// according to the `shift` value  of the reciprocal.
 #[inline(always)]
 pub(crate) const fn div3by2(
     u2: Word,
     u1: Word,
     u0: Word,
-    v1_reciprocal: &Reciprocal,
+    reciprocal: &Reciprocal,
     v0: Word,
-) -> Word {
-    debug_assert!(v1_reciprocal.shift == 0);
-    debug_assert!(u2 <= v1_reciprocal.divisor_normalized);
-
+) -> (Word, WideWord) {
     // This method corresponds to Algorithm Q:
     // https://janmr.com/blog/2014/04/basic-multiple-precision-long-division/
 
-    let q_maxed = ConstChoice::from_word_eq(u2, v1_reciprocal.divisor_normalized);
-    let (mut quo, rem) = div2by1(q_maxed.select_word(u2, 0), u1, v1_reciprocal);
+    let q_maxed = ConstChoice::from_word_eq(u2, reciprocal.divisor_normalized);
+    let (mut quo, rem) = div2by1(q_maxed.select_word(u2, 0), u1, reciprocal);
     // When the leading dividend word equals the leading divisor word, cap the quotient
     // at Word::MAX and set the remainder to the sum of the top dividend words.
     quo = q_maxed.select_word(quo, Word::MAX);
@@ -178,12 +174,12 @@ pub(crate) const fn div3by2(
         // If r < b and q*y[-2] > r*x[-1], then set q = q - 1 and r = r + v1
         let done = ConstChoice::from_word_nonzero((rem >> Word::BITS) as Word)
             .or(ConstChoice::from_wide_word_le(qy, rx));
-        quo = done.select_word(quo.wrapping_sub(1), quo);
-        rem = done.select_wide_word(rem + (v1_reciprocal.divisor_normalized as WideWord), rem);
+        quo = done.select_word(quo.saturating_sub(1), quo);
+        rem = done.select_wide_word(rem + (reciprocal.divisor_normalized as WideWord), rem);
         i += 1;
     }
 
-    quo
+    (quo, rem)
 }
 
 /// A pre-calculated reciprocal for division by a single limb.