From 80894aaa7e8675e7165d8158d8ff7b328ba4b32e Mon Sep 17 00:00:00 2001 From: Andrew Whitehead Date: Mon, 19 Aug 2024 20:40:47 -0700 Subject: [PATCH 1/7] add radix string encoding for Uint, BoxedUint Signed-off-by: Andrew Whitehead --- benches/boxed_uint.rs | 57 +++++- src/uint/boxed.rs | 2 +- src/uint/boxed/encoding.rs | 33 +++- src/uint/div_limb.rs | 5 + src/uint/encoding.rs | 346 ++++++++++++++++++++++++++++++++++++- tests/boxed_uint.rs | 15 ++ 6 files changed, 440 insertions(+), 18 deletions(-) diff --git a/benches/boxed_uint.rs b/benches/boxed_uint.rs index 03047b5f6..6fb7e1910 100644 --- a/benches/boxed_uint.rs +++ b/benches/boxed_uint.rs @@ -1,5 +1,6 @@ use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion}; use crypto_bigint::{BoxedUint, Limb, NonZero, RandomBits}; +use num_bigint::BigUint; use rand_core::OsRng; /// Size of `BoxedUint` to use in benchmark. @@ -19,7 +20,7 @@ fn bench_shifts(c: &mut Criterion) { group.bench_function("shl", |b| { b.iter_batched( || BoxedUint::random_bits(&mut OsRng, UINT_BITS), - |x| x.overflowing_shl(UINT_BITS / 2 + 10), + |x| black_box(x.overflowing_shl(UINT_BITS / 2 + 10)), BatchSize::SmallInput, ) }); @@ -35,7 +36,7 @@ fn bench_shifts(c: &mut Criterion) { group.bench_function("shr", |b| { b.iter_batched( || BoxedUint::random_bits(&mut OsRng, UINT_BITS), - |x| x.overflowing_shr(UINT_BITS / 2 + 10), + |x| black_box(x.overflowing_shr(UINT_BITS / 2 + 10)), BatchSize::SmallInput, ) }); @@ -167,7 +168,7 @@ fn bench_boxed_sqrt(c: &mut Criterion) { group.bench_function("boxed_sqrt, 4096", |b| { b.iter_batched( || BoxedUint::random_bits(&mut OsRng, UINT_BITS), - |x| x.sqrt(), + |x| black_box(x.sqrt()), BatchSize::SmallInput, ) }); @@ -175,18 +176,64 @@ fn bench_boxed_sqrt(c: &mut Criterion) { group.bench_function("boxed_sqrt_vartime, 4096", |b| { b.iter_batched( || BoxedUint::random_bits(&mut OsRng, UINT_BITS), - |x| x.sqrt_vartime(), + |x| black_box(x.sqrt_vartime()), BatchSize::SmallInput, ) }); } +fn bench_radix_encoding(c: &mut Criterion) { + let mut group = c.benchmark_group("boxed_radix_encode"); + + for radix in [2, 8, 10] { + group.bench_function(format!("from_str_radix_vartime, {radix}"), |b| { + b.iter_batched( + || BoxedUint::random_bits(&mut OsRng, UINT_BITS).to_string_radix_vartime(10), + |x| { + black_box(BoxedUint::from_str_radix_with_precision_vartime( + &x, 10, UINT_BITS, + )) + }, + BatchSize::SmallInput, + ) + }); + + group.bench_function(format!("parse_bytes, {radix} (num-bigint-dig)"), |b| { + b.iter_batched( + || BoxedUint::random_bits(&mut OsRng, UINT_BITS).to_string_radix_vartime(10), + |x| black_box(BigUint::parse_bytes(x.as_bytes(), 10)), + BatchSize::SmallInput, + ) + }); + + group.bench_function(format!("to_str_radix_vartime, {radix}"), |b| { + b.iter_batched( + || BoxedUint::random_bits(&mut OsRng, UINT_BITS), + |x| black_box(x.to_string_radix_vartime(10)), + BatchSize::SmallInput, + ) + }); + + group.bench_function(format!("to_str_radix, {radix} (num-bigint-dig)"), |b| { + b.iter_batched( + || { + let u = BoxedUint::random_bits(&mut OsRng, UINT_BITS); + BigUint::from_bytes_be(&u.to_be_bytes()) + }, + |x| black_box(x.to_str_radix(10)), + BatchSize::SmallInput, + ) + }); + } +} + criterion_group!( benches, bench_mul, bench_division, bench_shifts, - bench_boxed_sqrt + bench_boxed_sqrt, + bench_radix_encoding, ); criterion_main!(benches); diff --git a/src/uint/boxed.rs b/src/uint/boxed.rs index b1d920a00..348708fb4 100644 --- a/src/uint/boxed.rs +++ b/src/uint/boxed.rs @@ -9,7 +9,7 @@ mod bit_xor; mod bits; mod cmp; mod ct; -mod div; +pub(crate) mod div; mod div_limb; pub(crate) mod encoding; mod from; diff --git a/src/uint/boxed/encoding.rs b/src/uint/boxed/encoding.rs index 650b6272d..f9e6db363 100644 --- a/src/uint/boxed/encoding.rs +++ b/src/uint/boxed/encoding.rs @@ -2,7 +2,7 @@ use super::BoxedUint; use crate::{uint::encoding, DecodeError, Limb, Word}; -use alloc::{boxed::Box, vec::Vec}; +use alloc::{boxed::Box, string::String, vec::Vec}; use subtle::{Choice, CtOption}; impl BoxedUint { @@ -147,7 +147,7 @@ impl BoxedUint { /// Panics if `radix` is not in the range from 2 to 36. pub fn from_str_radix_vartime(src: &str, radix: u32) -> Result { let mut dec = VecDecodeByLimb::default(); - encoding::decode_str_radix(src, radix, &mut dec)?; + encoding::radix_decode_str(src, radix, &mut dec)?; Ok(Self { limbs: dec.limbs.into(), }) @@ -177,7 +177,7 @@ impl BoxedUint { bits_precision: u32, ) -> Result { let mut ret = Self::zero_with_precision(bits_precision); - encoding::decode_str_radix( + encoding::radix_decode_str( src, radix, &mut encoding::SliceDecodeByLimb::new(&mut ret.limbs), @@ -187,6 +187,13 @@ impl BoxedUint { } Ok(ret) } + + /// Format a [`BoxedUint`] as a string in a given base. + /// + /// Panics if `radix` is not in the range from 2 to 36. + pub fn to_string_radix_vartime(&self, radix: u32) -> String { + encoding::radix_encode_limbs_to_string(radix, &self.limbs) + } } /// Decoder target producing a Vec @@ -458,4 +465,24 @@ mod tests { let res = BoxedUint::from_str_radix_vartime(hex, 16).expect("error decoding"); assert_eq!(hex, format!("{res:x}")); } + + #[test] + #[cfg(feature = "rand_core")] + fn encode_radix_round_trip() { + use crate::RandomBits; + use rand_core::SeedableRng; + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(1); + + for _ in 0..100 { + let uint = BoxedUint::random_bits(&mut rng, 4096); + for radix in 2..=26 { + let enc = uint.to_string_radix_vartime(radix); + let res = BoxedUint::from_str_radix_vartime(&enc, radix).expect("decoding error"); + assert_eq!( + res, uint, + "round trip failure: radix {radix} encoded {uint} as {enc}" + ); + } + } + } } diff --git a/src/uint/div_limb.rs b/src/uint/div_limb.rs index 595518b8d..0516937b2 100644 --- a/src/uint/div_limb.rs +++ b/src/uint/div_limb.rs @@ -224,6 +224,11 @@ impl Reciprocal { } } + #[cfg(feature = "alloc")] + pub(crate) const fn divisor(&self) -> NonZero { + NonZero(Limb(self.divisor_normalized >> self.shift)) + } + /// Get the shift value pub const fn shift(&self) -> u32 { self.shift diff --git a/src/uint/encoding.rs b/src/uint/encoding.rs index 97da2bc68..df000c1c2 100644 --- a/src/uint/encoding.rs +++ b/src/uint/encoding.rs @@ -6,12 +6,25 @@ mod der; #[cfg(feature = "rlp")] mod rlp; +#[cfg(feature = "alloc")] +use alloc::{string::String, vec::Vec}; + use super::Uint; use crate::{DecodeError, Limb, Word}; +#[cfg(feature = "alloc")] +use super::boxed::div::div_rem_vartime_in_place; +#[cfg(feature = "alloc")] +use super::div_limb::{div2by1, Reciprocal}; +#[cfg(feature = "alloc")] +use crate::{NonZero, WideWord}; + #[cfg(feature = "hybrid-array")] use crate::Encoding; +#[cfg(feature = "alloc")] +const RADIX_ENCODING_LIMBS_LARGE: usize = 32; + impl Uint { /// Create a new [`Uint`] from the provided big endian bytes. pub const fn from_be_slice(bytes: &[u8]) -> Self { @@ -176,9 +189,18 @@ impl Uint { /// Panics if `radix` is not in the range from 2 to 36. pub fn from_str_radix_vartime(src: &str, radix: u32) -> Result { let mut slf = Self::ZERO; - decode_str_radix(src, radix, &mut SliceDecodeByLimb::new(&mut slf.limbs))?; + radix_decode_str(src, radix, &mut SliceDecodeByLimb::new(&mut slf.limbs))?; Ok(slf) } + + /// Format a [`Uint`] as a string in a given base. + /// + /// Panics if `radix` is not in the range from 2 to 36. + #[cfg(feature = "alloc")] + pub fn to_string_radix_vartime(&self, radix: u32) -> String { + let mut buf = *self; + radix_encode_limbs_mut_to_string(radix, buf.as_limbs_mut()) + } } /// Encode a [`Uint`] to a big endian byte array of the given size. @@ -309,7 +331,7 @@ impl DecodeByLimb for SliceDecodeByLimb<'_> { /// to the `DecodeByLimb` instance `out`. /// The input must be a non-empty ascii string, may begin with a `+` /// character, and may use `_` as a separator between digits. -pub(crate) fn decode_str_radix( +pub(crate) fn radix_decode_str( src: &str, radix: u32, out: &mut D, @@ -318,15 +340,15 @@ pub(crate) fn decode_str_radix( panic!("unsupported radix"); } if radix == 2 || radix == 4 || radix == 16 { - decode_str_radix_aligned_digits(src, radix as u8, out) + radix_decode_str_aligned_digits(src, radix as u8, out) } else { - decode_str_radix_digits(src, radix as u8, out) + radix_decode_str_digits(src, radix as u8, out) } } #[inline(always)] /// Perform basic validation and pre-processing on a digit string -fn process_radix_str(src: &str) -> Result<&[u8], DecodeError> { +fn radix_preprocess_str(src: &str) -> Result<&[u8], DecodeError> { // Treat the input as ascii bytes let src_b = src.as_bytes(); let mut digits = src_b.strip_prefix(b"+").unwrap_or(src_b); @@ -350,12 +372,12 @@ fn process_radix_str(src: &str) -> Result<&[u8], DecodeError> { } // Decode a string of digits in base `radix` -fn decode_str_radix_digits( +fn radix_decode_str_digits( src: &str, radix: u8, out: &mut D, ) -> Result<(), DecodeError> { - let digits = process_radix_str(src)?; + let digits = radix_preprocess_str(src)?; let mut buf = [0u8; Limb::BITS as _]; let mut limb_digits = Word::MAX.ilog(radix as _) as usize; let mut limb_max = Limb(Word::pow(radix as _, limb_digits as _)); @@ -417,14 +439,14 @@ fn decode_str_radix_digits( // Decode digits for bases where an integer number of characters // can represent a saturated Limb (specifically 2, 4, and 16). -fn decode_str_radix_aligned_digits( +fn radix_decode_str_aligned_digits( src: &str, radix: u8, out: &mut D, ) -> Result<(), DecodeError> { debug_assert!(radix == 2 || radix == 4 || radix == 16); - let digits = process_radix_str(src)?; + let digits = radix_preprocess_str(src)?; let shift = radix.trailing_zeros(); let limb_digits = (Limb::BITS / shift) as usize; let mut buf = [0u8; Limb::BITS as _]; @@ -476,6 +498,250 @@ fn decode_str_radix_aligned_digits( Ok(()) } +#[cfg(feature = "alloc")] +pub(crate) fn radix_encode_limbs_to_string(radix: u32, limbs: &[Limb]) -> String { + let mut array_buf = [Limb::ZERO; 128]; + let mut vec_buf = Vec::new(); + let limb_count = limbs.len(); + let buf = if limb_count <= array_buf.len() { + array_buf[..limb_count].copy_from_slice(limbs); + &mut array_buf[..limb_count] + } else { + vec_buf.extend_from_slice(limbs); + &mut vec_buf[..limb_count] + }; + radix_encode_limbs_mut_to_string(radix, buf) +} + +#[cfg(feature = "alloc")] +pub(crate) fn radix_encode_limbs_mut_to_string(radix: u32, limbs: &mut [Limb]) -> String { + if !(2..=26).contains(&radix) { + panic!("unsupported radix"); + } + + let mut out; + if radix.is_power_of_two() { + let bits = radix.trailing_zeros() as usize; + let size = (limbs.len() * Limb::BITS as usize + bits - 1) / bits; + out = vec![0u8; size]; + radix_encode_limbs_by_shifting(radix, limbs, &mut out[..]); + } else { + let params = RadixDivisionParams::for_radix(radix); + let size = params.encoded_size(limbs.len()); + out = vec![0u8; size]; + params.encode_limbs(limbs, &mut out[..]); + } + let size = out.len(); + let mut skip = 0; + while skip + 1 < size && out[skip] == b'0' { + skip += 1; + } + if skip > 0 { + out.copy_within(skip..size, 0); + out.truncate(size - skip); + } + String::from_utf8(out).expect("utf-8 decoding error") +} + +#[cfg(feature = "alloc")] +fn radix_encode_limbs_by_shifting(radix: u32, limbs: &mut [Limb], out: &mut [u8]) { + debug_assert!(radix.is_power_of_two()); + debug_assert!(!out.is_empty()); + + let radix_bits = radix.trailing_zeros(); + let mask = (radix - 1) as u8; + let mut out_idx = out.len(); + let mut digits: WideWord = 0; + let mut digits_bits = 0; + let mut digit; + + for limb in limbs.iter().chain([&Limb::ZERO]) { + digits_bits += Limb::BITS; + digits |= (limb.0 as WideWord) << (digits_bits % Limb::BITS); + for _ in 0..((digits_bits / radix_bits) as usize).min(out_idx) { + out_idx -= 1; + (digit, digits) = ((digits as u8) & mask, digits >> radix_bits); + out[out_idx] = if digit < 10 { + b'0' + digit + } else { + b'a' + (digit - 10) + }; + digits_bits -= radix_bits; + } + } + + out[0..out_idx].fill(b'0'); +} + +#[cfg(feature = "alloc")] +#[derive(Debug, Clone, Copy)] +pub(crate) struct RadixDivisionParams { + radix: u32, + digits_limb: usize, + reciprocal: Reciprocal, + digits_large: usize, + div_large: [Limb; RADIX_ENCODING_LIMBS_LARGE], +} + +#[cfg(feature = "alloc")] +impl RadixDivisionParams { + const ALL: [Self; 22] = { + let mut res = [Self { + radix: 0, + digits_limb: 0, + reciprocal: Reciprocal::default(), + digits_large: 0, + div_large: [Limb::ZERO; RADIX_ENCODING_LIMBS_LARGE], + }; 22]; + let mut radix: u32 = 3; + let mut i: usize = 0; + while radix <= 26 { + if radix.is_power_of_two() { + radix += 1; + continue; + } + let digits_limb = Word::MAX.ilog(radix as Word); + let div_limb = NonZero(Limb((radix as Word).pow(digits_limb))); + let (div_large, digits_large) = radix_large_divisor(div_limb, digits_limb as usize); + res[i] = Self { + radix, + digits_limb: digits_limb as usize, + reciprocal: Reciprocal::new(div_limb), + digits_large, + div_large, + }; + radix += 1; + i += 1; + } + res + }; + + #[allow(trivial_numeric_casts)] + pub const fn for_radix(radix: u32) -> Self { + if radix < 3 || radix > 26 || radix.is_power_of_two() { + panic!("invalid radix for division"); + } + let ret = Self::ALL[(radix + radix.leading_zeros() - 33) as usize]; + if cfg!(debug) && ret.radix != radix { + panic!("radix lookup failure"); + } + ret + } + + pub const fn encoded_size(&self, limb_count: usize) -> usize { + // a slightly pessimistic estimate + limb_count * (self.digits_limb + 1) + } + + fn encode_limbs(&self, limbs: &mut [Limb], out: &mut [u8]) { + debug_assert!(!limbs.is_empty()); + + let radix = self.radix as Word; + let div_limb = self.reciprocal.divisor().0; + let mut limb_count = limbs.len(); + let mut out_idx = out.len(); + + if limb_count > RADIX_ENCODING_LIMBS_LARGE { + // Divide by the large divisor and recurse on the encoding of the digits + let mut remain; + while limb_count >= RADIX_ENCODING_LIMBS_LARGE { + remain = self.div_large; + div_rem_vartime_in_place(&mut limbs[..limb_count], &mut remain); + limb_count = limb_count + 1 - RADIX_ENCODING_LIMBS_LARGE; + if limbs[limb_count - 1] == Limb::ZERO { + limb_count -= 1; + } + let next_idx = out_idx.saturating_sub(self.digits_large); + self.encode_limbs(&mut remain, &mut out[next_idx..out_idx]); + out_idx = next_idx; + } + } + + let lshift = self.reciprocal.shift(); + let rshift = (Limb::BITS - lshift) % Limb::BITS; + let mut hi = Limb::ZERO; + let mut digits_word; + let mut digit; + + loop { + digits_word = if limb_count > 0 { + let mut carry = Limb::ZERO; + + // If required by the reciprocal, left shift the buffer, placing the + // overflow into `hi`. + if lshift > 0 { + for limb in limbs[..limb_count].iter_mut() { + (*limb, carry) = ((*limb << lshift) | carry, *limb >> rshift); + } + carry |= hi << lshift; + } else { + carry = hi; + } + + // Divide in place by `radix ** digits_per_limb` + for limb in limbs[..limb_count].iter_mut().rev() { + (limb.0, carry.0) = div2by1(carry.0, limb.0, &self.reciprocal); + } + if limbs[limb_count - 1] << lshift < div_limb { + hi = limbs[limb_count - 1]; + limb_count -= 1; + } else { + hi = Limb::ZERO + } + + // The remainder represents a digit in base `radix ** digits_per_limb` + carry.0 >> lshift + } else { + let res = hi.0; + hi = Limb::ZERO; + res + }; + + // Output the individual digits + for _ in 0..self.digits_limb.min(out_idx) { + out_idx -= 1; + (digits_word, digit) = (digits_word / radix, (digits_word % radix) as u8); + out[out_idx] = if digit < 10 { + b'0' + digit + } else { + b'a' + (digit - 10) + }; + } + + if out_idx == 0 { + break; + } + } + } +} + +#[cfg(feature = "alloc")] +/// The maximum radix divisor for a number of limbs, returning the number of output digits +const fn radix_large_divisor( + div_limb: NonZero, + digits_limb: usize, +) -> ([Limb; RADIX_ENCODING_LIMBS_LARGE], usize) { + let mut out = [Limb::ZERO; RADIX_ENCODING_LIMBS_LARGE]; + let mut digits_large = digits_limb; + let mut top = 1; + out[0] = div_limb.0; + while top < out.len() { + let mut carry = Limb::ZERO; + let mut j = 0; + while j < top { + (out[j], carry) = Limb::ZERO.mac(out[j], div_limb.0, carry); + j += 1; + } + if carry.0 != 0 { + out[top] = carry; + top += 1; + } + digits_large += digits_limb; + } + // FIXME add multiple of radix to top limb + (out, digits_large) +} + #[cfg(test)] mod tests { use crate::{DecodeError, Limb, Zero, U128, U64}; @@ -490,6 +756,9 @@ mod tests { #[cfg(target_pointer_width = "64")] use crate::U128 as UintEx; + #[cfg(feature = "alloc")] + use super::radix_encode_limbs_to_string; + #[test] #[cfg(target_pointer_width = "32")] fn from_be_slice() { @@ -666,4 +935,63 @@ mod tests { let res = U128::from_str_radix_vartime(hex, 16).expect("error decoding"); assert_eq!(hex, format!("{res:x}")); } + + #[cfg(feature = "alloc")] + #[test] + fn encode_radix_8() { + assert_eq!( + &radix_encode_limbs_to_string(8, U128::MAX.as_limbs()), + "3777777777777777777777777777777777777777777" + ); + assert_eq!(&radix_encode_limbs_to_string(8, U128::ZERO.as_limbs()), "0"); + assert_eq!(&radix_encode_limbs_to_string(8, U128::ONE.as_limbs()), "1"); + + let hex = "1234567123456765432107654321"; + let res = U128::from_str_radix_vartime(hex, 8).expect("error decoding"); + let out = radix_encode_limbs_to_string(8, res.as_limbs()); + assert_eq!(&out, hex); + } + + #[cfg(feature = "alloc")] + #[test] + fn encode_radix_10() { + assert_eq!( + &radix_encode_limbs_to_string(10, U128::MAX.as_limbs()), + "340282366920938463463374607431768211455" + ); + assert_eq!( + &radix_encode_limbs_to_string(10, U128::ZERO.as_limbs()), + "0" + ); + assert_eq!(&radix_encode_limbs_to_string(10, U128::ONE.as_limbs()), "1"); + } + + #[cfg(feature = "alloc")] + #[test] + fn encode_radix_16() { + let hex = "fedcba9876543210fedcba9876543210"; + let res = U128::from_str_radix_vartime(hex, 16).expect("error decoding"); + let out = radix_encode_limbs_to_string(16, res.as_limbs()); + assert_eq!(&out, hex); + } + + #[cfg(all(feature = "rand", feature = "alloc"))] + #[test] + fn encode_radix_round_trip() { + use crate::{Random, U256}; + use rand_core::SeedableRng; + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(1); + + for _ in 0..100 { + let uint = U256::random(&mut rng); + for radix in 2..=26 { + let enc = uint.to_string_radix_vartime(radix); + let res = U256::from_str_radix_vartime(&enc, radix).expect("decoding error"); + assert_eq!( + res, uint, + "round trip failure: radix {radix} encoded {uint} as {enc}" + ); + } + } + } } diff --git a/tests/boxed_uint.rs b/tests/boxed_uint.rs index c4327f849..0ea472468 100644 --- a/tests/boxed_uint.rs +++ b/tests/boxed_uint.rs @@ -296,4 +296,19 @@ proptest! { prop_assert_eq!(expected, actual.unwrap()); } } + + + #[test] + fn radix_encode_vartime(a in uint(), radix in 2u32..=26) { + let a_bi = to_biguint(&a); + + let expected_enc = a_bi.to_str_radix(radix); + let actual_enc = a.to_string_radix_vartime(radix); + prop_assert_eq!(&expected_enc, &actual_enc); + + let decoded = BoxedUint::from_str_radix_vartime(&actual_enc, radix).expect("decoding error"); + let dec_bi = to_biguint(&decoded); + prop_assert_eq!(dec_bi, a_bi); + + } } From fba6be5819de618ce80a08a4fe0bc5a2122c8801 Mon Sep 17 00:00:00 2001 From: Andrew Whitehead Date: Tue, 20 Aug 2024 17:34:46 -0700 Subject: [PATCH 2/7] allow trivial casts for 32-bit Signed-off-by: Andrew Whitehead --- src/uint/encoding.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/uint/encoding.rs b/src/uint/encoding.rs index df000c1c2..a1f553a20 100644 --- a/src/uint/encoding.rs +++ b/src/uint/encoding.rs @@ -585,6 +585,7 @@ pub(crate) struct RadixDivisionParams { #[cfg(feature = "alloc")] impl RadixDivisionParams { + #[allow(trivial_numeric_casts)] const ALL: [Self; 22] = { let mut res = [Self { radix: 0, @@ -622,7 +623,7 @@ impl RadixDivisionParams { panic!("invalid radix for division"); } let ret = Self::ALL[(radix + radix.leading_zeros() - 33) as usize]; - if cfg!(debug) && ret.radix != radix { + if ret.radix != radix { panic!("radix lookup failure"); } ret @@ -633,6 +634,7 @@ impl RadixDivisionParams { limb_count * (self.digits_limb + 1) } + #[allow(trivial_numeric_casts)] fn encode_limbs(&self, limbs: &mut [Limb], out: &mut [u8]) { debug_assert!(!limbs.is_empty()); From 7c3fdb2ef48b46ecd64db0e8a9a8363691522fe8 Mon Sep 17 00:00:00 2001 From: Andrew Whitehead Date: Wed, 21 Aug 2024 13:20:39 -0700 Subject: [PATCH 3/7] fix upper radix limit Signed-off-by: Andrew Whitehead --- src/uint/boxed/encoding.rs | 2 +- src/uint/encoding.rs | 12 ++++++------ tests/boxed_uint.rs | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/uint/boxed/encoding.rs b/src/uint/boxed/encoding.rs index f9e6db363..073c1a8d8 100644 --- a/src/uint/boxed/encoding.rs +++ b/src/uint/boxed/encoding.rs @@ -475,7 +475,7 @@ mod tests { for _ in 0..100 { let uint = BoxedUint::random_bits(&mut rng, 4096); - for radix in 2..=26 { + for radix in 2..=36 { let enc = uint.to_string_radix_vartime(radix); let res = BoxedUint::from_str_radix_vartime(&enc, radix).expect("decoding error"); assert_eq!( diff --git a/src/uint/encoding.rs b/src/uint/encoding.rs index a1f553a20..f0ef4720c 100644 --- a/src/uint/encoding.rs +++ b/src/uint/encoding.rs @@ -515,7 +515,7 @@ pub(crate) fn radix_encode_limbs_to_string(radix: u32, limbs: &[Limb]) -> String #[cfg(feature = "alloc")] pub(crate) fn radix_encode_limbs_mut_to_string(radix: u32, limbs: &mut [Limb]) -> String { - if !(2..=26).contains(&radix) { + if !(2..=36).contains(&radix) { panic!("unsupported radix"); } @@ -586,17 +586,17 @@ pub(crate) struct RadixDivisionParams { #[cfg(feature = "alloc")] impl RadixDivisionParams { #[allow(trivial_numeric_casts)] - const ALL: [Self; 22] = { + const ALL: [Self; 31] = { let mut res = [Self { radix: 0, digits_limb: 0, reciprocal: Reciprocal::default(), digits_large: 0, div_large: [Limb::ZERO; RADIX_ENCODING_LIMBS_LARGE], - }; 22]; + }; 31]; let mut radix: u32 = 3; let mut i: usize = 0; - while radix <= 26 { + while radix <= 36 { if radix.is_power_of_two() { radix += 1; continue; @@ -619,7 +619,7 @@ impl RadixDivisionParams { #[allow(trivial_numeric_casts)] pub const fn for_radix(radix: u32) -> Self { - if radix < 3 || radix > 26 || radix.is_power_of_two() { + if radix < 3 || radix > 36 || radix.is_power_of_two() { panic!("invalid radix for division"); } let ret = Self::ALL[(radix + radix.leading_zeros() - 33) as usize]; @@ -986,7 +986,7 @@ mod tests { for _ in 0..100 { let uint = U256::random(&mut rng); - for radix in 2..=26 { + for radix in 2..=36 { let enc = uint.to_string_radix_vartime(radix); let res = U256::from_str_radix_vartime(&enc, radix).expect("decoding error"); assert_eq!( diff --git a/tests/boxed_uint.rs b/tests/boxed_uint.rs index 0ea472468..652a81c66 100644 --- a/tests/boxed_uint.rs +++ b/tests/boxed_uint.rs @@ -299,7 +299,7 @@ proptest! { #[test] - fn radix_encode_vartime(a in uint(), radix in 2u32..=26) { + fn radix_encode_vartime(a in uint(), radix in 2u32..=36) { let a_bi = to_biguint(&a); let expected_enc = a_bi.to_str_radix(radix); From 8d9298de4b8f684041b08f72ae123eb35efa6833 Mon Sep 17 00:00:00 2001 From: Andrew Whitehead Date: Tue, 27 Aug 2024 13:49:48 -0700 Subject: [PATCH 4/7] add constants for supported radices Signed-off-by: Andrew Whitehead --- src/uint/encoding.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/uint/encoding.rs b/src/uint/encoding.rs index f0ef4720c..7a4d80436 100644 --- a/src/uint/encoding.rs +++ b/src/uint/encoding.rs @@ -25,6 +25,9 @@ use crate::Encoding; #[cfg(feature = "alloc")] const RADIX_ENCODING_LIMBS_LARGE: usize = 32; +const RADIX_ENCODING_MIN: u32 = 2; +const RADIX_ENCODING_MAX: u32 = 36; + impl Uint { /// Create a new [`Uint`] from the provided big endian bytes. pub const fn from_be_slice(bytes: &[u8]) -> Self { @@ -336,7 +339,7 @@ pub(crate) fn radix_decode_str( radix: u32, out: &mut D, ) -> Result<(), DecodeError> { - if !(2u32..=36).contains(&radix) { + if !(RADIX_ENCODING_MIN..=RADIX_ENCODING_MAX).contains(&radix) { panic!("unsupported radix"); } if radix == 2 || radix == 4 || radix == 16 { @@ -515,7 +518,7 @@ pub(crate) fn radix_encode_limbs_to_string(radix: u32, limbs: &[Limb]) -> String #[cfg(feature = "alloc")] pub(crate) fn radix_encode_limbs_mut_to_string(radix: u32, limbs: &mut [Limb]) -> String { - if !(2..=36).contains(&radix) { + if !(RADIX_ENCODING_MIN..=RADIX_ENCODING_MAX).contains(&radix) { panic!("unsupported radix"); } @@ -585,6 +588,7 @@ pub(crate) struct RadixDivisionParams { #[cfg(feature = "alloc")] impl RadixDivisionParams { + // Generate all valid parameters ahead of time #[allow(trivial_numeric_casts)] const ALL: [Self; 31] = { let mut res = [Self { @@ -596,7 +600,7 @@ impl RadixDivisionParams { }; 31]; let mut radix: u32 = 3; let mut i: usize = 0; - while radix <= 36 { + while radix <= RADIX_ENCODING_MAX { if radix.is_power_of_two() { radix += 1; continue; @@ -619,7 +623,7 @@ impl RadixDivisionParams { #[allow(trivial_numeric_casts)] pub const fn for_radix(radix: u32) -> Self { - if radix < 3 || radix > 36 || radix.is_power_of_two() { + if radix < RADIX_ENCODING_MIN || radix > RADIX_ENCODING_MAX { panic!("invalid radix for division"); } let ret = Self::ALL[(radix + radix.leading_zeros() - 33) as usize]; @@ -694,6 +698,7 @@ impl RadixDivisionParams { // The remainder represents a digit in base `radix ** digits_per_limb` carry.0 >> lshift } else { + // Use up the remainder in `hi`, and on any further loops continue with `0` if necessary let res = hi.0; hi = Limb::ZERO; res @@ -710,6 +715,7 @@ impl RadixDivisionParams { }; } + // Finished when the buffer is full if out_idx == 0 { break; } @@ -727,7 +733,8 @@ const fn radix_large_divisor( let mut digits_large = digits_limb; let mut top = 1; out[0] = div_limb.0; - while top < out.len() { + // Calculate largest power of div_limb (itself a power of radix) + while top < RADIX_ENCODING_LIMBS_LARGE { let mut carry = Limb::ZERO; let mut j = 0; while j < top { From d45fc2eb5ef9e2c397c2f81c515d7f09249e4107 Mon Sep 17 00:00:00 2001 From: Andrew Whitehead Date: Tue, 27 Aug 2024 13:54:55 -0700 Subject: [PATCH 5/7] larger large divisor in radix encoding Signed-off-by: Andrew Whitehead --- src/uint/encoding.rs | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/uint/encoding.rs b/src/uint/encoding.rs index 7a4d80436..a944586b1 100644 --- a/src/uint/encoding.rs +++ b/src/uint/encoding.rs @@ -607,7 +607,8 @@ impl RadixDivisionParams { } let digits_limb = Word::MAX.ilog(radix as Word); let div_limb = NonZero(Limb((radix as Word).pow(digits_limb))); - let (div_large, digits_large) = radix_large_divisor(div_limb, digits_limb as usize); + let (div_large, digits_large) = + radix_large_divisor(radix, div_limb, digits_limb as usize); res[i] = Self { radix, digits_limb: digits_limb as usize, @@ -724,8 +725,10 @@ impl RadixDivisionParams { } #[cfg(feature = "alloc")] +#[allow(trivial_numeric_casts)] /// The maximum radix divisor for a number of limbs, returning the number of output digits const fn radix_large_divisor( + radix: u32, div_limb: NonZero, digits_limb: usize, ) -> ([Limb; RADIX_ENCODING_LIMBS_LARGE], usize) { @@ -747,7 +750,22 @@ const fn radix_large_divisor( } digits_large += digits_limb; } - // FIXME add multiple of radix to top limb + // Multiply by radix while we can do so without overflowing + let mut out_test = out; + loop { + let mut carry = Limb::ZERO; + let mut j = 0; + while j < RADIX_ENCODING_LIMBS_LARGE { + (out_test[j], carry) = Limb::ZERO.mac(out[j], Limb(radix as Word), carry); + j += 1; + } + if carry.0 == 0 { + out = out_test; + digits_large += 1; + } else { + break; + } + } (out, digits_large) } From b4b696bf49c44ed3a6ab030c228650d3131c35f4 Mon Sep 17 00:00:00 2001 From: Andrew Whitehead Date: Tue, 27 Aug 2024 14:14:19 -0700 Subject: [PATCH 6/7] document methods Signed-off-by: Andrew Whitehead --- src/uint/encoding.rs | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/uint/encoding.rs b/src/uint/encoding.rs index a944586b1..fb316809b 100644 --- a/src/uint/encoding.rs +++ b/src/uint/encoding.rs @@ -374,7 +374,7 @@ fn radix_preprocess_str(src: &str) -> Result<&[u8], DecodeError> { } } -// Decode a string of digits in base `radix` +/// Decode a string of digits in base `radix` fn radix_decode_str_digits( src: &str, radix: u8, @@ -440,8 +440,8 @@ fn radix_decode_str_digits( Ok(()) } -// Decode digits for bases where an integer number of characters -// can represent a saturated Limb (specifically 2, 4, and 16). +/// Decode digits for bases where an integer number of characters +/// can represent a saturated Limb (specifically 2, 4, and 16). fn radix_decode_str_aligned_digits( src: &str, radix: u8, @@ -501,6 +501,9 @@ fn radix_decode_str_aligned_digits( Ok(()) } +/// Encode a slice of limbs to a string in base `radix`. The result will have no leading +/// zeros unless the value itself is zero. +/// Panics if `radix` is not in the range from 2 to 36. #[cfg(feature = "alloc")] pub(crate) fn radix_encode_limbs_to_string(radix: u32, limbs: &[Limb]) -> String { let mut array_buf = [Limb::ZERO; 128]; @@ -516,6 +519,10 @@ pub(crate) fn radix_encode_limbs_to_string(radix: u32, limbs: &[Limb]) -> String radix_encode_limbs_mut_to_string(radix, buf) } +/// Encode a slice of limbs to a string in base `radix`. The contents of the slice +/// will be used as a working buffer. The result will have no leading zeros unless +/// the value itself is zero. +/// Panics if `radix` is not in the range from 2 to 36. #[cfg(feature = "alloc")] pub(crate) fn radix_encode_limbs_mut_to_string(radix: u32, limbs: &mut [Limb]) -> String { if !(RADIX_ENCODING_MIN..=RADIX_ENCODING_MAX).contains(&radix) { @@ -546,6 +553,10 @@ pub(crate) fn radix_encode_limbs_mut_to_string(radix: u32, limbs: &mut [Limb]) - String::from_utf8(out).expect("utf-8 decoding error") } +/// For `radix` values which are a power of two, encode the mutable limb slice to +/// the output buffer as ASCII characters in base `radix`. Leading zeros are added to +/// fill `out`. The slice `limbs` is used as a working buffer. Output will be truncated +/// if the provided buffer is too small. #[cfg(feature = "alloc")] fn radix_encode_limbs_by_shifting(radix: u32, limbs: &mut [Limb], out: &mut [u8]) { debug_assert!(radix.is_power_of_two()); @@ -576,6 +587,7 @@ fn radix_encode_limbs_by_shifting(radix: u32, limbs: &mut [Limb], out: &mut [u8] out[0..out_idx].fill(b'0'); } +/// Parameter set used to perform radix encoding by division. #[cfg(feature = "alloc")] #[derive(Debug, Clone, Copy)] pub(crate) struct RadixDivisionParams { @@ -634,11 +646,15 @@ impl RadixDivisionParams { ret } + /// Get the minimum size of the required output buffer for encoding a set of limbs. pub const fn encoded_size(&self, limb_count: usize) -> usize { // a slightly pessimistic estimate limb_count * (self.digits_limb + 1) } + /// Encode the mutable limb slice to the output buffer as ASCII characters in base + /// `radix`. Leading zeros are added to fill `out`. The slice `limbs` is used as a + /// working buffer. Output will be truncated if the provided buffer is too small. #[allow(trivial_numeric_casts)] fn encode_limbs(&self, limbs: &mut [Limb], out: &mut [u8]) { debug_assert!(!limbs.is_empty()); @@ -724,9 +740,12 @@ impl RadixDivisionParams { } } +/// Compute the maximum radix divisor for a number of limbs. +/// Returns a pair of the large divisor value and the number of digits, +/// such that `divisor = radix ** digits`. The value `div_limb` is the +/// largest power of `radix` that can fit within a limb. #[cfg(feature = "alloc")] #[allow(trivial_numeric_casts)] -/// The maximum radix divisor for a number of limbs, returning the number of output digits const fn radix_large_divisor( radix: u32, div_limb: NonZero, From e5f4bb192b88236e78011f7581012b407cdcd4fc Mon Sep 17 00:00:00 2001 From: Andrew Whitehead Date: Tue, 27 Aug 2024 14:14:25 -0700 Subject: [PATCH 7/7] fix radix encoding benchmark Signed-off-by: Andrew Whitehead --- benches/boxed_uint.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benches/boxed_uint.rs b/benches/boxed_uint.rs index 6fb7e1910..46fc67399 100644 --- a/benches/boxed_uint.rs +++ b/benches/boxed_uint.rs @@ -191,7 +191,7 @@ fn bench_radix_encoding(c: &mut Criterion) { || BoxedUint::random_bits(&mut OsRng, UINT_BITS).to_string_radix_vartime(10), |x| { black_box(BoxedUint::from_str_radix_with_precision_vartime( - &x, 10, UINT_BITS, + &x, radix, UINT_BITS, )) }, BatchSize::SmallInput, @@ -201,7 +201,7 @@ fn bench_radix_encoding(c: &mut Criterion) { group.bench_function(format!("parse_bytes, {radix} (num-bigint-dig)"), |b| { b.iter_batched( || BoxedUint::random_bits(&mut OsRng, UINT_BITS).to_string_radix_vartime(10), - |x| black_box(BigUint::parse_bytes(x.as_bytes(), 10)), + |x| black_box(BigUint::parse_bytes(x.as_bytes(), radix)), BatchSize::SmallInput, ) }); @@ -209,7 +209,7 @@ fn bench_radix_encoding(c: &mut Criterion) { group.bench_function(format!("to_str_radix_vartime, {radix}"), |b| { b.iter_batched( || BoxedUint::random_bits(&mut OsRng, UINT_BITS), - |x| black_box(x.to_string_radix_vartime(10)), + |x| black_box(x.to_string_radix_vartime(radix)), BatchSize::SmallInput, ) }); @@ -220,7 +220,7 @@ fn bench_radix_encoding(c: &mut Criterion) { let u = BoxedUint::random_bits(&mut OsRng, UINT_BITS); BigUint::from_bytes_be(&u.to_be_bytes()) }, - |x| black_box(x.to_str_radix(10)), + |x| black_box(x.to_str_radix(radix)), BatchSize::SmallInput, ) });