Skip to content

Commit 694c9fc

Browse files
authored
base64ct: add Decoder::decoded_len method (#403)
Adds a method for querying the total length of the remaining data to be decoded from the stateful `Decoder`. The count is updated each time `Decoder::decode` is called.
1 parent cdc9d73 commit 694c9fc

4 files changed

Lines changed: 158 additions & 20 deletions

File tree

base64ct/src/decoder.rs

Lines changed: 146 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Buffered Base64 decoder.
22
33
use crate::{
4+
encoding,
45
variant::Variant,
56
Encoding,
67
Error::{self, InvalidLength},
@@ -33,6 +34,9 @@ pub struct Decoder<'i, E: Variant> {
3334
/// Base64 input data reader.
3435
line_reader: LineReader<'i>,
3536

37+
/// Length of the remaining data after Base64 decoding.
38+
decoded_len: usize,
39+
3640
/// Block buffer used for non-block-aligned data.
3741
block_buffer: BlockBuffer,
3842

@@ -48,13 +52,13 @@ impl<'i, E: Variant> Decoder<'i, E> {
4852
/// - `Ok(decoder)` on success.
4953
/// - `Err(Error::InvalidLength)` if the input buffer is empty.
5054
pub fn new(input: &'i [u8]) -> Result<Self, Error> {
51-
if input.is_empty() {
52-
return Err(InvalidLength);
53-
}
55+
let line_reader = LineReader::new_unwrapped(input)?;
56+
let decoded_len = line_reader.decoded_len::<E>()?;
5457

5558
Ok(Self {
56-
line: Line::new(input),
57-
line_reader: LineReader::default(),
59+
line: Line::default(),
60+
line_reader,
61+
decoded_len,
5862
block_buffer: BlockBuffer::default(),
5963
encoding: PhantomData,
6064
})
@@ -85,13 +89,13 @@ impl<'i, E: Variant> Decoder<'i, E> {
8589
///
8690
/// [RFC7468]: https://datatracker.ietf.org/doc/html/rfc7468
8791
pub fn new_wrapped(input: &'i [u8], line_width: usize) -> Result<Self, Error> {
88-
if input.is_empty() {
89-
return Err(InvalidLength);
90-
}
92+
let line_reader = LineReader::new_wrapped(input, line_width)?;
93+
let decoded_len = line_reader.decoded_len::<E>()?;
9194

9295
Ok(Self {
9396
line: Line::default(),
94-
line_reader: LineReader::new(input, line_width)?,
97+
line_reader,
98+
decoded_len,
9599
block_buffer: BlockBuffer::default(),
96100
encoding: PhantomData,
97101
})
@@ -153,9 +157,21 @@ impl<'i, E: Variant> Decoder<'i, E> {
153157
}
154158
}
155159

160+
self.decoded_len = self
161+
.decoded_len
162+
.checked_sub(out.len())
163+
.ok_or(InvalidLength)?;
164+
156165
Ok(out)
157166
}
158167

168+
/// Get the length of the remaining data after Base64 decoding.
169+
///
170+
/// Decreases every time data is decoded.
171+
pub fn decoded_len(&self) -> usize {
172+
self.decoded_len
173+
}
174+
159175
/// Has all of the input data been decoded?
160176
pub fn is_finished(&self) -> bool {
161177
self.line.is_empty() && self.line_reader.is_empty() && self.block_buffer.is_empty()
@@ -285,8 +301,8 @@ impl<'i> Default for Line<'i> {
285301
}
286302

287303
impl<'i> Line<'i> {
288-
/// Create a new line which wraps the given input data
289-
pub fn new(bytes: &'i [u8]) -> Self {
304+
/// Create a new line which wraps the given input data.
305+
fn new(bytes: &'i [u8]) -> Self {
290306
Self { remaining: bytes }
291307
}
292308

@@ -302,6 +318,12 @@ impl<'i> Line<'i> {
302318
bytes
303319
}
304320

321+
/// Slice off a tail of a given length.
322+
fn slice_tail(&self, nbytes: usize) -> Result<&'i [u8], Error> {
323+
let offset = self.len().checked_sub(nbytes).ok_or(InvalidLength)?;
324+
self.remaining.get(offset..).ok_or(InvalidLength)
325+
}
326+
305327
/// Get the number of bytes remaining in this line.
306328
fn len(&self) -> usize {
307329
self.remaining.len()
@@ -311,10 +333,20 @@ impl<'i> Line<'i> {
311333
fn is_empty(&self) -> bool {
312334
self.len() == 0
313335
}
336+
337+
/// Trim the newline off the end of this line.
338+
fn trim_end(&self) -> Self {
339+
Line::new(match self.remaining {
340+
[line @ .., CHAR_CR, CHAR_LF] => line,
341+
[line @ .., CHAR_CR] => line,
342+
[line @ .., CHAR_LF] => line,
343+
line => line,
344+
})
345+
}
314346
}
315347

316348
/// Iterator over multi-line Base64 input.
317-
#[derive(Clone, Default)]
349+
#[derive(Clone)]
318350
struct LineReader<'i> {
319351
/// Remaining linewrapped data to be processed.
320352
remaining: &'i [u8],
@@ -324,22 +356,103 @@ struct LineReader<'i> {
324356
}
325357

326358
impl<'i> LineReader<'i> {
359+
/// Create a new reader which operates over contiguous unwrapped data.
360+
fn new_unwrapped(bytes: &'i [u8]) -> Result<Self, Error> {
361+
if bytes.is_empty() {
362+
Err(InvalidLength)
363+
} else {
364+
Ok(Self {
365+
remaining: bytes,
366+
line_width: None,
367+
})
368+
}
369+
}
370+
327371
/// Create a new reader which operates over linewrapped data.
328-
fn new(bytes: &'i [u8], line_width: usize) -> Result<Self, Error> {
329-
if line_width == 0 {
372+
fn new_wrapped(bytes: &'i [u8], line_width: usize) -> Result<Self, Error> {
373+
if line_width < 4 {
330374
return Err(InvalidLength);
331375
}
332376

333-
Ok(Self {
334-
remaining: bytes,
335-
line_width: Some(line_width),
336-
})
377+
let mut reader = Self::new_unwrapped(bytes)?;
378+
reader.line_width = Some(line_width);
379+
Ok(reader)
337380
}
338381

339382
/// Is this line reader empty?
340383
fn is_empty(&self) -> bool {
341384
self.remaining.is_empty()
342385
}
386+
387+
/// Get the total length of the data decoded from this line reader.
388+
fn decoded_len<E: Variant>(&self) -> Result<usize, Error> {
389+
let mut buffer = [0u8; 4];
390+
let mut lines = self.clone();
391+
let mut line = match lines.next().transpose()? {
392+
Some(l) => l,
393+
None => return Ok(0),
394+
};
395+
let mut base64_len = 0usize;
396+
397+
loop {
398+
base64_len = base64_len.checked_add(line.len()).ok_or(InvalidLength)?;
399+
400+
match lines.next().transpose()? {
401+
Some(l) => {
402+
// Store the end of the line in the buffer so we can
403+
// reassemble the last block to determine the real length
404+
buffer.copy_from_slice(line.slice_tail(4)?);
405+
406+
line = l
407+
}
408+
409+
// To compute an exact decoded length we need to decode the
410+
// last Base64 block and get the decoded length.
411+
//
412+
// This is what the somewhat complex code below is doing.
413+
None => {
414+
// Compute number of bytes in the last block (may be unpadded)
415+
let base64_last_block_len = match base64_len % 4 {
416+
0 => 4,
417+
n => n,
418+
};
419+
420+
// Compute decoded length without the last block
421+
let decoded_len = encoding::decoded_len(
422+
base64_len
423+
.checked_sub(base64_last_block_len)
424+
.ok_or(InvalidLength)?,
425+
);
426+
427+
// Compute the decoded length of the last block
428+
let mut out = [0u8; 3];
429+
let last_block_len = if line.len() < base64_last_block_len {
430+
let buffered_part_len = base64_last_block_len
431+
.checked_sub(line.len())
432+
.ok_or(InvalidLength)?;
433+
434+
let offset = 4usize.checked_sub(buffered_part_len).ok_or(InvalidLength)?;
435+
436+
for i in 0..buffered_part_len {
437+
buffer[i] = buffer[offset.checked_add(i).ok_or(InvalidLength)?];
438+
}
439+
440+
buffer[buffered_part_len..][..line.len()].copy_from_slice(line.remaining);
441+
let buffer_len = buffered_part_len
442+
.checked_add(line.len())
443+
.ok_or(InvalidLength)?;
444+
445+
E::decode(&buffer[..buffer_len], &mut out)?.len()
446+
} else {
447+
let last_block = line.slice_tail(base64_last_block_len)?;
448+
E::decode(last_block, &mut out)?.len()
449+
};
450+
451+
return decoded_len.checked_add(last_block_len).ok_or(InvalidLength);
452+
}
453+
}
454+
}
455+
}
343456
}
344457

345458
impl<'i> Iterator for LineReader<'i> {
@@ -352,7 +465,7 @@ impl<'i> Iterator for LineReader<'i> {
352465
if self.remaining.is_empty() {
353466
return None;
354467
} else {
355-
let line = Line::new(self.remaining);
468+
let line = Line::new(self.remaining).trim_end();
356469
self.remaining = &[];
357470
return Some(Ok(line));
358471
}
@@ -369,6 +482,15 @@ impl<'i> Iterator for LineReader<'i> {
369482
let line = Line::new(&self.remaining[..line_width]);
370483
self.remaining = rest;
371484
Some(Ok(line))
485+
} else if !self.remaining.is_empty() {
486+
let line = Line::new(self.remaining).trim_end();
487+
self.remaining = b"";
488+
489+
if line.is_empty() {
490+
None
491+
} else {
492+
Some(Ok(line))
493+
}
372494
} else {
373495
None
374496
}
@@ -416,15 +538,20 @@ mod tests {
416538
{
417539
for chunk_size in 1..expected.len() {
418540
let mut decoder = f();
541+
let mut remaining_len = decoder.decoded_len();
419542
let mut buffer = [0u8; 1024];
420543

421544
for chunk in expected.chunks(chunk_size) {
422545
assert!(!decoder.is_finished());
423546
let decoded = decoder.decode(&mut buffer[..chunk.len()]).unwrap();
424547
assert_eq!(chunk, decoded);
548+
549+
remaining_len -= decoded.len();
550+
assert_eq!(remaining_len, decoder.decoded_len());
425551
}
426552

427553
assert!(decoder.is_finished());
554+
assert_eq!(decoder.decoded_len(), 0);
428555
}
429556
}
430557
}

base64ct/src/encoding.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ fn validate_padding<T: Variant>(encoded: &[u8], decoded: &[u8]) -> Result<(), Er
321321
/// Note that this function does not fully validate the Base64 is well-formed
322322
/// and may return incorrect results for malformed Base64.
323323
#[inline(always)]
324-
fn decoded_len(input_len: usize) -> usize {
324+
pub(crate) fn decoded_len(input_len: usize) -> usize {
325325
// overflow-proof computation of `(3*n)/4`
326326
let k = input_len / 4;
327327
let l = input_len - 4 * k;

base64ct/tests/proptests.proptest-regressions

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
cc ea4af6a6a3c5feddd17be51d3bb3d863881547acf50b553e76da3f34f8b755d4 # shrinks to base64ish = ""
88
cc 348d4acf2c3d1e8db3772f5645179e24b50178747469da9709e60800175eef80 # shrinks to bytes = [240, 144, 128, 128, 240, 144, 128, 128, 32, 32, 32, 194, 161, 48, 97, 97, 65, 194, 161, 32, 97, 194, 161, 32, 240, 144, 128, 128, 194, 161, 48, 32, 97, 194, 161, 240, 144, 128, 128, 32, 224, 160, 128, 97, 224, 160, 128, 48, 48, 194, 161, 32, 240, 144, 128, 128, 11, 65, 97, 48, 65, 65, 97, 11, 240, 144, 128, 128, 240, 144, 128, 128, 48, 224, 160, 128, 194, 161, 32, 32, 194, 161, 32, 48, 97, 240, 144, 128, 128, 224, 160, 128, 240, 144, 128, 128, 0, 224, 160, 128, 32, 240, 144, 128, 128, 0, 32, 32, 97, 240, 144, 128, 128, 240, 144, 128, 128, 240, 144, 128, 128, 240, 144, 128, 128, 0, 0, 240, 144, 128, 128, 32, 240, 144, 128, 128, 32, 48, 65, 11, 32, 65, 48, 48, 65, 65, 194, 161, 32, 224, 160, 128, 240, 144, 128, 128, 224, 160, 128, 0, 65, 0, 65, 32, 194, 161, 240, 144, 128, 128, 32, 65, 32, 0, 97, 32, 97, 11, 11, 48, 97, 97, 240, 144, 128, 128, 65, 240, 144, 128, 128, 194, 161], line_width = 10, chunk_size = 163
99
cc 0c0ee7f6a60d24431333f5c39c506b818a6c21022e39288619c8f78f29d30b1c # shrinks to bytes = [240, 144, 128, 128, 194, 161, 194, 161, 240, 144, 128, 128, 194, 161, 240, 144, 128, 128, 65, 224, 160, 128, 97, 224, 160, 128, 32, 97, 32, 65, 224, 160, 128, 0, 97, 0, 240, 144, 128, 128, 97, 194, 161, 32, 240, 144, 128, 128, 11, 48, 32, 65, 32, 240, 144, 128, 128, 97, 194, 161, 48, 48, 240, 144, 128, 128, 194, 161, 194, 161, 32, 194, 161, 48, 0, 32, 48, 224, 160, 128, 65, 240, 144, 128, 128, 11, 65, 11, 240, 144, 128, 128, 32, 32, 194, 161, 240, 144, 128, 128, 224, 160, 128, 240, 144, 128, 128, 194, 161, 224, 160, 128, 65, 32, 240, 144, 128, 128, 32, 240, 144, 128, 128, 48, 240, 144, 128, 128, 0, 48, 240, 144, 128, 128, 48, 65, 65, 11, 0, 65, 240, 144, 128, 128, 240, 144, 128, 128, 32, 65, 240, 144, 128, 128, 112, 75, 46, 232, 143, 132, 240, 159, 149, 180, 101, 92, 11, 42, 98, 244, 142, 150, 136, 83, 13, 243, 189, 168, 131, 194, 154, 9, 243, 129, 165, 130, 241, 138, 188, 150, 39, 241, 170, 133, 154, 39, 61, 244, 136, 146, 157, 46, 91, 108, 34, 66, 0, 239, 187, 191, 34, 240, 158, 187, 152, 241, 187, 172, 188, 46, 239, 191, 189, 244, 143, 139, 131, 13, 13, 226, 128, 174, 60, 200, 186, 194, 151, 27, 105, 43, 226, 128, 174, 70, 0, 38, 127, 194, 133, 195, 177, 123, 127, 121, 241, 128, 141, 141, 244, 137, 146, 189, 55, 54, 9, 240, 159, 149, 180, 2, 209, 168, 239, 187, 191, 11, 34, 123, 32, 42, 242, 171, 149, 149, 102, 241, 174, 190, 188, 242, 144, 186, 145, 1, 84, 34, 56, 7, 0, 194, 188, 43, 117, 48, 96, 11, 60, 242, 190, 170, 187, 47, 99, 37, 241, 175, 142, 186, 240, 178, 162, 136, 46, 2, 241, 176, 162, 162, 37, 242, 148, 135, 179, 11, 36, 104, 244, 130, 136, 177], line_width = 24, chunk_size = 240
10+
cc b6d81102accbff17f00786b06c6040fc59fee8aa087033c9b5604d2a3f246afd # shrinks to bytes = [32, 65, 11, 97, 97, 32, 240, 144, 128, 128, 97, 32, 65, 0, 0, 32, 240, 144, 128, 128, 97, 65, 97, 97, 240, 144, 128, 128, 240, 144, 128, 128, 65, 48, 240, 144, 128, 128, 240, 144, 128, 128, 32, 0, 97, 97, 240, 144, 128, 128, 65, 32, 194, 161, 65, 0, 32, 11, 97, 32, 32, 11, 32, 240, 144, 128, 128, 240, 144, 128, 128, 194, 128, 32, 48, 65, 32, 240, 144, 128, 128, 240, 144, 128, 128, 240, 144, 128, 128, 194, 161, 32, 194, 161, 48, 224, 160, 128, 240, 144, 128, 128, 97, 32, 0, 48, 240, 144, 128, 128, 0, 11, 240, 144, 128, 128, 97, 240, 144, 128, 128, 11, 32, 0, 32, 0, 194, 161, 194, 161, 56, 242, 150, 180, 168, 243, 187, 153, 181, 46, 36, 121, 70, 8, 226, 128, 174, 242, 135, 172, 189, 0, 194, 169, 244, 130, 145, 146, 240, 159, 149, 180, 63, 240, 184, 155, 139, 27, 243, 185, 138, 139, 194, 162, 46, 242, 148, 129, 171, 195, 143, 56, 241, 147, 151, 173, 240, 159, 149, 180, 33, 89, 36, 37, 240, 159, 149, 180, 200, 186, 117, 194, 165, 77, 241, 171, 180, 143, 60, 96, 242, 175, 134, 177, 27, 1, 42, 242, 145, 189, 151, 92, 39, 96, 38, 243, 181, 148, 171, 243, 164, 185, 188, 47, 195, 181, 0, 226, 128, 174, 13, 233, 136, 141, 57, 200, 186, 243, 129, 145, 159, 242, 137, 177, 176, 122, 61, 243, 140, 180, 151, 239, 191, 189, 80, 194, 144, 121, 42, 239, 191, 189, 231, 173, 145, 75, 91, 0, 123, 238, 154, 139, 58, 240, 179, 187, 172, 107, 13, 13, 123, 241, 152, 132, 160, 242, 130, 149, 190, 92, 239, 187, 191, 117, 241, 182, 130, 165, 241, 165, 155, 168, 39, 60, 0, 0, 13, 200, 186, 83, 37, 243, 174, 183, 166, 11, 0, 237, 134, 157, 39, 58, 113, 44, 243, 135, 142, 174, 9, 9, 195, 184, 74, 241, 146, 132, 133, 34, 58, 92, 123, 239, 187, 191, 37, 58, 239, 187, 191, 77, 9, 243, 183, 143, 189, 243, 159, 143, 171, 243, 162, 128, 179, 241, 137, 158, 163, 127, 60, 195, 159, 106, 47, 242, 135, 154, 161, 51, 243, 160, 136, 149, 91, 241, 175, 181, 149, 96, 58, 46, 11, 37, 107, 32, 52, 237, 136, 144, 77, 194, 
156, 42, 13, 39, 61, 2, 59, 48, 58, 240, 159, 149, 180, 4, 96, 127, 230, 166, 145, 58, 239, 187, 191, 242, 135, 132, 146, 241, 178, 129, 185, 36], line_width = 118, chunk_size = 147

base64ct/tests/proptests.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,20 @@ proptest! {
3333

3434
let mut buffer = [0u8; 384];
3535
let mut decoder = Decoder::new(encoded.as_bytes()).unwrap();
36+
let mut remaining_len = decoder.decoded_len();
3637

3738
for chunk in bytes.chunks(chunk_size) {
3839
prop_assert!(!decoder.is_finished());
3940

4041
let decoded = decoder.decode(&mut buffer[..chunk.len()]);
4142
prop_assert_eq!(Ok(chunk), decoded);
43+
44+
remaining_len -= decoded.unwrap().len();
45+
prop_assert_eq!(remaining_len, decoder.decoded_len());
4246
}
4347

4448
prop_assert!(decoder.is_finished());
49+
prop_assert_eq!(decoder.decoded_len(), 0);
4550
}
4651

4752
#[test]
@@ -76,15 +81,20 @@ proptest! {
7681

7782
let mut buffer = [0u8; 384];
7883
let mut decoder = Decoder::new_wrapped(&encoded_wrapped, line_width).unwrap();
84+
let mut remaining_len = decoder.decoded_len();
7985

8086
for chunk in bytes.chunks(chunk_size) {
8187
prop_assert!(!decoder.is_finished());
8288

8389
let decoded = decoder.decode(&mut buffer[..chunk.len()]);
8490
prop_assert_eq!(Ok(chunk), decoded);
91+
92+
remaining_len -= decoded.unwrap().len();
93+
prop_assert_eq!(remaining_len, decoder.decoded_len());
8594
}
8695

8796
prop_assert!(decoder.is_finished());
97+
prop_assert_eq!(decoder.decoded_len(), 0);
8898
}
8999
}
90100

0 commit comments

Comments
 (0)