Skip to content

Commit cd23518

Browse files
authored
pem-rfc7468: buffered Base64 decoder (#406)
Modifies `pem_rfc7468::Decoder` to be a wrapper around the buffered `base64ct::Decoder` type. This allows PEM to be decoded incrementally rather than only in one shot. Support for one-shot decoding is still preserved as a set of static functions which previously served as the crate's primary API, so this should not be a breaking change for most current applications. Additionally, implements `std::io::Read` for `pem_rfc7468::Decoder`, delegating to the `std::io::Read` implementation added to `base64ct::Decoder` in #404.
1 parent a1f08fd commit cd23518

5 files changed

Lines changed: 139 additions & 171 deletions

File tree

pem-rfc7468/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ rust-version = "1.56"
2121
base64ct = { version = "=1.4.0-pre.0", path = "../base64ct" }
2222

2323
[features]
24-
alloc = []
25-
std = ["alloc"]
24+
alloc = ["base64ct/alloc"]
25+
std = ["alloc", "base64ct/std"]
2626

2727
[package.metadata.docs.rs]
2828
all-features = true

pem-rfc7468/src/decoder.rs

Lines changed: 108 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,48 @@
1010
//!
1111
//! [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648
1212
13+
use crate::{
14+
grammar, Base64Decoder, Error, Result, BASE64_WRAP_WIDTH, POST_ENCAPSULATION_BOUNDARY,
15+
PRE_ENCAPSULATION_BOUNDARY,
16+
};
17+
use core::str;
18+
1319
#[cfg(feature = "alloc")]
1420
use alloc::vec::Vec;
1521

16-
use crate::{grammar, Error, Result, POST_ENCAPSULATION_BOUNDARY, PRE_ENCAPSULATION_BOUNDARY};
17-
use base64ct::{Base64, Encoding};
18-
use core::str;
22+
#[cfg(feature = "std")]
23+
use std::io;
1924

2025
/// Decode a PEM document according to RFC 7468's "Strict" grammar.
2126
///
2227
/// On success, writes the decoded document into the provided buffer, returning
2328
/// the decoded label and the portion of the provided buffer containing the
2429
/// decoded message.
2530
pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
26-
Decoder::new().decode(pem, buf)
31+
let mut decoder = Decoder::new(pem).map_err(|e| check_for_headers(pem, e))?;
32+
let type_label = decoder.type_label();
33+
let buf = buf.get_mut(..decoder.decoded_len()).ok_or(Error::Length)?;
34+
let decoded = decoder.decode(buf).map_err(|e| check_for_headers(pem, e))?;
35+
36+
if decoder.base64.is_finished() {
37+
Ok((type_label, decoded))
38+
} else {
39+
Err(Error::Length)
40+
}
2741
}
2842

2943
/// Decode a PEM document according to RFC 7468's "Strict" grammar, returning
3044
/// the result as a [`Vec`] upon success.
3145
#[cfg(feature = "alloc")]
3246
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
3347
pub fn decode_vec(pem: &[u8]) -> Result<(&str, Vec<u8>)> {
34-
Decoder::new().decode_vec(pem)
48+
let mut decoder = Decoder::new(pem).map_err(|e| check_for_headers(pem, e))?;
49+
let type_label = decoder.type_label();
50+
let mut buf = Vec::new();
51+
decoder
52+
.decode_to_end(&mut buf)
53+
.map_err(|e| check_for_headers(pem, e))?;
54+
Ok((type_label, buf))
3555
}
3656

3757
/// Decode the encapsulation boundaries of a PEM document according to RFC 7468's "Strict" grammar.
@@ -41,64 +61,103 @@ pub fn decode_label(pem: &[u8]) -> Result<&str> {
4161
Ok(Encapsulation::try_from(pem)?.label())
4262
}
4363

64+
/// Check for PEM headers in the input, as they are disallowed by RFC7468.
65+
///
66+
/// Returns `Error::HeaderDisallowed` if `err` is a Base64 invalid-encoding error and the input contains a colon; otherwise returns `err` unchanged.
67+
fn check_for_headers(pem: &[u8], err: Error) -> Error {
68+
if err == Error::Base64(base64ct::Error::InvalidEncoding)
69+
&& pem.iter().any(|&b| b == grammar::CHAR_COLON)
70+
{
71+
Error::HeaderDisallowed
72+
} else {
73+
err
74+
}
75+
}
76+
4477
/// PEM decoder.
4578
///
46-
/// This type provides a degree of configurability for how PEM is decoded.
47-
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
48-
pub struct Decoder {
49-
/// Number of characters at which to line-wrap Base64-encoded data
50-
/// (default `64`).
51-
///
52-
/// Must be a multiple of `4`, or otherwise decoding operations will return
53-
/// `Error::Base64`.
54-
// TODO(tarcieri): support for wrap widths which aren't multiples of 4?
55-
pub wrap_width: usize,
79+
/// Stateful buffered decoder type which decodes an input PEM document according
80+
/// to RFC 7468's "Strict" grammar.
81+
#[derive(Clone)]
82+
pub struct Decoder<'i> {
83+
/// PEM type label.
84+
type_label: &'i str,
85+
86+
/// Buffered Base64 decoder.
87+
base64: Base64Decoder<'i>,
5688
}
5789

58-
impl Decoder {
59-
/// Create a new [`Decoder`] with the default options.
60-
pub fn new() -> Self {
61-
Self::default()
90+
impl<'i> Decoder<'i> {
91+
/// Create a new PEM [`Decoder`] with the default options.
92+
///
93+
/// Uses the default 64-character line wrapping.
94+
pub fn new(pem: &'i [u8]) -> Result<Self> {
95+
Self::new_wrapped(pem, BASE64_WRAP_WIDTH)
6296
}
6397

64-
/// Decode a PEM document according to RFC 7468's "Strict" grammar.
65-
///
66-
/// On success, writes the decoded document into the provided buffer, returning
67-
/// the decoded label and the portion of the provided buffer containing the
68-
/// decoded message.
69-
pub fn decode<'i, 'o>(&self, pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
98+
/// Create a new PEM [`Decoder`] which wraps at the given line width.
99+
pub fn new_wrapped(pem: &'i [u8], line_width: usize) -> Result<Self> {
70100
let encapsulation = Encapsulation::try_from(pem)?;
71-
let label = encapsulation.label();
72-
let decoded_bytes = encapsulation.decode(self, buf)?;
73-
Ok((label, decoded_bytes))
101+
let type_label = encapsulation.label();
102+
let base64 = Base64Decoder::new_wrapped(encapsulation.encapsulated_text, line_width)?;
103+
Ok(Self { type_label, base64 })
104+
}
105+
106+
/// Get the PEM type label for the input document.
107+
pub fn type_label(&self) -> &'i str {
108+
self.type_label
109+
}
110+
111+
/// Decode data into the provided output buffer.
112+
///
113+
/// There must be at least as much remaining Base64 input to be decoded
114+
/// as is needed to completely fill `buf`.
115+
pub fn decode<'o>(&mut self, buf: &'o mut [u8]) -> Result<&'o [u8]> {
116+
Ok(self.base64.decode(buf)?)
74117
}
75118

76-
/// Decode a PEM document according to RFC 7468's "Strict" grammar, returning
77-
/// the result as a [`Vec`] upon success.
119+
/// Decode all of the remaining data in the input buffer into `buf`.
78120
#[cfg(feature = "alloc")]
79121
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
80-
pub fn decode_vec<'a>(&self, pem: &'a [u8]) -> Result<(&'a str, Vec<u8>)> {
81-
let encapsulation = Encapsulation::try_from(pem)?;
82-
let label = encapsulation.label();
122+
pub fn decode_to_end<'o>(&mut self, buf: &'o mut Vec<u8>) -> Result<&'o [u8]> {
123+
Ok(self.base64.decode_to_end(buf)?)
124+
}
83125

84-
// count all chars (gives over-estimation, due to whitespace)
85-
let max_len = encapsulation.encapsulated_text.len() * 3 / 4;
126+
/// Get the decoded length of the remaining PEM data after Base64 decoding.
127+
pub fn decoded_len(&self) -> usize {
128+
self.base64.decoded_len()
129+
}
86130

87-
let mut result = vec![0u8; max_len];
88-
let decoded_len = encapsulation.decode(self, &mut result)?.len();
131+
/// Are we finished decoding the PEM input?
132+
pub fn is_finished(&self) -> bool {
133+
self.base64.is_finished()
134+
}
89135

90-
// Actual encoded length can be slightly shorter than estimated
91-
// TODO(tarcieri): more reliable length estimation
92-
result.truncate(decoded_len);
93-
Ok((label, result))
136+
/// Convert into the inner [`Base64Decoder`].
137+
pub fn into_base64_decoder(self) -> Base64Decoder<'i> {
138+
self.base64
94139
}
95140
}
96141

97-
impl Default for Decoder {
98-
fn default() -> Self {
99-
Self {
100-
wrap_width: crate::BASE64_WRAP_WIDTH,
101-
}
142+
impl<'i> From<Decoder<'i>> for Base64Decoder<'i> {
143+
fn from(decoder: Decoder<'i>) -> Base64Decoder<'i> {
144+
decoder.base64
145+
}
146+
}
147+
148+
#[cfg(feature = "std")]
149+
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
150+
impl<'i> io::Read for Decoder<'i> {
151+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
152+
self.base64.read(buf)
153+
}
154+
155+
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
156+
self.base64.read_to_end(buf)
157+
}
158+
159+
fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
160+
self.base64.read_exact(buf)
102161
}
103162
}
104163

@@ -185,51 +244,6 @@ impl<'a> Encapsulation<'a> {
185244
pub fn label(self) -> &'a str {
186245
self.label
187246
}
188-
189-
/// Get an iterator over the (allegedly) Base64-encoded lines of the
190-
/// encapsulated text.
191-
pub fn encapsulated_text(self, wrap_width: usize) -> Result<Lines<'a>> {
192-
if (wrap_width > 0) && (wrap_width % 4 == 0) {
193-
Ok(Lines {
194-
bytes: self.encapsulated_text,
195-
is_start: true,
196-
wrap_width,
197-
})
198-
} else {
199-
Err(Error::Base64)
200-
}
201-
}
202-
203-
/// Decode the "encapsulated text", i.e. Base64-encoded data which lies between
204-
/// the pre/post-encapsulation boundaries.
205-
fn decode<'o>(&self, decoder: &Decoder, buf: &'o mut [u8]) -> Result<&'o [u8]> {
206-
// Ensure wrap width is supported.
207-
if (decoder.wrap_width == 0) || (decoder.wrap_width % 4 != 0) {
208-
return Err(Error::Base64);
209-
}
210-
211-
let mut out_len = 0;
212-
213-
for line in self.encapsulated_text(decoder.wrap_width)? {
214-
let line = line?;
215-
216-
match Base64::decode(line, &mut buf[out_len..]) {
217-
Err(error) => {
218-
// in the case that we are decoding the first line
219-
// and we error, then attribute the error to an unsupported header
220-
// if a colon char is present in the line
221-
if out_len == 0 && line.iter().any(|&b| b == grammar::CHAR_COLON) {
222-
return Err(Error::HeaderDisallowed);
223-
} else {
224-
return Err(error.into());
225-
}
226-
}
227-
Ok(out) => out_len += out.len(),
228-
}
229-
}
230-
231-
Ok(&buf[..out_len])
232-
}
233247
}
234248

235249
impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {
@@ -240,73 +254,24 @@ impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {
240254
}
241255
}
242256

243-
/// Iterator over the lines in the encapsulated text.
244-
struct Lines<'a> {
245-
/// Remaining data being iterated over.
246-
bytes: &'a [u8],
247-
248-
/// `true` if no lines have been read.
249-
is_start: bool,
250-
251-
/// Base64 line-wrapping width in bytes.
252-
wrap_width: usize,
253-
}
254-
255-
impl<'a> Iterator for Lines<'a> {
256-
type Item = Result<&'a [u8]>;
257-
258-
fn next(&mut self) -> Option<Self::Item> {
259-
if self.bytes.len() > self.wrap_width {
260-
let (line, rest) = self.bytes.split_at(self.wrap_width);
261-
if let Some(rest) = grammar::strip_leading_eol(rest) {
262-
self.is_start = false;
263-
self.bytes = rest;
264-
Some(Ok(line))
265-
} else {
266-
// if bytes remaining does not split at `wrap_width` such
267-
// that the next char(s) in the rest is vertical whitespace
268-
// then attribute the error generically as `EncapsulatedText`
269-
// unless we are at the first line and the line contains a colon
270-
// then it may be a unsupported header
271-
Some(Err(
272-
if self.is_start && line.iter().any(|&b| b == grammar::CHAR_COLON) {
273-
Error::HeaderDisallowed
274-
} else {
275-
Error::EncapsulatedText
276-
},
277-
))
278-
}
279-
} else if !self.bytes.is_empty() {
280-
let line = self.bytes;
281-
self.bytes = &[];
282-
Some(Ok(line))
283-
} else {
284-
None
285-
}
286-
}
287-
}
288-
289257
#[cfg(test)]
290258
mod tests {
291259
use super::Encapsulation;
292-
use crate::BASE64_WRAP_WIDTH;
293260

294261
#[test]
295262
fn pkcs8_example() {
296263
let pem = include_bytes!("../tests/examples/pkcs8.pem");
297-
let result = Encapsulation::parse(pem).unwrap();
298-
assert_eq!(result.label, "PRIVATE KEY");
264+
let encapsulation = Encapsulation::parse(pem).unwrap();
265+
assert_eq!(encapsulation.label, "PRIVATE KEY");
299266

300-
let mut lines = result.encapsulated_text(BASE64_WRAP_WIDTH).unwrap();
301267
assert_eq!(
302-
lines.next().unwrap().unwrap(),
268+
encapsulation.encapsulated_text,
303269
&[
304270
77, 67, 52, 67, 65, 81, 65, 119, 66, 81, 89, 68, 75, 50, 86, 119, 66, 67, 73, 69,
305271
73, 66, 102, 116, 110, 72, 80, 112, 50, 50, 83, 101, 119, 89, 109, 109, 69, 111,
306272
77, 99, 88, 56, 86, 119, 73, 52, 73, 72, 119, 97, 113, 100, 43, 57, 76, 70, 80,
307273
106, 47, 49, 53, 101, 113, 70
308274
]
309275
);
310-
assert_eq!(lines.next(), None);
311276
}
312277
}

pem-rfc7468/src/error.rs

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub type Result<T> = core::result::Result<T, Error>;
1010
#[non_exhaustive]
1111
pub enum Error {
1212
/// Base64-related errors.
13-
Base64,
13+
Base64(base64ct::Error),
1414

1515
/// Character encoding-related errors.
1616
CharacterEncoding,
@@ -39,26 +39,30 @@ pub enum Error {
3939

4040
impl fmt::Display for Error {
4141
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42-
f.write_str(match self {
43-
Error::Base64 => "PEM Base64 error",
44-
Error::CharacterEncoding => "PEM character encoding error",
45-
Error::EncapsulatedText => "PEM error in encapsulated text",
46-
Error::HeaderDisallowed => "PEM headers disallowed by RFC7468",
47-
Error::Label => "PEM type label invalid",
48-
Error::Length => "PEM length invalid",
49-
Error::Preamble => "PEM preamble contains invalid data (NUL byte)",
50-
Error::PreEncapsulationBoundary => "PEM error in pre-encapsulation boundary",
51-
Error::PostEncapsulationBoundary => "PEM error in post-encapsulation boundary",
52-
})
42+
match self {
43+
Error::Base64(err) => write!(f, "PEM Base64 error: {}", err),
44+
Error::CharacterEncoding => f.write_str("PEM character encoding error"),
45+
Error::EncapsulatedText => f.write_str("PEM error in encapsulated text"),
46+
Error::HeaderDisallowed => f.write_str("PEM headers disallowed by RFC7468"),
47+
Error::Label => f.write_str("PEM type label invalid"),
48+
Error::Length => f.write_str("PEM length invalid"),
49+
Error::Preamble => f.write_str("PEM preamble contains invalid data (NUL byte)"),
50+
Error::PreEncapsulationBoundary => {
51+
f.write_str("PEM error in pre-encapsulation boundary")
52+
}
53+
Error::PostEncapsulationBoundary => {
54+
f.write_str("PEM error in post-encapsulation boundary")
55+
}
56+
}
5357
}
5458
}
5559

5660
#[cfg(feature = "std")]
5761
impl std::error::Error for Error {}
5862

5963
impl From<base64ct::Error> for Error {
60-
fn from(_: base64ct::Error) -> Error {
61-
Error::Base64
64+
fn from(err: base64ct::Error) -> Error {
65+
Error::Base64(err)
6266
}
6367
}
6468

0 commit comments

Comments
 (0)