Skip to content

Commit 44101bd

Browse files
committed
refactor(allocator): refactor and improve safty comments of String::from_strs_array_in (#9639)
Follow-on after #9329. More fully document the constraints that ensure the safety of `String::from_strs_array_in`. The implementation in #9329 was already sound, but the comments didn't prove that (and in fact without checking the docs for `ptr::copy_nonoverlapping` and `*mut T::add`, I wasn't sure that it *was* sound if some of the input strings are zero length). Also add a debug assertion to check the pointer calculations are correct. Also refactor: Use `String::from_utf8_unchecked` to construct the eventual `String`, instead of `String::from_raw_parts_in`. The 2 are currently equivalent, but `allocator_api2::vec::Vec` does not guaranteed that `with_capacity_in` won't allocate *more* bytes than requested. If it does, `String::from_utf8_unchecked` makes use of that spare capacity in the `String`, whereas `String::from_raw_parts_in(ptr, len, len, allocator)` doesn't. That wasn't a soundness hole because both `Vec` and `String` are non-`Drop`, but it would have been a potential memory leak if they were.
1 parent ae7bb75 commit 44101bd

1 file changed

Lines changed: 66 additions & 22 deletions

File tree

crates/oxc_allocator/src/string.rs

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ impl<'alloc> String<'alloc> {
117117
}
118118
}
119119

120-
/// Create a new [`String`] from a fixed-size &str array, allocated in the given `allocator`.
120+
/// Create a new [`String`] from a fixed-size array of `&str`s concatenated together,
121+
/// allocated in the given `allocator`.
121122
///
122123
/// # Examples
123124
/// ```
@@ -135,31 +136,55 @@ impl<'alloc> String<'alloc> {
135136
strings: [&str; N],
136137
allocator: &'alloc Allocator,
137138
) -> String<'alloc> {
138-
let len = strings.iter().fold(0usize, |l, s| l.checked_add(s.len()).unwrap());
139+
// Calculate total length of all the strings concatenated.
140+
// We have to use `checked_add` here to guard against additions wrapping around
141+
// if some of the input `&str`s are very long, or there's many of them.
142+
let total_len = strings.iter().fold(0usize, |len, s| len.checked_add(s.len()).unwrap());
139143

140-
let mut ptr = Vec::with_capacity_in(len, allocator);
144+
// Create a `Vec<u8>` with sufficient capacity to contain all the `strings` concatenated.
145+
// Note: If `total_len == 0`, this does not allocate, and `vec.as_mut_ptr()` is a dangling pointer.
146+
let mut vec = Vec::with_capacity_in(total_len, allocator);
141147

142-
let mut dst = ptr.as_mut_ptr();
148+
let mut dst = vec.as_mut_ptr();
143149
for str in strings {
150+
let src = str.as_ptr();
144151
let len = str.len();
145152

146153
// SAFETY:
147-
// `src` is obtained from a `&str`
148-
// `dst` is obtained from a properly allocated `Vec<u8>` with sufficient
149-
// capacity to hold all strings.
150-
// Both `src` and `dst` are properly aligned for `u8`.
151-
// No overlapping, because we've copying from a `&str` to a newly allocated buffer.
152-
unsafe {
153-
let src = str.as_ptr();
154-
ptr::copy_nonoverlapping(src, dst, len);
155-
dst = dst.add(len);
156-
}
154+
// `src` is obtained from a `&str` with length `len`, so is valid for reading `len` bytes.
155+
// `dst` is within bounds of `vec`'s allocation. So is `dst + len`.
156+
// `u8` has no alignment requirements, so `src` and `dst` are sufficiently aligned.
157+
// No overlapping, because we're copying from an existing `&str` to a newly allocated buffer.
158+
//
159+
// If `str` is empty, `len` is 0.
160+
// If `total_len == 0`, `dst` is a dangling pointer, *not* valid for read or write of a `u8`.
161+
// `copy_nonoverlapping` requires that `src` and `dst must both
162+
// "be valid for reads of `count * size_of::<T>()` bytes".
163+
// However, safety docs for `std::ptr` (https://doc.rust-lang.org/std/ptr/index.html#safety)
164+
// state that:
165+
// "For memory accesses of size zero, *every* pointer is valid, including the null pointer".
166+
// So we do not need any special handling of zero-size `&str`s to satisfy safety constraints.
167+
unsafe { ptr::copy_nonoverlapping(src, dst, len) };
168+
169+
// SAFETY: We allocated sufficient capacity in `vec` for all the strings concatenated.
170+
// So `dst.add(len)` cannot go out of bounds.
171+
//
172+
// If `total_len` is 0, `dst` may be an invalid dangling pointer and adding to it would be UB.
173+
// But if that is the case, `len` must be 0 too.
174+
// Docs for `*mut T::add` preface the safety requirements for being in bounds and provenance
175+
// with "If the computed offset is non-zero". So they don't apply in the case where `len == 0`.
176+
// So we do not need any special handling of zero-size `&str`s to satisfy safety constraints.
177+
dst = unsafe { dst.add(len) };
157178
}
158179

159-
// SAFETY:
160-
// `ptr` was allocated with correct size for `[&str, len]`.
161-
// `len` is the sum of lengths of all strings.
162-
unsafe { String::from_raw_parts_in(ptr.as_mut_ptr(), len, len, allocator) }
180+
debug_assert_eq!(dst as usize - vec.as_ptr() as usize, total_len);
181+
182+
// SAFETY: We have written `total_len` bytes to `vec`'s backing memory
183+
unsafe { vec.set_len(total_len) };
184+
185+
// SAFETY: All the data added to `vec` has been `&str`s, so the contents of `vec` is guaranteed
186+
// to be a valid UTF-8 string
187+
unsafe { String::from_utf8_unchecked(vec) }
163188
}
164189

165190
/// Creates a new [`String`] from a length, capacity, and pointer.
@@ -299,12 +324,24 @@ impl Hash for String<'_> {
299324

300325
#[cfg(test)]
301326
mod test {
327+
use crate::{Allocator, String};
302328

303-
use crate::Allocator;
304-
use crate::String;
329+
#[test]
330+
fn string_from_array_len_1() {
331+
let allocator = Allocator::default();
332+
let string = String::from_strs_array_in(["hello"], &allocator);
333+
assert_eq!(string, "hello");
334+
}
305335

306336
#[test]
307-
fn string_from_array() {
337+
fn string_from_array_len_2() {
338+
let allocator = Allocator::default();
339+
let string = String::from_strs_array_in(["hello", "world!"], &allocator);
340+
assert_eq!(string, "helloworld!");
341+
}
342+
343+
#[test]
344+
fn string_from_array_len_3() {
308345
let hello = "hello";
309346
let world = std::string::String::from("world");
310347
let allocator = Allocator::default();
@@ -320,7 +357,14 @@ mod test {
320357
}
321358

322359
#[test]
323-
fn string_from_maybe_empty_str_array() {
360+
fn string_from_array_of_empty_strs() {
361+
let allocator = Allocator::default();
362+
let string = String::from_strs_array_in(["", "", ""], &allocator);
363+
assert_eq!(string, "");
364+
}
365+
366+
#[test]
367+
fn string_from_array_containing_some_empty_strs() {
324368
let allocator = Allocator::default();
325369
let string = String::from_strs_array_in(["", "hello", ""], &allocator);
326370
assert_eq!(string, "hello");

0 commit comments

Comments
 (0)