@@ -342,9 +342,9 @@ impl<T> Bucket<T> {
     /// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
     /// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
     /// [`RawTable::data_end`]: crate::raw::RawTable::data_end
-    /// [`RawTableInner::data_end<T>`]: crate::raw::RawTableInner::data_end<T>
+    /// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
     /// [`RawTable::buckets`]: crate::raw::RawTable::buckets
-    /// [`RawTableInner::buckets`]: crate::raw::RawTableInner::buckets
+    /// [`RawTableInner::buckets`]: RawTableInner::buckets
     #[inline]
     unsafe fn from_base_index(base: NonNull<T>, index: usize) -> Self {
         // If mem::size_of::<T>() != 0 then return a pointer to an `element` in
@@ -414,9 +414,9 @@ impl<T> Bucket<T> {
     /// [`Bucket`]: crate::raw::Bucket
     /// [`from_base_index`]: crate::raw::Bucket::from_base_index
     /// [`RawTable::data_end`]: crate::raw::RawTable::data_end
-    /// [`RawTableInner::data_end<T>`]: crate::raw::RawTableInner::data_end<T>
+    /// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
     /// [`RawTable`]: crate::raw::RawTable
-    /// [`RawTableInner`]: crate::raw::RawTableInner
+    /// [`RawTableInner`]: RawTableInner
     /// [`<*const T>::offset_from`]: https://doc.rust-lang.org/nightly/core/primitive.pointer.html#method.offset_from
     #[inline]
     unsafe fn to_base_index(&self, base: NonNull<T>) -> usize {
@@ -549,7 +549,7 @@ impl<T> Bucket<T> {
     /// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
     /// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
     /// [`RawTable::buckets`]: crate::raw::RawTable::buckets
-    /// [`RawTableInner::buckets`]: crate::raw::RawTableInner::buckets
+    /// [`RawTableInner::buckets`]: RawTableInner::buckets
     #[inline]
     unsafe fn next_n(&self, offset: usize) -> Self {
         let ptr = if Self::IS_ZERO_SIZED_TYPE {
@@ -1630,7 +1630,8 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
         let result = (probe_seq.pos + bit) & self.bucket_mask;
 
-        // In tables smaller than the group width, trailing control
+        // In tables smaller than the group width
+        // (self.buckets() < Group::WIDTH), trailing control
         // bytes outside the range of the table are filled with
         // EMPTY entries. These will unfortunately trigger a
         // match, but once masked may point to a full bucket that
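To make the wrap-around above concrete, here is a minimal, self-contained sketch of the masking arithmetic. It is not hashbrown code; the bucket count, group width, and bit position are made-up illustrative values.

```rust
fn main() {
    // Hypothetical small table: 4 buckets, group width 16 (made-up values).
    let buckets: usize = 4; // power of two
    let bucket_mask = buckets - 1; // 0b11
    let group_width: usize = 16;

    // A trailing control byte outside the real table is EMPTY, so a probe for
    // an empty/deleted slot can match at, say, bit position 5...
    let bit: usize = 5;
    assert!(bit >= buckets && bit < group_width);

    // ...but masking (with probe_seq.pos == 0 in this model) wraps it back
    // into the table, where the bucket may actually be full: 5 & 0b11 == 1.
    let result = bit & bucket_mask;
    assert_eq!(result, 1);
}
```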
@@ -1651,8 +1652,9 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         //   and properly aligned, because the table is already allocated
         //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
         //
-        // * For tables larger than the group width, we will never end up in the given
-        //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
+        // * For tables larger than the group width (self.buckets() >= Group::WIDTH),
+        //   we will never end up in the given branch, since
+        //   `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
         //   full bucket index. For tables smaller than the group width, calling the
         //   `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
         //   safe, as the trailing control bytes outside the range of the table are filled
@@ -1719,12 +1721,49 @@ impl<A: Allocator + Clone> RawTableInner<A> {
17191721 }
17201722 }
17211723
1724+ /// Prepares for rehashing data in place (that is, without allocating new memory).
1725+ /// Converts all full index `control bytes` to `DELETED` and all `DELETED` control
1726+ /// bytes to `EMPTY`, i.e. performs the following conversion:
1727+ ///
1728+ /// - `EMPTY` control bytes -> `EMPTY`;
1729+ /// - `DELETED` control bytes -> `EMPTY`;
1730+ /// - `FULL` control bytes -> `DELETED`.
1731+ ///
1732+ /// This function does not make any changes to the `data` parts of the table,
1733+ /// or any changes to the the `items` or `growth_left` field of the table.
1734+ ///
1735+ /// # Safety
1736+ ///
1737+ /// You must observe the following safety rules when calling this function:
1738+ ///
1739+ /// * The [`RawTableInner`] has already been allocated;
1740+ ///
1741+ /// * The caller of this function must convert the `DELETED` bytes back to `FULL`
1742+ /// bytes when re-inserting them into their ideal position (which was impossible
1743+ /// to do during the first insert due to tombstones). If the caller does not do
1744+ /// this, then calling this function may result in a memory leak.
1745+ ///
1746+ /// Calling this function on a table that has not been allocated results in
1747+ /// [`undefined behavior`].
1748+ ///
1749+ /// See also [`Bucket::as_ptr`] method, for more information about of properly removing
1750+ /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`].
1751+ ///
1752+ /// [`Bucket::as_ptr`]: Bucket::as_ptr
1753+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
     #[allow(clippy::mut_mut)]
     #[inline]
     unsafe fn prepare_rehash_in_place(&mut self) {
-        // Bulk convert all full control bytes to DELETED, and all DELETED
-        // control bytes to EMPTY. This effectively frees up all buckets
-        // containing a DELETED entry.
+        // Bulk convert all full control bytes to DELETED, and all DELETED control bytes
+        // to EMPTY. This effectively frees up all buckets containing a DELETED entry.
+        //
+        // SAFETY:
+        // 1. `i` is guaranteed to be within bounds since we are iterating from zero to `buckets - 1`;
+        // 2. Even if `i == self.bucket_mask`, it is safe to call `Group::load_aligned`
+        //    due to the extended control bytes range, which is `self.bucket_mask + 1 + Group::WIDTH`;
+        // 3. The caller of this function guarantees that [`RawTableInner`] has already been allocated;
+        // 4. We can use `Group::load_aligned` and `Group::store_aligned` here since we start from 0
+        //    and step by `Group::WIDTH` to the end (see `TableLayout::calculate_layout_for`).
         for i in (0..self.buckets()).step_by(Group::WIDTH) {
             let group = Group::load_aligned(self.ctrl(i));
             let group = group.convert_special_to_empty_and_full_to_deleted();
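The group-wide conversion above can be modeled one byte at a time. Below is a small scalar sketch of that mapping, assuming hashbrown's control-byte encoding (`EMPTY = 0b1111_1111`, `DELETED = 0b1000_0000`, `FULL` = high bit clear); `convert_byte` is an illustrative stand-in for the real SIMD method, not the crate's implementation.

```rust
const EMPTY: u8 = 0b1111_1111;
const DELETED: u8 = 0b1000_0000;

// Scalar model of `Group::convert_special_to_empty_and_full_to_deleted`:
// "special" bytes (high bit set) become EMPTY, full bytes become DELETED.
fn convert_byte(ctrl: u8) -> u8 {
    if ctrl & 0x80 != 0 {
        EMPTY
    } else {
        DELETED
    }
}

fn main() {
    assert_eq!(convert_byte(EMPTY), EMPTY);
    assert_eq!(convert_byte(DELETED), EMPTY);
    // A FULL byte holds the top 7 bits of the hash, e.g. 0b0101_0110:
    assert_eq!(convert_byte(0b0101_0110), DELETED);
}
```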
@@ -1733,10 +1772,19 @@ impl<A: Allocator + Clone> RawTableInner<A> {
 
         // Fix up the trailing control bytes. See the comments in set_ctrl
         // for the handling of tables smaller than the group width.
-        if self.buckets() < Group::WIDTH {
+        //
+        // SAFETY: The caller of this function guarantees that [`RawTableInner`]
+        // has already been allocated.
+        if unlikely(self.buckets() < Group::WIDTH) {
+            // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` control bytes,
+            // so copying `self.buckets() == self.bucket_mask + 1` bytes with an
+            // offset equal to `Group::WIDTH` is safe.
             self.ctrl(0)
                 .copy_to(self.ctrl(Group::WIDTH), self.buckets());
         } else {
+            // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` control bytes,
+            // so copying `Group::WIDTH` bytes with an offset equal to
+            // `self.buckets() == self.bucket_mask + 1` is safe.
             self.ctrl(0)
                 .copy_to(self.ctrl(self.buckets()), Group::WIDTH);
         }
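As a sanity check of the two copy calls, here is a hedged model of the mirroring over a plain byte slice. The `bucket_mask + 1 + Group::WIDTH` layout comes from the comments above; `mirror_trailing` is a made-up helper name, not hashbrown API.

```rust
// Model of the ctrl allocation: `buckets + width` bytes, `buckets` a power of two.
fn mirror_trailing(ctrl: &mut [u8], buckets: usize, width: usize) {
    assert_eq!(ctrl.len(), buckets + width);
    if buckets < width {
        // Small table: mirror all `buckets` bytes to offset `width`
        // (bytes in `buckets..width` stay EMPTY, matching `set_ctrl`).
        ctrl.copy_within(0..buckets, width);
    } else {
        // Large table: mirror the first `width` bytes to offset `buckets`.
        ctrl.copy_within(0..width, buckets);
    }
}

fn main() {
    // 4 buckets, group width 8: in-range bytes are 0..4, trailing are 4..12.
    let mut ctrl = [0xFFu8; 12];
    ctrl[..4].copy_from_slice(&[1, 2, 3, 4]); // pretend FULL hash bytes
    mirror_trailing(&mut ctrl, 4, 8);
    assert_eq!(ctrl[8..12], [1, 2, 3, 4]);
}
```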
@@ -2236,27 +2284,95 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         self.growth_left = bucket_mask_to_capacity(self.bucket_mask);
     }
 
+    /// Erases the [`Bucket`]'s control byte at the given index so that it is no longer
+    /// triggered as full, decreases the `items` count of the table and, if possible,
+    /// increases `self.growth_left`.
+    ///
+    /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it
+    /// does not make any changes to the `data` parts of the table. The caller of this
+    /// function must take care to properly drop the `data`, otherwise calling this
+    /// function may result in a memory leak.
+    ///
+    /// # Safety
+    ///
+    /// You must observe the following safety rules when calling this function:
+    ///
+    /// * The [`RawTableInner`] has already been allocated;
+    ///
+    /// * The control byte at the given `index` must be full;
+    ///
+    /// * The `index` must not be greater than `RawTableInner.bucket_mask`, i.e.
+    ///   `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
+    ///   be no greater than the number returned by the [`RawTableInner::buckets`] function.
+    ///
+    /// Calling this function on a table that has not been allocated results in
+    /// [`undefined behavior`].
+    ///
+    /// Calling this function on a table with no elements leaves the table in an unspecified
+    /// state, and calling subsequent functions is likely to result in [`undefined behavior`]
+    /// due to an overflowing subtraction (`self.items -= 1` overflows when `self.items == 0`).
+    ///
+    /// See also the [`Bucket::as_ptr`] method for more information about properly
+    /// removing or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+    ///
+    /// [`RawTableInner::buckets`]: RawTableInner::buckets
+    /// [`Bucket::as_ptr`]: Bucket::as_ptr
+    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
     #[inline]
     unsafe fn erase(&mut self, index: usize) {
         debug_assert!(self.is_bucket_full(index));
+
+        // This is the same as `index.wrapping_sub(Group::WIDTH) % self.buckets()` because
+        // the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
         let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask;
+        // SAFETY:
+        // - The caller must uphold the safety contract for the `erase` method;
+        // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask`.
         let empty_before = Group::load(self.ctrl(index_before)).match_empty();
         let empty_after = Group::load(self.ctrl(index)).match_empty();
 
-        // If we are inside a continuous block of Group::WIDTH full or deleted
-        // cells then a probe window may have seen a full block when trying to
-        // insert. We therefore need to keep that block non-empty so that
-        // lookups will continue searching to the next probe window.
+        // Inserting and searching in the map is performed by two key functions:
+        //
+        // - The `find_insert_slot` function that looks up the index of any `EMPTY` or `DELETED`
+        //   slot in a group to be able to insert. If it doesn't find an `EMPTY` or `DELETED`
+        //   slot immediately in the first group, it jumps to the next `Group` looking for it,
+        //   and so on until it has gone through all the groups in the control bytes.
+        //
+        // - The `find_inner` function that looks for the index of the desired element by looking
+        //   at all the `FULL` bytes in the group. If it did not find the element right away, and
+        //   there is no `EMPTY` byte in the group, then this means that the `find_insert_slot`
+        //   function may have found a suitable slot in the next group. Therefore, `find_inner`
+        //   jumps further, and if it does not find the desired element and again there is no
+        //   `EMPTY` byte, then it jumps further, and so on. The search stops only when the
+        //   `find_inner` function finds the desired element or hits an `EMPTY` slot/byte.
+        //
+        // Accordingly, this leads to two consequences:
+        //
+        // - The map must have `EMPTY` slots (bytes);
+        //
+        // - You can't just mark the byte to be erased as `EMPTY`, because otherwise the
+        //   `find_inner` function may stumble upon an `EMPTY` byte before finding the desired
+        //   element and stop searching.
+        //
+        // Thus it is necessary to check all bytes after and before the erased element. If we
+        // are in a contiguous `Group` of `FULL` or `DELETED` bytes (the number of `FULL` or
+        // `DELETED` bytes before and after is greater than or equal to `Group::WIDTH`), then
+        // we must mark our byte as `DELETED` in order for the `find_inner` function to go
+        // further. On the other hand, if there is at least one `EMPTY` slot in the `Group`,
+        // then the `find_inner` function will still stumble upon an `EMPTY` byte, so we can
+        // safely mark our erased byte as `EMPTY` as well.
+        //
+        // Finally, for tables smaller than the group width (self.buckets() < Group::WIDTH),
+        // `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index`,
+        // so given all of the above, such tables cannot have `DELETED` bytes.
         //
-        // Note that in this context `leading_zeros` refers to the bytes at the
-        // end of a group, while `trailing_zeros` refers to the bytes at the
-        // beginning of a group.
+        // Note that in this context `leading_zeros` refers to the bytes at the end of a group,
+        // while `trailing_zeros` refers to the bytes at the beginning of a group.
         let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH {
             DELETED
         } else {
             self.growth_left += 1;
             EMPTY
         };
+        // SAFETY: the caller must uphold the safety contract for the `erase` method.
         self.set_ctrl(index, ctrl);
         self.items -= 1;
     }
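To make the `DELETED` vs `EMPTY` decision above concrete, here is a hedged scalar model of it over a plain control-byte slice. There is no SIMD here; `erased_ctrl`, the 8-byte group width, and the wrap-by-mask indexing (standing in for the mirrored trailing bytes) are illustrative assumptions, not hashbrown's API.

```rust
const EMPTY: u8 = 0xFF;
const DELETED: u8 = 0x80;
const WIDTH: usize = 8; // illustrative group width

// Scalar model of the decision in `erase`: count the non-EMPTY run ending just
// before `index` (what `empty_before.leading_zeros()` measures) and the
// non-EMPTY run starting at `index` (what `empty_after.trailing_zeros()`
// measures). A combined run of at least WIDTH means a probe window may have
// seen a full block, so the byte must become a DELETED tombstone.
fn erased_ctrl(ctrl: &[u8], index: usize) -> u8 {
    let mask = ctrl.len() - 1; // ctrl.len() is the bucket count, a power of two
    let index_before = index.wrapping_sub(WIDTH) & mask;
    let before = (0..WIDTH)
        .rev()
        .take_while(|&i| ctrl[(index_before + i) & mask] != EMPTY)
        .count();
    let after = (0..WIDTH)
        .take_while(|&i| ctrl[(index + i) & mask] != EMPTY)
        .count();
    if before + after >= WIDTH {
        DELETED
    } else {
        EMPTY
    }
}

fn main() {
    // 16 buckets; a run of 9 consecutive FULL bytes (indices 0..=8). Erasing
    // in the middle must leave a tombstone: every probe window covering
    // index 4 may have seen a full block.
    let mut ctrl = [EMPTY; 16];
    ctrl[..9].fill(0x01); // pretend FULL (high bit clear)
    assert_eq!(erased_ctrl(&ctrl, 4), DELETED);

    // A short run of 5 FULL bytes: an EMPTY byte is visible in every probe
    // window covering index 2, so the byte can simply become EMPTY again.
    let mut ctrl = [EMPTY; 16];
    ctrl[..5].fill(0x01);
    assert_eq!(erased_ctrl(&ctrl, 2), EMPTY);
}
```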