From e2a9857e721728ecd165b90ed12794b84d4d5eb9 Mon Sep 17 00:00:00 2001 From: Gary Beihl Date: Thu, 15 Jan 2026 09:52:05 -0500 Subject: [PATCH 1/3] patina_internal_collections: Use MaybeUninit to avoid Default requirement Replace Default trait requirement with MaybeUninit wrapper for Node data field. Only initialize data when nodes move from available to in-use list. Fixes UB where Cell::set() was reading uninitialized memory. --- core/patina_internal_collections/src/bst.rs | 14 +- core/patina_internal_collections/src/node.rs | 151 ++++++++++++++----- core/patina_internal_collections/src/rbt.rs | 74 ++++----- 3 files changed, 158 insertions(+), 81 deletions(-) diff --git a/core/patina_internal_collections/src/bst.rs b/core/patina_internal_collections/src/bst.rs index 8a8f43288..02a75cec0 100644 --- a/core/patina_internal_collections/src/bst.rs +++ b/core/patina_internal_collections/src/bst.rs @@ -159,7 +159,7 @@ where /// pub fn get(&self, key: &D::Key) -> Option<&D> { match self.get_node(key) { - Some(node) => Some(&node.data), + Some(node) => Some(unsafe { node.data() }), None => None, } } @@ -186,7 +186,7 @@ where // SAFETY: The pointer comes from as_mut_ptr() on a valid node reference obtained from get_node(). // The caller is responsible for ensuring that the mutable reference doesn't modify key-affecting // values. - Some(unsafe { &mut (*ptr).data }) + Some(unsafe { (*ptr).data_mut() }) } None => None, } @@ -209,7 +209,7 @@ where /// pub fn get_with_idx(&self, idx: usize) -> Option<&D> { match self.storage.get(idx) { - Some(node) => Some(&node.data), + Some(node) => Some(unsafe { node.data() }), None => None, } } @@ -236,7 +236,7 @@ where /// pub unsafe fn get_with_idx_mut(&mut self, idx: usize) -> Option<&mut D> { match self.storage.get_mut(idx) { - Some(node) => Some(&mut node.data), + Some(node) => Some(unsafe { node.data_mut() }), None => None, } } @@ -281,7 +281,7 @@ where let mut current = self.root(); let mut closest = None; while let Some(node) = current { - match key.cmp(node.data.key()) { + match key.cmp(unsafe { node.data() }.key()) { Ordering::Equal => return Some(self.storage.idx(node.as_mut_ptr())), Ordering::Less => current = node.left(), Ordering::Greater => { @@ -494,7 +494,7 @@ where fn get_node(&self, key: &D::Key) -> Option<&Node> { let mut current_idx = self.root(); while let Some(node) = current_idx { - match key.cmp(node.data.key()) { + match key.cmp(unsafe { node.data() }.key()) { Ordering::Equal => return Some(node), Ordering::Less => current_idx = node.left(), Ordering::Greater => current_idx = node.right(), @@ -639,7 +639,7 @@ where fn _dfs(node: Option<&Node>, values: &mut alloc::vec::Vec) { if let Some(node) = node { Self::_dfs(node.left(), values); - values.push(node.data); + values.push(unsafe { *node.data() }); Self::_dfs(node.right(), values); } } diff --git a/core/patina_internal_collections/src/node.rs b/core/patina_internal_collections/src/node.rs index 718bb9fc5..feffa5c35 100644 --- a/core/patina_internal_collections/src/node.rs +++ b/core/patina_internal_collections/src/node.rs @@ -6,7 +6,7 @@ //! //! SPDX-License-Identifier: Apache-2.0 //! -use core::{cell::Cell, mem, ptr::NonNull, slice}; +use core::{cell::Cell, mem, mem::MaybeUninit, ptr::NonNull, slice}; use crate::{Error, Result, SliceKey}; @@ -51,23 +51,32 @@ where /// Create a new storage container with a slice of memory. pub fn with_capacity(slice: &'a mut [u8]) -> Storage<'a, D> { - let storage = Storage { - // SAFETY: This is reinterpreting a byte slice as a Node slice. - // 1. The alignment is checked implicitly by the slice bounds. - // 2. The correct number of Node elements that fit in the byte slice is calculated. - // 3. The lifetime ensures the byte slice remains valid for the storage's lifetime - data: unsafe { - slice::from_raw_parts_mut::<'a, Node>( - slice as *mut [u8] as *mut Node, - slice.len() / mem::size_of::>(), - ) - }, - length: 0, - available: Cell::default(), + // SAFETY: This is reinterpreting a byte slice as a MaybeUninit> slice. + // Using MaybeUninit explicitly represents uninitialized memory. + let uninit_buffer = unsafe { + slice::from_raw_parts_mut::<'a, MaybeUninit>>( + slice as *mut [u8] as *mut MaybeUninit>, + slice.len() / mem::size_of::>(), + ) }; - Self::build_linked_list(storage.data); - storage.available.set(storage.data[0].as_mut_ptr()); + // Initialize nodes with uninitialized data fields + for elem in uninit_buffer.iter_mut() { + elem.write(Node::new_uninit()); + } + + // SAFETY: All nodes have been initialized (though their data fields are uninitialized). + // We can now safely convert from MaybeUninit> to Node. + let buffer = + unsafe { slice::from_raw_parts_mut(uninit_buffer.as_mut_ptr() as *mut Node, uninit_buffer.len()) }; + + let storage = Storage { data: buffer, length: 0, available: Cell::default() }; + + if !storage.data.is_empty() { + Self::build_linked_list(storage.data); + storage.available.set(storage.data[0].as_mut_ptr()); + } + storage } @@ -105,7 +114,11 @@ where node.set_left(None); node.set_right(None); node.set_parent(None); - node.data = data; + // SAFETY: The node is from the available list, so its data field is uninitialized. + // We initialize it here when moving the node to the "in use" state. + unsafe { + node.init_data(data); + } self.length += 1; Ok((self.idx(node.as_mut_ptr()), node)) } else { @@ -188,17 +201,26 @@ where /// /// O(n) pub fn resize(&mut self, slice: &'a mut [u8]) { - // SAFETY: This is reinterpreting a byte slice as a Node slice. - // 1. The alignment is handled by slice casting rules - // 2. The correct number of Node elements that fit in the byte slice is calculated - // 3. The lifetime 'a ensures the byte slice remains valid for the storage's lifetime - let buffer = unsafe { - slice::from_raw_parts_mut::<'a, Node>( - slice as *mut [u8] as *mut Node, + // SAFETY: This is reinterpreting a byte slice as a MaybeUninit> slice. + // Using MaybeUninit explicitly represents uninitialized memory. + let uninit_buffer = unsafe { + slice::from_raw_parts_mut::<'a, MaybeUninit>>( + slice as *mut [u8] as *mut MaybeUninit>, slice.len() / mem::size_of::>(), ) }; + assert!(uninit_buffer.len() >= self.len()); + + // Initialize all nodes with uninitialized data fields + for elem in uninit_buffer.iter_mut() { + elem.write(Node::new_uninit()); + } + + // SAFETY: All nodes have been initialized (though their data fields may be uninitialized). + let buffer = + unsafe { slice::from_raw_parts_mut(uninit_buffer.as_mut_ptr() as *mut Node, uninit_buffer.len()) }; + assert!(buffer.len() >= self.len()); // When current capacity is 0, we just need to copy the data and build the available list @@ -213,7 +235,13 @@ where for i in 0..self.len() { let old = &self.data[i]; - buffer[i].data = old.data; + // SAFETY: Nodes at indices 0..self.len() are "in use" and have initialized data. + // We copy the initialized data from old to new. + unsafe { + let old_data = old.data(); + // Use ptr::copy to copy the data from old to new + buffer[i].data = MaybeUninit::new(*old_data); + } buffer[i].set_color(old.color()); if let Some(left) = old.left() { @@ -464,7 +492,7 @@ pub struct Node where D: SliceKey, { - pub data: D, + pub data: MaybeUninit, color: Cell, parent: Cell<*mut Node>, left: Cell<*mut Node>, @@ -475,8 +503,48 @@ impl Node where D: SliceKey, { + /// Create a new node with uninitialized data. + /// The data field must be initialized separately using `init_data()`. + pub fn new_uninit() -> Self { + Node { + data: MaybeUninit::uninit(), + color: Cell::new(RED), + parent: Cell::default(), + left: Cell::default(), + right: Cell::default(), + } + } + + /// Initialize the data field of an uninitialized node. + /// # Safety + /// The caller must ensure the data field has not been previously initialized. + pub unsafe fn init_data(&mut self, data: D) { + self.data.write(data); + } + + /// Creates a new Node with initialized data. + /// Used for testing purposes. + #[cfg(test)] pub fn new(data: D) -> Self { - Node { data, color: Cell::new(RED), parent: Cell::default(), left: Cell::default(), right: Cell::default() } + let mut node = Self::new_uninit(); + node.data.write(data); + node + } + + /// Get a reference to the data, assuming it is initialized. + /// # Safety + /// The caller must ensure the data field has been initialized. + pub unsafe fn data(&self) -> &D { + // SAFETY: Caller guarantees data is initialized + unsafe { self.data.assume_init_ref() } + } + + /// Get a mutable reference to the data, assuming it is initialized. + /// # Safety + /// The caller must ensure the data field has been initialized. + pub unsafe fn data_mut(&mut self) -> &mut D { + // SAFETY: Caller guarantees data is initialized + unsafe { self.data.assume_init_mut() } } pub fn height_and_balance(node: Option<&Node>) -> (i32, bool) { @@ -584,7 +652,9 @@ where impl SliceKey for Node { type Key = D::Key; fn key(&self) -> &Self::Key { - self.data.key() + // SAFETY: This method is only called on nodes that are in use (initialized). + // Nodes in the available list are never accessed for their key. + unsafe { self.data().key() } } } @@ -602,7 +672,8 @@ mod tests { for i in 0..10 { let (index, node) = storage.add(i).unwrap(); assert_eq!(index, i); - assert_eq!(node.data, i); + // SAFETY: Node was just added with data, so it's initialized + assert_eq!(unsafe { *node.data() }, i); assert_eq!(storage.len(), i + 1); } @@ -613,16 +684,22 @@ mod tests { storage.delete(storage.get(5).unwrap().as_mut_ptr()); let (index, node) = storage.add(11).unwrap(); assert_eq!(index, 5); - assert_eq!(node.data, 11); + // SAFETY: Node was just added with data, so it's initialized + assert_eq!(unsafe { *node.data() }, 11); // Try and get a mutable reference to a node { let node = storage.get_mut(5).unwrap(); - assert_eq!(node.data, 11); - node.data = 12; + // SAFETY: Node is in use, so data is initialized + assert_eq!(unsafe { *node.data() }, 11); + // SAFETY: Node is in use, we can modify the initialized data + unsafe { + *node.data_mut() = 12; + } } let node = storage.get(5).unwrap(); - assert_eq!(node.data, 12); + // SAFETY: Node is in use, so data is initialized + assert_eq!(unsafe { *node.data() }, 12); } #[test] @@ -640,8 +717,8 @@ mod tests { p4.set_parent(Some(p1)); - assert_eq!(Node::sibling(p2).unwrap().data, 3); - assert_eq!(Node::sibling(p3).unwrap().data, 2); + assert_eq!(unsafe { *Node::sibling(p2).unwrap().data() }, 3); + assert_eq!(unsafe { *Node::sibling(p3).unwrap().data() }, 2); assert!(Node::sibling(p1).is_none()); } @@ -680,7 +757,7 @@ mod tests { p2.set_right(Some(p4)); p4.set_parent(Some(p2)); - assert_eq!(Node::predecessor(p1).unwrap().data, 4); + assert_eq!(unsafe { *Node::predecessor(p1).unwrap().data() }, 4); assert!(Node::predecessor(p4).is_none()); } @@ -700,7 +777,7 @@ mod tests { p2.set_right(Some(p4)); p4.set_parent(Some(p2)); - assert_eq!(Node::successor(p1).unwrap().data, 3); + assert_eq!(unsafe { *Node::successor(p1).unwrap().data() }, 3); assert!(Node::successor(p4).is_none()); } diff --git a/core/patina_internal_collections/src/rbt.rs b/core/patina_internal_collections/src/rbt.rs index 996e221bb..199a158ec 100644 --- a/core/patina_internal_collections/src/rbt.rs +++ b/core/patina_internal_collections/src/rbt.rs @@ -165,7 +165,7 @@ where /// pub fn get(&self, key: &D::Key) -> Option<&D> { match self.get_node(key) { - Some(node) => Some(&node.data), + Some(node) => Some(unsafe { node.data() }), None => None, } } @@ -189,7 +189,7 @@ where match self.get_node(key) { // SAFETY: The pointer comes from as_mut_ptr() on a valid node reference obtained from get_node(). // The caller is responsible for ensuring the mutable reference doesn't modify key-affecting values. - Some(node) => Some(unsafe { &mut (*node.as_mut_ptr()).data }), + Some(node) => Some(unsafe { (*node.as_mut_ptr()).data_mut() }), None => None, } } @@ -211,7 +211,7 @@ where /// pub fn get_with_idx(&self, idx: usize) -> Option<&D> { match self.storage.get(idx) { - Some(node) => Some(&node.data), + Some(node) => Some(unsafe { node.data() }), None => None, } } @@ -238,7 +238,7 @@ where /// pub unsafe fn get_with_idx_mut(&mut self, idx: usize) -> Option<&mut D> { match self.storage.get_mut(idx) { - Some(node) => Some(&mut node.data), + Some(node) => Some(unsafe { node.data_mut() }), None => None, } } @@ -283,7 +283,7 @@ where let mut current = self.root(); let mut closest = None; while let Some(node) = current { - match key.cmp(node.data.key()) { + match key.cmp(unsafe { node.data() }.key()) { Ordering::Equal => return Some(self.storage.idx(node.as_mut_ptr())), Ordering::Less => current = node.left(), Ordering::Greater => { @@ -870,7 +870,7 @@ where fn _dfs(node: Option<&Node>, values: &mut alloc::vec::Vec) { if let Some(node) = node { Self::_dfs(node.left(), values); - values.push(node.data); + values.push(unsafe { *node.data() }); Self::_dfs(node.right(), values); } } @@ -1024,25 +1024,25 @@ mod tests { // Validate left child (9) let left = root.left().unwrap(); assert!(left.is_black()); - assert_eq!(left.data, 9); + assert_eq!(unsafe { *left.data() }, 9); assert_eq!(left.parent_ptr(), root.as_mut_ptr()); // Validate right child(24) let right = root.right().unwrap(); assert!(right.is_black()); - assert_eq!(right.data, 24); + assert_eq!(unsafe { *right.data() }, 24); assert_eq!(right.parent_ptr(), root.as_mut_ptr()); // Validate right child's left child (19) let right_l = right.left().unwrap(); assert!(right_l.is_red()); - assert_eq!(right_l.data, 19); + assert_eq!(unsafe { *right_l.data() }, 19); assert_eq!(right_l.parent_ptr(), right.as_mut_ptr()); // Validate right child's right child (75) let right_r = right.right().unwrap(); assert!(right_r.is_red()); - assert_eq!(right_r.data, 75); + assert_eq!(unsafe { *right_r.data() }, 75); } #[test] @@ -1242,7 +1242,7 @@ mod tests { // Validate the new root assert_eq!(new_root.as_mut_ptr(), right.as_mut_ptr()); - assert_eq!(right.data, 19); + assert_eq!(unsafe { *right.data() }, 19); assert!(right.is_black()); assert!(right.parent().is_none()); assert_eq!(right.left_ptr(), root.as_mut_ptr()); @@ -1250,21 +1250,21 @@ mod tests { //Validate the left child assert_eq!(root.parent_ptr(), right.as_mut_ptr()); - assert_eq!(root.data, 17); + assert_eq!(unsafe { *root.data() }, 17); assert!(root.is_black()); assert!(root.left().is_none()); assert_eq!(root.right_ptr(), right_l.as_mut_ptr()); // Validate the right child assert_eq!(right_r.parent_ptr(), right.as_mut_ptr()); - assert_eq!(right_r.data, 75); + assert_eq!(unsafe { *right_r.data() }, 75); assert!(right_r.is_black()); assert!(right_r.left().is_none()); assert!(right_r.right().is_none()); // validate the right child of the left child assert_eq!(right_l.parent_ptr(), root.as_mut_ptr()); - assert_eq!(right_l.data, 18); + assert_eq!(unsafe { *right_l.data() }, 18); assert!(right_l.is_red()); assert!(right_l.left().is_none()); assert!(right_l.right().is_none()); @@ -1327,38 +1327,38 @@ mod tests { // by remove_node_from_tree. The pointer points to a Node in storage. let new_root = unsafe { &*root_ptr.get() }; assert_eq!(new_root.as_mut_ptr(), root.as_mut_ptr()); - assert_eq!(new_root.data, 17); + assert_eq!(unsafe { *new_root.data() }, 17); assert!(new_root.is_black()); assert!(new_root.parent().is_none()); assert_eq!(new_root.left_ptr(), left.as_mut_ptr()); assert_eq!(new_root.right_ptr(), right.as_mut_ptr()); assert_eq!(left.parent_ptr(), new_root.as_mut_ptr()); - assert_eq!(left.data, 9); + assert_eq!(unsafe { *left.data() }, 9); assert!(left.is_black()); assert_eq!(left.left_ptr(), left_l.as_mut_ptr()); assert_eq!(left.right_ptr(), left_r.as_mut_ptr()); assert_eq!(left_l.parent_ptr(), left.as_mut_ptr()); - assert_eq!(left_l.data, 3); + assert_eq!(unsafe { *left_l.data() }, 3); assert!(left_l.is_red()); assert!(left_l.left().is_none()); assert!(left_l.right().is_none()); assert_eq!(left_r.parent_ptr(), left.as_mut_ptr()); - assert_eq!(left_r.data, 12); + assert_eq!(unsafe { *left_r.data() }, 12); assert!(left_r.is_red()); assert!(left_r.left().is_none()); assert!(left_r.right().is_none()); assert_eq!(right.parent_ptr(), new_root.as_mut_ptr()); - assert_eq!(right.data, 19); + assert_eq!(unsafe { *right.data() }, 19); assert!(right.is_black()); assert_eq!(right.left_ptr(), right_l.as_mut_ptr()); assert!(right.right().is_none()); assert_eq!(right_l.parent_ptr(), right.as_mut_ptr()); - assert_eq!(right_l.data, 18); + assert_eq!(unsafe { *right_l.data() }, 18); assert!(right_l.is_red()); assert!(right_l.left().is_none()); assert!(right_l.right().is_none()); @@ -1421,38 +1421,38 @@ mod tests { // by remove_node_from_tree. The pointer points to a Node in storage. let new_root = unsafe { &*root_ptr.get() }; assert_eq!(new_root.as_mut_ptr(), root.as_mut_ptr()); - assert_eq!(new_root.data, 17); + assert_eq!(unsafe { *new_root.data() }, 17); assert!(new_root.is_black()); assert!(new_root.parent().is_none()); assert_eq!(new_root.left_ptr(), left.as_mut_ptr()); assert_eq!(new_root.right_ptr(), right.as_mut_ptr()); assert_eq!(left.parent_ptr(), new_root.as_mut_ptr()); - assert_eq!(left.data, 9); + assert_eq!(unsafe { *left.data() }, 9); assert!(left.is_red()); assert_eq!(left.left_ptr(), left_l.as_mut_ptr()); assert_eq!(left.right_ptr(), left_r.as_mut_ptr()); assert_eq!(left_l.parent_ptr(), left.as_mut_ptr()); - assert_eq!(left_l.data, 3); + assert_eq!(unsafe { *left_l.data() }, 3); assert!(left_l.is_black()); assert!(left_l.left().is_none()); assert!(left_l.right().is_none()); assert_eq!(left_r.parent_ptr(), left.as_mut_ptr()); - assert_eq!(left_r.data, 12); + assert_eq!(unsafe { *left_r.data() }, 12); assert!(left_r.is_black()); assert!(left_r.left().is_none()); assert!(left_r.right().is_none()); assert_eq!(right.parent_ptr(), new_root.as_mut_ptr()); - assert_eq!(right.data, 19); + assert_eq!(unsafe { *right.data() }, 19); assert!(right.is_black()); assert!(right.left().is_none()); assert_eq!(right.right_ptr(), right_r.as_mut_ptr()); assert_eq!(right_r.parent_ptr(), right.as_mut_ptr()); - assert_eq!(right_r.data, 75); + assert_eq!(unsafe { *right_r.data() }, 75); assert!(right_r.is_red()); assert!(right_r.left().is_none()); assert!(right_r.right().is_none()); @@ -1510,32 +1510,32 @@ mod tests { let new_root = unsafe { &*root_ptr.get() }; assert_eq!(new_root.as_mut_ptr(), root.as_mut_ptr()); - assert_eq!(new_root.data, 17); + assert_eq!(unsafe { *new_root.data() }, 17); assert!(new_root.is_black()); assert!(new_root.parent().is_none()); assert_eq!(new_root.left_ptr(), left.as_mut_ptr()); assert_eq!(new_root.right_ptr(), right_r_l.as_mut_ptr()); assert_eq!(left.parent_ptr(), new_root.as_mut_ptr()); - assert_eq!(left.data, 9); + assert_eq!(unsafe { *left.data() }, 9); assert!(left.is_black()); assert!(left.left().is_none()); assert!(left.right().is_none()); assert_eq!(right_r_l.parent_ptr(), new_root.as_mut_ptr()); - assert_eq!(right_r_l.data, 24); + assert_eq!(unsafe { *right_r_l.data() }, 24); assert!(right_r_l.is_red()); assert_eq!(right_r_l.left_ptr(), right.as_mut_ptr()); assert_eq!(right_r_l.right_ptr(), right_r.as_mut_ptr()); assert_eq!(right.parent_ptr(), right_r_l.as_mut_ptr()); - assert_eq!(right.data, 19); + assert_eq!(unsafe { *right.data() }, 19); assert!(right.is_black()); assert!(right.left().is_none()); assert!(right.right().is_none()); assert_eq!(right_r.parent_ptr(), right_r_l.as_mut_ptr()); - assert_eq!(right_r.data, 75); + assert_eq!(unsafe { *right_r.data() }, 75); assert!(right_r.is_black()); assert!(right_r.left().is_none()); assert!(right_r.right().is_none()); @@ -1599,38 +1599,38 @@ mod tests { let new_root = unsafe { &*root_ptr.get() }; assert_eq!(new_root.as_mut_ptr(), root.as_mut_ptr()); - assert_eq!(new_root.data, 17); + assert_eq!(unsafe { *new_root.data() }, 17); assert!(new_root.is_black()); assert!(new_root.parent().is_none()); assert_eq!(new_root.left_ptr(), left.as_mut_ptr()); assert_eq!(new_root.right_ptr(), right_r.as_mut_ptr()); assert_eq!(left.parent_ptr(), new_root.as_mut_ptr()); - assert_eq!(left.data, 9); + assert_eq!(unsafe { *left.data() }, 9); assert!(left.is_black()); assert!(left.left().is_none()); assert!(left.right().is_none()); assert_eq!(right_r.parent_ptr(), new_root.as_mut_ptr()); - assert_eq!(right_r.data, 75); + assert_eq!(unsafe { *right_r.data() }, 75); assert!(right_r.is_red()); assert_eq!(right_r.left_ptr(), right.as_mut_ptr()); assert_eq!(right_r.right_ptr(), right_r_r.as_mut_ptr()); assert_eq!(right.parent_ptr(), right_r.as_mut_ptr()); - assert_eq!(right.data, 19); + assert_eq!(unsafe { *right.data() }, 19); assert!(right.is_black()); assert!(right.left().is_none()); assert_eq!(right.right_ptr(), right_r_l.as_mut_ptr()); assert_eq!(right_r_r.parent_ptr(), right_r.as_mut_ptr()); - assert_eq!(right_r_r.data, 81); + assert_eq!(unsafe { *right_r_r.data() }, 81); assert!(right_r_r.is_black()); assert!(right_r_r.left().is_none()); assert!(right_r_r.right().is_none()); assert_eq!(right_r_l.parent_ptr(), right.as_mut_ptr()); - assert_eq!(right_r_l.data, 24); + assert_eq!(unsafe { *right_r_l.data() }, 24); assert!(right_r_l.is_red()); assert!(right_r_l.left().is_none()); assert!(right_r_l.right().is_none()); From f669b072b8d47c6cddc9504484d7599eb796bc9d Mon Sep 17 00:00:00 2001 From: Gary Beihl Date: Thu, 15 Jan 2026 10:56:31 -0500 Subject: [PATCH 2/3] patina_internal_collections: Add safety comments to fix clippy warnings Add safety documentation to all unsafe blocks in production code. Add allow(clippy::undocumented_unsafe_blocks) to test modules to avoid redundant safety comments for test assertions. --- core/patina_internal_collections/src/bst.rs | 20 +++++++++++++++++--- core/patina_internal_collections/src/node.rs | 5 +++++ core/patina_internal_collections/src/rbt.rs | 19 ++++++++++++++++--- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/core/patina_internal_collections/src/bst.rs b/core/patina_internal_collections/src/bst.rs index 02a75cec0..93a039239 100644 --- a/core/patina_internal_collections/src/bst.rs +++ b/core/patina_internal_collections/src/bst.rs @@ -159,7 +159,10 @@ where /// pub fn get(&self, key: &D::Key) -> Option<&D> { match self.get_node(key) { - Some(node) => Some(unsafe { node.data() }), + Some(node) => { + // SAFETY: Nodes in the tree always have initialized data + Some(unsafe { node.data() }) + } None => None, } } @@ -209,7 +212,10 @@ where /// pub fn get_with_idx(&self, idx: usize) -> Option<&D> { match self.storage.get(idx) { - Some(node) => Some(unsafe { node.data() }), + Some(node) => { + // SAFETY: Nodes in storage always have initialized data + Some(unsafe { node.data() }) + } None => None, } } @@ -236,7 +242,10 @@ where /// pub unsafe fn get_with_idx_mut(&mut self, idx: usize) -> Option<&mut D> { match self.storage.get_mut(idx) { - Some(node) => Some(unsafe { node.data_mut() }), + Some(node) => { + // SAFETY: Nodes in storage always have initialized data + Some(unsafe { node.data_mut() }) + } None => None, } } @@ -281,6 +290,7 @@ where let mut current = self.root(); let mut closest = None; while let Some(node) = current { + // SAFETY: Nodes in the tree always have initialized data match key.cmp(unsafe { node.data() }.key()) { Ordering::Equal => return Some(self.storage.idx(node.as_mut_ptr())), Ordering::Less => current = node.left(), @@ -494,6 +504,7 @@ where fn get_node(&self, key: &D::Key) -> Option<&Node> { let mut current_idx = self.root(); while let Some(node) = current_idx { + // SAFETY: Nodes in the tree always have initialized data match key.cmp(unsafe { node.data() }.key()) { Ordering::Equal => return Some(node), Ordering::Less => current_idx = node.left(), @@ -639,6 +650,7 @@ where fn _dfs(node: Option<&Node>, values: &mut alloc::vec::Vec) { if let Some(node) = node { Self::_dfs(node.left(), values); + // SAFETY: Nodes in the tree always have initialized data values.push(unsafe { *node.data() }); Self::_dfs(node.right(), values); } @@ -659,6 +671,7 @@ where #[cfg(test)] #[coverage(off)] +#[allow(clippy::undocumented_unsafe_blocks)] mod tests { use crate::{Bst, node_size}; @@ -876,6 +889,7 @@ mod tests { } #[cfg(test)] +#[allow(clippy::undocumented_unsafe_blocks)] mod fuzz_tests { extern crate std; use crate::{Bst, node_size}; diff --git a/core/patina_internal_collections/src/node.rs b/core/patina_internal_collections/src/node.rs index feffa5c35..67d1d771e 100644 --- a/core/patina_internal_collections/src/node.rs +++ b/core/patina_internal_collections/src/node.rs @@ -660,6 +660,7 @@ impl SliceKey for Node { #[cfg(test)] #[coverage(off)] +#[allow(clippy::undocumented_unsafe_blocks)] mod tests { use super::*; @@ -717,7 +718,9 @@ mod tests { p4.set_parent(Some(p1)); + // SAFETY: Test nodes are created with initialized data via Node::new() assert_eq!(unsafe { *Node::sibling(p2).unwrap().data() }, 3); + // SAFETY: Test nodes are created with initialized data via Node::new() assert_eq!(unsafe { *Node::sibling(p3).unwrap().data() }, 2); assert!(Node::sibling(p1).is_none()); } @@ -757,6 +760,7 @@ mod tests { p2.set_right(Some(p4)); p4.set_parent(Some(p2)); + // SAFETY: Test nodes are created with initialized data via Node::new() assert_eq!(unsafe { *Node::predecessor(p1).unwrap().data() }, 4); assert!(Node::predecessor(p4).is_none()); } @@ -777,6 +781,7 @@ mod tests { p2.set_right(Some(p4)); p4.set_parent(Some(p2)); + // SAFETY: Test nodes are created with initialized data via Node::new() assert_eq!(unsafe { *Node::successor(p1).unwrap().data() }, 3); assert!(Node::successor(p4).is_none()); } diff --git a/core/patina_internal_collections/src/rbt.rs b/core/patina_internal_collections/src/rbt.rs index 199a158ec..eb9b00a1d 100644 --- a/core/patina_internal_collections/src/rbt.rs +++ b/core/patina_internal_collections/src/rbt.rs @@ -165,7 +165,10 @@ where /// pub fn get(&self, key: &D::Key) -> Option<&D> { match self.get_node(key) { - Some(node) => Some(unsafe { node.data() }), + Some(node) => { + // SAFETY: Nodes in the tree always have initialized data + Some(unsafe { node.data() }) + } None => None, } } @@ -211,7 +214,10 @@ where /// pub fn get_with_idx(&self, idx: usize) -> Option<&D> { match self.storage.get(idx) { - Some(node) => Some(unsafe { node.data() }), + Some(node) => { + // SAFETY: Nodes in storage always have initialized data + Some(unsafe { node.data() }) + } None => None, } } @@ -238,7 +244,10 @@ where /// pub unsafe fn get_with_idx_mut(&mut self, idx: usize) -> Option<&mut D> { match self.storage.get_mut(idx) { - Some(node) => Some(unsafe { node.data_mut() }), + Some(node) => { + // SAFETY: Nodes in storage always have initialized data + Some(unsafe { node.data_mut() }) + } None => None, } } @@ -283,6 +292,7 @@ where let mut current = self.root(); let mut closest = None; while let Some(node) = current { + // SAFETY: Nodes in the tree always have initialized data match key.cmp(unsafe { node.data() }.key()) { Ordering::Equal => return Some(self.storage.idx(node.as_mut_ptr())), Ordering::Less => current = node.left(), @@ -870,6 +880,7 @@ where fn _dfs(node: Option<&Node>, values: &mut alloc::vec::Vec) { if let Some(node) = node { Self::_dfs(node.left(), values); + // SAFETY: Nodes in the tree always have initialized data values.push(unsafe { *node.data() }); Self::_dfs(node.right(), values); } @@ -900,6 +911,7 @@ where #[cfg(test)] #[coverage(off)] +#[allow(clippy::undocumented_unsafe_blocks)] mod tests { extern crate std; @@ -1821,6 +1833,7 @@ mod tests { } #[cfg(test)] +#[allow(clippy::undocumented_unsafe_blocks)] mod fuzz_tests { extern crate std; use crate::{Rbt, node_size}; From c761fd6a19ff83bb6d9bc051769c68ad1419e920 Mon Sep 17 00:00:00 2001 From: Gary Beihl Date: Tue, 10 Mar 2026 18:26:29 -0400 Subject: [PATCH 3/3] patina_internal_collections: Address review feedback on Node MaybeUninit - Use MaybeUninit> in expand() to avoid UB from creating references to uninitialized memory - Change Node data field visibility from pub to pub(crate) - Fix expand() copy loop to iterate 0..self.len() instead of 0..self.capacity() to avoid reading uninitialized data - Remove module-level #[allow(clippy::undocumented_unsafe_blocks)] in favor of per-block safety comments Signed-off-by: Gary Beihl --- core/patina_internal_collections/src/node.rs | 32 ++++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/core/patina_internal_collections/src/node.rs b/core/patina_internal_collections/src/node.rs index de97d4243..0b8724833 100644 --- a/core/patina_internal_collections/src/node.rs +++ b/core/patina_internal_collections/src/node.rs @@ -204,18 +204,32 @@ where /// /// O(n) pub fn expand(&mut self, slice: &'a mut [u8]) { - // SAFETY: This is reinterpreting a byte slice as a Node slice. + // SAFETY: This is reinterpreting a byte slice as a MaybeUninit> slice. + // Using MaybeUninit explicitly represents uninitialized memory and avoids undefined + // behavior from creating references to uninitialized Node. // 1. The alignment is handled by slice casting rules // 2. The correct number of Node elements that fit in the byte slice is calculated // 3. The lifetime 'a ensures the byte slice remains valid for the storage's lifetime - let buffer = unsafe { - slice::from_raw_parts_mut::<'a, Node>( - slice as *mut [u8] as *mut Node, + // 4. MaybeUninit has the same size and alignment as T + let uninit_buffer = unsafe { + slice::from_raw_parts_mut::<'a, MaybeUninit>>( + slice as *mut [u8] as *mut MaybeUninit>, slice.len() / mem::size_of::>(), ) }; - assert!(buffer.len() >= self.capacity()); + assert!(uninit_buffer.len() >= self.capacity()); + + // Initialize all new nodes with uninitialized data fields. + // Nodes at indices 0..self.capacity() will be overwritten with copied data below. + for elem in uninit_buffer.iter_mut() { + elem.write(Node::new_uninit()); + } + + // SAFETY: All nodes have been initialized (though their data fields are uninitialized). + // We can now safely convert from MaybeUninit> to Node. + let buffer = + unsafe { slice::from_raw_parts_mut(uninit_buffer.as_mut_ptr() as *mut Node, uninit_buffer.len()) }; // When current capacity is 0, we just need to copy the data and build the available list if self.capacity() == 0 { @@ -226,14 +240,13 @@ where } // Copy the data from the old buffer to the new buffer. Update the pointers to the new buffer - for i in 0..self.capacity() { + for i in 0..self.len() { let old = &self.data[i]; // SAFETY: Nodes at indices 0..self.len() are "in use" and have initialized data. // We copy the initialized data from old to new. unsafe { let old_data = old.data(); - // Use ptr::copy to copy the data from old to new buffer[i].data = MaybeUninit::new(*old_data); } buffer[i].set_color(old.color()); @@ -486,7 +499,7 @@ pub struct Node where D: SliceKey, { - pub data: MaybeUninit, + pub(crate) data: MaybeUninit, color: Cell, parent: Cell<*mut Node>, left: Cell<*mut Node>, @@ -654,7 +667,6 @@ impl SliceKey for Node { #[cfg(test)] #[coverage(off)] -#[allow(clippy::undocumented_unsafe_blocks)] mod tests { use super::*; @@ -858,7 +870,7 @@ mod tests { } #[test] - #[should_panic(expected = "assertion failed: buffer.len() >= self.capacity()")] + #[should_panic(expected = "assertion failed: uninit_buffer.len() >= self.capacity()")] fn test_expand_prevents_capacity_shrink() { // Verify that expand() prevents shrinking capacity const INITIAL_SIZE: usize = 10;