diff --git a/.gitignore b/.gitignore index fbc2a9b1..4684b698 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ build/ .idea/ +.vscode/settings.json test/ diff --git a/.rustfmt.toml b/.rustfmt.toml index d69872c6..85b1cfc7 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,4 +1,4 @@ -max_width = 100 +max_width = 80 hard_tabs = false tab_spaces = 4 newline_style = "Auto" @@ -14,7 +14,7 @@ single_line_if_else_max_width = 60 single_line_let_else_max_width = 60 wrap_comments = false format_code_in_doc_comments = false -doc_comment_code_block_width = 100 +doc_comment_code_block_width = 80 comment_width = 80 normalize_comments = false normalize_doc_attributes = false @@ -29,8 +29,8 @@ fn_single_line = false where_single_line = false imports_indent = "Block" imports_layout = "Mixed" -imports_granularity = "Preserve" -group_imports = "Preserve" +imports_granularity = "Module" +group_imports = "StdExternalCrate" reorder_imports = true reorder_modules = true reorder_impl_items = false diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 634d16af..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "makefile.configureOnOpen": false, -} diff --git a/Cargo.lock b/Cargo.lock index 59242bbc..896ec493 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,21 @@ dependencies = [ "log", ] +[[package]] +name = "arcref" +version = "0.1.0" + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic_unique_refcell" version = "0.1.0" @@ -55,14 +70,13 @@ name = "buddy_allocator" version = "0.1.0" dependencies = [ "eonix_mm", - "intrusive_list", ] [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "critical-section" @@ -144,9 +158,12 @@ dependencies = [ "acpi", "align_ext", "another_ext4", + "arcref", + "async-trait", "atomic_unique_refcell", "bitflags", "buddy_allocator", + "cfg-if", "eonix_hal", "eonix_log", "eonix_macros", @@ -155,6 +172,7 @@ dependencies = [ "eonix_preempt", "eonix_runtime", "eonix_sync", + "futures", "intrusive-collections 0.9.8", "intrusive_list", "itertools", @@ -162,6 +180,7 @@ dependencies = [ "posix_types", "slab_allocator", "stalloc", + "static_assertions", "unwinding", "virtio-drivers", "xmas-elf", @@ -265,6 +284,79 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784a4df722dc6267a04af36895398f59d21d07dce47232adf31ec0ff2fa45e67" +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-macro", + "futures-sink", + "futures-task", + "pin-project-lite", + "pin-utils", +] + [[package]] name = "gimli" version = "0.32.0" @@ -332,6 +424,18 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pointers" version = "0.1.0" @@ -413,7 +517,6 @@ version = "0.1.0" dependencies = [ "eonix_mm", "eonix_sync", - "intrusive_list", ] [[package]] @@ -422,6 +525,12 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a37f0ead4094eeb54c6893316aa139e48b252f1c07511e5124fa1f9414df5b6c" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "syn" version = "2.0.104" diff --git a/Cargo.toml b/Cargo.toml index e70d8c65..214e5941 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,9 @@ edition = "2021" crate-type = ["bin"] [dependencies] +arcref = { path = "./crates/arcref", default-features = false, features = [ + "alloc", +] } atomic_unique_refcell = { path = "./crates/atomic_unique_refcell", features = [ "no_std", ] } @@ -24,8 +27,10 @@ pointers = { path = "./crates/pointers" } posix_types = { path = "./crates/posix_types" } slab_allocator = { path = "./crates/slab_allocator" } +intrusive-collections = { version = "0.9.8", features = [ + "nightly", +], git = "https://github.com/greatbridf/intrusive-rs" } bitflags = "2.6.0" -intrusive-collections = { version = "0.9.8", git = "https://github.com/greatbridf/intrusive-rs" } itertools = { version = "0.13.0", default-features = false } acpi = "5.2.0" align_ext = "0.1.0" @@ -34,6 +39,13 @@ another_ext4 = { git = "https://github.com/SMS-Derfflinger/another_ext4", branch stalloc = { 
version = "0.6.1", default-features = false, features = [ "allocator-api", ] } +async-trait = "0.1.89" +futures = { version = "0.3.31", features = [ + "alloc", + "async-await", +], default-features = false } +static_assertions = "1.1.0" +cfg-if = "1.0.4" [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/crates/arcref/Cargo.lock b/crates/arcref/Cargo.lock new file mode 100644 index 00000000..3c4e1567 --- /dev/null +++ b/crates/arcref/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "arcref" +version = "0.1.0" diff --git a/crates/arcref/Cargo.toml b/crates/arcref/Cargo.toml new file mode 100644 index 00000000..a0af89f8 --- /dev/null +++ b/crates/arcref/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "arcref" +version = "0.1.0" +edition = "2024" + +[dependencies] + +[features] +alloc = [] +std = ["alloc"] +default = ["std"] diff --git a/crates/arcref/src/arcref.rs b/crates/arcref/src/arcref.rs new file mode 100644 index 00000000..3d01852d --- /dev/null +++ b/crates/arcref/src/arcref.rs @@ -0,0 +1,216 @@ +#[cfg(not(feature = "std"))] +use core::{ + borrow::Borrow, + marker::{PhantomData, Unsize}, + mem::ManuallyDrop, + ops::{Deref, DispatchFromDyn}, +}; + +#[cfg(all(not(feature = "std"), feature = "alloc"))] +extern crate alloc; + +#[cfg(all(not(feature = "std"), feature = "alloc"))] +use alloc::sync::Arc; + +#[cfg(feature = "std")] +use std::{ + borrow::Borrow, + marker::{PhantomData, Unsize}, + mem::ManuallyDrop, + ops::{Deref, DispatchFromDyn}, + sync::Arc, +}; + +pub trait AsArcRef +where + T: ?Sized, +{ + /// Borrow the [`Arc`] and convert the reference into [`ArcRef`]. + fn aref(&self) -> ArcRef<'_, T>; +} + +pub struct ArcRef<'a, T: ?Sized> { + ptr: *const T, + _phantom: PhantomData<&'a ()>, +} + +unsafe impl Send for ArcRef<'_, T> {} +unsafe impl Sync for ArcRef<'_, T> {} + +#[cfg(any(feature = "std", feature = "alloc"))] +impl<'a, T: ?Sized> ArcRef<'a, T> { + pub fn new(arc: &'a Arc) -> Self { + Self { + ptr: Arc::as_ptr(arc), + _phantom: PhantomData, + } + } + + /// Create a new `ArcRef` from a raw pointer. + /// + /// # Safety + /// The given pointer MUST be created by `Arc::as_ptr` or `Arc::into_raw`. + /// The caller is responsible to ensure that the pointer is valid for the + /// lifetime of the `ArcRef`. 
+ pub unsafe fn new_unchecked(arc_ptr: *const T) -> Self { + Self { + ptr: arc_ptr, + _phantom: PhantomData, + } + } + + pub fn with_arc(self, func: Func) -> Out + where + Func: FnOnce(&Arc) -> Out, + { + func(&ManuallyDrop::new(unsafe { Arc::from_raw(self.ptr) })) + } + + pub fn clone_arc(self) -> Arc { + self.with_arc(|arc| arc.clone()) + } + + pub fn ptr_eq_arc(self, other: &Arc) -> bool { + self.with_arc(|arc| Arc::ptr_eq(arc, other)) + } +} + +#[cfg(all(not(feature = "std"), feature = "alloc"))] +impl AsArcRef for Arc +where + T: ?Sized, +{ + fn aref(&self) -> ArcRef<'_, T> { + ArcRef::new(self) + } +} + +impl AsRef for ArcRef<'_, T> +where + T: ?Sized, +{ + fn as_ref(&self) -> &T { + self.deref() + } +} + +impl Borrow for ArcRef<'_, T> +where + T: ?Sized, +{ + fn borrow(&self) -> &T { + self.deref() + } +} + +impl<'a, T> Clone for ArcRef<'a, T> +where + T: ?Sized, +{ + fn clone(&self) -> Self { + Self { + ptr: self.ptr, + _phantom: PhantomData, + } + } +} + +impl Copy for ArcRef<'_, T> where T: ?Sized {} + +impl Deref for ArcRef<'_, T> { + type Target = T; + + fn deref(&self) -> &T { + unsafe { + // SAFETY: `self.ptr` points to a valid `T` instance because it was + // created from a valid `Arc`. + self.ptr.as_ref().unwrap_unchecked() + } + } +} + +impl<'a, T, U> DispatchFromDyn> for ArcRef<'a, T> +where + T: ?Sized + Unsize, + U: ?Sized, +{ +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn create_from_arc() { + let data = Arc::new(42); + let _arc_ref = ArcRef::new(&data); + } + + #[test] + fn deref() { + let data = Arc::new(42); + let arc_ref = ArcRef::new(&data); + + assert_eq!(*arc_ref, 42); + } + + #[test] + fn clone_into_arc() { + let data = Arc::new(42); + let arc_ref = ArcRef::new(&data); + + let cloned = arc_ref.clone_arc(); + + assert_eq!(Arc::strong_count(&data), 2); + assert_eq!(*cloned, 42); + } + + #[test] + fn dyn_compatible_receiver() { + struct Data(u32); + + trait Trait { + fn foo(self: ArcRef) -> u32; + } + + impl Trait for Data { + fn foo(self: ArcRef) -> u32 { + self.0 + } + } + + let data = Arc::new(Data(42)); + let arc_ref = ArcRef::new(&data); + + assert_eq!(arc_ref.foo(), 42); + } + + #[test] + fn clone_from_train_methods() { + struct Data(u32); + + trait Trait { + fn foo(&self) -> u32; + + fn clone_self(self: ArcRef) -> Arc; + } + + impl Trait for Data { + fn foo(&self) -> u32 { + self.0 + } + + fn clone_self(self: ArcRef) -> Arc { + self.clone_arc() as _ + } + } + + let data = Arc::new(Data(42)); + let arc_ref = ArcRef::new(&data); + + let cloned = arc_ref.clone_self(); + + assert_eq!(arc_ref.foo(), 42); + assert_eq!(cloned.foo(), 42); + } +} diff --git a/crates/arcref/src/lib.rs b/crates/arcref/src/lib.rs new file mode 100644 index 00000000..83a61985 --- /dev/null +++ b/crates/arcref/src/lib.rs @@ -0,0 +1,8 @@ +#![cfg_attr(not(feature = "std"), no_std)] +#![feature(arbitrary_self_types)] +#![feature(dispatch_from_dyn)] +#![feature(unsize)] + +mod arcref; + +pub use arcref::{ArcRef, AsArcRef}; diff --git a/crates/buddy_allocator/Cargo.toml b/crates/buddy_allocator/Cargo.toml index 51f02295..bdb0a28c 100644 --- a/crates/buddy_allocator/Cargo.toml +++ b/crates/buddy_allocator/Cargo.toml @@ -5,4 +5,3 @@ edition = "2024" [dependencies] eonix_mm = { path = "../eonix_mm" } -intrusive_list = { path = "../intrusive_list" } diff --git a/crates/buddy_allocator/src/free_area.rs b/crates/buddy_allocator/src/free_area.rs deleted file mode 100644 index 837f733f..00000000 --- a/crates/buddy_allocator/src/free_area.rs +++ /dev/null @@ -1,59 +0,0 @@ -use 
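With the new `arcref` crate in place, here is a minimal usage sketch (nightly only, since both the crate and `self: ArcRef<Self>` receivers rely on unstable features). The `Session` type and its `describe` method are hypothetical, purely for illustration:

```rust
#![feature(arbitrary_self_types)] // nightly; the crate itself requires it

use std::sync::Arc;

use arcref::ArcRef;

struct Session {
    id: u32,
}

impl Session {
    // `ArcRef` is `Copy` and derefs to `Session`, so this call costs no
    // atomic refcount traffic.
    fn describe(self: ArcRef<Self>) -> u32 {
        self.id
    }
}

fn main() {
    let session = Arc::new(Session { id: 7 });

    // Borrow the `Arc` without bumping its strong count.
    let sref = ArcRef::new(&session);
    assert_eq!(sref.describe(), 7);
    assert_eq!(Arc::strong_count(&session), 1);

    // Pay for a real clone only when ownership is needed.
    let owned: Arc<Session> = sref.clone_arc();
    assert_eq!(Arc::strong_count(&session), 2);
    drop(owned);
}
```

`ArcRef::new` only records `Arc::as_ptr`, so passing `sref` around is free; `clone_arc` is the single point where the strong count is touched.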
diff --git a/crates/buddy_allocator/Cargo.toml b/crates/buddy_allocator/Cargo.toml
index 51f02295..bdb0a28c 100644
--- a/crates/buddy_allocator/Cargo.toml
+++ b/crates/buddy_allocator/Cargo.toml
@@ -5,4 +5,3 @@ edition = "2024"
 
 [dependencies]
 eonix_mm = { path = "../eonix_mm" }
-intrusive_list = { path = "../intrusive_list" }
diff --git a/crates/buddy_allocator/src/free_area.rs b/crates/buddy_allocator/src/free_area.rs
deleted file mode 100644
index 837f733f..00000000
--- a/crates/buddy_allocator/src/free_area.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-use crate::BuddyRawPage;
-use core::marker::{PhantomData, Send, Sync};
-use intrusive_list::Link;
-
-pub struct FreeArea<Raw> {
-    free_list: Link,
-    count: usize,
-    _phantom: PhantomData<Raw>,
-}
-
-unsafe impl<Raw> Send for FreeArea<Raw> {}
-unsafe impl<Raw> Sync for FreeArea<Raw> {}
-
-impl<Raw> FreeArea<Raw>
-where
-    Raw: BuddyRawPage,
-{
-    pub const fn new() -> Self {
-        Self {
-            free_list: Link::new(),
-            count: 0,
-            _phantom: PhantomData,
-        }
-    }
-
-    pub fn get_free_pages(&mut self) -> Option<Raw> {
-        self.free_list.next_mut().map(|pages_link| {
-            assert_ne!(self.count, 0);
-
-            let pages_ptr = unsafe {
-                // SAFETY: Items in `self.free_list` are guaranteed to be of type `Raw`.
-                Raw::from_link(pages_link)
-            };
-
-            self.count -= 1;
-            pages_link.remove();
-
-            pages_ptr
-        })
-    }
-
-    pub fn add_pages(&mut self, pages_ptr: Raw) {
-        self.count += 1;
-        pages_ptr.set_free();
-
-        unsafe {
-            self.free_list.insert(pages_ptr.get_link());
-        }
-    }
-
-    pub fn del_pages(&mut self, pages_ptr: Raw) {
-        assert!(self.count >= 1 && pages_ptr.is_free());
-        self.count -= 1;
-        pages_ptr.clear_free();
-        unsafe {
-            pages_ptr.get_link().remove();
-        }
-    }
-}
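Both the rewritten `lib.rs` below and the deleted `zone.rs` lean on `BuddyPFNOps::buddy_pfn` and `combined_pfn` to split and coalesce blocks. The `impl` itself sits outside the hunks shown here, so the following is a sketch of the conventional buddy index arithmetic those names denote, with plain `usize` standing in for `PFN`:

```rust
// Standard buddy-index math (sketch; the real impl is not in this diff).
fn buddy_pfn(pfn: usize, order: u32) -> usize {
    // Flip the bit that distinguishes a block from its buddy.
    pfn ^ (1 << order)
}

fn combined_pfn(pfn: usize, buddy: usize) -> usize {
    // The merged block starts at the lower of the two buddies.
    pfn & buddy
}

fn main() {
    // An order-0 block at PFN 5 pairs with PFN 4; merged, they start at 4.
    assert_eq!(buddy_pfn(5, 0), 4);
    assert_eq!(combined_pfn(5, 4), 4);

    // An order-3 block at PFN 8 pairs with PFN 0; merged, order 4 at 0.
    assert_eq!(buddy_pfn(8, 3), 0);
    assert_eq!(combined_pfn(8, 0), 0);
}
```

Flipping bit `order` of the PFN maps a block to its unique buddy, and masking that bit off gives the start of the merged order-plus-one block, which is what lets `dealloc` coalesce with a few integer operations per level.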
diff --git a/crates/buddy_allocator/src/lib.rs b/crates/buddy_allocator/src/lib.rs
index f8c8eeda..82a7d6c5 100644
--- a/crates/buddy_allocator/src/lib.rs
+++ b/crates/buddy_allocator/src/lib.rs
@@ -1,87 +1,250 @@
 #![no_std]
 
-mod free_area;
-mod zone;
+use core::hint::unreachable_unchecked;
 
-use core::sync::atomic::Ordering;
-use eonix_mm::{
-    address::PAddr,
-    paging::{RawPage, PFN},
-};
-use intrusive_list::Link;
-use zone::Zone;
+use eonix_mm::address::{AddrOps as _, PAddr, PRange};
+use eonix_mm::paging::{FolioList, FolioListSized, Zone, PFN};
 
 const MAX_ORDER: u32 = 10;
-const ZONE_AREAS: usize = const { MAX_ORDER as usize + 1 };
+const AREAS: usize = const { MAX_ORDER as usize + 1 };
 
-pub trait BuddyRawPage: RawPage {
-    /// Get the container raw page struct of the list link.
-    ///
-    /// # Safety
-    /// The caller MUST ensure that the link points to a `RawPage`.
-    unsafe fn from_link(link: &mut Link) -> Self;
+pub trait BuddyFolio: Sized + 'static {
+    fn pfn(&self) -> PFN;
 
-    /// Get the list link of the raw page.
-    ///
-    /// # Safety
-    /// The caller MUST ensure that at any time, only one mutable reference
-    /// to the link exists.
-    unsafe fn get_link(&self) -> &mut Link;
+    fn get_order(&self) -> u32;
+    fn is_buddy(&self) -> bool;
 
-    fn set_order(&self, order: u32);
+    fn set_order(&mut self, order: u32);
+    fn set_buddy(&mut self, value: bool);
+}
 
-    fn is_buddy(&self) -> bool;
-    fn is_free(&self) -> bool;
+struct FreeArea<L>
+where
+    L: FolioList,
+{
+    free_list: L,
+    count: usize,
+}
 
-    fn set_buddy(&self);
-    fn set_free(&self);
+unsafe impl<L> Send for FreeArea<L> where L: FolioList {}
+unsafe impl<L> Sync for FreeArea<L> where L: FolioList {}
 
-    fn clear_buddy(&self);
-    fn clear_free(&self);
+pub struct BuddyAllocator<Z, L>
+where
+    Z: Zone + 'static,
+    L: FolioList,
+{
+    zone: &'static Z,
+    free_areas: [FreeArea<L>; AREAS],
 }
 
-pub struct BuddyAllocator<T>
+impl<Z, L> BuddyAllocator<Z, L>
 where
-    T: BuddyRawPage,
+    Z: Zone + 'static,
+    Z::Page: BuddyFolio,
+    L: FolioListSized,
 {
-    zone: Zone<T, ZONE_AREAS>,
+    pub const fn new(zone: &'static Z) -> Self {
+        Self {
+            zone,
+            free_areas: [const { FreeArea::new() }; AREAS],
+        }
+    }
 }
 
-impl<T> BuddyAllocator<T>
+impl<Z, L, F> BuddyAllocator<Z, L>
 where
-    T: BuddyRawPage,
+    Z: Zone<Page = F>,
+    L: FolioList<Folio = F>,
+    F: BuddyFolio + 'static,
 {
-    pub const fn new() -> Self {
-        Self { zone: Zone::new() }
+    pub fn create_folios(&mut self, start: PAddr, end: PAddr) {
+        assert!(
+            self.zone
+                .contains_prange(PRange::new(start.ceil(), end.floor())),
+            "The given address range is not within the zone."
+        );
+
+        let mut pfn = PFN::from(start.ceil());
+        let end_pfn = PFN::from(end.floor());
+
+        while pfn < end_pfn {
+            let mut order = usize::from(pfn).trailing_zeros().min(MAX_ORDER);
+            let new_end_pfn = loop {
+                let new_end = pfn + (1 << order);
+
+                if new_end <= end_pfn {
+                    break new_end;
+                }
+
+                order -= 1;
+            };
+
+            unsafe {
+                // SAFETY: We've checked that the range is within the zone above.
+                self.add_folio_unchecked(pfn, order)
+            };
+
+            pfn = new_end_pfn;
+        }
+    }
+
+    fn add_folio(&mut self, pfn: PFN, order: u32) {
+        let prange = PRange::from(PAddr::from(pfn)).grow(1 << (order + 12));
+        assert!(
+            self.zone.contains_prange(prange),
+            "The given folio is not within the zone."
+        );
+
+        unsafe {
+            // SAFETY: Checks above.
+            self.add_folio_unchecked(pfn, order);
+        }
+    }
+
+    unsafe fn add_folio_unchecked(&mut self, pfn: PFN, order: u32) {
+        let Some(mut folio) = self.zone.get_page(pfn) else {
+            unsafe { unreachable_unchecked() }
+        };
+
+        unsafe {
+            // SAFETY: The caller ensures that the page is unused.
+            let folio_mut = folio.as_mut();
+            self.free_areas[order as usize].add_folio(folio_mut, order);
+        }
+    }
+
+    fn break_folio(&mut self, folio: &mut F, order: u32, target_order: u32) {
+        let pfn = folio.pfn();
+
+        for order in (target_order..order).rev() {
+            let buddy_pfn = pfn + (1 << order);
+
+            unsafe {
+                // SAFETY: We got the page from `self.free_areas`. Checks are
+                // done when we've put the page into the buddy system.
+                self.add_folio_unchecked(buddy_pfn, order);
+            }
+        }
+
+        folio.set_order(target_order);
     }
 
-    pub fn create_pages(&mut self, start: PAddr, end: PAddr) {
-        self.zone.create_pages(start, end);
+    pub fn alloc_order(&mut self, order: u32) -> Option<&'static mut Z::Page> {
+        for current_order in order..AREAS as u32 {
+            let Some(folio) = self.free_areas[current_order as usize].get_free_folio() else {
+                continue;
+            };
+
+            if current_order > order {
+                self.break_folio(folio, current_order, order);
+            }
+
+            return Some(folio);
+        }
+
+        None
     }
 
-    pub fn alloc_order(&mut self, order: u32) -> Option<T> {
-        let pages_ptr = self.zone.get_free_pages(order);
+    pub unsafe fn dealloc(&mut self, folio: &'static mut Z::Page) {
+        let mut pfn = folio.pfn();
+        let mut order = folio.get_order();
+
+        assert!(
+            !folio.is_buddy(),
+            "Trying to free a folio that is already in the buddy system: {pfn:?}",
+        );
+
+        while order < MAX_ORDER {
+            let buddy_pfn = pfn.buddy_pfn(order);
+            let Some(buddy) = self.try_get_buddy(buddy_pfn, order) else {
+                break;
+            };
 
-        if let Some(pages_ptr) = pages_ptr {
-            // SAFETY: Memory order here can be Relaxed is for the same reason as that
-            // in the copy constructor of `std::shared_ptr`.
-            pages_ptr.refcount().fetch_add(1, Ordering::Relaxed);
-            pages_ptr.clear_free();
+            self.free_areas[order as usize].remove_folio(buddy);
+            pfn = pfn.combined_pfn(buddy_pfn);
+            order += 1;
         }
 
-        pages_ptr
+        self.add_folio(pfn, order);
     }
 
-    pub unsafe fn dealloc(&mut self, page_ptr: T) {
-        self.zone.free_pages(page_ptr);
+    /// This function checks whether the given page is within our [`Zone`] and
+    /// is a free buddy page with the specified order.
+    ///
+    /// We can ensure exclusive access to a buddy page of [`order`] if
+    /// - the buddy is within the same [`Zone`] as us.
+    /// - the buddy is a free buddy (in some [`FreeArea`])
+    /// - the buddy has order [`order`]
+    fn try_get_buddy<'a>(&mut self, buddy_pfn: PFN, order: u32) -> Option<&'a mut F> {
+        let mut buddy = self.zone.get_page(buddy_pfn)?;
+
+        unsafe {
+            // SAFETY: We just test whether the page is a buddy.
+            let buddy_ref = buddy.as_ref();
+
+            if !buddy_ref.is_buddy() {
+                return None;
+            }
+
+            // Sad...
+            if buddy_ref.get_order() != order {
+                return None;
+            }
+
+            // SAFETY: We have the mutable reference to the buddy allocator.
+            // So all the pages within are exclusively accessible to us.
+            Some(buddy.as_mut())
+        }
+    }
+}
+
+impl<L> FreeArea<L>
+where
+    L: FolioListSized,
+{
+    const fn new() -> Self {
+        Self {
+            free_list: L::NEW,
+            count: 0,
+        }
+    }
+}
+
+impl<L> FreeArea<L>
+where
+    L: FolioList,
+    L::Folio: BuddyFolio + 'static,
+{
+    pub fn get_free_folio(&mut self) -> Option<&'static mut L::Folio> {
+        self.free_list.pop_head().map(|folio| {
+            assert_ne!(self.count, 0, "Oops");
+
+            folio.set_buddy(false);
+            self.count -= 1;
+
+            folio
+        })
+    }
+
+    pub fn add_folio(&mut self, folio: &'static mut L::Folio, order: u32) {
+        folio.set_order(order);
+        folio.set_buddy(true);
+
+        self.count += 1;
+        self.free_list.push_tail(folio);
+    }
+
+    pub fn remove_folio(&mut self, folio: &mut L::Folio) {
+        assert_ne!(self.count, 0, "Oops");
+        folio.set_buddy(false);
 
-    pub fn has_management_over(page_ptr: T) -> bool {
-        !page_ptr.is_free() && page_ptr.is_buddy()
+        self.count -= 1;
+        self.free_list.remove(folio);
     }
 }
 
-pub(self) trait BuddyPFNOps {
+trait BuddyPFNOps {
     fn buddy_pfn(self, order: u32) -> PFN;
     fn combined_pfn(self, buddy_pfn: PFN) -> PFN;
 }
diff --git a/crates/buddy_allocator/src/zone.rs b/crates/buddy_allocator/src/zone.rs
deleted file mode 100644
index 2c850ef9..00000000
--- a/crates/buddy_allocator/src/zone.rs
+++ /dev/null
@@ -1,146 +0,0 @@
-use super::free_area::FreeArea;
-use crate::{BuddyPFNOps as _, BuddyRawPage};
-use core::sync::atomic::Ordering;
-use eonix_mm::{
-    address::{AddrOps as _, PAddr},
-    paging::PFN,
-};
-
-pub(super) struct Zone<Raw, const AREAS: usize> {
-    free_areas: [FreeArea<Raw>; AREAS],
-}
-
-impl<Raw, const AREAS: usize> Zone<Raw, AREAS>
-where
-    Raw: BuddyRawPage,
-{
-    pub const fn new() -> Self {
-        Self {
-            free_areas: [const { FreeArea::new() }; AREAS],
-        }
-    }
-
-    pub fn get_free_pages(&mut self, order: u32) -> Option<Raw> {
-        for current_order in order..AREAS as u32 {
-            let pages_ptr = self.free_areas[current_order as usize].get_free_pages();
-            let Some(pages_ptr) = pages_ptr else { continue };
-
-            pages_ptr.set_order(order);
-
-            if current_order > order {
-                self.expand(pages_ptr, current_order, order);
-            }
-
-            assert!(
-                pages_ptr.is_present(),
-                "Page {:?} is not present",
-                pages_ptr.into(),
-            );
-
-            assert!(
-                pages_ptr.is_free(),
-                "Page {:?} is not free",
-                pages_ptr.into(),
-            );
-
-            return Some(pages_ptr);
-        }
-        None
-    }
-
-    fn expand(&mut self, pages_ptr: Raw, order: u32, target_order: u32) {
-        let mut offset = 1 << order;
-        let pages_pfn = Into::<PFN>::into(pages_ptr);
-
-        for order in (target_order..order).rev() {
-            offset >>= 1;
-
-            let split_pages_ptr = Raw::from(pages_pfn + offset);
-            split_pages_ptr.set_order(order);
-            split_pages_ptr.set_buddy();
-            self.free_areas[order as usize].add_pages(split_pages_ptr);
-        }
-    }
-
-    pub fn free_pages(&mut self, mut pages_ptr: Raw) {
-        assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0);
-
-        let mut pfn = Into::<PFN>::into(pages_ptr);
-        let mut current_order = pages_ptr.order();
-
-        assert!(
-            pages_ptr.is_present(),
-            "Freeing a page that is not present: {:?}",
-            pages_ptr.into(),
-        );
-
-        assert!(
-            !pages_ptr.is_free(),
-            "Freeing a page that is free: {:?}",
-            pages_ptr.into(),
-        );
-
-        while current_order < (AREAS - 1) as u32 {
-            let buddy_pfn = pfn.buddy_pfn(current_order);
-            let buddy_pages_ptr = Raw::from(buddy_pfn);
-
-            if !self.buddy_check(buddy_pages_ptr, current_order) {
-                break;
-            }
-
-            pages_ptr.clear_buddy();
-            buddy_pages_ptr.clear_buddy();
-            self.free_areas[current_order as usize].del_pages(buddy_pages_ptr);
-
-            pages_ptr = Raw::from(pfn.combined_pfn(buddy_pfn));
-            pfn = pfn.combined_pfn(buddy_pfn);
-
-            pages_ptr.set_buddy();
-            current_order += 1;
-        }
-
-
-
pages_ptr.set_order(current_order); - self.free_areas[current_order as usize].add_pages(pages_ptr); - } - - /// This function checks whether a page is free && is a buddy - /// we can coalesce a page and its buddy if - /// - the buddy is valid(present) && - /// - the buddy is right now in free_areas && - /// - a page and its buddy have the same order && - /// - a page and its buddy are in the same zone (on smp systems). - fn buddy_check(&self, pages_ptr: Raw, order: u32) -> bool { - if !pages_ptr.is_present() { - return false; - } - if !pages_ptr.is_free() { - return false; - } - if pages_ptr.order() != order { - return false; - } - - assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0); - true - } - - /// Only used on buddy initialization - pub fn create_pages(&mut self, start: PAddr, end: PAddr) { - let mut start_pfn = PFN::from(start.ceil()); - let end_pfn = PFN::from(end.floor()); - - while start_pfn < end_pfn { - let mut order = usize::from(start_pfn) - .trailing_zeros() - .min((AREAS - 1) as u32); - - while start_pfn + (1 << order) as usize > end_pfn { - order -= 1; - } - let page_ptr = Raw::from(start_pfn); - page_ptr.set_buddy(); - self.free_areas[order as usize].add_pages(page_ptr); - start_pfn = start_pfn + (1 << order) as usize; - } - } -} diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 0f1dff63..b0c235aa 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -1,39 +1,42 @@ -use super::{ - config::{self, mm::*}, - console::write_str, - cpu::{CPUID, CPU_COUNT}, - time::set_next_timer, -}; -use crate::{ - arch::{ - cpu::CPU, - fdt::{init_dtb_and_fdt, FdtExt, FDT}, - mm::{ArchPhysAccess, FreeRam, PageAttribute64, GLOBAL_PAGE_TABLE}, - }, - bootstrap::BootStrapData, - mm::{ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}, -}; -use core::{ - alloc::Allocator, - arch::asm, - cell::RefCell, - sync::atomic::{AtomicBool, AtomicUsize}, -}; -use core::{ - arch::{global_asm, naked_asm}, - hint::spin_loop, - sync::atomic::{AtomicPtr, Ordering}, -}; +use core::alloc::Allocator; +use core::arch::{asm, global_asm, naked_asm}; +use core::cell::RefCell; +use core::hint::spin_loop; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; + use eonix_hal_traits::mm::Memory; -use eonix_mm::{ - address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}, - page_table::{PageAttribute, PagingMode, PTE as _}, - paging::{Page, PageAccess, PageAlloc, PAGE_SIZE, PFN}, +use eonix_mm::address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}; +use eonix_mm::page_table::{ + PageAttribute, PageTable, PagingMode, TableAttribute, PTE as _, +}; +use eonix_mm::paging::{ + Folio, FrameAlloc, PageAccess, PageBlock, PAGE_SIZE, PFN, }; use eonix_percpu::PercpuArea; use fdt::Fdt; -use riscv::{asm::sfence_vma_all, register::satp}; -use sbi::{hsm::hart_start, legacy::console_putchar, PhysicalAddress}; +use riscv::asm::sfence_vma_all; +use riscv::register::satp; +use sbi::hsm::hart_start; +use sbi::legacy::console_putchar; +use sbi::PhysicalAddress; + +use super::config::mm::*; +use super::config::{self}; +use super::console::write_str; +use super::cpu::{CPUID, CPU_COUNT}; +use super::time::set_next_timer; +use crate::arch::cpu::CPU; +use crate::arch::fdt::{init_dtb_and_fdt, FdtExt, FDT}; +use crate::arch::mm::{ + ArchPagingMode, ArchPhysAccess, PageAccessImpl, PageAttribute64, + RawPageTableSv48, 
GLOBAL_PAGE_TABLE,
+};
+use crate::bootstrap::BootStrapData;
+use crate::mm::{
+    ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator,
+};
+use crate::{extern_symbol_addr, extern_symbol_value};
 
 #[unsafe(link_section = ".bootstrap.stack")]
 static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16];
@@ -46,29 +49,30 @@
 static TEMP_AP_STACK: [u8; 256] = [0; 256];
 
 static TEMP_AP_STACK_START: &'static [u8; 256] = &TEMP_AP_STACK;
 
 #[repr(C, align(4096))]
-struct PageTable([u64; PTES_PER_PAGE]);
+struct BootPageTable([u64; PTES_PER_PAGE]);
 
 /// map 0x8000 0000 to itself and 0xffff ffff 8000 0000
 #[unsafe(link_section = ".bootstrap.page_table.1")]
-static BOOT_PAGE_TABLE: PageTable = {
+static BOOT_PAGE_TABLE: BootPageTable = {
     let mut arr: [u64; PTES_PER_PAGE] = [0; PTES_PER_PAGE];
     arr[0] = 0 | 0x2f;
     arr[510] = 0 | 0x2f;
     arr[511] = (0x80202 << 10) | 0x21;
-    PageTable(arr)
+    BootPageTable(arr)
 };
 
 #[unsafe(link_section = ".bootstrap.page_table.2")]
 #[used]
-static PT1: PageTable = {
+static PT1: BootPageTable = {
     let mut arr: [u64; PTES_PER_PAGE] = [0; PTES_PER_PAGE];
     arr[510] = (0x80000 << 10) | 0x2f;
-    PageTable(arr)
+    BootPageTable(arr)
 };
 
-static BSP_PAGE_ALLOC: AtomicPtr<RefCell<BasicPageAlloc>> = AtomicPtr::new(core::ptr::null_mut());
+static BSP_PAGE_ALLOC: AtomicPtr<RefCell<BasicPageAlloc>> =
+    AtomicPtr::new(core::ptr::null_mut());
 static AP_COUNT: AtomicUsize = AtomicUsize::new(0);
 
 static AP_STACK: AtomicUsize = AtomicUsize::new(0);
@@ -78,7 +82,7 @@
 static AP_SEM: AtomicBool = AtomicBool::new(false);
 
 #[unsafe(naked)]
 #[unsafe(no_mangle)]
 #[unsafe(link_section = ".bootstrap.entry")]
-unsafe extern "C" fn _start(hart_id: usize, dtb_addr: usize) -> ! {
+unsafe extern "C" fn _start(hart_id: usize, dtb_addr: usize) {
     naked_asm!(
         "
         ld sp, 2f
@@ -109,17 +113,16 @@ unsafe extern "C" fn _start(hart_id: usize, dtb_addr: usize) -> ! {
 }
 
 pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! {
-    let fdt = Fdt::from_ptr(ArchPhysAccess::as_ptr(dtb_addr).as_ptr())
-        .expect("Failed to parse DTB from static memory.");
+    let fdt = unsafe { FdtExt::new(dtb_addr) };
 
     let real_allocator = RefCell::new(BasicPageAlloc::new());
     let alloc = BasicPageAllocRef::new(&real_allocator);
 
-    for range in fdt.present_ram().free_ram() {
+    for range in fdt.free_ram() {
        real_allocator.borrow_mut().add_range(range);
    }
 
-    setup_kernel_page_table(&alloc);
+    setup_kernel_page_table(alloc.clone());
 
    unsafe {
        init_dtb_and_fdt(dtb_addr);
    }
@@ -134,11 +137,14 @@ pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! {
    }
 
    let start = unsafe {
-        ((&BOOT_STACK_START) as *const &'static [u8; 4096 * 16]).read_volatile() as *const _
-            as usize
+        ((&BOOT_STACK_START) as *const &'static [u8; 4096 * 16]).read_volatile()
+            as *const _ as usize
    };
    let bootstrap_data = BootStrapData {
-        early_stack: PRange::new(PAddr::from(start), PAddr::from(start + 4096 * 16)),
+        early_stack: PRange::new(
+            PAddr::from(start),
+            PAddr::from(start + 4096 * 16),
+        ),
        allocator: Some(real_allocator),
    };
 
@@ -150,43 +156,40 @@ pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! {
     }
 }
 
-unsafe extern "C" {
-    fn BSS_LENGTH();
-}
 /// TODO:
 /// Add finer-grained control over the kernel image, or leave it as is.
-fn setup_kernel_page_table(alloc: impl PageAlloc) {
-    let global_page_table = &GLOBAL_PAGE_TABLE;
+fn setup_kernel_page_table(alloc: BasicPageAllocRef) {
+    let global_page_table = PageTable::<ArchPagingMode, _, _>::new(
+        GLOBAL_PAGE_TABLE.clone(),
+        alloc.clone(),
+        PageAccessImpl,
+    );
 
     let attr = PageAttribute::WRITE
         | PageAttribute::READ
-        | PageAttribute::EXECUTE
         | PageAttribute::GLOBAL
         | PageAttribute::PRESENT;
 
     const KERNEL_BSS_START: VAddr = VAddr::from(0xffffffff40000000);
 
+    let bss_length = extern_symbol_addr!(BSS_LENGTH);
+
     // Map kernel BSS
-    for pte in global_page_table.iter_kernel_in(
-        VRange::from(KERNEL_BSS_START).grow(BSS_LENGTH as usize),
-        ArchPagingMode::LEVELS,
-        &alloc,
-    ) {
-        let page = Page::alloc_in(&alloc);
-
-        let attr = {
-            let mut attr = attr.clone();
-            attr.remove(PageAttribute::EXECUTE);
-            attr
-        };
+    let bss_range = VRange::from(KERNEL_BSS_START).grow(bss_length);
+    for pte in global_page_table.iter_kernel(bss_range) {
+        let page = alloc.alloc().unwrap();
+
+        pte.set(page.into_raw(), attr.into());
     }
     sfence_vma_all();
 
     unsafe {
-        core::ptr::write_bytes(KERNEL_BSS_START.addr() as *mut (), 0, BSS_LENGTH as usize);
+        core::ptr::write_bytes(
+            KERNEL_BSS_START.addr() as *mut u8,
+            0,
+            bss_length,
+        );
     }
 
     unsafe {
@@ -197,17 +200,22 @@ fn setup_kernel_page_table(alloc: impl PageAlloc) {
         );
     }
     sfence_vma_all();
+
+    core::mem::forget(global_page_table);
 }
 
 /// set up tp register to percpu
-fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) {
+fn setup_cpu(alloc: impl FrameAlloc, hart_id: usize) {
     CPU_COUNT.fetch_add(1, Ordering::Relaxed);
 
     let mut percpu_area = PercpuArea::new(|layout| {
         let page_count = layout.size().div_ceil(PAGE_SIZE);
-        let page = Page::alloc_at_least_in(page_count, alloc);
+        let page = alloc.alloc_at_least(page_count).unwrap();
 
-        let ptr = ArchPhysAccess::get_ptr_for_page(&page).cast();
+        let ptr = unsafe {
+            // TODO: safety
+            ArchPhysAccess::as_ptr(page.start())
+        };
 
         page.into_raw();
         ptr
@@ -235,15 +243,6 @@ fn setup_cpu(alloc: impl FrameAlloc, hart_id: usize) {
     percpu_area.register(cpu.cpuid());
 }
 
-fn get_ap_start_addr() -> usize {
-    unsafe extern "C" {
-        fn _ap_start();
-    }
-    static AP_START_VALUE: &'static unsafe extern "C" fn() =
-        &(_ap_start as unsafe extern "C" fn());
-    unsafe { (AP_START_VALUE as *const _ as *const usize).read_volatile() }
-}
-
 fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell<BasicPageAlloc>) {
     let local_hart_id = CPU::local().cpuid();
     let mut ap_count = 0;
@@ -251,13 +250,14 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell<BasicPageAlloc>) {
     for hart_id in FDT.harts().filter(|&id| id != local_hart_id) {
         let stack_range = {
             let page_alloc = BasicPageAllocRef::new(&page_alloc);
-            let ap_stack = Page::alloc_order_in(4, page_alloc);
+            let ap_stack = page_alloc.alloc_order(4).unwrap();
             let stack_range = ap_stack.range();
             ap_stack.into_raw();
             stack_range
         };
 
-        let old = BSP_PAGE_ALLOC.swap((&raw const *page_alloc) as *mut _, Ordering::Release);
+        let old = BSP_PAGE_ALLOC
+            .swap((&raw const *page_alloc) as *mut _, Ordering::Release);
         assert!(old.is_null());
 
         while AP_STACK
@@ -273,7 +273,11 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell<BasicPageAlloc>) {
         }
 
         unsafe {
-            hart_start(hart_id, PhysicalAddress::new(get_ap_start_addr()), 0);
+            hart_start(
+                hart_id,
+                PhysicalAddress::new(extern_symbol_value!(_ap_start)),
+                0,
+            );
         }
 
         while AP_COUNT.load(Ordering::Acquire) == ap_count {
@@ -289,7 +293,7 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell<BasicPageAlloc>) {
#[unsafe(naked)] #[unsafe(no_mangle)] #[unsafe(link_section = ".bootstrap.apentry")] -unsafe extern "C" fn _ap_start(hart_id: usize) -> ! { +unsafe extern "C" fn _ap_start(hart_id: usize) { naked_asm!( " la sp, 1f // set temp stack @@ -326,7 +330,12 @@ unsafe extern "C" fn _ap_start(hart_id: usize) -> ! { fn get_ap_stack() -> usize { while AP_SEM - .compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed) + .compare_exchange_weak( + false, + true, + Ordering::Acquire, + Ordering::Relaxed, + ) .is_err() { core::hint::spin_loop(); @@ -346,12 +355,14 @@ fn get_ap_stack() -> usize { } fn ap_entry(hart_id: usize, stack_bottom: PAddr) -> ! { - let stack_range = PRange::new(stack_bottom - (1 << 3) * PAGE_SIZE, stack_bottom); + let stack_range = + PRange::new(stack_bottom - (1 << 3) * PAGE_SIZE, stack_bottom); { // SAFETY: Acquire all the work done by the BSP and other APs. let alloc = loop { - let alloc = BSP_PAGE_ALLOC.swap(core::ptr::null_mut(), Ordering::AcqRel); + let alloc = + BSP_PAGE_ALLOC.swap(core::ptr::null_mut(), Ordering::AcqRel); if !alloc.is_null() { break alloc; diff --git a/crates/eonix_hal/src/arch/riscv64/cpu.rs b/crates/eonix_hal/src/arch/riscv64/cpu.rs index 9c843eaf..3c58580e 100644 --- a/crates/eonix_hal/src/arch/riscv64/cpu.rs +++ b/crates/eonix_hal/src/arch/riscv64/cpu.rs @@ -27,22 +27,11 @@ static DEFAULT_TRAP_CONTEXT: MaybeUninit = MaybeUninit::uninit(); #[eonix_percpu::define_percpu] static LOCAL_CPU: LazyLock = LazyLock::new(|| CPU::new(CPUID.get())); -#[derive(Debug, Clone)] -pub enum UserTLS { - Base(u64), -} - /// RISC-V Hart pub struct CPU { pub(crate) interrupt: InterruptControl, } -impl UserTLS { - pub fn new(base: u64) -> Self { - Self::Base(base) - } -} - impl CPU { fn new(cpuid: usize) -> Self { Self { @@ -66,12 +55,6 @@ impl CPU { sscratch::write(DEFAULT_TRAP_CONTEXT.as_ptr() as usize); } - pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) {} - - pub fn set_tls32(self: Pin<&mut Self>, _user_tls: &UserTLS) { - // nothing - } - pub fn local() -> PreemptGuard> { unsafe { // SAFETY: We pass the reference into a `PreemptGuard`, which ensures diff --git a/crates/eonix_hal/src/arch/riscv64/fdt.rs b/crates/eonix_hal/src/arch/riscv64/fdt.rs index 5efcc98d..908256c7 100644 --- a/crates/eonix_hal/src/arch/riscv64/fdt.rs +++ b/crates/eonix_hal/src/arch/riscv64/fdt.rs @@ -1,62 +1,95 @@ -use super::mm::{ArchPhysAccess, PresentRam}; -use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; -use core::sync::atomic::{AtomicPtr, Ordering}; -use eonix_mm::address::{PAddr, PRange, PhysAccess}; +use core::ops::Deref; +use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering}; + +use eonix_mm::address::{Addr, AddrOps, PAddr, PRange, PhysAccess}; use eonix_sync_base::LazyLock; use fdt::Fdt; -static DTB_VIRT_PTR: AtomicPtr = AtomicPtr::new(core::ptr::null_mut()); -pub static FDT: LazyLock> = LazyLock::new(|| unsafe { - Fdt::from_ptr(DTB_VIRT_PTR.load(Ordering::Acquire)) - .expect("Failed to parse DTB from static memory.") +use super::mm::ArchPhysAccess; +use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; +use crate::extern_symbol_addr; + +static DTB_PADDR: AtomicUsize = AtomicUsize::new(0); +pub static FDT: LazyLock = LazyLock::new(|| unsafe { + FdtExt::new(PAddr::from_val(DTB_PADDR.load(Ordering::Relaxed))) }); -pub trait FdtExt { - fn harts(&self) -> impl Iterator; +pub struct FdtExt { + fdt: Fdt<'static>, + range: PRange, +} - fn hart_count(&self) -> usize { - self.harts().count() - } +impl FdtExt { + /// # Safety + /// The caller MUST ensure that 
[`addr`] points to valid FDT. + pub unsafe fn new(addr: PAddr) -> Self { + let fdt = unsafe { + Fdt::from_ptr(ArchPhysAccess::as_ptr(addr).as_ptr()) + .expect("Failed to parse DTB from static memory.") + }; - fn present_ram(&self) -> impl Iterator; -} + Self { + range: PRange::from(addr).grow(fdt.total_size()), + fdt, + } + } -impl FdtExt for Fdt<'_> { - fn harts(&self) -> impl Iterator { + pub fn harts(&self) -> impl Iterator { self.cpus().map(|cpu| cpu.ids().all()).flatten() } - fn present_ram(&self) -> impl Iterator + PresentRam { - struct Present(I); - impl PresentRam for Present where I: Iterator {} - impl Iterator for Present - where - I: Iterator, - { - type Item = PRange; - - fn next(&mut self) -> Option { - self.0.next() - } - } + pub fn hart_count(&self) -> usize { + self.harts().count() + } + pub fn present_ram(&self) -> impl Iterator { let mut index = 0; - Present(core::iter::from_fn(move || { - self.memory() + + core::iter::from_fn(move || { + let item = self + .memory() .regions() .filter_map(|region| { - region.size.map(|len| { - PRange::from(PAddr::from(region.starting_address as usize)).grow(len) - }) + let start = PAddr::from(region.starting_address as usize); + Some(start).zip(region.size) }) - .skip(index) - .next() - .inspect(|_| index += 1) - })) + .map(|(start, len)| PRange::from(start).grow(len)) + .nth(index); + + index += 1; + item + }) + } + + pub fn free_ram(&self) -> impl Iterator { + let kernel_end = extern_symbol_addr!(__kernel_end) - KIMAGE_OFFSET; + let kernel_end = PAddr::from(kernel_end).ceil(); + + // TODO: move this to some platform-specific crate + self.present_ram().map(move |mut range| { + // Strip out parts before __kernel_end + if range.overlap_with(&PRange::from(kernel_end)) { + (_, range) = range.split_at(kernel_end); + } + + // Strip out part after the FDT + if range.overlap_with(&self.range) { + (range, _) = range.split_at(self.range.start()); + } + + range + }) + } +} + +impl Deref for FdtExt { + type Target = Fdt<'static>; + + fn deref(&self) -> &Self::Target { + &self.fdt } } pub unsafe fn init_dtb_and_fdt(dtb_paddr: PAddr) { - let dtb_virt_ptr = ArchPhysAccess::as_ptr(dtb_paddr); - DTB_VIRT_PTR.store(dtb_virt_ptr.as_ptr(), Ordering::Release); + DTB_PADDR.store(dtb_paddr.addr(), Ordering::Relaxed); } diff --git a/crates/eonix_hal/src/arch/riscv64/link.x b/crates/eonix_hal/src/arch/riscv64/link.x index e348e1be..a74f0d0d 100644 --- a/crates/eonix_hal/src/arch/riscv64/link.x +++ b/crates/eonix_hal/src/arch/riscv64/link.x @@ -81,10 +81,12 @@ INSERT AFTER .rodata; SECTIONS { .vdso ALIGN(0x1000) : ALIGN(0x1000) { + VDSO_START = ABSOLUTE(.); + KEEP(*(.vdso .vdso.*)); . 
= ALIGN(0x1000); - } > VDSO AT> RAM + } > REGION_DATA AT> RAM VDSO_PADDR = LOADADDR(.vdso); } diff --git a/crates/eonix_hal/src/arch/riscv64/memory.x b/crates/eonix_hal/src/arch/riscv64/memory.x index 0dc7c4ff..f2029c9a 100644 --- a/crates/eonix_hal/src/arch/riscv64/memory.x +++ b/crates/eonix_hal/src/arch/riscv64/memory.x @@ -3,7 +3,6 @@ ENTRY(_start) MEMORY { RAM : org = 0x0000000080200000, len = 8M - VDSO : org = 0x00007f0000000000, len = 4K KBSS : org = 0xffffffff40000000, len = 2M KIMAGE : org = 0xffffffff80200000, len = 8M } diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 46dd9437..7891f094 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -1,31 +1,26 @@ -use super::{ - config::mm::{PHYS_MAP_VIRT, ROOT_PAGE_TABLE_PFN}, - fdt::{FdtExt, FDT}, -}; -use crate::{arch::riscv64::config::mm::KIMAGE_OFFSET, traits::mm::Memory}; -use core::{marker::PhantomData, ptr::NonNull}; -use eonix_mm::{ - address::{Addr as _, AddrOps, PAddr, PRange, PhysAccess, VAddr}, - page_table::{ - PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, RawPageTable, - TableAttribute, PTE, - }, - paging::{NoAlloc, Page, PageBlock, PFN}, +use core::marker::PhantomData; +use core::ptr::NonNull; + +use eonix_hal_traits::mm::Memory; +use eonix_mm::address::{Addr as _, AddrOps, PAddr, PRange, PhysAccess, VAddr}; +use eonix_mm::page_table::{ + PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, + RawPageTable, TableAttribute, PTE, }; +use eonix_mm::paging::{BasicFolio, Folio, PageAccess, PageBlock, PFN}; use eonix_sync_base::LazyLock; use fdt::Fdt; -use riscv::{ - asm::{sfence_vma, sfence_vma_all}, - register::satp, -}; +use riscv::asm::{sfence_vma, sfence_vma_all}; +use riscv::register::satp; + +use super::config::mm::{PHYS_MAP_VIRT, ROOT_PAGE_TABLE_PFN}; +use super::fdt::{FdtExt, FDT}; +use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; +use crate::extern_symbol_addr; +use crate::mm::BasicPageAlloc; -pub const PAGE_TABLE_BASE: PFN = PFN::from_val(ROOT_PAGE_TABLE_PFN); -pub static GLOBAL_PAGE_TABLE: LazyLock> = - LazyLock::new(|| unsafe { - Page::with_raw(PAGE_TABLE_BASE, |root_table_page| { - PageTable::with_root_table(root_table_page.clone()) - }) - }); +const PAGE_TABLE_BASE: PFN = PFN::from_val(ROOT_PAGE_TABLE_PFN); +pub const GLOBAL_PAGE_TABLE: BasicFolio = BasicFolio::new(PAGE_TABLE_BASE, 0); pub const PA_V: u64 = 0b1 << 0; pub const PA_R: u64 = 0b1 << 1; @@ -61,6 +56,9 @@ pub struct ArchPhysAccess; pub struct ArchMemory; +#[derive(Clone)] +pub struct PageAccessImpl; + impl PTE for PTE64 { type Attr = PageAttribute64; @@ -118,7 +116,9 @@ impl RawAttribute for PageAttribute64 { table_attr |= TableAttribute::PRESENT; } - if table_attr.contains(TableAttribute::PRESENT) && self.0 & (PA_R | PA_W | PA_X) != 0 { + if table_attr.contains(TableAttribute::PRESENT) + && self.0 & (PA_R | PA_W | PA_X) != 0 + { return None; } @@ -142,7 +142,9 @@ impl RawAttribute for PageAttribute64 { page_attr |= PageAttribute::PRESENT; } - if page_attr.contains(PageAttribute::PRESENT) && (self.0 & (PA_R | PA_W | PA_X) == 0) { + if page_attr.contains(PageAttribute::PRESENT) + && (self.0 & (PA_R | PA_W | PA_X) == 0) + { return None; } @@ -261,70 +263,24 @@ impl PhysAccess for ArchPhysAccess { } } +impl PageAccess for PageAccessImpl { + unsafe fn get_ptr_for_pfn(&self, pfn: PFN) -> NonNull { + unsafe { ArchPhysAccess::as_ptr(PAddr::from(pfn)) } + } +} + impl Memory for ArchMemory { fn present_ram() -> impl 
Iterator { FDT.present_ram() } fn free_ram() -> impl Iterator { - unsafe extern "C" { - fn __kernel_start(); - fn __kernel_end(); - } - - let kernel_end = PAddr::from(__kernel_end as usize - KIMAGE_OFFSET); - let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)).chain( - Self::present_ram() - .filter(move |range| range.end() > paddr_after_kimage_aligned) - .map(move |range| { - if range.start() < paddr_after_kimage_aligned { - let (_, right) = range.split_at(paddr_after_kimage_aligned); - right - } else { - range - } - }), - ) + FDT.free_ram() } } pub type DefaultPagingMode = PagingModeSv48; -pub trait PresentRam: Iterator {} - -pub trait FreeRam: PresentRam { - fn free_ram(self) -> impl Iterator; -} - -impl FreeRam for T -where - T: PresentRam, -{ - fn free_ram(self) -> impl Iterator { - unsafe extern "C" { - fn __kernel_start(); - fn __kernel_end(); - } - - let kernel_end = PAddr::from(__kernel_end as usize - KIMAGE_OFFSET); - let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)).chain( - self.filter(move |range| range.end() > paddr_after_kimage_aligned) - .map(move |range| { - if range.start() < paddr_after_kimage_aligned { - let (_, right) = range.split_at(paddr_after_kimage_aligned); - right - } else { - range - } - }), - ) - } -} - #[inline(always)] pub fn flush_tlb(vaddr: usize) { sfence_vma(0, vaddr); diff --git a/crates/eonix_hal/src/lib.rs b/crates/eonix_hal/src/lib.rs index e789ecbb..d8696994 100644 --- a/crates/eonix_hal/src/lib.rs +++ b/crates/eonix_hal/src/lib.rs @@ -11,7 +11,9 @@ pub mod mm; pub mod trap; pub mod fence { - pub use crate::arch::fence::{memory_barrier, read_memory_barrier, write_memory_barrier}; + pub use crate::arch::fence::{ + memory_barrier, read_memory_barrier, write_memory_barrier, + }; } pub mod fpu { @@ -19,7 +21,7 @@ pub mod fpu { } pub mod processor { - pub use crate::arch::cpu::{halt, UserTLS, CPU, CPU_COUNT}; + pub use crate::arch::cpu::{halt, CPU, CPU_COUNT}; } /// Re-export the arch module for use in other crates @@ -43,3 +45,43 @@ pub mod arch_exported { pub use eonix_hal_macros::{ap_main, default_trap_handler, main}; pub use eonix_hal_traits as traits; + +#[macro_export] +macro_rules! symbol_addr { + ($sym:expr) => {{ + ($sym) as *const () as usize + }}; + ($sym:expr, $type:ty) => {{ + ($sym) as *const () as *const $type + }}; +} + +#[macro_export] +macro_rules! extern_symbol_addr { + ($sym:ident) => {{ + unsafe extern "C" { + fn $sym(); + } + $crate::symbol_addr!($sym) + }}; + ($sym:ident, $type:ty) => {{ + unsafe extern "C" { + fn $sym(); + } + $crate::symbol_addr!($sym, $type) + }}; +} + +#[macro_export] +macro_rules! extern_symbol_value { + ($sym:ident) => {{ + unsafe extern "C" { + fn $sym(); + } + + static SYMBOL_ADDR: &'static unsafe extern "C" fn() = + &($sym as unsafe extern "C" fn()); + + unsafe { (SYMBOL_ADDR as *const _ as *const usize).read_volatile() } + }}; +} diff --git a/crates/eonix_hal/src/link.x.in b/crates/eonix_hal/src/link.x.in index 81c269c2..eaabdfda 100644 --- a/crates/eonix_hal/src/link.x.in +++ b/crates/eonix_hal/src/link.x.in @@ -18,7 +18,7 @@ SECTIONS { __srodata = .; *(.rodata .rodata.*); - + . 
= ALIGN(8); PROVIDE(__eh_frame = .); @@ -41,7 +41,7 @@ SECTIONS { } > REGION_DATA AT> LINK_REGION_DATA - .data.after : + .data.after : ALIGN(0x1000) { __data_after = .; } > REGION_DATA AT> LINK_REGION_DATA diff --git a/crates/eonix_hal/src/mm.rs b/crates/eonix_hal/src/mm.rs index 0a5597ac..ef006cb5 100644 --- a/crates/eonix_hal/src/mm.rs +++ b/crates/eonix_hal/src/mm.rs @@ -1,16 +1,14 @@ -use core::{ - alloc::{AllocError, Allocator, Layout}, - cell::RefCell, - ptr::NonNull, -}; -use eonix_mm::{ - address::{AddrOps as _, PRange}, - paging::{PageAlloc, UnmanagedRawPage, PAGE_SIZE, PFN}, -}; +use core::alloc::{AllocError, Allocator, Layout}; +use core::cell::RefCell; +use core::ptr::NonNull; + +use eonix_mm::address::{AddrOps as _, PRange}; +use eonix_mm::page_table::PageTableAlloc; +use eonix_mm::paging::{BasicFolio, FrameAlloc, PAGE_SIZE, PFN}; pub use crate::arch::mm::{ - flush_tlb, flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchMemory, - ArchPagingMode, ArchPhysAccess, GLOBAL_PAGE_TABLE, + flush_tlb, flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, + ArchMemory, ArchPhysAccess, GLOBAL_PAGE_TABLE, }; pub struct BasicPageAlloc { @@ -43,9 +41,8 @@ impl BasicPageAlloc { fn alloc_one(&mut self) -> PFN { assert_ne!(self.head, self.tail, "No free pages available"); let mut range = self.ranges[self.head].take().unwrap(); - range = range.shrink(PAGE_SIZE); - - let pfn = PFN::from(range.end()); + let pfn = PFN::from(range.start()); + range = PRange::new(range.start() + PAGE_SIZE, range.end()); if range.len() != 0 { self.ranges[self.head] = Some(range); @@ -89,7 +86,8 @@ impl BasicPageAlloc { panic!("Page allocator is full"); } - self.ranges[tail] = Some(PRange::new(range.start().ceil(), range.end().floor())); + self.ranges[tail] = + Some(PRange::new(range.start().ceil(), range.end().floor())); } pub fn alloc(&mut self, order: u32) -> PFN { @@ -118,19 +116,23 @@ impl<'a> BasicPageAllocRef<'a> { } } -impl PageAlloc for BasicPageAllocRef<'_> { - type RawPage = UnmanagedRawPage; +impl FrameAlloc for BasicPageAllocRef<'_> { + type Folio = BasicFolio; - fn alloc_order(&self, order: u32) -> Option { - Some(Self::RawPage::new(self.0.borrow_mut().alloc(order), order)) + fn alloc_order(&self, order: u32) -> Option { + Some(BasicFolio::new(self.0.borrow_mut().alloc(order), order)) } +} + +impl PageTableAlloc for BasicPageAllocRef<'_> { + type Folio = BasicFolio; - unsafe fn dealloc(&self, _: Self::RawPage) { - panic!("Dealloc is not supported in BasicPageAlloc"); + fn alloc(&self) -> Self::Folio { + FrameAlloc::alloc(self).unwrap() } - fn has_management_over(&self, _: Self::RawPage) -> bool { - true + unsafe fn from_raw(&self, pfn: PFN) -> Self::Folio { + BasicFolio::new(pfn, 0) } } @@ -145,7 +147,10 @@ impl<'a> ScopedAllocator<'a> { } } - pub fn with_alloc<'b, 'r, O>(&'r self, func: impl FnOnce(&'b ScopedAllocator<'a>) -> O) -> O + pub fn with_alloc<'b, 'r, O>( + &'r self, + func: impl FnOnce(&'b ScopedAllocator<'a>) -> O, + ) -> O where 'a: 'b, 'r: 'b, diff --git a/crates/eonix_log/src/lib.rs b/crates/eonix_log/src/lib.rs index 92b1639f..01b6a587 100644 --- a/crates/eonix_log/src/lib.rs +++ b/crates/eonix_log/src/lib.rs @@ -2,6 +2,7 @@ use alloc::sync::Arc; use core::fmt::{self, Write}; + use eonix_sync::{Spin, SpinIrq as _}; extern crate alloc; @@ -91,18 +92,31 @@ macro_rules! println_fatal { #[macro_export] macro_rules! 
println_trace {
-    ($feat:literal) => {
+    (feat:$feat:literal) => {
         #[deny(unexpected_cfgs)]
         {
             #[cfg(feature = $feat)]
-            $crate::println!("[kernel:trace] ")
+            $crate::println!("[kernel:trace]")
         }
     };
-    ($feat:literal, $($arg:tt)*) => {{
+    (feat:$feat:literal, $fmt:literal) => {{
         #[deny(unexpected_cfgs)]
         {
             #[cfg(feature = $feat)]
-            $crate::println!("[kernel:trace] {}", format_args!($($arg)*))
+            $crate::println!(concat!("[kernel:trace] ", $fmt))
         }
     }};
+    (feat:$feat:literal, $fmt:literal, $($arg:expr $(,)?)*) => {
+        #[deny(unexpected_cfgs)]
+        {
+            // Suppress unused variables warning
+            #[cfg(not(feature = $feat))]
+            {
+                $(let _ = $arg;)*
+            }
+
+            #[cfg(feature = $feat)]
+            $crate::println!("[kernel:trace] {}", format_args!($fmt, $($arg,)*))
+        }
+    };
 }
diff --git a/crates/eonix_mm/src/address/paddr.rs b/crates/eonix_mm/src/address/paddr.rs
index 6fadbd2a..bbfa299e 100644
--- a/crates/eonix_mm/src/address/paddr.rs
+++ b/crates/eonix_mm/src/address/paddr.rs
@@ -1,11 +1,11 @@
+use core::fmt;
+use core::ops::{Add, Sub};
+use core::ptr::NonNull;
+
 use super::addr::Addr;
 use crate::paging::{PAGE_SIZE_BITS, PFN};
-use core::{
-    fmt,
-    ops::{Add, Sub},
-    ptr::NonNull,
-};
 
+/// Convert PAddr to VAddr.
 pub trait PhysAccess {
     /// Translate the data that this address is pointing to into kernel
     /// accessible pointer. Use it with care.
diff --git a/crates/eonix_mm/src/page_table.rs b/crates/eonix_mm/src/page_table.rs
index 55732f72..f3528060 100644
--- a/crates/eonix_mm/src/page_table.rs
+++ b/crates/eonix_mm/src/page_table.rs
@@ -3,7 +3,7 @@ mod paging_mode;
 mod pte;
 mod pte_iterator;
 
-pub use page_table::{PageTable, RawPageTable};
+pub use page_table::{PageTable, PageTableAlloc, RawPageTable};
 pub use paging_mode::{PageTableLevel, PagingMode};
 pub use pte::{PageAttribute, RawAttribute, TableAttribute, PTE};
 pub use pte_iterator::PageTableIterator;
diff --git a/crates/eonix_mm/src/page_table/page_table.rs b/crates/eonix_mm/src/page_table/page_table.rs
index 8318049f..80be63b9 100644
--- a/crates/eonix_mm/src/page_table/page_table.rs
+++ b/crates/eonix_mm/src/page_table/page_table.rs
@@ -1,15 +1,12 @@
-use super::{
-    paging_mode::PageTableLevel,
-    pte::{RawAttribute, TableAttribute},
-    pte_iterator::{KernelIterator, UserIterator},
-    PagingMode, PTE,
-};
-use crate::{
-    address::{PAddr, VRange},
-    page_table::PageTableIterator,
-    paging::{GlobalPageAlloc, Page, PageAccess, PageAlloc, PageBlock},
-};
-use core::{marker::PhantomData, ptr::NonNull};
+use core::marker::PhantomData;
+use core::ptr::NonNull;
+
+use super::paging_mode::PageTableLevel;
+use super::pte::{RawAttribute, TableAttribute};
+use super::{PagingMode, PTE};
+use crate::address::{PAddr, VRange};
+use crate::page_table::PageTableIterator;
+use crate::paging::{Folio, PageAccess, PageBlock, PFN};
 
 pub trait RawPageTable<'a>: Send + 'a {
     type Entry: PTE + 'a;
@@ -24,45 +21,60 @@ pub trait RawPageTable<'a>: Send + 'a {
     unsafe fn from_ptr(ptr: NonNull<PageBlock>) -> Self;
 }
 
+pub trait PageTableAlloc: Clone {
+    type Folio: Folio;
+
+    fn alloc(&self) -> Self::Folio;
+    unsafe fn from_raw(&self, pfn: PFN) -> Self::Folio;
+}
+
+pub trait GlobalPageTableAlloc: PageTableAlloc {
+    const GLOBAL: Self;
+}
+
 pub struct PageTable<'a, M, A, X>
 where
     M: PagingMode,
     M::Entry: 'a,
-    A: PageAlloc,
+    A: PageTableAlloc,
     X: PageAccess,
 {
-    root_table_page: Page<A>,
-    phantom: PhantomData<&'a (M, X)>,
+    root_table_page: A::Folio,
+    alloc: A,
+    access: X,
+    phantom: PhantomData<&'a M>,
 }
 
 impl<'a, M, A, X> PageTable<'a, M, A, X>
 where
     M: PagingMode,
     M::Entry: 'a,
-    A: PageAlloc,
+    A: 
PageTableAlloc, X: PageAccess, { - pub fn with_root_table(root_table_page: Page) -> Self { + pub fn new(root_table_page: A::Folio, alloc: A, access: X) -> Self { Self { root_table_page, + alloc, + access, phantom: PhantomData, } } pub fn clone_global<'b, B>(&self) -> PageTable<'b, M, B, X> where - B: GlobalPageAlloc, + B: GlobalPageTableAlloc, { - self.clone_in(B::global()) + self.clone_in(B::GLOBAL) } pub fn clone_in<'b, B>(&self, alloc: B) -> PageTable<'b, M, B, X> where - B: PageAlloc, + B: PageTableAlloc, { - let new_root_table_page = Page::alloc_in(alloc); - let new_table_data = X::get_ptr_for_page(&new_root_table_page); - let kernel_table_data = X::get_ptr_for_page(&self.root_table_page); + let new_root_table_page = alloc.alloc(); + let new_table_data = self.access.get_ptr_for_page(&new_root_table_page); + let kernel_table_data = self.access.get_ptr_for_page(&self.root_table_page); unsafe { // SAFETY: `new_table_data` and `kernel_table_data` are both valid pointers @@ -82,7 +94,7 @@ where root_page_table.index_mut(idx).take(); } - PageTable::with_root_table(new_root_table_page) + PageTable::new(new_root_table_page, alloc, self.access.clone()) } pub fn addr(&self) -> PAddr { @@ -90,100 +102,59 @@ where } pub fn iter_user(&self, range: VRange) -> impl Iterator { - let alloc = self.root_table_page.allocator(); - let page_table_ptr = X::get_ptr_for_page(&self.root_table_page); + let page_table_ptr = self.access.get_ptr_for_page(&self.root_table_page); let root_page_table = unsafe { // SAFETY: `page_table_ptr` is a valid pointer to a page table. M::RawTable::from_ptr(page_table_ptr) }; - PageTableIterator::::new(root_page_table, range, alloc.clone()) - } - - pub fn iter_kernel(&self, range: VRange) -> impl Iterator { - self.iter_kernel_levels(range, M::LEVELS) + PageTableIterator::::new( + root_page_table, + range, + TableAttribute::USER, + self.alloc.clone(), + self.access.clone(), + ) } - /// Iterates over the kernel space entries in the page table for the specified levels. - /// - /// # Parameters - /// - `range`: The virtual address range to iterate over. - /// - `levels`: A slice of `PageTableLevel` that specifies which levels of the page table - /// should be included in the iteration. Each level corresponds to a level in the page - /// table hierarchy, and the iterator will traverse entries at these levels. + /// Iterates over the kernel space entries in the page table. /// /// # Returns /// An iterator over mutable references to the page table entries (`M::Entry`) within the - /// specified range and levels. + /// specified range. /// /// # Example /// ``` /// let range = VRange::new(0x1234000, 0x1300000); - /// let levels = &M::LEVELS[..2]; - /// for pte in page_table.iter_kernel_levels(range, levels) { + /// for pte in page_table.iter_kernel(range) { /// // Process each entry /// } /// ``` - pub fn iter_kernel_levels( - &self, - range: VRange, - levels: &'static [PageTableLevel], - ) -> impl Iterator { - self.iter_kernel_in(range, levels, self.root_table_page.allocator()) - } - - /// Iterates over the kernel space entries in the page table for the specified levels - /// with a given page allocator. - /// - /// # Parameters - /// - `range`: The virtual address range to iterate over. - /// - `levels`: A slice of `PageTableLevel` that specifies which levels of the page table - /// should be included in the iteration. Each level corresponds to a level in the page - /// table hierarchy, and the iterator will traverse entries at these levels. 
- /// - `alloc`: A page allocator that provides memory for the page table entries. - /// - /// # Returns - /// An iterator over mutable references to the page table entries (`M::Entry`) within the - /// specified range and levels. - /// - /// # Example - /// ```no_run - /// let range = VRange::new(0x1234000, 0x1300000); - /// let levels = &M::LEVELS[..2]; - /// for pte in page_table.iter_kernel_in(range, levels, NoAlloc) { - /// // Process each entry - /// } - /// ``` - pub fn iter_kernel_in( - &self, - range: VRange, - levels: &'static [PageTableLevel], - alloc: A1, - ) -> impl Iterator { - let page_table_ptr = X::get_ptr_for_page(&self.root_table_page); + pub fn iter_kernel(&self, range: VRange) -> impl Iterator { + let page_table_ptr = self.access.get_ptr_for_page(&self.root_table_page); let root_page_table = unsafe { // SAFETY: `page_table_ptr` is a valid pointer to a page table. M::RawTable::from_ptr(page_table_ptr) }; - PageTableIterator::::with_levels( + PageTableIterator::::with_levels( root_page_table, range, - alloc, - levels, + TableAttribute::GLOBAL, + self.alloc.clone(), + self.access.clone(), + M::LEVELS, ) } - fn drop_page_table_recursive(page_table: &Page, levels: &[PageTableLevel]) { + fn drop_page_table_recursive(&self, page_table: &A::Folio, levels: &[PageTableLevel]) { let [level, remaining_levels @ ..] = levels else { return }; if remaining_levels.is_empty() { // We reached the last level, no need to go deeper. return; } - let alloc = page_table.allocator(); - - let page_table_ptr = X::get_ptr_for_page(page_table); + let page_table_ptr = self.access.get_ptr_for_page(page_table); let mut page_table = unsafe { // SAFETY: `page_table_ptr` is a valid pointer to a page table. M::RawTable::from_ptr(page_table_ptr) @@ -201,10 +172,10 @@ where let page_table = unsafe { // SAFETY: We got the pfn from a valid page table entry, so it should be valid. - Page::from_raw_in(pfn, alloc.clone()) + self.alloc.from_raw(pfn) }; - Self::drop_page_table_recursive(&page_table, remaining_levels); + self.drop_page_table_recursive(&page_table, remaining_levels); } } } @@ -213,10 +184,10 @@ impl<'a, M, A, X> Drop for PageTable<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { fn drop(&mut self) { - Self::drop_page_table_recursive(&self.root_table_page, M::LEVELS); + self.drop_page_table_recursive(&self.root_table_page, M::LEVELS); } } diff --git a/crates/eonix_mm/src/page_table/pte.rs b/crates/eonix_mm/src/page_table/pte.rs index e067d207..c14d5738 100644 --- a/crates/eonix_mm/src/page_table/pte.rs +++ b/crates/eonix_mm/src/page_table/pte.rs @@ -1,6 +1,7 @@ -use crate::paging::PFN; use bitflags::bitflags; +use crate::paging::PFN; + bitflags! 
{ #[derive(Clone, Copy, PartialEq)] pub struct TableAttribute: usize { diff --git a/crates/eonix_mm/src/page_table/pte_iterator.rs b/crates/eonix_mm/src/page_table/pte_iterator.rs index 89b9fb9f..a9e4ff46 100644 --- a/crates/eonix_mm/src/page_table/pte_iterator.rs +++ b/crates/eonix_mm/src/page_table/pte_iterator.rs @@ -1,62 +1,14 @@ -use super::{ - pte::{RawAttribute, TableAttribute}, - PageTableLevel, PagingMode, RawPageTable as _, PTE, -}; -use crate::{ - address::{AddrOps as _, VRange}, - paging::{Page, PageAccess, PageAlloc}, -}; -use core::{marker::PhantomData}; - -pub struct KernelIterator; -pub struct UserIterator; - -pub trait IteratorType { - fn page_table_attributes() -> TableAttribute; - - fn get_page_table<'a, A, X>(pte: &mut M::Entry, alloc: &A) -> M::RawTable<'a> - where - A: PageAlloc, - X: PageAccess, - { - let attr = pte.get_attr().as_table_attr().expect("Not a page table"); - - if attr.contains(TableAttribute::PRESENT) { - let pfn = pte.get_pfn(); - unsafe { - // SAFETY: We are creating a pointer to a page referenced to in - // some page table, which should be valid. - let page_table_ptr = X::get_ptr_for_pfn(pfn); - // SAFETY: `page_table_ptr` is a valid pointer to a page table. - M::RawTable::from_ptr(page_table_ptr) - } - } else { - let page = Page::alloc_in(alloc.clone()); - let page_table_ptr = X::get_ptr_for_page(&page); - - unsafe { - // SAFETY: `page_table_ptr` is good for writing and properly aligned. - page_table_ptr.write_bytes(0, 1); - } - - pte.set( - page.into_raw(), - ::Attr::from(Self::page_table_attributes()), - ); - - unsafe { - // SAFETY: `page_table_ptr` is a valid pointer to a page table. - M::RawTable::from_ptr(page_table_ptr) - } - } - } -} +use super::page_table::PageTableAlloc; +use super::pte::{RawAttribute, TableAttribute}; +use super::{PageTableLevel, PagingMode, RawPageTable as _, PTE}; +use crate::address::{AddrOps as _, VRange}; +use crate::paging::{Folio, PageAccess}; -pub struct PageTableIterator<'a, M, A, X, K> +pub struct PageTableIterator<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { /// Specifies the hierarchy of page table levels to iterate over. @@ -69,19 +21,19 @@ where indicies: [u16; 8], tables: [Option>; 8], + fill_entry_attr: TableAttribute, + alloc: A, - _phantom: PhantomData<&'a (X, K)>, + access: X, } -impl<'a, M, A, X, K> PageTableIterator<'a, M, A, X, K> +impl<'a, M, A, X> PageTableIterator<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, - K: IteratorType, { - fn parse_tables_starting_from(&mut self, idx_level: usize) { for (idx, &pt_idx) in self .indicies @@ -98,18 +50,58 @@ where }; let parent_table = parent_table.as_mut().expect("Parent table is None"); let next_pte = parent_table.index_mut(pt_idx); - child_table.replace(K::get_page_table::(next_pte, &self.alloc)); + + child_table.replace({ + let attr = next_pte + .get_attr() + .as_table_attr() + .expect("Not a page table"); + + if attr.contains(TableAttribute::PRESENT) { + let pfn = next_pte.get_pfn(); + unsafe { + // SAFETY: We are creating a pointer to a page referenced to in + // some page table, which should be valid. + let page_table_ptr = self.access.get_ptr_for_pfn(pfn); + // SAFETY: `page_table_ptr` is a valid pointer to a page table. 
+ M::RawTable::from_ptr(page_table_ptr) + } + } else { + let page = self.alloc.alloc(); + let page_table_ptr = self.access.get_ptr_for_page(&page); + + unsafe { + // SAFETY: `page_table_ptr` is good for writing and properly aligned. + page_table_ptr.write_bytes(0, 1); + } + + next_pte.set(page.into_raw(), self.fill_entry_attr.into()); + + unsafe { + // SAFETY: `page_table_ptr` is a valid pointer to a page table. + M::RawTable::from_ptr(page_table_ptr) + } + } + }); } } - pub fn new(page_table: M::RawTable<'a>, range: VRange, alloc: A) -> Self { - Self::with_levels(page_table, range, alloc, M::LEVELS) + pub fn new( + page_table: M::RawTable<'a>, + range: VRange, + fill_entry_attr: TableAttribute, + alloc: A, + access: X, + ) -> Self { + Self::with_levels(page_table, range, fill_entry_attr, alloc, access, M::LEVELS) } pub fn with_levels( page_table: M::RawTable<'a>, range: VRange, + fill_entry_attr: TableAttribute, alloc: A, + access: X, levels: &'static [PageTableLevel], ) -> Self { let start = range.start().floor(); @@ -122,8 +114,9 @@ where remaining: (end - start) / last_level.page_size(), indicies: [0; 8], tables: [const { None }; 8], + fill_entry_attr: fill_entry_attr.union(TableAttribute::PRESENT), alloc, - _phantom: PhantomData, + access, }; for (i, level) in levels.iter().enumerate() { @@ -137,13 +130,12 @@ where } } -impl<'a, M, A, X, K> Iterator for PageTableIterator<'a, M, A, X, K> +impl<'a, M, A, X> Iterator for PageTableIterator<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, - K: IteratorType, { type Item = &'a mut M::Entry; @@ -178,15 +170,3 @@ where Some(retval) } } - -impl IteratorType for KernelIterator { - fn page_table_attributes() -> TableAttribute { - TableAttribute::PRESENT | TableAttribute::GLOBAL - } -} - -impl IteratorType for UserIterator { - fn page_table_attributes() -> TableAttribute { - TableAttribute::PRESENT | TableAttribute::USER - } -} diff --git a/crates/eonix_mm/src/page_table/walk.rs b/crates/eonix_mm/src/page_table/walk.rs new file mode 100644 index 00000000..aba80b09 --- /dev/null +++ b/crates/eonix_mm/src/page_table/walk.rs @@ -0,0 +1,210 @@ +use super::pte::{RawAttribute, TableAttribute}; +use super::{PageTableLevel, PTE}; +use crate::address::{AddrOps, VAddr, VRange}; +use crate::paging::PFN; + +pub enum WalkState { + Next, + Skip, + Break, +} + +pub trait PageTable: Sized { + type Entry: PTE; + const LEVELS: &'static [PageTableLevel]; + + fn index(&self, index: usize) -> &Self::Entry; + fn index_mut(&mut self, index: usize) -> &mut Self::Entry; + + fn from_pfn(pfn: PFN) -> Self; + unsafe fn take_pfn(pfn: PFN) -> Self; +} + +pub struct PageTableWalk<'a, T, D> +where + T: PageTable, +{ + levels: &'a [PageTableLevel], + fill_entry: &'a [fn(&mut D, &mut T::Entry) -> Option], + walk_entry: &'a [fn(&mut D, &mut T::Entry) -> WalkState], + data: D, +} + +fn try_get_table( + entry: &mut T::Entry, + data: &mut D, + fill_entry: fn(&mut D, &mut T::Entry) -> Option, +) -> Option +where + T: PageTable, +{ + let (mut pfn, attr) = entry.get(); + + // Always skip huge page entries + let attr = attr.as_table_attr()?; + + // For normal entries, check present flags + if !attr.contains(TableAttribute::PRESENT) { + // Skip entries filled with nothing + pfn = fill_entry(data, entry)?; + } + + Some(T::from_pfn(pfn)) +} + +fn _walk_page_table( + walk: &mut PageTableWalk, + cur_level: usize, + table: &mut T, + range: VRange, +) where + T: PageTable, +{ + let level = walk.levels[cur_level]; + + let page_size = 
level.page_size(); + let mut addr = range.start(); + + while addr < range.end() { + let idx = level.index_of(addr); + let entry = table.index_mut(idx); + + let mut next_table = None; + if cur_level < walk.levels.len() - 1 { + next_table = try_get_table(entry, &mut walk.data, walk.fill_entry[cur_level]); + } + + match ( + walk.walk_entry[cur_level](&mut walk.data, entry), + &mut next_table, + ) { + (WalkState::Break, _) => break, + (WalkState::Next, Some(next_table)) => _walk_page_table( + walk, + cur_level + 1, + next_table, + VRange::new(addr, range.end()), + ), + // `fill_entry` says that we shouldn't continue. + (WalkState::Next, None) => {} + _ => {} + } + + addr = addr.floor_to(page_size) + page_size; + } +} + +pub fn walk_page_table(walk: &mut PageTableWalk, table: &mut T, range: VRange) +where + T: PageTable, +{ + _walk_page_table(walk, 0, table, range); +} + +pub fn drop_user_page_table(mut root_page_table: T) +where + T: PageTable, +{ + fn walk(_: &mut (), entry: &mut T::Entry) -> WalkState { + let (pfn, attr) = entry.get(); + let Some(attr) = attr.as_table_attr() else { + return WalkState::Skip; + }; + + if !attr.contains(TableAttribute::USER) { + return WalkState::Skip; + } + + unsafe { + // Check `_walk_page_table`: We will and only will touch the next level of table with + // `next_table` holding a refcount. We take the table away from the parent table now. + T::take_pfn(pfn); + } + + entry.set(PFN::from_val(0), TableAttribute::empty().into()); + + if LEVEL == 2 { + WalkState::Skip + } else { + WalkState::Next + } + } + + let mut walk = PageTableWalk { + levels: T::LEVELS, + fill_entry: &[no_fill::, no_fill::, no_fill::], + walk_entry: &[walk::, walk::, walk::, skip_walk::], + data: (), + }; + + walk_page_table( + &mut walk, + &mut root_page_table, + VRange::new(VAddr::from(0), VAddr::from(0x0000_8000_0000_0000)), + ); +} + +pub fn iter_pte( + page_table: &mut T, + range: VRange, + fill_func: impl FnMut(&mut T::Entry) -> Option, + for_each: impl FnMut(&mut T::Entry), +) { + let walker = (fill_func, for_each); + + fn fill_entry( + (fill, _): &mut ( + impl FnMut(&mut T::Entry) -> Option, + impl FnMut(&mut T::Entry), + ), + entry: &mut T::Entry, + ) -> Option { + fill(entry) + } + + fn walk_entry( + (_, for_each): &mut ( + impl FnMut(&mut T::Entry) -> Option, + impl FnMut(&mut T::Entry), + ), + entry: &mut T::Entry, + ) -> WalkState { + for_each(entry); + WalkState::Next + } + + let mut walk = PageTableWalk { + levels: T::LEVELS, + fill_entry: &[fill_entry::, fill_entry::, fill_entry::], + walk_entry: &[ + cont_walk::, + cont_walk::, + cont_walk::, + walk_entry::, + ], + data: walker, + }; + + walk_page_table(&mut walk, page_table, range); +} + +pub fn no_fill(_: &mut D, _: &mut T::Entry) -> Option +where + T: PageTable, +{ + None +} + +pub fn skip_walk(_: &mut D, _: &mut T::Entry) -> WalkState +where + T: PageTable, +{ + WalkState::Skip +} + +pub fn cont_walk(_: &mut D, _: &mut T::Entry) -> WalkState +where + T: PageTable, +{ + WalkState::Next +} diff --git a/crates/eonix_mm/src/paging.rs b/crates/eonix_mm/src/paging.rs index 88da902e..f0166cf3 100644 --- a/crates/eonix_mm/src/paging.rs +++ b/crates/eonix_mm/src/paging.rs @@ -1,9 +1,11 @@ +mod list; mod page; mod page_alloc; mod pfn; -mod raw_page; +mod zone; -pub use page::{Page, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS}; -pub use page_alloc::{GlobalPageAlloc, NoAlloc, PageAlloc}; +pub use list::{FolioList, FolioListSized}; +pub use page::{BasicFolio, Folio, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS}; +pub use 
page_alloc::{FrameAlloc, GlobalFrameAlloc}; pub use pfn::PFN; -pub use raw_page::{RawPage, UnmanagedRawPage}; +pub use zone::Zone; diff --git a/crates/eonix_mm/src/paging/list.rs b/crates/eonix_mm/src/paging/list.rs new file mode 100644 index 00000000..2dd557c9 --- /dev/null +++ b/crates/eonix_mm/src/paging/list.rs @@ -0,0 +1,19 @@ +pub trait FolioList { + type Folio; + + fn is_empty(&self) -> bool; + + fn peek_head(&mut self) -> Option<&mut Self::Folio>; + + fn pop_head(&mut self) -> Option<&'static mut Self::Folio>; + fn push_tail(&mut self, page: &'static mut Self::Folio); + fn remove(&mut self, page: &mut Self::Folio); +} + +pub trait FolioListSized: FolioList + Sized { + const NEW: Self; + + fn new() -> Self { + Self::NEW + } +} diff --git a/crates/eonix_mm/src/paging/page.rs b/crates/eonix_mm/src/paging/page.rs index c5a14b5e..8b067e43 100644 --- a/crates/eonix_mm/src/paging/page.rs +++ b/crates/eonix_mm/src/paging/page.rs @@ -1,6 +1,8 @@ -use super::{GlobalPageAlloc, PageAlloc, RawPage as _, PFN}; -use crate::address::{AddrRange, PAddr, PhysAccess}; -use core::{fmt, mem::ManuallyDrop, ptr::NonNull, sync::atomic::Ordering}; +use core::mem::ManuallyDrop; +use core::ptr::NonNull; + +use super::PFN; +use crate::address::{PAddr, PRange}; pub const PAGE_SIZE: usize = 4096; pub const PAGE_SIZE_BITS: u32 = PAGE_SIZE.trailing_zeros(); @@ -15,306 +17,81 @@ pub struct PageBlock([u8; PAGE_SIZE]); /// A trait that provides the kernel access to the page. #[doc(notable_trait)] -pub trait PageAccess { +pub trait PageAccess: Clone { /// Returns a kernel-accessible pointer to the page referenced by the given /// physical frame number. /// /// # Safety /// This function is unsafe because calling this function on some non-existing /// pfn will cause undefined behavior. - unsafe fn get_ptr_for_pfn(pfn: PFN) -> NonNull; + unsafe fn get_ptr_for_pfn(&self, pfn: PFN) -> NonNull; /// Returns a kernel-accessible pointer to the given page. - fn get_ptr_for_page(page: &Page) -> NonNull { + fn get_ptr_for_page(&self, page: &F) -> NonNull { unsafe { // SAFETY: `page.pfn()` is guaranteed to be valid. - Self::get_ptr_for_pfn(page.pfn()) + self.get_ptr_for_pfn(page.pfn()) } } } -/// A Page allocated in allocator `A`. -#[derive(PartialEq, Eq, PartialOrd, Ord)] -pub struct Page { - raw_page: A::RawPage, - alloc: A, -} - -unsafe impl Send for Page {} -unsafe impl Sync for Page {} - -impl Page -where - A: GlobalPageAlloc, -{ - /// Allocate a page of the given *order*. - pub fn alloc_order(order: u32) -> Self { - Self::alloc_order_in(order, A::global()) - } - - /// Allocate exactly one page. - pub fn alloc() -> Self { - Self::alloc_in(A::global()) - } +/// A [`Folio`] represents one page or a bunch of adjacent pages. +pub trait Folio { + /// Returns the physical frame number of the folio, which is aligned with + /// the folio's size and valid. + fn pfn(&self) -> PFN; - /// Allocate a contiguous block of pages that can contain at least `count` pages. - pub fn alloc_at_least(count: usize) -> Self { - Self::alloc_at_least_in(count, A::global()) - } + /// Returns the folio's *order* (log2 of the number of pages contained in + /// the folio). + fn order(&self) -> u32; - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to a valid page allocated through `alloc_order()` and that the - /// page have not been freed or deallocated yet. - /// - /// No checks are done. 
Any violation of this assumption may lead to undefined behavior. - pub unsafe fn from_raw_unchecked(pfn: PFN) -> Self { - unsafe { Self::from_raw_unchecked_in(pfn, A::global()) } - } - - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// This function is a safe wrapper around `from_paddr_unchecked()` that does **some sort - /// of** checks to ensure that the page is valid and managed by the allocator. - /// - /// # Panic - /// This function will panic if the page is not valid or if the page is not managed by - /// the allocator. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to an existing page (A.K.A. inside the global page array) and the - /// page will not be freed or deallocated during the call. - pub unsafe fn from_raw(pfn: PFN) -> Self { - unsafe { Self::from_raw_in(pfn, A::global()) } - } - - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_in()`. - /// - /// # Safety - /// Check `from_raw()` for the safety requirements. - pub unsafe fn with_raw(pfn: PFN, func: F) -> O - where - F: FnOnce(&Self) -> O, - { - unsafe { Self::with_raw_in(pfn, A::global(), func) } - } - - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_unchecked_in()`. - /// - /// # Safety - /// Check `from_raw_unchecked()` for the safety requirements. - pub unsafe fn with_raw_unchecked(pfn: PFN, func: F, alloc: A) -> O - where - F: FnOnce(&Self) -> O, - { - unsafe { Self::with_raw_unchecked_in(pfn, func, alloc) } - } -} - -impl Page -where - A: PageAlloc, -{ - /// Allocate a page of the given *order*. - pub fn alloc_order_in(order: u32, alloc: A) -> Self { - Self { - raw_page: alloc.alloc_order(order).expect("Out of memory"), - alloc, - } - } - - /// Allocate exactly one page. - pub fn alloc_in(alloc: A) -> Self { - Self { - raw_page: alloc.alloc().expect("Out of memory"), - alloc, - } - } - - /// Allocate a contiguous block of pages that can contain at least `count` pages. - pub fn alloc_at_least_in(count: usize, alloc: A) -> Self { - Self { - raw_page: alloc.alloc_at_least(count).expect("Out of memory"), - alloc, - } - } - - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to a valid page managed by `alloc` and that the page have not - /// been freed or deallocated yet. - /// - /// No checks are done. Any violation of this assumption may lead to undefined behavior. - pub unsafe fn from_raw_unchecked_in(pfn: PFN, alloc: A) -> Self { - Self { - raw_page: A::RawPage::from(pfn), - alloc, - } + /// Returns the total size of the folio in bytes. + fn len(&self) -> usize { + 1 << (self.order() + PAGE_SIZE_BITS) } - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// This function is a safe wrapper around `from_paddr_unchecked()` that does **some sort - /// of** checks to ensure that the page is valid and managed by the allocator. - /// - /// # Panic - /// This function will panic if the page is not valid or if the page is not managed by - /// the allocator. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to an existing page (A.K.A. 
inside the global page array) and the - /// page will not be freed or deallocated during the call. - pub unsafe fn from_raw_in(pfn: PFN, alloc: A) -> Self { - unsafe { - // SAFETY: The caller guarantees that the page is inside the global page array. - assert!(alloc.has_management_over(A::RawPage::from(pfn))); - - // SAFETY: We've checked that the validity of the page. And the caller guarantees - // that the page will not be freed or deallocated during the call. - Self::from_raw_unchecked_in(pfn, alloc) - } + /// Returns the start physical address of the folio, which is guaranteed to + /// be aligned to the folio's size and valid. + fn start(&self) -> PAddr { + PAddr::from(self.pfn()) } - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_in()`. - /// - /// # Safety - /// Check `from_raw_in()` for the safety requirements. - pub unsafe fn with_raw_in(pfn: PFN, alloc: A, func: F) -> O - where - F: FnOnce(&Self) -> O, - { - unsafe { - let me = ManuallyDrop::new(Self::from_raw_in(pfn, alloc)); - func(&me) - } + /// Returns the physical address range of the folio, which is guaranteed to + /// be aligned to the folio's size and valid. + fn range(&self) -> PRange { + PRange::from(self.start()).grow(self.len()) } - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_unchecked_in()`. - /// - /// # Safety - /// Check `from_raw_unchecked_in()` for the safety requirements. - pub unsafe fn with_raw_unchecked_in(pfn: PFN, func: F, alloc: A) -> O + /// Consumes the folio and returns the PFN without dropping the reference + /// count the folio holds. + fn into_raw(self) -> PFN where - F: FnOnce(&Self) -> O, + Self: Sized, { - unsafe { - let me = ManuallyDrop::new(Self::from_raw_unchecked_in(pfn, alloc)); - func(&me) - } - } - - /// Whether we are the only owner of the page. - pub fn is_exclusive(&self) -> bool { - self.raw_page.refcount().load(Ordering::Acquire) == 1 - } - - /// Returns the *order* of the page, which is the log2 of the number of pages - /// contained in the page object. - pub fn order(&self) -> u32 { - self.raw_page.order() - } - - /// Returns the total size of the page in bytes. - pub fn len(&self) -> usize { - 1 << (self.order() + PAGE_SIZE_BITS) - } - - /// Consumes the `Page` and returns the physical frame number without dropping - /// the reference count the page holds. - pub fn into_raw(self) -> PFN { let me = ManuallyDrop::new(self); me.pfn() } - - /// Returns the physical frame number of the page, which is aligned with the - /// page size and valid. - pub fn pfn(&self) -> PFN { - Into::::into(self.raw_page) - } - - /// Returns the start physical address of the page, which is guaranteed to be - /// aligned to the page size and valid. - pub fn start(&self) -> PAddr { - PAddr::from(self.pfn()) - } - - /// Returns the physical address range of the page, which is guaranteed to be - /// aligned to the page size and valid. - pub fn range(&self) -> AddrRange { - AddrRange::from(self.start()).grow(self.len()) - } - - /// Get the allocator that manages this page. - pub fn allocator(&self) -> &A { - &self.alloc - } } -impl Clone for Page -where - A: PageAlloc, -{ - fn clone(&self) -> Self { - // SAFETY: Memory order here can be Relaxed is for the same reason as that - // in the copy constructor of `std::shared_ptr`.
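Everything on the new `Folio` trait derives from `pfn()` and `order()`: `len()` is `1 << (order + PAGE_SIZE_BITS)`, and `start()`/`range()` follow from the PFN. A small sketch using `BasicFolio` and `PFN::from_val` (both appear in this diff; the PFN value is illustrative):

```rust
use eonix_mm::paging::{BasicFolio, Folio, PFN};

fn folio_sizes() {
    // An order-2 folio covers 2^2 = 4 contiguous 4 KiB pages.
    let folio = BasicFolio::new(PFN::from_val(0x1000), 2);
    assert_eq!(folio.len(), 4 * 4096);

    // `into_raw` hands back the PFN without dropping any reference the
    // folio may hold; a later `BasicFolio::new(pfn, 2)` can re-adopt it.
    let _pfn = folio.into_raw();
}
```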
- self.raw_page.refcount().fetch_add(1, Ordering::Relaxed); - - Self { - raw_page: self.raw_page, - alloc: self.alloc.clone(), - } - } +/// A simple [`Folio`] with no reference counting or other ownership mechanism. +#[derive(Clone)] +pub struct BasicFolio { + pfn: PFN, + order: u32, } -impl Drop for Page -where - A: PageAlloc, -{ - fn drop(&mut self) { - match self.raw_page.refcount().fetch_sub(1, Ordering::AcqRel) { - 0 => panic!("Refcount for an in-use page is 0"), - 1 => unsafe { - // SAFETY: `self.raw_page` points to a valid page inside the global page array. - assert!(self.alloc.has_management_over(self.raw_page)); - - // SAFETY: `self.raw_page` is managed by the allocator and we're dropping the page. - self.alloc.dealloc(self.raw_page) - }, - _ => {} - } +impl BasicFolio { + pub const fn new(pfn: PFN, order: u32) -> Self { + Self { pfn, order } } } -impl fmt::Debug for Page { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "Page({:?}, order={})", - Into::::into(self.raw_page), - self.order() - ) +impl Folio for BasicFolio { + fn pfn(&self) -> PFN { + self.pfn } -} -impl PageAccess for T -where - T: PhysAccess, -{ - unsafe fn get_ptr_for_pfn(pfn: PFN) -> NonNull { - unsafe { - // SAFETY: The physical address of a existing page must be - // aligned to the page size. - T::as_ptr(PAddr::from(pfn)) - } + fn order(&self) -> u32 { + self.order } } diff --git a/crates/eonix_mm/src/paging/page_alloc.rs b/crates/eonix_mm/src/paging/page_alloc.rs index fe222605..267d3ccb 100644 --- a/crates/eonix_mm/src/paging/page_alloc.rs +++ b/crates/eonix_mm/src/paging/page_alloc.rs @@ -1,89 +1,44 @@ -use super::{raw_page::UnmanagedRawPage, RawPage}; +use super::Folio; -/// A trait for allocating and deallocating pages of memory. +/// A trait for allocating and deallocating folios. /// /// Note that the instances of this trait should provide pointer-like or reference-like /// behavior, meaning that the allocators are to be passed around by value and stored in /// managed data structures. This is because the allocator may be used to deallocate the /// pages it allocates. -#[doc(notable_trait)] -pub trait PageAlloc: Clone { - type RawPage: RawPage; +pub trait FrameAlloc: Clone { + type Folio: Folio; - /// Allocate a page of the given *order*. - fn alloc_order(&self, order: u32) -> Option; + /// Allocate a folio of the given *order*. + fn alloc_order(&self, order: u32) -> Option; - /// Allocate exactly one page. - fn alloc(&self) -> Option { + /// Allocate exactly one folio. + fn alloc(&self) -> Option { self.alloc_order(0) } - /// Allocate a contiguous block of pages that can contain at least `count` pages. - fn alloc_at_least(&self, count: usize) -> Option { + /// Allocate a folio that can contain at least [`count`] contiguous pages. + fn alloc_at_least(&self, count: usize) -> Option { let order = count.next_power_of_two().trailing_zeros(); self.alloc_order(order) } - - /// Deallocate a page. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller MUST ensure that - /// `raw_page` is allocated in this allocator and never used after this call. - unsafe fn dealloc(&self, raw_page: Self::RawPage); - - /// Check whether the page is allocated and managed by the allocator. - fn has_management_over(&self, page_ptr: Self::RawPage) -> bool; } /// A trait for global page allocators. /// /// Global means that we can get an instance of the allocator from anywhere in the kernel. 
-#[doc(notable_trait)] -pub trait GlobalPageAlloc: PageAlloc + 'static { - /// Get the global page allocator. - fn global() -> Self; +pub trait GlobalFrameAlloc: FrameAlloc + 'static { + /// The global page allocator. + const GLOBAL: Self; } -#[derive(Clone)] -pub struct NoAlloc; - -impl<'a, A> PageAlloc for &'a A +impl<'a, A> FrameAlloc for &'a A where - A: PageAlloc, + A: FrameAlloc, { - type RawPage = A::RawPage; + type Folio = A::Folio; - fn alloc_order(&self, order: u32) -> Option { + fn alloc_order(&self, order: u32) -> Option { (*self).alloc_order(order) } - - unsafe fn dealloc(&self, raw_page: Self::RawPage) { - unsafe { (*self).dealloc(raw_page) } - } - - fn has_management_over(&self, raw_page: Self::RawPage) -> bool { - (*self).has_management_over(raw_page) - } -} - -impl PageAlloc for NoAlloc { - type RawPage = UnmanagedRawPage; - - fn alloc_order(&self, _: u32) -> Option { - panic!("`NoAlloc` cannot allocate pages"); - } - - unsafe fn dealloc(&self, _: Self::RawPage) { - panic!("`NoAlloc` cannot free pages"); - } - - fn has_management_over(&self, _: Self::RawPage) -> bool { - true - } -} - -impl GlobalPageAlloc for NoAlloc { - fn global() -> Self { - Self - } } diff --git a/crates/eonix_mm/src/paging/raw_page.rs b/crates/eonix_mm/src/paging/raw_page.rs deleted file mode 100644 index 7951729d..00000000 --- a/crates/eonix_mm/src/paging/raw_page.rs +++ /dev/null @@ -1,52 +0,0 @@ -use super::PFN; -use core::sync::atomic::AtomicUsize; - -/// A `RawPage` represents a page of memory in the kernel. It is a low-level -/// representation of a page that is used by the kernel to manage memory. -#[doc(notable_trait)] -pub trait RawPage: Clone + Copy + From + Into { - fn order(&self) -> u32; - fn refcount(&self) -> &AtomicUsize; - - fn is_present(&self) -> bool; -} - -#[derive(Clone, Copy)] -pub struct UnmanagedRawPage(PFN, u32); - -/// Unmanaged raw pages should always have a non-zero refcount to -/// avoid `free()` from being called. -static UNMANAGED_RAW_PAGE_CLONE_COUNT: AtomicUsize = AtomicUsize::new(1); - -impl UnmanagedRawPage { - pub const fn new(pfn: PFN, order: u32) -> Self { - Self(pfn, order) - } } - -impl From for UnmanagedRawPage { - fn from(value: PFN) -> Self { - Self::new(value, 0) - } -} - -impl Into for UnmanagedRawPage { - fn into(self) -> PFN { - let Self(pfn, _) = self; - pfn - } -} - -impl RawPage for UnmanagedRawPage { - fn order(&self) -> u32 { - self.1 - } - - fn refcount(&self) -> &AtomicUsize { - &UNMANAGED_RAW_PAGE_CLONE_COUNT - } - - fn is_present(&self) -> bool { - true - } -} diff --git a/crates/eonix_mm/src/paging/zone.rs b/crates/eonix_mm/src/paging/zone.rs new file mode 100644 index 00000000..a2e85343 --- /dev/null +++ b/crates/eonix_mm/src/paging/zone.rs @@ -0,0 +1,19 @@ +use core::ptr::NonNull; + +use super::PFN; + +use crate::address::PRange; + +/// A [`Zone`] holds a set of pages ([`Zone::Page`]) that share the same NUMA node or +/// "physical location". +pub trait Zone: Send + Sync { + type Page; + + /// Whether the [`range`] is within this [`Zone`]. + fn contains_prange(&self, range: PRange) -> bool; + + /// Get the [`Zone::Page`] that [`pfn`] points to. + /// + /// # Return + /// [`None`] if [`pfn`] is not in this [`Zone`].
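`GlobalFrameAlloc` above now exposes the global allocator as an associated `const GLOBAL` instead of the old `fn global()`, so a handle can be named in const contexts and copied around as a plain value. A sketch of an implementor, assuming a hypothetical `buddy_alloc(order)` entry point into the kernel's buddy allocator (the traits and `BasicFolio` are from this diff):

```rust
use eonix_mm::paging::{BasicFolio, FrameAlloc, GlobalFrameAlloc, PFN};

#[derive(Clone)]
pub struct KernelFrameAlloc;

impl FrameAlloc for KernelFrameAlloc {
    type Folio = BasicFolio;

    // `alloc_order` is the only required method; `alloc` and
    // `alloc_at_least` are provided on top of it.
    fn alloc_order(&self, order: u32) -> Option<BasicFolio> {
        buddy_alloc(order).map(|pfn| BasicFolio::new(pfn, order))
    }
}

impl GlobalFrameAlloc for KernelFrameAlloc {
    const GLOBAL: Self = KernelFrameAlloc;
}

// Hypothetical hook into the real frame allocator.
fn buddy_alloc(_order: u32) -> Option<PFN> {
    unimplemented!()
}
```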
+ fn get_page(&self, pfn: PFN) -> Option>; +} diff --git a/crates/eonix_percpu/src/lib.rs b/crates/eonix_percpu/src/lib.rs index 1fc7ffb8..a00b5c05 100644 --- a/crates/eonix_percpu/src/lib.rs +++ b/crates/eonix_percpu/src/lib.rs @@ -1,28 +1,21 @@ #![no_std] use core::alloc::Layout; -use core::ptr::null_mut; -use core::ptr::NonNull; -use core::sync::atomic::AtomicPtr; -use core::sync::atomic::Ordering; - -#[cfg(target_arch = "x86_64")] -pub use eonix_percpu_macros::define_percpu_x86_64 as define_percpu; - -#[cfg(target_arch = "x86_64")] -pub use eonix_percpu_macros::define_percpu_shared_x86_64 as define_percpu_shared; - -#[cfg(target_arch = "riscv64")] -pub use eonix_percpu_macros::define_percpu_riscv64 as define_percpu; - -#[cfg(target_arch = "riscv64")] -pub use eonix_percpu_macros::define_percpu_shared_riscv64 as define_percpu_shared; +use core::ptr::{null_mut, NonNull}; +use core::sync::atomic::{AtomicPtr, Ordering}; #[cfg(target_arch = "loongarch64")] pub use eonix_percpu_macros::define_percpu_loongarch64 as define_percpu; - +#[cfg(target_arch = "riscv64")] +pub use eonix_percpu_macros::define_percpu_riscv64 as define_percpu; #[cfg(target_arch = "loongarch64")] pub use eonix_percpu_macros::define_percpu_shared_loongarch64 as define_percpu_shared; +#[cfg(target_arch = "riscv64")] +pub use eonix_percpu_macros::define_percpu_shared_riscv64 as define_percpu_shared; +#[cfg(target_arch = "x86_64")] +pub use eonix_percpu_macros::define_percpu_shared_x86_64 as define_percpu_shared; +#[cfg(target_arch = "x86_64")] +pub use eonix_percpu_macros::define_percpu_x86_64 as define_percpu; const MAX_CPUS: usize = 256; @@ -41,7 +34,7 @@ impl PercpuArea { unsafe extern "C" { fn PERCPU_LENGTH(); } - let len = PERCPU_LENGTH as usize; + let len = PERCPU_LENGTH as *const () as usize; assert_ne!(len, 0, "Percpu length should not be zero."); len @@ -52,7 +45,7 @@ impl PercpuArea { fn PERCPU_DATA_START(); } - let addr = PERCPU_DATA_START as usize; + let addr = PERCPU_DATA_START as *const () as usize; NonNull::new(addr as *mut _).expect("Percpu data should not be null.") } diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs index 3f72fbf4..b4b7960d 100644 --- a/crates/eonix_runtime/src/scheduler.rs +++ b/crates/eonix_runtime/src/scheduler.rs @@ -1,20 +1,19 @@ -use crate::{ - executor::OutputHandle, - ready_queue::{local_rq, ReadyQueue}, - task::{Task, TaskAdapter, TaskHandle, TaskState}, -}; -use alloc::{sync::Arc, task::Wake}; -use core::{ - ops::{Deref, DerefMut}, - ptr::NonNull, - task::{Context, Poll, Waker}, -}; +use alloc::sync::Arc; +use alloc::task::Wake; +use core::ops::{Deref, DerefMut}; +use core::ptr::NonNull; +use core::task::{Context, Poll, Waker}; + use eonix_hal::processor::halt; use eonix_log::println_trace; use eonix_sync::{LazyLock, Spin, SpinIrq as _}; use intrusive_collections::RBTree; use pointers::BorrowedArc; +use crate::executor::OutputHandle; +use crate::ready_queue::{local_rq, ReadyQueue}; +use crate::task::{Task, TaskAdapter, TaskHandle, TaskState}; + #[eonix_percpu::define_percpu] static CURRENT_TASK: Option> = None; @@ -93,12 +92,6 @@ impl Runtime { } } - fn current(&self) -> Option> { - CURRENT_TASK - .get() - .map(|ptr| unsafe { BorrowedArc::from_raw(ptr) }) - } - fn remove_and_enqueue_current(&self, rq: &mut impl DerefMut) { let Some(current) = CURRENT_TASK .swap(None) @@ -116,7 +109,7 @@ impl Runtime { }) { Ok(TaskState::READY_RUNNING) => { println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Re-enqueueing task {:?} (CPU{})", 
current.id, eonix_hal::processor::CPU::local().cpuid(), @@ -126,7 +119,7 @@ impl Runtime { } Ok(_) => { println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Current task {:?} (CPU{}) is blocked, not re-enqueueing", current.id, eonix_hal::processor::CPU::local().cpuid(), @@ -184,7 +177,7 @@ impl Runtime { }; println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Switching to task {:?} (CPU{})", next.id, eonix_hal::processor::CPU::local().cpuid(), @@ -212,7 +205,7 @@ impl Runtime { ); println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Task {:?} finished execution, removing...", Task::current().id, ); diff --git a/crates/eonix_sync/eonix_spin/src/lib.rs b/crates/eonix_sync/eonix_spin/src/lib.rs index 4718b867..7225aceb 100644 --- a/crates/eonix_sync/eonix_spin/src/lib.rs +++ b/crates/eonix_sync/eonix_spin/src/lib.rs @@ -2,13 +2,11 @@ mod guard; -use core::{ - cell::UnsafeCell, - marker::PhantomData, - sync::atomic::{AtomicBool, Ordering}, -}; -use eonix_sync_base::{Relax, SpinRelax}; +use core::cell::UnsafeCell; +use core::marker::PhantomData; +use core::sync::atomic::{AtomicBool, Ordering}; +use eonix_sync_base::{Relax, SpinRelax}; pub use guard::{SpinGuard, UnlockedSpinGuard}; pub trait SpinContext { @@ -84,7 +82,7 @@ where T: ?Sized, R: Relax, { - pub fn lock_with_context(&self, context: C) -> SpinGuard + pub fn lock_with_context(&self, context: C) -> SpinGuard<'_, T, C, R> where C: SpinContext, { @@ -100,7 +98,7 @@ where ) } - pub fn lock(&self) -> SpinGuard { + pub fn lock(&self) -> SpinGuard<'_, T, DisablePreemption, R> { self.lock_with_context(DisablePreemption::save()) } diff --git a/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs b/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs index 76a28682..b70cdc3d 100644 --- a/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs +++ b/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs @@ -12,7 +12,7 @@ pub trait SpinIrq { type Context: SpinContext; type Relax; - fn lock_irq(&self) -> SpinGuard; + fn lock_irq(&self) -> SpinGuard<'_, Self::Value, Self::Context, Self::Relax>; } impl SpinContext for IrqContext { @@ -50,7 +50,7 @@ where type Context = IrqContext; type Relax = R; - fn lock_irq(&self) -> SpinGuard { + fn lock_irq(&self) -> SpinGuard<'_, Self::Value, Self::Context, Self::Relax> { self.lock_with_context(IrqContext::save()) } } diff --git a/crates/intrusive_list/src/lib.rs b/crates/intrusive_list/src/lib.rs index af8c4f1a..440944d0 100644 --- a/crates/intrusive_list/src/lib.rs +++ b/crates/intrusive_list/src/lib.rs @@ -25,6 +25,8 @@ impl List { } pub fn insert(&mut self, node: &mut Link) { + // TODO: `node` above should be of 'static. + self.head.insert(node); self.count += 1; } diff --git a/crates/posix_types/src/open.rs b/crates/posix_types/src/open.rs index 758ea331..7135e5b8 100644 --- a/crates/posix_types/src/open.rs +++ b/crates/posix_types/src/open.rs @@ -11,6 +11,8 @@ bitflags! { const O_CREAT = 0x40; /// Exclusive access, fail if file exists const O_EXCL = 0x80; + /// Don't set controlling terminal. 
+ const O_NOCTTY = 0x100; /// Truncate file to zero length if it exists const O_TRUNC = 0x200; /// Open file in append mode @@ -116,6 +118,8 @@ impl AtFlags { } pub fn statx_default_sync(&self) -> bool { - !self.intersects(AtFlags::AT_STATX_FORCE_SYNC | AtFlags::AT_STATX_DONT_SYNC) + !self.intersects( + AtFlags::AT_STATX_FORCE_SYNC | AtFlags::AT_STATX_DONT_SYNC, + ) } } diff --git a/crates/posix_types/src/poll.rs b/crates/posix_types/src/poll.rs index 781f589f..dcf5f9b2 100644 --- a/crates/posix_types/src/poll.rs +++ b/crates/posix_types/src/poll.rs @@ -1,5 +1,7 @@ pub const FDSET_LENGTH: usize = 1024 / (8 * size_of::()); +// TODO: Implement syscall pselect +#[allow(unused)] pub struct FDSet { fds_bits: [usize; FDSET_LENGTH], } diff --git a/crates/posix_types/src/result.rs b/crates/posix_types/src/result.rs index a10ff0ad..1535c444 100644 --- a/crates/posix_types/src/result.rs +++ b/crates/posix_types/src/result.rs @@ -1,14 +1,18 @@ pub enum PosixError { + ENOENT = 2, EFAULT = 14, EXDEV = 18, + ENOTDIR = 20, EINVAL = 22, } impl From for u32 { fn from(error: PosixError) -> Self { match error { + PosixError::ENOENT => 2, PosixError::EFAULT => 14, PosixError::EXDEV => 18, + PosixError::ENOTDIR => 20, PosixError::EINVAL => 22, } } @@ -17,8 +21,10 @@ impl From for u32 { impl core::fmt::Debug for PosixError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { + Self::ENOENT => write!(f, "ENOENT"), Self::EFAULT => write!(f, "EFAULT"), Self::EXDEV => write!(f, "EXDEV"), + Self::ENOTDIR => write!(f, "ENOTDIR"), Self::EINVAL => write!(f, "EINVAL"), } } diff --git a/crates/posix_types/src/syscall_no/loongarch64.rs b/crates/posix_types/src/syscall_no/loongarch64.rs index 19776a65..b0d54689 100644 --- a/crates/posix_types/src/syscall_no/loongarch64.rs +++ b/crates/posix_types/src/syscall_no/loongarch64.rs @@ -136,7 +136,7 @@ pub const SYS_RT_SIGSUSPEND: usize = 133; pub const SYS_RT_SIGACTION: usize = 134; pub const SYS_RT_SIGPROCMASK: usize = 135; pub const SYS_RT_SIGPENDING: usize = 136; -pub const SYS_RT_SIGTIMEDWAIT_TIME32: usize = 137; +pub const SYS_RT_SIGTIMEDWAIT: usize = 137; pub const SYS_RT_SIGQUEUEINFO: usize = 138; pub const SYS_RT_SIGRETURN: usize = 139; pub const SYS_SETPRIORITY: usize = 140; @@ -295,7 +295,6 @@ pub const SYS_RECVMMSG: usize = 417; pub const SYS_MQ_TIMEDSEND: usize = 418; pub const SYS_MQ_TIMEDRECEIVE: usize = 419; pub const SYS_SEMTIMEDOP: usize = 420; -pub const SYS_RT_SIGTIMEDWAIT: usize = 421; pub const SYS_FUTEX: usize = 422; pub const SYS_SCHED_RR_GET_INTERVAL: usize = 423; pub const SYS_PIDFD_SEND_SIGNAL: usize = 424; diff --git a/crates/posix_types/src/syscall_no/riscv64.rs b/crates/posix_types/src/syscall_no/riscv64.rs index 4457c20e..076942e9 100644 --- a/crates/posix_types/src/syscall_no/riscv64.rs +++ b/crates/posix_types/src/syscall_no/riscv64.rs @@ -136,7 +136,7 @@ pub const SYS_RT_SIGSUSPEND: usize = 133; pub const SYS_RT_SIGACTION: usize = 134; pub const SYS_RT_SIGPROCMASK: usize = 135; pub const SYS_RT_SIGPENDING: usize = 136; -pub const SYS_RT_SIGTIMEDWAIT_TIME32: usize = 137; +pub const SYS_RT_SIGTIMEDWAIT: usize = 137; pub const SYS_RT_SIGQUEUEINFO: usize = 138; pub const SYS_RT_SIGRETURN: usize = 139; pub const SYS_SETPRIORITY: usize = 140; diff --git a/crates/slab_allocator/Cargo.toml b/crates/slab_allocator/Cargo.toml index 067b6f53..926ac688 100644 --- a/crates/slab_allocator/Cargo.toml +++ b/crates/slab_allocator/Cargo.toml @@ -6,5 +6,3 @@ edition = "2024" [dependencies] eonix_mm = { path = "../eonix_mm" } 
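The `lock_irq()` changes above only spell out the elided `'_` guard lifetimes; behavior is unchanged. For context, this is the pattern the AHCI driver later in this diff relies on for its port table, which the IRQ handler also touches; a minimal sketch (assuming, as the scheduler code above does, that the guard derefs to the protected value):

```rust
use eonix_sync::{Spin, SpinIrq as _};

// `lock_irq()` saves and disables interrupts for the guard's lifetime, so
// the IRQ handler cannot preempt us while we hold the lock.
fn complete_slot(ports: &Spin<[Option<u32>; 32]>, slot: usize) {
    let mut ports = ports.lock_irq();
    ports[slot] = None;
} // saved interrupt state is restored when the guard drops
```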
eonix_sync = { path = "../eonix_sync" } -intrusive_list = { path = "../intrusive_list" } - diff --git a/crates/slab_allocator/src/lib.rs b/crates/slab_allocator/src/lib.rs index ce163183..c3e7f392 100644 --- a/crates/slab_allocator/src/lib.rs +++ b/crates/slab_allocator/src/lib.rs @@ -1,69 +1,281 @@ #![no_std] -mod slab_cache; +use core::ptr::NonNull; -use core::{cmp::max, ptr::NonNull}; - -use eonix_mm::paging::{PageAlloc, RawPage}; +use eonix_mm::paging::{FolioList, FolioListSized}; use eonix_sync::Spin; -use intrusive_list::Link; -use slab_cache::SlabCache; -pub trait SlabRawPage: RawPage { - /// Get the container raw page struct of the list link. +#[repr(C)] +pub union SlabSlot { + slab_slot: Option>, + data: u8, +} + +pub trait SlabPage: Sized + 'static { + fn get_data_ptr(&self) -> NonNull<[u8]>; + + fn get_free_slot(&self) -> Option>; + fn set_free_slot(&mut self, next: Option>); + + fn get_alloc_count(&self) -> usize; + + /// Increase the allocation count by 1 and return the increased value. + fn inc_alloc_count(&mut self) -> usize; + + /// Decrease the allocation count by 1 and return the decreased value. + fn dec_alloc_count(&mut self) -> usize; + + /// Get the [`SlabPage`] that `ptr` is allocated from. /// /// # Safety - /// The caller MUST ensure that the link points to a `RawPage`. - unsafe fn from_link(link: &mut Link) -> Self; + /// The caller MUST ensure that no others could be calling this function and + /// getting the [`SlabPage`] at the same time. + unsafe fn from_allocated(ptr: NonNull) -> &'static mut Self; +} + +pub(crate) trait SlabPageExt { + fn alloc_slot(&mut self) -> Option>; - /// Get the list link of the raw page. - /// /// # Safety - /// The caller MUST ensure that at any time, only one mutable reference - /// to the link exists. - unsafe fn get_link(&self) -> &mut Link; + /// The caller MUST ensure that `slot_data_ptr` points to some position + /// previously allocated by [`SlabPageExt::alloc_slot`]. + unsafe fn free_slot(&mut self, slot_data_ptr: NonNull); - fn slab_init(&self, first_free: Option>); + fn is_empty(&self) -> bool; + fn is_full(&self) -> bool; +} + +impl SlabPageExt for T +where + T: SlabPage, +{ + fn alloc_slot(&mut self) -> Option> { + let mut free_slot = self.get_free_slot()?; + + unsafe { + let free_slot = free_slot.as_mut(); - // which slab page the ptr belong - fn in_which(ptr: *mut u8) -> Self; + let next_slot = free_slot.slab_slot; + // ===== `free_slot` is now safe to be overwritten - fn real_page_ptr(&self) -> *mut u8; + self.set_free_slot(next_slot); + self.inc_alloc_count(); - fn allocated_count(&self) -> &mut u32; + Some(NonNull::new_unchecked(&mut free_slot.data)) + } + } + + unsafe fn free_slot(&mut self, slot_data_ptr: NonNull) { + unsafe { + let mut free_slot: NonNull = slot_data_ptr.cast(); + free_slot.as_mut().slab_slot = self.get_free_slot(); + + self.set_free_slot(Some(free_slot)); + self.dec_alloc_count(); + } + } - fn next_free(&self) -> &mut Option>; + fn is_empty(&self) -> bool { + self.get_alloc_count() == 0 + } + + fn is_full(&self) -> bool { + self.get_free_slot().is_none() + } } -pub struct SlabAllocator { - slabs: [Spin>; SLAB_CACHE_COUNT], - alloc: A, +/// Allocate a page suitable for slab system use. The page MUST come with +/// its allocation count 0 and next free slot None. +/// +/// # Safety +/// The page returned MUST have been properly initialized after allocation. 
+pub unsafe trait SlabPageAlloc { + type Page: SlabPage; + type PageList: FolioList; + + fn alloc_slab_page(&self) -> &'static mut Self::Page; } -pub struct SlabAllocator { - slabs: [Spin>; SLAB_CACHE_COUNT], - alloc: A, +pub(crate) struct SlabList +where + T: FolioList, +{ + empty_list: T, + partial_list: T, + full_list: T, + object_size: usize, } -unsafe impl Send for SlabAllocator {} -unsafe impl Sync for SlabAllocator {} +pub struct SlabAlloc where - Raw: SlabRawPage, - Allocator: PageAlloc, + P: SlabPageAlloc, { - pub fn new_in(alloc: Allocator) -> Self { + slabs: [Spin>; COUNT], + alloc: P, +} + +unsafe impl Send for SlabAlloc where P: SlabPageAlloc {} +unsafe impl Sync for SlabAlloc where P: SlabPageAlloc {} + +impl SlabAlloc +where + L: SlabPageAlloc, + L::PageList: FolioListSized, +{ + pub fn new_in(alloc: L) -> Self { Self { - slabs: core::array::from_fn(|i| Spin::new(SlabCache::new_in(1 << (i + 3)))), + slabs: core::array::from_fn(|i| Spin::new(SlabList::new(1 << (i + 3)))), alloc, } } - pub fn alloc(&self, mut size: usize) -> *mut u8 { - size = max(8, size); + pub fn alloc(&self, mut size: usize) -> NonNull { + size = size.max(8); let idx = size.next_power_of_two().trailing_zeros() - 3; self.slabs[idx as usize].lock().alloc(&self.alloc) } - pub fn dealloc(&self, ptr: *mut u8, mut size: usize) { - size = max(8, size); + pub unsafe fn dealloc(&self, ptr: NonNull, mut size: usize) { + size = size.max(8); let idx = size.next_power_of_two().trailing_zeros() - 3; - self.slabs[idx as usize].lock().dealloc(ptr, &self.alloc); + + unsafe { + // SAFETY: The caller guarantees that `ptr` was allocated from this allocator with the same `size`. + self.slabs[idx as usize].lock().dealloc(ptr, &self.alloc); + } + } +} + +impl SlabList +where + T: FolioListSized, +{ + const fn new(object_size: usize) -> Self { + Self { + empty_list: T::NEW, + partial_list: T::NEW, + full_list: T::NEW, + object_size, + } } } + +impl SlabList +where + T: FolioList, + T::Folio: SlabPage, +{ + fn alloc_from_partial(&mut self) -> NonNull { + let head = self.partial_list.peek_head().unwrap(); + let slot = head.alloc_slot().unwrap(); + + if head.is_full() { + let head = self.partial_list.pop_head().unwrap(); + self.full_list.push_tail(head); + } + + slot + } + + fn alloc_from_empty(&mut self) -> NonNull { + let head = self.empty_list.pop_head().unwrap(); + let slot = head.alloc_slot().unwrap(); + + if head.is_full() { + self.full_list.push_tail(head); + } else { + self.partial_list.push_tail(head); + } + + slot + } + + fn charge(&mut self, alloc: &impl SlabPageAlloc) { + let slab = alloc.alloc_slab_page(); + let free_slot = make_slab_page(slab.get_data_ptr(), self.object_size); + + slab.set_free_slot(Some(free_slot)); + + self.empty_list.push_tail(slab); + } + + fn alloc(&mut self, alloc: &impl SlabPageAlloc) -> NonNull { + if !self.partial_list.is_empty() { + return self.alloc_from_partial(); + } + + if self.empty_list.is_empty() { + self.charge(alloc); + } + + self.alloc_from_empty() + } + + unsafe fn dealloc(&mut self, ptr: NonNull, _alloc: &impl SlabPageAlloc) { + let slab_page = unsafe { + // SAFETY: We hold the slab list lock, so nobody else can get the `SlabPage` for `ptr` concurrently. + ::from_allocated(ptr) + }; + + let (was_full, is_empty); + + was_full = slab_page.is_full(); + + unsafe { + // SAFETY: `ptr` was returned by `alloc_slot` on this page and is freed exactly once here. + slab_page.free_slot(ptr); + } + + is_empty = slab_page.is_empty(); + + match (was_full, is_empty) { + (false, false) => {} + (false, true) => { + self.partial_list.remove(slab_page); + self.empty_list.push_tail(slab_page); + } + (true, false) => { + self.full_list.remove(slab_page); + self.partial_list.push_tail(slab_page); + } + (true, true) => { + self.full_list.remove(slab_page); + self.empty_list.push_tail(slab_page); + } + } + + //
TODO: Check whether we should place some pages back with `alloc` if + // the global free page count is below the watermark. + } +} + +pub fn make_slab_page(page_ptr: NonNull<[u8]>, slot_size: usize) -> NonNull { + assert!( + slot_size >= core::mem::size_of::(), + "The minimum slot size is of a pointer's width" + ); + + let page_size = page_ptr.len(); + let slot_count = page_size / slot_size; + let page_start: NonNull = page_ptr.cast(); + + // Quick checks + assert!( + page_size % slot_size == 0, + "The page's size should be a multiple of the slot size" + ); + + let mut prev_free_slot = None; + for i in (0..slot_count).rev() { + let offset = i * slot_size; + + unsafe { + let mut slot_ptr: NonNull = page_start.add(offset).cast(); + + slot_ptr.as_mut().slab_slot = prev_free_slot; + prev_free_slot = Some(slot_ptr); + } + } + + prev_free_slot.expect("There should be at least one slot.") +} diff --git a/crates/slab_allocator/src/slab_cache.rs b/crates/slab_allocator/src/slab_cache.rs deleted file mode 100644 index 98e27fc8..00000000 --- a/crates/slab_allocator/src/slab_cache.rs +++ /dev/null @@ -1,164 +0,0 @@ -use super::SlabRawPage; -use core::{marker::PhantomData, ptr::NonNull}; -use eonix_mm::paging::{PageAlloc, PAGE_SIZE}; -use intrusive_list::List; - -pub(crate) struct SlabCache { - empty_list: List, - partial_list: List, - full_list: List, - object_size: u32, - _phantom: PhantomData<(T, A)>, -} - -trait SlabRawPageExt { - fn alloc_slot(&self) -> Option>; - fn dealloc_slot(&self, slot_ptr: *mut u8); - fn is_full(&self) -> bool; - fn is_empty(&self) -> bool; - fn slab_page_init(&self, object_size: u32) -> Option>; -} - -impl SlabRawPageExt for T -where - T: SlabRawPage, -{ - fn alloc_slot(&self) -> Option> { - let ptr = self.next_free().clone(); - - let next_free = match ptr { - Some(ptr) => unsafe { ptr.read() as *mut usize }, - None => unreachable!(), - }; - *self.allocated_count() += 1; - *self.next_free() = NonNull::new(next_free); - return ptr; - } - - fn dealloc_slot(&self, slot_ptr: *mut u8) { - let slot_ptr = slot_ptr as *mut usize; - - if let Some(last_free) = self.next_free().clone() { - unsafe { *slot_ptr = last_free.as_ptr() as usize } - } else { - unsafe { *slot_ptr = 0 } - } - - *self.allocated_count() -= 1; - *self.next_free() = NonNull::new(slot_ptr); - } - - fn slab_page_init(&self, object_size: u32) -> Option> { - assert!(object_size >= core::mem::size_of::() as u32); - - let first_free = self.real_page_ptr() as *mut usize; - - let mut slot_ptr = first_free; - let mut slot_count = PAGE_SIZE / object_size as usize; - - // SAFETY: carefully ptr operate - unsafe { - loop { - if slot_count == 1 { - *slot_ptr = 0; - break; - } - - let next_ptr = slot_ptr.byte_add(object_size as usize); - *slot_ptr = next_ptr as usize; - slot_ptr = next_ptr; - slot_count -= 1; - } - } - - NonNull::new(first_free) - } - - fn is_empty(&self) -> bool { - self.allocated_count().clone() == 0 - } - - fn is_full(&self) -> bool { - self.next_free().is_none() - } -} - -impl SlabCache -where - Raw: SlabRawPage, - Allocator: PageAlloc, -{ - pub(crate) const fn new_in(object_size: u32) -> Self { - // avoid unnecessary branch in alloc and dealloc - assert!(object_size <= PAGE_SIZE as u32 / 2); - - Self { - empty_list: List::new(), - partial_list: List::new(), - full_list: List::new(), - object_size: object_size, - _phantom: PhantomData, - } - } - - pub(crate) fn alloc(&mut self, alloc: &Allocator) -> *mut u8 { - if !self.partial_list.is_empty() { - let page_ptr = unsafe { - Raw::from_link( - self.partial_list 
- .head() - .expect("partial pages should not be empty"), ) }; - - let ptr = page_ptr.alloc_slot().expect("should get slot"); - - if page_ptr.is_full() { - self.partial_list.remove(unsafe { page_ptr.get_link() }); - self.full_list.insert(unsafe { page_ptr.get_link() }); - } - return ptr.as_ptr() as *mut u8; - } - - if !self.empty_list.is_empty() { - let page_ptr = unsafe { - Raw::from_link( - self.empty_list - .head() - .expect("empty pages should not be empty"), - ) - }; - - let ptr = page_ptr.alloc_slot().expect("should get slot"); - self.empty_list.remove(unsafe { page_ptr.get_link() }); - self.partial_list.insert(unsafe { page_ptr.get_link() }); - return ptr.as_ptr() as *mut u8; - } - - let new_page_ptr = alloc.alloc().expect("slab_cache get page fail!"); - let first_free = new_page_ptr.slab_page_init(self.object_size); - new_page_ptr.slab_init(first_free); - let ptr = new_page_ptr.alloc_slot().expect("should get slot"); - self.partial_list.insert(unsafe { new_page_ptr.get_link() }); - ptr.as_ptr() as *mut u8 - } - - pub(crate) fn dealloc(&mut self, ptr: *mut u8, _alloc: &Allocator) { - let page_ptr = Raw::in_which(ptr); - - if page_ptr.is_full() { - self.full_list.remove(unsafe { page_ptr.get_link() }); - self.partial_list.insert(unsafe { page_ptr.get_link() }); - } - - page_ptr.dealloc_slot(ptr); - - if page_ptr.is_empty() { - self.partial_list.remove(unsafe { page_ptr.get_link() }); - self.empty_list.insert(unsafe { page_ptr.get_link() }); - } - - // TODO: Check whether we should place some pages back with `alloc` if the global - // free page count is below the watermark. - } -} diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 722fa5da..09e12f99 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -123,7 +123,7 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { Box::new_in( async move { eonix_log::println_trace!( - "trace_syscall", + feat: "trace_syscall", "tid{}: {}({}) => {{", thd.tid, #syscall_name_str, @@ -133,7 +133,7 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { let retval = #real_fn(thd, #(#args_call),*).await.into_retval(); eonix_log::println_trace!( - "trace_syscall", + feat: "trace_syscall", "}} => {:x?}", retval, ); diff --git a/rust-toolchain b/rust-toolchain index 8adb8e58..11ad5efd 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2025-05-16 +nightly-2026-01-09 diff --git a/script/backtrace b/script/backtrace new file mode 100755 index 00000000..8a60c829 --- /dev/null +++ b/script/backtrace @@ -0,0 +1,100 @@ +#!/bin/bash + +ADDR2LINE=${ADDR2LINE:-riscv64-unknown-elf-addr2line} + +ksym=build/riscv64gc-unknown-none-elf/debug/eonix_kernel + +usage() { + cat <<EOF + -s, --ksym <file> Use the given kernel symbol file + -o, --only-gbos Show kernel function calls only + -h, --help Show this message +EOF + exit "$1" +} + +# $1: instruction address +parse_pos() { + addr="$1" + shift + + "$ADDR2LINE" -e "$ksym" -i "$addr" "$@" 2>/dev/null +} + +filter_col() { + [ "$1" -eq 0 ] || awk "{ print \$$1; }" } + +str_contains() { + grep -E "$1" >/dev/null 2>&1 +} + +filter_stacktrace() { + NL=$'\n' + _state=nonstart + _out= + while [ $_state != "end" ]; do + read -r _line + case $_state in + nonstart) + str_contains "8< CUT HERE" <<< "$_line" && _state=save + ;; + save) + if str_contains "8< CUT HERE" <<< "$_line"; then + _state=end + else + _out="$_out$_line$NL" + fi + ;; + esac + done + + echo "$_out" +} + +while [ "$#" -gt 0 ]; do + case "$1" in + -s|--ksym) + shift + ksym="$1" + ;; + -o|--only-gbos)
only_gb=y + ;; + --) + shift + break + ;; + -h|--help) + usage 0 + ;; + *) + usage 1 + ;; + esac + shift +done + +stacktrace="$(filter_stacktrace)" + +i=1 +for addr in $(filter_col 3 <<< "$stacktrace"); do + pos="$(parse_pos "$addr" "$@")" + + if [ -n "$only_gb" ]; then + if ! str_contains "greatbridf_os" <<< "$pos"; then + continue + fi + fi + + printf "========== %4d ==========\n" "$i" + + parse_pos "$addr" "$@" + + i=$((i + 1)) +done diff --git a/src/driver/ahci/command.rs b/src/driver/ahci/command.rs index c83339b7..4609d38d 100644 --- a/src/driver/ahci/command.rs +++ b/src/driver/ahci/command.rs @@ -1,9 +1,11 @@ +use eonix_mm::paging::Folio as _; + use crate::kernel::constants::EINVAL; -use crate::kernel::mem::paging::Page; +use crate::kernel::mem::Folio; use crate::prelude::*; pub trait Command { - fn pages(&self) -> &[Page]; + fn pages(&self) -> &[Folio]; fn lba(&self) -> u64; // in sectors @@ -14,19 +16,19 @@ pub trait Command { } pub struct IdentifyCommand { - page: Page, + page: Folio, } impl IdentifyCommand { pub fn new() -> Self { Self { - page: Page::alloc(), + page: Folio::alloc(), } } } impl Command for IdentifyCommand { - fn pages(&self) -> &[Page] { + fn pages(&self) -> &[Folio] { core::slice::from_ref(&self.page) } @@ -47,14 +49,14 @@ impl Command for IdentifyCommand { } } -pub struct ReadLBACommand<'lt> { - pages: &'lt [Page], +pub struct ReadLBACommand<'a> { + pages: &'a [Folio], lba: u64, count: u16, } -impl<'lt> ReadLBACommand<'lt> { - pub fn new(pages: &'lt [Page], lba: u64, count: u16) -> KResult { +impl<'a> ReadLBACommand<'a> { + pub fn new(pages: &'a [Folio], lba: u64, count: u16) -> KResult { if pages.len() > 248 { return Err(EINVAL); } @@ -69,7 +71,7 @@ impl<'lt> ReadLBACommand<'lt> { } impl Command for ReadLBACommand<'_> { - fn pages(&self) -> &[Page] { + fn pages(&self) -> &[Folio] { self.pages } @@ -91,13 +93,13 @@ impl Command for ReadLBACommand<'_> { } pub struct WriteLBACommand<'a> { - pages: &'a [Page], + pages: &'a [Folio], lba: u64, count: u16, } impl<'a> WriteLBACommand<'a> { - pub fn new(pages: &'a [Page], lba: u64, count: u16) -> KResult { + pub fn new(pages: &'a [Folio], lba: u64, count: u16) -> KResult { if pages.len() > 248 { return Err(EINVAL); } @@ -112,7 +114,7 @@ impl<'a> WriteLBACommand<'a> { } impl Command for WriteLBACommand<'_> { - fn pages(&self) -> &[Page] { + fn pages(&self) -> &[Folio] { self.pages } diff --git a/src/driver/ahci/command_table.rs b/src/driver/ahci/command_table.rs index c77b4abd..00fc8a0b 100644 --- a/src/driver/ahci/command_table.rs +++ b/src/driver/ahci/command_table.rs @@ -1,45 +1,55 @@ -use super::{command::Command, PRDTEntry, FISH2D}; -use crate::kernel::mem::{AsMemoryBlock as _, Page}; +use core::ptr::NonNull; + use eonix_mm::address::PAddr; +use eonix_mm::paging::Folio as _; -pub struct CommandTable<'a> { - page: Page, - command_fis: &'a mut FISH2D, +use super::command::Command; +use super::{PRDTEntry, FISH2D}; +use crate::kernel::mem::FolioOwned; - prdt: &'a mut [PRDTEntry; 248], - prdt_entries: Option, +pub struct CommandTable { + page: FolioOwned, + cmd_fis: NonNull, + prdt: NonNull<[PRDTEntry; 248]>, + prdt_entries: usize, } -impl CommandTable<'_> { - pub fn new() -> Self { - let page = Page::alloc(); - let memory = page.as_memblk(); - - let (lhs, prdt) = memory.split_at(0x80); - - let (command_fis, _) = lhs.split_at(size_of::()); - let command_fis = unsafe { command_fis.as_ptr().as_mut() }; - let prdt = unsafe { prdt.as_ptr().as_mut() }; +unsafe impl Send for CommandTable {} +unsafe impl Sync for 
CommandTable {} - Self { - page, - command_fis, - prdt, - prdt_entries: None, +impl CommandTable { + pub fn new() -> Self { + let page = FolioOwned::alloc(); + let base = page.get_ptr(); + + unsafe { + Self { + page, + cmd_fis: base.cast(), + prdt: base.byte_add(0x80).cast(), + prdt_entries: 0, + } } } pub fn setup(&mut self, cmd: &impl Command) { - self.command_fis.setup(cmd.cmd(), cmd.lba(), cmd.count()); - self.prdt_entries = Some(cmd.pages().len() as u16); + unsafe { + self.cmd_fis + .as_mut() + .setup(cmd.cmd(), cmd.lba(), cmd.count()); + } + + self.prdt_entries = cmd.pages().len(); for (idx, page) in cmd.pages().iter().enumerate() { - self.prdt[idx].setup(page); + unsafe { + self.prdt.as_mut()[idx].setup(page); + } } } - pub fn prdt_len(&self) -> u16 { - self.prdt_entries.unwrap() + pub fn prdt_len(&self) -> usize { + self.prdt_entries } pub fn base(&self) -> PAddr { diff --git a/src/driver/ahci/defs.rs b/src/driver/ahci/defs.rs index c5440246..66841da8 100644 --- a/src/driver/ahci/defs.rs +++ b/src/driver/ahci/defs.rs @@ -1,7 +1,9 @@ #![allow(dead_code)] -use crate::kernel::mem::paging::Page; use eonix_mm::address::Addr as _; +use eonix_mm::paging::Folio as _; + +use crate::kernel::mem::Folio; pub const VENDOR_INTEL: u16 = 0x8086; pub const DEVICE_AHCI: u16 = 0x2922; @@ -239,7 +241,7 @@ pub struct PRDTEntry { } impl PRDTEntry { - pub fn setup(&mut self, page: &Page) { + pub fn setup(&mut self, page: &Folio) { self.base = page.start().addr() as u64; self._reserved1 = 0; diff --git a/src/driver/ahci/mod.rs b/src/driver/ahci/mod.rs index c3b1cfa0..3ea44ed3 100644 --- a/src/driver/ahci/mod.rs +++ b/src/driver/ahci/mod.rs @@ -1,24 +1,23 @@ -use crate::{ - fs::procfs, - io::Buffer as _, - kernel::{ - block::{make_device, BlockDevice}, - constants::{EINVAL, EIO}, - interrupt::register_irq_handler, - pcie::{self, Header, PCIDevice, PCIDriver, PciError}, - task::block_on, - }, - prelude::*, -}; -use alloc::{format, sync::Arc}; +use alloc::format; +use alloc::sync::Arc; + +use async_trait::async_trait; use control::AdapterControl; use defs::*; use eonix_mm::address::{AddrOps as _, PAddr}; use eonix_sync::SpinIrq as _; use port::AdapterPort; - pub(self) use register::Register; +use crate::fs::procfs; +use crate::io::Buffer as _; +use crate::kernel::block::BlockDevice; +use crate::kernel::constants::{EINVAL, EIO}; +use crate::kernel::interrupt::register_irq_handler; +use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; +use crate::kernel::vfs::types::DeviceId; +use crate::prelude::*; + mod command; mod command_table; mod control; @@ -29,7 +28,7 @@ pub(self) mod slot; mod stats; pub struct AHCIDriver { - devices: Spin>>>, + devices: Spin>>, } pub struct BitsIterator { @@ -63,22 +62,22 @@ impl Iterator for BitsIterator { } } -struct Device<'a> { +struct Device { control_base: PAddr, control: AdapterControl, _pcidev: Arc>, /// # Lock /// Might be accessed from irq handler, use with `lock_irq()` - ports: Spin<[Option>>; 32]>, + ports: Spin<[Option>; 32]>, } /// # Safety /// `pcidev` is never accessed from Rust code /// TODO!!!: place *mut pci_device in a safe wrapper -unsafe impl Send for Device<'_> {} -unsafe impl Sync for Device<'_> {} +unsafe impl Send for Device {} +unsafe impl Sync for Device {} -impl Device<'_> { +impl Device { fn handle_interrupt(&self) { // Safety // `self.ports` is accessed inside irq handler @@ -107,8 +106,31 @@ impl Device<'_> { } } -impl Device<'static> { - fn probe_ports(&self) -> KResult<()> { +impl Device { + async fn probe_port(&self, port: Arc) 
-> KResult<()> { + port.init().await?; + + { + let port = port.clone(); + let name = format!("ahci-p{}-stats", port.nport); + procfs::populate_root(name.into_bytes().into(), move |buffer| { + port.print_stats(&mut buffer.get_writer()) + }) + .await; + } + + let port = BlockDevice::register_disk( + DeviceId::new(8, port.nport as u16 * 16), + 2147483647, // TODO: get size from device + port, + )?; + + port.partprobe().await?; + + Ok(()) + } + + async fn probe_ports(&self) -> KResult<()> { for nport in self.control.implemented_ports() { let port = Arc::new(AdapterPort::new(self.control_base, nport)); if !port.status_ok() { @@ -116,27 +138,7 @@ impl Device<'static> { } self.ports.lock_irq()[nport as usize] = Some(port.clone()); - if let Err(e) = (|| -> KResult<()> { - port.init()?; - - { - let port = port.clone(); - let name = format!("ahci-p{}-stats", port.nport); - procfs::populate_root(name.into_bytes().into(), move |buffer| { - port.print_stats(&mut buffer.get_writer()) - })?; - } - - let port = BlockDevice::register_disk( - make_device(8, nport * 16), - 2147483647, // TODO: get size from device - port, - )?; - - block_on(port.partprobe())?; - - Ok(()) - })() { + if let Err(e) = self.probe_port(port).await { self.ports.lock_irq()[nport as usize] = None; println_warn!("probe port {nport} failed with {e}"); } @@ -154,6 +156,7 @@ impl AHCIDriver { } } +#[async_trait] impl PCIDriver for AHCIDriver { fn vendor_id(&self) -> u16 { VENDOR_INTEL @@ -163,7 +166,7 @@ impl PCIDriver for AHCIDriver { DEVICE_AHCI } - fn handle_device(&self, pcidev: Arc>) -> Result<(), PciError> { + async fn handle_device(&self, pcidev: Arc>) -> Result<(), PciError> { let Header::Endpoint(header) = pcidev.header else { Err(EINVAL)? }; @@ -200,7 +203,7 @@ impl PCIDriver for AHCIDriver { let device_irq = device.clone(); register_irq_handler(irqno as i32, move || device_irq.handle_interrupt())?; - device.probe_ports()?; + device.probe_ports().await?; self.devices.lock().push(device); @@ -208,6 +211,8 @@ impl PCIDriver for AHCIDriver { } } -pub fn register_ahci_driver() { - pcie::register_driver(AHCIDriver::new()).expect("Register ahci driver failed"); +pub async fn register_ahci_driver() { + pcie::register_driver(AHCIDriver::new()) + .await + .expect("Register ahci driver failed"); } diff --git a/src/driver/ahci/port.rs b/src/driver/ahci/port.rs index f558f6e1..a54bbbba 100644 --- a/src/driver/ahci/port.rs +++ b/src/driver/ahci/port.rs @@ -1,20 +1,18 @@ +use alloc::collections::vec_deque::VecDeque; +use core::task::{Poll, Waker}; + +use async_trait::async_trait; +use eonix_mm::address::{Addr as _, PAddr}; +use eonix_sync::SpinIrq as _; + use super::command::{Command, IdentifyCommand, ReadLBACommand, WriteLBACommand}; -use super::slot::CommandSlot; +use super::slot::CommandList; use super::stats::AdapterPortStats; -use super::{ - CommandHeader, Register, PORT_CMD_CR, PORT_CMD_FR, PORT_CMD_FRE, PORT_CMD_ST, PORT_IE_DEFAULT, -}; +use super::{Register, PORT_CMD_CR, PORT_CMD_FR, PORT_CMD_FRE, PORT_CMD_ST, PORT_IE_DEFAULT}; use crate::driver::ahci::command_table::CommandTable; use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; use crate::kernel::constants::{EINVAL, EIO}; -use crate::kernel::mem::paging::Page; -use crate::kernel::mem::AsMemoryBlock as _; -use crate::kernel::task::block_on; use crate::prelude::*; -use alloc::collections::vec_deque::VecDeque; -use core::pin::pin; -use eonix_mm::address::{Addr as _, PAddr}; -use eonix_sync::{SpinIrq as _, WaitList}; /// An `AdapterPort` is an HBA device in AHCI mode. 
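Worth noting for readers unfamiliar with `async-trait`: the attribute is what lets `handle_device` above be both `async` and object-safe, so `pcie::register_driver` can presumably keep drivers behind a trait object while `probe_ports` awaits inside them. A minimal, self-contained sketch of the pattern; the `Probe`/`DummyNic` names are illustrative, not part of the kernel:

```rust
use async_trait::async_trait;

#[async_trait]
trait Probe {
    // The macro rewrites this into a method returning
    // `Pin<Box<dyn Future<Output = Result<(), ()>> + Send + '_>>`,
    // keeping the trait usable as `dyn Probe` at the cost of a boxed future.
    async fn probe(&self) -> Result<(), ()>;
}

struct DummyNic;

#[async_trait]
impl Probe for DummyNic {
    async fn probe(&self) -> Result<(), ()> {
        // Awaiting device I/O would happen here.
        Ok(())
    }
}
```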
/// @@ -55,6 +53,8 @@ pub struct AdapterPortData { struct FreeList { free: VecDeque, working: VecDeque, + + wakers: VecDeque, } impl FreeList { @@ -62,57 +62,32 @@ impl FreeList { Self { free: (0..32).collect(), working: VecDeque::new(), + wakers: VecDeque::new(), } } } -pub struct AdapterPort<'a> { +pub struct AdapterPort { pub nport: u32, regs_base: PAddr, - slots: [CommandSlot<'a>; 32], + cmdlist: CommandList, free_list: Spin, - free_list_wait: WaitList, - - /// Holds the command list. - /// **DO NOT USE IT DIRECTLY** - _page: Page, - - cmdlist_base: PAddr, - fis_base: PAddr, stats: AdapterPortStats, } -impl<'a> AdapterPort<'a> { +impl AdapterPort { pub fn new(base: PAddr, nport: u32) -> Self { - let page = Page::alloc(); - let cmdlist_base = page.start(); - let cmdlist_size = 32 * size_of::(); - let fis_base = cmdlist_base + cmdlist_size; - - let (mut cmdheaders, _) = page.as_memblk().split_at(cmdlist_size); - let slots = core::array::from_fn(move |_| { - let (cmdheader, next) = cmdheaders.split_at(size_of::()); - cmdheaders = next; - CommandSlot::new(unsafe { cmdheader.as_ptr().as_mut() }) - }); - Self { nport, regs_base: base + 0x100 + 0x80 * nport as usize, - slots, + cmdlist: CommandList::new(), free_list: Spin::new(FreeList::new()), - free_list_wait: WaitList::new(), - _page: page, stats: AdapterPortStats::new(), - cmdlist_base, - fis_base, } } -} -impl AdapterPort<'_> { fn command_list_base(&self) -> Register { Register::new(self.regs_base + 0x00) } @@ -145,19 +120,17 @@ impl AdapterPort<'_> { self.sata_status().read_once() & 0xf == 0x3 } - fn get_free_slot(&self) -> u32 { - loop { + async fn get_free_slot(&self) -> u32 { + core::future::poll_fn(|ctx| { let mut free_list = self.free_list.lock_irq(); - let free_slot = free_list.free.pop_front(); - if let Some(slot) = free_slot { - return slot; + if let Some(slot) = free_list.free.pop_front() { + return Poll::Ready(slot); } - let mut wait = pin!(self.free_list_wait.prepare_to_wait()); - wait.as_mut().add_to_wait_list(); - drop(free_list); - block_on(wait); - } + free_list.wakers.push_back(ctx.waker().clone()); + Poll::Pending + }) + .await } fn save_working(&self, slot: u32) { @@ -165,8 +138,10 @@ impl AdapterPort<'_> { } fn release_free_slot(&self, slot: u32) { - self.free_list.lock_irq().free.push_back(slot); - self.free_list_wait.notify_one(); + let mut free_list = self.free_list.lock_irq(); + + free_list.free.push_back(slot); + free_list.wakers.drain(..).for_each(|waker| waker.wake()); } pub fn handle_interrupt(&self) { @@ -180,7 +155,7 @@ impl AdapterPort<'_> { return true; } - self.slots[n as usize].handle_irq(); + self.cmdlist.get(n as usize).handle_irq(); self.stats.inc_int_fired(); false @@ -204,12 +179,12 @@ impl AdapterPort<'_> { Ok(()) } - fn send_command(&self, cmd: &impl Command) -> KResult<()> { + async fn send_command(&self, cmd: &impl Command) -> KResult<()> { let mut cmdtable = CommandTable::new(); cmdtable.setup(cmd); - let slot_index = self.get_free_slot(); - let slot = &self.slots[slot_index as usize]; + let slot_index = self.get_free_slot().await; + let slot = self.cmdlist.get(slot_index as usize); slot.prepare_command(&cmdtable, cmd.write()); self.save_working(slot_index); @@ -222,36 +197,36 @@ impl AdapterPort<'_> { self.stats.inc_cmd_sent(); - if let Err(_) = block_on(slot.wait_finish()) { + slot.wait_finish().await.inspect_err(|_| { self.stats.inc_cmd_error(); - return Err(EIO); - }; + })?; self.release_free_slot(slot_index); Ok(()) } - fn identify(&self) -> KResult<()> { + async fn identify(&self) -> 
KResult<()> { let cmd = IdentifyCommand::new(); // TODO: check returned data - self.send_command(&cmd)?; + self.send_command(&cmd).await?; Ok(()) } - pub fn init(&self) -> KResult<()> { + pub async fn init(&self) -> KResult<()> { self.stop_command()?; self.command_list_base() - .write(self.cmdlist_base.addr() as u64); - self.fis_base().write(self.fis_base.addr() as u64); + .write(self.cmdlist.cmdlist_base().addr() as u64); + self.fis_base() + .write(self.cmdlist.recv_fis_base().addr() as u64); self.interrupt_enable().write_once(PORT_IE_DEFAULT); self.start_command()?; - match self.identify() { + match self.identify().await { Err(err) => { self.stop_command()?; Err(err) @@ -269,12 +244,13 @@ impl AdapterPort<'_> { } } -impl BlockRequestQueue for AdapterPort<'_> { +#[async_trait] +impl BlockRequestQueue for AdapterPort { fn max_request_pages(&self) -> u64 { 1024 } - fn submit(&self, req: BlockDeviceRequest) -> KResult<()> { + async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()> { match req { BlockDeviceRequest::Read { sector, @@ -287,7 +263,7 @@ impl BlockRequestQueue for AdapterPort<'_> { let command = ReadLBACommand::new(buffer, sector, count as u16)?; - self.send_command(&command) + self.send_command(&command).await } BlockDeviceRequest::Write { sector, @@ -300,7 +276,7 @@ impl BlockRequestQueue for AdapterPort<'_> { let command = WriteLBACommand::new(buffer, sector, count as u16)?; - self.send_command(&command) + self.send_command(&command).await } } } diff --git a/src/driver/ahci/slot.rs b/src/driver/ahci/slot.rs index 2198c457..fdb61f96 100644 --- a/src/driver/ahci/slot.rs +++ b/src/driver/ahci/slot.rs @@ -1,20 +1,37 @@ -use super::{command_table::CommandTable, CommandHeader}; +use core::cell::UnsafeCell; +use core::ptr::NonNull; +use core::task::{Poll, Waker}; + +use eonix_mm::address::{Addr as _, PAddr}; +use eonix_mm::paging::Folio as _; +use eonix_sync::{Spin, SpinIrq as _}; + +use super::command_table::CommandTable; +use super::CommandHeader; +use crate::kernel::constants::EIO; +use crate::kernel::mem::FolioOwned; use crate::KResult; -use core::pin::pin; -use eonix_mm::address::Addr as _; -use eonix_sync::{Spin, SpinIrq as _, WaitList}; + +pub struct CommandList { + base: NonNull, + _page: FolioOwned, +} + +unsafe impl Send for CommandList {} +unsafe impl Sync for CommandList {} pub struct CommandSlot<'a> { - /// # Usage - /// `inner.cmdheader` might be used in irq handler. So in order to wait for - /// commands to finish, we should use `lock_irq` on `inner` - inner: Spin>, - wait_list: WaitList, + cmdheader: &'a UnsafeCell, + /// [`Self::control`] might be used in irq handlers. + control: &'a Spin, } -struct CommandSlotInner<'a> { +unsafe impl Send for CommandSlot<'_> {} +unsafe impl Sync for CommandSlot<'_> {} + +struct SlotControl { state: SlotState, - cmdheader: &'a mut CommandHeader, + waker: Option, } #[derive(Debug, PartialEq, Eq, Clone, Copy)] @@ -22,35 +39,110 @@ enum SlotState { Idle, Working, Finished, + // TODO: Implement AHCI error handling + #[allow(unused)] Error, } -impl<'a> CommandSlot<'a> { - pub fn new(cmdheader: &'a mut CommandHeader) -> Self { +impl CommandList { + fn cmdheaders(&self) -> &[UnsafeCell; 32] { + unsafe { self.base.cast().as_ref() } + } + + fn controls_ptr(base: NonNull) -> NonNull> { + // 24 bytes for SlotControl and extra 8 bytes for Spin. 
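The comment above pins each slot's control block at 32 bytes (24 for `SlotControl` plus 8 for the `Spin`), which together with the 32-byte AHCI command header lets a single folio hold all three per-port regions back to back. A sketch of the offset arithmetic, with the sizes written as named constants for illustration:

```rust
// One page backing a CommandList, laid out as:
//   [UnsafeCell<CommandHeader>; 32]  at 0x000  (32 * 32 = 1024 bytes)
//   [Spin<SlotControl>; 32]          at 0x400  (32 * 32 = 1024 bytes)
//   received-FIS area                at 0x800  (what recv_fis_base returns)
const SLOT_COUNT: usize = 32;
const CMD_HEADER_SIZE: usize = 32; // fixed by the AHCI specification
const SLOT_CONTROL_SIZE: usize = 32; // checked by the const_assert_eq! below

const CONTROLS_OFFSET: usize = SLOT_COUNT * CMD_HEADER_SIZE; // 0x400
const RECV_FIS_OFFSET: usize = SLOT_COUNT * (CMD_HEADER_SIZE + SLOT_CONTROL_SIZE); // 0x800
```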
+        const_assert_eq!(size_of::<Spin<SlotControl>>(), 32);
+
+        unsafe { base.add(size_of::<UnsafeCell<CommandHeader>>() * 32).cast() }
+    }
+
+    fn controls(&self) -> &[Spin<SlotControl>; 32] {
+        unsafe { Self::controls_ptr(self.base).cast().as_ref() }
+    }
+
+    pub fn cmdlist_base(&self) -> PAddr {
+        self._page.start()
+    }
+
+    pub fn recv_fis_base(&self) -> PAddr {
+        self._page.start()
+            + (size_of::<UnsafeCell<CommandHeader>>() + size_of::<Spin<SlotControl>>()) * 32
+    }
+
+    pub fn get(&self, index: usize) -> CommandSlot<'_> {
+        CommandSlot {
+            cmdheader: &self.cmdheaders()[index],
+            control: &self.controls()[index],
+        }
+    }
+
+    pub fn new() -> Self {
+        let mut page = FolioOwned::alloc();
+        page.as_bytes_mut().fill(0);
+
+        let base = page.get_ptr();
+
+        let controls_ptr = Self::controls_ptr(base);
+
+        for i in 0..32 {
+            unsafe {
+                controls_ptr.add(i).write(Spin::new(SlotControl {
+                    state: SlotState::Idle,
+                    waker: None,
+                }));
+            }
+        }
+
         Self {
-            inner: Spin::new(CommandSlotInner {
-                state: SlotState::Idle,
-                cmdheader,
-            }),
-            wait_list: WaitList::new(),
+            base: page.get_ptr(),
+            _page: page,
         }
     }
+}
+impl Drop for CommandList {
+    fn drop(&mut self) {
+        let controls_ptr = Self::controls_ptr(self.base);
+
+        for i in 0..32 {
+            unsafe {
+                controls_ptr.add(i).drop_in_place();
+            }
+        }
+    }
+}
+
+impl CommandSlot<'_> {
     pub fn handle_irq(&self) {
-        let mut inner = self.inner.lock();
-        debug_assert_eq!(inner.state, SlotState::Working);
+        // We are already in the IRQ handler.
+        let mut control = self.control.lock();
+        assert_eq!(control.state, SlotState::Working);
+
+        let cmdheader = unsafe {
+            // SAFETY: The IRQ handler is only called after the command
+            // is finished.
+            &mut *self.cmdheader.get()
+        };
 
         // TODO: Check errors.
-        inner.state = SlotState::Finished;
-        inner.cmdheader.bytes_transferred = 0;
-        inner.cmdheader.prdt_length = 0;
+        cmdheader.bytes_transferred = 0;
+        cmdheader.prdt_length = 0;
 
-        self.wait_list.notify_all();
+        control.state = SlotState::Finished;
+
+        if let Some(waker) = control.waker.take() {
+            waker.wake();
+        }
     }
 
     pub fn prepare_command(&self, cmdtable: &CommandTable, write: bool) {
-        let mut inner = self.inner.lock_irq();
-        let cmdheader = &mut inner.cmdheader;
+        let mut control = self.control.lock_irq();
+        assert_eq!(control.state, SlotState::Idle);
+
+        let cmdheader = unsafe {
+            // SAFETY: We are in the idle state.
+            &mut *self.cmdheader.get()
+        };
 
         cmdheader.first = 0x05; // FIS type
@@ -60,35 +152,37 @@ impl<'a> CommandSlot<'a> {
 
         cmdheader.second = 0x00;
 
-        cmdheader.prdt_length = cmdtable.prdt_len();
+        cmdheader.prdt_length = cmdtable.prdt_len() as u16;
 
         cmdheader.bytes_transferred = 0;
         cmdheader.command_table_base = cmdtable.base().addr() as u64;
 
         cmdheader._reserved = [0; 4];
 
-        inner.state = SlotState::Working;
+        control.state = SlotState::Working;
     }
 
     pub async fn wait_finish(&self) -> KResult<()> {
-        let mut inner = loop {
-            let inner = self.inner.lock_irq();
-            if inner.state != SlotState::Working {
-                break inner;
+        core::future::poll_fn(|ctx| {
+            let mut control = self.control.lock_irq();
+
+            match control.state {
+                SlotState::Idle => unreachable!("Poll called in idle state"),
+                SlotState::Working => {
+                    control.waker = Some(ctx.waker().clone());
+                    Poll::Pending
+                }
+                SlotState::Finished => {
+                    control.state = SlotState::Idle;
+                    Poll::Ready(Ok(()))
+                }
+                SlotState::Error => {
+                    control.state = SlotState::Idle;
+
+                    // TODO: Report errors.
+ Poll::Ready(Err(EIO)) + } } - - let mut wait = pin!(self.wait_list.prepare_to_wait()); - wait.as_mut().add_to_wait_list(); - - if inner.state != SlotState::Working { - break inner; - } - - drop(inner); - wait.await; - }; - - inner.state = SlotState::Idle; - - Ok(()) + }) + .await } } diff --git a/src/driver/e1000e.rs b/src/driver/e1000e.rs index ba31b8b1..923a4594 100644 --- a/src/driver/e1000e.rs +++ b/src/driver/e1000e.rs @@ -1,18 +1,19 @@ -use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO}; -use crate::kernel::interrupt::register_irq_handler; -use crate::kernel::mem::paging::{self, AllocZeroed}; -use crate::kernel::mem::{AsMemoryBlock, PhysAccess}; -use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; -use crate::net::netdev; -use crate::prelude::*; use alloc::boxed::Box; use alloc::sync::Arc; -use alloc::vec::Vec; use core::ptr::NonNull; + +use async_trait::async_trait; use eonix_hal::fence::memory_barrier; use eonix_mm::address::{Addr, PAddr}; +use eonix_mm::paging::Folio as _; use eonix_sync::SpinIrq; -use paging::Page; + +use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO}; +use crate::kernel::interrupt::register_irq_handler; +use crate::kernel::mem::{FolioOwned, PhysAccess}; +use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; +use crate::net::netdev; +use crate::prelude::*; mod defs; @@ -54,13 +55,15 @@ struct E1000eDev { id: u32, regs: Registers, - rt_desc_page: Page, + rt_desc_page: FolioOwned, rx_head: Option, rx_tail: Option, tx_tail: Option, - rx_buffers: Option>>, - tx_buffers: Option>>, + rx_buffers: Box<[FolioOwned; RX_DESC_SIZE]>, + // TODO: Implement E1000e send + #[allow(unused)] + tx_buffers: Box<[Option; TX_DESC_SIZE]>, } fn test(val: u32, bit: u32) -> bool { @@ -195,7 +198,7 @@ impl netdev::Netdev for E1000eDev { break; } - let ref mut desc = self.rx_desc_table()[next_tail as usize]; + let desc = unsafe { &mut self.rx_desc_table()[next_tail as usize] }; if !test(desc.status as u32, defs::RXD_STAT_DD as u32) { Err(EIO)?; } @@ -203,11 +206,8 @@ impl netdev::Netdev for E1000eDev { desc.status = 0; let len = desc.length as usize; - let buffers = self.rx_buffers.as_mut().ok_or(EIO)?; - let data = unsafe { - // SAFETY: No one could be writing to the buffer at this point. - &buffers[next_tail as usize].as_memblk().as_bytes()[..len] - }; + let buffer = &self.rx_buffers[next_tail as usize]; + let data = &buffer.as_bytes()[..len]; println_debug!("e1000e: received {len} bytes, {:?}", PrintableBytes(data)); self.rx_tail = Some(next_tail); @@ -225,20 +225,17 @@ impl netdev::Netdev for E1000eDev { return Err(EAGAIN); } - let ref mut desc = self.tx_desc_table()[tail as usize]; + let desc = unsafe { &mut self.tx_desc_table()[tail as usize] }; if !test(desc.status as u32, defs::TXD_STAT_DD as u32) { return Err(EIO); } - let buffer_page = Page::alloc(); + let mut buffer_page = FolioOwned::alloc(); if buf.len() > buffer_page.len() { return Err(EFAULT); } - unsafe { - // SAFETY: We are the only one writing to this memory block. 
- buffer_page.as_memblk().as_bytes_mut()[..buf.len()].copy_from_slice(buf); - } + buffer_page.as_bytes_mut()[..buf.len()].copy_from_slice(buf); desc.buffer = PAddr::from(buffer_page.pfn()).addr() as u64; desc.length = buf.len() as u16; @@ -248,9 +245,8 @@ impl netdev::Netdev for E1000eDev { self.tx_tail = Some(next_tail); self.regs.write(defs::REG_TDT, next_tail); - // TODO: check if the packets are sent and update self.tx_head state - - Ok(()) + unimplemented!("Check if the packets are sent and update self.tx_head state"); + // Ok(()) } } @@ -323,26 +319,26 @@ impl E1000eDev { Ok(()) } - fn reset(&self) -> Result<(), u32> { + fn reset(regs: &Registers) -> Result<(), u32> { // disable interrupts so we won't mess things up - self.regs.write(defs::REG_IMC, 0xffffffff); + regs.write(defs::REG_IMC, 0xffffffff); - let ctrl = self.regs.read(defs::REG_CTRL); - self.regs.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD); + let ctrl = regs.read(defs::REG_CTRL); + regs.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD); - while self.regs.read(defs::REG_STAT) & defs::STAT_GIOE != 0 { + while regs.read(defs::REG_STAT) & defs::STAT_GIOE != 0 { // wait for link up } - let ctrl = self.regs.read(defs::REG_CTRL); - self.regs.write(defs::REG_CTRL, ctrl | defs::CTRL_RST); + let ctrl = regs.read(defs::REG_CTRL); + regs.write(defs::REG_CTRL, ctrl | defs::CTRL_RST); - while self.regs.read(defs::REG_CTRL) & defs::CTRL_RST != 0 { + while regs.read(defs::REG_CTRL) & defs::CTRL_RST != 0 { // wait for reset } // disable interrupts again - self.regs.write(defs::REG_IMC, 0xffffffff); + regs.write(defs::REG_IMC, 0xffffffff); Ok(()) } @@ -359,64 +355,49 @@ impl E1000eDev { Ok(()) } - pub fn new(base: PAddr, irq_no: usize) -> Result { - let page = Page::zeroed(); + pub fn new(base: PAddr, irq_no: usize) -> KResult { + let regs = Registers::new(base); + Self::reset(®s)?; - let mut dev = Self { + let dev = Self { irq_no, - mac: [0; 6], + mac: regs.read_as(0x5400), status: netdev::LinkStatus::Down, speed: netdev::LinkSpeed::SpeedUnknown, id: netdev::alloc_id(), - regs: Registers::new(base), - rt_desc_page: page, + regs, + rt_desc_page: { + let mut folio = FolioOwned::alloc(); + folio.as_bytes_mut().fill(0); + folio + }, rx_head: None, rx_tail: None, tx_tail: None, - rx_buffers: None, - tx_buffers: None, + rx_buffers: Box::new(core::array::from_fn(|_| FolioOwned::alloc_order(2))), + tx_buffers: Box::new([const { None }; 32]), }; - dev.reset()?; - - dev.mac = dev.regs.read_as(0x5400); - dev.tx_buffers = Some(Box::new(Vec::with_capacity(TX_DESC_SIZE))); - - let mut rx_buffers = Box::new(Vec::with_capacity(RX_DESC_SIZE)); - - for index in 0..RX_DESC_SIZE { - let page = Page::alloc_order(2); - - let ref mut desc = dev.rx_desc_table()[index]; - desc.buffer = PAddr::from(page.pfn()).addr() as u64; - desc.status = 0; - - rx_buffers.push(page); - } + unsafe { + for (desc, page) in dev.rx_desc_table().into_iter().zip(dev.rx_buffers.iter()) { + desc.buffer = page.start().addr() as u64; + desc.status = 0; + } - for index in 0..TX_DESC_SIZE { - let ref mut desc = dev.tx_desc_table()[index]; - desc.status = defs::TXD_STAT_DD; + for desc in dev.tx_desc_table() { + desc.status = defs::TXD_STAT_DD; + } } - dev.rx_buffers = Some(rx_buffers); - Ok(dev) } - fn rx_desc_table(&self) -> &mut [RxDescriptor; RX_DESC_SIZE] { - unsafe { - // SAFETY: TODO - self.rt_desc_page.as_memblk().as_ptr().as_mut() - } + unsafe fn rx_desc_table(&self) -> &mut [RxDescriptor; RX_DESC_SIZE] { + self.rt_desc_page.get_ptr().cast().as_mut() } - fn tx_desc_table(&self) -> &mut 
[TxDescriptor; TX_DESC_SIZE] { - let (_, right) = self.rt_desc_page.as_memblk().split_at(0x200); - unsafe { - // SAFETY: TODO - right.as_ptr().as_mut() - } + unsafe fn tx_desc_table(&self) -> &mut [TxDescriptor; TX_DESC_SIZE] { + self.rt_desc_page.get_ptr().add(0x200).cast().as_mut() } } @@ -424,12 +405,8 @@ impl Drop for E1000eDev { fn drop(&mut self) { assert_eq!(self.status, netdev::LinkStatus::Down); - if let Some(_) = self.rx_buffers.take() {} - - // TODO: we should wait until all packets are sent - if let Some(_) = self.tx_buffers.take() {} - - let _ = self.rt_desc_page; + // TODO: we should wait until all packets are sent before dropping + // tx buffers. } } @@ -437,6 +414,7 @@ struct Driver { dev_id: u16, } +#[async_trait] impl PCIDriver for Driver { fn vendor_id(&self) -> u16 { 0x8086 @@ -446,7 +424,7 @@ impl PCIDriver for Driver { self.dev_id } - fn handle_device(&self, device: Arc>) -> Result<(), PciError> { + async fn handle_device(&self, device: Arc>) -> Result<(), PciError> { let Header::Endpoint(header) = device.header else { Err(EINVAL)? }; @@ -473,10 +451,10 @@ impl PCIDriver for Driver { } } -pub fn register_e1000e_driver() { +pub async fn register_e1000e_driver() { let dev_ids = [0x100e, 0x10d3, 0x10ea, 0x153a]; for id in dev_ids.into_iter() { - pcie::register_driver(Driver { dev_id: id }).unwrap(); + pcie::register_driver(Driver { dev_id: id }).await.unwrap(); } } diff --git a/src/driver/serial.rs b/src/driver/serial.rs index d69965f4..b634c232 100644 --- a/src/driver/serial.rs +++ b/src/driver/serial.rs @@ -2,8 +2,8 @@ mod io; use crate::{ kernel::{ - block::make_device, console::set_console, constants::EIO, interrupt::register_irq_handler, - CharDevice, CharDeviceType, Terminal, TerminalDevice, + console::set_console, constants::EIO, interrupt::register_irq_handler, + vfs::types::DeviceId, CharDevice, CharDeviceType, Terminal, TerminalDevice, }, prelude::*, }; @@ -167,7 +167,7 @@ impl Serial { eonix_log::set_console(terminal.clone()); CharDevice::register( - make_device(4, 64 + port.id), + DeviceId::new(4, 64 + port.id as u16), port.name.clone(), CharDeviceType::Terminal(terminal), )?; diff --git a/src/driver/serial/io.rs b/src/driver/serial/io.rs index aec18f20..57e61c56 100644 --- a/src/driver/serial/io.rs +++ b/src/driver/serial/io.rs @@ -1,10 +1,11 @@ -use super::SerialRegister; use core::ptr::NonNull; -use eonix_hal::{fence::memory_barrier, mm::ArchPhysAccess}; -use eonix_mm::address::{PAddr, PhysAccess}; #[cfg(target_arch = "x86_64")] use eonix_hal::arch_exported::io::Port8; +use eonix_hal::mm::ArchPhysAccess; +use eonix_mm::address::{PAddr, PhysAccess}; + +use super::SerialRegister; #[cfg(target_arch = "x86_64")] pub struct SerialIO { @@ -73,10 +74,12 @@ impl SerialIO { self.line_status } + #[allow(unused)] pub fn modem_status(&self) -> impl SerialRegister { self.modem_status } + #[allow(unused)] pub fn scratch(&self) -> impl SerialRegister { self.scratch } @@ -100,7 +103,7 @@ impl SerialRegister for NonNull { let retval = unsafe { self.as_ptr().read_volatile() }; #[cfg(target_arch = "loongarch64")] - memory_barrier(); + eonix_hal::fence::memory_barrier(); retval } @@ -110,7 +113,7 @@ impl SerialRegister for NonNull { unsafe { self.as_ptr().write_volatile(data) }; #[cfg(target_arch = "loongarch64")] - memory_barrier(); + eonix_hal::fence::memory_barrier(); } } @@ -155,10 +158,12 @@ impl SerialIO { unsafe { self.base_addr.add(5) } } + #[allow(unused)] pub fn modem_status(&self) -> impl SerialRegister { unsafe { self.base_addr.add(6) } } + #[allow(unused)] pub fn 
scratch(&self) -> impl SerialRegister { unsafe { self.base_addr.add(7) } } diff --git a/src/driver/virtio/riscv64.rs b/src/driver/virtio/riscv64.rs index 9bdbf6ce..b33e16ac 100644 --- a/src/driver/virtio/riscv64.rs +++ b/src/driver/virtio/riscv64.rs @@ -1,8 +1,5 @@ use super::virtio_blk::HAL; -use crate::kernel::{ - block::{make_device, BlockDevice}, - task::block_on, -}; +use crate::kernel::{block::BlockDevice, task::block_on, vfs::types::DeviceId}; use alloc::{sync::Arc, vec::Vec}; use eonix_hal::arch_exported::fdt::FDT; use eonix_hal::mm::ArchPhysAccess; @@ -43,7 +40,7 @@ pub fn init() { .expect("Failed to initialize VirtIO Block device"); let block_device = BlockDevice::register_disk( - make_device(8, 16 * disk_id), + DeviceId::new(8, 16 * disk_id), 2147483647, Arc::new(Spin::new(block_device)), ) diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs index 37e4fe77..5dfed88a 100644 --- a/src/driver/virtio/virtio_blk.rs +++ b/src/driver/virtio/virtio_blk.rs @@ -1,19 +1,19 @@ -use crate::{ - io::Chunks, - kernel::{ - block::{BlockDeviceRequest, BlockRequestQueue}, - constants::EIO, - mem::{AsMemoryBlock, Page}, - }, - prelude::KResult, -}; +use alloc::boxed::Box; + +use async_trait::async_trait; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::{ - address::{Addr, PAddr, PhysAccess}, - paging::PFN, -}; +use eonix_mm::address::{Addr, PAddr, PhysAccess}; +use eonix_mm::paging::{Folio as _, PFN}; use eonix_sync::Spin; -use virtio_drivers::{device::blk::VirtIOBlk, transport::Transport, Hal}; +use virtio_drivers::device::blk::VirtIOBlk; +use virtio_drivers::transport::Transport; +use virtio_drivers::Hal; + +use crate::io::Chunks; +use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; +use crate::kernel::constants::EIO; +use crate::kernel::mem::Folio; +use crate::prelude::KResult; pub struct HAL; @@ -22,13 +22,12 @@ unsafe impl Hal for HAL { pages: usize, _direction: virtio_drivers::BufferDirection, ) -> (virtio_drivers::PhysAddr, core::ptr::NonNull) { - let page = Page::alloc_at_least(pages); + let page = Folio::alloc_at_least(pages); - let paddr = page.start().addr(); - let ptr = page.as_memblk().as_byte_ptr(); - page.into_raw(); + let ptr = page.get_ptr(); + let pfn = page.into_raw(); - (paddr, ptr) + (PAddr::from(pfn).addr(), ptr) } unsafe fn dma_dealloc( @@ -41,7 +40,7 @@ unsafe impl Hal for HAL { unsafe { // SAFETY: The caller ensures that the pfn corresponds to a valid // page allocated by `dma_alloc`. - Page::from_raw(pfn); + Folio::from_raw(pfn); } 0 @@ -74,6 +73,7 @@ unsafe impl Hal for HAL { } } +#[async_trait] impl BlockRequestQueue for Spin> where T: Transport + Send, @@ -82,7 +82,7 @@ where 1024 } - fn submit(&self, req: BlockDeviceRequest) -> KResult<()> { + async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()> { match req { BlockDeviceRequest::Write { sector, @@ -90,15 +90,14 @@ where buffer, } => { let mut dev = self.lock(); - for ((start, len), buffer_page) in + for ((start, sectors), buffer_page) in Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter()) { - let buffer = unsafe { - // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us. 
- &buffer_page.as_memblk().as_bytes()[..len as usize * 512] - }; + let len = sectors * 512; + let pg = buffer_page.lock(); - dev.write_blocks(start, buffer).map_err(|_| EIO)?; + dev.write_blocks(start, &pg.as_bytes()[..len]) + .map_err(|_| EIO)?; } } BlockDeviceRequest::Read { @@ -107,15 +106,14 @@ where buffer, } => { let mut dev = self.lock(); - for ((start, len), buffer_page) in + for ((start, sectors), buffer_page) in Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter()) { - let buffer = unsafe { - // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us. - &mut buffer_page.as_memblk().as_bytes_mut()[..len as usize * 512] - }; + let len = sectors * 512; + let mut pg = buffer_page.lock(); - dev.read_blocks(start, buffer).map_err(|_| EIO)?; + dev.read_blocks(start, &mut pg.as_bytes_mut()[..len]) + .map_err(|_| EIO)?; } } } diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 76ca4a34..121339d3 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,5 +1,3 @@ -use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; - use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; use crate::kernel::task::block_on; use crate::kernel::timer::Ticks; @@ -31,6 +29,8 @@ use alloc::{ use another_ext4::{ Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, }; +use async_trait::async_trait; +use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use eonix_sync::RwLock; pub struct Ext4BlockDevice { @@ -194,7 +194,7 @@ impl Ext4Fs { root_inode.inode.mtime_extra() as _, )), rwsem: RwLock::new(()), - vfs: Arc::downgrade(&ext4fs) as _, + sb: Arc::downgrade(&ext4fs) as _, }, ) }; @@ -290,7 +290,7 @@ impl Inode for FileInode { } fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let mut temp_buf = vec![0u8; buffer.total()]; @@ -334,7 +334,7 @@ impl Inode for FileInode { fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { //let _lock = Task::block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let mut temp_buf = vec![0u8; 4096]; @@ -363,7 +363,7 @@ impl Inode for FileInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let old_mode = self.mode.load(); let new_mode = old_mode.perm(mode.bits()); @@ -428,7 +428,7 @@ impl DirInode { impl Inode for DirInode { fn lookup(&self, dentry: &Arc) -> KResult>> { - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let name = dentry.get_name(); @@ -477,7 +477,7 @@ impl Inode for DirInode { ctime: Spin::new(Instant::new(attr.ctime as _, 0)), mtime: Spin::new(Instant::new(attr.mtime as _, 0)), rwsem: RwLock::new(()), - vfs: self.vfs.clone(), + sb: self.sb.clone(), }, ); @@ -489,7 +489,7 @@ impl Inode for DirInode { offset: usize, callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, ) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let entries = ext4fs @@ -519,7 +519,7 @@ impl Inode for DirInode { fn creat(&self, at: &Arc, 
mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let name = at.get_name(); @@ -534,7 +534,7 @@ impl Inode for DirInode { ) .unwrap(); - let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode); + let file = FileInode::new(new_ino as u64, self.sb.clone(), mode); let now = Instant::now(); self.update_child_time(file.as_ref(), now); self.link_file(); @@ -547,7 +547,7 @@ impl Inode for DirInode { fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let name = at.get_name(); @@ -562,7 +562,7 @@ impl Inode for DirInode { ) .unwrap(); - let new_dir = DirInode::new(new_ino as u64, self.vfs.clone(), mode); + let new_dir = DirInode::new(new_ino as u64, self.sb.clone(), mode); let now = Instant::now(); self.update_child_time(new_dir.as_ref(), now); self.link_dir(); @@ -575,7 +575,7 @@ impl Inode for DirInode { fn unlink(&self, at: &Arc) -> KResult<()> { let _dir_lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let file = at.get_inode()?; @@ -602,7 +602,7 @@ impl Inode for DirInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let old_mode = self.mode.load(); let new_mode = old_mode.perm(mode.bits()); @@ -638,7 +638,7 @@ impl Inode for DirInode { // TODO: may need another lock let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let old_file = old_dentry.get_inode()?; @@ -698,6 +698,7 @@ impl From for Mode { struct Ext4MountCreator; +#[async_trait] impl MountCreator for Ext4MountCreator { fn check_signature(&self, mut first_block: &[u8]) -> KResult { match first_block.split_off(1080..) 
{ @@ -707,7 +708,7 @@ impl MountCreator for Ext4MountCreator { } } - fn create_mount(&self, source: &str, _flags: u64, mp: &Arc) -> KResult { + async fn create_mount(&self, source: &str, _flags: u64, mp: &Arc) -> KResult { let source = source.as_bytes(); let path = Path::new(source)?; diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 12eabdd5..c1feebdf 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -1,45 +1,68 @@ mod dir; -mod file; - -use crate::io::Stream; -use crate::kernel::constants::EIO; -use crate::kernel::mem::{AsMemoryBlock, CachePageStream}; -use crate::kernel::task::block_on; -use crate::kernel::vfs::inode::{Mode, WriteOffset}; -use crate::{ - io::{Buffer, ByteBuffer, UninitBuffer}, - kernel::{ - block::{make_device, BlockDevice, BlockDeviceRequest}, - mem::{ - paging::Page, - {CachePage, PageCache, PageCacheBackend}, - }, - vfs::{ - dentry::Dentry, - inode::{define_struct_inode, Ino, Inode, InodeData}, - mount::{register_filesystem, Mount, MountCreator}, - vfs::Vfs, - DevId, - }, - }, - prelude::*, - KResult, -}; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, - vec::Vec, -}; -use core::{ops::ControlFlow, sync::atomic::Ordering}; -use dir::Dirs as _; + +use alloc::sync::Arc; +use core::ops::Deref; + +use async_trait::async_trait; +use dir::{as_raw_dirents, ParseDirent}; +use eonix_mm::paging::PAGE_SIZE; use eonix_sync::RwLock; -use file::ClusterRead; +use itertools::Itertools; + +use crate::io::{Buffer, ByteBuffer, UninitBuffer}; +use crate::kernel::block::{BlockDevice, BlockDeviceRequest}; +use crate::kernel::constants::{EINVAL, EIO}; +use crate::kernel::mem::{CachePage, Folio, FolioOwned, PageOffset}; +use crate::kernel::timer::Instant; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::mount::{register_filesystem, Mount, MountCreator}; +use crate::kernel::vfs::types::{DeviceId, Format, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; +use crate::prelude::*; +use crate::KResult; + +#[repr(transparent)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +struct Cluster(u32); + +#[repr(transparent)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +struct RawCluster(pub u32); + +impl RawCluster { + const START: u32 = 2; + const EOC: u32 = 0x0FFF_FFF8; + const INVL: u32 = 0xF000_0000; + + fn parse(self) -> Option { + match self.0 { + ..Self::START | Self::EOC..Self::INVL => None, + Self::INVL.. => { + unreachable!("invalid cluster number: RawCluster({:#08x})", self.0) + } + no => Some(Cluster(no)), + } + } +} -type ClusterNo = u32; +impl Cluster { + pub fn as_ino(self) -> Ino { + Ino::new(self.0 as _) + } + + pub fn from_ino(ino: Ino) -> Self { + Self(ino.as_raw() as u32) + } + + fn normalized(self) -> Self { + Self(self.0 - 2) + } +} const SECTOR_SIZE: usize = 512; -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] #[repr(C, packed)] struct Bootsector { jmp: [u8; 3], @@ -59,7 +82,7 @@ struct Bootsector { sectors_per_fat: u32, flags: u16, fat_version: u16, - root_cluster: ClusterNo, + root_cluster: RawCluster, fsinfo_sector: u16, backup_bootsector: u16, _reserved: [u8; 12], @@ -73,341 +96,355 @@ struct Bootsector { mbr_signature: u16, } -impl_any!(FatFs); /// # Lock order /// 2. FatTable /// 3. 
Inodes /// struct FatFs { sectors_per_cluster: u8, - rootdir_cluster: ClusterNo, - data_start: u64, - volume_label: [u8; 11], + data_start_sector: u64, + _rootdir_cluster: Cluster, + _volume_label: Box, device: Arc, - fat: RwLock>, - weak: Weak, - icache: BTreeMap, + fat: RwLock>, } -impl Vfs for FatFs { - fn io_blksize(&self) -> usize { - 4096 - } - - fn fs_devid(&self) -> DevId { - self.device.devid() - } - - fn is_read_only(&self) -> bool { - true - } -} +impl SuperBlock for FatFs {} impl FatFs { - fn read_cluster(&self, cluster: ClusterNo, buf: &Page) -> KResult<()> { - let cluster = cluster - 2; + async fn read_cluster(&self, mut cluster: Cluster, buf: &Folio) -> KResult<()> { + cluster = cluster.normalized(); let rq = BlockDeviceRequest::Read { - sector: self.data_start as u64 + cluster as u64 * self.sectors_per_cluster as u64, + sector: self.data_start_sector as u64 + + cluster.0 as u64 * self.sectors_per_cluster as u64, count: self.sectors_per_cluster as u64, buffer: core::slice::from_ref(buf), }; - self.device.commit_request(rq)?; + self.device.commit_request(rq).await?; Ok(()) } - - fn get_or_alloc_inode(&self, ino: Ino, is_directory: bool, size: u32) -> Arc { - self.icache - .get(&ino) - .cloned() - .map(FatInode::unwrap) - .unwrap_or_else(|| { - if is_directory { - DirInode::new(ino, self.weak.clone(), size) - } else { - FileInode::new(ino, self.weak.clone(), size) - } - }) - } } impl FatFs { - pub fn create(device: DevId) -> KResult<(Arc, Arc)> { + pub async fn create(device: DeviceId) -> KResult<(SbUse, InodeUse)> { let device = BlockDevice::get(device)?; - let mut fatfs_arc = Arc::new_cyclic(|weak: &Weak| Self { - device, - sectors_per_cluster: 0, - rootdir_cluster: 0, - data_start: 0, - fat: RwLock::new(Vec::new()), - weak: weak.clone(), - icache: BTreeMap::new(), - volume_label: [0; 11], - }); - - let fatfs = unsafe { Arc::get_mut_unchecked(&mut fatfs_arc) }; - - let mut info: UninitBuffer = UninitBuffer::new(); - fatfs.device.read_some(0, &mut info)?.ok_or(EIO)?; + + let mut info = UninitBuffer::::new(); + device.read_some(0, &mut info).await?.ok_or(EIO)?; let info = info.assume_filled_ref()?; - fatfs.sectors_per_cluster = info.sectors_per_cluster; - fatfs.rootdir_cluster = info.root_cluster; - fatfs.data_start = - info.reserved_sectors as u64 + info.fat_copies as u64 * info.sectors_per_fat as u64; + let mut fat = Box::new_uninit_slice( + 512 * info.sectors_per_fat as usize / core::mem::size_of::(), + ); - let fat = fatfs.fat.get_mut(); + device + .read_some( + info.reserved_sectors as usize * 512, + &mut ByteBuffer::from(fat.as_mut()), + ) + .await? + .ok_or(EIO)?; - fat.resize( - 512 * info.sectors_per_fat as usize / core::mem::size_of::(), - 0, - ); + let sectors_per_cluster = info.sectors_per_cluster; + let rootdir_cluster = info.root_cluster.parse().ok_or(EINVAL)?; - let mut buffer = ByteBuffer::from(fat.as_mut_slice()); + let data_start_sector = + info.reserved_sectors as u64 + info.fat_copies as u64 * info.sectors_per_fat as u64; - fatfs - .device - .read_some(info.reserved_sectors as usize * 512, &mut buffer)? 
- .ok_or(EIO)?; + let volume_label = { + let end = info + .volume_label + .iter() + .position(|&c| c == b' ') + .unwrap_or(info.volume_label.len()); - info.volume_label - .iter() - .take_while(|&&c| c != ' ' as u8) - .take(11) - .enumerate() - .for_each(|(idx, c)| fatfs.volume_label[idx] = *c); - - let root_dir_cluster_count = ClusterIterator::new(fat, fatfs.rootdir_cluster).count(); - let root_dir_size = root_dir_cluster_count as u32 * info.sectors_per_cluster as u32 * 512; - - let root_inode = DirInode::new( - (info.root_cluster & !0xF000_0000) as Ino, - fatfs.weak.clone(), - root_dir_size, + String::from_utf8_lossy(&info.volume_label[..end]) + .into_owned() + .into_boxed_str() + }; + + let fat = unsafe { fat.assume_init() }; + + let rootdir_cluster_count = ClusterIterator::new(fat.as_ref(), rootdir_cluster).count(); + let rootdir_size = rootdir_cluster_count as u32 * sectors_per_cluster as u32 * 512; + + let fatfs = SbUse::new( + SuperBlockInfo { + io_blksize: 4096, + device_id: device.devid(), + read_only: true, + }, + Self { + device, + sectors_per_cluster, + _rootdir_cluster: rootdir_cluster, + data_start_sector, + fat: RwLock::new(fat), + _volume_label: volume_label, + }, ); - Ok((fatfs_arc, root_inode)) + let sbref = SbRef::from(&fatfs); + Ok((fatfs, DirInode::new(rootdir_cluster, sbref, rootdir_size))) } } -struct ClusterIterator<'fat> { - fat: &'fat [ClusterNo], - cur: ClusterNo, +struct ClusterIterator<'a> { + fat: &'a [RawCluster], + cur: Option, } -impl<'fat> ClusterIterator<'fat> { - fn new(fat: &'fat [ClusterNo], start: ClusterNo) -> Self { - Self { fat, cur: start } +impl<'a> ClusterIterator<'a> { + fn new(fat: &'a [RawCluster], start: Cluster) -> Self { + Self { + fat, + cur: Some(start), + } } } impl<'fat> Iterator for ClusterIterator<'fat> { - type Item = ClusterNo; + type Item = Cluster; fn next(&mut self) -> Option { - const EOC: ClusterNo = 0x0FFF_FFF8; - const INVL: ClusterNo = 0xF000_0000; - - match self.cur { - ..2 | EOC..INVL => None, - INVL.. => unreachable!("Invalid cluster number: {}", self.cur), - next => { - self.cur = self.fat[next as usize] & !INVL; - Some(next) - } - } + self.cur.inspect(|&Cluster(no)| { + self.cur = self.fat[no as usize].parse(); + }) } } -#[allow(dead_code)] -#[derive(Clone)] -enum FatInode { - File(Arc), - Dir(Arc), -} +struct FileInode; -impl FatInode { - fn unwrap(self) -> Arc { - match self { - FatInode::File(inode) => inode, - FatInode::Dir(inode) => inode, - } +impl FileInode { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new( + sb, + cluster.as_ino(), + Format::REG, + InodeInfo { + size: size as u64, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o777), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }, + Self, + ) } } -define_struct_inode! 
{ - struct FileInode { - page_cache: PageCache, +impl InodeOps for FileInode { + type SuperBlock = FatFs; + + async fn read( + &self, + _: SbUse, + inode: &InodeUse, + buffer: &mut dyn Buffer, + offset: usize, + ) -> KResult { + inode.get_page_cache().read(buffer, offset).await } -} -impl FileInode { - fn new(ino: Ino, weak: Weak, size: u32) -> Arc { - let inode = Arc::new_cyclic(|weak_self: &Weak| Self { - idata: InodeData::new(ino, weak), - page_cache: PageCache::new(weak_self.clone()), - }); - - // Safety: We are initializing the inode - inode.nlink.store(1, Ordering::Relaxed); - inode.mode.store(Mode::REG.perm(0o777)); - inode.size.store(size as u64, Ordering::Relaxed); - - inode + async fn read_page( + &self, + sb: SbUse, + inode: &InodeUse, + page: &mut CachePage, + offset: PageOffset, + ) -> KResult<()> { + let fs = &sb.backend; + let fat = sb.backend.fat.read().await; + + if offset >= PageOffset::from_byte_ceil(inode.info.lock().size as usize) { + unreachable!("read_page called with offset beyond file size"); + } + + let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; + if cluster_size != PAGE_SIZE { + unimplemented!("cluster size != PAGE_SIZE"); + } + + // XXX: Ugly and inefficient O(n^2) algorithm for sequential file read. + let cluster = ClusterIterator::new(fat.as_ref(), Cluster::from_ino(inode.ino)) + .skip(offset.page_count()) + .next() + .ok_or(EIO)?; + + fs.read_cluster(cluster, &page).await?; + + let real_len = (inode.info.lock().size as usize) - offset.byte_count(); + if real_len < PAGE_SIZE { + let mut page = page.lock(); + page.as_bytes_mut()[real_len..].fill(0); + } + + Ok(()) } } -impl Inode for FileInode { - fn page_cache(&self) -> Option<&PageCache> { - Some(&self.page_cache) - } +struct DirInode { + // TODO: Use the new PageCache... + dir_pages: RwLock>, +} - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - block_on(self.page_cache.read(buffer, offset)) +impl DirInode { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new( + sb, + cluster.as_ino(), + Format::DIR, + InodeInfo { + size: size as u64, + nlink: 2, // '.' and '..' 
+ uid: 0, + gid: 0, + perm: Permission::new(0o777), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }, + Self { + dir_pages: RwLock::new(Vec::new()), + }, + ) } - fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = block_on(vfs.fat.read()); - - if self.size.load(Ordering::Relaxed) as usize == 0 { - return Ok(0); + async fn read_dir_pages(&self, sb: &SbUse, inode: &InodeUse) -> KResult<()> { + let mut dir_pages = self.dir_pages.write().await; + if !dir_pages.is_empty() { + return Ok(()); } - let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE; - assert!(cluster_size <= 0x1000, "Cluster size is too large"); + let fs = &sb.backend; + let fat = fs.fat.read().await; - let skip_clusters = offset / cluster_size; - let inner_offset = offset % cluster_size; - - let cluster_iter = - ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).skip(skip_clusters); + let clusters = ClusterIterator::new(fat.as_ref(), Cluster::from_ino(inode.ino)); - let buffer_page = Page::alloc(); - for cluster in cluster_iter { - vfs.read_cluster(cluster, &buffer_page)?; + for cluster in clusters { + let page = FolioOwned::alloc(); + fs.read_cluster(cluster, &page).await?; - let data = unsafe { - // SAFETY: We are the only one holding this page. - &buffer_page.as_memblk().as_bytes()[inner_offset..] - }; + dir_pages.push(page); + } - let end = offset + data.len(); - let real_end = core::cmp::min(end, self.size.load(Ordering::Relaxed) as usize); - let real_size = real_end - offset; + Ok(()) + } - if buffer.fill(&data[..real_size])?.should_stop() { - break; + async fn get_dir_pages( + &self, + sb: &SbUse, + inode: &InodeUse, + ) -> KResult> + use<'_>> { + { + let dir_pages = self.dir_pages.read().await; + if !dir_pages.is_empty() { + return Ok(dir_pages); } } - Ok(buffer.wrote()) - } + self.read_dir_pages(sb, inode).await?; - fn write(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { - todo!() - } + if let Some(dir_pages) = self.dir_pages.try_read() { + return Ok(dir_pages); + } - fn write_direct(&self, _stream: &mut dyn Stream, _offset: usize) -> KResult { - todo!() + Ok(self.dir_pages.read().await) } } -impl PageCacheBackend for FileInode { - fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { - self.read_direct(page, offset) - } +impl InodeOps for DirInode { + type SuperBlock = FatFs; - fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { - todo!() - } + async fn lookup( + &self, + sb: SbUse, + inode: &InodeUse, + dentry: &Arc, + ) -> KResult> { + let dir_pages = self.get_dir_pages(&sb, inode).await?; - fn size(&self) -> usize { - self.size.load(Ordering::Relaxed) as usize - } -} + let dir_data = dir_pages.iter().map(|pg| pg.as_bytes()); -define_struct_inode! 
{ - struct DirInode; -} + let raw_dirents = dir_data + .map(as_raw_dirents) + .take_while_inclusive(Result::is_ok) + .flatten_ok(); -impl DirInode { - fn new(ino: Ino, weak: Weak, size: u32) -> Arc { - let inode = Arc::new(Self { - idata: InodeData::new(ino, weak), - }); + let mut dirents = futures::stream::iter(raw_dirents); - // Safety: We are initializing the inode - inode.nlink.store(2, Ordering::Relaxed); - inode.mode.store(Mode::DIR.perm(0o777)); - inode.size.store(size as u64, Ordering::Relaxed); + while let Some(result) = dirents.next_dirent().await { + let entry = result?; - inode - } -} + if *entry.filename != ****dentry.name() { + continue; + } -impl Inode for DirInode { - fn lookup(&self, dentry: &Arc) -> KResult>> { - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = block_on(vfs.fat.read()); - - let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) - .read(vfs, 0) - .dirs(); - - let entry = entries.find(|entry| { - entry - .as_ref() - .map(|entry| &entry.filename == &***dentry.name()) - .unwrap_or(true) - }); - - match entry { - None => Ok(None), - Some(Err(err)) => Err(err), - Some(Ok(entry)) => Ok(Some(vfs.get_or_alloc_inode( - entry.cluster as Ino, - entry.is_directory, - entry.size, - ))), + let sbref = SbRef::from(&sb); + + if entry.is_directory { + return Ok(Some(DirInode::new(entry.cluster, sbref, entry.size) as _)); + } else { + return Ok(Some(FileInode::new(entry.cluster, sbref, entry.size) as _)); + } } + + Ok(None) } - fn do_readdir( + async fn readdir( &self, + sb: SbUse, + inode: &InodeUse, offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = block_on(vfs.fat.read()); + callback: &mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> KResult> { + let fs = &sb.backend; + let dir_pages = self.get_dir_pages(&sb, inode).await?; - let cluster_iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) - .read(vfs, offset) - .dirs(); + let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; - let mut nread = 0usize; - for entry in cluster_iter { - let entry = entry?; + let cluster_offset = offset / cluster_size; + let inner_offset = offset % cluster_size; + let inner_raw_dirent_offset = inner_offset / core::mem::size_of::(); - vfs.get_or_alloc_inode(entry.cluster as Ino, entry.is_directory, entry.size); - if callback(&entry.filename, entry.cluster as Ino)?.is_break() { - break; - } + let dir_data = dir_pages + .iter() + .skip(cluster_offset) + .map(|pg| pg.as_bytes()); + + let raw_dirents = dir_data + .map(as_raw_dirents) + .take_while_inclusive(Result::is_ok) + .flatten_ok() + .skip(inner_raw_dirent_offset); + + let mut dirents = futures::stream::iter(raw_dirents); - nread += entry.entry_offset as usize; + let mut nread = 0; + while let Some(result) = dirents.next_dirent().await { + let entry = result?; + + match callback(&entry.filename, entry.cluster.as_ino()) { + Err(err) => return Ok(Err(err)), + Ok(true) => nread += entry.entry_offset as usize, + Ok(false) => break, + } } - Ok(nread) + Ok(Ok(nread)) } } struct FatMountCreator; +#[async_trait] impl MountCreator for FatMountCreator { fn check_signature(&self, mut first_block: &[u8]) -> KResult { match first_block.split_off(82..) 
{ @@ -417,8 +454,8 @@ impl MountCreator for FatMountCreator { } } - fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { - let (fatfs, root_inode) = FatFs::create(make_device(8, 1))?; + async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { + let (fatfs, root_inode) = FatFs::create(DeviceId::new(8, 1)).await?; Mount::new(mp, fatfs, root_inode) } diff --git a/src/fs/fat32/dir.rs b/src/fs/fat32/dir.rs index c4ac6c0d..8a5b6f40 100644 --- a/src/fs/fat32/dir.rs +++ b/src/fs/fat32/dir.rs @@ -1,11 +1,16 @@ -use super::file::ClusterReadIterator; +use core::pin::Pin; + +use alloc::{boxed::Box, string::String}; +use futures::{Stream, StreamExt}; +use posix_types::result::PosixError; + use crate::kernel::constants::EINVAL; use crate::prelude::*; -use alloc::{string::String, sync::Arc}; -use itertools::Itertools; + +use super::{Cluster, RawCluster}; #[repr(C, packed)] -pub(super) struct RawDirEntry { +pub struct RawDirEntry { name: [u8; 8], extension: [u8; 3], attr: u8, @@ -21,9 +26,9 @@ pub(super) struct RawDirEntry { size: u32, } -pub(super) struct FatDirectoryEntry { - pub filename: Arc<[u8]>, - pub cluster: u32, +pub struct FatDirectoryEntry { + pub filename: Box<[u8]>, + pub cluster: Cluster, pub size: u32, pub entry_offset: u32, pub is_directory: bool, @@ -79,7 +84,7 @@ impl RawDirEntry { self.attr & Self::ATTR_DIRECTORY != 0 } - fn long_filename(&self) -> Option<[u16; 13]> { + fn as_raw_long_filename(&self) -> Option<[u16; 13]> { if !self.is_long_filename() { return None; } @@ -103,137 +108,114 @@ impl RawDirEntry { } } -impl<'data, I> RawDirs<'data> for I where I: ClusterReadIterator<'data> {} -trait RawDirs<'data>: ClusterReadIterator<'data> { - fn raw_dirs(self) -> impl Iterator> + 'data - where - Self: Sized, - { - const ENTRY_SIZE: usize = size_of::(); - - self.map(|result| { - let data = result?; - if data.len() % ENTRY_SIZE != 0 { - return Err(EINVAL); - } - - Ok(unsafe { - core::slice::from_raw_parts( - data.as_ptr() as *const RawDirEntry, - data.len() / ENTRY_SIZE, - ) - }) - }) - .flatten_ok() +pub fn as_raw_dirents(data: &[u8]) -> KResult<&[RawDirEntry]> { + let len = data.len(); + if len % size_of::() != 0 { + return Err(EINVAL); } -} - -pub(super) trait Dirs<'data>: ClusterReadIterator<'data> { - fn dirs(self) -> impl Iterator> + 'data - where - Self: Sized; -} -impl<'data, I> Dirs<'data> for I -where - I: ClusterReadIterator<'data>, -{ - fn dirs(self) -> impl Iterator> + 'data - where - Self: Sized, - { - self.raw_dirs().real_dirs() + unsafe { + Ok(core::slice::from_raw_parts( + data.as_ptr() as *const RawDirEntry, + len / size_of::(), + )) } } -trait RealDirs<'data>: Iterator> + 'data { - fn real_dirs(self) -> DirsIter<'data, Self> - where - Self: Sized; +pub trait ParseDirent { + async fn next_dirent(&mut self) -> Option>; } -impl<'data, I> RealDirs<'data> for I +impl<'a, T> ParseDirent for T where - I: Iterator> + 'data, + T: Stream>, { - fn real_dirs(self) -> DirsIter<'data, Self> - where - Self: Sized, - { - DirsIter { iter: self } - } -} + async fn next_dirent(&mut self) -> Option> { + let mut me = unsafe { Pin::new_unchecked(self) }; + + // The long filename entries are stored in reverse order. + // So we reverse all filename segments and then reverse the whole string at the end. 
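A standalone toy model of that double reversal; the helper and its inputs are invented for illustration. Each on-disk LFN entry carries up to 13 UTF-16 code units, and the entries precede the short entry in last-chunk-first order:

```rust
fn assemble_lfn(chunks_last_first: &[&str]) -> String {
    let mut name_rev = String::new();
    for chunk in chunks_last_first {
        // Append each chunk with its characters reversed...
        name_rev.extend(chunk.chars().rev());
    }
    // ...then one final reversal restores the original order.
    name_rev.chars().rev().collect()
}

// assemble_lfn(&["name.txt", "longfile"]) == "longfilename.txt"
```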
+ let mut filename_rev = String::new(); + + let mut is_lfn = false; + let mut nr_entry_scanned = 0; + let mut cur_entry; + + loop { + match me.as_mut().next().await { + Some(Err(err)) => return Some(Err(err)), + Some(Ok(ent)) => { + cur_entry = ent; + nr_entry_scanned += 1; + } + None => { + if is_lfn { + // Unterminated long filename entries are invalid. + return Some(Err(PosixError::EINVAL.into())); + } else { + return None; + } + } + }; -pub(super) struct DirsIter<'data, I> -where - I: Iterator> + 'data, -{ - iter: I, -} + if !cur_entry.is_invalid() { + break; + } -impl<'data, I> Iterator for DirsIter<'data, I> -where - I: Iterator> + 'data, -{ - type Item = KResult; - - fn next(&mut self) -> Option { - let mut filename = String::new(); - let mut entry_offset = 0; - let entry = loop { - let entry = match self.iter.next()? { - Ok(entry) => entry, - Err(err) => return Some(Err(err)), - }; - entry_offset += 1; - - let long_filename = entry.long_filename(); - if entry.is_invalid() { - if let Some(long_filename) = long_filename { - let long_filename = long_filename - .iter() - .position(|&ch| ch == 0) - .map(|pos| &long_filename[..pos]) - .unwrap_or(&long_filename); - - filename.extend( - long_filename - .into_iter() - .map(|&ch| char::from_u32(ch as u32).unwrap_or('?')) - .rev(), - ); - } + let Some(raw_long_filename) = cur_entry.as_raw_long_filename() else { continue; - } - break entry; + }; + + // We are processing a long filename entry. + is_lfn = true; + + let real_len = raw_long_filename + .iter() + .position(|&ch| ch == 0) + .unwrap_or(raw_long_filename.len()); + + let name_codes_rev = raw_long_filename.into_iter().take(real_len).rev(); + let name_chars_rev = char::decode_utf16(name_codes_rev).map(|r| r.unwrap_or('?')); + + filename_rev.extend(name_chars_rev); + } + + // From now on, `entry` represents a valid directory entry. 
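For context on the cluster handling that follows: the old code masked the top four bits of `cluster_high` with `& !0xF000`, while the new code assembles the raw 32-bit value and defers validation to `RawCluster::parse`. Below is a plausible sketch of such a parse step; the ranges are FAT32 facts, but the body is an assumption, not the patch's actual implementation.

```rust
pub struct RawCluster(pub u32);
pub struct Cluster(u32);

impl RawCluster {
    /// FAT32 cluster numbers are 28 bits: the top four bits of the
    /// on-disk value are reserved. Clusters 0 and 1 are reserved,
    /// and values from 0x0FFF_FFF7 up are bad-cluster or
    /// end-of-chain markers.
    pub fn parse(self) -> Option<Cluster> {
        let value = self.0 & 0x0FFF_FFFF; // Drop the reserved bits.
        match value {
            2..=0x0FFF_FFF6 => Some(Cluster(value)),
            _ => None, // Free, reserved, bad, or end-of-chain.
        }
    }
}
```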
+ + let raw_cluster = + RawCluster(cur_entry.cluster_low as u32 | ((cur_entry.cluster_high as u32) << 16)); + + let Some(cluster) = raw_cluster.parse() else { + return Some(Err(PosixError::EINVAL.into())); }; - let filename: Arc<[u8]> = if filename.is_empty() { - let mut filename = entry.filename().to_vec(); - let extension = entry.extension(); + let filename; + + if filename_rev.is_empty() { + let mut name = cur_entry.filename().to_vec(); + let extension = cur_entry.extension(); if !extension.is_empty() { - filename.push(b'.'); - filename.extend_from_slice(extension); + name.push(b'.'); + name.extend_from_slice(extension); } - if entry.is_filename_lowercase() { - filename.make_ascii_lowercase(); + if cur_entry.is_filename_lowercase() { + name.make_ascii_lowercase(); } - filename.into() + filename = name.into_boxed_slice(); } else { - let mut bytes = filename.into_bytes(); - bytes.reverse(); - - bytes.into() - }; + let mut name = filename_rev.into_bytes(); + name.reverse(); + filename = name.into_boxed_slice(); + } Some(Ok(FatDirectoryEntry { - size: entry.size, - entry_offset, + size: cur_entry.size, + entry_offset: nr_entry_scanned * size_of::() as u32, filename, - cluster: entry.cluster_low as u32 | (((entry.cluster_high & !0xF000) as u32) << 16), - is_directory: entry.is_directory(), + cluster, + is_directory: cur_entry.is_directory(), })) } } diff --git a/src/fs/fat32/file.rs b/src/fs/fat32/file.rs deleted file mode 100644 index db16df50..00000000 --- a/src/fs/fat32/file.rs +++ /dev/null @@ -1,40 +0,0 @@ -use super::{ClusterIterator, FatFs}; -use crate::{ - kernel::mem::{AsMemoryBlock as _, Page}, - KResult, -}; - -pub trait ClusterReadIterator<'data>: Iterator> + 'data {} -impl<'a, I> ClusterReadIterator<'a> for I where I: Iterator> + 'a {} - -pub(super) trait ClusterRead<'data> { - fn read<'vfs>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data> - where - Self: Sized, - 'vfs: 'data; -} - -impl<'data, 'fat: 'data> ClusterRead<'data> for ClusterIterator<'fat> { - fn read<'vfs: 'data>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data> { - const SECTOR_SIZE: usize = 512; - - let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE; - assert!(cluster_size <= 0x1000, "Cluster size is too large"); - - let skip_clusters = offset / cluster_size; - let mut inner_offset = offset % cluster_size; - - // TODO: Use block cache. - let buffer_page = Page::alloc(); - - self.skip(skip_clusters).map(move |cluster| { - vfs.read_cluster(cluster, &buffer_page)?; - let data = unsafe { - // SAFETY: No one could be writing to it. - &buffer_page.as_memblk().as_bytes()[inner_offset..] 
- }; - inner_offset = 0; - Ok(data) - }) - } -} diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 5d9285ec..c59ee801 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,5 +1,4 @@ +// pub mod ext4; pub mod fat32; pub mod procfs; -pub mod shm; pub mod tmpfs; -pub mod ext4; diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 2ed24613..9a3933bb 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,325 +1,247 @@ -use crate::kernel::constants::{EACCES, ENOTDIR}; -use crate::kernel::task::block_on; -use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::{AtomicMode, Mode}; -use crate::{ - io::Buffer, - kernel::{ - mem::paging::PageBuffer, - vfs::{ - dentry::Dentry, - inode::{define_struct_inode, AtomicIno, Ino, Inode, InodeData}, - mount::{dump_mounts, register_filesystem, Mount, MountCreator}, - vfs::Vfs, - DevId, - }, - }, - prelude::*, -}; -use alloc::sync::{Arc, Weak}; -use core::{ops::ControlFlow, sync::atomic::Ordering}; -use eonix_sync::{AsProof as _, AsProofMut as _, LazyLock, Locked}; -use itertools::Itertools; - -#[allow(dead_code)] -pub trait ProcFsFile: Send + Sync { - fn can_read(&self) -> bool { - false - } +use alloc::sync::Arc; +use core::sync::atomic::{AtomicU64, Ordering}; - fn can_write(&self) -> bool { - false - } +use async_trait::async_trait; +use eonix_sync::{LazyLock, RwLock}; - fn read(&self, _buffer: &mut PageBuffer) -> KResult { - Err(EACCES) - } +use crate::io::Buffer; +use crate::kernel::constants::{EACCES, EISDIR, ENOTDIR}; +use crate::kernel::mem::paging::PageBuffer; +use crate::kernel::timer::Instant; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::mount::{dump_mounts, register_filesystem, Mount, MountCreator}; +use crate::kernel::vfs::types::{DeviceId, Format, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; +use crate::prelude::*; - fn write(&self, _buffer: &[u8]) -> KResult { - Err(EACCES) - } +struct Node { + kind: NodeKind, } -pub enum ProcFsNode { - File(Arc), - Dir(Arc), +enum NodeKind { + File(FileInode), + Dir(DirInode), } -impl ProcFsNode { - fn unwrap(&self) -> Arc { - match self { - ProcFsNode::File(inode) => inode.clone(), - ProcFsNode::Dir(inode) => inode.clone(), - } - } - - fn ino(&self) -> Ino { - match self { - ProcFsNode::File(inode) => inode.ino, - ProcFsNode::Dir(inode) => inode.ino, - } - } +struct FileInode { + read: Option KResult<()> + Send + Sync>>, + // TODO: Implement writes to procfs files + #[allow(unused)] + write: Option<()>, } -define_struct_inode! 
{ - pub struct FileInode { - file: Box, - } +struct DirInode { + entries: RwLock, InodeUse)>>, } -impl FileInode { - pub fn new(ino: Ino, vfs: Weak, file: Box) -> Arc { - let mut mode = Mode::REG; - if file.can_read() { - mode.set_perm(0o444); - } - if file.can_write() { - mode.set_perm(0o222); - } +impl InodeOps for Node { + type SuperBlock = ProcFs; - let mut inode = Self { - idata: InodeData::new(ino, vfs), - file, + async fn read( + &self, + _: SbUse, + _: &InodeUse, + buffer: &mut dyn Buffer, + offset: usize, + ) -> KResult { + let NodeKind::File(file_inode) = &self.kind else { + return Err(EISDIR); }; - inode.idata.mode.store(mode); - inode.idata.nlink.store(1, Ordering::Relaxed); - *inode.ctime.get_mut() = Instant::now(); - *inode.mtime.get_mut() = Instant::now(); - *inode.atime.get_mut() = Instant::now(); - - Arc::new(inode) - } -} - -impl Inode for FileInode { - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - if !self.file.can_read() { + let Some(read_fn) = &file_inode.read else { return Err(EACCES); - } + }; let mut page_buffer = PageBuffer::new(); - self.file.read(&mut page_buffer)?; + read_fn(&mut page_buffer)?; - let data = page_buffer - .data() - .split_at_checked(offset) - .map(|(_, data)| data); + let Some((_, data)) = page_buffer.data().split_at_checked(offset) else { + return Ok(0); + }; - match data { - None => Ok(0), - Some(data) => Ok(buffer.fill(data)?.allow_partial()), - } + Ok(buffer.fill(data)?.allow_partial()) } -} -define_struct_inode! { - pub struct DirInode { - entries: Locked, ProcFsNode)>, ()>, - } -} + async fn lookup( + &self, + _: SbUse, + _: &InodeUse, + dentry: &Arc, + ) -> KResult> { + let NodeKind::Dir(dir) = &self.kind else { + return Err(ENOTDIR); + }; -impl DirInode { - pub fn new(ino: Ino, vfs: Weak) -> Arc { - Self::new_locked(ino, vfs, |inode, rwsem| unsafe { - addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem)); - addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::DIR.perm(0o755))); - addr_of_mut_field!(&mut *inode, nlink).write(1.into()); - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } -} + let entries = dir.entries.read().await; + + let dent_name = dentry.name(); + for (name, node) in entries.iter() { + if *name == ***dent_name { + return Ok(Some(node.clone() as _)); + } + } -impl Inode for DirInode { - fn lookup(&self, dentry: &Arc) -> KResult>> { - let lock = block_on(self.rwsem.read()); - Ok(self - .entries - .access(lock.prove()) - .iter() - .find_map(|(name, node)| (name == &***dentry.name()).then(|| node.unwrap()))) + Ok(None) } - fn do_readdir( + async fn readdir( &self, + _: SbUse, + _: &InodeUse, offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - let lock = block_on(self.rwsem.read()); - self.entries - .access(lock.prove()) - .iter() - .skip(offset) - .map(|(name, node)| callback(name.as_ref(), node.ino())) - .take_while(|result| result.map_or(true, |flow| flow.is_continue())) - .take_while_inclusive(|result| result.is_ok()) - .fold_ok(0, |acc, _| acc + 1) - } -} - -impl_any!(ProcFs); -pub struct ProcFs { - root_node: Arc, - next_ino: AtomicIno, -} + callback: &mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> KResult> { + let NodeKind::Dir(dir) = &self.kind else { + return Err(ENOTDIR); + }; -impl Vfs for ProcFs { - fn io_blksize(&self) -> usize { - 4096 - } 
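The new `readdir` signature above nests two results. Judging from this patch, the outer `KResult` carries filesystem-side failures, `Ok(Err(_))` propagates an error raised by the callback itself, and the callback returns `Ok(true)` to continue or `Ok(false)` to stop early. A self-contained sketch of the convention, with a simplified `KResult` stand-in:

```rust
type KResult<T> = Result<T, i32>; // Stand-in for the kernel's KResult.

fn readdir_demo(
    entries: &[(&[u8], u64)],
    offset: usize,
    callback: &mut dyn FnMut(&[u8], u64) -> KResult<bool>,
) -> KResult<KResult<usize>> {
    let mut count = 0;
    for (name, ino) in entries.iter().skip(offset) {
        match callback(name, *ino) {
            // The callback failed: report it in the inner result.
            Err(err) => return Ok(Err(err)),
            Ok(true) => count += 1,
            Ok(false) => break,
        }
    }
    Ok(Ok(count))
}

fn demo() {
    let entries: &[(&[u8], u64)] = &[(b"mounts", 1), (b"stat", 2)];
    let mut names = Vec::new();
    let result = readdir_demo(entries, 0, &mut |name, _ino| {
        names.push(name.to_vec());
        Ok(true) // Keep iterating.
    });
    assert!(matches!(result, Ok(Ok(2))));
}
```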
+ let entries = dir.entries.read().await; - fn fs_devid(&self) -> DevId { - 10 - } + let mut count = 0; + for (name, node) in entries.iter().skip(offset) { + match callback(name.as_ref(), node.ino) { + Err(err) => return Ok(Err(err)), + Ok(true) => count += 1, + Ok(false) => break, + } + } - fn is_read_only(&self) -> bool { - false + Ok(Ok(count)) + } +} + +impl Node { + pub fn new_file( + ino: Ino, + sb: SbRef, + read: impl Fn(&mut PageBuffer) -> KResult<()> + Send + Sync + 'static, + ) -> InodeUse { + InodeUse::new( + sb, + ino, + Format::REG, + InodeInfo { + size: 0, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o444), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }, + Self { + kind: NodeKind::File(FileInode::new(Box::new(read))), + }, + ) + } + + fn new_dir(ino: Ino, sb: SbRef) -> InodeUse { + InodeUse::new( + sb, + ino, + Format::DIR, + InodeInfo { + size: 0, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o755), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }, + Self { + kind: NodeKind::Dir(DirInode::new()), + }, + ) } } -static GLOBAL_PROCFS: LazyLock> = LazyLock::new(|| { - Arc::new_cyclic(|weak: &Weak| ProcFs { - root_node: DirInode::new(0, weak.clone()), - next_ino: AtomicIno::new(1), - }) -}); - -struct ProcFsMountCreator; - -#[allow(dead_code)] -impl ProcFsMountCreator { - pub fn get() -> Arc { - GLOBAL_PROCFS.clone() - } - - pub fn get_weak() -> Weak { - Arc::downgrade(&GLOBAL_PROCFS) +impl FileInode { + fn new(read: Box KResult<()> + Send + Sync>) -> Self { + Self { + read: Some(read), + write: None, + } } } -impl MountCreator for ProcFsMountCreator { - fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { - let vfs = ProcFsMountCreator::get(); - let root_inode = vfs.root_node.clone(); - Mount::new(mp, vfs, root_inode) - } - - fn check_signature(&self, _: &[u8]) -> KResult { - Ok(true) +impl DirInode { + pub fn new() -> Self { + Self { + entries: RwLock::new(vec![]), + } } } -pub fn root() -> ProcFsNode { - let vfs = ProcFsMountCreator::get(); - let root = vfs.root_node.clone(); - - ProcFsNode::Dir(root) +pub struct ProcFs { + root: InodeUse, + next_ino: AtomicU64, } -pub fn creat( - parent: &ProcFsNode, - name: Arc<[u8]>, - file: Box, -) -> KResult { - let parent = match parent { - ProcFsNode::File(_) => return Err(ENOTDIR), - ProcFsNode::Dir(parent) => parent, - }; - - let fs = ProcFsMountCreator::get(); - let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed); - - let inode = FileInode::new(ino, Arc::downgrade(&fs), file); - - { - let lock = block_on(parent.idata.rwsem.write()); - parent - .entries - .access_mut(lock.prove_mut()) - .push((name, ProcFsNode::File(inode.clone()))); +impl SuperBlock for ProcFs {} +impl ProcFs { + fn assign_ino(&self) -> Ino { + Ino::new(self.next_ino.fetch_add(1, Ordering::Relaxed)) } - - Ok(ProcFsNode::File(inode)) } -#[allow(dead_code)] -pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult { - let parent = match parent { - ProcFsNode::File(_) => return Err(ENOTDIR), - ProcFsNode::Dir(parent) => parent, - }; - - let fs = ProcFsMountCreator::get(); - let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed); - - let inode = DirInode::new(ino, Arc::downgrade(&fs)); +static GLOBAL_PROCFS: LazyLock> = LazyLock::new(|| { + SbUse::new_cyclic( + SuperBlockInfo { + io_blksize: 4096, + device_id: DeviceId::new(0, 10), + read_only: false, + }, + |sbref| ProcFs { + root: Node::new_dir(Ino::new(0), sbref), + next_ino: 
AtomicU64::new(1), + }, + ) +}); - parent - .entries - .access_mut(block_on(inode.rwsem.write()).prove_mut()) - .push((Arc::from(name), ProcFsNode::Dir(inode.clone()))); +struct ProcFsMountCreator; - Ok(ProcFsNode::Dir(inode)) -} +#[async_trait] +impl MountCreator for ProcFsMountCreator { + async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { + let fs = GLOBAL_PROCFS.clone(); + let root_inode = fs.backend.root.clone(); -struct DumpMountsFile; -impl ProcFsFile for DumpMountsFile { - fn can_read(&self) -> bool { - true + Mount::new(mp, fs, root_inode) } - fn read(&self, buffer: &mut PageBuffer) -> KResult { - dump_mounts(&mut buffer.get_writer()); - - Ok(buffer.data().len()) + fn check_signature(&self, _: &[u8]) -> KResult { + Ok(true) } } -pub fn init() { - register_filesystem("procfs", Arc::new(ProcFsMountCreator)).unwrap(); - - creat( - &root(), - Arc::from(b"mounts".as_slice()), - Box::new(DumpMountsFile), - ) - .unwrap(); -} - -pub struct GenericProcFsFile +pub async fn populate_root(name: Arc<[u8]>, read_fn: F) where - ReadFn: Send + Sync + Fn(&mut PageBuffer) -> KResult<()>, + F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static, { - read_fn: Option, -} + let procfs = &GLOBAL_PROCFS.backend; + let root = &procfs.root.get_priv::(); -impl ProcFsFile for GenericProcFsFile -where - ReadFn: Send + Sync + Fn(&mut PageBuffer) -> KResult<()>, -{ - fn can_read(&self) -> bool { - self.read_fn.is_some() - } + let NodeKind::Dir(root) = &root.kind else { + unreachable!(); + }; - fn read(&self, buffer: &mut PageBuffer) -> KResult { - self.read_fn.as_ref().ok_or(EACCES)?(buffer).map(|_| buffer.data().len()) - } + let mut entries = root.entries.write().await; + entries.push(( + name.clone(), + Node::new_file(procfs.assign_ino(), SbRef::from(&GLOBAL_PROCFS), read_fn), + )); } -pub fn populate_root(name: Arc<[u8]>, read_fn: F) -> KResult<()> -where - F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static, -{ - let root = root(); - - creat( - &root, - name, - Box::new(GenericProcFsFile { - read_fn: Some(read_fn), - }), - ) - .map(|_| ()) +pub async fn init() { + register_filesystem("procfs", Arc::new(ProcFsMountCreator)).unwrap(); + + populate_root(Arc::from(b"mounts".as_slice()), |buffer| { + dump_mounts(&mut buffer.get_writer()); + Ok(()) + }) + .await; } diff --git a/src/fs/shm.rs b/src/fs/shm.rs deleted file mode 100644 index 09d36141..00000000 --- a/src/fs/shm.rs +++ /dev/null @@ -1,146 +0,0 @@ -use core::sync::atomic::{AtomicU32, Ordering}; - -use alloc::{collections::btree_map::BTreeMap, sync::Arc}; -use bitflags::bitflags; -use eonix_sync::{LazyLock, Mutex}; - -use crate::{ - fs::tmpfs::{DirectoryInode, FileInode, TmpFs}, - kernel::{constants::ENOSPC, vfs::inode::Mode}, - prelude::KResult, -}; - -bitflags! { - #[derive(Debug, Clone, Copy)] - pub struct ShmFlags: u32 { - /// Create a new segment. If this flag is not used, then shmget() will - /// find the segment associated with key and check to see if the user - /// has permission to access the segment. - const IPC_CREAT = 0o1000; - /// This flag is used with IPC_CREAT to ensure that this call creates - /// the segment. If the segment already exists, the call fails. - const IPC_EXCL = 0o2000; - - /// Attach the segment for read-only access.If this flag is not specified, - /// the segment is attached for read and write access, and the process - /// must have read and write permission for the segment. 
- const SHM_RDONLY = 0o10000; - /// round attach address to SHMLBA boundary - const SHM_RND = 0o20000; - /// Allow the contents of the segment to be executed. - const SHM_EXEC = 0o100000; - } -} - -pub const IPC_PRIVATE: usize = 0; - -pub struct ShmManager { - tmpfs: Arc, - root: Arc, - areas: BTreeMap, -} - -#[repr(C)] -#[derive(Default, Clone, Copy, Debug)] -pub struct IpcPerm { - key: i32, - uid: u32, - gid: u32, - cuid: u32, - cgid: u32, - mode: u16, - seq: u16, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct ShmIdDs { - // Ownership and permissions - pub shm_perm: IpcPerm, - // Size of segment (bytes). In our system, this must be aligned - pub shm_segsz: usize, - // Last attach time - pub shm_atime: usize, - // Last detach time - pub shm_dtime: usize, - // Creation time/time of last modification via shmctl() - pub shm_ctime: usize, - // PID of creator - pub shm_cpid: usize, - // PID of last shmat(2)/shmdt(2) - pub shm_lpid: usize, - // No. of current attaches - pub shm_nattch: usize, -} - -impl ShmIdDs { - fn new(size: usize, pid: u32) -> Self { - Self { - shm_perm: IpcPerm::default(), - shm_segsz: size, - shm_atime: 0, - shm_dtime: 0, - shm_ctime: 0, // Should set instant now - shm_cpid: pid as usize, - shm_lpid: 0, - shm_nattch: 0, - } - } -} - -#[derive(Debug)] -pub struct ShmArea { - pub area: Arc, - pub shmid_ds: ShmIdDs, -} - -// A big lock here to protect the shared memory area. -// Can be improved with finer-grained locking? -pub static SHM_MANAGER: LazyLock> = - LazyLock::new(|| Mutex::new(ShmManager::new())); - -impl ShmManager { - fn new() -> Self { - let (tmpfs, root) = TmpFs::create(false).expect("should create shm_area successfully"); - Self { - tmpfs, - root, - areas: BTreeMap::new(), - } - } - - pub fn create_shared_area(&self, size: usize, pid: u32, mode: Mode) -> ShmArea { - let ino = self.tmpfs.assign_ino(); - let vfs = Arc::downgrade(&self.tmpfs); - ShmArea { - area: FileInode::new(ino, vfs, size, mode), - shmid_ds: ShmIdDs::new(size, pid), - } - } - - pub fn get(&self, shmid: u32) -> Option<&ShmArea> { - self.areas.get(&shmid) - } - - pub fn insert(&mut self, shmid: u32, area: ShmArea) { - self.areas.insert(shmid, area); - } -} - -pub fn gen_shm_id(key: usize) -> KResult { - const SHM_MAGIC: u32 = 114514000; - - static NEXT_SHMID: AtomicU32 = AtomicU32::new(0); - - if key == IPC_PRIVATE { - let shmid = NEXT_SHMID.fetch_add(1, Ordering::Relaxed); - - if shmid >= SHM_MAGIC { - return Err(ENOSPC); - } else { - return Ok(shmid); - } - } - - (key as u32).checked_add(SHM_MAGIC).ok_or(ENOSPC) -} diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs deleted file mode 100644 index 7a5bd52b..00000000 --- a/src/fs/tmpfs.rs +++ /dev/null @@ -1,613 +0,0 @@ -use crate::io::Stream; -use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR}; -use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; -use crate::kernel::task::block_on; -use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::RenameData; -use crate::kernel::vfs::inode::{AtomicMode, InodeData}; -use crate::{ - io::Buffer, - kernel::vfs::{ - dentry::{dcache, Dentry}, - inode::{define_struct_inode, AtomicIno, Ino, Inode, Mode, WriteOffset}, - mount::{register_filesystem, Mount, MountCreator, MS_RDONLY}, - vfs::Vfs, - DevId, - }, - prelude::*, -}; -use alloc::sync::{Arc, Weak}; -use core::fmt::Debug; -use core::{ops::ControlFlow, sync::atomic::Ordering}; -use eonix_mm::paging::PAGE_SIZE; -use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, 
ProofMut}; -use itertools::Itertools; - -fn acquire(vfs: &Weak) -> KResult> { - vfs.upgrade().ok_or(EIO) -} - -fn astmp(vfs: &Arc) -> &TmpFs { - vfs.as_any() - .downcast_ref::() - .expect("corrupted tmpfs data structure") -} - -define_struct_inode! { - struct NodeInode { - devid: DevId, - } -} - -impl NodeInode { - fn new(ino: Ino, vfs: Weak, mode: Mode, devid: DevId) -> Arc { - Self::new_locked(ino, vfs, |inode, _| unsafe { - addr_of_mut_field!(inode, devid).write(devid); - - addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(mode)); - addr_of_mut_field!(&mut *inode, nlink).write(1.into()); - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } -} - -impl Inode for NodeInode { - fn devid(&self) -> KResult { - Ok(self.devid) - } -} - -define_struct_inode! { - pub(super) struct DirectoryInode { - entries: Locked, Ino)>, ()>, - } -} - -impl DirectoryInode { - fn new(ino: Ino, vfs: Weak, mode: Mode) -> Arc { - Self::new_locked(ino, vfs, |inode, rwsem| unsafe { - addr_of_mut_field!(inode, entries) - .write(Locked::new(vec![(Arc::from(b".".as_slice()), ino)], rwsem)); - - addr_of_mut_field!(&mut *inode, size).write(1.into()); - addr_of_mut_field!(&mut *inode, mode) - .write(AtomicMode::from(Mode::DIR.perm(mode.non_format_bits()))); - addr_of_mut_field!(&mut *inode, nlink).write(1.into()); // link from `.` to itself - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } - - fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: ProofMut<'_, ()>) { - let now = Instant::now(); - - // SAFETY: Only `unlink` will do something based on `nlink` count - // No need to synchronize here - file.nlink.fetch_add(1, Ordering::Relaxed); - *self.ctime.lock() = now; - - // SAFETY: `rwsem` has done the synchronization - self.size.fetch_add(1, Ordering::Relaxed); - *self.mtime.lock() = now; - - self.entries.access_mut(dlock).push((name, file.ino)); - } - - fn do_unlink( - &self, - file: &Arc, - filename: &[u8], - entries: &mut Vec<(Arc<[u8]>, Ino)>, - now: Instant, - decrease_size: bool, - _dir_lock: ProofMut<()>, - _file_lock: ProofMut<()>, - ) -> KResult<()> { - // SAFETY: `file_lock` has done the synchronization - if file.mode.load().is_dir() { - return Err(EISDIR); - } - - entries.retain(|(name, ino)| *ino != file.ino || name.as_ref() != filename); - - if decrease_size { - // SAFETY: `dir_lock` has done the synchronization - self.size.fetch_sub(1, Ordering::Relaxed); - } - - *self.mtime.lock() = now; - - // The last reference to the inode is held by some dentry - // and will be released when the dentry is released - - // SAFETY: `file_lock` has done the synchronization - file.nlink.fetch_sub(1, Ordering::Relaxed); - *file.ctime.lock() = now; - - Ok(()) - } -} - -impl Inode for DirectoryInode { - fn do_readdir( - &self, - offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - let lock = block_on(self.rwsem.read()); - self.entries - .access(lock.prove()) - .iter() - .skip(offset) - .map(|(name, ino)| callback(&name, *ino)) - .take_while(|result| result.map_or(true, |flow| flow.is_continue())) - .take_while_inclusive(|result| result.is_ok()) - .fold_ok(0, |acc, _| acc + 1) - } - - fn creat(&self, at: &Arc, mode: Mode) -> 
KResult<()> { - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let file = FileInode::new(ino, self.vfs.clone(), 0, mode); - - self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); - at.save_reg(file) - } - - fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> { - if !mode.is_chr() && !mode.is_blk() { - return Err(EINVAL); - } - - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let file = NodeInode::new(ino, self.vfs.clone(), mode, dev); - - self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); - at.save_reg(file) - } - - fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let file = SymlinkInode::new(ino, self.vfs.clone(), target.into()); - - self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); - at.save_symlink(file) - } - - fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode); - - self.link(at.get_name(), newdir.as_ref(), rwsem.prove_mut()); - at.save_dir(newdir) - } - - fn unlink(&self, at: &Arc) -> KResult<()> { - let _vfs = acquire(&self.vfs)?; - - let dir_lock = block_on(self.rwsem.write()); - - let file = at.get_inode()?; - let filename = at.get_name(); - let file_lock = block_on(file.rwsem.write()); - - let entries = self.entries.access_mut(dir_lock.prove_mut()); - - self.do_unlink( - &file, - &filename, - entries, - Instant::now(), - true, - dir_lock.prove_mut(), - file_lock.prove_mut(), - )?; - - // Remove the dentry from the dentry cache immediately - // so later lookup will fail with ENOENT - dcache::d_remove(at); - - Ok(()) - } - - fn chmod(&self, mode: Mode) -> KResult<()> { - let _vfs = acquire(&self.vfs)?; - let _lock = block_on(self.rwsem.write()); - - // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(); - self.mode.store(old.perm(mode.non_format_bits())); - *self.ctime.lock() = Instant::now(); - - Ok(()) - } - - fn rename(&self, rename_data: RenameData) -> KResult<()> { - let RenameData { - old_dentry, - new_dentry, - new_parent, - is_exchange, - no_replace, - vfs, - } = rename_data; - - if is_exchange { - println_warn!("TmpFs does not support exchange rename for now"); - return Err(ENOSYS); - } - - let vfs = vfs - .as_any() - .downcast_ref::() - .expect("vfs must be a TmpFs"); - - let _rename_lock = block_on(vfs.rename_lock.lock()); - - let old_file = old_dentry.get_inode()?; - let new_file = new_dentry.get_inode(); - - if no_replace && new_file.is_ok() { - return Err(EEXIST); - } - - let same_parent = Arc::as_ptr(&new_parent) == &raw const *self; - if same_parent { - // Same directory rename - // Remove from old location and add to new location - let parent_lock = block_on(self.rwsem.write()); - let entries = self.entries.access_mut(parent_lock.prove_mut()); - - fn rename_old( - old_entry: &mut (Arc<[u8]>, Ino), - old_file: &Arc, - new_dentry: &Arc, - now: Instant, - ) { - let (name, _) = old_entry; - *name = new_dentry.get_name(); - *old_file.ctime.lock() = now; - } - - let old_ino = old_file.ino; - let new_ino = new_file.as_ref().ok().map(|f| f.ino); - let old_name = old_dentry.get_name(); - 
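Both the fold being removed here and the plain loop that replaces it in src/fs/tmpfs/dir.rs perform the same single-pass scan: locate the entry being renamed and, if present, the entry it would replace. A standalone sketch of that scan (hypothetical helper, simplified types):

```rust
fn find_rename_indices(
    entries: &[(Box<[u8]>, u64)],
    old: (&[u8], u64),
    new: (&[u8], Option<u64>),
) -> (Option<usize>, Option<usize>) {
    let (mut old_idx, mut new_idx) = (None, None);
    for (idx, (name, ino)) in entries.iter().enumerate() {
        if *ino == old.1 && &name[..] == old.0 {
            old_idx = Some(idx);
        }
        if Some(*ino) == new.1 && &name[..] == new.0 {
            new_idx = Some(idx);
        }
    }
    // The caller treats a missing old entry as ENOENT and a found
    // new entry as "replace" (subject to the no_replace flag).
    (old_idx, new_idx)
}
```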
let new_name = new_dentry.get_name(); - - // Find the old and new entries in the directory after we've locked the directory. - let indices = - entries - .iter() - .enumerate() - .fold([None, None], |[old, new], (idx, (name, ino))| { - if Some(*ino) == new_ino && *name == new_name { - [old, Some(idx)] - } else if *ino == old_ino && *name == old_name { - [Some(idx), new] - } else { - [old, new] - } - }); - - let (old_entry_idx, new_entry_idx) = match indices { - [None, ..] => return Err(ENOENT), - [Some(old_idx), new_idx] => (old_idx, new_idx), - }; - - let now = Instant::now(); - - if let Some(new_idx) = new_entry_idx { - // Replace existing file (i.e. rename the old and unlink the new) - let new_file = new_file.unwrap(); - let _new_file_lock = block_on(new_file.rwsem.write()); - - // SAFETY: `new_file_lock` has done the synchronization - match (new_file.mode.load(), old_file.mode.load()) { - (Mode::DIR, _) => return Err(EISDIR), - (_, Mode::DIR) => return Err(ENOTDIR), - _ => {} - } - - entries.remove(new_idx); - - // SAFETY: `parent_lock` has done the synchronization - self.size.fetch_sub(1, Ordering::Relaxed); - - // The last reference to the inode is held by some dentry - // and will be released when the dentry is released - - // SAFETY: `new_file_lock` has done the synchronization - new_file.nlink.fetch_sub(1, Ordering::Relaxed); - *new_file.ctime.lock() = now; - } - - rename_old(&mut entries[old_entry_idx], &old_file, new_dentry, now); - *self.mtime.lock() = now; - } else { - // Cross-directory rename - handle similar to same directory case - - // Get new parent directory - let new_parent_inode = new_dentry.parent().get_inode()?; - assert!(new_parent_inode.is_dir()); - let new_parent = (new_parent_inode.as_ref() as &dyn Any) - .downcast_ref::() - .expect("new parent must be a DirectoryInode"); - - let old_parent_lock = block_on(self.rwsem.write()); - let new_parent_lock = block_on(new_parent_inode.rwsem.write()); - - let old_ino = old_file.ino; - let new_ino = new_file.as_ref().ok().map(|f| f.ino); - let old_name = old_dentry.get_name(); - let new_name = new_dentry.get_name(); - - // Find the old entry in the old directory - let old_entries = self.entries.access_mut(old_parent_lock.prove_mut()); - let old_pos = old_entries - .iter() - .position(|(name, ino)| *ino == old_ino && *name == old_name) - .ok_or(ENOENT)?; - - // Find the new entry in the new directory (if it exists) - let new_entries = new_parent.entries.access_mut(new_parent_lock.prove_mut()); - let has_new = new_entries - .iter() - .position(|(name, ino)| Some(*ino) == new_ino && *name == new_name) - .is_some(); - - let now = Instant::now(); - - if has_new { - // Replace existing file (i.e. 
move the old and unlink the new) - let new_file = new_file.unwrap(); - let new_file_lock = block_on(new_file.rwsem.write()); - - match (old_file.mode.load(), new_file.mode.load()) { - (Mode::DIR, Mode::DIR) => {} - (Mode::DIR, _) => return Err(ENOTDIR), - (_, _) => {} - } - - // Unlink the old file that was replaced - new_parent.do_unlink( - &new_file, - &new_name, - new_entries, - now, - false, - new_parent_lock.prove_mut(), - new_file_lock.prove_mut(), - )?; - } else { - new_parent.size.fetch_add(1, Ordering::Relaxed); - } - - // Remove from old directory - old_entries.remove(old_pos); - - // Add new entry - new_entries.push((new_name, old_ino)); - - self.size.fetch_sub(1, Ordering::Relaxed); - *self.mtime.lock() = now; - *old_file.ctime.lock() = now; - } - - block_on(dcache::d_exchange(old_dentry, new_dentry)); - - Ok(()) - } -} - -define_struct_inode! { - struct SymlinkInode { - target: Arc<[u8]>, - } -} - -impl SymlinkInode { - fn new(ino: Ino, vfs: Weak, target: Arc<[u8]>) -> Arc { - Self::new_locked(ino, vfs, |inode, _| unsafe { - let len = target.len(); - addr_of_mut_field!(inode, target).write(target); - - addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::LNK.perm(0o777))); - addr_of_mut_field!(&mut *inode, size).write((len as u64).into()); - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } -} - -impl Inode for SymlinkInode { - fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { - buffer - .fill(self.target.as_ref()) - .map(|result| result.allow_partial()) - } - - fn chmod(&self, _: Mode) -> KResult<()> { - Ok(()) - } -} - -define_struct_inode! 
{ - pub struct FileInode { - pages: PageCache, - } -} - -impl Debug for FileInode { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "FileInode({:?})", self.idata) - } -} - -impl FileInode { - pub fn new(ino: Ino, vfs: Weak, size: usize, mode: Mode) -> Arc { - let inode = Arc::new_cyclic(|weak_self: &Weak| FileInode { - idata: InodeData::new(ino, vfs), - pages: PageCache::new(weak_self.clone()), - }); - - inode.mode.store(Mode::REG.perm(mode.non_format_bits())); - inode.nlink.store(1, Ordering::Relaxed); - inode.size.store(size as u64, Ordering::Relaxed); - inode - } -} - -impl PageCacheBackend for FileInode { - fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult { - Ok(PAGE_SIZE) - } - - fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { - Ok(PAGE_SIZE) - } - - fn size(&self) -> usize { - self.size.load(Ordering::Relaxed) as usize - } -} - -impl Inode for FileInode { - fn page_cache(&self) -> Option<&PageCache> { - Some(&self.pages) - } - - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let _lock = block_on(self.rwsem.write()); - block_on(self.pages.read(buffer, offset)) - } - - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { - // TODO: We don't need that strong guarantee, find some way to avoid locks - let _lock = block_on(self.rwsem.write()); - - let mut store_new_end = None; - let offset = match offset { - WriteOffset::Position(offset) => offset, - WriteOffset::End(end) => { - store_new_end = Some(end); - - // SAFETY: `lock` has done the synchronization - self.size.load(Ordering::Relaxed) as usize - } - }; - - let wrote = block_on(self.pages.write(stream, offset))?; - let cursor_end = offset + wrote; - - if let Some(store_end) = store_new_end { - *store_end = cursor_end; - } - - // SAFETY: `lock` has done the synchronization - *self.mtime.lock() = Instant::now(); - self.size.store(cursor_end as u64, Ordering::Relaxed); - - Ok(wrote) - } - - fn truncate(&self, length: usize) -> KResult<()> { - let _lock = block_on(self.rwsem.write()); - block_on(self.pages.resize(length))?; - self.size.store(length as u64, Ordering::Relaxed); - *self.mtime.lock() = Instant::now(); - Ok(()) - } - - fn chmod(&self, mode: Mode) -> KResult<()> { - let _vfs = acquire(&self.vfs)?; - let _lock = block_on(self.rwsem.write()); - - // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(); - self.mode.store(old.perm(mode.non_format_bits())); - *self.ctime.lock() = Instant::now(); - - Ok(()) - } -} - -impl_any!(TmpFs); -pub(super) struct TmpFs { - next_ino: AtomicIno, - readonly: bool, - rename_lock: Mutex<()>, -} - -impl Vfs for TmpFs { - fn io_blksize(&self) -> usize { - 4096 - } - - fn fs_devid(&self) -> DevId { - 2 - } - - fn is_read_only(&self) -> bool { - self.readonly - } -} - -impl TmpFs { - pub(super) fn assign_ino(&self) -> Ino { - self.next_ino.fetch_add(1, Ordering::AcqRel) - } - - pub fn create(readonly: bool) -> KResult<(Arc, Arc)> { - let tmpfs = Arc::new(Self { - next_ino: AtomicIno::new(1), - readonly, - rename_lock: Mutex::new(()), - }); - - let weak = Arc::downgrade(&tmpfs); - let root_dir = DirectoryInode::new(0, weak, Mode::new(0o755)); - - Ok((tmpfs, root_dir)) - } -} - -struct TmpFsMountCreator; - -impl MountCreator for TmpFsMountCreator { - fn create_mount(&self, _source: &str, flags: u64, mp: &Arc) -> KResult { - let (fs, root_inode) = TmpFs::create(flags & MS_RDONLY != 0)?; - - Mount::new(mp, fs, root_inode) - } - - fn 
check_signature(&self, _: &[u8]) -> KResult { - Ok(true) - } -} - -pub fn init() { - register_filesystem("tmpfs", Arc::new(TmpFsMountCreator)).unwrap(); -} diff --git a/src/fs/tmpfs/dir.rs b/src/fs/tmpfs/dir.rs new file mode 100644 index 00000000..4dd64d52 --- /dev/null +++ b/src/fs/tmpfs/dir.rs @@ -0,0 +1,404 @@ +use alloc::sync::Arc; +use alloc::vec; +use alloc::vec::Vec; + +use eonix_log::println_warn; +use eonix_sync::{LazyLock, RwLock}; + +use super::file::{DeviceInode, FileInode, SymlinkInode}; +use super::TmpFs; +use crate::kernel::constants::{EEXIST, EINVAL, EISDIR, ENOENT, ENOSYS, ENOTDIR}; +use crate::kernel::timer::Instant; +use crate::kernel::vfs::dentry::{dcache, Dentry}; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse, RenameData}; +use crate::kernel::vfs::types::{DeviceId, Format, Mode, Permission}; +use crate::kernel::vfs::{SbRef, SbUse}; +use crate::prelude::KResult; + +pub struct DirectoryInode { + entries: RwLock, Ino)>>, +} + +fn link(dir: &InodeUse, entries: &mut Vec<(Arc<[u8]>, Ino)>, name: Arc<[u8]>, file: &InodeUse) { + let mut dir_info = dir.info.lock(); + let mut file_info = file.info.lock(); + + let now = Instant::now(); + + file_info.nlink += 1; + file_info.ctime = now; + + dir_info.size += 1; + dir_info.mtime = now; + dir_info.ctime = now; + + entries.push((name, file.ino)); +} + +impl DirectoryInode { + pub fn new(ino: Ino, sb: SbRef, perm: Permission) -> InodeUse { + static DOT: LazyLock> = LazyLock::new(|| Arc::from(b".".as_slice())); + + let now = Instant::now(); + + InodeUse::new( + sb, + ino, + Format::DIR, + InodeInfo { + size: 1, + nlink: 1, // link from `.` to itself + perm, + ctime: now, + mtime: now, + atime: now, + uid: 0, + gid: 0, + }, + Self { + entries: RwLock::new(vec![(DOT.clone(), ino)]), + }, + ) + } + + fn do_unlink( + &self, + file: &InodeUse, + filename: &[u8], + entries: &mut Vec<(Arc<[u8]>, Ino)>, + now: Instant, + decrease_size: bool, + self_info: &mut InodeInfo, + file_info: &mut InodeInfo, + ) -> KResult<()> { + // SAFETY: `file_lock` has done the synchronization + if file.format == Format::DIR { + return Err(EISDIR); + } + + let file_ino = file.ino; + entries.retain(|(name, ino)| *ino != file_ino || name.as_ref() != filename); + + if decrease_size { + self_info.size -= 1; + } + + self_info.mtime = now; + self_info.ctime = now; + + // The last reference to the inode is held by some dentry + // and will be released when the dentry is released + + file_info.nlink -= 1; + file_info.ctime = now; + + // TODO!!!: Remove the file if nlink == 1 + + Ok(()) + } +} + +impl InodeOps for DirectoryInode { + type SuperBlock = TmpFs; + + async fn readdir( + &self, + sb: SbUse, + _: &InodeUse, + offset: usize, + for_each_entry: &mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> KResult> { + let _sb = sb; + let entries = self.entries.read().await; + + let mut count = 0; + for entry in entries.iter().skip(offset) { + match for_each_entry(&entry.0, entry.1) { + Err(err) => return Ok(Err(err)), + Ok(false) => break, + Ok(true) => count += 1, + } + } + + Ok(Ok(count)) + } + + async fn create( + &self, + sb: SbUse, + inode: &InodeUse, + at: &Arc, + perm: Permission, + ) -> KResult<()> { + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let file = FileInode::new(ino, sb.get_ref(), 0, perm); + + link(inode, &mut entries, at.get_name(), &file); + at.fill(file); + + Ok(()) + } + + async fn mknod( + &self, + sb: SbUse, + inode: &InodeUse, + at: &Dentry, + mode: Mode, + dev: DeviceId, + ) -> 
KResult<()> { + if !mode.is_chr() && !mode.is_blk() { + return Err(EINVAL); + } + + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let file = DeviceInode::new(ino, sb.get_ref(), mode, dev); + + link(inode, &mut entries, at.get_name(), &file); + at.fill(file); + + Ok(()) + } + + async fn symlink( + &self, + sb: SbUse, + inode: &InodeUse, + at: &Arc, + target: &[u8], + ) -> KResult<()> { + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let file = SymlinkInode::new(ino, sb.get_ref(), target.into()); + + link(inode, &mut entries, at.get_name(), &file); + at.fill(file); + + Ok(()) + } + + async fn mkdir( + &self, + sb: SbUse, + inode: &InodeUse, + at: &Dentry, + perm: Permission, + ) -> KResult<()> { + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let new_dir = DirectoryInode::new(ino, sb.get_ref(), perm); + + link(inode, &mut entries, at.get_name(), &new_dir); + at.fill(new_dir); + + Ok(()) + } + + async fn unlink( + &self, + _sb: SbUse, + inode: &InodeUse, + at: &Arc, + ) -> KResult<()> { + let mut entries = self.entries.write().await; + + let file = at.get_inode()?; + let filename = at.get_name(); + + self.do_unlink( + &file, + &filename, + &mut entries, + Instant::now(), + true, + &mut inode.info.lock(), + &mut file.info.lock(), + )?; + + // Remove the dentry from the dentry cache immediately + // so later lookup will fail with ENOENT + dcache::d_remove(at); + + Ok(()) + } + + async fn rename( + &self, + sb: SbUse, + inode: &InodeUse, + rename_data: RenameData<'_, '_>, + ) -> KResult<()> { + let _rename_lock = sb.backend.rename_lock.lock().await; + let mut self_entries = self.entries.write().await; + + let RenameData { + old_dentry, + new_dentry, + new_parent, + is_exchange, + no_replace, + } = rename_data; + + if is_exchange { + println_warn!("TmpFs does not support exchange rename for now"); + return Err(ENOSYS); + } + + let old_file = old_dentry.get_inode()?; + let new_file = new_dentry.inode(); + + if no_replace && new_file.is_some() { + return Err(EEXIST); + } + + if inode == &new_parent { + // Same directory rename + // Remove from old location and add to new location + let old_ino = old_file.ino; + let new_ino = new_file.as_ref().map(|f| f.ino); + let old_name = old_dentry.get_name(); + let new_name = new_dentry.get_name(); + + // Find the old and new entries in the directory after we've locked the directory. + let (mut old_ent_idx, mut new_ent_idx) = (None, None); + for (idx, (name, ino)) in self_entries.iter().enumerate() { + if *ino == old_ino && *name == old_name { + old_ent_idx = Some(idx); + } + + if Some(*ino) == new_ino && *name == new_name { + new_ent_idx = Some(idx); + } + } + + let Some(old_ent_idx) = old_ent_idx else { + return Err(ENOENT); + }; + + if Some(old_ent_idx) == new_ent_idx { + return Ok(()); + } + + let now = Instant::now(); + if let Some(new_idx) = new_ent_idx { + // Replace existing file (i.e. 
rename the old and unlink the new) + let new_file = new_file.unwrap(); + + match (new_file.format, old_file.format) { + (Format::DIR, _) => return Err(EISDIR), + (_, Format::DIR) => return Err(ENOTDIR), + _ => {} + } + + self_entries.remove(new_idx); + + inode.info.lock().size -= 1; + + // The last reference to the inode is held by some dentry + // and will be released when the dentry is released + + let mut new_info = new_file.info.lock(); + + new_info.nlink -= 1; + new_info.mtime = now; + new_info.ctime = now; + } + + let (name, _) = &mut self_entries[old_ent_idx]; + *name = new_dentry.get_name(); + + let mut self_info = inode.info.lock(); + self_info.mtime = now; + self_info.ctime = now; + } else { + // Cross-directory rename - handle similar to same directory case + + // Get new parent directory + let new_parent = new_dentry.parent().get_inode()?; + assert_eq!(new_parent.format, Format::DIR); + + let new_parent_priv = new_parent.get_priv::(); + let mut new_entries = new_parent_priv.entries.write().await; + + let old_ino = old_file.ino; + let new_ino = new_file.as_ref().map(|f| f.ino); + let old_name = old_dentry.get_name(); + let new_name = new_dentry.get_name(); + + // Find the old entry in the old directory + let old_pos = self_entries + .iter() + .position(|(name, ino)| *ino == old_ino && *name == old_name) + .ok_or(ENOENT)?; + + // Find the new entry in the new directory (if it exists) + let has_new = new_entries + .iter() + .position(|(name, ino)| Some(*ino) == new_ino && *name == new_name) + .is_some(); + + let now = Instant::now(); + + if has_new { + // Replace existing file (i.e. move the old and unlink the new) + let new_file = new_file.unwrap(); + + match (old_file.format, new_file.format) { + (Format::DIR, Format::DIR) => {} + (Format::DIR, _) => return Err(ENOTDIR), + (_, _) => {} + } + + // Unlink the old file that was replaced + new_parent_priv.do_unlink( + &new_file, + &new_name, + &mut new_entries, + now, + false, + &mut new_parent.info.lock(), + &mut new_file.info.lock(), + )?; + } else { + let mut info = new_parent.info.lock(); + + info.size += 1; + info.mtime = now; + info.ctime = now; + } + + // Remove from old directory + self_entries.remove(old_pos); + + // Add new entry + new_entries.push((new_name, old_ino)); + + let mut self_info = inode.info.lock(); + self_info.size -= 1; + self_info.mtime = now; + self_info.ctime = now; + } + + dcache::d_exchange(old_dentry, new_dentry).await; + Ok(()) + } + + async fn chmod( + &self, + _sb: SbUse, + inode: &InodeUse, + perm: Permission, + ) -> KResult<()> { + let mut info = inode.info.lock(); + info.perm = perm; + info.ctime = Instant::now(); + + Ok(()) + } +} diff --git a/src/fs/tmpfs/file.rs b/src/fs/tmpfs/file.rs new file mode 100644 index 00000000..aafae539 --- /dev/null +++ b/src/fs/tmpfs/file.rs @@ -0,0 +1,267 @@ +use alloc::collections::btree_map::BTreeMap; +use alloc::sync::Arc; + +use super::TmpFs; +use crate::io::{Buffer, Stream}; +use crate::kernel::mem::{CachePage, PageCache, PageOffset}; +use crate::kernel::timer::Instant; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse, WriteOffset}; +use crate::kernel::vfs::types::{DeviceId, Format, Mode, Permission}; +use crate::kernel::vfs::{SbRef, SbUse}; +use crate::prelude::KResult; + +pub struct FileInode; + +impl FileInode { + pub fn new(ino: Ino, sb: SbRef, size: usize, perm: Permission) -> InodeUse { + let now = Instant::now(); + + InodeUse::new( + sb, + ino, + Format::REG, + InodeInfo { + size: size as _, + nlink: 1, + uid: 0, + gid: 0, + perm, + 
atime: now, + ctime: now, + mtime: now, + }, + Self, + ) + } +} + +impl InodeOps for FileInode { + type SuperBlock = TmpFs; + + async fn read( + &self, + _: SbUse, + inode: &InodeUse, + buffer: &mut dyn Buffer, + offset: usize, + ) -> KResult { + let _lock = inode.rwsem.read().await; + inode.get_page_cache().read(buffer, offset).await + } + + async fn write( + &self, + _: SbUse, + inode: &InodeUse, + stream: &mut dyn Stream, + offset: WriteOffset<'_>, + ) -> KResult { + let _lock = inode.rwsem.write().await; + + let mut store_new_end = None; + let offset = match offset { + WriteOffset::Position(offset) => offset, + WriteOffset::End(end) => { + store_new_end = Some(end); + + // `info.size` won't change since we are holding the write lock. + inode.info.lock().size as usize + } + }; + + let page_cache = inode.get_page_cache(); + + if Arc::strong_count(&page_cache) == 1 { + // XXX: A temporary workaround here. Change this ASAP... + // Prevent the page cache from being dropped during the write. + let _ = Arc::into_raw(page_cache.clone()); + } + + let wrote = page_cache.write(stream, offset).await?; + let cursor_end = offset + wrote; + + if let Some(store_end) = store_new_end { + *store_end = cursor_end; + } + + Ok(wrote) + } + + async fn truncate( + &self, + _: SbUse, + inode: &InodeUse, + length: usize, + ) -> KResult<()> { + let _lock = inode.rwsem.write().await; + + let now = Instant::now(); + let mut info = inode.info.lock(); + info.mtime = now; + info.ctime = now; + info.size = length as u64; + + Ok(()) + } + + async fn chmod( + &self, + _sb: SbUse, + inode: &InodeUse, + perm: Permission, + ) -> KResult<()> { + let mut info = inode.info.lock(); + + info.perm = perm; + info.ctime = Instant::now(); + + Ok(()) + } + + async fn read_page( + &self, + _: SbUse, + _: &InodeUse, + page: &mut CachePage, + _: PageOffset, + ) -> KResult<()> { + page.lock().as_bytes_mut().fill(0); + Ok(()) + } + + async fn write_page( + &self, + _: SbUse, + _: &InodeUse, + _: &mut CachePage, + _: PageOffset, + ) -> KResult<()> { + // XXX: Actually, we should refuse to do the writeback. + // Think of a way to inform the page cache of that. + Ok(()) + } + + async fn write_begin<'a>( + &self, + _: SbUse, + _: &InodeUse, + page_cache: &PageCache, + pages: &'a mut BTreeMap, + offset: usize, + _: usize, + ) -> KResult<&'a mut CachePage> { + // TODO: Remove dependency on `page_cache`.
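Returning to the `write` method above: `WriteOffset::End` is an append-style write that both starts at the current file size and reports the final cursor back through an out-parameter. A minimal stand-in mirroring that contract (types simplified; the kernel's real `WriteOffset` lives in the vfs inode module):

```rust
enum WriteOffset<'a> {
    /// Write at a fixed byte offset.
    Position(usize),
    /// Append; the final cursor is written back through this reference.
    End(&'a mut usize),
}

fn resolve_offset<'a>(
    offset: WriteOffset<'a>,
    file_size: usize,
) -> (usize, Option<&'a mut usize>) {
    match offset {
        WriteOffset::Position(pos) => (pos, None),
        WriteOffset::End(end) => (file_size, Some(end)),
    }
}

fn demo() {
    let mut cursor = 0;
    let (start, store_end) = resolve_offset(WriteOffset::End(&mut cursor), 100);
    let wrote = 10; // Pretend the page cache accepted 10 bytes at `start`.
    if let Some(end) = store_end {
        *end = start + wrote;
    }
    assert_eq!(cursor, 110);
}
```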
+ page_cache + .get_page_locked(pages, PageOffset::from_byte_floor(offset)) + .await + } + + async fn write_end( + &self, + _: SbUse, + inode: &InodeUse, + _: &PageCache, + _: &mut BTreeMap, + offset: usize, + _: usize, + copied: usize, + ) -> KResult<()> { + let now = Instant::now(); + let mut info = inode.info.lock(); + info.mtime = now; + info.ctime = now; + info.size = info.size.max((offset + copied) as u64); + + Ok(()) + } +} + +pub struct DeviceInode { + devid: DeviceId, +} + +impl DeviceInode { + pub fn new(ino: Ino, sb: SbRef, mode: Mode, devid: DeviceId) -> InodeUse { + let now = Instant::now(); + + InodeUse::new( + sb, + ino, + mode.format(), + InodeInfo { + size: 0, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(mode.non_format_bits()), + atime: now, + ctime: now, + mtime: now, + }, + Self { devid }, + ) + } +} + +impl InodeOps for DeviceInode { + type SuperBlock = TmpFs; + + async fn chmod( + &self, + _sb: SbUse, + inode: &InodeUse, + perm: Permission, + ) -> KResult<()> { + let mut info = inode.info.lock(); + info.perm = perm; + info.ctime = Instant::now(); + + Ok(()) + } + + fn devid(&self, _: SbUse, _: &InodeUse) -> KResult { + Ok(self.devid) + } +} + +pub struct SymlinkInode { + target: Arc<[u8]>, +} + +impl SymlinkInode { + pub fn new(ino: Ino, sb: SbRef, target: Arc<[u8]>) -> InodeUse { + let now = Instant::now(); + + InodeUse::new( + sb, + ino, + Format::LNK, + InodeInfo { + size: target.len() as _, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o777), + atime: now, + ctime: now, + mtime: now, + }, + Self { target }, + ) + } +} + +impl InodeOps for SymlinkInode { + type SuperBlock = TmpFs; + + async fn readlink( + &self, + _sb: SbUse, + _inode: &InodeUse, + buffer: &mut dyn Buffer, + ) -> KResult { + buffer + .fill(self.target.as_ref()) + .map(|result| result.allow_partial()) + } +} diff --git a/src/fs/tmpfs/mod.rs b/src/fs/tmpfs/mod.rs new file mode 100644 index 00000000..62a0dfc2 --- /dev/null +++ b/src/fs/tmpfs/mod.rs @@ -0,0 +1,70 @@ +mod dir; +mod file; + +use alloc::sync::Arc; +use core::sync::atomic::{AtomicU64, Ordering}; + +use async_trait::async_trait; +use dir::DirectoryInode; +use eonix_sync::Mutex; + +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::inode::{Ino, InodeUse}; +use crate::kernel::vfs::mount::{register_filesystem, Mount, MountCreator}; +use crate::kernel::vfs::types::{DeviceId, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; +use crate::prelude::*; + +pub struct TmpFs { + next_ino: AtomicU64, + rename_lock: Mutex<()>, +} + +impl SuperBlock for TmpFs {} + +impl TmpFs { + fn assign_ino(&self) -> Ino { + Ino::new(self.next_ino.fetch_add(1, Ordering::Relaxed)) + } + + fn create() -> KResult<(SbUse, InodeUse)> { + let tmpfs = SbUse::new( + SuperBlockInfo { + io_blksize: 4096, + device_id: DeviceId::new(0, 2), + read_only: false, + }, + Self { + next_ino: AtomicU64::new(1), + rename_lock: Mutex::new(()), + }, + ); + + let root_dir = DirectoryInode::new( + tmpfs.backend.assign_ino(), + SbRef::from(&tmpfs), + Permission::new(0o755), + ); + + Ok((tmpfs, root_dir)) + } +} + +struct TmpFsMountCreator; + +#[async_trait] +impl MountCreator for TmpFsMountCreator { + async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { + let (fs, root_inode) = TmpFs::create()?; + + Mount::new(mp, fs, root_inode) + } + + fn check_signature(&self, _: &[u8]) -> KResult { + Ok(true) + } +} + +pub fn init() { + register_filesystem("tmpfs", Arc::new(TmpFsMountCreator)).unwrap(); +} diff 
--git a/src/io.rs b/src/io.rs index 85675dea..d7094f6d 100644 --- a/src/io.rs +++ b/src/io.rs @@ -1,6 +1,8 @@ +use core::mem::MaybeUninit; +use core::ops::{Add, AddAssign, Sub}; + use crate::kernel::constants::EFAULT; use crate::prelude::*; -use core::{cmp, mem::MaybeUninit}; #[must_use] #[derive(Debug)] @@ -236,18 +238,26 @@ impl Buffer for ByteBuffer<'_> { } } +pub trait Integer: + Add + Sub + AddAssign + Copy + PartialOrd + Ord +{ +} + +impl Integer for u64 {} +impl Integer for usize {} + /// Iterator that generates chunks of a given length from a start index /// until the end of the total length. /// /// The iterator returns a tuple of (start, len) for each chunk. -pub struct Chunks { - end: usize, - cur: usize, - chunk_len: usize, +pub struct Chunks { + end: T, + cur: T, + chunk_len: T, } -impl Chunks { - pub const fn new(start: usize, total_len: usize, chunk_len: usize) -> Self { +impl Chunks { + pub fn new(start: T, total_len: T, chunk_len: T) -> Self { Self { end: start + total_len, cur: start, @@ -256,8 +266,8 @@ impl Chunks { } } -impl Iterator for Chunks { - type Item = (usize, usize); +impl Iterator for Chunks { + type Item = (T, T); fn next(&mut self) -> Option { if self.cur >= self.end { @@ -265,7 +275,7 @@ impl Iterator for Chunks { } let start = self.cur; - let len = cmp::min(self.chunk_len, self.end - start); + let len = self.chunk_len.min(self.end - start); self.cur += self.chunk_len; Some((start, len)) diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 349e3656..be2146f8 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -1,25 +1,18 @@ mod mbr; -use super::{ - constants::ENOENT, - mem::{paging::Page, AsMemoryBlock as _}, - vfs::DevId, -}; -use crate::kernel::constants::{EEXIST, EINVAL}; -use crate::{ - io::{Buffer, FillResult}, - prelude::*, -}; -use alloc::{ - collections::btree_map::{BTreeMap, Entry}, - sync::Arc, -}; +use alloc::collections::btree_map::{BTreeMap, Entry}; +use alloc::sync::Arc; use core::cmp::Ordering; + +use async_trait::async_trait; use mbr::MBRPartTable; -pub fn make_device(major: u32, minor: u32) -> DevId { - (major << 8) & 0xff00u32 | minor & 0xffu32 -} +use super::constants::ENOENT; +use super::mem::Folio; +use super::vfs::types::DeviceId; +use crate::io::{Buffer, Chunks, FillResult}; +use crate::kernel::constants::{EEXIST, EINVAL}; +use crate::prelude::*; pub struct Partition { pub lba_offset: u64, @@ -30,11 +23,12 @@ pub trait PartTable { fn partitions(&self) -> impl Iterator + use<'_, Self>; } +#[async_trait] pub trait BlockRequestQueue: Send + Sync { /// Maximum number of sectors that can be read in one request fn max_request_pages(&self) -> u64; - fn submit(&self, req: BlockDeviceRequest) -> KResult<()>; + async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()>; } enum BlockDeviceType { @@ -42,7 +36,7 @@ enum BlockDeviceType { queue: Arc, }, Partition { - disk_dev: DevId, + disk_dev: DeviceId, lba_offset: u64, queue: Arc, }, @@ -50,7 +44,7 @@ enum BlockDeviceType { pub struct BlockDevice { /// Unique device identifier, major and minor numbers - devid: DevId, + devid: DeviceId, /// Total size of the device in sectors (512 bytes each) sector_count: u64, @@ -77,11 +71,11 @@ impl Ord for BlockDevice { } } -static BLOCK_DEVICE_LIST: Spin>> = Spin::new(BTreeMap::new()); +static BLOCK_DEVICE_LIST: Spin>> = Spin::new(BTreeMap::new()); impl BlockDevice { pub fn register_disk( - devid: DevId, + devid: DeviceId, size: u64, queue: Arc, ) -> KResult> { @@ -97,13 +91,13 @@ impl BlockDevice { } } - pub fn get(devid: 
DevId) -> KResult> { + pub fn get(devid: DeviceId) -> KResult> { BLOCK_DEVICE_LIST.lock().get(&devid).cloned().ok_or(ENOENT) } } impl BlockDevice { - pub fn devid(&self) -> DevId { + pub fn devid(&self) -> DeviceId { self.devid } @@ -121,7 +115,7 @@ impl BlockDevice { }; let device = Arc::new(BlockDevice { - devid: make_device(self.devid >> 8, (self.devid & 0xff) + idx as u32 + 1), + devid: DeviceId::new(self.devid.major, self.devid.minor + idx as u16 + 1), sector_count: size, dev_type: BlockDeviceType::Partition { disk_dev: self.devid, @@ -159,7 +153,7 @@ impl BlockDevice { /// - `req.sector` must be within the disk size /// - `req.buffer` must be enough to hold the data /// - pub fn commit_request(&self, mut req: BlockDeviceRequest) -> KResult<()> { + pub async fn commit_request(&self, mut req: BlockDeviceRequest<'_>) -> KResult<()> { // Verify the request parameters. match &mut req { BlockDeviceRequest::Read { sector, count, .. } => { @@ -184,7 +178,7 @@ impl BlockDevice { } } - self.queue().submit(req) + self.queue().submit(req).await } /// Read some from the block device, may involve some copy and fragmentation @@ -194,178 +188,73 @@ impl BlockDevice { /// # Arguments /// `offset` - offset in bytes /// - pub fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult { - let mut sector_start = offset as u64 / 512; - let mut first_sector_offset = offset as u64 % 512; - let mut sector_count = (first_sector_offset + buffer.total() as u64 + 511) / 512; - - let mut nfilled = 0; - 'outer: while sector_count != 0 { - let pages: &[Page]; - let page: Option; - let page_vec: Option>; - - let nread; - - match sector_count { - count if count <= 8 => { - nread = count; - - let _page = Page::alloc(); - page = Some(_page); - pages = core::slice::from_ref(page.as_ref().unwrap()); + pub async fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult { + let sector_start = offset as u64 / 512; + let mut first_sector_offset = offset % 512; + let nr_sectors = (first_sector_offset + buffer.total() + 511) / 512; + + let nr_sectors_per_batch = self.queue().max_request_pages() / 2 * 2 * 8; + + let mut nr_filled = 0; + for (start, nr_batch) in Chunks::new(sector_start, nr_sectors as u64, nr_sectors_per_batch) + { + let (page_slice, page, mut page_vec); + match nr_batch { + ..=8 => { + page = Folio::alloc(); + page_slice = core::slice::from_ref(&page); } - count if count <= 16 => { - nread = count; - - let _pages = Page::alloc_order(1); - page = Some(_pages); - pages = core::slice::from_ref(page.as_ref().unwrap()); + ..=16 => { + page = Folio::alloc_order(1); + page_slice = core::slice::from_ref(&page); + } + ..=32 => { + page = Folio::alloc_order(2); + page_slice = core::slice::from_ref(&page); } count => { - nread = count.min(self.queue().max_request_pages()); + let nr_huge_pages = count as usize / 32; + let nr_small_pages = ((count as usize % 32) + 7) / 8; - let npages = (nread + 15) / 16; - let mut _page_vec = Vec::with_capacity(npages as usize); - for _ in 0..npages { - _page_vec.push(Page::alloc_order(1)); - } - page_vec = Some(_page_vec); - pages = page_vec.as_ref().unwrap().as_slice(); + let nr_pages = nr_huge_pages + nr_small_pages; + page_vec = Vec::with_capacity(nr_pages); + + page_vec.resize_with(nr_huge_pages, || Folio::alloc_order(2)); + page_vec.resize_with(nr_pages, || Folio::alloc()); + page_slice = &page_vec; } } let req = BlockDeviceRequest::Read { - sector: sector_start, - count: nread, - buffer: &pages, + sector: start, + count: nr_batch, + buffer: page_slice, }; - 
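The match above sizes the batch buffer by allocation order: with 512-byte sectors and 4 KiB base pages, an order-n folio covers 8 << n sectors. Restated as a standalone function (hypothetical name; only the `Folio` calls themselves come from this patch):

```rust
/// Folio orders to allocate for a batch of `count` sectors, mirroring
/// the ladder above: one page up to 8 sectors, one order-1 folio up
/// to 16, one order-2 folio up to 32, then order-2 folios for every
/// full 32 sectors plus order-0 pages for the remainder.
fn folio_orders(count: u64) -> Vec<u32> {
    match count {
        0..=8 => vec![0],
        9..=16 => vec![1],
        17..=32 => vec![2],
        n => {
            let nr_huge = (n / 32) as usize;
            let nr_small = ((n % 32) as usize + 7) / 8;
            let mut orders: Vec<u32> = vec![2; nr_huge];
            orders.extend(core::iter::repeat(0).take(nr_small));
            orders
        }
    }
}
```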
-            self.commit_request(req)?;
+            self.commit_request(req).await?;
 
-            for page in pages.iter() {
-                // SAFETY: We are the only owner of the page so no one could be mutating it.
-                let data = unsafe { &page.as_memblk().as_bytes()[first_sector_offset as usize..] };
+            for page in page_slice {
+                let pg = page.lock();
+                let data = &pg.as_bytes()[first_sector_offset..];
                 first_sector_offset = 0;
 
-                match buffer.fill(data)? {
-                    FillResult::Done(n) => nfilled += n,
-                    FillResult::Partial(n) => {
-                        nfilled += n;
-                        break 'outer;
-                    }
-                    FillResult::Full => {
-                        break 'outer;
-                    }
-                }
-            }
-
-            sector_start += nread;
-            sector_count -= nread;
-        }
-
-        if nfilled == buffer.total() {
-            Ok(FillResult::Done(nfilled))
-        } else {
-            Ok(FillResult::Partial(nfilled))
-        }
-    }
-
-    /// Write some data to the block device, may involve some copy and fragmentation
-    ///
-    /// # Arguments
-    /// `offset` - offset in bytes
-    /// `data` - data to write
-    ///
-    pub fn write_some(&self, offset: usize, data: &[u8]) -> KResult<usize> {
-        let mut sector_start = offset as u64 / 512;
-        let mut first_sector_offset = offset as u64 % 512;
-        let mut remaining_data = data;
-        let mut nwritten = 0;
-
-        while !remaining_data.is_empty() {
-            let pages: &[Page];
-            let page: Option<Page>;
-            let page_vec: Option<Vec<Page>>;
-
-            // Calculate sectors needed for this write
-            let write_end = first_sector_offset + remaining_data.len() as u64;
-            let sector_count = ((write_end + 511) / 512).min(self.queue().max_request_pages());
-
-            match sector_count {
-                count if count <= 8 => {
-                    let _page = Page::alloc();
-                    page = Some(_page);
-                    pages = core::slice::from_ref(page.as_ref().unwrap());
-                }
-                count if count <= 16 => {
-                    let _pages = Page::alloc_order(1);
-                    page = Some(_pages);
-                    pages = core::slice::from_ref(page.as_ref().unwrap());
-                }
-                count => {
-                    let npages = (count + 15) / 16;
-                    let mut _page_vec = Vec::with_capacity(npages as usize);
-                    for _ in 0..npages {
-                        _page_vec.push(Page::alloc_order(1));
-                    }
-                    page_vec = Some(_page_vec);
-                    pages = page_vec.as_ref().unwrap().as_slice();
-                }
-            }
-
-            if first_sector_offset != 0 || remaining_data.len() < (sector_count * 512) as usize {
-                let read_req = BlockDeviceRequest::Read {
-                    sector: sector_start,
-                    count: sector_count,
-                    buffer: pages,
-                };
-                self.commit_request(read_req)?;
-            }
-
-            let mut data_offset = 0;
-            let mut page_offset = first_sector_offset as usize;
-
-            for page in pages.iter() {
-                // SAFETY: We own the page and can modify it
-                let page_data = unsafe {
-                    let memblk = page.as_memblk();
-                    core::slice::from_raw_parts_mut(memblk.addr().get() as *mut u8, memblk.len())
-                };
+                nr_filled += buffer.fill(data)?.allow_partial();
 
-                let copy_len =
-                    (remaining_data.len() - data_offset).min(page_data.len() - page_offset);
-
-                if copy_len == 0 {
-                    break;
-                }
-
-                page_data[page_offset..page_offset + copy_len]
-                    .copy_from_slice(&remaining_data[data_offset..data_offset + copy_len]);
-
-                data_offset += copy_len;
-                page_offset = 0; // Only first page has offset
-
-                if data_offset >= remaining_data.len() {
+                if buffer.available() == 0 {
                     break;
                 }
             }
 
-            let write_req = BlockDeviceRequest::Write {
-                sector: sector_start,
-                count: sector_count,
-                buffer: pages,
-            };
-            self.commit_request(write_req)?;
-
-            let bytes_written = data_offset;
-            nwritten += bytes_written;
-            remaining_data = &remaining_data[bytes_written..];
-            sector_start += sector_count;
-            first_sector_offset = 0;
+            if buffer.available() == 0 {
+                break;
+            }
         }
 
-        Ok(nwritten)
+        if buffer.available() == 0 {
+            Ok(FillResult::Done(nr_filled))
+        } else {
+            Ok(FillResult::Partial(nr_filled))
+        }
     }
 }
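Note: `Chunks` is not defined anywhere in this patch. From the call site in `read_some`, it is presumably an iterator that walks `start..start + count` in steps of at most `step`, yielding `(start, len)` pairs. A minimal sketch consistent with that call site (all names and field types here are assumptions, not code from this patch):

    // Hypothetical sketch of the `Chunks` helper used by `read_some` above.
    struct Chunks {
        next: u64,
        end: u64,
        step: u64,
    }

    impl Chunks {
        /// Cover `start..start + count` in batches of at most `step`.
        fn new(start: u64, count: u64, step: usize) -> Self {
            Self { next: start, end: start + count, step: step as u64 }
        }
    }

    impl Iterator for Chunks {
        type Item = (u64, u64);

        fn next(&mut self) -> Option<(u64, u64)> {
            if self.next >= self.end {
                return None;
            }
            let start = self.next;
            // Only the final batch can be shorter than `step`.
            let len = (self.end - start).min(self.step);
            self.next = start + len;
            Some((start, len))
        }
    }

Under this reading, only the last batch can be shorter than `nr_sectors_per_batch`, and it simply falls into whichever allocation arm matches its size.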
@@ -376,7 +265,7 @@ pub enum BlockDeviceRequest<'lt> {
         /// Number of sectors to read
         count: u64,
         /// Buffer pages to read into
-        buffer: &'lt [Page],
+        buffer: &'lt [Folio],
     },
     Write {
         /// Sector to write to, in 512-byte blocks
@@ -384,6 +273,6 @@ pub enum BlockDeviceRequest<'lt> {
         /// Number of sectors to write
         count: u64,
         /// Buffer pages to write from
-        buffer: &'lt [Page],
+        buffer: &'lt [Folio],
     },
 }
diff --git a/src/kernel/block/mbr.rs b/src/kernel/block/mbr.rs
index 74cdc36e..c5820679 100644
--- a/src/kernel/block/mbr.rs
+++ b/src/kernel/block/mbr.rs
@@ -31,7 +31,7 @@ pub struct MBRPartTable {
 impl MBRPartTable {
     pub async fn from_disk(disk: &BlockDevice) -> KResult<Self> {
         let mut mbr: UninitBuffer = UninitBuffer::new();
-        disk.read_some(0, &mut mbr)?.ok_or(EIO)?;
+        disk.read_some(0, &mut mbr).await?.ok_or(EIO)?;
         let mbr = mbr.assume_init()?;
 
         if mbr.magic != [0x55, 0xaa] {
diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs
index aff3271e..e4a6e1b3 100644
--- a/src/kernel/chardev.rs
+++ b/src/kernel/chardev.rs
@@ -1,23 +1,18 @@
-use super::{
-    block::make_device,
-    console::get_console,
-    constants::{EEXIST, EIO},
-    task::{block_on, ProcessList, Thread},
-    terminal::Terminal,
-    vfs::{DevId, File, FileType, TerminalFile},
-};
-use crate::{
-    io::{Buffer, Stream, StreamRead},
-    prelude::*,
-};
-use alloc::{
-    boxed::Box,
-    collections::btree_map::{BTreeMap, Entry},
-    sync::Arc,
-};
-use eonix_sync::AsProof as _;
+use alloc::boxed::Box;
+use alloc::collections::btree_map::{BTreeMap, Entry};
+use alloc::sync::Arc;
+
 use posix_types::open::OpenFlags;
 
+use super::console::get_console;
+use super::constants::{EEXIST, EIO};
+use super::task::{block_on, Thread};
+use super::terminal::Terminal;
+use super::vfs::types::DeviceId;
+use super::vfs::{File, FileType, TerminalFile};
+use crate::io::{Buffer, Stream, StreamRead};
+use crate::prelude::*;
+
 pub trait VirtualCharDevice: Send + Sync {
     fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize>;
     fn write(&self, stream: &mut dyn Stream) -> KResult<usize>;
@@ -34,12 +29,15 @@ pub struct CharDevice {
     device: CharDeviceType,
 }
 
-static CHAR_DEVICES: Spin<BTreeMap<DevId, Arc<CharDevice>>> = Spin::new(BTreeMap::new());
+static CHAR_DEVICES: Spin<BTreeMap<DeviceId, Arc<CharDevice>>> =
+    Spin::new(BTreeMap::new());
 
 impl CharDevice {
     pub fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
         match &self.device {
-            CharDeviceType::Terminal(terminal) => block_on(terminal.read(buffer)),
+            CharDeviceType::Terminal(terminal) => {
+                block_on(terminal.read(buffer))
+            }
             CharDeviceType::Virtual(device) => device.read(buffer),
         }
     }
@@ -47,18 +45,24 @@ impl CharDevice {
     pub fn write(&self, stream: &mut dyn Stream) -> KResult<usize> {
         match &self.device {
             CharDeviceType::Virtual(device) => device.write(stream),
-            CharDeviceType::Terminal(terminal) => stream.read_till_end(&mut [0; 128], |data| {
-                terminal.write(data);
-                Ok(())
-            }),
+            CharDeviceType::Terminal(terminal) => {
+                stream.read_till_end(&mut [0; 128], |data| {
+                    terminal.write(data);
+                    Ok(())
+                })
+            }
         }
     }
 
-    pub fn get(devid: DevId) -> Option<Arc<CharDevice>> {
+    pub fn get(devid: DeviceId) -> Option<Arc<CharDevice>> {
         CHAR_DEVICES.lock().get(&devid).cloned()
     }
 
-    pub fn register(devid: DevId, name: Arc<str>, device: CharDeviceType) -> KResult<()> {
+    pub fn register(
+        devid: DeviceId,
+        name: Arc<str>,
+        device: CharDeviceType,
+    ) -> KResult<()> {
         match CHAR_DEVICES.lock().entry(devid) {
             Entry::Vacant(entry) => {
                 entry.insert(Arc::new(CharDevice { name, device }));
@@ -68,26 +72,21 @@ impl CharDevice {
         }
     }
 
-    pub fn open(self: &Arc<Self>, flags: OpenFlags) -> KResult<Arc<File>> {
-        Ok(match &self.device {
+    pub async fn open(
+        self: &Arc<Self>,
+        thread: &Thread,
+        flags: OpenFlags,
+    ) -> KResult<Arc<File>> {
+        let file = match &self.device {
+            CharDeviceType::Virtual(_) => {
+                File::new(flags, FileType::CharDev(self.clone()))
+            }
             CharDeviceType::Terminal(terminal) => {
-                let procs = block_on(ProcessList::get().read());
-                let current = Thread::current();
-                let session = current.process.session(procs.prove());
-                // We only set the control terminal if the process is the session leader.
-                if session.sid == Thread::current().process.pid {
-                    // Silently fail if we can't set the control terminal.
-                    dont_check!(block_on(session.set_control_terminal(
-                        &terminal,
-                        false,
-                        procs.prove()
-                    )));
-                }
-
-                TerminalFile::new(terminal.clone(), flags)
+                TerminalFile::open(thread, terminal, flags).await
             }
-            CharDeviceType::Virtual(_) => File::new(flags, FileType::CharDev(self.clone())),
-        })
+        };
+
+        Ok(file)
     }
 }
 
@@ -134,19 +133,19 @@ impl CharDevice {
     pub fn init() -> KResult<()> {
         Self::register(
-            make_device(1, 3),
+            DeviceId::new(1, 3),
             Arc::from("null"),
             CharDeviceType::Virtual(Box::new(NullDevice)),
         )?;
 
         Self::register(
-            make_device(1, 5),
+            DeviceId::new(1, 5),
             Arc::from("zero"),
             CharDeviceType::Virtual(Box::new(ZeroDevice)),
         )?;
 
         Self::register(
-            make_device(5, 1),
+            DeviceId::new(5, 1),
             Arc::from("console"),
             CharDeviceType::Virtual(Box::new(ConsoleDevice)),
         )?;
diff --git a/src/kernel/constants.rs b/src/kernel/constants.rs
index 4e11d66e..b96387b0 100644
--- a/src/kernel/constants.rs
+++ b/src/kernel/constants.rs
@@ -36,7 +36,7 @@ pub const ENOTDIR: u32 = 20;
 pub const EISDIR: u32 = 21;
 pub const EINVAL: u32 = 22;
 pub const ENOTTY: u32 = 25;
-pub const ENOSPC: u32 = 28;
+// pub const ENOSPC: u32 = 28;
 pub const ESPIPE: u32 = 29;
 // pub const EROFS: u32 = 30;
 pub const EPIPE: u32 = 32;
diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs
index 742727cb..2092bfcb 100644
--- a/src/kernel/interrupt.rs
+++ b/src/kernel/interrupt.rs
@@ -1,15 +1,17 @@
-use super::mem::handle_kernel_page_fault;
-use super::task::block_on;
-use super::timer::timer_interrupt;
-use crate::kernel::constants::EINVAL;
-use crate::prelude::*;
 use alloc::sync::Arc;
+
 use eonix_hal::traits::fault::Fault;
 use eonix_hal::traits::trap::{RawTrapContext, TrapType};
 use eonix_hal::trap::TrapContext;
 use eonix_mm::address::{Addr as _, VAddr};
 use eonix_sync::SpinIrq as _;
 
+use super::mem::handle_kernel_page_fault;
+use super::task::block_on;
+use super::timer::timer_interrupt;
+use crate::kernel::constants::EINVAL;
+use crate::prelude::*;
+
 static IRQ_HANDLERS: Spin<[Vec>; 16]> =
     Spin::new([const { Vec::new() }; 16]);
 
diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs
index efd06824..47b864bb 100644
--- a/src/kernel/mem.rs
+++ b/src/kernel/mem.rs
@@ -3,14 +3,16 @@ pub mod paging;
 mod access;
 mod address;
 mod allocator;
+mod folio;
 mod mm_area;
 mod mm_list;
 mod page_alloc;
 mod page_cache;
 
-pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess};
+pub use access::PhysAccess;
+pub use folio::{Folio, FolioOwned, LockedFolio};
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission};
 pub use page_alloc::{GlobalPageAlloc, RawPage};
-pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackend};
-pub use paging::{Page, PageBuffer};
+pub use page_cache::{CachePage, PageCache, PageOffset};
+pub use paging::PageBuffer;
diff --git a/src/kernel/mem/access.rs b/src/kernel/mem/access.rs
index ce525a0a..328dcfbd 100644
--- a/src/kernel/mem/access.rs
+++
b/src/kernel/mem/access.rs @@ -1,22 +1,7 @@ -use core::{num::NonZero, ptr::NonNull}; +use core::ptr::NonNull; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::{PAddr, PhysAccess as _PhysAccess}; -/// A block of memory starting at a non-zero address and having a specific length. -/// -/// This struct is used to represent a memory block that can be accessed -/// in the kernel space. -pub struct MemoryBlock { - addr: NonZero, - len: usize, -} - -pub trait AsMemoryBlock { - /// Translate the physical page the page object pointing to into kernel - /// accessible pointer. Use it with care. - fn as_memblk(&self) -> MemoryBlock; -} - pub trait PhysAccess { /// Translate the data that this address is pointing to into kernel /// accessible pointer. Use it with care. @@ -30,107 +15,6 @@ pub trait PhysAccess { unsafe fn as_ptr(&self) -> NonNull; } -impl MemoryBlock { - /// Create a new `MemoryBlock` with the given address and length. - /// - /// # Safety - /// The caller must ensure that the address is valid. - /// Otherwise, it may lead to undefined behavior. - pub unsafe fn new(addr: NonZero, len: usize) -> Self { - Self { addr, len } - } - - /// Get the start address of the memory block. - #[allow(dead_code)] - pub fn addr(&self) -> NonZero { - self.addr - } - - /// Get the length of the memory block. - #[allow(dead_code)] - pub fn len(&self) -> usize { - self.len - } - - /// Split the memory block into two parts at the given offset. - pub fn split_at(&self, at: usize) -> (Self, Self) { - if at > self.len { - panic!("Out of bounds"); - } - - let rhs_start = self.addr.checked_add(at).expect("Overflow"); - - let lhs = unsafe { Self::new(self.addr, at) }; - let rhs = unsafe { Self::new(rhs_start, self.len - at) }; - - (lhs, rhs) - } - - /// Provide a pointer to the data. - /// - /// # Safety - /// Using the returned pointer is undefined behavior if the address is not - /// properly aligned or the size is not equal to the size of `T`. - pub unsafe fn as_ptr_unchecked(&self) -> NonNull { - // SAFETY: `self.addr` is a non-zero value. - NonNull::new_unchecked(self.addr.get() as *mut T) - } - - /// Provide a pointer to the data. - /// - /// # Panic - /// Panic if the address is not properly aligned. - pub fn as_ptr(&self) -> NonNull { - let alignment = align_of::(); - - if self.addr.get() % alignment != 0 { - panic!("Alignment error"); - } - - unsafe { - // SAFETY: We've checked that `self.addr` is properly aligned. - self.as_ptr_unchecked() - } - } - - /// Provide a pointer to the bytes. - pub fn as_byte_ptr(&self) -> NonNull { - unsafe { - // SAFETY: No alignment check is needed for bytes. - self.as_ptr_unchecked() - } - } - - /// Provide immutable access to the data it pointed to. - /// - /// # Safety - /// This function is unsafe because it returns an immutable reference with - /// a created lifetime. - /// - /// The caller must ensure that the data has no other mutable aliases while - /// the reference is in use. Otherwise, it may lead to undefined behavior. - pub unsafe fn as_bytes<'a>(&self) -> &'a [u8] { - core::slice::from_raw_parts(self.as_ptr_unchecked().as_ptr(), self.len) - } - - /// Provide mutable access to the data it pointed to. - /// - /// # Panic - /// Panic if the address is not properly aligned or the size is not - /// equal to the size of `T`. - /// - /// # Safety - /// This function is unsafe because it returns a mutable reference with a - /// created lifetime. 
-    ///
-    /// The caller must ensure that the data has no other immutable or mutable
-    /// aliases while the reference is in use.
-    /// Otherwise, it may lead to undefined behavior.
-    pub unsafe fn as_bytes_mut<'a>(&mut self) -> &'a mut [u8] {
-        core::slice::from_raw_parts_mut(self.as_ptr_unchecked().as_ptr(), self.len)
-    }
-}
-
 impl PhysAccess for PAddr {
     unsafe fn as_ptr<T>(&self) -> NonNull<T> {
         ArchPhysAccess::as_ptr(*self)
diff --git a/src/kernel/mem/allocator.rs b/src/kernel/mem/allocator.rs
index 36b19612..3a70a8c2 100644
--- a/src/kernel/mem/allocator.rs
+++ b/src/kernel/mem/allocator.rs
@@ -1,15 +1,17 @@
-use super::page_alloc::RawPagePtr;
-use super::{AsMemoryBlock, GlobalPageAlloc, Page};
 use core::alloc::{GlobalAlloc, Layout};
 use core::ptr::NonNull;
+
 use eonix_hal::mm::ArchPhysAccess;
 use eonix_mm::address::PhysAccess;
-use eonix_mm::paging::{PAGE_SIZE_BITS, PFN};
+use eonix_mm::paging::{Folio as _, PAGE_SIZE_BITS, PFN};
 use eonix_sync::LazyLock;
-use slab_allocator::SlabAllocator;
+use slab_allocator::SlabAlloc;
+
+use super::folio::Folio;
+use super::GlobalPageAlloc;
 
-static SLAB_ALLOCATOR: LazyLock<SlabAllocator<GlobalPageAlloc>> =
-    LazyLock::new(|| SlabAllocator::new_in(GlobalPageAlloc));
+static SLAB_ALLOCATOR: LazyLock<SlabAlloc<GlobalPageAlloc>> =
+    LazyLock::new(|| SlabAlloc::new_in(GlobalPageAlloc));
 
 struct Allocator;
 
@@ -17,34 +19,31 @@ unsafe impl GlobalAlloc for Allocator {
     unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
         let size = layout.size().next_power_of_two();
 
-        let result = if size <= 2048 {
-            SLAB_ALLOCATOR.alloc(size)
+        if size <= 2048 {
+            SLAB_ALLOCATOR.alloc(size).as_ptr()
         } else {
-            let page_count = size >> PAGE_SIZE_BITS;
-            let page = Page::alloc_at_least(page_count);
-
-            let ptr = page.as_memblk().as_ptr();
-            page.into_raw();
+            let folio = Folio::alloc_at_least(size >> PAGE_SIZE_BITS);
+            let ptr = folio.get_ptr();
+            folio.into_raw();
 
             ptr.as_ptr()
-        };
-
-        if result.is_null() {
-            core::ptr::null_mut()
-        } else {
-            result as *mut u8
         }
     }
 
     unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
         let size = layout.size().next_power_of_two();
+        let ptr = unsafe {
+            // SAFETY: The memory we've allocated MUST be non-null.
+            NonNull::new_unchecked(ptr)
+        };
 
         if size <= 2048 {
             SLAB_ALLOCATOR.dealloc(ptr, size)
         } else {
-            let paddr = ArchPhysAccess::from_ptr(NonNull::new_unchecked(ptr));
+            let paddr = ArchPhysAccess::from_ptr(ptr);
             let pfn = PFN::from(paddr);
-            Page::from_raw(pfn);
+
+            Folio::from_raw(pfn);
         };
     }
 }
diff --git a/src/kernel/mem/folio.rs b/src/kernel/mem/folio.rs
new file mode 100644
index 00000000..8ab4d6be
--- /dev/null
+++ b/src/kernel/mem/folio.rs
@@ -0,0 +1,210 @@
+use core::fmt;
+use core::mem::ManuallyDrop;
+use core::ops::Deref;
+use core::ptr::NonNull;
+use core::sync::atomic::Ordering;
+
+use eonix_mm::paging::{Folio as FolioTrait, FrameAlloc, GlobalFrameAlloc, Zone, PFN};
+
+use super::page_alloc::ZONE;
+use super::{GlobalPageAlloc, PhysAccess as _, RawPage};
+
+#[repr(transparent)]
+pub struct Folio(NonNull<RawPage>);
+
+#[derive(Debug)]
+#[repr(transparent)]
+pub struct FolioOwned(Folio);
+
+#[repr(transparent)]
+pub struct LockedFolio<'a>(&'a Folio);
+
+unsafe impl Send for Folio {}
+unsafe impl Sync for Folio {}
+
+impl Folio {
+    pub(super) const fn from_mut_page(raw_page: &'static mut RawPage) -> Self {
+        Self(NonNull::new(raw_page).unwrap())
+    }
+
+    /// Allocate a folio of the given *order*.
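+    /// (An order-`n` folio covers `1 << n` physically contiguous base pages.)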
+    pub fn alloc_order(order: u32) -> Self {
+        GlobalPageAlloc::GLOBAL
+            .alloc_order(order)
+            .expect("Out of memory")
+    }
+
+    /// Allocate a folio of order 0.
+    pub fn alloc() -> Self {
+        Self::alloc_order(0)
+    }
+
+    /// Allocate a folio consisting of at least [`count`] pages.
+    pub fn alloc_at_least(count: usize) -> Self {
+        GlobalPageAlloc::GLOBAL
+            .alloc_at_least(count)
+            .expect("Out of memory")
+    }
+
+    /// Acquire the ownership of the folio pointed to by [`pfn`], leaving
+    /// [`refcount`] untouched.
+    ///
+    /// # Panic
+    /// This function will panic if the folio is not within the global zone.
+    ///
+    /// # Safety
+    /// The caller must ensure that [`pfn`] points to a valid folio allocated
+    /// through [`Self::alloc()`] and that the folio has not been freed or
+    /// deallocated yet.
+    pub unsafe fn from_raw(pfn: PFN) -> Self {
+        unsafe {
+            // SAFETY: The caller ensures that [`pfn`] points to a folio within
+            // the global zone.
+            Self(ZONE.get_page(pfn).unwrap_unchecked())
+        }
+    }
+
+    /// Do some work with the folio without touching the reference count,
+    /// under the same restrictions as [`Self::from_raw()`].
+    ///
+    /// # Safety
+    /// Check [`Self::from_raw()`] for safety requirements.
+    pub unsafe fn with_raw<F, O>(pfn: PFN, func: F) -> O
+    where
+        F: FnOnce(&Self) -> O,
+    {
+        unsafe {
+            let me = ManuallyDrop::new(Self::from_raw(pfn));
+            func(&me)
+        }
+    }
+
+    pub fn lock(&self) -> LockedFolio<'_> {
+        // TODO: actually perform the lock...
+        LockedFolio(self)
+    }
+
+    /// Get a vmem pointer to the folio data as a byte slice.
+    pub fn get_bytes_ptr(&self) -> NonNull<[u8]> {
+        unsafe {
+            // SAFETY: `self.start()` can't be null.
+            NonNull::slice_from_raw_parts(self.start().as_ptr(), self.len())
+        }
+    }
+
+    /// Get a vmem pointer to the start of the folio.
+    pub fn get_ptr(&self) -> NonNull<u8> {
+        self.get_bytes_ptr().cast()
+    }
+}
+
+impl Deref for Folio {
+    type Target = RawPage;
+
+    fn deref(&self) -> &Self::Target {
+        unsafe {
+            // SAFETY: We don't expose mutable references to the folio.
+            self.0.as_ref()
+        }
+    }
+}
+
+impl Clone for Folio {
+    fn clone(&self) -> Self {
+        // SAFETY: The memory order can be `Relaxed` for the same reason as in
+        // the copy constructor of `std::shared_ptr`: we already hold a
+        // reference, so the count cannot concurrently drop to zero.
+        self.refcount.fetch_add(1, Ordering::Relaxed);
+
+        Self(self.0)
+    }
+}
+
+impl Drop for Folio {
+    fn drop(&mut self) {
+        match self.refcount.fetch_sub(1, Ordering::AcqRel) {
+            0 => unreachable!("Refcount for an in-use page is 0"),
+            1 => unsafe { GlobalPageAlloc::GLOBAL.dealloc_raw(self.0.as_mut()) },
+            _ => {}
+        }
+    }
+}
+
+impl fmt::Debug for Folio {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Page({:?}, order={})", self.pfn(), self.order)
+    }
+}
+
+impl FolioTrait for Folio {
+    fn pfn(&self) -> PFN {
+        ZONE.get_pfn(self.0.as_ptr())
+    }
+
+    fn order(&self) -> u32 {
+        self.order
+    }
+}
+
+impl LockedFolio<'_> {
+    pub fn as_bytes(&self) -> &[u8] {
+        unsafe {
+            // SAFETY: `self.start()` points to valid memory of length `self.len()`.
+            core::slice::from_raw_parts(self.start().as_ptr().as_ptr(), self.len())
+        }
+    }
+
+    pub fn as_bytes_mut(&mut self) -> &mut [u8] {
+        unsafe {
+            // SAFETY: `self.start()` points to valid memory of length `self.len()`.
+ core::slice::from_raw_parts_mut(self.start().as_ptr().as_ptr(), self.len()) + } + } +} + +impl Deref for LockedFolio<'_> { + type Target = Folio; + + fn deref(&self) -> &Self::Target { + self.0 + } +} + +impl FolioOwned { + pub fn alloc() -> Self { + Self(Folio::alloc()) + } + + pub fn alloc_order(order: u32) -> Self { + Self(Folio::alloc_order(order)) + } + + pub fn alloc_at_least(count: usize) -> Self { + Self(Folio::alloc_at_least(count)) + } + + pub fn as_bytes(&self) -> &[u8] { + unsafe { + // SAFETY: The page is exclusively owned by us. + self.get_bytes_ptr().as_ref() + } + } + + pub fn as_bytes_mut(&mut self) -> &mut [u8] { + unsafe { + // SAFETY: The page is exclusively owned by us. + self.get_bytes_ptr().as_mut() + } + } + + pub fn share(self) -> Folio { + self.0 + } +} + +impl Deref for FolioOwned { + type Target = Folio; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index 731c5303..782c5ef7 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -1,14 +1,17 @@ -use super::mm_list::EMPTY_PAGE; -use super::paging::AllocZeroed as _; -use super::{AsMemoryBlock, Mapping, Page, Permission}; -use crate::kernel::constants::EINVAL; -use crate::prelude::KResult; use core::borrow::Borrow; use core::cell::UnsafeCell; use core::cmp; +use core::sync::atomic::Ordering; + use eonix_mm::address::{AddrOps as _, VAddr, VRange}; use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; -use eonix_mm::paging::{PAGE_SIZE, PFN}; +use eonix_mm::paging::{Folio as _, PFN}; + +use super::mm_list::EMPTY_PAGE; +use super::{Mapping, Permission}; +use crate::kernel::mem::folio::Folio; +use crate::kernel::mem::{CachePage, FolioOwned, PageOffset}; +use crate::prelude::KResult; #[derive(Debug)] pub struct MMArea { @@ -96,8 +99,10 @@ impl MMArea { attr.remove(PageAttribute::COPY_ON_WRITE); attr.set(PageAttribute::WRITE, self.permission.write); - let page = unsafe { Page::from_raw(*pfn) }; - if page.is_exclusive() { + let page = unsafe { Folio::from_raw(*pfn) }; + + // XXX: Change me!!! + if page.refcount.load(Ordering::Relaxed) == 1 { // SAFETY: This is actually safe. If we read `1` here and we have `MMList` lock // held, there couldn't be neither other processes sharing the page, nor other // threads making the page COW at the same time. @@ -105,25 +110,27 @@ impl MMArea { return; } - let new_page; + let mut new_page; if *pfn == EMPTY_PAGE.pfn() { - new_page = Page::zeroed(); + new_page = { + let mut folio = FolioOwned::alloc(); + folio.as_bytes_mut().fill(0); + folio + }; } else { - new_page = Page::alloc(); + new_page = FolioOwned::alloc(); unsafe { // SAFETY: `page` is CoW, which means that others won't write to it. - let old_page_data = page.as_memblk().as_bytes(); - - // SAFETY: `new_page` is exclusive owned by us. 
- let new_page_data = new_page.as_memblk().as_bytes_mut(); + let old_page_data = page.get_bytes_ptr().as_ref(); + let new_page_data = new_page.as_bytes_mut(); new_page_data.copy_from_slice(old_page_data); }; } attr.remove(PageAttribute::ACCESSED); - *pfn = new_page.into_raw(); + *pfn = new_page.share().into_raw(); } /// # Arguments @@ -141,61 +148,48 @@ impl MMArea { assert!(offset < file_mapping.length, "Offset out of range"); - let Some(page_cache) = file_mapping.file.page_cache() else { - panic!("Mapping file should have pagecache"); + let file_offset = file_mapping.offset + offset; + + let map_page = |cache_page: &CachePage| { + if !self.permission.write { + assert!(!write, "Write fault on read-only mapping"); + + *pfn = cache_page.add_mapping(); + return; + } + + if self.is_shared { + // We don't process dirty flags in write faults. + // Simply assume that page will eventually be dirtied. + // So here we can set the dirty flag now. + cache_page.set_dirty(true); + attr.insert(PageAttribute::WRITE); + *pfn = cache_page.add_mapping(); + return; + } + + if !write { + // Delay the copy-on-write until write fault happens. + attr.insert(PageAttribute::COPY_ON_WRITE); + *pfn = cache_page.add_mapping(); + return; + } + + // XXX: Change this. Let's handle mapped pages before CoW pages. + // Nah, we are writing to a mapped private mapping... + let mut new_page = FolioOwned::alloc(); + new_page + .as_bytes_mut() + .copy_from_slice(cache_page.lock().as_bytes()); + + attr.insert(PageAttribute::WRITE); + *pfn = new_page.share().into_raw(); }; - let file_offset = file_mapping.offset + offset; - let cnt_to_read = (file_mapping.length - offset).min(0x1000); - - page_cache - .with_page(file_offset, |page, cache_page| { - // Non-write faults: we find page in pagecache and do mapping - // Write fault: we need to care about shared or private mapping. - if !write { - // Bss is embarrassing in pagecache! - // We have to assume cnt_to_read < PAGE_SIZE all bss - if cnt_to_read < PAGE_SIZE { - let new_page = Page::zeroed(); - unsafe { - let page_data = new_page.as_memblk().as_bytes_mut(); - page_data[..cnt_to_read] - .copy_from_slice(&page.as_memblk().as_bytes()[..cnt_to_read]); - } - *pfn = new_page.into_raw(); - } else { - *pfn = page.clone().into_raw(); - } - - if self.permission.write { - if self.is_shared { - // The page may will not be written, - // But we simply assume page will be dirty - cache_page.set_dirty(); - attr.insert(PageAttribute::WRITE); - } else { - attr.insert(PageAttribute::COPY_ON_WRITE); - } - } - } else { - if self.is_shared { - cache_page.set_dirty(); - *pfn = page.clone().into_raw(); - } else { - let new_page = Page::zeroed(); - unsafe { - let page_data = new_page.as_memblk().as_bytes_mut(); - page_data[..cnt_to_read] - .copy_from_slice(&page.as_memblk().as_bytes()[..cnt_to_read]); - } - *pfn = new_page.into_raw(); - } - - attr.insert(PageAttribute::WRITE); - } - }) - .await? 
- .ok_or(EINVAL)?; + file_mapping + .page_cache + .with_page(PageOffset::from_byte_floor(file_offset), map_page) + .await?; attr.insert(PageAttribute::PRESENT); attr.remove(PageAttribute::MAPPED); diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index ad1e45c2..f073025b 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -1,34 +1,33 @@ mod mapping; mod page_fault; +mod page_table; -use super::address::{VAddrExt as _, VRangeExt as _}; -use super::page_alloc::GlobalPageAlloc; -use super::paging::AllocZeroed as _; -use super::{AsMemoryBlock, MMArea, Page}; -use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; -use crate::kernel::mem::page_alloc::RawPagePtr; -use crate::{prelude::*, sync::ArcSwap}; use alloc::collections::btree_set::BTreeSet; use core::fmt; use core::sync::atomic::{AtomicUsize, Ordering}; + use eonix_hal::mm::{ - flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchPagingMode, - ArchPhysAccess, GLOBAL_PAGE_TABLE, -}; -use eonix_mm::address::{Addr as _, PAddr}; -use eonix_mm::page_table::PageAttribute; -use eonix_mm::paging::PFN; -use eonix_mm::{ - address::{AddrOps as _, VAddr, VRange}, - page_table::{PageTable, RawAttribute, PTE}, - paging::PAGE_SIZE, + flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, GLOBAL_PAGE_TABLE, }; +use eonix_mm::address::{Addr as _, AddrOps as _, PAddr, VAddr, VRange}; +use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; +use eonix_mm::paging::{Folio as _, PAGE_SIZE, PFN}; use eonix_sync::{LazyLock, Mutex}; - pub use mapping::{FileMapping, Mapping}; pub use page_fault::handle_kernel_page_fault; +use page_table::KernelPageTable; -pub static EMPTY_PAGE: LazyLock = LazyLock::new(|| Page::zeroed()); +use super::address::{VAddrExt as _, VRangeExt as _}; +use super::{Folio, FolioOwned, MMArea}; +use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; +use crate::prelude::*; +use crate::sync::ArcSwap; + +pub static EMPTY_PAGE: LazyLock = LazyLock::new(|| { + let mut folio = FolioOwned::alloc(); + folio.as_bytes_mut().fill(0); + folio.share() +}); #[derive(Debug, Clone, Copy)] pub struct Permission { @@ -37,23 +36,21 @@ pub struct Permission { pub execute: bool, } -pub type KernelPageTable<'a> = PageTable<'a, ArchPagingMode, GlobalPageAlloc, ArchPhysAccess>; - -struct MMListInner<'a> { +struct MMListInner { areas: BTreeSet, - page_table: KernelPageTable<'a>, + page_table: KernelPageTable, break_start: Option, break_pos: Option, } pub struct MMList { - inner: ArcSwap>>, + inner: ArcSwap>, user_count: AtomicUsize, /// Only used in kernel space to switch page tables on context switch. root_page_table: AtomicUsize, } -impl MMListInner<'_> { +impl MMListInner { fn overlapping_addr(&self, addr: VAddr) -> Option<&MMArea> { self.areas.get(&VRange::from(addr)) } @@ -99,7 +96,7 @@ impl MMListInner<'_> { } } - fn unmap(&mut self, start: VAddr, len: usize) -> KResult> { + fn unmap(&mut self, start: VAddr, len: usize) -> KResult> { assert_eq!(start.floor(), start); let end = (start + len).ceil(); let range_to_unmap = VRange::new(start, end); @@ -123,7 +120,7 @@ impl MMListInner<'_> { let (pfn, _) = pte.take(); pages_to_free.push(unsafe { // SAFETY: We got the pfn from a valid page table entry, so it should be valid. 
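+                    // The folio adopts the reference that the page table held;
+                    // the page is actually freed when the returned
+                    // `pages_to_free` vector is dropped.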
- Page::from_raw(pfn) + Folio::from_raw(pfn) }); } @@ -278,23 +275,23 @@ impl MMListInner<'_> { } } -impl Drop for MMListInner<'_> { +impl Drop for MMListInner { fn drop(&mut self) { // May buggy for area in &self.areas { if area.is_shared { for pte in self.page_table.iter_user(area.range()) { - let (pfn, _) = pte.take(); - let raw_page = RawPagePtr::from(pfn); - if raw_page.refcount().fetch_sub(1, Ordering::Relaxed) == 1 { - // Wrong here - // unsafe { Page::from_raw(pfn) }; - } + // XXX: Fix me + let _ = pte.take(); + // let raw_page = RawPagePtr::from(pfn); + // if raw_page.refcount().fetch_sub(1, Ordering::Relaxed) == 1 { + // unsafe { Page::from_raw(pfn) }; + // } } } else { for pte in self.page_table.iter_user(area.range()) { let (pfn, _) = pte.take(); - unsafe { Page::from_raw(pfn) }; + unsafe { Folio::from_raw(pfn) }; } } } @@ -330,7 +327,7 @@ impl MMList { } pub fn new() -> Self { - let page_table = GLOBAL_PAGE_TABLE.clone_global(); + let page_table = KernelPageTable::new(); Self { root_page_table: AtomicUsize::from(page_table.addr().addr()), user_count: AtomicUsize::new(0), @@ -347,7 +344,7 @@ impl MMList { let inner = self.inner.borrow(); let mut inner = inner.lock().await; - let page_table = GLOBAL_PAGE_TABLE.clone_global(); + let page_table = KernelPageTable::new(); let list = Self { root_page_table: AtomicUsize::from(page_table.addr().addr()), user_count: AtomicUsize::new(0), @@ -395,26 +392,12 @@ impl MMList { } pub fn deactivate(&self) { - set_root_page_table_pfn(PFN::from(GLOBAL_PAGE_TABLE.addr())); + set_root_page_table_pfn(PFN::from(GLOBAL_PAGE_TABLE.start())); let old_user_count = self.user_count.fetch_sub(1, Ordering::Release); assert_ne!(old_user_count, 0); } - /// Deactivate `self` and activate `to` with root page table changed only once. - /// This might reduce the overhead of switching page tables twice. - #[allow(dead_code)] - pub fn switch(&self, to: &Self) { - self.user_count.fetch_add(1, Ordering::Acquire); - - let root_page_table = self.root_page_table.load(Ordering::Relaxed); - assert_ne!(root_page_table, 0); - set_root_page_table_pfn(PFN::from(PAddr::from(root_page_table))); - - let old_user_count = to.user_count.fetch_sub(1, Ordering::Release); - assert_ne!(old_user_count, 0); - } - /// Replace the current page table with a new one. /// /// # Safety @@ -447,7 +430,7 @@ impl MMList { let new_root_page_table = match &new { Some(new_mm) => new_mm.root_page_table.load(Ordering::Relaxed), - None => GLOBAL_PAGE_TABLE.addr().addr(), + None => GLOBAL_PAGE_TABLE.start().addr(), }; set_root_page_table_pfn(PFN::from(PAddr::from(new_root_page_table))); @@ -457,10 +440,24 @@ impl MMList { // TODO: Check whether we should wake someone up if they've been put // to sleep when calling `vfork`. - self.inner + let old_mm = self + .inner .swap(new.map(|new_mm| new_mm.inner.swap(None)).flatten()); eonix_preempt::enable(); + + // This could take long... + drop(old_mm); + } + + pub fn release(&self) { + let old_mm = self.inner.swap(None); + let old_table = self.root_page_table.swap(0, Ordering::Relaxed); + + // TODO: Remove this completely... + // XXX: `ArcSwap` is broken and never safe to use. Check `replace` above. + assert_ne!(old_table, 0, "Already released?"); + assert!(old_mm.is_some(), "Already released?"); } /// No need to do invalidation manually, `PageTable` already does it. @@ -696,13 +693,11 @@ impl MMList { unsafe { // SAFETY: We are sure that the page is valid and we have the right to access it. 
- Page::with_raw(pte.get_pfn(), |page| { - // SAFETY: The caller guarantees that no one else is using the page. - let page_data = page.as_memblk().as_bytes_mut(); - func( - offset + idx * 0x1000, - &mut page_data[start_offset..end_offset], - ); + Folio::with_raw(pte.get_pfn(), |page| { + let mut pg = page.lock(); + let page_data = &mut pg.as_bytes_mut()[start_offset..end_offset]; + + func(offset + idx * 0x1000, page_data); }); } } @@ -729,7 +724,7 @@ trait PageTableExt { fn set_copied(&self, from: &Self, range: VRange); } -impl PageTableExt for KernelPageTable<'_> { +impl PageTableExt for KernelPageTable { fn set_anonymous(&self, range: VRange, permission: Permission) { for pte in self.iter_user(range) { pte.set_anonymous(permission.execute); @@ -810,7 +805,7 @@ where let pfn = unsafe { // SAFETY: We get the pfn from a valid page table entry, so it should be valid as well. - Page::with_raw(from.get_pfn(), |page| page.clone().into_raw()) + Folio::with_raw(from.get_pfn(), |page| page.clone().into_raw()) }; self.set(pfn, T::Attr::from(from_attr & !PageAttribute::ACCESSED)); diff --git a/src/kernel/mem/mm_list/mapping.rs b/src/kernel/mem/mm_list/mapping.rs index 662000ba..2b837ae7 100644 --- a/src/kernel/mem/mm_list/mapping.rs +++ b/src/kernel/mem/mm_list/mapping.rs @@ -1,24 +1,18 @@ -use core::fmt::Debug; - -use crate::kernel::vfs::inode::Inode; use alloc::sync::Arc; + use eonix_mm::paging::PAGE_SIZE; +use crate::kernel::mem::PageCache; + #[derive(Debug, Clone)] pub struct FileMapping { - pub file: Arc, + pub page_cache: Arc, /// Offset in the file, aligned to 4KB boundary. pub offset: usize, /// Length of the mapping. Exceeding part will be zeroed. pub length: usize, } -impl Debug for dyn Inode { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "Inode()") - } -} - #[derive(Debug, Clone)] pub enum Mapping { // private anonymous memory @@ -28,10 +22,10 @@ pub enum Mapping { } impl FileMapping { - pub fn new(file: Arc, offset: usize, length: usize) -> Self { + pub fn new(page_cache: Arc, offset: usize, length: usize) -> Self { assert_eq!(offset & (PAGE_SIZE - 1), 0); Self { - file, + page_cache, offset, length, } @@ -39,10 +33,10 @@ impl FileMapping { pub fn offset(&self, offset: usize) -> Self { if self.length <= offset { - Self::new(self.file.clone(), self.offset + self.length, 0) + Self::new(self.page_cache.clone(), self.offset + self.length, 0) } else { Self::new( - self.file.clone(), + self.page_cache.clone(), self.offset + offset, self.length - offset, ) diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs index 6f14583d..5a56efbc 100644 --- a/src/kernel/mem/mm_list/page_fault.rs +++ b/src/kernel/mem/mm_list/page_fault.rs @@ -1,11 +1,13 @@ -use super::{MMList, VAddr}; -use crate::kernel::task::Thread; +use eonix_hal::extern_symbol_addr; use eonix_hal::mm::flush_tlb; use eonix_hal::traits::fault::PageFaultErrorCode; use eonix_mm::address::{Addr as _, AddrOps as _, VRange}; use eonix_mm::paging::PAGE_SIZE; use posix_types::signal::Signal; +use super::{MMList, VAddr}; +use crate::kernel::task::Thread; + #[repr(C)] struct FixEntry { start: u64, @@ -23,27 +25,19 @@ impl FixEntry { VAddr::from((self.start + self.length) as usize) } - #[allow(dead_code)] - fn range(&self) -> VRange { - VRange::new(self.start(), self.end()) - } - fn jump_address(&self) -> VAddr { VAddr::from(self.jump_address as usize) } fn entries() -> &'static [FixEntry] { - extern "C" { - fn FIX_START(); - fn FIX_END(); - } + let fix_seg_len_bytes = 
extern_symbol_addr!(FIX_END) - extern_symbol_addr!(FIX_START); unsafe { - // SAFETY: `FIX_START` and `FIX_END` are defined in the - // linker script in `.rodata` section. + // SAFETY: `FIX_START` and `FIX_END` are defined in the linker script + // in `.rodata` section. core::slice::from_raw_parts( - FIX_START as usize as *const FixEntry, - (FIX_END as usize - FIX_START as usize) / size_of::(), + extern_symbol_addr!(FIX_START, FixEntry), + fix_seg_len_bytes / size_of::(), ) } } diff --git a/src/kernel/mem/mm_list/page_table.rs b/src/kernel/mem/mm_list/page_table.rs new file mode 100644 index 00000000..8a2acc13 --- /dev/null +++ b/src/kernel/mem/mm_list/page_table.rs @@ -0,0 +1,40 @@ +use core::ops::Deref; + +use eonix_hal::arch_exported::mm::{ArchPagingMode, PageAccessImpl}; +use eonix_hal::mm::GLOBAL_PAGE_TABLE; +use eonix_mm::page_table::PageTable; +use eonix_mm::paging::{Folio, GlobalFrameAlloc}; + +use crate::kernel::mem::{FolioOwned, GlobalPageAlloc, PhysAccess}; + +#[repr(transparent)] +pub struct KernelPageTable(PageTable<'static, ArchPagingMode, GlobalPageAlloc, PageAccessImpl>); + +impl KernelPageTable { + pub fn new() -> Self { + let global_page_table = unsafe { + // SAFETY: The region is valid and read only after initialization. + GLOBAL_PAGE_TABLE.start().as_ptr::<[u8; 4096]>().as_ref() + }; + + let mut table_page = FolioOwned::alloc(); + let entries = table_page.as_bytes_mut().len(); + table_page.as_bytes_mut()[..(entries / 2)].fill(0); + table_page.as_bytes_mut()[(entries / 2)..] + .copy_from_slice(&global_page_table[(entries / 2)..]); + + Self(PageTable::new( + table_page.share(), + GlobalPageAlloc::GLOBAL, + PageAccessImpl, + )) + } +} + +impl Deref for KernelPageTable { + type Target = PageTable<'static, ArchPagingMode, GlobalPageAlloc, PageAccessImpl>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/src/kernel/mem/page_alloc.rs b/src/kernel/mem/page_alloc.rs index fcbe9bb3..ac2485da 100644 --- a/src/kernel/mem/page_alloc.rs +++ b/src/kernel/mem/page_alloc.rs @@ -1,21 +1,25 @@ mod raw_page; +mod zones; -use buddy_allocator::{BuddyAllocator, BuddyRawPage as _}; use core::sync::atomic::Ordering; -use eonix_mm::{ - address::{AddrOps as _, PRange}, - paging::{GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PFN}, -}; + +use buddy_allocator::BuddyAllocator; +use eonix_mm::address::PRange; +use eonix_mm::page_table::PageTableAlloc; +use eonix_mm::paging::{FolioList, FolioListSized as _, FrameAlloc, GlobalFrameAlloc, PFN}; +use eonix_preempt::PreemptGuard; use eonix_sync::{NoContext, Spin}; -use intrusive_list::List; -use raw_page::PageFlags; +pub use raw_page::{PageFlags, RawPage, RawPageList}; +pub use zones::{GlobalZone, ZONE}; -pub use raw_page::{RawPage, RawPagePtr}; +use super::folio::Folio; const COSTLY_ORDER: u32 = 3; +const AREAS: usize = COSTLY_ORDER as usize + 1; const BATCH_SIZE: u32 = 64; -static BUDDY_ALLOC: Spin> = Spin::new(BuddyAllocator::new()); +static BUDDY_ALLOC: Spin> = + Spin::new(BuddyAllocator::new(&GlobalZone())); #[eonix_percpu::define_percpu] static PERCPU_PAGE_ALLOC: PerCpuPageAlloc = PerCpuPageAlloc::new(); @@ -23,82 +27,48 @@ static PERCPU_PAGE_ALLOC: PerCpuPageAlloc = PerCpuPageAlloc::new(); #[derive(Clone)] pub struct GlobalPageAlloc; -#[derive(Clone)] -pub struct BuddyPageAlloc(); - struct PerCpuPageAlloc { batch: u32, - // TODO: might be used in the future. 
- // high: u32, - free_areas: [List; COSTLY_ORDER as usize + 1], + free_areas: [RawPageList; AREAS], +} + +pub trait PerCpuPage { + fn set_local(&mut self, val: bool); } impl PerCpuPageAlloc { const fn new() -> Self { Self { batch: BATCH_SIZE, - // high: 0, - free_areas: [const { List::new() }; COSTLY_ORDER as usize + 1], + free_areas: [RawPageList::NEW; AREAS], } } - fn insert_free_pages(&mut self, pages_ptr: RawPagePtr, order: u32) { - let free_area = &mut self.free_areas[order as usize]; - free_area.insert(unsafe { pages_ptr.get_link() }); - } - - fn get_free_pages(&mut self, order: u32) -> Option { - let free_area = &mut self.free_areas[order as usize]; - free_area.pop().map(|node| unsafe { - // SAFETY: `node` is a valid pointer to a `Link` that is not used by anyone. - RawPagePtr::from_link(node) - }) - } - - fn alloc_order(&mut self, order: u32) -> Option { + fn alloc_order(&mut self, order: u32) -> Option<&'static mut RawPage> { assert!(order <= COSTLY_ORDER); - if let Some(pages) = self.get_free_pages(order) { + if let Some(pages) = self.free_areas[order as usize].pop_head() { return Some(pages); } let batch = self.batch >> order; for _ in 0..batch { - if let Some(pages_ptr) = BUDDY_ALLOC.lock().alloc_order(order) { - pages_ptr.flags().set(PageFlags::LOCAL); - self.insert_free_pages(pages_ptr, order); - } else { + let Some(page) = BUDDY_ALLOC.lock().alloc_order(order) else { break; }; + + page.set_local(true); + self.free_areas[order as usize].push_tail(page); } - self.get_free_pages(order) + self.free_areas[order as usize].pop_head() } - fn free_pages(&mut self, pages_ptr: RawPagePtr, order: u32) { - assert_eq!(pages_ptr.order(), order); - assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0); - - pages_ptr.refcount().store(1, Ordering::Relaxed); - self.insert_free_pages(pages_ptr, order); + fn free_pages(&mut self, page: &'static mut RawPage, order: u32) { + self.free_areas[order as usize].push_tail(page); } } impl GlobalPageAlloc { - #[allow(dead_code)] - pub const fn buddy_alloc() -> BuddyPageAlloc { - BuddyPageAlloc() - } - - pub fn mark_present(range: PRange) { - let mut pfn = PFN::from(range.start().ceil()); - let end_pfn = PFN::from(range.end().floor()); - - while pfn < end_pfn { - RawPagePtr::from(pfn).flags().set(PageFlags::PRESENT); - pfn = pfn + 1; - } - } - /// Add the pages in the PAddr range `range` to the global allocator. 
    ///
    /// This function is only to be called on system initialization when `eonix_preempt`
@@ -110,63 +80,68 @@ impl GlobalPageAlloc {
     pub unsafe fn add_pages(range: PRange) {
         BUDDY_ALLOC
             .lock_with_context(NoContext)
-            .create_pages(range.start(), range.end())
+            .create_folios(range.start(), range.end())
     }
-}
-
-impl PageAlloc for GlobalPageAlloc {
-    type RawPage = RawPagePtr;
 
-    fn alloc_order(&self, order: u32) -> Option<RawPagePtr> {
+    pub fn alloc_raw_order(&self, order: u32) -> Option<&'static mut RawPage> {
         if order > COSTLY_ORDER {
             BUDDY_ALLOC.lock().alloc_order(order)
         } else {
             unsafe {
                 eonix_preempt::disable();
-                let page_ptr = PERCPU_PAGE_ALLOC.as_mut().alloc_order(order);
+                let page = PERCPU_PAGE_ALLOC.as_mut().alloc_order(order);
                 eonix_preempt::enable();
-                page_ptr
+
+                page
             }
         }
     }
 
-    unsafe fn dealloc(&self, page_ptr: RawPagePtr) {
-        if page_ptr.order() > COSTLY_ORDER {
-            BUDDY_ALLOC.lock().dealloc(page_ptr);
+    pub unsafe fn dealloc_raw(&self, raw_page: &'static mut RawPage) {
+        assert_eq!(
+            raw_page.refcount.load(Ordering::Relaxed),
+            0,
+            "Trying to free a page with refcount > 0"
+        );
+
+        if raw_page.order > COSTLY_ORDER {
+            BUDDY_ALLOC.lock().dealloc(raw_page);
         } else {
-            let order = page_ptr.order();
+            let order = raw_page.order;
+
             unsafe {
-                eonix_preempt::disable();
-                PERCPU_PAGE_ALLOC.as_mut().free_pages(page_ptr, order);
-                eonix_preempt::enable();
+                PreemptGuard::new(PERCPU_PAGE_ALLOC.as_mut()).free_pages(raw_page, order);
             }
         }
    }
-
-    fn has_management_over(&self, page_ptr: RawPagePtr) -> bool {
-        BuddyAllocator::has_management_over(page_ptr)
-            && (page_ptr.order() > COSTLY_ORDER || page_ptr.flags().has(PageFlags::LOCAL))
-    }
 }
 
-impl GlobalPageAllocTrait for GlobalPageAlloc {
-    fn global() -> Self {
-        GlobalPageAlloc
+impl FrameAlloc for GlobalPageAlloc {
+    type Folio = Folio;
+
+    fn alloc_order(&self, order: u32) -> Option<Self::Folio> {
+        self.alloc_raw_order(order).map(|raw_page| {
+            // SAFETY: The memory order can be `Relaxed` here for the same
+            // reason as in the copy constructor of `std::shared_ptr`.
+ + raw_page.refcount.fetch_add(1, Ordering::Relaxed); + Folio::from_mut_page(raw_page) + }) } } -impl PageAlloc for BuddyPageAlloc { - type RawPage = RawPagePtr; +impl GlobalFrameAlloc for GlobalPageAlloc { + const GLOBAL: Self = GlobalPageAlloc; +} - fn alloc_order(&self, order: u32) -> Option { - BUDDY_ALLOC.lock().alloc_order(order) - } +impl PageTableAlloc for GlobalPageAlloc { + type Folio = Folio; - unsafe fn dealloc(&self, page_ptr: RawPagePtr) { - BUDDY_ALLOC.lock().dealloc(page_ptr); + fn alloc(&self) -> Self::Folio { + FrameAlloc::alloc(self).unwrap() } - fn has_management_over(&self, page_ptr: RawPagePtr) -> bool { - BuddyAllocator::has_management_over(page_ptr) + unsafe fn from_raw(&self, pfn: PFN) -> Self::Folio { + unsafe { Folio::from_raw(pfn) } } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 54d4d590..16d57714 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -1,91 +1,58 @@ -use crate::kernel::mem::page_cache::PageCacheRawPage; -use crate::kernel::mem::PhysAccess; -use buddy_allocator::BuddyRawPage; -use core::{ - ptr::NonNull, - sync::atomic::{AtomicU32, AtomicUsize, Ordering}, -}; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering}; + +use buddy_allocator::BuddyFolio; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::{ - address::{PAddr, PhysAccess as _}, - paging::{RawPage as RawPageTrait, PFN}, -}; -use intrusive_list::{container_of, Link}; -use slab_allocator::SlabRawPage; +use eonix_mm::address::{PAddr, PhysAccess as _}; +use eonix_mm::paging::{FolioList, FolioListSized, Zone, PFN}; +use intrusive_list::{container_of, Link, List}; +use slab_allocator::{SlabPage, SlabPageAlloc, SlabSlot}; -const PAGE_ARRAY: NonNull = - unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) }; +use super::zones::ZONE; +use super::{GlobalPageAlloc, PerCpuPage}; +use crate::kernel::mem::PhysAccess; pub struct PageFlags(AtomicU32); -struct SlabPageInner { - allocated_count: u32, - free_next: Option>, +#[derive(Clone, Copy)] +struct SlabPageData { + allocated_count: usize, + free_next: Option>, } -impl SlabPageInner { - fn new(free_next: Option>) -> Self { +impl SlabPageData { + const fn new() -> Self { Self { allocated_count: 0, - free_next, + free_next: None, } } } -struct PageCacheInner { - valid_size: usize, -} - -pub struct BuddyPageInner {} - -enum PageType { - Buddy(BuddyPageInner), - Slab(SlabPageInner), - PageCache(PageCacheInner), -} - -impl PageType { - fn slab_data(&mut self) -> &mut SlabPageInner { - if let PageType::Slab(slab_data) = self { - return slab_data; - } else { - unreachable!() - } - } - - fn page_cache_data(&mut self) -> &mut PageCacheInner { - if let PageType::PageCache(cache_data) = self { - return cache_data; - } else { - unreachable!() - } - } +#[repr(C)] +union PageData { + slab: SlabPageData, } pub struct RawPage { /// This can be used for LRU page swap in the future. /// /// Now only used for free page links in the buddy system. - link: Link, + pub link: Link, /// # Safety /// This field is only used in buddy system and is protected by the global lock. 
- order: u32, - flags: PageFlags, - refcount: AtomicUsize, + pub order: u32, + pub flags: PageFlags, + pub refcount: AtomicUsize, - shared_data: PageType, + shared_data: PageData, } -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct RawPagePtr(NonNull); - impl PageFlags { - pub const PRESENT: u32 = 1 << 0; - // pub const LOCKED: u32 = 1 << 1; + pub const LOCKED: u32 = 1 << 1; pub const BUDDY: u32 = 1 << 2; pub const SLAB: u32 = 1 << 3; pub const DIRTY: u32 = 1 << 4; - pub const FREE: u32 = 1 << 5; pub const LOCAL: u32 = 1 << 6; pub fn has(&self, flag: u32) -> bool { @@ -99,174 +66,159 @@ impl PageFlags { pub fn clear(&self, flag: u32) { self.0.fetch_and(!flag, Ordering::Relaxed); } -} -impl RawPagePtr { - pub const fn new(ptr: NonNull) -> Self { - Self(ptr) - } - - /// Get a raw pointer to the underlying `RawPage` struct. + /// Set the flag and return whether it was already set. /// - /// # Safety - /// Doing arithmetic on the pointer returned will cause immediate undefined behavior. - pub const unsafe fn as_ptr(self) -> *mut RawPage { - self.0.as_ptr() - } - - pub const fn as_ref<'a>(self) -> &'a RawPage { - unsafe { &*self.as_ptr() } + /// If multiple flags are given, returns true if any of them were already set. + pub fn test_and_set(&self, flag: u32) -> bool { + (self.0.fetch_or(flag, Ordering::Relaxed) & flag) != 0 } +} - pub const fn as_mut<'a>(self) -> &'a mut RawPage { - unsafe { &mut *self.as_ptr() } +impl BuddyFolio for RawPage { + fn pfn(&self) -> PFN { + ZONE.get_pfn(self) } - pub const fn order(&self) -> u32 { - self.as_ref().order + fn get_order(&self) -> u32 { + self.order } - pub const fn flags(&self) -> &PageFlags { - &self.as_ref().flags + fn is_buddy(&self) -> bool { + self.flags.has(PageFlags::BUDDY) } - pub const fn refcount(&self) -> &AtomicUsize { - &self.as_ref().refcount + fn set_order(&mut self, order: u32) { + self.order = order; } - // return the ptr point to the actually raw page - pub fn real_ptr(&self) -> NonNull { - let pfn = unsafe { PFN::from(RawPagePtr(NonNull::new_unchecked(self.as_ptr()))) }; - unsafe { PAddr::from(pfn).as_ptr::() } + fn set_buddy(&mut self, val: bool) { + if val { + self.flags.set(PageFlags::BUDDY); + } else { + self.flags.clear(PageFlags::BUDDY) + } } } -impl From for PFN { - fn from(value: RawPagePtr) -> Self { - let idx = unsafe { value.as_ptr().offset_from(PAGE_ARRAY.as_ptr()) as usize }; - Self::from(idx) - } -} +impl SlabPage for RawPage { + fn get_data_ptr(&self) -> NonNull<[u8]> { + let paddr_start = PAddr::from(ZONE.get_pfn(self)); + let page_data_ptr = unsafe { paddr_start.as_ptr() }; -impl From for RawPagePtr { - fn from(pfn: PFN) -> Self { - let raw_page_ptr = unsafe { PAGE_ARRAY.add(usize::from(pfn)) }; - Self::new(raw_page_ptr) + NonNull::slice_from_raw_parts(page_data_ptr, 1 << (self.order + 12)) } -} -impl RawPageTrait for RawPagePtr { - fn order(&self) -> u32 { - self.order() + fn get_free_slot(&self) -> Option> { + unsafe { + // SAFETY: TODO + self.shared_data.slab.free_next + } } - fn refcount(&self) -> &AtomicUsize { - self.refcount() + fn set_free_slot(&mut self, next: Option>) { + self.shared_data.slab.free_next = next; } - fn is_present(&self) -> bool { - self.flags().has(PageFlags::PRESENT) + fn get_alloc_count(&self) -> usize { + unsafe { + // SAFETY: TODO + self.shared_data.slab.allocated_count + } } -} -impl BuddyRawPage for RawPagePtr { - unsafe fn from_link(link: &mut Link) -> Self { - let raw_page_ptr = container_of!(link, RawPage, link); - Self(raw_page_ptr) - } + fn inc_alloc_count(&mut 
self) -> usize { + unsafe { + // SAFETY: TODO + self.shared_data.slab.allocated_count += 1; - fn set_order(&self, order: u32) { - self.as_mut().order = order; + self.shared_data.slab.allocated_count + } } - unsafe fn get_link(&self) -> &mut Link { - &mut self.as_mut().link - } + fn dec_alloc_count(&mut self) -> usize { + unsafe { + // SAFETY: TODO + self.shared_data.slab.allocated_count -= 1; - fn is_buddy(&self) -> bool { - self.flags().has(PageFlags::BUDDY) + self.shared_data.slab.allocated_count + } } - fn is_free(&self) -> bool { - self.flags().has(PageFlags::FREE) - } + unsafe fn from_allocated(ptr: NonNull) -> &'static mut Self { + unsafe { + // SAFETY: The caller ensures that `ptr` is valid. + let paddr = ArchPhysAccess::from_ptr(ptr); + let pfn = PFN::from(paddr); - fn set_buddy(&self) { - self.flags().set(PageFlags::BUDDY); + ZONE.get_page(pfn) + .expect("Page outside of the global zone") + .as_mut() + } } +} - fn set_free(&self) { - self.flags().set(PageFlags::FREE); +impl PerCpuPage for RawPage { + fn set_local(&mut self, val: bool) { + if val { + self.flags.set(PageFlags::LOCAL) + } else { + self.flags.clear(PageFlags::LOCAL) + } } +} - fn clear_buddy(&self) { - self.flags().clear(PageFlags::BUDDY); - } +pub struct RawPageList(List); - fn clear_free(&self) { - self.flags().clear(PageFlags::FREE); - } -} +unsafe impl Send for RawPageList {} -impl SlabRawPage for RawPagePtr { - unsafe fn from_link(link: &mut Link) -> Self { - let raw_page_ptr = container_of!(link, RawPage, link); - Self(raw_page_ptr) - } +impl FolioList for RawPageList { + type Folio = RawPage; - unsafe fn get_link(&self) -> &mut Link { - &mut self.as_mut().link + fn is_empty(&self) -> bool { + self.0.is_empty() } - fn in_which(ptr: *mut u8) -> RawPagePtr { + fn peek_head(&mut self) -> Option<&mut Self::Folio> { unsafe { - // SAFETY: The pointer is allocated from the slab allocator, - // which can't be null. - let ptr = NonNull::new_unchecked(ptr); + let link = self.0.head()?; + let mut raw_page_ptr = container_of!(link, RawPage, link); - // SAFETY: The pointer is valid. 
- let paddr = ArchPhysAccess::from_ptr(ptr); - let pfn = PFN::from(paddr); - - RawPagePtr::from(pfn) + Some(raw_page_ptr.as_mut()) } } - fn allocated_count(&self) -> &mut u32 { - &mut self.as_mut().shared_data.slab_data().allocated_count - } + fn pop_head(&mut self) -> Option<&'static mut Self::Folio> { + unsafe { + let link = self.0.pop()?; + let mut raw_page_ptr = container_of!(link, RawPage, link); - fn next_free(&self) -> &mut Option> { - &mut self.as_mut().shared_data.slab_data().free_next + Some(raw_page_ptr.as_mut()) + } } - fn real_page_ptr(&self) -> *mut u8 { - self.real_ptr().as_ptr() + fn push_tail(&mut self, page: &'static mut Self::Folio) { + self.0.insert(&mut page.link); } - fn slab_init(&self, first_free: Option>) { - self.as_mut().shared_data = PageType::Slab(SlabPageInner::new(first_free)); + fn remove(&mut self, page: &mut Self::Folio) { + self.0.remove(&mut page.link) } } -impl PageCacheRawPage for RawPagePtr { - fn valid_size(&self) -> &mut usize { - &mut self.as_mut().shared_data.page_cache_data().valid_size - } - - fn is_dirty(&self) -> bool { - self.flags().has(PageFlags::DIRTY) - } +impl FolioListSized for RawPageList { + const NEW: Self = RawPageList(List::new()); +} - fn clear_dirty(&self) { - self.flags().clear(PageFlags::DIRTY); - } +unsafe impl SlabPageAlloc for GlobalPageAlloc { + type Page = RawPage; + type PageList = RawPageList; - fn set_dirty(&self) { - self.flags().set(PageFlags::DIRTY); - } + fn alloc_slab_page(&self) -> &'static mut RawPage { + let raw_page = self.alloc_raw_order(0).expect("Out of memory"); + raw_page.flags.set(PageFlags::SLAB); + raw_page.shared_data.slab = SlabPageData::new(); - fn cache_init(&self) { - self.as_mut().shared_data = PageType::PageCache(PageCacheInner { valid_size: 0 }); + raw_page } } diff --git a/src/kernel/mem/page_alloc/zones.rs b/src/kernel/mem/page_alloc/zones.rs new file mode 100644 index 00000000..032b9cd0 --- /dev/null +++ b/src/kernel/mem/page_alloc/zones.rs @@ -0,0 +1,31 @@ +use core::ptr::NonNull; + +use eonix_mm::address::PRange; +use eonix_mm::paging::{Zone, PFN}; + +use super::RawPage; + +pub static ZONE: GlobalZone = GlobalZone(); + +const PAGE_ARRAY: NonNull = + unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) }; + +pub struct GlobalZone(); + +impl GlobalZone { + pub fn get_pfn(&self, page_ptr: *const RawPage) -> PFN { + PFN::from(unsafe { page_ptr.offset_from(PAGE_ARRAY.as_ptr()) as usize }) + } +} + +impl Zone for GlobalZone { + type Page = RawPage; + + fn contains_prange(&self, _: PRange) -> bool { + true + } + + fn get_page(&self, pfn: PFN) -> Option> { + Some(unsafe { PAGE_ARRAY.add(usize::from(pfn)) }) + } +} diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 3ccf3255..3fe33d5b 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -1,376 +1,235 @@ -use super::{paging::AllocZeroed, Page}; -use crate::{ - io::{Buffer, FillResult, Stream}, - kernel::mem::page_alloc::RawPagePtr, - prelude::KResult, - GlobalPageAlloc, -}; -use align_ext::AlignExt; -use alloc::{collections::btree_map::BTreeMap, sync::Weak}; -use core::mem::ManuallyDrop; -use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::{ - address::{PAddr, PhysAccess}, - paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN}, -}; +use alloc::collections::btree_map::{BTreeMap, Entry}; +use core::future::Future; +use core::ops::{Deref, DerefMut}; + +use eonix_mm::paging::{Folio as _, PAGE_SIZE, PAGE_SIZE_BITS, PFN}; use eonix_sync::Mutex; -pub struct PageCache { - pages: Mutex>, - 
backend: Weak, -} +use super::page_alloc::PageFlags; +use super::{Folio, FolioOwned}; +use crate::io::{Buffer, Stream}; +use crate::kernel::constants::EINVAL; +use crate::kernel::vfs::inode::InodeUse; +use crate::prelude::KResult; -unsafe impl Send for PageCache {} -unsafe impl Sync for PageCache {} +#[repr(transparent)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct PageOffset(usize); -#[derive(Clone, Copy)] -pub struct CachePage(RawPagePtr); +pub struct PageCache { + pages: Mutex>, + inode: InodeUse, +} -unsafe impl Send for CachePage {} +pub struct CachePage(Folio); -impl Buffer for CachePage { - fn total(&self) -> usize { - PAGE_SIZE +impl PageOffset { + pub const fn from_byte_floor(offset: usize) -> Self { + Self(offset >> PAGE_SIZE_BITS) } - fn wrote(&self) -> usize { - self.valid_size() + pub const fn from_byte_ceil(offset: usize) -> Self { + Self((offset + PAGE_SIZE - 1) >> PAGE_SIZE_BITS) } - fn fill(&mut self, data: &[u8]) -> KResult { - let valid_size = self.valid_size(); - let available = &mut self.all_mut()[valid_size..]; - if available.len() == 0 { - return Ok(FillResult::Full); - } - - let len = core::cmp::min(data.len(), available.len()); - available[..len].copy_from_slice(&data[..len]); + pub fn iter_till(self, end: PageOffset) -> impl Iterator { + (self.0..end.0).map(PageOffset) + } - *self.0.valid_size() += len; + pub fn page_count(self) -> usize { + self.0 + } - if len < data.len() { - Ok(FillResult::Partial(len)) - } else { - Ok(FillResult::Done(len)) - } + pub fn byte_count(self) -> usize { + self.page_count() * PAGE_SIZE } } impl CachePage { pub fn new() -> Self { - let page = GlobalPageAlloc.alloc().unwrap(); - page.cache_init(); - Self(page) + CachePage(Folio::alloc()) } pub fn new_zeroed() -> Self { - let page = Page::zeroed(); - let raw_page_ptr = RawPagePtr::from(page.into_raw()); + CachePage({ + let mut folio = FolioOwned::alloc(); + folio.as_bytes_mut().fill(0); - raw_page_ptr.cache_init(); - Self(raw_page_ptr) + folio.share() + }) } - pub fn valid_size(&self) -> usize { - *self.0.valid_size() - } - - pub fn set_valid_size(&mut self, valid_size: usize) { - *self.0.valid_size() = valid_size; - } - - pub fn all(&self) -> &[u8] { - unsafe { - core::slice::from_raw_parts( - // SAFETY: The page is exclusively owned by us, so we can safely access its data. - ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(), - PAGE_SIZE, - ) - } + pub fn is_dirty(&self) -> bool { + self.flags.has(PageFlags::DIRTY) } - pub fn all_mut(&mut self) -> &mut [u8] { - unsafe { - core::slice::from_raw_parts_mut( - // SAFETY: The page is exclusively owned by us, so we can safely access its data. 
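The `PageOffset` conversions above are plain shift arithmetic; a quick standalone check of the floor/ceil pair with 4 KiB pages:

```rust
const PAGE_SIZE_BITS: usize = 12;
const PAGE_SIZE: usize = 1 << PAGE_SIZE_BITS; // 4096

const fn from_byte_floor(offset: usize) -> usize {
    offset >> PAGE_SIZE_BITS
}

const fn from_byte_ceil(offset: usize) -> usize {
    (offset + PAGE_SIZE - 1) >> PAGE_SIZE_BITS
}

fn main() {
    assert_eq!(from_byte_floor(4095), 0);
    assert_eq!(from_byte_floor(4096), 1);
    assert_eq!(from_byte_ceil(4096), 1); // an exact page boundary needs no extra page
    assert_eq!(from_byte_ceil(4097), 2);
    assert_eq!(from_byte_ceil(0), 0); // an empty file occupies no pages
}
```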
- ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(), - PAGE_SIZE, - ) + pub fn set_dirty(&self, dirty: bool) { + if dirty { + self.flags.set(PageFlags::DIRTY); + } else { + self.flags.clear(PageFlags::DIRTY); } } - pub fn valid_data(&self) -> &[u8] { - &self.all()[..self.valid_size()] + pub fn add_mapping(&self) -> PFN { + // TODO: Increase map_count + self.0.clone().into_raw() } +} - pub fn is_dirty(&self) -> bool { - self.0.is_dirty() - } +impl Deref for CachePage { + type Target = Folio; - pub fn set_dirty(&self) { - self.0.set_dirty(); + fn deref(&self) -> &Self::Target { + &self.0 } +} - pub fn clear_dirty(&self) { - self.0.clear_dirty(); +impl DerefMut for CachePage { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 } } impl PageCache { - pub fn new(backend: Weak) -> Self { + pub fn new(inode: InodeUse) -> Self { Self { pages: Mutex::new(BTreeMap::new()), - backend: backend, + inode, } } - pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult<usize> { - let mut pages = self.pages.lock().await; - let size = self.backend.upgrade().unwrap().size(); - - loop { - if offset >= size { - break; - } - let page_id = offset >> PAGE_SIZE_BITS; - let page = pages.get(&page_id); - - match page { - Some(page) => { - let inner_offset = offset % PAGE_SIZE; - let available_in_file = size.saturating_sub(offset); - - // TODO: still cause unnecessary IO if valid_size < PAGESIZE - // and fill result is Done - let page_data = &page.valid_data()[inner_offset..]; - let read_size = page_data.len().min(available_in_file); - - if read_size == 0 - || buffer.fill(&page_data[..read_size])?.should_stop() - || buffer.available() == 0 - { - break; - } - offset += read_size; - } - None => { + pub fn get_page_locked<'a>( + &self, + pages: &'a mut BTreeMap<PageOffset, CachePage>, + pgoff: PageOffset, + ) -> impl Future<Output = KResult<&'a mut CachePage>> + Send + use<'_, 'a> { + async move { + match pages.entry(pgoff) { + Entry::Occupied(ent) => Ok(ent.into_mut()), + Entry::Vacant(vacant_entry) => { let mut new_page = CachePage::new(); - self.backend - .upgrade() - .unwrap() - .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?; - pages.insert(page_id, new_page); + self.inode.read_page(&mut new_page, pgoff).await?; + + Ok(vacant_entry.insert(new_page)) } } } + } - Ok(buffer.wrote()) + fn len(&self) -> usize { + self.inode.info.lock().size as usize } - pub async fn write(&self, stream: &mut dyn Stream, mut offset: usize) -> KResult<usize> { + // TODO: Remove this. + pub async fn with_page(&self, pgoff: PageOffset, func: impl FnOnce(&CachePage)) -> KResult<()> { let mut pages = self.pages.lock().await; - let old_size = self.backend.upgrade().unwrap().size(); - let mut wrote = 0; - - loop { - let page_id = offset >> PAGE_SIZE_BITS; - let page = pages.get_mut(&page_id); - - match page { - Some(page) => { - let inner_offset = offset % PAGE_SIZE; - let cursor_end = match stream.poll_data(&mut page.all_mut()[inner_offset..])?
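`get_page_locked` is a get-or-load keyed on `PageOffset`, using the `BTreeMap` entry API so the tree is searched only once per lookup. The same shape with the async inode read swapped for a synchronous loader:

```rust
use std::collections::btree_map::{BTreeMap, Entry};

fn get_or_load<'a>(
    pages: &'a mut BTreeMap<usize, Vec<u8>>,
    pgoff: usize,
) -> &'a mut Vec<u8> {
    match pages.entry(pgoff) {
        // Cache hit: `into_mut` turns the occupied entry into a reference
        // that lives as long as the map borrow.
        Entry::Occupied(ent) => ent.into_mut(),
        // Miss: load the page, then insert-and-borrow in one step.
        Entry::Vacant(vacant) => vacant.insert(vec![0u8; 4096]),
    }
}

fn main() {
    let mut pages = BTreeMap::new();
    get_or_load(&mut pages, 3)[0] = 0xAA;
    assert_eq!(get_or_load(&mut pages, 3)[0], 0xAA); // second call hits the cache
    assert_eq!(pages.len(), 1);
}
```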
{ - Some(buf) => { - wrote += buf.len(); - inner_offset + buf.len() - } - None => { - break; - } - }; - - if page.valid_size() < cursor_end { - page.set_valid_size(cursor_end); - } - page.set_dirty(); - offset += PAGE_SIZE - inner_offset; - } - None => { - let new_page = if (offset >> PAGE_SIZE_BITS) > (old_size >> PAGE_SIZE_BITS) { - let new_page = CachePage::new_zeroed(); - new_page - } else { - let mut new_page = CachePage::new(); - self.backend - .upgrade() - .unwrap() - .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?; - new_page - }; - - pages.insert(page_id, new_page); - } - } + if pgoff > PageOffset::from_byte_ceil(self.len()) { + return Err(EINVAL); } - Ok(wrote) - } + let cache_page = self.get_page_locked(&mut pages, pgoff).await?; + + func(cache_page); - pub async fn fsync(&self) -> KResult<()> { - let pages = self.pages.lock().await; - for (page_id, page) in pages.iter() { - if page.is_dirty() { - self.backend - .upgrade() - .unwrap() - .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)?; - page.clear_dirty(); - } - } Ok(()) } - // This function is used for extend write or truncate - pub async fn resize(&self, new_size: usize) -> KResult<()> { + pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult { let mut pages = self.pages.lock().await; - let old_size = self.backend.upgrade().unwrap().size(); - - if new_size < old_size { - let begin = new_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS; - let end = old_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS; - - for page_id in begin..end { - pages.remove(&page_id); - } - } else if new_size > old_size { - let begin = old_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS; - let end = new_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS; - - pages.remove(&begin); - - for page_id in begin..end { - let mut new_page = CachePage::new_zeroed(); + let total_len = self.len(); - if page_id != end - 1 { - new_page.set_valid_size(PAGE_SIZE); - } else { - new_page.set_valid_size(new_size % PAGE_SIZE); - } - new_page.set_dirty(); - pages.insert(page_id, new_page); - } + if offset >= total_len { + return Ok(0); } - Ok(()) - } + let pgoff_start = PageOffset::from_byte_floor(offset); + let pgoff_end = PageOffset::from_byte_ceil(total_len); - pub async fn with_page(&self, offset: usize, func: F) -> KResult> - where - F: FnOnce(&Page, &CachePage) -> O, - { - let offset_aligin = offset.align_down(PAGE_SIZE); - let page_id = offset_aligin >> PAGE_SIZE_BITS; - let size = self.backend.upgrade().unwrap().size(); + for pgoff in pgoff_start.iter_till(pgoff_end) { + let page = self.get_page_locked(&mut pages, pgoff).await?; - if offset_aligin > size { - return Ok(None); - } + let end_offset = (offset + PAGE_SIZE) / PAGE_SIZE * PAGE_SIZE; + let real_end = end_offset.min(total_len); - let mut pages = self.pages.lock().await; + let inner_offset = offset % PAGE_SIZE; + let data_len = real_end - offset; - let raw_page_ptr = match pages.get(&page_id) { - Some(CachePage(raw_page_ptr)) => *raw_page_ptr, - None => { - let mut new_page = CachePage::new(); - self.backend - .upgrade() - .unwrap() - .read_page(&mut new_page, offset_aligin)?; - pages.insert(page_id, new_page); - new_page.0 + if buffer + .fill(&page.lock().as_bytes()[inner_offset..inner_offset + data_len])? 
+ .should_stop() + || buffer.available() == 0 + { + break; } - }; - - unsafe { - let page = ManuallyDrop::new(Page::from_raw_unchecked(PFN::from(raw_page_ptr))); - Ok(Some(func(&page, &CachePage(raw_page_ptr)))) + offset = real_end; } - } -} -pub struct CachePageStream { - page: CachePage, - cur: usize, -} - -impl CachePageStream { - pub fn new(page: CachePage) -> Self { - Self { page, cur: 0 } + Ok(buffer.wrote()) } - pub fn remaining(&self) -> usize { - self.page.valid_size().saturating_sub(self.cur) - } + pub async fn write(&self, stream: &mut dyn Stream, mut offset: usize) -> KResult { + let mut pages = self.pages.lock().await; + let mut total_written = 0; - pub fn is_drained(&self) -> bool { - self.cur >= self.page.valid_size() - } -} + loop { + let end_offset = (offset + PAGE_SIZE) / PAGE_SIZE * PAGE_SIZE; + let len = end_offset - offset; + + // TODO: Rewrite to return a write state object. + let page = self + .inode + .write_begin(self, &mut pages, offset, len) + .await?; + + let inner_offset = offset % PAGE_SIZE; + let written = stream + .poll_data(&mut page.lock().as_bytes_mut()[inner_offset..])? + .map(|b| b.len()) + .unwrap_or(0); + + page.set_dirty(true); + self.inode + .write_end(self, &mut pages, offset, len, written) + .await?; + + if written == 0 { + break; + } -impl Stream for CachePageStream { - fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult> { - if self.cur >= self.page.valid_size() { - return Ok(None); + total_written += written; + offset += written; } - let page_data = &self.page.all()[self.cur..self.page.valid_size()]; - let to_read = buf.len().min(page_data.len()); + Ok(total_written) + } - buf[..to_read].copy_from_slice(&page_data[..to_read]); - self.cur += to_read; + pub async fn fsync(&self) -> KResult<()> { + let mut pages = self.pages.lock().await; - Ok(Some(&mut buf[..to_read])) - } + for (&pgoff, page) in pages.iter_mut() { + if !page.is_dirty() { + continue; + } - fn ignore(&mut self, len: usize) -> KResult> { - if self.cur >= self.page.valid_size() { - return Ok(None); + self.inode.write_page(page, pgoff).await?; + page.set_dirty(false); } - let to_ignore = len.min(self.page.valid_size() - self.cur); - self.cur += to_ignore; - Ok(Some(to_ignore)) + Ok(()) } } -// with this trait, "page cache" and "block cache" are unified, -// for fs, offset is file offset (floor algin to PAGE_SIZE) -// for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE) -// Oh no, this would make unnecessary cache -pub trait PageCacheBackend { - fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult; - - fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult; - - fn size(&self) -> usize; -} - -pub trait PageCacheRawPage: RawPage { - fn valid_size(&self) -> &mut usize; - - fn is_dirty(&self) -> bool; - - fn set_dirty(&self); - - fn clear_dirty(&self); - - fn cache_init(&self); +impl core::fmt::Debug for PageCache { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("PageCache").finish() + } } impl Drop for PageCache { fn drop(&mut self) { + // XXX: Send the PageCache to some flusher worker. 
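Both new loops in `read` and `write` clamp each iteration at `(offset + PAGE_SIZE) / PAGE_SIZE * PAGE_SIZE`, the smallest page boundary strictly greater than `offset`. A standalone check of that expression:

```rust
const PAGE_SIZE: usize = 4096;

fn end_of_current_page(offset: usize) -> usize {
    (offset + PAGE_SIZE) / PAGE_SIZE * PAGE_SIZE
}

fn main() {
    assert_eq!(end_of_current_page(0), 4096);
    assert_eq!(end_of_current_page(4095), 4096);
    // An already-aligned offset advances to the *next* boundary, so the
    // loop always makes progress instead of copying an empty slice.
    assert_eq!(end_of_current_page(4096), 8192);
}
```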
let _ = self.fsync(); } } diff --git a/src/kernel/mem/paging.rs b/src/kernel/mem/paging.rs index 8c5f41f2..1b95ce79 100644 --- a/src/kernel/mem/paging.rs +++ b/src/kernel/mem/paging.rs @@ -1,12 +1,11 @@ -use super::{access::AsMemoryBlock, page_alloc::GlobalPageAlloc, MemoryBlock, PhysAccess}; -use crate::io::{Buffer, FillResult}; -use eonix_mm::paging::{Page as GenericPage, PageAlloc}; +use eonix_mm::paging::Folio as _; -pub type Page = GenericPage; +use super::folio::FolioOwned; +use crate::io::{Buffer, FillResult}; /// A buffer that wraps a page and provides a `Buffer` interface. pub struct PageBuffer { - page: Page, + page: FolioOwned, offset: usize, } @@ -14,28 +13,16 @@ pub trait AllocZeroed { fn zeroed() -> Self; } -impl AsMemoryBlock for GenericPage { - fn as_memblk(&self) -> MemoryBlock { - unsafe { - // SAFETY: `self.start()` points to valid memory of length `self.len()`. - MemoryBlock::new(self.start().as_ptr::<()>().addr(), self.len()) - } - } -} - impl PageBuffer { pub fn new() -> Self { Self { - page: Page::alloc(), + page: FolioOwned::alloc(), offset: 0, } } pub fn all(&self) -> &[u8] { - unsafe { - // SAFETY: The page is exclusivly owned by us. - self.page.as_memblk().as_bytes() - } + self.page.as_bytes() } pub fn data(&self) -> &[u8] { @@ -43,10 +30,7 @@ impl PageBuffer { } pub fn available_mut(&mut self) -> &mut [u8] { - unsafe { - // SAFETY: The page is exclusivly owned by us. - &mut self.page.as_memblk().as_bytes_mut()[self.offset..] - } + &mut self.page.as_bytes_mut()[self.offset..] } } @@ -76,14 +60,3 @@ impl Buffer for PageBuffer { } } } - -impl AllocZeroed for Page { - fn zeroed() -> Self { - let page = Self::alloc(); - unsafe { - // SAFETY: The page is exclusivly owned by us. - page.as_memblk().as_bytes_mut().fill(0); - } - page - } -} diff --git a/src/kernel/pcie/device.rs b/src/kernel/pcie/device.rs index 085e7b9a..2a8f150d 100644 --- a/src/kernel/pcie/device.rs +++ b/src/kernel/pcie/device.rs @@ -1,14 +1,17 @@ -use super::{ - header::{Bar, Command}, - CommonHeader, Header, -}; -use crate::kernel::mem::PhysAccess as _; +use alloc::collections::btree_map::BTreeMap; +use alloc::sync::Arc; +use alloc::vec::Vec; +use core::num::NonZero; +use core::ops::RangeInclusive; + use align_ext::AlignExt; -use alloc::{collections::btree_map::BTreeMap, sync::Arc, vec::Vec}; -use core::{num::NonZero, ops::RangeInclusive}; use eonix_mm::address::{Addr, PAddr, PRange}; use eonix_sync::Spin; +use super::header::{Bar, Command}; +use super::{CommonHeader, Header}; +use crate::kernel::mem::PhysAccess as _; + pub(super) static PCIE_DEVICES: Spin>>> = Spin::new(BTreeMap::new()); @@ -20,7 +23,7 @@ pub struct PCIDevice<'a> { pub device_id: u16, } -#[allow(dead_code)] +#[allow(unused)] #[derive(Clone)] pub struct SegmentGroup { id: usize, @@ -28,6 +31,7 @@ pub struct SegmentGroup { base_address: PAddr, } +#[allow(unused)] #[derive(Clone)] pub struct ConfigSpace { pub bus: u8, @@ -180,10 +184,12 @@ impl PCIDevice<'_> { ); } + #[allow(unused)] pub fn config_space(&self) -> &ConfigSpace { &self.config_space } + #[allow(unused)] pub fn segment_group(&self) -> &SegmentGroup { &self.segment_group } @@ -209,7 +215,7 @@ impl PciMemoryAllocator { self.start += size; eonix_log::println_trace!( - "trace_pci", + feat: "trace_pci", "PciMemoryAllocator: Allocated {} bytes at {:#x}", size, base diff --git a/src/kernel/pcie/driver.rs b/src/kernel/pcie/driver.rs index be88b7df..eebaa896 100644 --- a/src/kernel/pcie/driver.rs +++ b/src/kernel/pcie/driver.rs @@ -4,21 +4,24 @@ use super::{ }; use 
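`PciMemoryAllocator` hands out BAR windows by bumping a cursor. A minimal model of the idea, assuming (as PCI requires) that each BAR is aligned to its own power-of-two size; the real allocator's rounding and bounds checks may differ:

```rust
struct BumpAlloc {
    start: u64,
    end: u64,
}

impl BumpAlloc {
    fn alloc(&mut self, size: u64) -> Option<u64> {
        // BARs are naturally aligned: round the cursor up to `size`.
        let base = self.start.next_multiple_of(size);
        if base.checked_add(size)? > self.end {
            return None;
        }
        self.start = base + size;
        Some(base)
    }
}

fn main() {
    let mut alloc = BumpAlloc { start: 0x4000_1000, end: 0x5000_0000 };
    assert_eq!(alloc.alloc(0x1000), Some(0x4000_1000));
    // A 16 KiB BAR is bumped up to the next 16 KiB boundary.
    assert_eq!(alloc.alloc(0x4000), Some(0x4000_4000));
}
```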
crate::{kernel::constants::EEXIST, KResult}; use alloc::{ + boxed::Box, collections::btree_map::{self, BTreeMap}, sync::Arc, }; +use async_trait::async_trait; use eonix_sync::Spin; static PCIE_DRIVERS: Spin>> = Spin::new(BTreeMap::new()); +#[async_trait] pub trait PCIDriver: Send + Sync { fn vendor_id(&self) -> u16; fn device_id(&self) -> u16; - fn handle_device(&self, device: Arc>) -> Result<(), PciError>; + async fn handle_device(&self, device: Arc>) -> Result<(), PciError>; } -pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> { +pub async fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> { let index = (driver.vendor_id() as u32) << 16 | driver.device_id() as u32; let driver = Arc::new(driver); @@ -31,7 +34,7 @@ pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> { let devices = PCIE_DEVICES.lock().get(&index).cloned(); if let Some(devices) = devices { for device in devices { - driver.handle_device(device)?; + driver.handle_device(device).await?; } }; diff --git a/src/kernel/pcie/header.rs b/src/kernel/pcie/header.rs index 889795d3..0a44ea28 100644 --- a/src/kernel/pcie/header.rs +++ b/src/kernel/pcie/header.rs @@ -1,10 +1,9 @@ +use core::marker::PhantomData; +use core::num::NonZero; +use core::ops::{BitAnd, BitOr, Deref, Not}; +use core::sync::atomic::{AtomicU16, AtomicU32, Ordering}; + use bitflags::bitflags; -use core::{ - marker::PhantomData, - num::NonZero, - ops::{BitAnd, BitOr, Deref, Not}, - sync::atomic::{AtomicU16, AtomicU32, Ordering}, -}; use eonix_hal::fence::memory_barrier; pub trait BitFlag: Sized + Copy { @@ -215,14 +214,14 @@ where } impl CommonHeader { - pub fn command(&self) -> Register { + pub fn command(&self) -> Register<'_, Command> { Register { register: unsafe { AtomicU16::from_ptr((&raw const self._command) as *mut u16) }, _phantom: PhantomData, } } - pub fn status(&self) -> Register { + pub fn status(&self) -> Register<'_, Status> { Register { register: unsafe { AtomicU16::from_ptr((&raw const self._status) as *mut u16) }, _phantom: PhantomData, @@ -231,7 +230,7 @@ impl CommonHeader { } impl Bars<'_> { - pub fn iter(&self) -> impl Iterator + '_ { + pub fn iter(&self) -> impl Iterator> + use<'_> { struct BarsIterator<'a> { bars: &'a [AtomicU32], pos: usize, diff --git a/src/kernel/pcie/init.rs b/src/kernel/pcie/init.rs index c0253f4e..74a490b4 100644 --- a/src/kernel/pcie/init.rs +++ b/src/kernel/pcie/init.rs @@ -1,14 +1,16 @@ -use super::{ - device::{PCIDevice, SegmentGroup, PCIE_DEVICES}, - error::PciError, -}; -use crate::kernel::{mem::PhysAccess as _, pcie::device::PciMemoryAllocator}; -use acpi::{AcpiHandler, PhysicalMapping}; use alloc::collections::btree_map::Entry; use alloc::vec; + +use acpi::{AcpiHandler, PhysicalMapping}; use eonix_log::println_trace; -use eonix_mm::address::PAddr; +use eonix_mm::address::{PAddr, PRange}; + +use super::device::{PCIDevice, SegmentGroup, PCIE_DEVICES}; +use super::error::PciError; +use crate::kernel::mem::PhysAccess as _; +use crate::kernel::pcie::device::PciMemoryAllocator; +#[allow(unused)] #[derive(Clone)] struct AcpiHandlerImpl; @@ -33,7 +35,6 @@ pub fn init_pcie() -> Result<(), PciError> { #[cfg(target_arch = "x86_64")] { use acpi::{AcpiTables, PciConfigRegions}; - use eonix_mm::address::PAddr; let acpi_tables = unsafe { // SAFETY: Our impl should be correct. 
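`PCIDriver::handle_device` becomes `async` via `#[async_trait]` (added to Cargo.toml in this change): the macro boxes the returned future so `dyn PCIDriver` stays object-safe. A compact sketch of the same shape, driven by the `futures` executor that is also now a dependency:

```rust
use async_trait::async_trait;

#[async_trait]
trait Driver: Send + Sync {
    async fn handle_device(&self, id: u16) -> Result<(), ()>;
}

struct NullDriver;

#[async_trait]
impl Driver for NullDriver {
    async fn handle_device(&self, _id: u16) -> Result<(), ()> {
        Ok(())
    }
}

fn main() {
    // `dyn Driver` still works: the macro desugars the method into
    // `fn handle_device(&self, ..) -> Pin<Box<dyn Future<Output = _> + Send + '_>>`.
    let driver: &dyn Driver = &NullDriver;
    futures::executor::block_on(driver.handle_device(0x1af4)).unwrap();
}
```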
@@ -67,9 +68,9 @@ pub fn init_pcie() -> Result<(), PciError> { #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { - use crate::kernel::constants::{EINVAL, EIO, ENOENT}; use eonix_hal::arch_exported::fdt::FDT; - use eonix_mm::address::PRange; + + use crate::kernel::constants::{EINVAL, EIO, ENOENT}; let pcie_node = FDT .find_compatible(&["pci-host-ecam-generic"]) @@ -86,7 +87,7 @@ pub fn init_pcie() -> Result<(), PciError> { let size = u64::from_be_bytes(entry[20..28].try_into().unwrap()); println_trace!( - "trace_pci", + feat: "trace_pci", "PCIe range: PCI address = {:#x}, CPU address = {:#x}, size = {:#x}", pci_address, cpu_address, diff --git a/src/kernel/syscall.rs b/src/kernel/syscall.rs index 78ddcd1c..d06c5d88 100644 --- a/src/kernel/syscall.rs +++ b/src/kernel/syscall.rs @@ -1,11 +1,17 @@ -use super::task::ThreadAlloc; -use crate::kernel::task::Thread; use alloc::boxed::Box; -use core::{future::Future, marker::PhantomData, ops::Deref, pin::Pin}; +use core::future::Future; +use core::marker::PhantomData; +use core::ops::Deref; +use core::pin::Pin; + +use eonix_hal::extern_symbol_addr; use eonix_mm::address::{Addr, VAddr}; use eonix_sync::LazyLock; use posix_types::ctypes::PtrT; +use super::task::ThreadAlloc; +use crate::kernel::task::Thread; + pub mod file_rw; pub mod mm; pub mod net; @@ -280,12 +286,6 @@ impl core::fmt::Debug for UserMut { } static SYSCALL_HANDLERS: LazyLock<[Option; MAX_SYSCALL_NO]> = LazyLock::new(|| { - extern "C" { - // SAFETY: `SYSCALL_HANDLERS` is defined in linker script. - fn RAW_SYSCALL_HANDLERS(); - fn RAW_SYSCALL_HANDLERS_SIZE(); - } - // DO NOT TOUCH THESE FUNCTIONS!!! // THEY ARE USED FOR KEEPING THE OBJECTS NOT STRIPPED BY THE LINKER!!! file_rw::keep_alive(); @@ -294,15 +294,14 @@ static SYSCALL_HANDLERS: LazyLock<[Option; MAX_SYSCALL_NO]> = La procops::keep_alive(); sysinfo::keep_alive(); - let raw_handlers_addr = RAW_SYSCALL_HANDLERS as *const (); - let raw_handlers_size_byte = RAW_SYSCALL_HANDLERS_SIZE as usize; + let raw_handlers_size_byte = extern_symbol_addr!(RAW_SYSCALL_HANDLERS_SIZE); assert!(raw_handlers_size_byte % size_of::<RawSyscallHandler>() == 0); let raw_handlers_count = raw_handlers_size_byte / size_of::<RawSyscallHandler>(); let raw_handlers = unsafe { core::slice::from_raw_parts( - raw_handlers_addr as *const RawSyscallHandler, + extern_symbol_addr!(RAW_SYSCALL_HANDLERS, RawSyscallHandler), raw_handlers_count, ) }; diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 1a48b255..8ac9c22a 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,34 +1,33 @@ -use super::{FromSyscallArg, User}; -use crate::io::IntoStream; -use crate::kernel::constants::{ - EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, -}; -use crate::kernel::syscall::UserMut; -use crate::kernel::task::Thread; -use crate::kernel::timer::sleep; -use crate::kernel::vfs::filearray::FD; -use crate::kernel::vfs::inode::Mode; -use crate::kernel::vfs::{PollEvent, SeekOption}; -use crate::{ - io::{Buffer, BufferFill}, - kernel::{ - user::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}, - vfs::dentry::Dentry, - }, - path::Path, - prelude::*, -}; use alloc::sync::Arc; use core::time::Duration; + use posix_types::ctypes::{Long, PtrT}; use posix_types::namei::RenameFlags; use posix_types::open::{AtFlags, OpenFlags}; use posix_types::poll::FDSet; use posix_types::signal::{SigSet, Signal}; -use posix_types::stat::Stat; -use posix_types::stat::{StatX, TimeSpec}; +use posix_types::stat::{Stat,
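On the FDT path: each `ranges` entry is parsed as big-endian bytes; with the usual 3-cell PCI address, 2-cell CPU address, 2-cell size layout an entry is 28 bytes. A decoding sketch under that assumed layout (only the `size` slice at bytes 20..28 is visible in the hunk):

```rust
/// One `ranges` entry: (pci.hi flags, PCI address, CPU address, size).
fn parse_range(entry: &[u8; 28]) -> (u32, u64, u64, u64) {
    let flags = u32::from_be_bytes(entry[0..4].try_into().unwrap());
    let pci_address = u64::from_be_bytes(entry[4..12].try_into().unwrap());
    let cpu_address = u64::from_be_bytes(entry[12..20].try_into().unwrap());
    let size = u64::from_be_bytes(entry[20..28].try_into().unwrap());
    (flags, pci_address, cpu_address, size)
}

fn main() {
    let mut entry = [0u8; 28];
    entry[20..28].copy_from_slice(&0x4000_0000u64.to_be_bytes());
    let (_, _, _, size) = parse_range(&entry);
    assert_eq!(size, 0x4000_0000); // a 1 GiB window
}
```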
StatX, TimeSpec}; use posix_types::syscall_no::*; +use super::{FromSyscallArg, User}; +use crate::io::{Buffer, BufferFill, IntoStream}; +use crate::kernel::constants::{ + EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, + SEEK_SET, +}; +use crate::kernel::syscall::UserMut; +use crate::kernel::task::Thread; +use crate::kernel::timer::sleep; +use crate::kernel::user::{ + CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString, +}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::filearray::FD; +use crate::kernel::vfs::types::{DeviceId, Mode}; +use crate::kernel::vfs::{PollEvent, SeekOption}; +use crate::path::Path; +use crate::prelude::*; + impl FromSyscallArg for OpenFlags { fn from_arg(value: usize) -> Self { OpenFlags::from_bits_retain(value as u32) @@ -41,7 +40,7 @@ impl FromSyscallArg for AtFlags { } } -fn dentry_from( +async fn dentry_from( thread: &Thread, dirfd: FD, pathname: User, @@ -52,7 +51,7 @@ fn dentry_from( match (path.as_cstr().to_bytes_with_nul()[0], dirfd) { (b'/', _) | (_, FD::AT_FDCWD) => { let path = Path::new(path.as_cstr().to_bytes())?; - Dentry::open(&thread.fs_context, path, follow_symlink) + Dentry::open(&thread.fs_context, path, follow_symlink).await } (0, dirfd) => { let dir_file = thread.files.get(dirfd).ok_or(EBADF)?; @@ -63,7 +62,13 @@ fn dentry_from( let dir_file = thread.files.get(dirfd).ok_or(EBADF)?; let dir_dentry = dir_file.as_path().ok_or(ENOTDIR)?; - Dentry::open_at(&thread.fs_context, dir_dentry, path, follow_symlink) + Dentry::open_at( + &thread.fs_context, + dir_dentry, + path, + follow_symlink, + ) + .await } } } @@ -81,7 +86,12 @@ async fn read(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_PREAD64)] -async fn pread64(fd: FD, buffer: UserMut, bufsize: usize, offset: usize) -> KResult { +async fn pread64( + fd: FD, + buffer: UserMut, + bufsize: usize, + offset: usize, +) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread @@ -106,7 +116,12 @@ async fn write(fd: FD, buffer: User, count: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_PWRITE64)] -async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KResult { +async fn pwrite64( + fd: FD, + buffer: User, + count: usize, + offset: usize, +) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); @@ -119,18 +134,22 @@ async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KRes } #[eonix_macros::define_syscall(SYS_OPENAT)] -async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mut mode: Mode) -> KResult { - let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink())?; - - let umask = *thread.fs_context.umask.lock(); - mode.mask_perm(!umask.non_format_bits()); +async fn openat( + dirfd: FD, + pathname: User, + flags: OpenFlags, + mode: Mode, +) -> KResult { + let dentry = + dentry_from(thread, dirfd, pathname, flags.follow_symlink()).await?; + let perm = mode.perm().mask_with(*thread.fs_context.umask.lock()); - thread.files.open(&dentry, flags, mode) + thread.files.open(thread, &dentry, flags, perm).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_OPEN)] -async fn open(path: User, flags: OpenFlags, mode: u32) -> KResult { +async fn open(path: User, flags: OpenFlags, mode: Mode) -> KResult { sys_openat(thread, FD::AT_FDCWD, path, flags, mode).await } @@ -147,7 +166,10 @@ async fn dup(fd: FD) -> KResult { #[cfg(target_arch = "x86_64")] 
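`dentry_from` above dispatches on the pair (first path byte, dirfd), folding the absolute-path and `AT_FDCWD` cases together since both resolve against the task's fs context. The decision table as a pure function, with illustrative names:

```rust
const AT_FDCWD: i32 = -100;

#[derive(Debug, PartialEq)]
enum Lookup {
    FromCwd,         // absolute path, or any path relative to AT_FDCWD
    DirfdItself,     // empty path: operate on the file behind dirfd
    RelativeToDirfd, // relative path anchored at dirfd
}

fn classify(first_byte: u8, dirfd: i32) -> Lookup {
    match (first_byte, dirfd) {
        (b'/', _) | (_, AT_FDCWD) => Lookup::FromCwd,
        (0, _) => Lookup::DirfdItself,
        (_, _) => Lookup::RelativeToDirfd,
    }
}

fn main() {
    assert_eq!(classify(b'/', 3), Lookup::FromCwd); // dirfd ignored
    assert_eq!(classify(b'.', AT_FDCWD), Lookup::FromCwd);
    assert_eq!(classify(0, 3), Lookup::DirfdItself);
    assert_eq!(classify(b'.', 3), Lookup::RelativeToDirfd);
}
```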
#[eonix_macros::define_syscall(SYS_DUP2)] async fn dup2(old_fd: FD, new_fd: FD) -> KResult { - thread.files.dup_to(old_fd, new_fd, OpenFlags::empty()) + thread + .files + .dup_to(old_fd, new_fd, OpenFlags::empty()) + .await } #[eonix_macros::define_syscall(SYS_DUP3)] @@ -157,7 +179,8 @@ async fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { #[eonix_macros::define_syscall(SYS_PIPE2)] async fn pipe2(pipe_fd: UserMut<[FD; 2]>, flags: OpenFlags) -> KResult<()> { - let mut buffer = UserBuffer::new(pipe_fd.cast(), core::mem::size_of::<[FD; 2]>())?; + let mut buffer = + UserBuffer::new(pipe_fd.cast(), core::mem::size_of::<[FD; 2]>())?; let (read_fd, write_fd) = thread.files.pipe(flags)?; buffer.copy(&[read_fd, write_fd])?.ok_or(EFAULT) @@ -171,15 +194,29 @@ async fn pipe(pipe_fd: UserMut<[FD; 2]>) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETDENTS)] -async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { +async fn getdents( + fd: FD, + buffer: UserMut, + bufsize: usize, +) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - thread.files.get(fd).ok_or(EBADF)?.getdents(&mut buffer)?; + thread + .files + .get(fd) + .ok_or(EBADF)? + .getdents(&mut buffer) + .await?; + Ok(buffer.wrote()) } #[eonix_macros::define_syscall(SYS_GETDENTS64)] -async fn getdents64(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { +async fn getdents64( + fd: FD, + buffer: UserMut, + bufsize: usize, +) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread @@ -206,7 +243,7 @@ async fn newfstatat( let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; let statbuf = UserPointerMut::new(statbuf)?; @@ -225,7 +262,8 @@ async fn newfstatat( )] #[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTAT64))] async fn newfstat(fd: FD, statbuf: UserMut) -> KResult<()> { - sys_newfstatat(thread, fd, User::null(), statbuf, AtFlags::AT_EMPTY_PATH).await + sys_newfstatat(thread, fd, User::null(), statbuf, AtFlags::AT_EMPTY_PATH) + .await } #[eonix_macros::define_syscall(SYS_STATX)] @@ -247,7 +285,7 @@ async fn statx( let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? 
}; dentry.statx(&mut statx, mask)?; @@ -257,24 +295,23 @@ async fn statx( } #[eonix_macros::define_syscall(SYS_MKDIRAT)] -async fn mkdirat(dirfd: FD, pathname: User, mut mode: Mode) -> KResult<()> { - let umask = *thread.fs_context.umask.lock(); - mode.mask_perm(!umask.non_format_bits()); +async fn mkdirat(dirfd: FD, pathname: User, mode: Mode) -> KResult<()> { + let dentry = dentry_from(thread, dirfd, pathname, true).await?; + let perm = mode.perm().mask_with(*thread.fs_context.umask.lock()); - let dentry = dentry_from(thread, dirfd, pathname, true)?; - dentry.mkdir(mode) + dentry.mkdir(perm).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKDIR)] -async fn mkdir(pathname: User, mode: u32) -> KResult<()> { +async fn mkdir(pathname: User, mode: Mode) -> KResult<()> { sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode).await } #[eonix_macros::define_syscall(SYS_FTRUNCATE64)] async fn truncate64(fd: FD, length: usize) -> KResult<()> { let file = thread.files.get(fd).ok_or(EBADF)?; - file.as_path().ok_or(EBADF)?.truncate(length) + file.as_path().ok_or(EBADF)?.truncate(length).await } #[cfg(target_arch = "x86_64")] @@ -283,53 +320,87 @@ async fn truncate(pathname: User, length: usize) -> KResult<()> { let path = UserString::new(pathname)?; let path = Path::new(path.as_cstr().to_bytes())?; - let dentry = Dentry::open(&thread.fs_context, path, true)?; + let dentry = Dentry::open(&thread.fs_context, path, true).await?; - dentry.truncate(length) + dentry.truncate(length).await } #[eonix_macros::define_syscall(SYS_UNLINKAT)] async fn unlinkat(dirfd: FD, pathname: User) -> KResult<()> { - dentry_from(thread, dirfd, pathname, false)?.unlink() + dentry_from(thread, dirfd, pathname, false) + .await? + .unlink() + .await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_UNLINK)] async fn unlink(pathname: User) -> KResult<()> { - sys_unlinkat(thread, FD::AT_FDCWD, pathname) + sys_unlinkat(thread, FD::AT_FDCWD, pathname).await } #[eonix_macros::define_syscall(SYS_SYMLINKAT)] -async fn symlinkat(target: User, dirfd: FD, linkpath: User) -> KResult<()> { +async fn symlinkat( + target: User, + dirfd: FD, + linkpath: User, +) -> KResult<()> { let target = UserString::new(target)?; - let dentry = dentry_from(thread, dirfd, linkpath, false)?; + let dentry = dentry_from(thread, dirfd, linkpath, false).await?; - dentry.symlink(target.as_cstr().to_bytes()) + dentry.symlink(target.as_cstr().to_bytes()).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SYMLINK)] async fn symlink(target: User, linkpath: User) -> KResult<()> { - sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath) + sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath).await +} + +#[derive(Clone, Copy, Debug)] +#[repr(transparent)] +struct UserDeviceId(u32); + +impl FromSyscallArg for UserDeviceId { + fn from_arg(value: usize) -> Self { + Self(value as u32) + } +} + +impl UserDeviceId { + pub fn into_devid(self) -> DeviceId { + let major = (self.0 >> 8) & 0xfff; + let minor = (self.0 & 0xff) | ((self.0 >> 12) & 0xfff00); + + // TODO: We strip off the high 4 bits of the minor ID for now... 
+ DeviceId::new(major as u16, minor as u16) + } } #[eonix_macros::define_syscall(SYS_MKNODAT)] -async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: u32) -> KResult<()> { +async fn mknodat( + dirfd: FD, + pathname: User, + mut mode: Mode, + dev: UserDeviceId, +) -> KResult<()> { if !mode.is_blk() && !mode.is_chr() { return Err(EINVAL); } - let dentry = dentry_from(thread, dirfd, pathname, true)?; - - let umask = *thread.fs_context.umask.lock(); - mode.mask_perm(!umask.non_format_bits()); + let dentry = dentry_from(thread, dirfd, pathname, true).await?; + mode.set_perm(mode.perm().mask_with(*thread.fs_context.umask.lock())); - dentry.mknod(mode, dev) + dentry.mknod(mode, dev.into_devid()).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKNOD)] -async fn mknod(pathname: User, mode: u32, dev: u32) -> KResult<()> { +async fn mknod( + pathname: User, + mode: Mode, + dev: UserDeviceId, +) -> KResult<()> { sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev).await } @@ -340,19 +411,28 @@ async fn readlinkat( buffer: UserMut, bufsize: usize, ) -> KResult { - let dentry = dentry_from(thread, dirfd, pathname, false)?; + let dentry = dentry_from(thread, dirfd, pathname, false).await?; let mut buffer = UserBuffer::new(buffer, bufsize)?; - dentry.readlink(&mut buffer) + dentry.readlink(&mut buffer).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_READLINK)] -async fn readlink(pathname: User, buffer: UserMut, bufsize: usize) -> KResult { +async fn readlink( + pathname: User, + buffer: UserMut, + bufsize: usize, +) -> KResult { sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize).await } -async fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { +async fn do_lseek( + thread: &Thread, + fd: FD, + offset: u64, + whence: u32, +) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; Ok(match whence { @@ -371,14 +451,15 @@ async fn lseek(fd: FD, offset: u64, whence: u32) -> KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_LLSEEK)] -fn llseek( +async fn llseek( fd: FD, offset_high: u32, offset_low: u32, result: UserMut, whence: u32, ) -> KResult<()> { - let mut result = UserBuffer::new(result.cast(), core::mem::size_of::())?; + let mut result = + UserBuffer::new(result.cast(), core::mem::size_of::())?; let offset = ((offset_high as u64) << 32) | (offset_low as u64); let new_offset = do_lseek(thread, fd, offset, whence).await?; @@ -409,9 +490,10 @@ async fn readv(fd: FD, iov_user: User, iovcnt: u32) -> KResult { Ok(IoVec { len: Long::ZERO, .. }) => None, - Ok(IoVec { base, len }) => { - Some(UserBuffer::new(UserMut::with_addr(base.addr()), len.get())) - } + Ok(IoVec { base, len }) => Some(UserBuffer::new( + UserMut::with_addr(base.addr()), + len.get(), + )), }) .collect::>>()?; @@ -446,8 +528,11 @@ async fn writev(fd: FD, iov_user: User, iovcnt: u32) -> KResult { len: Long::ZERO, .. 
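`UserDeviceId::into_devid` follows the glibc `dev_t` bit layout: major in bits 8..20, minor split across bits 0..8 and 20..32. A round-trip check of the slicing; the `join` helper is illustrative, and the patch additionally truncates both halves to `u16` as the TODO notes:

```rust
fn split(dev: u32) -> (u32, u32) {
    let major = (dev >> 8) & 0xfff;
    let minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
    (major, minor)
}

fn join(major: u32, minor: u32) -> u32 {
    (minor & 0xff) | ((major & 0xfff) << 8) | ((minor & 0xfff00) << 12)
}

fn main() {
    assert_eq!(split(join(8, 1)), (8, 1)); // e.g. 8:1 is /dev/sda1
    // A minor wider than 16 bits survives the split, which is why the
    // TODO above notes that truncating to u16 drops its high bits.
    assert_eq!(split(join(259, 0x12345)), (259, 0x12345));
}
```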
}) => None, Ok(IoVec { base, len }) => Some( - CheckedUserPointer::new(User::with_addr(base.addr()), len.get()) - .map(|ptr| ptr.into_stream()), + CheckedUserPointer::new( + User::with_addr(base.addr()), + len.get(), + ) + .map(|ptr| ptr.into_stream()), ), }) .collect::>>()?; @@ -466,12 +551,17 @@ async fn writev(fd: FD, iov_user: User, iovcnt: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_FACCESSAT)] -async fn faccessat(dirfd: FD, pathname: User, _mode: u32, flags: AtFlags) -> KResult<()> { +async fn faccessat( + dirfd: FD, + pathname: User, + _mode: u32, + flags: AtFlags, +) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; if !dentry.is_valid() { @@ -497,7 +587,12 @@ async fn access(pathname: User, mode: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_SENDFILE64)] -async fn sendfile64(out_fd: FD, in_fd: FD, offset: UserMut, count: usize) -> KResult { +async fn sendfile64( + out_fd: FD, + in_fd: FD, + offset: UserMut, + count: usize, +) -> KResult { let in_file = thread.files.get(in_fd).ok_or(EBADF)?; let out_file = thread.files.get(out_fd).ok_or(EBADF)?; @@ -602,7 +697,11 @@ async fn pselect6( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_POLL)] -async fn poll(fds: UserMut, nfds: u32, timeout: u32) -> KResult { +async fn poll( + fds: UserMut, + nfds: u32, + timeout: u32, +) -> KResult { do_poll(thread, fds, nfds, timeout).await } @@ -614,28 +713,34 @@ async fn fchownat( gid: u32, flags: AtFlags, ) -> KResult<()> { - let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow())?; + let dentry = + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?; if !dentry.is_valid() { return Err(ENOENT); } - dentry.chown(uid, gid) + dentry.chown(uid, gid).await } #[eonix_macros::define_syscall(SYS_FCHMODAT)] -async fn fchmodat(dirfd: FD, pathname: User, mode: Mode, flags: AtFlags) -> KResult<()> { +async fn fchmodat( + dirfd: FD, + pathname: User, + mode: Mode, + flags: AtFlags, +) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; if !dentry.is_valid() { return Err(ENOENT); } - dentry.chmod(mode) + dentry.chmod(mode).await } #[eonix_macros::define_syscall(SYS_FCHMOD)] @@ -654,7 +759,7 @@ async fn utimensat( let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; if !dentry.is_valid() { @@ -684,14 +789,18 @@ async fn renameat2( let flags = RenameFlags::from_bits(flags).ok_or(EINVAL)?; // The two flags RENAME_NOREPLACE and RENAME_EXCHANGE are mutually exclusive. 
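`readv`/`writev` fold the iovec array with `filter_map` plus `collect`: zero-length entries disappear and the first failed pointer check aborts the whole collection. The same pattern on plain tuples:

```rust
fn gather(iovs: &[(usize, usize)]) -> Result<Vec<usize>, &'static str> {
    iovs.iter()
        .copied()
        .filter_map(|(base, len)| match (base, len) {
            (_, 0) => None, // zero-length iovecs are silently skipped
            (0, _) => Some(Err("EFAULT")),
            (_, len) => Some(Ok(len)),
        })
        .collect() // Iterator<Item = Result<T, E>> -> Result<Vec<T>, E>
}

fn main() {
    assert_eq!(gather(&[(0x1000, 4), (0x2000, 0), (0x3000, 8)]), Ok(vec![4, 8]));
    assert_eq!(gather(&[(0x1000, 4), (0, 8)]), Err("EFAULT"));
}
```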
- if flags.contains(RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE) { + if flags + .contains(RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE) + { Err(EINVAL)?; } - let old_dentry = dentry_from(thread, old_dirfd, old_pathname, false)?; - let new_dentry = dentry_from(thread, new_dirfd, new_pathname, false)?; + let old_dentry = + dentry_from(thread, old_dirfd, old_pathname, false).await?; + let new_dentry = + dentry_from(thread, new_dirfd, new_pathname, false).await?; - old_dentry.rename(&new_dentry, flags) + old_dentry.rename(&new_dentry, flags).await } #[cfg(target_arch = "x86_64")] diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index c6300ac7..825440ef 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -1,22 +1,15 @@ -use super::FromSyscallArg; -use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; -use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; -use crate::kernel::mem::FileMapping; -use crate::kernel::task::Thread; -use crate::kernel::vfs::filearray::FD; -use crate::kernel::vfs::inode::Mode; -use crate::{ - kernel::{ - constants::{UserMmapFlags, UserMmapProtocol}, - mem::{Mapping, Permission}, - }, - prelude::*, -}; use align_ext::AlignExt; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr}; use eonix_mm::paging::PAGE_SIZE; use posix_types::syscall_no::*; +use super::FromSyscallArg; +use crate::kernel::constants::{UserMmapFlags, UserMmapProtocol, EBADF, EINVAL}; +use crate::kernel::mem::{FileMapping, Mapping, Permission}; +use crate::kernel::task::Thread; +use crate::kernel::vfs::filearray::FD; +use crate::prelude::*; + impl FromSyscallArg for UserMmapProtocol { fn from_arg(value: usize) -> UserMmapProtocol { UserMmapProtocol::from_bits_truncate(value as u32) @@ -66,13 +59,7 @@ async fn do_mmap2( if !is_shared { Mapping::Anonymous } else { - // The mode is unimportant here, since we are checking prot in mm_area. - let shared_area = SHM_MANAGER.lock().await.create_shared_area( - len, - thread.process.pid, - Mode::REG.perm(0o777), - ); - Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) + unimplemented!("mmap MAP_ANONYMOUS | MAP_SHARED"); } } else { let file = thread @@ -82,7 +69,7 @@ async fn do_mmap2( .get_inode()? 
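The mutual-exclusion test above leans on a bitflags subtlety worth spelling out: `contains(A | B)` is true only when *both* bits are set (while `intersects` would test 'either one'):

```rust
use bitflags::bitflags;

bitflags! {
    #[derive(Clone, Copy)]
    struct RenameFlags: u32 {
        const RENAME_NOREPLACE = 1 << 0;
        const RENAME_EXCHANGE = 1 << 1;
    }
}

fn main() {
    let both = RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE;
    let one = RenameFlags::RENAME_NOREPLACE;

    // `contains` requires *all* bits of the argument to be present.
    assert!(both.contains(RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE));
    assert!(!one.contains(RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE));
    // `intersects` is the "either bit set" test instead.
    assert!(one.intersects(RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE));
}
```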
.ok_or(EBADF)?; - Mapping::File(FileMapping::new(file, pgoffset, len)) + Mapping::File(FileMapping::new(file.get_page_cache(), pgoffset, len)) }; let permission = Permission { @@ -179,114 +166,4 @@ async fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<() .await } -#[eonix_macros::define_syscall(SYS_SHMGET)] -async fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { - let size = size.align_up(PAGE_SIZE); - - let mut shm_manager = SHM_MANAGER.lock().await; - let shmid = gen_shm_id(key)?; - - let mode = Mode::REG.perm(shmflg); - let shmflg = ShmFlags::from_bits_truncate(shmflg); - - if key == IPC_PRIVATE { - let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode); - shm_manager.insert(shmid, new_shm); - return Ok(shmid); - } - - if let Some(_) = shm_manager.get(shmid) { - if shmflg.contains(ShmFlags::IPC_CREAT | ShmFlags::IPC_EXCL) { - return Err(EEXIST); - } - - return Ok(shmid); - } - - if shmflg.contains(ShmFlags::IPC_CREAT) { - let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode); - shm_manager.insert(shmid, new_shm); - return Ok(shmid); - } - - Err(ENOENT) -} - -#[eonix_macros::define_syscall(SYS_SHMAT)] -async fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { - let mm_list = &thread.process.mm_list; - let shm_manager = SHM_MANAGER.lock().await; - let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?; - - // Why is this not used? - let _mode = shmflg & 0o777; - let shmflg = ShmFlags::from_bits_truncate(shmflg); - - let mut permission = Permission { - read: true, - write: true, - execute: false, - }; - - if shmflg.contains(ShmFlags::SHM_EXEC) { - permission.execute = true; - } - if shmflg.contains(ShmFlags::SHM_RDONLY) { - permission.write = false; - } - - let size = shm_area.shmid_ds.shm_segsz; - - let mapping = Mapping::File(FileMapping { - file: shm_area.area.clone(), - offset: 0, - length: size, - }); - - let addr = if addr != 0 { - if addr % PAGE_SIZE != 0 && !shmflg.contains(ShmFlags::SHM_RND) { - return Err(EINVAL); - } - let addr = VAddr::from(addr.align_down(PAGE_SIZE)); - mm_list - .mmap_fixed(addr, size, mapping, permission, true) - .await - } else { - mm_list - .mmap_hint(VAddr::NULL, size, mapping, permission, true) - .await - }?; - - thread.process.shm_areas.lock().insert(addr, size); - - Ok(addr.addr()) -} - -#[eonix_macros::define_syscall(SYS_SHMDT)] -async fn shmdt(addr: usize) -> KResult<()> { - let addr = VAddr::from(addr); - - let size = { - let mut shm_areas = thread.process.shm_areas.lock(); - let size = *shm_areas.get(&addr).ok_or(EINVAL)?; - shm_areas.remove(&addr); - - size - }; - - thread.process.mm_list.unmap(addr, size).await -} - -#[eonix_macros::define_syscall(SYS_SHMCTL)] -async fn shmctl(_shmid: u32, _op: i32, _shmid_ds: usize) -> KResult { - // TODO - Ok(0) -} - -#[eonix_macros::define_syscall(SYS_MEMBARRIER)] -async fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> { - // TODO - Ok(()) -} - pub fn keep_alive() {} diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 7dd573cc..3e815f25 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -1,38 +1,37 @@ +use alloc::borrow::ToOwned; +use alloc::ffi::CString; +use core::time::Duration; + +use bitflags::bitflags; +use eonix_hal::traits::trap::RawTrapContext; +use eonix_hal::trap::TrapContext; +use eonix_mm::address::Addr as _; +use eonix_sync::AsProof as _; +use posix_types::ctypes::PtrT; +use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; +use 
posix_types::stat::{TimeSpec, TimeVal}; +use posix_types::syscall_no::*; +use posix_types::SIGNAL_NOW; + use super::SyscallNoReturn; use crate::io::Buffer; use crate::kernel::constants::{ - CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINVAL, ENOENT, ENOTDIR, ERANGE, ESRCH, -}; -use crate::kernel::constants::{ - ENOSYS, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, + CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINVAL, ENOENT, ENOSYS, ENOTDIR, + ERANGE, ESRCH, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, }; use crate::kernel::mem::PageBuffer; use crate::kernel::syscall::{User, UserMut}; use crate::kernel::task::{ - do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, ProgramLoader, - RobustListHead, SignalAction, Thread, WaitId, WaitType, + do_clone, futex_exec, futex_wait, futex_wake, parse_futexop, yield_now, CloneArgs, FutexFlags, + FutexOp, ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, }; -use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; -use crate::kernel::user::UserString; -use crate::kernel::user::{UserPointer, UserPointerMut}; -use crate::kernel::vfs::inode::Mode; -use crate::kernel::vfs::{self, dentry::Dentry}; +use crate::kernel::user::{UserBuffer, UserPointer, UserPointerMut, UserString}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::types::Permission; +use crate::kernel::vfs::{self}; use crate::path::Path; -use crate::{kernel::user::UserBuffer, prelude::*}; -use alloc::borrow::ToOwned; -use alloc::ffi::CString; -use bitflags::bitflags; -use core::time::Duration; -use eonix_hal::processor::UserTLS; -use eonix_hal::traits::trap::RawTrapContext; -use eonix_hal::trap::TrapContext; -use eonix_mm::address::Addr as _; -use eonix_sync::AsProof as _; -use posix_types::ctypes::PtrT; -use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; -use posix_types::stat::TimeVal; -use posix_types::{syscall_no::*, SIGNAL_NOW}; +use crate::prelude::*; #[repr(C)] #[derive(Debug, Clone, Copy)] @@ -100,10 +99,11 @@ async fn clock_nanosleep( } #[eonix_macros::define_syscall(SYS_UMASK)] -async fn umask(mask: Mode) -> KResult { - let mut umask = thread.fs_context.umask.lock(); +async fn umask(raw_new_mask: u32) -> KResult { + let new_mask = Permission::new(!raw_new_mask); + let old_mask = core::mem::replace(&mut *thread.fs_context.umask.lock(), new_mask); - Ok(core::mem::replace(&mut *umask, mask.non_format())) + Ok(!old_mask.bits()) } #[eonix_macros::define_syscall(SYS_GETCWD)] @@ -124,7 +124,7 @@ async fn chdir(path: User) -> KResult<()> { let path = UserString::new(path)?; let path = Path::new(path.as_cstr().to_bytes())?; - let dentry = Dentry::open(&thread.fs_context, path, true)?; + let dentry = Dentry::open(&thread.fs_context, path, true).await?; if !dentry.is_valid() { return Err(ENOENT); } @@ -159,7 +159,8 @@ async fn mount(source: User, target: User, fstype: User, flags: usiz &thread.fs_context, Path::new(target.as_cstr().to_bytes())?, true, - )?; + ) + .await?; if !mountpoint.is_valid() { return Err(ENOENT); @@ -172,6 +173,7 @@ async fn mount(source: User, target: User, fstype: User, flags: usiz fstype.as_cstr().to_str().map_err(|_| EINVAL)?, flags as u64, ) + .await } fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult> { @@ -199,21 +201,19 @@ async fn execve(exec: User, argv: User, envp: User) -> KResult, argv: User, envp: User) -> KResult SyscallNoReturn { - let mut 
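The rewritten `umask` stores the complement of the user-supplied mask, i.e. the still-allowed permission bits, so masking a requested mode (as in `openat`/`mkdirat` earlier) is a plain AND, and the old value comes back by complementing again. Modeled on raw bits, with an explicit 0o777 clamp added here for clarity:

```rust
fn sys_umask(stored: &mut u32, raw_new_mask: u32) -> u32 {
    // Store the allowed bits, return the previous mask.
    let old = std::mem::replace(stored, !raw_new_mask & 0o777);
    !old & 0o777
}

fn main() {
    let mut stored = !0o022u32 & 0o777; // umask 022, stored complemented
    assert_eq!(sys_umask(&mut stored, 0o077), 0o022);
    // Applying the new umask to a requested 0666 mode is now just an AND.
    assert_eq!(0o666 & stored, 0o600);
}
```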
procs = ProcessList::get().write().await; - - unsafe { - procs - .do_exit(&thread, WaitType::Exited(status), false) - .await; - } + thread.exit(WaitType::Exited(status)); SyscallNoReturn } #[eonix_macros::define_syscall(SYS_EXIT_GROUP)] async fn exit_group(status: u32) -> SyscallNoReturn { - let mut procs = ProcessList::get().write().await; - - unsafe { - procs.do_exit(&thread, WaitType::Exited(status), true).await; - } + // XXX: Send SIGKILL to our sibling threads. + thread.exit(WaitType::Exited(status)); SyscallNoReturn } @@ -362,7 +353,7 @@ async fn wait4( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_WAITPID)] async fn waitpid(waitpid: i32, arg1: UserMut, options: u32) -> KResult { - sys_wait4(thread, waitpid, arg1, options, core::ptr::null_mut()).await + sys_wait4(thread, waitpid, arg1, options, UserMut::null()).await } #[eonix_macros::define_syscall(SYS_SETSID)] @@ -489,51 +480,15 @@ async fn gettid() -> KResult { Ok(thread.tid) } -pub fn parse_user_tls(arch_tls: usize) -> KResult { - #[cfg(target_arch = "x86_64")] - { - let desc = arch_tls as *mut posix_types::x86_64::UserDescriptor; - let desc_pointer = UserPointerMut::new(desc)?; - let mut desc = desc_pointer.read()?; - - // Clear the TLS area if it is not present. - if desc.flags.is_read_exec_only() && !desc.flags.is_present() { - if desc.limit != 0 && desc.base != 0 { - let len = if desc.flags.is_limit_in_pages() { - (desc.limit as usize) << 12 - } else { - desc.limit as usize - }; - - CheckedUserPointer::new(desc.base as _, len)?.zero()?; - } - } - - let (new_tls, entry) = - UserTLS::new32(desc.base, desc.limit, desc.flags.is_limit_in_pages()); - desc.entry = entry; - desc_pointer.write(desc)?; - - Ok(new_tls) - } - - #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] - { - Ok(UserTLS::new(arch_tls as u64)) - } -} - #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SET_THREAD_AREA)] -async fn set_thread_area(arch_tls: usize) -> KResult<()> { - thread.set_user_tls(parse_user_tls(arch_tls)?)?; +async fn set_thread_area(tls: PtrT) -> KResult<()> { + use crate::kernel::task::UserTLSDescriptor; - // SAFETY: Preemption is disabled on calling `load_thread_area32()`. 
- unsafe { - eonix_preempt::disable(); - thread.load_thread_area32(); - eonix_preempt::enable(); - } + let tls = UserTLSDescriptor::new(tls)?.read()?; + + thread.set_user_tls(tls)?; + thread.activate_tls(); Ok(()) } @@ -647,18 +602,14 @@ async fn rt_sigprocmask( Ok(()) } -#[repr(C)] -#[derive(Clone, Copy)] -struct TimeSpec32 { - tv_sec: i32, - tv_nsec: i32, -} - -#[eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT_TIME32)] -async fn rt_sigtimedwait_time32( +#[cfg_attr( + any(target_arch = "riscv64", target_arch = "loongarch64"), + eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT) +)] +async fn rt_sigtimedwait( _uthese: User, _uinfo: UserMut, - _uts: User, + _uts: User, ) -> KResult { // TODO Ok(0) @@ -816,7 +767,7 @@ async fn clone( clone_flags: usize, new_sp: usize, parent_tidptr: UserMut, - tls: usize, + tls: PtrT, child_tidptr: UserMut, ) -> KResult { let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?; @@ -893,7 +844,7 @@ async fn rt_sigreturn() -> KResult { "`rt_sigreturn` failed in thread {} with error {err}!", thread.tid ); - thread.force_kill(Signal::SIGSEGV).await; + thread.force_kill(Signal::SIGSEGV); return Err(err); } @@ -921,8 +872,23 @@ async fn sigreturn() -> KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_ARCH_PRCTL)] -async fn arch_prctl(option: u32, addr: u32) -> KResult { - sys_arch_prctl(thread, option, addr).await +async fn arch_prctl(option: u32, addr: PtrT) -> KResult { + match option { + PR_SET_NAME => { + let name = UserPointer::<[u8; 16]>::new(User::with_addr(addr.addr()))?.read()?; + let len = name.iter().position(|&c| c == 0).unwrap_or(15); + thread.set_name(name[..len].into()); + Ok(0) + } + PR_GET_NAME => { + let name = thread.get_name(); + let len = name.len().min(15); + let name: [u8; 16] = core::array::from_fn(|i| if i < len { name[i] } else { 0 }); + UserPointerMut::<[u8; 16]>::new(UserMut::with_addr(addr.addr()))?.write(name)?; + Ok(0) + } + _ => Err(EINVAL), + } } pub fn keep_alive() {} diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 2ef58069..6505666c 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -8,9 +8,14 @@ mod process_list; mod session; mod signal; mod thread; +mod user_tls; pub use clone::{do_clone, CloneArgs, CloneFlags}; -pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; +use eonix_hal::symbol_addr; +pub use futex::{ + futex_exec, futex_exit, futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, + RobustListHead, +}; pub use kernel_stack::KernelStack; pub use loader::ProgramLoader; pub use process::{alloc_pid, Process, ProcessBuilder, WaitId, WaitObject, WaitType}; @@ -19,6 +24,7 @@ pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; pub use thread::{yield_now, Thread, ThreadAlloc, ThreadBuilder}; +pub use user_tls::{UserTLS, UserTLSDescriptor}; fn do_block_on(mut future: core::pin::Pin<&mut F>) -> F::Output where @@ -79,30 +85,25 @@ pub async fn stackful(mut future: F) -> F::Output where F: core::future::Future, { - use crate::kernel::{ - interrupt::{default_fault_handler, default_irq_handler}, - timer::{should_reschedule, timer_interrupt}, - }; use alloc::sync::Arc; use alloc::task::Wake; use core::cell::UnsafeCell; use core::future::Future; use core::pin::Pin; use core::ptr::NonNull; - use core::sync::atomic::AtomicBool; - use core::sync::atomic::Ordering; - use core::task::Context; - use core::task::Poll; - use core::task::Waker; - use 
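The `PR_GET_NAME` arm builds its fixed 16-byte, NUL-padded reply with `core::array::from_fn`; the truncate-and-pad logic in isolation:

```rust
fn to_name_buf(name: &[u8]) -> [u8; 16] {
    let len = name.len().min(15); // keep room for at least one trailing NUL
    core::array::from_fn(|i| if i < len { name[i] } else { 0 })
}

fn main() {
    let buf = to_name_buf(b"kworker/u8:3");
    assert_eq!(&buf[..12], b"kworker/u8:3");
    assert_eq!(buf[12], 0); // padded with NULs
    // Overlong names are truncated to 15 bytes plus the terminator.
    assert_eq!(to_name_buf(b"a-much-too-long-thread-name")[15], 0);
}
```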
eonix_hal::traits::trap::RawTrapContext; - use eonix_hal::traits::trap::TrapReturn; - use eonix_hal::traits::trap::TrapType; + use core::sync::atomic::{AtomicBool, Ordering}; + use core::task::{Context, Poll, Waker}; + + use eonix_hal::traits::trap::{RawTrapContext, TrapReturn, TrapType}; use eonix_hal::trap::TrapContext; use eonix_preempt::assert_preempt_enabled; use eonix_runtime::executor::Stack; use eonix_runtime::task::Task; use thread::wait_for_wakeups; + use crate::kernel::interrupt::{default_fault_handler, default_irq_handler}; + use crate::kernel::timer::{should_reschedule, timer_interrupt}; + let stack = KernelStack::new(); fn execute(mut future: Pin<&mut F>, output_ptr: NonNull>) -> ! @@ -188,7 +189,7 @@ where trap_ctx.set_user_mode(false); trap_ctx.set_interrupt_enabled(true); let _ = trap_ctx.set_user_call_frame( - execute:: as usize, + symbol_addr!(execute::), Some(sp.addr().get()), None, &[(&raw mut future) as usize, output.get() as usize], diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index e0d578c1..dd6f538d 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,18 +1,17 @@ -use crate::{ - kernel::{ - syscall::{procops::parse_user_tls, UserMut}, - task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}, - user::UserPointerMut, - }, - KResult, -}; -use bitflags::bitflags; use core::num::NonZero; -use eonix_hal::processor::UserTLS; + +use bitflags::bitflags; use eonix_runtime::scheduler::RUNTIME; use eonix_sync::AsProof; +use posix_types::ctypes::PtrT; use posix_types::signal::Signal; +use super::{UserTLS, UserTLSDescriptor}; +use crate::kernel::syscall::UserMut; +use crate::kernel::task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}; +use crate::kernel::user::UserPointerMut; +use crate::KResult; + bitflags! { #[derive(Debug, Default)] pub struct CloneFlags: usize { @@ -46,12 +45,18 @@ bitflags! { #[derive(Debug)] pub struct CloneArgs { pub flags: CloneFlags, - pub sp: Option>, // Stack pointer for the new thread. - pub exit_signal: Option, // Signal to send to the parent on exit. - pub set_tid_ptr: Option>, // Pointer to set child TID in user space. - pub clear_tid_ptr: Option>, // Pointer to clear child TID in user space. - pub parent_tid_ptr: Option>, // Pointer to parent TID in user space. - pub tls: Option, // Pointer to TLS information. + /// Stack pointer for the new thread. + pub sp: Option>, + /// Signal to send to the parent on exit. + pub exit_signal: Option, + /// Pointer to set child TID in user space. + pub set_tid_ptr: Option>, + /// Pointer to clear child TID in user space. + pub clear_tid_ptr: Option>, + /// Pointer to parent TID in user space. + pub parent_tid_ptr: Option>, + /// Pointer to TLS information. + pub tls: Option, } impl CloneArgs { @@ -62,7 +67,7 @@ impl CloneArgs { sp: usize, child_tid_ptr: UserMut, parent_tid_ptr: UserMut, - tls: usize, + tls: PtrT, ) -> KResult { let clone_flags = CloneFlags::from_bits_truncate(flags & !Self::MASK); let exit_signal = flags & Self::MASK; @@ -87,7 +92,8 @@ impl CloneArgs { .then_some(parent_tid_ptr); let tls = if clone_flags.contains(CloneFlags::CLONE_SETTLS) { - Some(parse_user_tls(tls)?) + let tls_desc = UserTLSDescriptor::new(tls)?; + Some(tls_desc.read()?) 
} else { None }; } diff --git a/src/kernel/task/futex.rs b/src/kernel/task/futex.rs index a04d7091..4dd57615 100644 --- a/src/kernel/task/futex.rs +++ b/src/kernel/task/futex.rs @@ -1,19 +1,17 @@ -use core::pin::pin; - use alloc::sync::Arc; use alloc::vec::Vec; +use core::pin::pin; + use bitflags::bitflags; +use eonix_mm::address::Addr; use eonix_sync::{LazyLock, Mutex, MutexGuard, WaitList}; use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink}; -use crate::{ - kernel::{ - constants::{EAGAIN, EINVAL}, - syscall::User, - user::UserPointer, - }, - prelude::KResult, -}; +use super::Thread; +use crate::kernel::constants::{EAGAIN, EINVAL}; +use crate::kernel::syscall::User; +use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::prelude::KResult; #[derive(PartialEq, Debug, Clone, Copy)] #[repr(u32)] @@ -318,3 +316,38 @@ impl RobustListHead { Ok(()) } } + +async fn do_futex_exit(thread: &Thread) -> KResult<()> { + if let Some(clear_ctid) = thread.get_clear_ctid() { + UserPointerMut::new(clear_ctid)?.write(0u32)?; + + futex_wake(clear_ctid.addr(), None, 1).await?; + } + + if let Some(robust_list) = thread.get_robust_list() { + robust_list.wake_all().await?; + } + + Ok(()) +} + +pub async fn futex_exit(thread: &Thread) { + // We don't care about any errors that happen inside. + // If they've set up a wrong pointer, good luck to them... + let _ = do_futex_exit(thread).await; +} + +async fn do_futex_exec(thread: &Thread) -> KResult<()> { + if let Some(robust_list) = thread.get_robust_list() { + robust_list.wake_all().await?; + thread.set_robust_list(None); + } + + Ok(()) +} + +pub async fn futex_exec(thread: &Thread) { + // We don't care about any errors that happen inside. + // If they've set up a wrong pointer, good luck to them... + let _ = do_futex_exec(thread).await; +} diff --git a/src/kernel/task/kernel_stack.rs b/src/kernel/task/kernel_stack.rs index d3e9de2f..f00b91bd 100644 --- a/src/kernel/task/kernel_stack.rs +++ b/src/kernel/task/kernel_stack.rs @@ -1,11 +1,12 @@ -use crate::kernel::mem::{paging::Page, PhysAccess as _}; -use core::{num::NonZero, ptr::NonNull}; +use core::ptr::NonNull; + use eonix_runtime::executor::Stack; +use crate::kernel::mem::FolioOwned; + #[derive(Debug)] pub struct KernelStack { - _pages: Page, - bottom: NonZero<usize>, + folio: FolioOwned, } impl KernelStack { @@ -14,15 +15,8 @@ impl KernelStack { const KERNEL_STACK_ORDER: u32 = 7; pub fn new() -> Self { - let pages = Page::alloc_order(Self::KERNEL_STACK_ORDER); - let bottom = unsafe { - // SAFETY: The paddr is from a page, which should be valid. - pages.range().end().as_ptr::().addr() - }; - Self { - _pages: pages, - bottom, + folio: FolioOwned::alloc_order(Self::KERNEL_STACK_ORDER), } } } @@ -33,7 +27,10 @@ impl Stack for KernelStack { } fn get_bottom(&self) -> NonNull<()> { - // SAFETY: The stack is allocated and `bottom` is non-zero. - unsafe { NonNull::new_unchecked(self.bottom.get() as *mut _) } + let ptr = self.folio.get_bytes_ptr(); + let len = ptr.len(); + + // SAFETY: The vaddr of the folio is guaranteed to be non-zero.
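On the `futex_exit`/`futex_exec` fix above: calling an `async fn` only constructs a future, and binding it to `_` drops it unpolled, so without the `.await` the cleanup never ran. A short demonstration using the `futures` executor added as a dependency in this change:

```rust
async fn side_effect(hit: &mut bool) {
    *hit = true;
}

fn main() {
    let mut hit = false;
    let _ = side_effect(&mut hit); // future dropped: the body never runs
    assert!(!hit);
    futures::executor::block_on(side_effect(&mut hit)); // driven to completion
    assert!(hit);
}
```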
+ unsafe { ptr.cast().byte_add(len) } } } diff --git a/src/kernel/task/loader/elf.rs b/src/kernel/task/loader/elf.rs index 859e0010..9f8aa166 100644 --- a/src/kernel/task/loader/elf.rs +++ b/src/kernel/task/loader/elf.rs @@ -1,27 +1,22 @@ +use alloc::ffi::CString; +use alloc::sync::Arc; +use alloc::vec::Vec; + +use align_ext::AlignExt; +use eonix_mm::address::{Addr, AddrOps as _, VAddr}; +use eonix_mm::paging::PAGE_SIZE; +use xmas_elf::header::{self, Class, HeaderPt1, Machine_}; +use xmas_elf::program::{self, ProgramHeader32, ProgramHeader64}; + use super::{LoadInfo, ELF_MAGIC}; -use crate::io::UninitBuffer; +use crate::io::{ByteBuffer, UninitBuffer}; +use crate::kernel::constants::ENOEXEC; +use crate::kernel::mem::{FileMapping, MMList, Mapping, Permission}; use crate::kernel::task::loader::aux_vec::{AuxKey, AuxVec}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::FsContext; use crate::path::Path; -use crate::{ - io::ByteBuffer, - kernel::{ - constants::ENOEXEC, - mem::{FileMapping, MMList, Mapping, Permission}, - vfs::{dentry::Dentry, FsContext}, - }, - prelude::*, -}; -use align_ext::AlignExt; -use alloc::vec::Vec; -use alloc::{ffi::CString, sync::Arc}; -use eonix_mm::{ - address::{Addr, AddrOps as _, VAddr}, - paging::PAGE_SIZE, -}; -use xmas_elf::{ - header::{self, Class, HeaderPt1, Machine_}, - program::{self, ProgramHeader32, ProgramHeader64}, -}; +use crate::prelude::*; const INIT_STACK_SIZE: usize = 0x80_0000; @@ -193,9 +188,9 @@ impl Elf { Err(ENOEXEC) } - fn parse(elf_file: Arc) -> KResult { + async fn parse(elf_file: Arc) -> KResult { let mut elf_header = UninitBuffer::>::new(); - elf_file.read(&mut elf_header, 0)?; + elf_file.read(&mut elf_header, 0).await?; let elf_header = elf_header.assume_init().map_err(|_| ENOEXEC)?; @@ -203,10 +198,12 @@ impl Elf { let ph_count = elf_header.pt2.ph_count; let mut program_headers = vec![E::Ph::default(); ph_count as usize]; - elf_file.read( - &mut ByteBuffer::from(program_headers.as_mut_slice()), - ph_offset.into_usize(), - )?; + elf_file + .read( + &mut ByteBuffer::from(program_headers.as_mut_slice()), + ph_offset.into_usize(), + ) + .await?; Ok(Self { file: elf_file, @@ -364,7 +361,7 @@ impl Elf { vmap_start, file_len, Mapping::File(FileMapping::new( - self.file.get_inode()?, + self.file.get_inode()?.get_page_cache(), file_offset, real_file_length, )), @@ -374,28 +371,40 @@ impl Elf { .await?; } - if vmem_len > file_len { - mm_list - .mmap_fixed( - vmap_start + file_len, - vmem_len - file_len, - Mapping::Anonymous, - permission, - false, - ) - .await?; + if vmem_vaddr_end > load_vaddr_end { + if load_vaddr_end.page_offset() != 0 { + let mut zero_len = PAGE_SIZE - load_vaddr_end.page_offset(); + zero_len = zero_len.min(vmem_vaddr_end - load_vaddr_end); + + mm_list + .access_mut(load_vaddr_end, zero_len, |_, data| data.fill(0)) + .await?; + } + + if vmem_len - file_len > 0 { + mm_list + .mmap_fixed( + vmap_start + file_len, + vmem_len - file_len, + Mapping::Anonymous, + permission, + false, + ) + .await?; + } } Ok(vmap_start + vmem_len) } async fn load_ldso(&self, mm_list: &MMList) -> KResult> { - let ldso_path = self.ldso_path()?; + let ldso_path = self.ldso_path().await?; if let Some(ldso_path) = ldso_path { let fs_context = FsContext::global(); - let ldso_file = Dentry::open(fs_context, Path::new(ldso_path.as_bytes())?, true)?; - let ldso_elf = Elf::::parse(ldso_file)?; + let ldso_file = + Dentry::open(fs_context, Path::new(ldso_path.as_bytes())?, true).await?; + let ldso_elf = Elf::::parse(ldso_file).await?; let base = 
VAddr::from(E::LDSO_BASE_ADDR); @@ -420,7 +429,7 @@ impl Elf { mm_list.map_vdso().await } - fn ldso_path(&self) -> KResult> { + async fn ldso_path(&self) -> KResult> { for program_header in &self.program_headers { let type_ = program_header.type_().map_err(|_| ENOEXEC)?; @@ -430,7 +439,8 @@ impl Elf { let mut ldso_vec = vec![0u8; file_size - 1]; // -1 due to '\0' self.file - .read(&mut ByteBuffer::from(ldso_vec.as_mut_slice()), file_offset)?; + .read(&mut ByteBuffer::from(ldso_vec.as_mut_slice()), file_offset) + .await?; let ldso_path = String::from_utf8(ldso_vec).map_err(|_| ENOEXEC)?; return Ok(Some(ldso_path)); } @@ -445,16 +455,16 @@ pub enum ELF { } impl ELF { - pub fn parse(elf_file: Arc) -> KResult { + pub async fn parse(elf_file: Arc) -> KResult { let mut header_pt1 = UninitBuffer::::new(); - elf_file.read(&mut header_pt1, 0)?; + elf_file.read(&mut header_pt1, 0).await?; let header_pt1 = header_pt1.assume_init().map_err(|_| ENOEXEC)?; assert_eq!(header_pt1.magic, ELF_MAGIC); match header_pt1.class() { - Class::ThirtyTwo => Ok(ELF::Elf32(Elf::parse(elf_file)?)), - Class::SixtyFour => Ok(ELF::Elf64(Elf::parse(elf_file)?)), + Class::ThirtyTwo => Ok(ELF::Elf32(Elf::parse(elf_file).await?)), + Class::SixtyFour => Ok(ELF::Elf64(Elf::parse(elf_file).await?)), _ => Err(ENOEXEC), } } diff --git a/src/kernel/task/loader/mod.rs b/src/kernel/task/loader/mod.rs index 4e3f4db1..7679aaf4 100644 --- a/src/kernel/task/loader/mod.rs +++ b/src/kernel/task/loader/mod.rs @@ -33,7 +33,7 @@ pub struct ProgramLoader { } impl ProgramLoader { - pub fn parse( + pub async fn parse( fs_context: &FsContext, mut exec_path: CString, mut file: Arc, @@ -49,12 +49,15 @@ impl ProgramLoader { } let mut magic = [0; 4]; - file.read(&mut ByteBuffer::new(magic.as_mut_slice()), 0)?; + file.read(&mut ByteBuffer::new(magic.as_mut_slice()), 0) + .await?; match magic { [b'#', b'!', ..] 
=> { let mut interpreter_line = [0; 256]; - let nread = file.read(&mut ByteBuffer::new(&mut interpreter_line), 0)?; + let nread = file + .read(&mut ByteBuffer::new(&mut interpreter_line), 0) + .await?; // There is a tiny time gap between reading the magic number and // reading the interpreter line, so we need to check if the line @@ -77,7 +80,7 @@ impl ProgramLoader { } let path = Path::new(interpreter_name.as_bytes())?; - file = Dentry::open(fs_context, path, true)?; + file = Dentry::open(fs_context, path, true).await?; args.insert(0, interpreter_name.clone()); if let Some(arg) = interpreter_arg { @@ -92,7 +95,7 @@ impl ProgramLoader { exec_path = interpreter_name; } - ELF_MAGIC => break ELF::parse(file)?, + ELF_MAGIC => break ELF::parse(file).await?, _ => return Err(ENOEXEC), } diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 421e4b8b..1385235d 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -1,34 +1,32 @@ -use super::{ - process_group::ProcessGroupBuilder, signal::RaiseResult, thread::ThreadBuilder, ProcessGroup, - ProcessList, Session, Thread, -}; -use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; -use crate::kernel::task::{CloneArgs, CloneFlags}; -use crate::rcu::call_rcu; -use crate::{ - kernel::mem::MMList, - prelude::*, - rcu::{RCUPointer, RCUReadGuard}, - sync::CondVar, -}; -use alloc::{ - collections::{btree_map::BTreeMap, vec_deque::VecDeque}, - sync::{Arc, Weak}, -}; +use alloc::collections::vec_deque::VecDeque; +use alloc::sync::Arc; use core::sync::atomic::{AtomicU32, Ordering}; -use eonix_mm::address::VAddr; + use eonix_sync::{ - AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, SpinGuard, - UnlockableGuard as _, UnlockedGuard as _, + AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, + SpinGuard, UnlockableGuard as _, UnlockedGuard as _, +}; +use intrusive_collections::{ + intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink, }; use pointers::BorrowedArc; use posix_types::constants::{ - CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_PGID, P_PIDFD, + CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_ALL, + P_PGID, P_PID, P_PIDFD, }; -use posix_types::constants::{P_ALL, P_PID}; use posix_types::signal::Signal; use posix_types::SIGNAL_COREDUMP; +use super::signal::RaiseResult; +use super::thread::{ProcessThreads, ThreadBuilder}; +use super::{ProcessGroup, ProcessList, Session, Thread}; +use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; +use crate::kernel::mem::MMList; +use crate::kernel::task::{CloneArgs, CloneFlags}; +use crate::prelude::*; +use crate::rcu::{call_rcu, RCUPointer, RCUReadGuard}; +use crate::sync::CondVar; + pub struct ProcessBuilder { mm_list: Option, exit_signal: Option, @@ -39,7 +37,6 @@ pub struct ProcessBuilder { pid: Option, } -#[derive(Debug)] pub struct Process { /// Process id /// @@ -51,8 +48,6 @@ pub struct Process { pub exit_signal: Option, - pub shm_areas: Spin>, - /// Parent process /// /// `parent` must be valid during the whole life of the process. @@ -72,14 +67,55 @@ pub struct Process { /// The only case where it may be `None` is when the process is kernel thread. pub(super) session: RCUPointer, - /// All things related to the process list. 
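The core of the restructuring that follows: the `BTreeMap<u32, Weak<...>>` side tables are replaced by intrusive red-black trees whose link nodes live inside `Process` itself, so membership costs no extra allocation and lookups hand back `Arc`s directly. A compiling sketch of the `intrusive-collections` pattern used throughout the patch (std used for brevity; the kernel builds the same thing on `alloc`):

```rust
use std::sync::Arc;

use intrusive_collections::{
    intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink,
};

struct Proc {
    pid: u32,
    link: RBTreeAtomicLink, // the tree node lives inside the element
}

intrusive_adapter!(ProcAdapter = Arc<Proc>: Proc { link: RBTreeAtomicLink });

impl<'a> KeyAdapter<'a> for ProcAdapter {
    type Key = u32;
    fn get_key(&self, value: &'a Proc) -> u32 {
        value.pid
    }
}

fn main() {
    let mut tree = RBTree::new(ProcAdapter::new());
    for pid in [2, 1, 3] {
        tree.insert(Arc::new(Proc { pid, link: RBTreeAtomicLink::new() }));
    }

    // Keyed lookup without a separate map; the cursor clones out
    // a fresh `Arc` to the found node.
    let found = tree.find(&2).clone_pointer();
    assert_eq!(found.map(|p| p.pid), Some(2));
}
```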
- pub(super) inner: Locked, + pub children: Locked, ProcessList>, + pub threads: Locked, ProcessList>, + + all_procs_link: RBTreeAtomicLink, + group_procs_link: RBTreeAtomicLink, + siblings_link: RBTreeAtomicLink, } -#[derive(Debug)] -pub(super) struct ProcessInner { - pub(super) children: BTreeMap>, - pub(super) threads: BTreeMap>, +intrusive_adapter!(pub AllProcs = Arc: Process { + all_procs_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub GroupProcs = Arc: Process { + group_procs_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub ProcessChildren = Arc: Process { + siblings_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllProcs { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pid + } +} + +impl KeyAdapter<'_> for GroupProcs { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pid + } +} + +impl KeyAdapter<'_> for ProcessChildren { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pid + } } #[derive(Debug)] @@ -154,7 +190,9 @@ impl WaitType { pub fn to_wstatus(self) -> u32 { match self { WaitType::Exited(status) => (status & 0xff) << 8, - WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => signal.into_raw() | 0x80, + WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => { + signal.into_raw() | 0x80 + } WaitType::Signaled(signal) => signal.into_raw(), WaitType::Stopped(signal) => 0x7f | (signal.into_raw() << 8), WaitType::Continued => 0xffff, @@ -165,7 +203,9 @@ impl WaitType { // TODO: CLD_TRAPPED match self { WaitType::Exited(status) => (status, CLD_EXITED), - WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => (signal.into_raw(), CLD_DUMPED), + WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => { + (signal.into_raw(), CLD_DUMPED) + } WaitType::Signaled(signal) => (signal.into_raw(), CLD_KILLED), WaitType::Stopped(signal) => (signal.into_raw(), CLD_STOPPED), WaitType::Continued => (Signal::SIGCONT.into_raw(), CLD_CONTINUED), @@ -200,7 +240,11 @@ impl ProcessBuilder { } } - pub async fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { + pub async fn clone_from( + mut self, + process: Arc, + clone_args: &CloneArgs, + ) -> Self { let mm_list = if clone_args.flags.contains(CloneFlags::CLONE_VM) { process.mm_list.new_shared().await } else { @@ -249,30 +293,37 @@ impl ProcessBuilder { self } - pub fn build(self, process_list: &mut ProcessList) -> (Arc, Arc) { + pub fn build( + self, + process_list: &mut ProcessList, + ) -> (Arc, Arc) { let mm_list = self.mm_list.unwrap_or_else(|| MMList::new()); let process = Arc::new(Process { pid: self.pid.expect("should set pid before building"), wait_list: WaitList::new(), mm_list, - shm_areas: Spin::new(BTreeMap::new()), exit_signal: self.exit_signal, parent: RCUPointer::empty(), pgroup: RCUPointer::empty(), session: RCUPointer::empty(), - inner: Locked::new( - ProcessInner { - children: BTreeMap::new(), - threads: BTreeMap::new(), - }, + children: Locked::new( + RBTree::new(ProcessChildren::NEW), + process_list, + ), + threads: Locked::new( + RBTree::new(ProcessThreads::NEW), process_list, ), + all_procs_link: RBTreeAtomicLink::new(), + group_procs_link: RBTreeAtomicLink::new(), + siblings_link: RBTreeAtomicLink::new(), }); process_list.add_process(&process); - let thread_builder = self.thread_builder.expect("Thread builder is not set"); + let thread_builder = + self.thread_builder.expect("Thread builder is not set"); let thread = thread_builder .process(process.clone()) .tid(process.pid) @@ -288,10 
+339,7 @@ impl ProcessBuilder { pgroup.add_member(&process, process_list.prove_mut()); pgroup } - None => ProcessGroupBuilder::new() - .leader(&process) - .session(session.clone()) - .build(process_list), + None => ProcessGroup::new(&process, &session, process_list), }; if let Some(parent) = &self.parent { @@ -311,30 +359,30 @@ impl ProcessBuilder { impl Process { pub fn raise(&self, signal: Signal, procs: Proof<'_, ProcessList>) { - let inner = self.inner.access(procs); - for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) { + let threads = self.threads.access(procs); + for thread in threads.iter() { if let RaiseResult::Finished = thread.raise(signal) { break; } } } - pub(super) fn add_child(&self, child: &Arc, procs: ProofMut<'_, ProcessList>) { - assert!(self - .inner - .access_mut(procs) - .children - .insert(child.pid, Arc::downgrade(child)) - .is_none()); + pub fn add_child( + &self, + child: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_procs_link.is_linked(), "Dead process"); + self.children.access_mut(procs).insert(child.clone()); } - pub(super) fn add_thread(&self, thread: &Arc, procs: ProofMut<'_, ProcessList>) { - assert!(self - .inner - .access_mut(procs) - .threads - .insert(thread.tid, Arc::downgrade(thread)) - .is_none()); + pub fn add_thread( + &self, + thread: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_procs_link.is_linked(), "Dead process"); + self.threads.access_mut(procs).insert(thread.clone()); } pub async fn wait( @@ -361,12 +409,7 @@ impl Process { break object; } - if self - .inner - .access(waits.process_list.prove()) - .children - .is_empty() - { + if self.children.access(waits.process_list.prove()).is_empty() { return Err(ECHILD); } @@ -382,12 +425,12 @@ impl Process { Ok(Some(wait_object)) } else { let mut procs = ProcessList::get().write().await; - procs.remove_process(wait_object.pid).await; + procs.remove_process(wait_object.pid); assert!(self - .inner - .access_mut(procs.prove_mut()) .children - .remove(&wait_object.pid) + .access_mut(procs.prove_mut()) + .find_mut(&wait_object.pid) + .remove() .is_some()); Ok(Some(wait_object)) @@ -403,15 +446,17 @@ impl Process { if process_list.try_find_session(self.pid).is_some() { return Err(EPERM); } + + self.pgroup(process_list.prove()) + .remove_member(self, &mut process_list); + let session = Session::new(self, &mut process_list); - let pgroup = ProcessGroupBuilder::new() - .leader(self) - .session(session.clone()) - .build(&mut process_list); + let pgroup = ProcessGroup::new(self, &session, &mut process_list); - let old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap(); - let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); - old_pgroup.remove_member(self.pid, process_list.prove_mut()); + let old_session = + unsafe { self.session.swap(Some(session.clone())) }.unwrap(); + let old_pgroup = + unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); call_rcu(move || { drop(old_session); @@ -424,47 +469,56 @@ impl Process { /// Set the process group id of the process to `pgid`. /// /// This function does the actual work. - fn do_setpgid(self: &Arc, pgid: u32, procs: &mut ProcessList) -> KResult<()> { + fn do_setpgid( + self: &Arc, + pgid: u32, + procs: &mut ProcessList, + ) -> KResult<()> { // SAFETY: We are holding the process list lock. 
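Before the body of `do_setpgid`, it helps to restate the POSIX rules it enforces: a session leader can never be moved; joining an existing group requires that group to be in the caller's session; and creating a new group is only allowed when the requested pgid equals the caller's pid. A simplified, self-contained restatement (hypothetical types, all kernel errors collapsed to `Err(())`):

```rust
/// What a setpgid request should do, per the checks in `do_setpgid`.
#[derive(Debug, PartialEq)]
enum PgidAction {
    NoOp,         // already in the requested group
    JoinExisting, // move into an existing group
    CreateNew,    // become leader of a fresh group
}

fn check_setpgid(
    pid: u32,                  // target process
    sid: u32,                  // its session id
    cur_pgid: u32,             // its current process group
    pgid: u32,                 // requested process group
    existing_sid: Option<u32>, // session of an existing group `pgid`, if any
) -> Result<PgidAction, ()> {
    if sid == pid {
        return Err(()); // EPERM: session leaders cannot be moved
    }
    match existing_sid {
        Some(s) if s != sid => Err(()), // EPERM: group in another session
        Some(_) if pgid == cur_pgid => Ok(PgidAction::NoOp),
        Some(_) => Ok(PgidAction::JoinExisting),
        None if pgid == pid => Ok(PgidAction::CreateNew),
        None => Err(()), // EPERM: a new pgid must equal the pid
    }
}

fn main() {
    // Moving into one's own existing group is a no-op.
    assert_eq!(check_setpgid(10, 1, 10, 10, Some(1)), Ok(PgidAction::NoOp));
    // A fresh group may only reuse the caller's pid.
    assert_eq!(check_setpgid(10, 1, 5, 10, None), Ok(PgidAction::CreateNew));
    assert_eq!(check_setpgid(10, 1, 5, 7, None), Err(()));
}
```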
let session = unsafe { self.session.load_locked().unwrap() }; - let pgroup = unsafe { self.pgroup.load_locked().unwrap() }; // Changing the process group of a session leader is not allowed. if session.sid == self.pid { return Err(EPERM); } - let new_pgroup = if let Some(new_pgroup) = procs.try_find_pgroup(pgid) { + let cur_pgroup = self.pgroup(procs.prove()).clone(); + let existing_pgroup = procs.try_find_pgroup(pgid); + + if let Some(new_pgroup) = &existing_pgroup { // Move us to an existing process group. // Check that the two groups are in the same session. - if new_pgroup.session.upgrade().unwrap().sid != session.sid { + if new_pgroup.session.sid != session.sid { return Err(EPERM); } // If we are already in the process group, we are done. - if new_pgroup.pgid == pgroup.pgid { + if new_pgroup.pgid == cur_pgroup.pgid { return Ok(()); } - - new_pgroup.add_member(self, procs.prove_mut()); - - new_pgroup } else { // Create a new process group only if `pgid` matches our `pid`. if pgid != self.pid { return Err(EPERM); } + } - ProcessGroupBuilder::new() - .leader(self) - .session(session.clone()) - .build(procs) - }; + // Permission checks done. Let's do the actual work. + cur_pgroup.remove_member(self, procs); - pgroup.remove_member(self.pid, procs.prove_mut()); + let new_pgroup; + if let Some(pgroup) = existing_pgroup { + pgroup.add_member(self, procs.prove_mut()); + new_pgroup = pgroup; + } else { + new_pgroup = ProcessGroup::new(self, &session, procs); + } - let old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap(); - call_rcu(move || drop(old_pgroup)); + unsafe { + // SAFETY: `cur_pgroup` held above. + self.pgroup.swap(Some(new_pgroup)); + } + call_rcu(move || drop(cur_pgroup)); Ok(()) } @@ -482,15 +536,14 @@ impl Process { let child = { // If `pid` refers to one of our children, the thread leaders must be // in our children list. - let children = &self.inner.access(procs.prove()).children; - let child = { - let child = children.get(&pid); - child.and_then(Weak::upgrade).ok_or(ESRCH)? - }; + let children = self.children.access(procs.prove()); + let child = children.find(&pid).clone_pointer().ok_or(ESRCH)?; // Changing the process group of a child is only allowed // if we are in the same session. - if child.session(procs.prove()).sid != self.session(procs.prove()).sid { + if child.session(procs.prove()).sid + != self.session(procs.prove()).sid + { return Err(EPERM); } @@ -504,39 +557,57 @@ impl Process { } /// Provide locked (consistent) access to the session. - pub fn session<'r>(&'r self, _procs: Proof<'r, ProcessList>) -> BorrowedArc<'r, Session> { + pub fn session<'r>( + &'r self, + _procs: Proof<'r, ProcessList>, + ) -> BorrowedArc<'r, Session> { // SAFETY: We are holding the process list lock. unsafe { self.session.load_locked() }.unwrap() } /// Provide locked (consistent) access to the process group. - pub fn pgroup<'r>(&'r self, _procs: Proof<'r, ProcessList>) -> BorrowedArc<'r, ProcessGroup> { + pub fn pgroup<'r>( + &'r self, + _procs: Proof<'r, ProcessList>, + ) -> BorrowedArc<'r, ProcessGroup> { // SAFETY: We are holding the process list lock. unsafe { self.pgroup.load_locked() }.unwrap() } /// Provide locked (consistent) access to the parent process. - pub fn parent<'r>(&'r self, _procs: Proof<'r, ProcessList>) -> BorrowedArc<'r, Process> { + pub fn parent<'r>( + &'r self, + _procs: Proof<'r, ProcessList>, + ) -> BorrowedArc<'r, Process> { // SAFETY: We are holding the process list lock.
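The `Proof<'_, ProcessList>` arguments threaded through these accessors are a zero-sized lock-token pattern: a `Proof` can only be minted from a live guard on the global process-list lock, so `Locked` fields and `load_locked` can hand out references with no further synchronization, and the borrow checker ties each reference's lifetime to the guard. A compiling sketch of the idea (`prove`, `Proof`, and `Locked` here are simplified stand-ins built on `std::sync::Mutex`):

```rust
use std::marker::PhantomData;
use std::sync::{Mutex, MutexGuard};

struct ProcessList;

/// Zero-sized evidence that the `ProcessList` lock is held.
#[derive(Clone, Copy)]
struct Proof<'a>(PhantomData<&'a ProcessList>);

fn prove<'a>(_guard: &'a MutexGuard<'_, ProcessList>) -> Proof<'a> {
    Proof(PhantomData)
}

/// Data that is only consistent while the process-list lock is held.
struct Locked<T>(T); // really an UnsafeCell in the kernel

impl<T> Locked<T> {
    fn access<'a>(&'a self, _proof: Proof<'a>) -> &'a T {
        &self.0
    }
}

fn main() {
    let list = Mutex::new(ProcessList);
    let children = Locked(vec![1u32, 2, 3]);

    let guard = list.lock().unwrap();
    // The proof borrows the guard, so the reference it unlocks
    // cannot outlive the critical section.
    assert_eq!(children.access(prove(&guard)).len(), 3);
}
```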
unsafe { self.parent.load_locked() }.unwrap() } /// Provide RCU locked (maybe inconsistent) access to the session. - pub fn session_rcu(&self) -> RCUReadGuard<'_, BorrowedArc> { + pub fn session_rcu(&self) -> RCUReadGuard<'_, BorrowedArc<'_, Session>> { self.session.load().unwrap() } /// Provide RCU locked (maybe inconsistent) access to the process group. - pub fn pgroup_rcu(&self) -> RCUReadGuard<'_, BorrowedArc> { + pub fn pgroup_rcu( + &self, + ) -> RCUReadGuard<'_, BorrowedArc<'_, ProcessGroup>> { self.pgroup.load().unwrap() } /// Provide RCU locked (maybe inconsistent) access to the parent process. - pub fn parent_rcu(&self) -> Option>> { + pub fn parent_rcu( + &self, + ) -> Option>> { self.parent.load() } - pub fn notify(&self, signal: Option, wait: WaitObject, procs: Proof<'_, ProcessList>) { + pub fn notify( + &self, + signal: Option, + wait: WaitObject, + procs: Proof<'_, ProcessList>, + ) { self.wait_list.notify(wait); if let Some(signal) = signal { @@ -569,7 +640,7 @@ impl WaitList { self.cv_wait_procs.notify_all(); } - pub fn drain_exited(&self) -> DrainExited { + pub fn drain_exited(&self) -> DrainExited<'_> { DrainExited { wait_procs: self.wait_procs.lock(), } @@ -578,7 +649,12 @@ impl WaitList { /// # Safety /// Locks `ProcessList` and `WaitList` at the same time. When `wait` is called, /// releases the lock on `ProcessList` and `WaitList` and waits on `cv_wait_procs`. - pub async fn entry(&self, wait_id: WaitId, want_stop: bool, want_continue: bool) -> Entry { + pub async fn entry( + &self, + wait_id: WaitId, + want_stop: bool, + want_continue: bool, + ) -> Entry<'_, '_, '_> { Entry { process_list: ProcessList::get().read().await, wait_procs: self.wait_procs.lock(), @@ -609,8 +685,11 @@ impl Entry<'_, '_, '_> { WaitId::Any => true, WaitId::Pid(pid) => item.pid == pid, WaitId::Pgid(pgid) => { - if let Some(process) = self.process_list.try_find_process(item.pid) { - return process.pgroup(self.process_list.prove()).pgid == pgid; + if let Some(process) = + self.process_list.try_find_process(item.pid) + { + return process.pgroup(self.process_list.prove()).pgid + == pgid; } false } @@ -624,7 +703,10 @@ impl Entry<'_, '_, '_> { } } - pub fn wait(self, no_block: bool) -> impl core::future::Future> + Send { + pub fn wait( + self, + no_block: bool, + ) -> impl core::future::Future> + Send { let wait_procs = self.wait_procs.unlock(); async move { diff --git a/src/kernel/task/process_group.rs b/src/kernel/task/process_group.rs index 137c5191..8c708b5c 100644 --- a/src/kernel/task/process_group.rs +++ b/src/kernel/task/process_group.rs @@ -1,87 +1,121 @@ -use super::{Process, ProcessList, Session}; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, +use alloc::sync::{Arc, Weak}; + +use eonix_sync::{AsProofMut, Locked, Proof, ProofMut}; +use intrusive_collections::{ + intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink, }; -use eonix_sync::{Locked, Proof, ProofMut}; use posix_types::signal::Signal; -pub struct ProcessGroupBuilder { - pgid: Option, - leader: Option>, - session: Option>, -} +use super::process::GroupProcs; +use super::{Process, ProcessList, Session}; -#[derive(Debug)] pub struct ProcessGroup { pub pgid: u32, - pub _leader: Weak, - pub session: Weak, + pub leader: Weak, + pub session: Arc, - pub processes: Locked>, ProcessList>, + pub procs: Locked, ProcessList>, + + all_groups_link: RBTreeAtomicLink, + session_groups_link: RBTreeAtomicLink, } -impl ProcessGroupBuilder { - pub const fn new() -> Self { - Self { - pgid: None, - leader: None, - session: 
None, - } - } +intrusive_adapter!(pub AllGroups = Arc: ProcessGroup { + all_groups_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub SessionGroups = Arc: ProcessGroup { + session_groups_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllGroups { + type Key = u32; - pub fn leader(mut self, leader: &Arc) -> Self { - self.pgid = Some(leader.pid); - self.leader = Some(Arc::downgrade(leader)); - self + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pgid } +} + +impl KeyAdapter<'_> for SessionGroups { + type Key = u32; - pub fn session(mut self, session: Arc) -> Self { - self.session = Some(session); - self + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pgid } +} - pub fn build(self, process_list: &mut ProcessList) -> Arc { - let pgid = self.pgid.expect("PGID is not set"); - let leader = self.leader.expect("Leader is not set"); - let session = self.session.expect("Session is not set"); +impl ProcessGroup { + /// Create a pgroup and add it to the global pgroup list. + /// Add the pgroup to the session. + /// + /// # Panics + /// Panics if `leader` is already in some pgroup. + pub fn new( + leader: &Arc, + session: &Arc, + procs: &mut ProcessList, + ) -> Arc { + let pgid = leader.pid; + let pgroup_procs = { + let mut list = RBTree::new(GroupProcs::new()); + list.insert(leader.clone()); + list + }; let pgroup = Arc::new(ProcessGroup { pgid, - session: Arc::downgrade(&session), - processes: Locked::new(BTreeMap::from([(pgid, leader.clone())]), process_list), - _leader: leader, + session: session.clone(), + procs: Locked::new(pgroup_procs, procs), + leader: Arc::downgrade(leader), + all_groups_link: RBTreeAtomicLink::new(), + session_groups_link: RBTreeAtomicLink::new(), }); - process_list.add_pgroup(&pgroup); - session.add_member(process_list, &pgroup); + procs.add_pgroup(&pgroup); + session.add_member(&pgroup, procs.prove_mut()); pgroup } -} -impl ProcessGroup { - pub(super) fn add_member(&self, process: &Arc, procs: ProofMut<'_, ProcessList>) { - assert!(self - .processes - .access_mut(procs) - .insert(process.pid, Arc::downgrade(process)) - .is_none()); + /// Add `process` to the pgroup. + /// + /// # Panics + /// Panics if `process` is already in some pgroup or the pgroup is dead. 
+ pub fn add_member( + &self, + process: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_groups_link.is_linked(), "Dead pgroup"); + self.procs.access_mut(procs).insert(process.clone()); } - pub(super) fn remove_member(&self, pid: u32, procs: ProofMut<'_, ProcessList>) { - let processes = self.processes.access_mut(procs); - assert!(processes.remove(&pid).is_some()); - if processes.is_empty() { - self.session - .upgrade() - .unwrap() - .remove_member(self.pgid, procs); + pub fn remove_member( + self: &Arc, + process: &Arc, + procs: &mut ProcessList, + ) { + let members = self.procs.access_mut(procs.prove_mut()); + assert!( + members.find_mut(&process.pid).remove().is_some(), + "Not a member" + ); + + if !members.is_empty() { + return; } + + self.session.remove_member(self, procs); + procs.remove_pgroup(self); } pub fn raise(&self, signal: Signal, procs: Proof<'_, ProcessList>) { - let processes = self.processes.access(procs); - for process in processes.values().map(|p| p.upgrade().unwrap()) { + let members = self.procs.access(procs); + for process in members.iter() { process.raise(signal, procs); } } diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs index af073e84..f3371f25 100644 --- a/src/kernel/task/process_list.rs +++ b/src/kernel/task/process_list.rs @@ -1,36 +1,41 @@ -use core::sync::atomic::Ordering; - -use super::{Process, ProcessGroup, Session, Thread, WaitObject, WaitType}; -use crate::{ - kernel::{task::futex_wake, user::UserPointerMut}, - rcu::rcu_sync, -}; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, +use alloc::collections::vec_deque::VecDeque; +use alloc::sync::Arc; +use core::pin::pin; + +use eonix_runtime::scheduler::RUNTIME; +use eonix_sync::{AsProof as _, AsProofMut as _, RwLock, Spin, WaitList}; +use intrusive_collections::RBTree; + +use super::loader::LoadInfo; +use super::process::AllProcs; +use super::process_group::AllGroups; +use super::session::AllSessions; +use super::thread::AllThreads; +use super::{ + alloc_pid, Process, ProcessBuilder, ProcessGroup, Session, Thread, + ThreadBuilder, WaitObject, }; -use eonix_mm::address::Addr; -use eonix_sync::{AsProof as _, AsProofMut as _, RwLock}; +use crate::rcu::call_rcu; pub struct ProcessList { /// The init process. init: Option>, /// All threads. - threads: BTreeMap>, + threads: RBTree, /// All processes. - processes: BTreeMap>, + procs: RBTree, /// All process groups. - pgroups: BTreeMap>, + pgroups: RBTree, /// All sessions. 
- sessions: BTreeMap>, + sessions: RBTree, } static GLOBAL_PROC_LIST: RwLock = RwLock::new(ProcessList { init: None, - threads: BTreeMap::new(), - processes: BTreeMap::new(), - pgroups: BTreeMap::new(), - sessions: BTreeMap::new(), + threads: RBTree::new(AllThreads::NEW), + procs: RBTree::new(AllProcs::NEW), + pgroups: RBTree::new(AllGroups::NEW), + sessions: RBTree::new(AllSessions::NEW), }); impl ProcessList { @@ -39,46 +44,67 @@ impl ProcessList { } pub fn add_session(&mut self, session: &Arc) { - self.sessions.insert(session.sid, Arc::downgrade(session)); + self.sessions.insert(session.clone()); } pub fn add_pgroup(&mut self, pgroup: &Arc) { - self.pgroups.insert(pgroup.pgid, Arc::downgrade(pgroup)); + self.pgroups.insert(pgroup.clone()); } pub fn add_process(&mut self, process: &Arc) { - self.processes.insert(process.pid, Arc::downgrade(process)); + self.procs.insert(process.clone()); } pub fn add_thread(&mut self, thread: &Arc) { - self.threads.insert(thread.tid, thread.clone()); + self.threads.insert(thread.clone()); } - pub async fn remove_process(&mut self, pid: u32) { + pub fn remove_process(&mut self, pid: u32) { // Thread group leader has the same tid as the pid. - if let Some(thread) = self.threads.remove(&pid) { - self.processes.remove(&pid); - - // SAFETY: We wait until all references are dropped below with `rcu_sync()`. - let session = unsafe { thread.process.session.swap(None) }.unwrap(); - let pgroup = unsafe { thread.process.pgroup.swap(None) }.unwrap(); - let _parent = unsafe { thread.process.parent.swap(None) }.unwrap(); - pgroup.remove_member(pid, self.prove_mut()); - rcu_sync().await; - - if Arc::strong_count(&pgroup) == 1 { - self.pgroups.remove(&pgroup.pgid); - } + let Some(_) = self.threads.find_mut(&pid).remove() else { + panic!("Thread {} not found", pid); + }; - if Arc::strong_count(&session) == 1 { - self.sessions.remove(&session.sid); - } - } else { + let Some(proc) = self.procs.find_mut(&pid).remove() else { panic!("Process {} not found", pid); - } + }; + + // SAFETY: `call_rcu` below. 
+ let session = unsafe { proc.session.swap(None) }.unwrap(); + let pgroup = unsafe { proc.pgroup.swap(None) }.unwrap(); + let parent = unsafe { proc.parent.swap(None) }.unwrap(); + + pgroup.remove_member(&proc, self); + + call_rcu(move || { + drop(session); + drop(pgroup); + drop(parent); + }); + } + + pub fn remove_thread(&mut self, thread: &Arc) { + assert!( + self.threads.find_mut(&thread.tid).remove().is_some(), + "Double remove" + ); } - pub fn set_init_process(&mut self, init: Arc) { + pub fn remove_session(&mut self, session: &Arc) { + assert!( + self.sessions.find_mut(&session.sid).remove().is_some(), + "Double remove" + ); + } + + pub fn remove_pgroup(&mut self, pgroup: &Arc) { + assert!( + self.pgroups.find_mut(&pgroup.pgid).remove().is_some(), + "Double remove" + ); + } + + fn set_init_process(&mut self, init: Arc) { let old_init = self.init.replace(init); assert!(old_init.is_none(), "Init process already set"); } @@ -87,61 +113,80 @@ impl ProcessList { self.init.as_ref().unwrap() } - pub fn try_find_thread(&self, tid: u32) -> Option<&Arc> { - self.threads.get(&tid) + pub fn try_find_thread(&self, tid: u32) -> Option> { + self.threads.find(&tid).clone_pointer() } pub fn try_find_process(&self, pid: u32) -> Option> { - self.processes.get(&pid).and_then(Weak::upgrade) + self.procs.find(&pid).clone_pointer() } pub fn try_find_pgroup(&self, pgid: u32) -> Option> { - self.pgroups.get(&pgid).and_then(Weak::upgrade) + self.pgroups.find(&pgid).clone_pointer() } pub fn try_find_session(&self, sid: u32) -> Option> { - self.sessions.get(&sid).and_then(Weak::upgrade) - } - - /// Make the process a zombie and notify the parent. - /// # Safety - /// This function will destroy the process and all its threads. - /// It is the caller's responsibility to ensure that the process is not - /// running or will not run after this function is called. - pub async unsafe fn do_exit( - &mut self, - thread: &Thread, - exit_status: WaitType, - is_exiting_group: bool, - ) { - let process = thread.process.clone(); - - if process.pid == 1 { - panic!("init exited"); - } + self.sessions.find(&sid).clone_pointer() + } - let inner = process.inner.access_mut(self.prove_mut()); + pub async fn sys_init(load_info: LoadInfo) { + let thread_builder = ThreadBuilder::new() + .name(Arc::from(&b"busybox"[..])) + .entry(load_info.entry_ip, load_info.sp); - thread.dead.store(true, Ordering::SeqCst); + let mut process_list = ProcessList::get().write().await; + let (thread, process) = ProcessBuilder::new() + .pid(alloc_pid()) + .mm_list(load_info.mm_list) + .thread_builder(thread_builder) + .build(&mut process_list); - if is_exiting_group { - // TODO: Send SIGKILL to all threads. 
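`Thread::exit` fills the `exit_status` slot at most once (a concurrent killer that loses the race just returns), and `Reaper::reap` consumes it exactly once with `take()`. A std-only sketch of that handoff (types simplified; the kernel uses its spinlock instead of `Mutex`):

```rust
use std::sync::Mutex;

#[derive(Debug, PartialEq)]
enum WaitType {
    Exited(u32),
}

struct Thread {
    exit_status: Mutex<Option<WaitType>>,
}

impl Thread {
    /// First caller wins; later callers see `Some` and back off.
    fn exit(&self, status: WaitType) {
        let mut slot = self.exit_status.lock().unwrap();
        if slot.is_none() {
            *slot = Some(status);
        }
    }

    /// The reaper consumes the status exactly once.
    fn take_status(&self) -> WaitType {
        self.exit_status
            .lock()
            .unwrap()
            .take()
            .expect("Exited thread with no exit status")
    }
}

fn main() {
    let t = Thread { exit_status: Mutex::new(None) };
    t.exit(WaitType::Exited(0));
    t.exit(WaitType::Exited(1)); // loses the race, ignored
    assert_eq!(t.take_status(), WaitType::Exited(0));
}
```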
- // todo!() - } + process_list.set_init_process(process); - if thread.tid != process.pid { - self.threads.remove(&thread.tid); - inner.threads.remove(&thread.tid).unwrap(); - } + RUNTIME.spawn(Reaper::daemon()); + RUNTIME.spawn(thread.run()); + } - if let Some(clear_ctid) = thread.get_clear_ctid() { - let _ = UserPointerMut::new(clear_ctid).unwrap().write(0u32); + pub fn send_to_reaper(thread: Arc) { + GLOBAL_REAPER.reap_list.lock().push_back(thread); + GLOBAL_REAPER.wait.notify_one(); + } +} + +struct Reaper { + reap_list: Spin>>, + wait: WaitList, +} + +static GLOBAL_REAPER: Reaper = Reaper { + reap_list: Spin::new(VecDeque::new()), + wait: WaitList::new(), +}; + +impl Reaper { + async fn reap(&self, thread: Arc) { + let exit_status = thread + .exit_status + .lock() + .take() + .expect("Exited thread with no exit status"); + + let process = &thread.process; - let _ = futex_wake(clear_ctid.addr(), None, 1).await; + if process.pid == 1 && thread.tid == process.pid { + panic!("init exited: {}", alloc_pid()); } - if let Some(robust_list) = thread.get_robust_list() { - let _ = robust_list.wake_all().await; + let mut procs = ProcessList::get().write().await; + + if thread.tid != process.pid { + let threads = process.threads.access_mut(procs.prove_mut()); + assert!( + threads.find_mut(&thread.tid).remove().is_some(), + "Thread gone?" + ); + + procs.remove_thread(&thread); } // main thread exit @@ -150,49 +195,61 @@ thread.files.close_all().await; + let session = process.session(procs.prove()).clone(); // If we are the session leader, we should drop the control terminal. - if process.session(self.prove()).sid == process.pid { - if let Some(terminal) = process.session(self.prove()).drop_control_terminal().await - { - terminal.drop_session().await; + if session.sid == process.pid { + if let Some(terminal) = session.control_terminal() { + terminal.drop_session(procs.prove()); } } // Release the MMList as well as the page table. - unsafe { - // SAFETY: We are exiting the process, so no one might be using it. - process.mm_list.replace(None); - } + process.mm_list.release(); // Make children orphans (adopted by init) - { - let init = self.init_process(); - inner.children.retain(|_, child| { - let child = child.upgrade().unwrap(); - // SAFETY: `child.parent` must be ourself. So we don't need to free it. - unsafe { child.parent.swap(Some(init.clone())) }; - init.add_child(&child, self.prove_mut()); - - false - }); + let init = procs.init_process(); + let children = process.children.access_mut(procs.prove_mut()); + for child in children.take() { + // XXX: May be buggy. Check here again. + // SAFETY: `child.parent` must be ourself. + // So we don't need to free it.
+ unsafe { child.parent.swap(Some(init.clone())) }; + init.add_child(&child, procs.prove_mut()); } - let mut init_notify = self.init_process().notify_batch(); + let mut init_notify = procs.init_process().notify_batch(); process .wait_list .drain_exited() .into_iter() .for_each(|item| init_notify.notify(item)); - init_notify.finish(self.prove()); + init_notify.finish(procs.prove()); - process.parent(self.prove()).notify( + process.parent(procs.prove()).notify( process.exit_signal, WaitObject { pid: process.pid, code: exit_status, }, - self.prove(), + procs.prove(), ); } } + + async fn daemon() { + let me = &GLOBAL_REAPER; + + loop { + let mut wait = pin!(me.wait.prepare_to_wait()); + wait.as_mut().add_to_wait_list(); + + let thd_to_reap = me.reap_list.lock().pop_front(); + if let Some(thd_to_reap) = thd_to_reap { + me.reap(thd_to_reap).await; + continue; + } + + wait.await; + } + } } diff --git a/src/kernel/task/session.rs b/src/kernel/task/session.rs index a7b57afd..899aa395 100644 --- a/src/kernel/task/session.rs +++ b/src/kernel/task/session.rs @@ -1,117 +1,170 @@ -use super::{Process, ProcessGroup, ProcessList, Thread}; -use crate::kernel::constants::EPERM; -use crate::{kernel::Terminal, prelude::*}; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, +use alloc::sync::{Arc, Weak}; + +use eonix_sync::{AsProof as _, AsProofMut, Locked, Proof, ProofMut}; +use intrusive_collections::{ + intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink, }; -use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLock}; use posix_types::signal::Signal; -#[derive(Debug)] +use super::process_group::SessionGroups; +use super::{Process, ProcessGroup, ProcessList}; +use crate::kernel::constants::EPERM; +use crate::kernel::Terminal; +use crate::prelude::*; + struct SessionJobControl { - /// Foreground process group - foreground: Weak, + foreground: Option>, control_terminal: Option>, } -#[allow(dead_code)] -#[derive(Debug)] pub struct Session { pub sid: u32, pub leader: Weak, - job_control: RwLock, + job_control: Spin, + groups: Locked, ProcessList>, + all_sessions_link: RBTreeAtomicLink, +} - groups: Locked>, ProcessList>, +intrusive_adapter!(pub AllSessions = Arc: Session { + all_sessions_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllSessions { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.sid + } } impl Session { /// Create a session and add it to the global session list. - pub fn new(leader: &Arc, process_list: &mut ProcessList) -> Arc { + pub fn new(leader: &Arc, proclist: &mut ProcessList) -> Arc { let session = Arc::new(Self { sid: leader.pid, leader: Arc::downgrade(leader), - job_control: RwLock::new(SessionJobControl { - foreground: Weak::new(), + job_control: Spin::new(SessionJobControl { + foreground: None, control_terminal: None, }), - groups: Locked::new( - BTreeMap::new(), - // SAFETY: `procs` must be the global process list, which won't be moved. 
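`Reaper::daemon` above follows the classic prepare-to-wait discipline: register on the wait list first, re-check the queue, and only then sleep, so a `notify_one` that lands between the check and the sleep cannot be lost. The same invariant in blocking form, using `Condvar` (illustrative only; the kernel's `WaitList` is the async analogue):

```rust
use std::collections::VecDeque;
use std::sync::{Condvar, Mutex};

struct Reaper {
    queue: Mutex<VecDeque<u32>>,
    cv: Condvar,
}

impl Reaper {
    fn push(&self, tid: u32) {
        // State change strictly before the wakeup.
        self.queue.lock().unwrap().push_back(tid);
        self.cv.notify_one();
    }

    /// No lost wakeups: the re-check happens under the same lock
    /// the notifier takes, and `wait` releases it atomically.
    fn pop(&self) -> u32 {
        let mut q = self.queue.lock().unwrap();
        loop {
            if let Some(tid) = q.pop_front() {
                return tid;
            }
            q = self.cv.wait(q).unwrap();
        }
    }
}

fn main() {
    let r = Reaper { queue: Mutex::new(VecDeque::new()), cv: Condvar::new() };
    r.push(42);
    assert_eq!(r.pop(), 42);
}
```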
- process_list, - ), + groups: Locked::new(RBTree::new(SessionGroups::NEW), proclist), + all_sessions_link: RBTreeAtomicLink::new(), }); - process_list.add_session(&session); + proclist.add_session(&session); session } - pub(super) fn add_member(&self, procs: &mut ProcessList, pgroup: &Arc) { - let groups = self.groups.access_mut(procs.prove_mut()); - let old = groups.insert(pgroup.pgid, Arc::downgrade(pgroup)); - assert!(old.is_none(), "Process group already exists"); + pub fn add_member( + &self, + pgroup: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_sessions_link.is_linked(), "Dead session"); + self.groups.access_mut(procs).insert(pgroup.clone()); } - pub(super) fn remove_member(&self, pgid: u32, procs: ProofMut<'_, ProcessList>) { - assert!(self.groups.access_mut(procs).remove(&pgid).is_some()); + pub fn remove_member( + self: &Arc, + pgroup: &Arc, + procs: &mut ProcessList, + ) { + let members = self.groups.access_mut(procs.prove_mut()); + assert!( + members.find_mut(&pgroup.pgid).remove().is_some(), + "Not a member" + ); + + if let Some(fg_pgroup) = self.foreground_pgroup() { + if fg_pgroup.pgid == pgroup.pgid { + let _ = self.set_foreground_pgroup(None); + } + } + + if !members.is_empty() { + return; + } + + // Recycle dead session. + procs.remove_session(self); } - pub async fn foreground(&self) -> Option> { - self.job_control.read().await.foreground.upgrade() + pub fn leader(&self) -> Option> { + self.leader.upgrade() + } + + pub fn foreground_pgroup(&self) -> Option> { + self.job_control.lock().foreground.clone() + } + + pub fn control_terminal(&self) -> Option> { + self.job_control.lock().control_terminal.clone() } - /// Set the foreground process group identified by `pgid`. + /// Set the foreground process group to `pgroup`. /// The process group must belong to the session. - pub async fn set_foreground_pgid( + pub fn set_foreground_pgroup( &self, - pgid: u32, - procs: Proof<'_, ProcessList>, + pgroup: Option<&Arc>, ) -> KResult<()> { - if let Some(group) = self.groups.access(procs).get(&pgid) { - self.job_control.write().await.foreground = group.clone(); - Ok(()) - } else { - // TODO: Check if the process group refers to an existing process group. - // That's not a problem though, the operation will fail anyway. - Err(EPERM) + if let Some(pgroup) = pgroup { + if pgroup.session.sid != self.sid { + return Err(EPERM); + } } + + self.job_control.lock().foreground = pgroup.cloned(); + Ok(()) } - /// Only session leaders can set the control terminal. - /// Make sure we've checked that before calling this function. - pub async fn set_control_terminal( + /// Set our controlling terminal to `terminal`. Only meant to be called by + /// the session leader. The pgroup that the session leader is in becomes the + /// new foreground pgroup. + /// + /// # Panics + /// Panics if we have a controlling terminal already + /// or the session leader is gone.
+ pub fn _set_control_terminal( self: &Arc, terminal: &Arc, - forced: bool, procs: Proof<'_, ProcessList>, - ) -> KResult<()> { - let mut job_control = self.job_control.write().await; - if let Some(_) = job_control.control_terminal.as_ref() { - if let Some(session) = terminal.session().await.as_ref() { - if session.sid == self.sid { - return Ok(()); - } - } - return Err(EPERM); - } - terminal.set_session(self, forced).await?; + ) { + let mut job_control = self.job_control.lock(); + let leader = self.leader().expect("Leader is gone?"); + + assert!( + job_control.control_terminal.is_none(), + "We have a controlling terminal already" + ); + job_control.control_terminal = Some(terminal.clone()); - job_control.foreground = Arc::downgrade(&Thread::current().process.pgroup(procs)); - Ok(()) + job_control.foreground = Some(leader.pgroup(procs).clone()); } /// Drop the control terminal reference inside the session. - /// DO NOT TOUCH THE TERMINAL'S SESSION FIELD. - pub async fn drop_control_terminal(&self) -> Option> { - let mut inner = self.job_control.write().await; - inner.foreground = Weak::new(); - inner.control_terminal.take() + /// Send SIGHUP and then SIGCONT to our foreground pgroup. + pub fn _drop_control_terminal(&self, procs: Proof<'_, ProcessList>) { + let foreground = { + let mut inner = self.job_control.lock(); + inner.control_terminal = None; + inner.foreground.take() + }; + + if let Some(foreground) = foreground { + foreground.raise(Signal::SIGHUP, procs); + foreground.raise(Signal::SIGCONT, procs); + } } pub async fn raise_foreground(&self, signal: Signal) { - if let Some(fg) = self.foreground().await { - let procs = ProcessList::get().read().await; - fg.raise(signal, procs.prove()); - } + let Some(fg) = self.foreground_pgroup() else { + return; + }; + + let procs = ProcessList::get().read().await; + fg.raise(signal, procs.prove()); } } diff --git a/src/kernel/task/signal.rs b/src/kernel/task/signal.rs index d9970cad..0a7b580d 100644 --- a/src/kernel/task/signal.rs +++ b/src/kernel/task/signal.rs @@ -1,11 +1,10 @@ mod signal_action; -use super::{ProcessList, Thread, WaitObject, WaitType}; -use crate::kernel::constants::{EFAULT, EINVAL}; -use crate::{kernel::user::UserPointer, prelude::*}; use alloc::collections::binary_heap::BinaryHeap; use alloc::sync::Arc; -use core::{cmp::Reverse, task::Waker}; +use core::cmp::Reverse; +use core::task::Waker; + use eonix_hal::fpu::FpuState; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; @@ -14,9 +13,13 @@ use eonix_sync::AsProof as _; use intrusive_collections::UnsafeRef; use posix_types::signal::{SigSet, Signal}; use posix_types::{SIGNAL_IGNORE, SIGNAL_NOW, SIGNAL_STOP}; +pub use signal_action::SignalAction; use signal_action::SignalActionList; -pub use signal_action::SignalAction; +use super::{ProcessList, Thread, WaitObject, WaitType}; +use crate::kernel::constants::{EFAULT, EINVAL}; +use crate::kernel::user::UserPointer; +use crate::prelude::*; pub(self) const SAVED_DATA_SIZE: usize = size_of::() + size_of::() + size_of::(); @@ -168,10 +171,7 @@ impl SignalList { pub async fn handle(&self, trap_ctx: &mut TrapContext, fpu_state: &mut FpuState) { loop { let signal = { - let signal = match self.inner.lock().pop() { - Some(signal) => signal, - None => return, - }; + let Some(signal) = self.inner.lock().pop() else { return }; let handler = self.inner.lock().actions.get(signal); if let SignalAction::SimpleHandler { mask, ..
} = &handler { @@ -246,7 +246,7 @@ impl SignalList { } signal => { // Default to terminate the thread. - Thread::current().force_kill(signal).await; + Thread::current().force_kill(signal); return; } } diff --git a/src/kernel/task/signal/signal_action.rs b/src/kernel/task/signal/signal_action.rs index 708f9802..18348c32 100644 --- a/src/kernel/task/signal/signal_action.rs +++ b/src/kernel/task/signal/signal_action.rs @@ -1,22 +1,34 @@ -use super::{KResult, SAVED_DATA_SIZE}; -use crate::{ - io::BufferFill as _, - kernel::{ - constants::{EFAULT, EINVAL}, - syscall::UserMut, - user::UserBuffer, - }, -}; -use alloc::{collections::btree_map::BTreeMap, sync::Arc}; +use alloc::collections::btree_map::BTreeMap; +use alloc::sync::Arc; use core::arch::naked_asm; -use eonix_hal::{fpu::FpuState, traits::trap::RawTrapContext, trap::TrapContext}; + +use eonix_hal::fpu::FpuState; +use eonix_hal::traits::trap::RawTrapContext; +use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr}; use eonix_sync::Spin; -use posix_types::{ - ctypes::Long, - signal::{SigAction, SigActionHandler, SigActionRestorer, SigSet, Signal, TryFromSigAction}, - SIGNAL_NOW, +use posix_types::ctypes::Long; +use posix_types::signal::{ + SigAction, SigActionHandler, SigActionRestorer, SigSet, Signal, + TryFromSigAction, }; +use posix_types::SIGNAL_NOW; + +use super::{KResult, SAVED_DATA_SIZE}; +use crate::io::BufferFill as _; +use crate::kernel::constants::{EFAULT, EINVAL}; +use crate::kernel::syscall::UserMut; +use crate::kernel::user::UserBuffer; + +macro_rules! vdso_sym_addr { + ($sym:expr) => {{ + const VDSO_START_VADDR: VAddr = VAddr::from(0x7f00_0000_0000); + let vdso_link_start = eonix_hal::extern_symbol_addr!(VDSO_START); + + eonix_hal::symbol_addr!($sym) - vdso_link_start + + VDSO_START_VADDR.addr() + }}; +} #[cfg(target_arch = "x86_64")] #[unsafe(naked)] @@ -139,7 +151,9 @@ impl SignalAction { handler, restorer, .. } = self else { - unreachable!("Default and Ignore actions should not be handled here"); + unreachable!( + "Default and Ignore actions should not be handled here" + ); }; let current_sp = VAddr::from(trap_ctx.get_stack_pointer()); @@ -167,31 +181,19 @@ impl SignalAction { target_arch = "riscv64", target_arch = "loongarch64" )))] - compile_error!("`vdso_sigreturn` is not implemented for this architecture"); + compile_error!( + "`vdso_sigreturn` is not implemented for this architecture" + ); #[cfg(target_arch = "x86_64")] { // TODO: Check and use `vdso_rt_sigreturn` for x86 as well. - static VDSO_SIGRETURN_ADDR: &'static unsafe extern "C" fn() = - &(vdso_rt_sigreturn as unsafe extern "C" fn()); - - unsafe { - // SAFETY: To prevent the compiler from optimizing this into `la` instructions - // and causing a linking error. - (VDSO_SIGRETURN_ADDR as *const _ as *const usize).read_volatile() - } + vdso_sym_addr!(vdso_rt_sigreturn) } #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { - static VDSO_RT_SIGRETURN_ADDR: &'static unsafe extern "C" fn() = - &(vdso_rt_sigreturn as unsafe extern "C" fn()); - - unsafe { - // SAFETY: To prevent the compiler from optimizing this into `la` instructions - // and causing a linking error. 
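`vdso_sym_addr!` replaces the old `read_volatile` address-laundering trick with plain rebasing arithmetic: take the symbol's link-time offset within the vDSO image and add the fixed address where the image is mapped into user space. The same computation as a function (the addresses below are made up except the `0x7f00_0000_0000` base, which comes from the macro):

```rust
/// User-visible address of a vDSO symbol: its offset inside the
/// image, rebased onto the user-space mapping address.
fn vdso_user_addr(sym: usize, vdso_link_start: usize, user_base: usize) -> usize {
    (sym - vdso_link_start) + user_base
}

fn main() {
    let user = vdso_user_addr(0x0123, 0x0000, 0x7f00_0000_0000);
    assert_eq!(user, 0x7f00_0000_0123);
}
```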
- (VDSO_RT_SIGRETURN_ADDR as *const _ as *const usize).read_volatile() - } + vdso_sym_addr!(vdso_rt_sigreturn) } }; @@ -201,7 +203,8 @@ impl SignalAction { Some(return_address), &[Long::new_val(signal.into_raw() as _).get()], |vaddr, data| -> Result<(), u32> { - let mut buffer = UserBuffer::new(UserMut::new(vaddr), data.len())?; + let mut buffer = + UserBuffer::new(UserMut::new(vaddr), data.len())?; for ch in data.iter() { buffer.copy(&ch)?.ok_or(EFAULT)?; } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 11348e51..76c56dcc 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -1,43 +1,37 @@ -use super::{ - signal::{RaiseResult, SignalList}, - stackful, Process, ProcessList, WaitType, -}; -use crate::{ - kernel::{ - interrupt::default_irq_handler, - syscall::{syscall_handlers, SyscallHandler, User, UserMut}, - task::{clone::CloneArgs, futex::RobustListHead, CloneFlags}, - timer::{should_reschedule, timer_interrupt}, - user::{UserPointer, UserPointerMut}, - vfs::{filearray::FileArray, FsContext}, - }, - prelude::*, -}; -use alloc::{alloc::Allocator, sync::Arc}; +use alloc::alloc::Allocator; +use alloc::sync::Arc; +use core::future::{poll_fn, Future}; +use core::pin::Pin; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicBool, Ordering}; +use core::task::{Context, Poll}; + use atomic_unique_refcell::AtomicUniqueRefCell; -use core::{ - future::{poll_fn, Future}, - pin::Pin, - ptr::NonNull, - sync::atomic::{AtomicBool, Ordering}, - task::{Context, Poll}, -}; -use eonix_hal::{ - fpu::FpuState, - processor::{UserTLS, CPU}, - traits::{ - fault::Fault, - fpu::RawFpuState as _, - trap::{RawTrapContext, TrapReturn, TrapType}, - }, - trap::TrapContext, -}; +use eonix_hal::fpu::FpuState; +use eonix_hal::traits::fault::Fault; +use eonix_hal::traits::fpu::RawFpuState as _; +use eonix_hal::traits::trap::{RawTrapContext, TrapReturn, TrapType}; +use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::AsProofMut as _; +use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTreeAtomicLink}; use pointers::BorrowedArc; use posix_types::signal::Signal; use stalloc::UnsafeStalloc; +use super::signal::{RaiseResult, SignalList}; +use super::user_tls::UserTLS; +use super::{stackful, Process, ProcessList, WaitType}; +use crate::kernel::interrupt::default_irq_handler; +use crate::kernel::syscall::{syscall_handlers, SyscallHandler, User, UserMut}; +use crate::kernel::task::clone::CloneArgs; +use crate::kernel::task::{futex_exit, CloneFlags, RobustListHead}; +use crate::kernel::timer::{should_reschedule, timer_interrupt}; +use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::vfs::filearray::FileArray; +use crate::kernel::vfs::FsContext; +use crate::prelude::*; + #[eonix_percpu::define_percpu] static CURRENT_THREAD: Option> = None; @@ -89,10 +83,46 @@ pub struct Thread { pub fpu_state: AtomicUniqueRefCell, pub dead: AtomicBool, + pub exit_status: Spin>, + + /// Link in the global thread list. + all_threads_link: RBTreeAtomicLink, + + /// Link in the process's thread list. 
+ process_threads_link: RBTreeAtomicLink, inner: Spin, } +intrusive_adapter!(pub AllThreads = Arc: Thread { + all_threads_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub ProcessThreads = Arc: Thread { + process_threads_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllThreads { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.tid + } +} + +impl KeyAdapter<'_> for ProcessThreads { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.tid + } +} + impl ThreadBuilder { pub fn new() -> Self { Self { @@ -145,12 +175,18 @@ impl ThreadBuilder { self } - pub fn set_child_tid(mut self, set_child_tid: Option>) -> Self { + pub fn set_child_tid( + mut self, + set_child_tid: Option>, + ) -> Self { self.set_child_tid = set_child_tid; self } - pub fn clear_child_tid(mut self, clear_child_tid: Option>) -> Self { + pub fn clear_child_tid( + mut self, + clear_child_tid: Option>, + ) -> Self { self.clear_child_tid = clear_child_tid; self } @@ -177,7 +213,11 @@ impl ThreadBuilder { } /// Clone the thread from another thread. - pub fn clone_from(self, thread: &Thread, clone_args: &CloneArgs) -> KResult { + pub fn clone_from( + self, + thread: &Thread, + clone_args: &CloneArgs, + ) -> KResult { let inner = thread.inner.lock(); let mut trap_ctx = thread.trap_ctx.borrow().clone(); @@ -205,11 +245,12 @@ impl ThreadBuilder { FileArray::new_cloned(&thread.files) }; - let signal_list = if clone_args.flags.contains(CloneFlags::CLONE_SIGHAND) { - SignalList::new_shared(&thread.signal_list) - } else { - SignalList::new_cloned(&thread.signal_list) - }; + let signal_list = + if clone_args.flags.contains(CloneFlags::CLONE_SIGHAND) { + SignalList::new_shared(&thread.signal_list) + } else { + SignalList::new_cloned(&thread.signal_list) + }; Ok(self .files(files) @@ -246,6 +287,9 @@ impl ThreadBuilder { trap_ctx: AtomicUniqueRefCell::new(trap_ctx), fpu_state: AtomicUniqueRefCell::new(fpu_state), dead: AtomicBool::new(false), + exit_status: Spin::new(None), + all_threads_link: RBTreeAtomicLink::new(), + process_threads_link: RBTreeAtomicLink::new(), inner: Spin::new(ThreadInner { name, tls: self.tls, @@ -275,12 +319,9 @@ impl Thread { self.signal_list.raise(signal) } - /// # Safety - /// This function is unsafe because it accesses the `current_cpu()`, which needs - /// to be called in a preemption disabled context. - pub unsafe fn load_thread_area32(&self) { + pub fn activate_tls(&self) { if let Some(tls) = self.inner.lock().tls.as_ref() { - CPU::local().as_mut().set_tls32(tls); + tls.activate(); } } @@ -289,7 +330,10 @@ impl Thread { Ok(()) } - pub fn set_robust_list(&self, robust_list_address: Option>) { + pub fn set_robust_list( + &self, + robust_list_address: Option>, + ) { self.inner.lock().robust_list_address = robust_list_address; } @@ -340,18 +384,26 @@ impl Thread { } } - pub async fn force_kill(&self, signal: Signal) { - let mut proc_list = ProcessList::get().write().await; - unsafe { - // SAFETY: Preemption is disabled. - proc_list - .do_exit(self, WaitType::Signaled(signal), false) - .await; + pub fn exit(&self, exit_status: WaitType) { + { + let mut self_status = self.exit_status.lock(); + if self_status.is_some() { + // Someone has got here before us. 
+ return; + } + + *self_status = Some(exit_status); } + + self.dead.store(true, Ordering::Release); + } + + pub fn force_kill(&self, signal: Signal) { + self.exit(WaitType::Signaled(signal)); } pub fn is_dead(&self) -> bool { - self.dead.load(Ordering::SeqCst) + self.dead.load(Ordering::Acquire) } async fn real_run(&self) { @@ -371,7 +423,10 @@ while !self.is_dead() { if self.signal_list.has_pending_signal() { self.signal_list - .handle(&mut self.trap_ctx.borrow(), &mut self.fpu_state.borrow()) + .handle( + &mut self.trap_ctx.borrow(), + &mut self.fpu_state.borrow(), + ) .await; } @@ -394,8 +449,14 @@ error_code, address: addr, }) => { + if self.is_dead() { + return; + } + let mms = &self.process.mm_list; - if let Err(signal) = mms.handle_user_page_fault(addr, error_code).await { + if let Err(signal) = + mms.handle_user_page_fault(addr, error_code).await + { self.signal_list.raise(signal); } } @@ -405,8 +466,12 @@ TrapType::Fault(Fault::InvalidOp) => { self.signal_list.raise(Signal::SIGILL); } - TrapType::Fault(Fault::Unknown(_)) => unimplemented!("Unhandled fault"), - TrapType::Breakpoint => unimplemented!("Breakpoint in user space"), + TrapType::Fault(Fault::Unknown(_)) => { + unimplemented!("Unhandled fault") + } + TrapType::Breakpoint => { + unimplemented!("Breakpoint in user space") + } TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => { callback(timer_interrupt); @@ -416,11 +481,20 @@ } } TrapType::Syscall { no, args } => { - if let Some(retval) = self.handle_syscall(thd_alloc, no, args).await { + if self.is_dead() { + return; + } + + if let Some(retval) = + self.handle_syscall(thd_alloc, no, args).await + { let mut trap_ctx = self.trap_ctx.borrow(); trap_ctx.set_user_return_value(retval); - #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] + #[cfg(any( + target_arch = "riscv64", + target_arch = "loongarch64" + ))] { let pc = trap_ctx.get_program_counter(); trap_ctx.set_program_counter(pc + 4); @@ -442,14 +516,7 @@ CURRENT_THREAD.set(NonNull::new(&raw const *self as *mut _)); - unsafe { - eonix_preempt::disable(); - - // SAFETY: Preemption is disabled. - self.load_thread_area32(); - - eonix_preempt::enable(); - } + self.activate_tls(); let result = future.as_mut().poll(cx); @@ -463,7 +530,20 @@ } pub fn run(self: Arc) -> impl Future + Send + 'static { - async move { self.contexted(stackful(self.real_run())).await } + async move { + self.contexted(async { + stackful(self.real_run()).await; + + futex_exit(&self).await; + }) + .await; + + assert!( + self.is_dead(), + "`real_run` returned before the thread died?" + ); + ProcessList::send_to_reaper(self); + } } } @@ -488,7 +568,10 @@ pub async fn yield_now() { impl Future for Yield { type Output = (); - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + fn poll( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll { if self.as_mut().yielded { Poll::Ready(()) } else { diff --git a/src/kernel/task/user_tls/mod.rs b/src/kernel/task/user_tls/mod.rs new file mode 100644 index 00000000..2583b580 --- /dev/null +++ b/src/kernel/task/user_tls/mod.rs @@ -0,0 +1,34 @@ +cfg_if::cfg_if!
+    if #[cfg(target_arch = "x86_64")] {
+        mod x86_64;
+        pub use x86_64::*;
+    } else {
+        use eonix_mm::address::VAddr;
+        use posix_types::ctypes::PtrT;
+
+        use crate::prelude::KResult;
+
+        #[derive(Debug, Clone)]
+        pub struct UserTLS(VAddr);
+
+        #[derive(Debug, Clone)]
+        pub struct UserTLSDescriptor(VAddr);
+
+        impl UserTLS {
+            pub fn activate(&self) {
+                let _ = self.0;
+            }
+        }
+
+        impl UserTLSDescriptor {
+            pub fn new(tp: PtrT) -> KResult<Self> {
+                Ok(Self(VAddr::from(tp.addr())))
+            }
+
+            pub fn read(&self) -> KResult<UserTLS> {
+                Ok(UserTLS(self.0))
+            }
+        }
+    }
+}
diff --git a/src/kernel/task/user_tls/x86_64.rs b/src/kernel/task/user_tls/x86_64.rs
new file mode 100644
index 00000000..5bb33b97
--- /dev/null
+++ b/src/kernel/task/user_tls/x86_64.rs
@@ -0,0 +1,83 @@
+use core::fmt;
+
+use eonix_hal::arch_exported::gdt::{GDTEntry, GDT};
+use eonix_hal::processor::CPU;
+use eonix_mm::address::VAddr;
+use posix_types::ctypes::PtrT;
+use posix_types::x86_64::UserDescriptor;
+
+use crate::kernel::syscall::{User, UserMut};
+use crate::kernel::user::{CheckedUserPointer, UserPointerMut};
+use crate::prelude::KResult;
+
+#[derive(Debug, Clone)]
+pub struct UserTLS {
+    desc: GDTEntry,
+    base: u64,
+}
+
+pub struct UserTLSDescriptor<'a> {
+    ptr: UserPointerMut<'a, UserDescriptor>,
+}
+
+impl UserTLS {
+    fn new(base: u32, limit: u32) -> Self {
+        Self {
+            desc: GDTEntry::new_tls(base, limit),
+            base: base as u64,
+        }
+    }
+
+    fn new_page_limit(base: u32, limit_in_pages: u32) -> Self {
+        Self {
+            desc: GDTEntry::new_tls_page_limit(base, limit_in_pages),
+            base: base as u64,
+        }
+    }
+
+    pub fn activate(&self) {
+        CPU::local().as_mut().set_tls32(self.desc, self.base);
+    }
+}
+
+impl UserTLSDescriptor<'_> {
+    pub fn new(raw_tls: PtrT) -> KResult<Self> {
+        Ok(Self {
+            ptr: UserPointerMut::new(UserMut::<UserDescriptor>::with_addr(
+                raw_tls.addr(),
+            ))?,
+        })
+    }
+
+    pub fn read(&self) -> KResult<UserTLS> {
+        let mut desc = self.ptr.read()?;
+
+        let base = VAddr::from(desc.base as usize);
+
+        // Clear the TLS area if it is not present.
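+        // (A descriptor that is read/exec-only and not present is the
+        // conventional "empty" encoding in `set_thread_area`-style requests:
+        // instead of installing a segment we zero the TLS area -- `limit`
+        // bytes, or `limit` pages when the limit is page-granular -- at
+        // `base` in user memory.)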
+ if desc.flags.is_read_exec_only() && !desc.flags.is_present() { + if desc.limit != 0 && base != VAddr::NULL { + let len = if desc.flags.is_limit_in_pages() { + (desc.limit as usize) << 12 + } else { + desc.limit as usize + }; + + CheckedUserPointer::new(User::new(base), len)?.zero()?; + } + } + + desc.entry = GDT::TLS32_INDEX as u32; + self.ptr.write(desc)?; + + Ok(if desc.flags.is_limit_in_pages() { + UserTLS::new_page_limit(desc.base, desc.limit) + } else { + UserTLS::new(desc.base, desc.limit) + }) + } +} + +impl fmt::Debug for UserTLSDescriptor<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("UserTLSDescriptor").finish_non_exhaustive() + } +} diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 734655b7..ddc3cc1f 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -1,18 +1,19 @@ -use super::{ - task::{ProcessList, Session, Thread}, - user::{UserPointer, UserPointerMut}, -}; -use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; -use crate::{io::Buffer, prelude::*, sync::CondVar}; -use alloc::{ - collections::vec_deque::VecDeque, - sync::{Arc, Weak}, -}; +use alloc::collections::vec_deque::VecDeque; +use alloc::sync::{Arc, Weak}; + use bitflags::bitflags; use eonix_log::ConsoleWrite; -use eonix_sync::{AsProof as _, Mutex}; +use eonix_sync::{Mutex, Proof}; use posix_types::signal::Signal; +use super::constants::ESRCH; +use super::task::{ProcessList, Session, Thread}; +use super::user::{UserPointer, UserPointerMut}; +use crate::io::Buffer; +use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; +use crate::prelude::*; +use crate::sync::CondVar; + const BUFFER_SIZE: usize = 4096; const NCCS: usize = 19; @@ -351,12 +352,12 @@ pub trait TerminalDevice: Send + Sync { struct TerminalInner { termio: Termios, - session: Weak, buffer: VecDeque, } pub struct Terminal { inner: Mutex, + session: Spin>, device: Arc, cv: CondVar, } @@ -400,9 +401,9 @@ impl Terminal { Arc::new(Self { inner: Mutex::new(TerminalInner { termio: Termios::new_standard(), - session: Weak::new(), buffer: VecDeque::with_capacity(BUFFER_SIZE), }), + session: Spin::new(Weak::new()), cv: CondVar::new(), device, }) @@ -447,15 +448,21 @@ impl Terminal { } async fn signal(&self, inner: &mut TerminalInner, signal: Signal) { - if let Some(session) = inner.session.upgrade() { + if let Some(session) = self.session() { session.raise_foreground(signal).await; } + if !inner.termio.noflsh() { self.clear_read_buffer(inner); } } - async fn echo_and_signal(&self, inner: &mut TerminalInner, ch: u8, signal: Signal) { + async fn echo_and_signal( + &self, + inner: &mut TerminalInner, + ch: u8, + signal: Signal, + ) { self.echo_char(inner, ch); self.signal(inner, signal).await; } @@ -481,13 +488,19 @@ impl Terminal { match ch { 0xff => {} ch if ch == inner.termio.vintr() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGINT).await + return self + .echo_and_signal(&mut inner, ch, Signal::SIGINT) + .await } ch if ch == inner.termio.vquit() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGQUIT).await + return self + .echo_and_signal(&mut inner, ch, Signal::SIGQUIT) + .await } ch if ch == inner.termio.vsusp() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGTSTP).await + return self + .echo_and_signal(&mut inner, ch, Signal::SIGTSTP) + .await } _ => {} } @@ -517,8 +530,12 @@ impl Terminal { match ch { b'\r' if inner.termio.igncr() => {} - b'\r' if inner.termio.icrnl() => return self.do_commit_char(&mut inner, b'\n'), - b'\n' if inner.termio.inlcr() 
=> return self.do_commit_char(&mut inner, b'\r'), + b'\r' if inner.termio.icrnl() => { + return self.do_commit_char(&mut inner, b'\n') + } + b'\n' if inner.termio.inlcr() => { + return self.do_commit_char(&mut inner, b'\r') + } _ => self.do_commit_char(&mut inner, ch), } } @@ -589,26 +606,30 @@ impl Terminal { pub async fn ioctl(&self, request: TerminalIORequest<'_>) -> KResult<()> { match request { TerminalIORequest::GetProcessGroup(pgid_pointer) => { - if let Some(session) = self.inner.lock().await.session.upgrade() { - if let Some(pgroup) = session.foreground().await { - return pgid_pointer.write(pgroup.pgid); - } - } + let Some(session) = self.session() else { + return Err(ENOTTY); + }; + + let Some(pgroup) = session.foreground_pgroup() else { + return Err(ENOTTY); + }; - Err(ENOTTY) + pgid_pointer.write(pgroup.pgid) } TerminalIORequest::SetProcessGroup(pgid) => { let pgid = pgid.read()?; let procs = ProcessList::get().read().await; - let inner = self.inner.lock().await; - let session = inner.session.upgrade(); + let Some(session) = self.session() else { + return Err(ENOTTY); + }; - if let Some(session) = session { - session.set_foreground_pgid(pgid, procs.prove()).await - } else { - Err(ENOTTY) - } + let Some(pgroup) = procs.try_find_pgroup(pgid) else { + return Err(ESRCH); + }; + + session.set_foreground_pgroup(Some(&pgroup))?; + Ok(()) } TerminalIORequest::GetWindowSize(ptr) => { // TODO: Get the actual window size @@ -630,9 +651,12 @@ impl Terminal { let mut inner = self.inner.lock().await; // TODO: We ignore unknown bits for now. - inner.termio.iflag = TermioIFlags::from_bits_truncate(user_termios.iflag as u16); - inner.termio.oflag = TermioOFlags::from_bits_truncate(user_termios.oflag as u16); - inner.termio.lflag = TermioLFlags::from_bits_truncate(user_termios.lflag as u16); + inner.termio.iflag = + TermioIFlags::from_bits_truncate(user_termios.iflag as u16); + inner.termio.oflag = + TermioOFlags::from_bits_truncate(user_termios.oflag as u16); + inner.termio.lflag = + TermioLFlags::from_bits_truncate(user_termios.lflag as u16); inner.termio.cflag = user_termios.cflag; inner.termio.line = user_termios.line; inner.termio.cc = user_termios.cc; @@ -642,30 +666,52 @@ impl Terminal { } } - /// Assign the `session` to this terminal. Drop the previous session if `forced` is true. - pub async fn set_session(&self, session: &Arc, forced: bool) -> KResult<()> { - let mut inner = self.inner.lock().await; - if let Some(session) = inner.session.upgrade() { + pub fn session(&self) -> Option> { + self.session.lock().upgrade() + } + + /// Drop our current controlled session. The old session lose its controlling + /// terminal and all processes in it will receive a SIGHUP and then SIGCONT. + pub fn drop_session(&self, procs: Proof<'_, ProcessList>) { + let session = + core::mem::replace(&mut *self.session.lock(), Weak::new()); + let Some(old_session) = session.upgrade() else { + return; + }; + + old_session._drop_control_terminal(procs); + } + + /// Assign the `session` to this terminal. + /// Drop the previous session if `forced` is true. + pub async fn set_session( + self: &Arc, + session: &Arc, + forced: bool, + procs: Proof<'_, ProcessList>, + ) -> KResult<()> { + let mut cur_session = self.session.lock(); + + // XXX: Holding spinlock for too long? 
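+        // Everything below runs with the `session` spinlock held; that is
+        // tolerable only because `_drop_control_terminal` takes a
+        // `Proof<'_, ProcessList>` and is a plain synchronous call, so we
+        // never sleep while spinning. The XXX above stands if that changes.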
+ if let Some(old_session) = cur_session.upgrade() { + if old_session.sid == session.sid { + return Ok(()); + } + if !forced { - Err(EPERM) - } else { - session.drop_control_terminal().await; - inner.session = Arc::downgrade(&session); - Ok(()) + return Err(EPERM); } - } else { - // Sessions should set their `control_terminal` field. - inner.session = Arc::downgrade(&session); - Ok(()) + + // TODO: Check whether the caller has the CAP_SYS_ADMIN capability. + + // We've stolen the terminal from the old session. + old_session._drop_control_terminal(procs); } - } - pub async fn drop_session(&self) { - self.inner.lock().await.session = Weak::new(); - } + *cur_session = Arc::downgrade(session); + session._set_control_terminal(self, procs); - pub async fn session(&self) -> Option> { - self.inner.lock().await.session.upgrade() + Ok(()) } } diff --git a/src/kernel/timer.rs b/src/kernel/timer.rs index 9b6a3ff2..1dbb1382 100644 --- a/src/kernel/timer.rs +++ b/src/kernel/timer.rs @@ -76,6 +76,8 @@ impl Ticks { } impl Instant { + pub const UNIX_EPOCH: Self = Self::default(); + pub const fn default() -> Self { Instant { secs_since_epoch: 0, diff --git a/src/kernel/user/dataflow.rs b/src/kernel/user/dataflow.rs index 02e7d791..5d8ac167 100644 --- a/src/kernel/user/dataflow.rs +++ b/src/kernel/user/dataflow.rs @@ -1,18 +1,15 @@ -use crate::{ - io::{Buffer, FillResult}, - prelude::*, -}; -use crate::{ - io::{IntoStream, Stream}, - kernel::{ - constants::{EFAULT, EINVAL}, - syscall::{User, UserMut}, - }, -}; -use core::{arch::asm, ffi::CStr, marker::PhantomData}; +use core::arch::asm; +use core::ffi::CStr; +use core::marker::PhantomData; + use eonix_mm::address::Addr; use eonix_preempt::assert_preempt_enabled; +use crate::io::{Buffer, FillResult, IntoStream, Stream}; +use crate::kernel::constants::{EFAULT, EINVAL}; +use crate::kernel::syscall::{User, UserMut}; +use crate::prelude::*; + pub struct CheckedUserPointer<'a> { ptr: User, len: usize, diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs index 8bcd9f8a..a401f4f7 100644 --- a/src/kernel/vfs/dentry.rs +++ b/src/kernel/vfs/dentry.rs @@ -1,35 +1,60 @@ pub mod dcache; +mod walk; -use super::{ - inode::{Ino, Inode, Mode, RenameData, WriteOffset}, - DevId, FsContext, -}; -use crate::{ - hash::KernelHasher, - io::{Buffer, ByteBuffer}, - kernel::{block::BlockDevice, CharDevice}, - path::{Path, PathComponent}, - prelude::*, - rcu::{RCUNode, RCUPointer, RCUReadGuard}, -}; -use crate::{ - io::Stream, - kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE}, -}; -use alloc::sync::{Arc, Weak}; -use core::{ - fmt, - hash::{BuildHasher, BuildHasherDefault, Hasher}, - ops::ControlFlow, - sync::atomic::{AtomicPtr, AtomicU64, Ordering}, -}; +use alloc::sync::Arc; +use core::cell::UnsafeCell; +use core::fmt; +use core::hash::{BuildHasher, BuildHasherDefault, Hasher}; +use core::sync::atomic::{AtomicPtr, AtomicU64, AtomicU8, Ordering}; + +use arcref::AsArcRef; use eonix_sync::LazyLock; use pointers::BorrowedArc; -use posix_types::{namei::RenameFlags, open::OpenFlags, result::PosixError, stat::StatX}; +use posix_types::namei::RenameFlags; +use posix_types::open::OpenFlags; +use posix_types::result::PosixError; +use posix_types::stat::StatX; + +use super::inode::{Ino, InodeUse, RenameData, WriteOffset}; +use super::types::{DeviceId, Format, Mode, Permission}; +use super::FsContext; +use crate::hash::KernelHasher; +use crate::io::{Buffer, Stream}; +use crate::kernel::block::BlockDevice; +use crate::kernel::constants::{EEXIST, 
EINVAL, EISDIR, ELOOP, ENOENT, EPERM, ERANGE};
+use crate::kernel::CharDevice;
+use crate::path::Path;
+use crate::prelude::*;
+use crate::rcu::{rcu_read_lock, RCUNode, RCUPointer, RCUReadGuard};
+
+// TODO: Implement slab reclaim
+#[allow(unused)]
+const D_INVALID: u8 = 0;
+const D_REGULAR: u8 = 1;
+const D_DIRECTORY: u8 = 2;
+const D_SYMLINK: u8 = 3;
+
+#[derive(Debug, PartialEq, Eq)]
+enum DentryKind {
+    Regular = D_REGULAR as isize,
+    Directory = D_DIRECTORY as isize,
+    Symlink = D_SYMLINK as isize,
+}
 
-struct DentryData {
-    inode: Arc<Inode>,
-    flags: u64,
+/// The [`Inode`] associated with a [`Dentry`].
+///
+/// An inode may be assigned to a negative dentry exactly once: either when
+/// the dentry is invalid and we create a file or directory on it, or when
+/// the dentry is brought into the dcache by [lookup()].
+///
+/// This guarantees that as long as we read a non-invalid kind from
+/// [`Self::kind`], we are synced with the writer and can safely read the
+/// [`Self::inode`] field without reading torn data.
+///
+/// [lookup()]: crate::kernel::vfs::inode::InodeDirOps::lookup
+struct AssociatedInode {
+    kind: UnsafeCell<Option<DentryKind>>,
+    inode: UnsafeCell<Option<InodeUse>>,
 }
 
 /// # Safety
@@ -48,8 +73,7 @@ pub struct Dentry {
     prev: AtomicPtr<Dentry>,
     next: AtomicPtr<Dentry>,
 
-    // RCU Mutable
-    data: RCUPointer<DentryData>,
+    inode: AssociatedInode,
 }
 
 pub(super) static DROOT: LazyLock<Arc<Dentry>> = LazyLock::new(|| {
@@ -59,7 +83,7 @@
         hash: AtomicU64::new(0),
         prev: AtomicPtr::default(),
         next: AtomicPtr::default(),
-        data: RCUPointer::empty(),
+        inode: AssociatedInode::new(),
     });
 
     unsafe {
@@ -79,12 +103,6 @@ impl fmt::Debug for Dentry {
     }
 }
 
-const D_DIRECTORY: u64 = 1;
-#[allow(dead_code)]
-const D_MOUNTPOINT: u64 = 2;
-const D_SYMLINK: u64 = 4;
-const D_REGULAR: u64 = 8;
-
 impl RCUNode for Dentry {
     fn rcu_prev(&self) -> &AtomicPtr<Self> {
         &self.prev
@@ -115,50 +133,19 @@ impl Dentry {
         self.hash.store(hash, Ordering::Relaxed);
     }
-
-    fn find(self: &Arc<Self>, name: &[u8]) -> KResult<Arc<Self>> {
-        let data = self.data.load();
-        let data = data.as_ref().ok_or(ENOENT)?;
-
-        if data.flags & D_DIRECTORY == 0 {
-            return Err(ENOTDIR);
-        }
-
-        match name {
-            b"." => Ok(self.clone()),
-            b".." => Ok(self.parent().clone()),
-            _ => {
-                let dentry = Dentry::create(self.clone(), name);
-
-                if let Some(found) = dcache::d_find_fast(&dentry) {
-                    unsafe {
-                        // SAFETY: This is safe because the dentry is never shared with
-                        // others so we can drop them safely.
-                        let _ = dentry.name.swap(None);
-                        let _ = dentry.parent.swap(None);
-                    }
-
-                    return Ok(found);
-                }
-
-                dcache::d_try_revalidate(&dentry);
-                dcache::d_add(dentry.clone());
-
-                Ok(dentry)
-            }
-        }
-    }
 }
 
 impl Dentry {
     pub fn create(parent: Arc<Dentry>, name: &[u8]) -> Arc<Self> {
+        // TODO!!!: don't acquire our parent's refcount here...
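+        // (Each dentry currently pins its parent with a real `Arc`, so a
+        // cached leaf keeps its entire ancestor chain alive until the dcache
+        // releases it; the TODO above is about breaking that chain.)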
+ let val = Arc::new(Self { parent: RCUPointer::new(parent), name: RCUPointer::new(Arc::new(Arc::from(name))), hash: AtomicU64::new(0), prev: AtomicPtr::default(), next: AtomicPtr::default(), - data: RCUPointer::empty(), + inode: AssociatedInode::new(), }); val.rehash(); @@ -174,7 +161,7 @@ impl Dentry { && &***self.name() == &***other.name() } - pub fn name(&self) -> RCUReadGuard>> { + pub fn name(&self) -> RCUReadGuard<'_, BorrowedArc<'_, Arc<[u8]>>> { self.name.load().expect("Dentry has no name") } @@ -182,7 +169,7 @@ impl Dentry { (***self.name()).clone() } - pub fn parent<'a>(&self) -> RCUReadGuard<'a, BorrowedArc> { + pub fn parent<'a>(&self) -> RCUReadGuard<'a, BorrowedArc<'_, Dentry>> { self.parent.load().expect("Dentry has no parent") } @@ -192,205 +179,98 @@ impl Dentry { .map_or(core::ptr::null(), |parent| Arc::as_ptr(&parent)) } - fn save_data(&self, inode: Arc, flags: u64) -> KResult<()> { - let new = DentryData { inode, flags }; - - // TODO!!!: We don't actually need to use `RCUPointer` here - // Safety: this function may only be called from `create`-like functions which requires the - // superblock's write locks to be held, so only one creation can happen at a time and we - // can't get a reference to the old data. - let old = unsafe { self.data.swap(Some(Arc::new(new))) }; - assert!(old.is_none()); - - Ok(()) + pub fn fill(&self, file: InodeUse) { + self.inode.store(file); } - pub fn save_reg(&self, file: Arc) -> KResult<()> { - self.save_data(file, D_REGULAR) + pub fn inode(&self) -> Option { + self.inode.load().map(|(_, inode)| inode.clone()) } - pub fn save_symlink(&self, link: Arc) -> KResult<()> { - self.save_data(link, D_SYMLINK) - } - - pub fn save_dir(&self, dir: Arc) -> KResult<()> { - self.save_data(dir, D_DIRECTORY) - } - - pub fn get_inode(&self) -> KResult> { - self.data - .load() - .as_ref() - .ok_or(ENOENT) - .map(|data| data.inode.clone()) + pub fn get_inode(&self) -> KResult { + self.inode().ok_or(ENOENT) } pub fn is_directory(&self) -> bool { - let data = self.data.load(); - data.as_ref() - .map_or(false, |data| data.flags & D_DIRECTORY != 0) + self.inode + .load() + .map_or(false, |(kind, _)| kind == DentryKind::Directory) } pub fn is_valid(&self) -> bool { - self.data.load().is_some() + self.inode.load().is_some() } - pub fn open_check(self: &Arc, flags: OpenFlags, mode: Mode) -> KResult<()> { - let data = self.data.load(); - - if data.is_some() { - if flags.contains(OpenFlags::O_CREAT | OpenFlags::O_EXCL) { - Err(EEXIST) - } else { - Ok(()) - } - } else { - if !flags.contains(OpenFlags::O_CREAT) { - return Err(ENOENT); + pub async fn open_check(self: &Arc, flags: OpenFlags, perm: Permission) -> KResult<()> { + match self.inode.load() { + Some(_) => { + if flags.contains(OpenFlags::O_CREAT | OpenFlags::O_EXCL) { + Err(EEXIST) + } else { + Ok(()) + } } + None => { + if !flags.contains(OpenFlags::O_CREAT) { + return Err(ENOENT); + } - let parent = self.parent().get_inode()?; - parent.creat(self, mode) + let parent = self.parent().get_inode()?; + parent.create(self, perm).await + } } } } impl Dentry { - fn resolve_directory( + pub async fn open( context: &FsContext, - dentry: Arc, - nrecur: u32, + path: &Path, + follow_symlinks: bool, ) -> KResult> { - if nrecur >= 16 { - return Err(ELOOP); - } - - let data = dentry.data.load(); - let data = data.as_ref().ok_or(ENOENT)?; - - match data.flags { - flags if flags & D_REGULAR != 0 => Err(ENOTDIR), - flags if flags & D_DIRECTORY != 0 => Ok(dentry), - flags if flags & D_SYMLINK != 0 => { - let mut buffer = [0u8; 
256]; - let mut buffer = ByteBuffer::new(&mut buffer); - - data.inode.readlink(&mut buffer)?; - let path = Path::new(buffer.data())?; - - let dentry = - Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1)?; - - Self::resolve_directory(context, dentry, nrecur + 1) - } - _ => panic!("Invalid dentry flags"), - } + let cwd = context.cwd.lock().clone(); + Self::open_at(context, &cwd, path, follow_symlinks).await } - pub fn open_recursive( + pub async fn open_at( context: &FsContext, - cwd: &Arc, - path: Path, - follow: bool, - nrecur: u32, + at: &Arc, + path: &Path, + follow_symlinks: bool, ) -> KResult> { - // too many recursive search layers will cause stack overflow - // so we use 16 for now - if nrecur >= 16 { - return Err(ELOOP); - } - - let mut cwd = if path.is_absolute() { - context.fsroot.clone() - } else { - cwd.clone() - }; - - for item in path.iter() { - if let PathComponent::TrailingEmpty = item { - if cwd.data.load().as_ref().is_none() { - return Ok(cwd); - } - } + let mut found = context.start_recursive_walk(at, path).await?; - cwd = Self::resolve_directory(context, cwd, nrecur)?; - - match item { - PathComponent::TrailingEmpty | PathComponent::Current => {} // pass - PathComponent::Parent => { - if !cwd.hash_eq(&context.fsroot) { - let parent = cwd.parent().clone(); - cwd = Self::resolve_directory(context, parent, nrecur)?; - } - continue; - } - PathComponent::Name(name) => { - cwd = cwd.find(name)?; - } - } + if !follow_symlinks { + return Ok(found); } - if follow { - let data = cwd.data.load(); - - if let Some(data) = data.as_ref() { - if data.flags & D_SYMLINK != 0 { - let data = cwd.data.load(); - let data = data.as_ref().unwrap(); - let mut buffer = [0u8; 256]; - let mut buffer = ByteBuffer::new(&mut buffer); - - data.inode.readlink(&mut buffer)?; - let path = Path::new(buffer.data())?; - - let parent = cwd.parent().clone(); - cwd = Self::open_recursive(context, &parent, path, true, nrecur + 1)?; + loop { + match found.inode.load() { + Some((DentryKind::Symlink, inode)) => { + found = context.follow_symlink(found.aref(), inode, 0).await?; } + _ => return Ok(found), } } - - Ok(cwd) } - pub fn open(context: &FsContext, path: Path, follow_symlinks: bool) -> KResult> { - let cwd = context.cwd.lock().clone(); - Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0) - } + pub fn get_path(self: &Arc, context: &FsContext, buffer: &mut dyn Buffer) -> KResult<()> { + let rcu_read = rcu_read_lock(); - pub fn open_at( - context: &FsContext, - at: &Arc, - path: Path, - follow_symlinks: bool, - ) -> KResult> { - Dentry::open_recursive(context, at, path, follow_symlinks, 0) - } - - pub fn get_path( - self: &Arc, - context: &FsContext, - buffer: &mut dyn Buffer, - ) -> KResult<()> { - let locked_parent = self.parent(); - - let path = { - let mut path = vec![]; - - let mut parent = locked_parent.borrow(); - let mut dentry = BorrowedArc::new(self); + let mut path = vec![]; - while Arc::as_ptr(&dentry) != Arc::as_ptr(&context.fsroot) { - if path.len() > 32 { - return Err(ELOOP); - } + let mut current = self.aref(); + let mut parent = self.parent.dereference(&rcu_read).unwrap(); - path.push(dentry.name().clone()); - dentry = parent; - parent = dentry.parent.load_protected(&locked_parent).unwrap(); + while !current.ptr_eq_arc(&context.fsroot) { + if path.len() > 32 { + return Err(ELOOP); } - path - }; + path.push(current.name.dereference(&rcu_read).unwrap()); + current = parent; + parent = current.parent.dereference(&rcu_read).unwrap(); + } 
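+
+        // `path` now holds the component names from leaf to root, collected
+        // entirely under the RCU read lock; emit the leading "/" and then
+        // the components in reverse order.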
buffer.fill(b"/")?.ok_or(ERANGE)?; for item in path.iter().rev().map(|name| name.as_ref()) { @@ -405,18 +285,18 @@ impl Dentry { } impl Dentry { - pub fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + pub async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load().format() { - Mode::DIR => Err(EISDIR), - Mode::REG => inode.read(buffer, offset), - Mode::BLK => { + match inode.format { + Format::DIR => Err(EISDIR), + Format::REG => inode.read(buffer, offset).await, + Format::BLK => { let device = BlockDevice::get(inode.devid()?)?; - Ok(device.read_some(offset, buffer)?.allow_partial()) + Ok(device.read_some(offset, buffer).await?.allow_partial()) } - Mode::CHR => { + Format::CHR => { let device = CharDevice::get(inode.devid()?).ok_or(EPERM)?; device.read(buffer) } @@ -424,32 +304,32 @@ impl Dentry { } } - pub fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + pub async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load().format() { - Mode::DIR => Err(EISDIR), - Mode::REG => inode.write(stream, offset), - Mode::BLK => Err(EINVAL), // TODO - Mode::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), + match inode.format { + Format::DIR => Err(EISDIR), + Format::REG => inode.write(stream, offset).await, + Format::BLK => Err(EINVAL), // TODO + Format::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), _ => Err(EINVAL), } } - pub fn readdir(&self, offset: usize, mut callback: F) -> KResult + pub async fn readdir(&self, offset: usize, mut for_each_entry: F) -> KResult> where - F: FnMut(&[u8], Ino) -> KResult>, + F: FnMut(&[u8], Ino) -> KResult + Send, { let dir = self.get_inode()?; - dir.do_readdir(offset, &mut callback) + dir.readdir(offset, &mut for_each_entry).await } - pub fn mkdir(&self, mode: Mode) -> KResult<()> { + pub async fn mkdir(&self, perm: Permission) -> KResult<()> { if self.get_inode().is_ok() { Err(EEXIST) } else { let dir = self.parent().get_inode()?; - dir.mkdir(self, mode) + dir.mkdir(self, perm).await } } @@ -457,50 +337,50 @@ impl Dentry { self.get_inode()?.statx(stat, mask) } - pub fn truncate(&self, size: usize) -> KResult<()> { - self.get_inode()?.truncate(size) + pub async fn truncate(&self, size: usize) -> KResult<()> { + self.get_inode()?.truncate(size).await } - pub fn unlink(self: &Arc) -> KResult<()> { + pub async fn unlink(self: &Arc) -> KResult<()> { if self.get_inode().is_err() { Err(ENOENT) } else { let dir = self.parent().get_inode()?; - dir.unlink(self) + dir.unlink(self).await } } - pub fn symlink(self: &Arc, link: &[u8]) -> KResult<()> { + pub async fn symlink(self: &Arc, link: &[u8]) -> KResult<()> { if self.get_inode().is_ok() { Err(EEXIST) } else { let dir = self.parent().get_inode()?; - dir.symlink(self, link) + dir.symlink(self, link).await } } - pub fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { - self.get_inode()?.readlink(buffer) + pub async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { + self.get_inode()?.readlink(buffer).await } - pub fn mknod(&self, mode: Mode, devid: DevId) -> KResult<()> { + pub async fn mknod(&self, mode: Mode, devid: DeviceId) -> KResult<()> { if self.get_inode().is_ok() { Err(EEXIST) } else { let dir = self.parent().get_inode()?; - 
dir.mknod(self, mode, devid)
+            dir.mknod(self, mode, devid).await
         }
     }
 
-    pub fn chmod(&self, mode: Mode) -> KResult<()> {
-        self.get_inode()?.chmod(mode)
+    pub async fn chmod(&self, mode: Mode) -> KResult<()> {
+        self.get_inode()?.chmod(mode.perm()).await
     }
 
-    pub fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
-        self.get_inode()?.chown(uid, gid)
+    pub async fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
+        self.get_inode()?.chown(uid, gid).await
     }
 
-    pub fn rename(self: &Arc<Self>, new: &Arc<Self>, flags: RenameFlags) -> KResult<()> {
+    pub async fn rename(self: &Arc<Self>, new: &Arc<Self>, flags: RenameFlags) -> KResult<()> {
         if Arc::ptr_eq(self, new) {
             return Ok(());
         }
@@ -509,22 +389,87 @@ impl Dentry {
         let new_parent = new.parent().get_inode()?;
 
         // If the two dentries are not in the same filesystem, return EXDEV.
-        if !Weak::ptr_eq(&old_parent.vfs, &new_parent.vfs) {
+        if !old_parent.sbref().eq(&new_parent.sbref()) {
             Err(PosixError::EXDEV)?;
         }
 
-        let vfs = old_parent.vfs.upgrade().ok_or(EIO)?;
-
         let rename_data = RenameData {
             old_dentry: self,
             new_dentry: new,
             new_parent,
-            vfs,
             is_exchange: flags.contains(RenameFlags::RENAME_EXCHANGE),
             no_replace: flags.contains(RenameFlags::RENAME_NOREPLACE),
         };
 
         // Delegate to the parent directory's rename implementation
-        old_parent.rename(rename_data)
+        old_parent.rename(rename_data).await
+    }
+}
+
+impl DentryKind {
+    fn into_raw(self) -> u8 {
+        unsafe { core::mem::transmute(self) }
+    }
+
+    fn from_raw(raw: u8) -> Option<Self> {
+        unsafe { core::mem::transmute(raw) }
+    }
+
+    fn as_atomic(me: &UnsafeCell<Option<Self>>) -> &AtomicU8 {
+        unsafe { AtomicU8::from_ptr(me.get().cast()) }
+    }
+
+    fn atomic_acq(me: &UnsafeCell<Option<Self>>) -> Option<Self> {
+        Self::from_raw(Self::as_atomic(me).load(Ordering::Acquire))
+    }
+
+    fn atomic_swap_acqrel(me: &UnsafeCell<Option<Self>>, kind: Option<Self>) -> Option<Self> {
+        Self::from_raw(Self::as_atomic(me).swap(kind.map_or(0, Self::into_raw), Ordering::AcqRel))
+    }
+}
+
+impl AssociatedInode {
+    fn new() -> Self {
+        Self {
+            inode: UnsafeCell::new(None),
+            kind: UnsafeCell::new(None),
+        }
+    }
+
+    fn store(&self, inode: InodeUse) {
+        let kind = match inode.format {
+            Format::REG | Format::BLK | Format::CHR => DentryKind::Regular,
+            Format::DIR => DentryKind::Directory,
+            Format::LNK => DentryKind::Symlink,
+        };
+
+        unsafe {
+            // SAFETY: We should be the first and only one to store the inode,
+            //         as is checked below. All other readers reading a
+            //         non-invalid kind will see the fully written inode.
+            self.inode.get().write(Some(inode));
+        }
+
+        assert_eq!(
+            DentryKind::atomic_swap_acqrel(&self.kind, Some(kind)),
+            None,
+            "An inode may only be stored into a dentry once."
+        );
+    }
+
+    fn kind(&self) -> Option<DentryKind> {
+        DentryKind::atomic_acq(&self.kind)
+    }
+
+    fn load(&self) -> Option<(DentryKind, &InodeUse)> {
+        self.kind().map(|kind| unsafe {
+            let inode = (&*self.inode.get())
+                .as_ref()
+                .expect("Dentry with non-invalid kind has no inode");
+            (kind, inode)
+        })
+    }
+}
+
+unsafe impl Send for AssociatedInode {}
+unsafe impl Sync for AssociatedInode {}
diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs
index 188a1cfc..ee7503dc 100644
--- a/src/kernel/vfs/dentry/dcache.rs
+++ b/src/kernel/vfs/dentry/dcache.rs
@@ -1,13 +1,13 @@
-use super::{Dentry, Inode};
+use super::Dentry;
 use crate::kernel::constants::ENOENT;
-use crate::kernel::task::block_on;
-use crate::kernel::vfs::inode::Mode;
-use crate::rcu::RCUPointer;
+use crate::rcu::{RCUPointer, RCUReadLock};
 use crate::{
     prelude::*,
     rcu::{RCUIterator, RCUList},
 };
 use alloc::sync::Arc;
+use arcref::ArcRef;
+use core::ops::Deref;
 use core::sync::atomic::Ordering;
 use eonix_sync::Mutex;
 
@@ -18,50 +18,58 @@ static DCACHE: [RCUList<Dentry>; 1 << DCACHE_HASH_BITS] =
 
 static D_EXCHANGE_LOCK: Mutex<()> = Mutex::new(());
 
-pub fn d_hinted(dentry: &Dentry) -> &'static RCUList<Dentry> {
-    let hash = dentry.hash.load(Ordering::Relaxed) as usize & ((1 << DCACHE_HASH_BITS) - 1);
+pub trait DCacheItem {
+    fn d_hash(&self) -> usize;
+    fn d_parent(&self) -> *const Dentry;
+    fn d_name<'r, 'a: 'r, 'b: 'a>(
+        &'a self,
+        rcu_read: &'b RCUReadLock,
+    ) -> impl Deref<Target = [u8]> + 'r;
+}
+
+fn d_eq(lhs: &impl DCacheItem, rhs: &impl DCacheItem, rcu_read: &RCUReadLock) -> bool {
+    lhs.d_hash() == rhs.d_hash()
+        && lhs.d_parent() == rhs.d_parent()
+        && *lhs.d_name(rcu_read) == *rhs.d_name(rcu_read)
+}
+
+fn d_hinted(item: &impl DCacheItem) -> &'static RCUList<Dentry> {
+    let hash = item.d_hash() & ((1 << DCACHE_HASH_BITS) - 1);
     &DCACHE[hash]
 }
 
-pub fn d_iter_for(dentry: &Dentry) -> RCUIterator<'static, Dentry> {
-    d_hinted(dentry).iter()
+fn d_iter_for<'rcu>(
+    item: &impl DCacheItem,
+    rcu_read: &'rcu RCUReadLock,
+) -> RCUIterator<'static, 'rcu, Dentry> {
+    d_hinted(item).iter(rcu_read)
+}
+
+pub fn d_find_rcu<'rcu>(
+    item: &impl DCacheItem,
+    rcu_read: &'rcu RCUReadLock,
+) -> Option<ArcRef<'rcu, Dentry>> {
+    d_iter_for(item, rcu_read).find(|cur_ref| cur_ref.with_arc(|cur| d_eq(cur, item, rcu_read)))
 }
 
 /// Add the dentry to the dcache
 pub fn d_add(dentry: Arc<Dentry>) {
+    // TODO: Add `children` field to parent and lock parent dentry to avoid
+    //       concurrent insertion causing duplication.
     d_hinted(&dentry).insert(dentry);
 }
 
-pub fn d_find_fast(dentry: &Dentry) -> Option<Arc<Dentry>> {
-    d_iter_for(dentry)
-        .find(|cur| cur.hash_eq(dentry))
-        .map(|dentry| dentry.clone())
-}
-
 /// Call `lookup()` on the parent inode to try find if the dentry points to a valid inode
 ///
 /// Silently fail without any side effects
-pub fn d_try_revalidate(dentry: &Arc<Dentry>) {
-    let _lock = block_on(D_EXCHANGE_LOCK.lock());
-
-    (|| -> KResult<()> {
-        let parent = dentry.parent().get_inode()?;
-        let inode = parent.lookup(dentry)?.ok_or(ENOENT)?;
+pub async fn d_try_revalidate(dentry: &Arc<Dentry>) -> KResult<()> {
+    let _lock = D_EXCHANGE_LOCK.lock().await;
 
-        d_save(dentry, inode)
-    })()
-    .unwrap_or_default();
-}
+    let parent = dentry.parent().get_inode()?;
+    let inode = parent.lookup(dentry).await?.ok_or(ENOENT)?;
 
-/// Save the inode to the dentry.
-///
-/// Dentry flags will be determined by the inode's mode.
-pub fn d_save(dentry: &Arc, inode: Arc) -> KResult<()> { - match inode.mode.load().format() { - Mode::DIR => dentry.save_dir(inode), - Mode::LNK => dentry.save_symlink(inode), - _ => dentry.save_reg(inode), - } + dentry.fill(inode); + Ok(()) } /// Replace the old dentry with the new one in the dcache @@ -95,3 +103,34 @@ pub async fn d_exchange(old: &Arc, new: &Arc) { d_add(old.clone()); d_add(new.clone()); } + +impl DCacheItem for Arc { + fn d_hash(&self) -> usize { + self.hash.load(Ordering::Relaxed) as usize + } + + fn d_parent(&self) -> *const Dentry { + self.parent_addr() + } + + fn d_name<'r, 'a: 'r, 'b: 'a>( + &'a self, + rcu_read: &'b RCUReadLock, + ) -> impl Deref + 'r { + struct Name<'a>(ArcRef<'a, Arc<[u8]>>); + + impl Deref for Name<'_> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + Name( + self.name + .dereference(rcu_read) + .expect("Dentry should have a non-null name"), + ) + } +} diff --git a/src/kernel/vfs/dentry/walk.rs b/src/kernel/vfs/dentry/walk.rs new file mode 100644 index 00000000..7b1060ac --- /dev/null +++ b/src/kernel/vfs/dentry/walk.rs @@ -0,0 +1,360 @@ +use alloc::boxed::Box; +use alloc::sync::Arc; +use core::future::Future; +use core::hash::{BuildHasher, BuildHasherDefault, Hasher}; +use core::ops::Deref; +use core::pin::Pin; + +use arcref::{ArcRef, AsArcRef}; +use posix_types::result::PosixError; + +use super::dcache::{self, DCacheItem}; +use super::{Dentry, DentryKind}; +use crate::hash::KernelHasher; +use crate::io::ByteBuffer; +use crate::kernel::constants::ELOOP; +use crate::kernel::vfs::inode::InodeUse; +use crate::kernel::vfs::FsContext; +use crate::path::{Path, PathComponent, PathIterator}; +use crate::prelude::KResult; +use crate::rcu::{rcu_read_lock, RCUReadLock}; + +struct DentryFind<'a, 'b> { + parent: &'a Dentry, + name: &'b [u8], + hash: usize, +} + +pub enum WalkResultRcu<'rcu, 'path> { + Err(PosixError), + Ok(ArcRef<'rcu, Dentry>), + Symlink { + symlink: ArcRef<'rcu, Dentry>, + inode: InodeUse, + }, + Miss { + parent: ArcRef<'rcu, Dentry>, + name: &'path [u8], + }, +} + +pub enum WalkResult { + Err(PosixError), + Ok(Arc), + Symlink { + symlink: Arc, + inode: InodeUse, + }, +} + +impl Dentry { + /// Quick path of the dentry find operation. + /// + /// Check invalid and non-directory dentries, return immediately on dot and + /// dotdot component, and do a quick rcu dcache lookup. + /// + /// Note that while `Some(dentry)` guarantees present and valid dentry, + /// returning `None` is acceptable if the actual file exists but is not in + /// the dentry cache. If so, we should check again with `lookup`. + fn find_rcu<'r, 's: 'r>( + self: ArcRef<'s, Self>, + name: &[u8], + rcu_read: &'r RCUReadLock, + ) -> Result>, PosixError> { + match self.inode.load() { + Some((DentryKind::Directory, _)) => {} + Some(_) => return Err(PosixError::ENOTDIR), + None => return Err(PosixError::ENOENT), + } + + match name { + b"." => Ok(Some(self)), + b".." 
=> Ok(Some( + self.parent + .dereference(rcu_read) + .expect("The field `parent` should be non-null"), + )), + _ => { + let dentry_find = DentryFind::new(&self, name); + Ok(dcache::d_find_rcu(&dentry_find, rcu_read)) + } + } + } + + async fn find_slow(self: &Arc, name: &[u8]) -> Result, PosixError> { + let dentry = Dentry::create(self.clone(), name); + + let _ = dcache::d_try_revalidate(&dentry).await; + dcache::d_add(dentry.clone()); + + Ok(dentry) + } + + pub async fn find_full(self: &Arc, name: &[u8]) -> Result, PosixError> { + if let Some(dentry) = self.aref().find_rcu(name, &rcu_read_lock())? { + return Ok(dentry.clone_arc()); + } + + self.find_slow(name).await + } +} + +impl FsContext { + /// Walk the pathname and try to find the corresponding dentry FAST without + /// consulting the VFS for invalid dentries encountered. + fn walk_rcu<'rcu, 'path>( + &self, + mut current: ArcRef<'rcu, Dentry>, + iter: &mut PathIterator<'path>, + rcu_read: &'rcu RCUReadLock, + ) -> WalkResultRcu<'rcu, 'path> { + use PathComponent::*; + + loop { + let inode = current.inode.load(); + + if iter.is_empty() { + break; + } + + // Skip symlink resolution in rcu walk without consuming the iter. + if let Some((DentryKind::Symlink, inode)) = inode { + return WalkResultRcu::Symlink { + symlink: current, + inode: inode.clone(), + }; + } + + let Some(component) = iter.next() else { + break; + }; + + match (inode, component) { + // Skip trailing empty and dot for normal directories. + (Some((DentryKind::Directory, _)), TrailingEmpty | Current) => {} + // Walk to parent directory unless we are at the filesystem root. + (Some((DentryKind::Directory, _)), Parent) => { + if current.ptr_eq_arc(&self.fsroot) { + continue; + } + + current = current + .parent + .dereference(&rcu_read) + .expect("parent should exist"); + } + // Normal directory traversal + (Some((DentryKind::Directory, _)), Name(name)) => { + match current.find_rcu(name, &rcu_read) { + Err(err) => return WalkResultRcu::Err(err), + Ok(Some(found)) => { + current = found; + } + Ok(None) => { + return WalkResultRcu::Miss { + name, + parent: current, + }; + } + } + } + // Not a directory, fail and exit. + (Some(_), _) => return WalkResultRcu::Err(PosixError::ENOTDIR), + // Return invalid trailing entries directly. + (None, TrailingEmpty) => return WalkResultRcu::Ok(current), + // Invalid intermediate entries are not acceptable. + (None, _) => return WalkResultRcu::Err(PosixError::ENOENT), + } + } + + WalkResultRcu::Ok(current) + } + + /// Walk the pathname slowly with refcounts held and VFS lookups. + async fn walk_slow(&self, mut current: Arc, iter: &mut PathIterator<'_>) -> WalkResult { + use PathComponent::*; + + loop { + // `current` should be the parent directory and `component` is the + // next path component we are stepping into. + + if iter.is_empty() { + break; + } + + if let Some((DentryKind::Symlink, inode)) = current.inode.load() { + return WalkResult::Symlink { + inode: inode.clone(), + symlink: current, + }; + } + + let Some(component) = iter.next() else { + break; + }; + + match (current.inode.load(), &component) { + // Normal directory traversal + (Some((DentryKind::Directory, _)), _) => {} + // Not a directory, fail and exit. + (Some(_), _) => return WalkResult::Err(PosixError::ENOTDIR), + // Return invalid trailing entries directly. + (None, TrailingEmpty) => return WalkResult::Ok(current), + // Invalid intermediate entries are not acceptable. 
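+                // (e.g. `a/missing/b` fails here with ENOENT as soon as the
+                // negative dentry for `missing` is stepped into, whereas a
+                // trailing `a/missing/` is returned above as a negative
+                // dentry for the caller to inspect or create.)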
+ (None, _) => return WalkResult::Err(PosixError::ENOENT), + } + + match component { + PathComponent::TrailingEmpty => {} + PathComponent::Current => {} + PathComponent::Parent => { + if current.hash_eq(&self.fsroot) { + continue; + } + + let parent = current.parent().clone(); + current = parent; + } + PathComponent::Name(name) => { + match current.find_full(name).await { + Ok(found) => current = found, + Err(err) => return WalkResult::Err(err), + }; + } + } + } + + WalkResult::Ok(current) + } + + /// Walk the pathname and get an accurate answer. Stop at symlinks. + async fn walk_full( + &self, + current: ArcRef<'_, Dentry>, + iter: &mut PathIterator<'_>, + ) -> WalkResult { + let (parent_slow, name_slow); + + match self.walk_rcu(current, iter, &rcu_read_lock()) { + WalkResultRcu::Err(error) => return WalkResult::Err(error.into()), + WalkResultRcu::Ok(dentry) => return WalkResult::Ok(dentry.clone_arc()), + WalkResultRcu::Symlink { symlink, inode } => { + return WalkResult::Symlink { + symlink: symlink.clone_arc(), + inode, + }; + } + WalkResultRcu::Miss { parent, name } => { + // Fallback to regular refcounted lookup + parent_slow = parent.clone_arc(); + name_slow = name; + } + } + + match parent_slow.find_slow(name_slow).await { + Ok(found) => self.walk_slow(found, iter).await, + Err(err) => return WalkResult::Err(err), + } + } + + pub async fn follow_symlink( + &self, + symlink: ArcRef<'_, Dentry>, + inode: &InodeUse, + nr_follows: u32, + ) -> KResult> { + let mut target = [0; 256]; + let mut target = ByteBuffer::new(&mut target); + inode.readlink(&mut target).await?; + + self.walk_recursive( + &symlink.parent().clone(), + Path::new(target.data()).unwrap(), + nr_follows + 1, + ) + .await + } + + fn follow_symlink_boxed<'r, 'a: 'r, 'b: 'r, 'c: 'r>( + &'a self, + symlink: ArcRef<'b, Dentry>, + inode: &'c InodeUse, + nr_follows: u32, + ) -> Pin>> + Send + 'r>> { + Box::pin(self.follow_symlink(symlink, inode, nr_follows)) + } + + async fn walk_recursive( + &self, + cwd: &Arc, + path: &Path, + nr_follows: u32, + ) -> KResult> { + const MAX_NR_FOLLOWS: u32 = 16; + + let mut current_owned; + let mut current; + if path.is_absolute() { + current = self.fsroot.aref(); + } else { + current = cwd.aref(); + } + + let mut path_iter = path.iter(); + + loop { + match self.walk_full(current, &mut path_iter).await { + WalkResult::Err(posix_error) => return Err(posix_error.into()), + WalkResult::Ok(dentry) => return Ok(dentry), + WalkResult::Symlink { symlink, inode } => { + if nr_follows >= MAX_NR_FOLLOWS { + return Err(ELOOP); + } + + current_owned = self + .follow_symlink_boxed(symlink.aref(), &inode, nr_follows) + .await?; + current = current_owned.aref(); + } + } + } + } + + pub async fn start_recursive_walk( + &self, + cwd: &Arc, + path: &Path, + ) -> KResult> { + self.walk_recursive(cwd, path, 0).await + } +} + +impl<'a, 'b> DentryFind<'a, 'b> { + fn new(parent: &'a Dentry, name: &'b [u8]) -> Self { + let builder: BuildHasherDefault = Default::default(); + let mut hasher = builder.build_hasher(); + + hasher.write_usize(parent as *const _ as usize); + hasher.write(name); + let hash = hasher.finish() as usize; + + Self { parent, name, hash } + } +} + +impl DCacheItem for DentryFind<'_, '_> { + fn d_hash(&self) -> usize { + self.hash + } + + fn d_parent(&self) -> *const Dentry { + self.parent as *const _ + } + + fn d_name<'r, 'a: 'r, 'b: 'a>( + &'a self, + _rcu_read: &'b RCUReadLock, + ) -> impl Deref + 'r { + self.name + } +} diff --git a/src/kernel/vfs/file/inode_file.rs 
b/src/kernel/vfs/file/inode_file.rs
index 6386ba92..d302079c 100644
--- a/src/kernel/vfs/file/inode_file.rs
+++ b/src/kernel/vfs/file/inode_file.rs
@@ -1,23 +1,17 @@
-use super::{File, FileType, SeekOption};
-use crate::{
-    io::{Buffer, BufferFill, Stream},
-    kernel::{
-        constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE},
-        vfs::{
-            dentry::Dentry,
-            inode::{Inode, Mode, WriteOffset},
-        },
-    },
-    prelude::KResult,
-};
 use alloc::sync::Arc;
-use core::{ops::ControlFlow, sync::atomic::Ordering};
+
 use eonix_sync::Mutex;
-use posix_types::{
-    getdent::{UserDirent, UserDirent64},
-    open::OpenFlags,
-    stat::StatX,
-};
+use posix_types::getdent::{UserDirent, UserDirent64};
+use posix_types::open::OpenFlags;
+use posix_types::stat::StatX;
+
+use super::{File, FileType, SeekOption};
+use crate::io::{Buffer, BufferFill, Stream};
+use crate::kernel::constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE};
+use crate::kernel::vfs::dentry::Dentry;
+use crate::kernel::vfs::inode::{InodeUse, WriteOffset};
+use crate::kernel::vfs::types::Format;
+use crate::prelude::KResult;
 
 pub struct InodeFile {
     pub r: bool,
@@ -25,7 +19,7 @@ pub struct InodeFile {
     pub a: bool,
     /// Only a few modes those won't possibly change are cached here to speed up file operations.
     /// Specifically, `S_IFMT` masked bits.
-    pub mode: Mode,
+    pub format: Format,
     cursor: Mutex<usize>,
     dentry: Arc<Dentry>,
 }
@@ -34,12 +28,7 @@ impl InodeFile {
     pub fn new(dentry: Arc<Dentry>, flags: OpenFlags) -> File {
         // SAFETY: `dentry` used to create `InodeFile` is valid.
        // SAFETY: `mode` should never change with respect to the `S_IFMT` fields.
-        let cached_mode = dentry
-            .get_inode()
-            .expect("`dentry` is invalid")
-            .mode
-            .load()
-            .format();
+        let format = dentry.inode().expect("dentry should be valid").format;
 
         let (r, w, a) = flags.as_rwa();
 
@@ -50,15 +39,15 @@ impl InodeFile {
                 r,
                 w,
                 a,
-                mode: cached_mode,
+                format,
                 cursor: Mutex::new(0),
             }),
         )
     }
 
     pub fn sendfile_check(&self) -> KResult<()> {
-        match self.mode {
-            Mode::REG | Mode::BLK => Ok(()),
+        match self.format {
+            Format::REG | Format::BLK => Ok(()),
             _ => Err(EBADF),
         }
     }
@@ -70,21 +59,19 @@ impl InodeFile {
 
         let mut cursor = self.cursor.lock().await;
 
-        if self.a {
-            let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?;
+        let (offset, update_offset) = match (self.a, offset) {
+            (true, _) => (WriteOffset::End(&mut cursor), None),
+            (false, Some(offset)) => (WriteOffset::Position(offset), None),
+            (false, None) => (WriteOffset::Position(*cursor), Some(&mut *cursor)),
+        };
 
-            Ok(nwrote)
-        } else {
-            let nwrote = if let Some(offset) = offset {
-                self.dentry.write(stream, WriteOffset::Position(offset))?
- } else { - let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?; - *cursor += nwrote; - nwrote - }; - - Ok(nwrote) + let nr_write = self.dentry.write(stream, offset).await?; + + if let Some(update_offset) = update_offset { + *update_offset += nr_write; } + + Ok(nr_write) } pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { @@ -92,24 +79,20 @@ impl InodeFile { return Err(EBADF); } - let nread = if let Some(offset) = offset { - let nread = self.dentry.read(buffer, offset)?; - nread - } else { - let mut cursor = self.cursor.lock().await; - - let nread = self.dentry.read(buffer, *cursor)?; + if let Some(offset) = offset { + return Ok(self.dentry.read(buffer, offset).await?); + } - *cursor += nread; - nread - }; + let mut cursor = self.cursor.lock().await; + let nread = self.dentry.read(buffer, *cursor).await?; + *cursor += nread; Ok(nread) } } impl File { - pub fn get_inode(&self) -> KResult>> { + pub fn get_inode(&self) -> KResult> { if let FileType::Inode(inode_file) = &**self { Ok(Some(inode_file.dentry.get_inode()?)) } else { @@ -124,27 +107,30 @@ impl File { let mut cursor = inode_file.cursor.lock().await; - let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { - // + 1 for filename length padding '\0', + 1 for d_type. - let real_record_len = core::mem::size_of::() + filename.len() + 2; + let nread = inode_file + .dentry + .readdir(*cursor, |filename, ino| { + // + 1 for filename length padding '\0', + 1 for d_type. + let real_record_len = core::mem::size_of::() + filename.len() + 2; - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } + if buffer.available() < real_record_len { + return Ok(false); + } - let record = UserDirent { - d_ino: ino as u32, - d_off: 0, - d_reclen: real_record_len as u16, - d_name: [0; 0], - }; + let record = UserDirent { + d_ino: ino.as_raw() as u32, + d_off: 0, + d_reclen: real_record_len as u16, + d_name: [0; 0], + }; - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0, 0])?.ok_or(EFAULT)?; + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0, 0])?.ok_or(EFAULT)?; - Ok(ControlFlow::Continue(())) - })?; + Ok(true) + }) + .await??; *cursor += nread; Ok(()) @@ -157,28 +143,31 @@ impl File { let mut cursor = inode_file.cursor.lock().await; - let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { - // Filename length + 1 for padding '\0' - let real_record_len = core::mem::size_of::() + filename.len() + 1; + let nread = inode_file + .dentry + .readdir(*cursor, |filename, ino| { + // Filename length + 1 for padding '\0' + let real_record_len = core::mem::size_of::() + filename.len() + 1; - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } + if buffer.available() < real_record_len { + return Ok(false); + } - let record = UserDirent64 { - d_ino: ino, - d_off: 0, - d_reclen: real_record_len as u16, - d_type: 0, - d_name: [0; 0], - }; + let record = UserDirent64 { + d_ino: ino.as_raw(), + d_off: 0, + d_reclen: real_record_len as u16, + d_type: 0, + d_name: [0; 0], + }; - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0])?.ok_or(EFAULT)?; + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0])?.ok_or(EFAULT)?; - Ok(ControlFlow::Continue(())) - })?; + Ok(true) + }) + .await??; *cursor += nread; Ok(()) @@ -196,7 +185,7 @@ impl File { SeekOption::Set(n) => n, 
SeekOption::End(off) => { let inode = inode_file.dentry.get_inode()?; - let size = inode.size.load(Ordering::Relaxed) as usize; + let size = inode.info.lock().size as usize; size.checked_add_signed(off).ok_or(EOVERFLOW)? } }; diff --git a/src/kernel/vfs/file/mod.rs b/src/kernel/vfs/file/mod.rs index bb1c66ec..799b9848 100644 --- a/src/kernel/vfs/file/mod.rs +++ b/src/kernel/vfs/file/mod.rs @@ -2,29 +2,24 @@ mod inode_file; mod pipe; mod terminal_file; -use crate::{ - io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}, - kernel::{ - constants::{EBADF, EINTR, EINVAL, ENOTTY}, - mem::{AsMemoryBlock, Page}, - task::Thread, - CharDevice, - }, - prelude::KResult, -}; use alloc::sync::Arc; -use bitflags::bitflags; -use core::{ - ops::Deref, - sync::atomic::{AtomicI32, AtomicU32, Ordering}, -}; -use pipe::{PipeReadEnd, PipeWriteEnd}; -use posix_types::open::OpenFlags; +use core::ops::Deref; +use core::sync::atomic::{AtomicI32, AtomicU32, Ordering}; +use bitflags::bitflags; pub use inode_file::InodeFile; pub use pipe::Pipe; +use pipe::{PipeReadEnd, PipeWriteEnd}; +use posix_types::open::OpenFlags; pub use terminal_file::TerminalFile; +use crate::io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}; +use crate::kernel::constants::{EBADF, EINTR, EINVAL, ENOTTY}; +use crate::kernel::mem::FolioOwned; +use crate::kernel::task::Thread; +use crate::kernel::CharDevice; +use crate::prelude::KResult; + pub enum FileType { Inode(InodeFile), PipeRead(PipeReadEnd), @@ -99,9 +94,8 @@ impl FileType { } pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { - let buffer_page = Page::alloc(); - // SAFETY: We are the only owner of the page. - let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() }; + let mut buffer_page = FolioOwned::alloc(); + let buffer = buffer_page.as_bytes_mut(); self.sendfile_check()?; diff --git a/src/kernel/vfs/file/terminal_file.rs b/src/kernel/vfs/file/terminal_file.rs index f318c5b2..04a022b5 100644 --- a/src/kernel/vfs/file/terminal_file.rs +++ b/src/kernel/vfs/file/terminal_file.rs @@ -1,24 +1,46 @@ -use super::{File, FileType, PollEvent}; -use crate::{ - io::{Buffer, Stream, StreamRead}, - kernel::{ - constants::{EINVAL, TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, - terminal::TerminalIORequest, - user::{UserPointer, UserPointerMut}, - Terminal, - }, - prelude::KResult, -}; use alloc::sync::Arc; + +use eonix_sync::AsProof; use posix_types::open::OpenFlags; +use super::{File, FileType, PollEvent}; +use crate::io::{Buffer, Stream, StreamRead}; +use crate::kernel::constants::{ + EINVAL, TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP, +}; +use crate::kernel::task::{ProcessList, Thread}; +use crate::kernel::terminal::TerminalIORequest; +use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::Terminal; +use crate::prelude::KResult; + pub struct TerminalFile { terminal: Arc, } impl TerminalFile { - pub fn new(tty: Arc, flags: OpenFlags) -> File { - File::new(flags, FileType::Terminal(TerminalFile { terminal: tty })) + pub async fn open( + thread: &Thread, + terminal: &Arc, + flags: OpenFlags, + ) -> File { + let set_control_tty = !flags.contains(OpenFlags::O_NOCTTY); + + let procs = ProcessList::get().read().await; + let session = thread.process.session(procs.prove()); + + // We only set the control terminal if the process is the session leader. + if set_control_tty && session.sid == thread.process.pid { + // Silently fail if we can't set the control terminal. 
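+            // (`set_session` refuses with EPERM when the terminal already
+            // belongs to a live session and `forced` is false; a plain
+            // `open(2)` of a tty must not steal it, so the error is ignored.)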
+ let _ = terminal.set_session(&session, false, procs.prove()).await; + } + + File::new( + flags, + FileType::Terminal(TerminalFile { + terminal: terminal.clone(), + }), + ) } pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { @@ -43,11 +65,21 @@ impl TerminalFile { pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { self.terminal .ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), + TCGETS => TerminalIORequest::GetTermios( + UserPointerMut::with_addr(arg3)?, + ), + TCSETS => { + TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?) + } + TIOCGPGRP => TerminalIORequest::GetProcessGroup( + UserPointerMut::with_addr(arg3)?, + ), + TIOCSPGRP => TerminalIORequest::SetProcessGroup( + UserPointer::with_addr(arg3)?, + ), + TIOCGWINSZ => TerminalIORequest::GetWindowSize( + UserPointerMut::with_addr(arg3)?, + ), _ => return Err(EINVAL), }) .await diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index b457a425..c0b6a49e 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -1,28 +1,26 @@ -use super::{ - file::{File, InodeFile, Pipe}, - inode::Mode, - Spin, TerminalFile, -}; -use crate::kernel::{ - constants::{ - EBADF, EISDIR, ENOTDIR, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, F_SETFD, F_SETFL, - }, - syscall::{FromSyscallArg, SyscallRetVal}, -}; -use crate::{ - kernel::{console::get_console, constants::ENXIO, vfs::dentry::Dentry, CharDevice}, - prelude::*, -}; use alloc::sync::Arc; + +use intrusive_collections::rbtree::Entry; use intrusive_collections::{ - intrusive_adapter, rbtree::Entry, Bound, KeyAdapter, RBTree, RBTreeAtomicLink, -}; -use itertools::{ - FoldWhile::{Continue, Done}, - Itertools, + intrusive_adapter, Bound, KeyAdapter, RBTree, RBTreeAtomicLink, }; +use itertools::FoldWhile::{Continue, Done}; +use itertools::Itertools; use posix_types::open::{FDFlags, OpenFlags}; +use super::file::{File, InodeFile, Pipe}; +use super::types::{Format, Permission}; +use super::Spin; +use crate::kernel::constants::{ + EBADF, EISDIR, ENOTDIR, ENXIO, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, + F_SETFD, F_SETFL, +}; +use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; +use crate::kernel::task::Thread; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::CharDevice; +use crate::prelude::*; + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct FD(u32); @@ -85,7 +83,11 @@ impl FDAllocator { self.min_avail = FD(0); } - fn find_available(&mut self, from: FD, files: &RBTree) -> FD { + fn find_available( + &mut self, + from: FD, + files: &RBTree, + ) -> FD { files .range(Bound::Included(&from), Bound::Unbounded) .fold_while(from, |current, OpenFile { fd, .. 
}| { @@ -148,7 +150,8 @@ impl FileArray { let other_inner = other.inner.lock(); for file in other_inner.files.iter() { - let new_file = OpenFile::new(file.fd, file.flags, file.file.dup()); + let new_file = + OpenFile::new(file.fd, file.flags, file.file.dup()); new_files.insert(new_file); } (new_files, other_inner.fd_alloc.clone()) @@ -228,7 +231,12 @@ impl FileArray { /// Duplicates the file to a new file descriptor, returning the old file /// description to be dropped. - fn dup_to_no_close(&self, old_fd: FD, new_fd: FD, fd_flags: FDFlags) -> KResult> { + fn dup_to_no_close( + &self, + old_fd: FD, + new_fd: FD, + fd_flags: FDFlags, + ) -> KResult> { let mut inner = self.inner.lock(); let (files, fd_alloc) = inner.split_borrow(); @@ -245,7 +253,8 @@ impl FileArray { Entry::Occupied(mut entry) => { let mut file = entry.remove().unwrap(); file.flags = fd_flags; - let old_file = core::mem::replace(&mut file.file, new_file_data); + let old_file = + core::mem::replace(&mut file.file, new_file_data); entry.insert(file); @@ -254,8 +263,15 @@ impl FileArray { } } - pub async fn dup_to(&self, old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { - if let Some(old_file) = self.dup_to_no_close(old_fd, new_fd, flags.as_fd_flags())? { + pub async fn dup_to( + &self, + old_fd: FD, + new_fd: FD, + flags: OpenFlags, + ) -> KResult { + if let Some(old_file) = + self.dup_to_no_close(old_fd, new_fd, flags.as_fd_flags())? + { old_file.close().await; } @@ -280,28 +296,33 @@ impl FileArray { Ok((read_fd, write_fd)) } - pub fn open(&self, dentry: &Arc, flags: OpenFlags, mode: Mode) -> KResult { - dentry.open_check(flags, mode)?; + pub async fn open( + &self, + thread: &Thread, + dentry: &Arc, + flags: OpenFlags, + perm: Permission, + ) -> KResult { + dentry.open_check(flags, perm).await?; let fdflag = flags.as_fd_flags(); let inode = dentry.get_inode()?; - let file_format = inode.mode.load().format(); - match (flags.directory(), file_format, flags.write()) { - (true, Mode::DIR, _) => {} + match (flags.directory(), inode.format, flags.write()) { + (true, Format::DIR, _) => {} (true, _, _) => return Err(ENOTDIR), - (false, Mode::DIR, true) => return Err(EISDIR), + (false, Format::DIR, true) => return Err(EISDIR), _ => {} } - if flags.truncate() && flags.write() && file_format.is_reg() { - inode.truncate(0)?; + if flags.truncate() && flags.write() && inode.format == Format::REG { + inode.truncate(0).await?; } - let file = if file_format.is_chr() { + let file = if inode.format == Format::CHR { let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?; - device.open(flags)? + device.open(thread, flags).await? } else { InodeFile::new(dentry.clone(), flags) }; @@ -324,7 +345,8 @@ impl FileArray { F_DUPFD | F_DUPFD_CLOEXEC => { let ofile = cursor.get().ok_or(EBADF)?; - let cloexec = cmd == F_DUPFD_CLOEXEC || ofile.flags.close_on_exec(); + let cloexec = + cmd == F_DUPFD_CLOEXEC || ofile.flags.close_on_exec(); let flags = cloexec .then_some(FDFlags::FD_CLOEXEC) .unwrap_or(FDFlags::empty()); @@ -343,7 +365,9 @@ impl FileArray { cursor.insert(ofile); 0 } - F_GETFL => cursor.get().ok_or(EBADF)?.file.get_flags().bits() as usize, + F_GETFL => { + cursor.get().ok_or(EBADF)?.file.get_flags().bits() as usize + } F_SETFL => { cursor .get() @@ -358,35 +382,6 @@ impl FileArray { Ok(ret) } - - /// Only used for init process. 
-
-    /// Only used for init process.
-    pub fn open_console(&self) {
-        let mut inner = self.inner.lock();
-        let (files, fd_alloc) = inner.split_borrow();
-
-        let (stdin, stdout, stderr) = (
-            fd_alloc.next_fd(files),
-            fd_alloc.next_fd(files),
-            fd_alloc.next_fd(files),
-        );
-        let console_terminal = get_console().expect("No console terminal");
-
-        inner.do_insert(
-            stdin,
-            FDFlags::FD_CLOEXEC,
-            TerminalFile::new(console_terminal.clone(), OpenFlags::empty()),
-        );
-        inner.do_insert(
-            stdout,
-            FDFlags::FD_CLOEXEC,
-            TerminalFile::new(console_terminal.clone(), OpenFlags::empty()),
-        );
-        inner.do_insert(
-            stderr,
-            FDFlags::FD_CLOEXEC,
-            TerminalFile::new(console_terminal.clone(), OpenFlags::empty()),
-        );
-    }
 }
 
 impl FileArrayInner {
@@ -398,7 +393,9 @@ impl FileArrayInner {
     fn do_insert(&mut self, fd: FD, flags: FDFlags, file: File) {
         match self.files.entry(&fd) {
             Entry::Occupied(_) => {
-                panic!("File descriptor {fd:?} already exists in the file array.");
+                panic!(
+                    "File descriptor {fd:?} already exists in the file array."
+                );
             }
             Entry::Vacant(insert_cursor) => {
                 insert_cursor.insert(OpenFile::new(fd, flags, file));
@@ -406,7 +403,9 @@ impl FileArrayInner {
         }
     }
 
-    fn split_borrow(&mut self) -> (&mut RBTree, &mut FDAllocator) {
+    fn split_borrow(
+        &mut self,
+    ) -> (&mut RBTree, &mut FDAllocator) {
         let Self { files, fd_alloc } = self;
         (files, fd_alloc)
     }
diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs
deleted file mode 100644
index 52529f84..00000000
--- a/src/kernel/vfs/inode.rs
+++ /dev/null
@@ -1,494 +0,0 @@
-use super::{dentry::Dentry, vfs::Vfs, DevId};
-use crate::io::Stream;
-use crate::kernel::constants::{
-    EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO,
-    STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFBLK, S_IFCHR,
-    S_IFDIR, S_IFLNK, S_IFMT, S_IFREG,
-};
-use crate::kernel::mem::PageCache;
-use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal};
-use crate::kernel::task::block_on;
-use crate::kernel::timer::Instant;
-use crate::{io::Buffer, prelude::*};
-use alloc::sync::{Arc, Weak};
-use core::{
-    mem::MaybeUninit,
-    ops::ControlFlow,
-    ptr::addr_of_mut,
-    sync::atomic::{AtomicU32, AtomicU64, Ordering},
-};
-use eonix_sync::RwLock;
-use posix_types::stat::StatX;
-
-pub type Ino = u64;
-pub type AtomicIno = AtomicU64;
-#[allow(dead_code)]
-pub type ISize = u64;
-pub type AtomicISize = AtomicU64;
-#[allow(dead_code)]
-pub type Nlink = u64;
-pub type AtomicNlink = AtomicU64;
-#[allow(dead_code)]
-pub type Uid = u32;
-pub type AtomicUid = AtomicU32;
-#[allow(dead_code)]
-pub type Gid = u32;
-pub type AtomicGid = AtomicU32;
-
-#[derive(Clone, Copy, PartialEq, Eq)]
-pub struct Mode(u32);
-
-pub struct AtomicMode(AtomicU32);
-
-#[derive(Debug)]
-pub struct InodeData {
-    pub ino: Ino,
-    pub size: AtomicISize,
-    pub nlink: AtomicNlink,
-
-    pub uid: AtomicUid,
-    pub gid: AtomicGid,
-    pub mode: AtomicMode,
-
-    pub atime: Spin<Instant>,
-    pub ctime: Spin<Instant>,
-    pub mtime: Spin<Instant>,
-
-    pub rwsem: RwLock<()>,
-
-    pub vfs: Weak<dyn Vfs>,
-}
-
-impl InodeData {
-    pub fn new(ino: Ino, vfs: Weak<dyn Vfs>) -> Self {
-        Self {
-            ino,
-            vfs,
-            atime: Spin::new(Instant::now()),
-            ctime: Spin::new(Instant::now()),
-            mtime: Spin::new(Instant::now()),
-            rwsem: RwLock::new(()),
-            size: AtomicU64::new(0),
-            nlink: AtomicNlink::new(0),
-            uid: AtomicUid::new(0),
-            gid: AtomicGid::new(0),
-            mode: AtomicMode::new(0),
-        }
-    }
-}
-
-#[allow(dead_code)]
-pub trait InodeInner:
-    Send + Sync + core::ops::Deref<Target = InodeData> + core::ops::DerefMut
-{
-    fn data(&self) -> &InodeData;
-    fn data_mut(&mut self) -> &mut InodeData;
-}
-
-pub enum WriteOffset<'end> {
-    Position(usize),
-    End(&'end mut usize),
-}
-
-pub struct RenameData<'a, 'b> {
-    pub old_dentry: &'a Arc<Dentry>,
-    pub new_dentry: &'b Arc<Dentry>,
-    pub new_parent: Arc<dyn Inode>,
-    pub vfs: Arc<dyn Vfs>,
-    pub is_exchange: bool,
-    pub no_replace: bool,
-}
-
-#[allow(unused_variables)]
-pub trait Inode: Send + Sync + InodeInner + Any {
-    fn is_dir(&self) -> bool {
-        self.mode.load().is_dir()
-    }
-
-    fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn creat(&self, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn devid(&self) -> KResult<DevId> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn truncate(&self, length: usize) -> KResult<()> {
-        Err(if self.is_dir() { EISDIR } else { EPERM })
-    }
-
-    fn rename(&self, rename_data: RenameData) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn do_readdir(
-        &self,
-        offset: usize,
-        callback: &mut dyn FnMut(&[u8], Ino) -> KResult<ControlFlow<()>>,
-    ) -> KResult<usize> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn chmod(&self, mode: Mode) -> KResult<()> {
-        Err(EPERM)
-    }
-
-    fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
-        Err(EPERM)
-    }
-
-    fn page_cache(&self) -> Option<&PageCache> {
-        None
-    }
-
-    fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> {
-        // Safety: ffi should have checked reference
-        let vfs = self.vfs.upgrade().expect("Vfs is dropped");
-
-        let size = self.size.load(Ordering::Relaxed);
-        let mode = self.mode.load();
-
-        if mask & STATX_NLINK != 0 {
-            stat.stx_nlink = self.nlink.load(Ordering::Acquire) as _;
-            stat.stx_mask |= STATX_NLINK;
-        }
-
-        if mask & STATX_ATIME != 0 {
-            let atime = *self.atime.lock();
-            stat.stx_atime = atime.into();
-            stat.stx_mask |= STATX_ATIME;
-        }
-
-        if mask & STATX_MTIME != 0 {
-            let mtime = *self.mtime.lock();
-            stat.stx_mtime = mtime.into();
-            stat.stx_mask |= STATX_MTIME;
-        }
-
-        if mask & STATX_CTIME != 0 {
-            let ctime = *self.ctime.lock();
-            stat.stx_ctime = ctime.into();
-            stat.stx_mask |= STATX_CTIME;
-        }
-
-        if mask & STATX_SIZE != 0 {
-            stat.stx_size = self.size.load(Ordering::Relaxed) as _;
-            stat.stx_mask |= STATX_SIZE;
-        }
-
-        stat.stx_mode = 0;
-        if mask & STATX_MODE != 0 {
-            stat.stx_mode |= mode.non_format_bits() as u16;
-            stat.stx_mask |= STATX_MODE;
-        }
-
-        if mask & STATX_TYPE != 0 {
-            stat.stx_mode |= mode.format_bits() as u16;
-            if mode.is_blk() || mode.is_chr() {
-                let devid = self.devid();
-                stat.stx_rdev_major = (devid? >> 8) & 0xff;
-                stat.stx_rdev_minor = devid? & 0xff;
-            }
-            stat.stx_mask |= STATX_TYPE;
-        }
-
-        if mask & STATX_INO != 0 {
-            stat.stx_ino = self.ino as _;
-            stat.stx_mask |= STATX_INO;
-        }
-
-        if mask & STATX_BLOCKS != 0 {
-            stat.stx_blocks = (size + 512 - 1) / 512;
-            stat.stx_blksize = vfs.io_blksize() as _;
-            stat.stx_mask |= STATX_BLOCKS;
-        }
-
-        if mask & STATX_UID != 0 {
-            stat.stx_uid = self.uid.load(Ordering::Relaxed) as _;
-            stat.stx_mask |= STATX_UID;
-        }
-
-        if mask & STATX_GID != 0 {
-            stat.stx_gid = self.gid.load(Ordering::Relaxed) as _;
-            stat.stx_mask |= STATX_GID;
-        }
-
-        let fsdev = vfs.fs_devid();
-        stat.stx_dev_major = (fsdev >> 8) & 0xff;
-        stat.stx_dev_minor = fsdev & 0xff;
-
-        // TODO: support more attributes
-        stat.stx_attributes_mask = 0;
-
-        Ok(())
-    }
-
-    fn new_locked<F>(ino: Ino, vfs: Weak<dyn Vfs>, f: F) -> Arc<Self>
-    where
-        Self: Sized,
-        F: FnOnce(*mut Self, &()),
-    {
-        let mut uninit = Arc::<Self>::new_uninit();
-
-        let uninit_mut = Arc::get_mut(&mut uninit).unwrap();
-
-        // Safety: `idata` is owned by `uninit`
-        let idata = unsafe {
-            addr_of_mut!(*(*uninit_mut.as_mut_ptr()).data_mut())
-                .cast::<MaybeUninit<InodeData>>()
-                .as_mut()
-                .unwrap()
-        };
-
-        idata.write(InodeData::new(ino, vfs));
-
-        f(
-            uninit_mut.as_mut_ptr(),
-            // SAFETY: `idata` is initialized and we will never move the lock.
-            &block_on(unsafe { idata.assume_init_ref() }.rwsem.read()),
-        );
-
-        // Safety: `uninit` is initialized
-        unsafe { uninit.assume_init() }
-    }
-}
-
-// TODO: define multiple inode structs a time
-macro_rules! define_struct_inode {
-    ($v:vis struct $inode_t:ident;) => {
-        $v struct $inode_t {
-            /// Do not use this directly
-            idata: $crate::kernel::vfs::inode::InodeData,
-        }
-
-        impl core::ops::Deref for $inode_t {
-            type Target = $crate::kernel::vfs::inode::InodeData;
-
-            fn deref(&self) -> &Self::Target {
-                &self.idata
-            }
-        }
-
-        impl core::ops::DerefMut for $inode_t {
-            fn deref_mut(&mut self) -> &mut Self::Target {
-                &mut self.idata
-            }
-        }
-
-        impl $crate::kernel::vfs::inode::InodeInner for $inode_t {
-            fn data(&self) -> &$crate::kernel::vfs::inode::InodeData {
-                &self.idata
-            }
-
-            fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData {
-                &mut self.idata
-            }
-        }
-    };
-    ($v:vis struct $inode_t:ident { $($vis:vis $name:ident: $type:ty,)* }) => {
-        $v struct $inode_t {
-            /// Do not use this directly
-            idata: $crate::kernel::vfs::inode::InodeData,
-            $($vis $name: $type,)*
-        }
-
-        impl core::ops::Deref for $inode_t {
-            type Target = $crate::kernel::vfs::inode::InodeData;
-
-            fn deref(&self) -> &Self::Target {
-                &self.idata
-            }
-        }
-
-        impl core::ops::DerefMut for $inode_t {
-            fn deref_mut(&mut self) -> &mut Self::Target {
-                &mut self.idata
-            }
-        }
-
-        impl $crate::kernel::vfs::inode::InodeInner for $inode_t {
-            fn data(&self) -> &$crate::kernel::vfs::inode::InodeData {
-                &self.idata
-            }
-
-            fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData {
-                &mut self.idata
-            }
-        }
-    };
-}
-
-pub(crate) use define_struct_inode;
-
-impl Mode {
-    pub const REG: Self = Self(S_IFREG);
-    pub const DIR: Self = Self(S_IFDIR);
-    pub const LNK: Self = Self(S_IFLNK);
-    pub const BLK: Self = Self(S_IFBLK);
-    pub const CHR: Self = Self(S_IFCHR);
-
-    pub const fn new(bits: u32) -> Self {
-        Self(bits)
-    }
-
-    pub const fn is_blk(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFBLK
-    }
-
-    pub const fn is_chr(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFCHR
-    }
-
-    pub const fn is_reg(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFREG
-    }
-
-    pub const fn is_dir(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFDIR
-    }
-
-    pub const fn is_lnk(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFLNK
-    }
-
-    pub const fn bits(&self) -> u32 {
-        self.0
-    }
-
-    pub const fn format_bits(&self) -> u32 {
-        self.0 & S_IFMT
-    }
-
-    pub const fn format(&self) -> Self {
-        Self::new(self.format_bits())
-    }
-
-    pub const fn non_format_bits(&self) -> u32 {
-        self.0 & !S_IFMT
-    }
-
-    pub const fn non_format(&self) -> Self {
-        Self::new(self.non_format_bits())
-    }
-
-    pub const fn perm(self, perm: u32) -> Self {
-        Self::new((self.0 & !0o777) | (perm & 0o777))
-    }
-
-    pub const fn set_perm(&mut self, perm: u32) {
-        *self = self.perm(perm);
-    }
-
-    pub const fn mask_perm(&mut self, perm_mask: u32) {
-        let perm_mask = perm_mask & 0o777;
-        let self_perm = self.non_format_bits() & 0o777;
-
-        *self = self.perm(self_perm & perm_mask);
-    }
-}
-
-impl AtomicMode {
-    pub const fn new(bits: u32) -> Self {
-        Self(AtomicU32::new(bits))
-    }
-
-    pub const fn from(mode: Mode) -> Self {
-        Self::new(mode.0)
-    }
-
-    pub fn load(&self) -> Mode {
-        Mode(self.0.load(Ordering::Relaxed))
-    }
-
-    pub fn store(&self, mode: Mode) {
-        self.0.store(mode.0, Ordering::Relaxed);
-    }
-}
-
-impl core::fmt::Debug for AtomicMode {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        f.debug_struct("AtomicMode")
-            .field("bits", &self.load().0)
-            .finish()
-    }
-}
-
-impl core::fmt::Debug for Mode {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let format_name = match self.format() {
-            Mode::REG => "REG",
-            Mode::DIR => "DIR",
-            Mode::LNK => "LNK",
-            Mode::BLK => "BLK",
-            Mode::CHR => "CHR",
-            _ => "UNK",
-        };
-
-        match self.non_format_bits() & !0o777 {
-            0 => write!(
-                f,
-                "Mode({format_name}, {perm:#o})",
-                perm = self.non_format_bits()
-            )?,
-            rem => write!(
-                f,
-                "Mode({format_name}, {perm:#o}, rem={rem:#x})",
-                perm = self.non_format_bits() & 0o777
-            )?,
-        }
-
-        Ok(())
-    }
-}
-
-impl FromSyscallArg for Mode {
-    fn from_arg(value: usize) -> Self {
-        Mode::new(value as u32)
-    }
-}
-
-impl SyscallRetVal for Mode {
-    fn into_retval(self) -> Option<usize> {
-        Some(self.bits() as usize)
-    }
-}
diff --git a/src/kernel/vfs/inode/ino.rs b/src/kernel/vfs/inode/ino.rs
new file mode 100644
index 00000000..b5ee7ac0
--- /dev/null
+++ b/src/kernel/vfs/inode/ino.rs
@@ -0,0 +1,31 @@
+use core::{
+    fmt::{Debug, Display, Formatter},
+    sync::atomic::AtomicU64,
+};
+
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Ino(u64);
+
+pub struct AtomicIno(AtomicU64);
+
+impl Ino {
+    pub const fn new(ino: u64) -> Self {
+        Self(ino)
+    }
+
+    pub const fn as_raw(self) -> u64 {
+        self.0
+    }
+}
+
+impl Debug for Ino {
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        write!(f, "Ino({})", self.0)
+    }
+}
+
+impl Display for Ino {
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        write!(f, "{:?}", self)
+    }
+}
diff --git a/src/kernel/vfs/inode/inode.rs b/src/kernel/vfs/inode/inode.rs
new file mode 100644
index 00000000..5f0b98c2
--- /dev/null
+++ b/src/kernel/vfs/inode/inode.rs
@@ -0,0 +1,363 @@
+use alloc::boxed::Box;
+use alloc::collections::btree_map::BTreeMap;
+use alloc::sync::{Arc, Weak};
+use core::any::Any;
+use core::future::Future;
+use core::ops::Deref;
+
+use async_trait::async_trait;
+use eonix_sync::{RwLock, Spin};
+
+use super::{Ino, RenameData, WriteOffset};
+use crate::io::{Buffer, Stream};
+use crate::kernel::constants::{EINVAL, EPERM};
+use crate::kernel::mem::{CachePage, PageCache, PageOffset};
+use crate::kernel::timer::Instant;
+use crate::kernel::vfs::dentry::Dentry;
+use crate::kernel::vfs::types::{DeviceId, Format, Mode, Permission};
+use crate::kernel::vfs::{SbRef, SbUse, SuperBlock};
+use crate::prelude::KResult;
+
+pub struct Inode {
+    pub ino: Ino,
+    pub format: Format,
+    pub info: Spin<InodeInfo>,
+    pub rwsem: RwLock<()>,
+    page_cache: Spin<Weak<PageCache>>,
+    sb: SbRef<dyn SuperBlock>,
+    ops: Box<dyn InodeOpsErased>,
+}
+
+macro_rules! return_type {
+    ($type:ty) => {
+        $type
+    };
+    () => {
+        ()
+    };
+}
+
+macro_rules! define_inode_ops {
+    {
+        $(
+            $(#[$attr:meta])*
+            async fn $method:ident $(<$($lt:lifetime),+>)? (&self $(,)? $($name:ident : $type:ty $(,)?)*) $(-> $ret:ty)?
+            $body:block
+        )*
+
+        ---
+
+        $(
+            $(#[$attr1:meta])*
+            fn $method1:ident $(<$($lt1:lifetime),+>)? (&self $(,)? $($name1:ident : $type1:ty $(,)?)*) $(-> $ret1:ty)?
+            $body1:block
+        )*
+    } => {
+        #[allow(unused_variables)]
+        pub trait InodeOps: Sized + Send + Sync + 'static {
+            type SuperBlock: SuperBlock + Sized;
+
+            $(
+                $(#[$attr])*
+                fn $method $(<$($lt),+>)? (
+                    &self,
+                    sb: SbUse<Self::SuperBlock>,
+                    inode: &InodeUse,
+                    $($name : $type),*
+                ) -> impl Future<Output = return_type!($($ret)?)> + Send {
+                    async { $body }
+                })*
+
+            $(
+                $(#[$attr1])*
+                fn $method1 $(<$($lt1),+>)? (
+                    &self,
+                    sb: SbUse<Self::SuperBlock>,
+                    inode: &InodeUse,
+                    $($name1 : $type1),*
+                ) -> return_type!($($ret1)?) {
+                    $body1
+                })*
+        }
+
+        #[async_trait]
+        trait InodeOpsErased: Any + Send + Sync + 'static {
+            $(async fn $method $(<$($lt),+>)? (
+                &self,
+                sb: SbUse<dyn SuperBlock>,
+                inode: &InodeUse,
+                $($name : $type),*
+            ) -> return_type!($($ret)?);)*
+
+            $(fn $method1 $(<$($lt1),+>)? (
+                &self,
+                sb: SbUse<dyn SuperBlock>,
+                inode: &InodeUse,
+                $($name1 : $type1),*
+            ) -> return_type!($($ret1)?);)*
+        }
+
+        #[async_trait]
+        impl<T> InodeOpsErased for T
+        where
+            T: InodeOps,
+        {
+            $(async fn $method $(<$($lt),+>)? (
+                &self,
+                sb: SbUse<dyn SuperBlock>,
+                inode: &InodeUse,
+                $($name : $type),*
+            ) -> return_type!($($ret)?) {
+                self.$method(sb.downcast(), inode, $($name),*).await
+            })*
+
+            $(fn $method1 $(<$($lt1),+>)? (
+                &self,
+                sb: SbUse<dyn SuperBlock>,
+                inode: &InodeUse,
+                $($name1 : $type1),*
+            ) -> return_type!($($ret1)?) {
+                self.$method1(sb.downcast(), inode, $($name1),*)
+            })*
+        }
+
+        impl InodeUse {
+            $(pub async fn $method $(<$($lt),+>)? (
+                &self,
+                $($name : $type),*
+            ) -> return_type!($($ret)?) {
+                self.ops.$method(self.sbget()?, self, $($name),*).await
+            })*
+
+            $(pub fn $method1 $(<$($lt1),+>)? (
+                &self,
+                $($name1 : $type1),*
+            ) -> return_type!($($ret1)?) {
+                self.ops.$method1(self.sbget()?, self, $($name1),*)
+            })*
+        }
+    };
+}
+
+define_inode_ops! {
+    // DIRECTORY OPERATIONS
+
+    async fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<InodeUse>> {
+        Err(EPERM)
+    }
+
+    /// Read directory entries and call the given closure for each entry.
+    ///
+    /// # Returns
+    /// - Ok(count): The number of entries read.
+    /// - Ok(Err(err)): Some error occurred while calling the given closure.
+    /// - Err(err): An error occurred while reading the directory.
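+    ///
+    /// The closure receives each entry's name and its inode number; reading
+    /// starts at the entry index given by `offset`.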
+    async fn readdir(
+        &self,
+        offset: usize,
+        for_each_entry: &mut (dyn (for<'a> FnMut(&'a [u8], Ino) -> KResult) + Send),
+    ) -> KResult<KResult<usize>> {
+        Err(EPERM)
+    }
+
+    async fn create(&self, at: &Arc<Dentry>, mode: Permission) -> KResult<()> {
+        Err(EPERM)
+    }
+
+    async fn mkdir(&self, at: &Dentry, mode: Permission) -> KResult<()> {
+        Err(EPERM)
+    }
+
+    async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> {
+        Err(EPERM)
+    }
+
+    async fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
+        Err(EPERM)
+    }
+
+    async fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
+        Err(EPERM)
+    }
+
+    async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> {
+        Err(EPERM)
+    }
+
+    // FILE OPERATIONS
+
+    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Err(EINVAL)
+    }
+
+    async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Err(EINVAL)
+    }
+
+    async fn write(
+        &self,
+        stream: &mut dyn Stream,
+        offset: WriteOffset<'_>
+    ) -> KResult<usize> {
+        Err(EINVAL)
+    }
+
+    async fn write_direct(
+        &self,
+        stream: &mut dyn Stream,
+        offset: usize,
+    ) -> KResult<usize> {
+        Err(EINVAL)
+    }
+
+    async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
+        Err(EINVAL)
+    }
+
+    async fn truncate(&self, length: usize) -> KResult<()> {
+        Err(EPERM)
+    }
+
+    async fn chmod(&self, perm: Permission) -> KResult<()> {
+        Err(EPERM)
+    }
+
+    async fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
+        Err(EPERM)
+    }
+
+    // PAGE CACHE OPERATIONS
+    async fn read_page(&self, page: &mut CachePage, offset: PageOffset) -> KResult<()> {
+        Err(EINVAL)
+    }
+
+    async fn write_page(&self, page: &mut CachePage, offset: PageOffset) -> KResult<()> {
+        Err(EINVAL)
+    }
+
+    async fn write_begin<'a>(
+        &self,
+        page_cache: &PageCache,
+        pages: &'a mut BTreeMap<PageOffset, CachePage>,
+        offset: usize,
+        len: usize,
+    ) -> KResult<&'a mut CachePage> {
+        Err(EINVAL)
+    }
+
+    async fn write_end(
+        &self,
+        page_cache: &PageCache,
+        pages: &mut BTreeMap<PageOffset, CachePage>,
+        offset: usize,
+        len: usize,
+        copied: usize
+    ) -> KResult<()> {
+        Err(EINVAL)
+    }
+
+    ---
+
+    fn devid(&self) -> KResult<DeviceId> {
+        Err(EINVAL)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct InodeInfo {
+    pub size: u64,
+    pub nlink: u64,
+
+    pub uid: u32,
+    pub gid: u32,
+    pub perm: Permission,
+
+    pub atime: Instant,
+    pub ctime: Instant,
+    pub mtime: Instant,
+}
+
+#[repr(transparent)]
+pub struct InodeUse(Arc<Inode>);
+
+impl InodeUse {
+    pub fn new(
+        sb: SbRef<dyn SuperBlock>,
+        ino: Ino,
+        format: Format,
+        info: InodeInfo,
+        ops: impl InodeOps,
+    ) -> Self {
+        let inode = Inode {
+            sb,
+            ino,
+            format,
+            info: Spin::new(info),
+            rwsem: RwLock::new(()),
+            page_cache: Spin::new(Weak::new()),
+            ops: Box::new(ops),
+        };
+
+        Self(Arc::new(inode))
+    }
+
+    pub fn sbref(&self) -> SbRef<dyn SuperBlock> {
+        self.sb.clone()
+    }
+
+    pub fn sbget(&self) -> KResult<SbUse<dyn SuperBlock>> {
+        self.sb.get().map(|sb| sb as _)
+    }
+
+    pub fn get_priv<I>(&self) -> &I
+    where
+        I: InodeOps,
+    {
+        let ops = (&*self.ops) as &dyn Any;
+
+        ops.downcast_ref()
+            .expect("InodeUse::private: InodeOps type mismatch")
+    }
+
+    pub fn get_page_cache(&self) -> Arc<PageCache> {
+        if let Some(cache) = self.page_cache.lock().upgrade() {
+            return cache;
+        }
+
+        // Slow path...
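+        // Double-checked locking: allocate a new cache first, then take the
+        // lock and re-check whether another task installed one in the
+        // meantime; if so, return theirs and drop ours.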
+        let cache = Arc::new(PageCache::new(self.clone()));
+        let mut page_cache = self.page_cache.lock();
+        if let Some(cache) = page_cache.upgrade() {
+            return cache;
+        }
+
+        *page_cache = Arc::downgrade(&cache);
+        cache
+    }
+}
+
+impl Clone for InodeUse {
+    fn clone(&self) -> Self {
+        Self(self.0.clone())
+    }
+}
+
+impl core::fmt::Debug for InodeUse {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "InodeUse(ino={})", self.ino)
+    }
+}
+
+impl Deref for InodeUse {
+    type Target = Inode;
+
+    fn deref(&self) -> &Self::Target {
+        self.0.deref()
+    }
+}
+
+impl PartialEq for InodeUse {
+    fn eq(&self, other: &Self) -> bool {
+        Arc::ptr_eq(&self.0, &other.0)
+    }
+}
diff --git a/src/kernel/vfs/inode/mod.rs b/src/kernel/vfs/inode/mod.rs
new file mode 100644
index 00000000..6f4f041a
--- /dev/null
+++ b/src/kernel/vfs/inode/mod.rs
@@ -0,0 +1,8 @@
+mod ino;
+mod inode;
+mod ops;
+mod statx;
+
+pub use ino::Ino;
+pub use inode::{Inode, InodeInfo, InodeOps, InodeUse};
+pub use ops::{RenameData, WriteOffset};
diff --git a/src/kernel/vfs/inode/ops.rs b/src/kernel/vfs/inode/ops.rs
new file mode 100644
index 00000000..7bf00ce5
--- /dev/null
+++ b/src/kernel/vfs/inode/ops.rs
@@ -0,0 +1,17 @@
+use alloc::sync::Arc;
+
+use super::inode::InodeUse;
+use crate::kernel::vfs::dentry::Dentry;
+
+pub enum WriteOffset<'end> {
+    Position(usize),
+    End(&'end mut usize),
+}
+
+pub struct RenameData<'a, 'b> {
+    pub old_dentry: &'a Arc<Dentry>,
+    pub new_dentry: &'b Arc<Dentry>,
+    pub new_parent: InodeUse,
+    pub is_exchange: bool,
+    pub no_replace: bool,
+}
diff --git a/src/kernel/vfs/inode/statx.rs b/src/kernel/vfs/inode/statx.rs
new file mode 100644
index 00000000..feb2a1b5
--- /dev/null
+++ b/src/kernel/vfs/inode/statx.rs
@@ -0,0 +1,87 @@
+use posix_types::stat::StatX;
+
+use super::inode::InodeUse;
+use crate::kernel::constants::{
+    STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, STATX_MODE, STATX_MTIME,
+    STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID,
+};
+use crate::kernel::vfs::types::Format;
+use crate::prelude::KResult;
+
+impl InodeUse {
+    pub fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> {
+        let sb = self.sbget()?;
+        let info = self.info.lock();
+
+        if mask & STATX_NLINK != 0 {
+            stat.stx_nlink = info.nlink as _;
+            stat.stx_mask |= STATX_NLINK;
+        }
+
+        if mask & STATX_ATIME != 0 {
+            stat.stx_atime = info.atime.into();
+            stat.stx_mask |= STATX_ATIME;
+        }
+
+        if mask & STATX_MTIME != 0 {
+            stat.stx_mtime = info.mtime.into();
+            stat.stx_mask |= STATX_MTIME;
+        }
+
+        if mask & STATX_CTIME != 0 {
+            stat.stx_ctime = info.ctime.into();
+            stat.stx_mask |= STATX_CTIME;
+        }
+
+        if mask & STATX_SIZE != 0 {
+            stat.stx_size = info.size as _;
+            stat.stx_mask |= STATX_SIZE;
+        }
+
+        stat.stx_mode = 0;
+        if mask & STATX_MODE != 0 {
+            stat.stx_mode |= info.perm.bits() as u16;
+            stat.stx_mask |= STATX_MODE;
+        }
+
+        if mask & STATX_TYPE != 0 {
+            stat.stx_mode |= self.format.as_raw() as u16;
+            if let Format::BLK | Format::CHR = self.format {
+                let devid = self.devid()?;
+                stat.stx_rdev_major = devid.major as _;
+                stat.stx_rdev_minor = devid.minor as _;
+            }
+            stat.stx_mask |= STATX_TYPE;
+        }
+
+        if mask & STATX_INO != 0 {
+            stat.stx_ino = self.ino.as_raw();
+            stat.stx_mask |= STATX_INO;
+        }
+
+        if mask & STATX_BLOCKS != 0 {
+            stat.stx_blocks = (info.size + 512 - 1) / 512;
+            stat.stx_blksize = sb.info.io_blksize as _;
+            stat.stx_mask |= STATX_BLOCKS;
+        }
+
+        if mask & STATX_UID != 0 {
+            stat.stx_uid = info.uid;
+            stat.stx_mask |= STATX_UID;
+        }
+
+        if mask & STATX_GID != 0 {
+            stat.stx_gid = info.gid;
+            stat.stx_mask |= STATX_GID;
+        }
+
+        let fsdev = sb.info.device_id;
+        stat.stx_dev_major = fsdev.major as _;
+        stat.stx_dev_minor = fsdev.minor as _;
+
+        // TODO: support more attributes
+        stat.stx_attributes_mask = 0;
+
+        Ok(())
+    }
+}
diff --git a/src/kernel/vfs/mod.rs b/src/kernel/vfs/mod.rs
index f62cb9b9..5b8eca5a 100644
--- a/src/kernel/vfs/mod.rs
+++ b/src/kernel/vfs/mod.rs
@@ -1,31 +1,31 @@
-use crate::prelude::*;
-use alloc::sync::Arc;
-use dentry::Dentry;
-use eonix_sync::LazyLock;
-use inode::Mode;
-
 pub mod dentry;
 mod file;
 pub mod filearray;
 pub mod inode;
 pub mod mount;
-pub mod vfs;
+mod superblock;
+pub mod types;
 
-pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile};
+use crate::prelude::*;
+use alloc::sync::Arc;
+use dentry::Dentry;
+use eonix_sync::LazyLock;
+use types::Permission;
 
-pub type DevId = u32;
+pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile};
+pub use superblock::{SbRef, SbUse, SuperBlock, SuperBlockInfo, SuperBlockLock};
 
 pub struct FsContext {
     pub fsroot: Arc<Dentry>,
     pub cwd: Spin<Arc<Dentry>>,
-    pub umask: Spin<Mode>,
+    pub umask: Spin<Permission>,
 }
 
 static GLOBAL_FS_CONTEXT: LazyLock<Arc<FsContext>> = LazyLock::new(|| {
     Arc::new(FsContext {
         fsroot: Dentry::root().clone(),
         cwd: Spin::new(Dentry::root().clone()),
-        umask: Spin::new(Mode::new(0o022)),
+        umask: Spin::new(Permission::new(0o755)),
     })
 });
diff --git a/src/kernel/vfs/mount.rs b/src/kernel/vfs/mount.rs
index 0b38e0c0..6b171f81 100644
--- a/src/kernel/vfs/mount.rs
+++ b/src/kernel/vfs/mount.rs
@@ -1,12 +1,16 @@
-use super::{
-    dentry::{dcache, Dentry, DROOT},
-    inode::Inode,
-    vfs::Vfs,
-};
+use alloc::collections::btree_map::BTreeMap;
+use alloc::string::ToString as _;
+use alloc::sync::Arc;
+
+use async_trait::async_trait;
+use eonix_sync::LazyLock;
+
+use super::dentry::{dcache, Dentry, DROOT};
+use super::inode::InodeUse;
+use super::{SbUse, SuperBlock};
 use crate::kernel::constants::{EEXIST, ENODEV, ENOTDIR};
+use crate::kernel::task::block_on;
 use crate::prelude::*;
-use alloc::{collections::btree_map::BTreeMap, string::ToString as _, sync::Arc};
-use eonix_sync::LazyLock;
 
 pub const MS_RDONLY: u64 = 1 << 0;
 pub const MS_NOSUID: u64 = 1 << 1;
@@ -30,17 +34,17 @@
 static MOUNT_CREATORS: Spin<BTreeMap<String, Arc<dyn MountCreator>>> =
     Spin::new(BTreeMap::new());
 static MOUNTS: Spin<Vec<(Arc<Dentry>, MountPointData)>> = Spin::new(vec![]);
 
 pub struct Mount {
-    _vfs: Arc<dyn Vfs>,
+    sb: SbUse<dyn SuperBlock>,
     root: Arc<Dentry>,
 }
 
 impl Mount {
-    pub fn new(mp: &Dentry, vfs: Arc<dyn Vfs>, root_inode: Arc<dyn Inode>) -> KResult<Self> {
+    pub fn new(mp: &Dentry, sb: SbUse<dyn SuperBlock>, root_inode: InodeUse) -> KResult<Self> {
         let root_dentry = Dentry::create(mp.parent().clone(), &mp.get_name());
-        root_dentry.save_dir(root_inode)?;
+        root_dentry.fill(root_inode);
 
         Ok(Self {
-            _vfs: vfs,
+            sb,
             root: root_dentry,
         })
     }
@@ -53,9 +57,10 @@ impl Mount {
 
 unsafe impl Send for Mount {}
 unsafe impl Sync for Mount {}
 
+#[async_trait]
 pub trait MountCreator: Send + Sync {
     fn check_signature(&self, first_block: &[u8]) -> KResult<bool>;
-    fn create_mount(&self, source: &str, flags: u64, mp: &Arc<Dentry>) -> KResult<Mount>;
+    async fn create_mount(&self, source: &str, flags: u64, mp: &Arc<Dentry>) -> KResult<Mount>;
 }
 
 pub fn register_filesystem(fstype: &str, creator: Arc<dyn MountCreator>) -> KResult<()> {
@@ -77,7 +82,7 @@ struct MountPointData {
     flags: u64,
 }
 
-pub fn do_mount(
+pub async fn do_mount(
     mountpoint: &Arc<Dentry>,
     source: &str,
     mountpoint_str: &str,
@@ -101,7 +106,7 @@ pub async fn do_mount(
         let creators = { MOUNT_CREATORS.lock() };
         creators.get(fstype).ok_or(ENODEV)?.clone()
     };
-    let mount = creator.create_mount(source, flags, mountpoint)?;
+    let mount = creator.create_mount(source, flags, mountpoint).await?;
     let root_dentry = mount.root().clone();
@@ -165,8 +170,7 @@ impl Dentry {
             .cloned()
             .expect("tmpfs not registered.");
 
-        let mount = creator
-            .create_mount(&source, mount_flags, &DROOT)
+        let mount = block_on(creator.create_mount(&source, mount_flags, &DROOT))
             .expect("Failed to create root mount.");
 
         let root_dentry = mount.root().clone();
diff --git a/src/kernel/vfs/superblock.rs b/src/kernel/vfs/superblock.rs
new file mode 100644
index 00000000..e3be5cef
--- /dev/null
+++ b/src/kernel/vfs/superblock.rs
@@ -0,0 +1,156 @@
+use alloc::sync::{Arc, Weak};
+use core::any::{Any, TypeId};
+use core::marker::Unsize;
+use core::ops::{CoerceUnsized, Deref};
+
+use eonix_sync::RwLock;
+
+use super::types::DeviceId;
+use crate::kernel::constants::EIO;
+use crate::prelude::KResult;
+
+pub trait SuperBlock: Any + Send + Sync + 'static {}
+
+#[derive(Debug, Clone)]
+pub struct SuperBlockInfo {
+    pub io_blksize: u32,
+    pub device_id: DeviceId,
+    pub read_only: bool,
+}
+
+pub struct SuperBlockLock(());
+
+pub struct SuperBlockComplex<Backend>
+where
+    Backend: SuperBlock + ?Sized,
+{
+    pub info: SuperBlockInfo,
+    pub rwsem: RwLock<SuperBlockLock>,
+    pub backend: Backend,
+}
+
+pub struct SbRef<S>(Weak<SuperBlockComplex<S>>)
+where
+    S: SuperBlock + ?Sized;
+
+pub struct SbUse<S>(Arc<SuperBlockComplex<S>>)
+where
+    S: SuperBlock + ?Sized;
+
+impl<S> SbRef<S>
+where
+    S: SuperBlock + ?Sized,
+{
+    pub fn try_get(&self) -> Option<SbUse<S>> {
+        self.0.upgrade().map(|arc| SbUse(arc))
+    }
+
+    pub fn get(&self) -> KResult<SbUse<S>> {
+        self.try_get().ok_or(EIO)
+    }
+
+    pub fn from(sb: &SbUse<S>) -> Self {
+        SbRef(Arc::downgrade(&sb.0))
+    }
+
+    pub fn eq<U>(&self, other: &SbRef<U>) -> bool
+    where
+        U: SuperBlock + ?Sized,
+    {
+        core::ptr::addr_eq(self.0.as_ptr(), other.0.as_ptr())
+    }
+}
+
+impl<S> SbUse<S>
+where
+    S: SuperBlock,
+{
+    pub fn new(info: SuperBlockInfo, backend: S) -> Self {
+        Self(Arc::new(SuperBlockComplex {
+            info,
+            rwsem: RwLock::new(SuperBlockLock(())),
+            backend,
+        }))
+    }
+
+    pub fn new_cyclic(info: SuperBlockInfo, backend_func: impl FnOnce(SbRef<S>) -> S) -> Self {
+        Self(Arc::new_cyclic(|weak| SuperBlockComplex {
+            info,
+            rwsem: RwLock::new(SuperBlockLock(())),
+            backend: backend_func(SbRef(weak.clone())),
+        }))
+    }
+}
+
+impl<S> SbUse<S>
+where
+    S: SuperBlock + ?Sized,
+{
+    pub fn get_ref(&self) -> SbRef<S> {
+        SbRef(Arc::downgrade(&self.0))
+    }
+}
+
+impl SbUse<dyn SuperBlock> {
+    /// Downcast the superblock to a specific type.
+    ///
+    /// # Panics
+    /// Panics if the downcast fails.
+    pub fn downcast<S: SuperBlock>(self) -> SbUse<S> {
+        let Self(sb_complex) = self;
+        if (&sb_complex.backend as &dyn Any).type_id() != TypeId::of::<S>() {
+            panic!("Downcast failed: type mismatch");
+        }
+
+        unsafe {
+            // SAFETY: We have checked the type above and unsized coercion says
+            // that `Arc<T>` has the same layout as `Arc<U>` if `T: Unsize<U>`.
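+            // The `TypeId` check above proves the erased backend really is an
+            // `S`, so dropping the vtable metadata in the cast below is sound.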
+            SbUse(Arc::from_raw(
+                Arc::into_raw(sb_complex) as *const SuperBlockComplex<S>
+            ))
+        }
+    }
+}
+
+impl<S> Clone for SbRef<S>
+where
+    S: SuperBlock + ?Sized,
+{
+    fn clone(&self) -> Self {
+        SbRef(self.0.clone())
+    }
+}
+
+impl<S> Clone for SbUse<S>
+where
+    S: SuperBlock + ?Sized,
+{
+    fn clone(&self) -> Self {
+        SbUse(self.0.clone())
+    }
+}
+
+impl<T, U> CoerceUnsized<SbRef<U>> for SbRef<T>
+where
+    T: SuperBlock + Unsize<U> + ?Sized,
+    U: SuperBlock + ?Sized,
+{
+}
+
+impl<T, U> CoerceUnsized<SbUse<U>> for SbUse<T>
+where
+    T: SuperBlock + Unsize<U> + ?Sized,
+    U: SuperBlock + ?Sized,
+{
+}
+
+impl<S> Deref for SbUse<S>
+where
+    S: SuperBlock + ?Sized,
+{
+    type Target = SuperBlockComplex<S>;
+
+    fn deref(&self) -> &Self::Target {
+        self.0.deref()
+    }
+}
diff --git a/src/kernel/vfs/types/device_id.rs b/src/kernel/vfs/types/device_id.rs
new file mode 100644
index 00000000..6dd128ee
--- /dev/null
+++ b/src/kernel/vfs/types/device_id.rs
@@ -0,0 +1,25 @@
+use core::fmt::{Debug, Display, Formatter};
+
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct DeviceId {
+    pub major: u16,
+    pub minor: u16,
+}
+
+impl DeviceId {
+    pub const fn new(major: u16, minor: u16) -> Self {
+        Self { major, minor }
+    }
+}
+
+impl Debug for DeviceId {
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        write!(f, "DeviceId({:04x}:{:04x})", self.major, self.minor)
+    }
+}
+
+impl Display for DeviceId {
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        write!(f, "{:04x}:{:04x}", self.major, self.minor)
+    }
+}
diff --git a/src/kernel/vfs/types/mod.rs b/src/kernel/vfs/types/mod.rs
new file mode 100644
index 00000000..4a7505f7
--- /dev/null
+++ b/src/kernel/vfs/types/mod.rs
@@ -0,0 +1,5 @@
+mod device_id;
+mod mode;
+
+pub use device_id::DeviceId;
+pub use mode::{Format, Mode, Permission};
diff --git a/src/kernel/vfs/types/mode.rs b/src/kernel/vfs/types/mode.rs
new file mode 100644
index 00000000..a58c8215
--- /dev/null
+++ b/src/kernel/vfs/types/mode.rs
@@ -0,0 +1,172 @@
+use crate::kernel::{
+    constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG},
+    syscall::{FromSyscallArg, SyscallRetVal},
+};
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Mode(u32);
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum Format {
+    REG,
+    DIR,
+    LNK,
+    BLK,
+    CHR,
+}
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Permission(u32);
+
+impl Mode {
+    pub const fn new(bits: u32) -> Self {
+        Self(bits)
+    }
+
+    pub const fn is_blk(&self) -> bool {
+        (self.0 & S_IFMT) == S_IFBLK
+    }
+
+    pub const fn is_chr(&self) -> bool {
+        (self.0 & S_IFMT) == S_IFCHR
+    }
+
+    pub const fn bits(&self) -> u32 {
+        self.0
+    }
+
+    pub const fn format_bits(&self) -> u32 {
+        self.0 & S_IFMT
+    }
+
+    pub const fn non_format_bits(&self) -> u32 {
+        self.0 & !S_IFMT
+    }
+
+    pub fn format(&self) -> Format {
+        match self.try_format() {
+            None => panic!("unknown format bits: {:#o}", self.format_bits()),
+            Some(format) => format,
+        }
+    }
+
+    pub fn try_format(&self) -> Option<Format> {
+        match self.format_bits() {
+            S_IFREG => Some(Format::REG),
+            S_IFDIR => Some(Format::DIR),
+            S_IFLNK => Some(Format::LNK),
+            S_IFBLK => Some(Format::BLK),
+            S_IFCHR => Some(Format::CHR),
+            _ => None,
+        }
+    }
+
+    pub fn perm(&self) -> Permission {
+        Permission::new(self.non_format_bits())
+    }
+
+    pub const fn set_perm(&mut self, perm: Permission) {
+        self.0 = self.format_bits() | perm.bits();
+    }
+}
+
+impl Format {
+    pub const fn as_raw(&self) -> u32 {
+        match self {
+            Self::REG => S_IFREG,
+            Self::DIR => S_IFDIR,
+            Self::LNK => S_IFLNK,
+            Self::BLK => S_IFBLK,
+            Self::CHR => S_IFCHR,
+        }
+    }
+}
+
+impl Permission {
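+    // One entry per 3-bit rwx triple; e.g. RWX[0o5] is "r-x".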
+    const RWX: [&str; 8] = ["---", "--x", "-w-", "-wx", "r--", "r-x", "rw-", "rwx"];
+
+    pub const fn new(perm_bits: u32) -> Self {
+        Self(perm_bits & 0o7777)
+    }
+
+    pub const fn bits(&self) -> u32 {
+        self.0
+    }
+
+    pub const fn mask_with(&self, mask: Self) -> Self {
+        Self(self.0 & mask.0)
+    }
+}
+
+impl core::fmt::Debug for Mode {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self.non_format_bits() & !0o777 {
+            0 => write!(
+                f,
+                "Mode({format:?}, {perm:?})",
+                format = self.try_format(),
+                perm = Permission::new(self.non_format_bits()),
+            )?,
+            rem => write!(
+                f,
+                "Mode({format:?}, {perm:?}, rem={rem:#x})",
+                format = self.try_format(),
+                perm = Permission::new(self.non_format_bits())
+            )?,
+        }
+
+        Ok(())
+    }
+}
+
+impl core::fmt::Debug for Format {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self {
+            Self::REG => write!(f, "REG"),
+            Self::DIR => write!(f, "DIR"),
+            Self::LNK => write!(f, "LNK"),
+            Self::BLK => write!(f, "BLK"),
+            Self::CHR => write!(f, "CHR"),
+        }
+    }
+}
+
+impl core::fmt::Debug for Permission {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let owner = self.0 >> 6 & 0o7;
+        let group = self.0 >> 3 & 0o7;
+        let other = self.0 & 0o7;
+
+        write!(
+            f,
+            "{}{}{}",
+            Self::RWX[owner as usize],
+            Self::RWX[group as usize],
+            Self::RWX[other as usize]
+        )
+    }
+}
+
+impl FromSyscallArg for Mode {
+    fn from_arg(value: usize) -> Self {
+        Mode::new(value as u32)
+    }
+}
+
+impl SyscallRetVal for Mode {
+    fn into_retval(self) -> Option<usize> {
+        Some(self.bits() as usize)
+    }
+}
+
+impl FromSyscallArg for Permission {
+    fn from_arg(value: usize) -> Self {
+        Permission::new(value as u32)
+    }
+}
+
+impl SyscallRetVal for Permission {
+    fn into_retval(self) -> Option<usize> {
+        Some(self.bits() as usize)
+    }
+}
diff --git a/src/kernel/vfs/vfs.rs b/src/kernel/vfs/vfs.rs
deleted file mode 100644
index ee66f0b6..00000000
--- a/src/kernel/vfs/vfs.rs
+++ /dev/null
@@ -1,10 +0,0 @@
-use crate::prelude::*;
-
-use super::DevId;
-
-#[allow(dead_code)]
-pub trait Vfs: Send + Sync + AsAny {
-    fn io_blksize(&self) -> usize;
-    fn fs_devid(&self) -> DevId;
-    fn is_read_only(&self) -> bool;
-}
diff --git a/src/kernel_init.rs b/src/kernel_init.rs
index 3d8be90f..2259f6cf 100644
--- a/src/kernel_init.rs
+++ b/src/kernel_init.rs
@@ -1,35 +1,27 @@
-use crate::kernel::mem::{GlobalPageAlloc, RawPage};
-use eonix_hal::{
-    bootstrap::BootStrapData,
-    mm::{ArchMemory, ArchPagingMode, GLOBAL_PAGE_TABLE},
-    traits::mm::Memory,
-};
-use eonix_mm::{
-    address::{Addr as _, AddrOps as _, VAddr, VRange},
-    page_table::{PageAttribute, PagingMode as _, PTE},
-    paging::{Page as GenericPage, PAGE_SIZE, PFN},
-};
-
-pub fn setup_memory(data: &mut BootStrapData) {
-    let addr_max = ArchMemory::present_ram()
-        .map(|range| range.end())
-        .max()
-        .expect("No free memory");
+use eonix_hal::arch_exported::mm::{ArchPagingMode, PageAccessImpl};
+use eonix_hal::bootstrap::BootStrapData;
+use eonix_hal::mm::{ArchMemory, BasicPageAllocRef, GLOBAL_PAGE_TABLE};
+use eonix_hal::traits::mm::Memory;
+use eonix_mm::address::{Addr as _, AddrOps as _, VAddr, VRange};
+use eonix_mm::page_table::{PageAttribute, PageTable, PTE};
+use eonix_mm::paging::{Folio as _, FrameAlloc, PAGE_SIZE, PFN};
 
-    let pfn_max = PFN::from(addr_max.ceil());
-    let len_bytes_page_array = usize::from(pfn_max) * size_of::<RawPage>();
-    let count_pages = len_bytes_page_array.div_ceil(PAGE_SIZE);
+use crate::kernel::mem::{GlobalPageAlloc, RawPage};
 
-    let alloc = data.get_alloc().unwrap();
+fn setup_kernel_page_array(alloc: BasicPageAllocRef, count_pages: usize) {
+    // TODO: This should be done by the global Zone
+    let global_page_table = PageTable::<ArchPagingMode>::new(
+        GLOBAL_PAGE_TABLE.clone(),
+        alloc.clone(),
+        PageAccessImpl,
+    );
 
     // Map kernel page array.
     const V_KERNEL_PAGE_ARRAY_START: VAddr = VAddr::from(0xffffff8040000000);
 
-    for pte in GLOBAL_PAGE_TABLE.iter_kernel_in(
-        VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages),
-        ArchPagingMode::LEVELS,
-        &alloc,
-    ) {
+    let range =
+        VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages);
+    for pte in global_page_table.iter_kernel(range) {
         let attr = PageAttribute::PRESENT
             | PageAttribute::WRITE
             | PageAttribute::READ
@@ -37,22 +29,39 @@
             | PageAttribute::ACCESSED
             | PageAttribute::DIRTY;
 
-        let page = GenericPage::alloc_in(&alloc);
+        let page = alloc.alloc().unwrap();
         pte.set(page.into_raw(), attr.into());
     }
 
+    // TODO!!!: Construct the global zone with all present ram.
+    // for range in ArchMemory::present_ram() {
+    //     GlobalPageAlloc::mark_present(range);
+    // }
+
     unsafe {
         // SAFETY: We've just mapped the area with sufficient length.
         core::ptr::write_bytes(
-            V_KERNEL_PAGE_ARRAY_START.addr() as *mut (),
+            V_KERNEL_PAGE_ARRAY_START.addr() as *mut u8,
             0,
             count_pages * PAGE_SIZE,
        );
    }
 
-    for range in ArchMemory::present_ram() {
-        GlobalPageAlloc::mark_present(range);
-    }
+    core::mem::forget(global_page_table);
+}
+
+pub fn setup_memory(data: &mut BootStrapData) {
+    let addr_max = ArchMemory::present_ram()
+        .map(|range| range.end())
+        .max()
+        .expect("No free memory");
+
+    let pfn_max = PFN::from(addr_max.ceil());
+    let len_bytes_page_array = usize::from(pfn_max) * size_of::<RawPage>();
+    let count_pages = len_bytes_page_array.div_ceil(PAGE_SIZE);
+
+    let alloc = data.get_alloc().unwrap();
+    setup_kernel_page_array(alloc, count_pages);
 
     if let Some(early_alloc) = data.take_alloc() {
         for range in early_alloc.into_iter() {
diff --git a/src/lib.rs b/src/lib.rs
index 80d24c28..2e28db24 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,13 +2,17 @@
 #![no_main]
 #![feature(allocator_api)]
 #![feature(c_size_t)]
-#![feature(concat_idents)]
+#![feature(coerce_unsized)]
 #![feature(arbitrary_self_types)]
 #![feature(get_mut_unchecked)]
 #![feature(macro_metavar_expr)]
+#![feature(unsize)]
 
 extern crate alloc;
 
+#[macro_use]
+extern crate static_assertions;
+
 #[cfg(any(target_arch = "riscv64", target_arch = "x86_64"))]
 extern crate unwinding;
 
@@ -26,32 +30,27 @@ mod prelude;
 mod rcu;
 mod sync;
 
-use crate::kernel::task::alloc_pid;
-use alloc::{ffi::CString, sync::Arc};
-use core::{
-    hint::spin_loop,
-    sync::atomic::{AtomicBool, AtomicUsize, Ordering},
-};
-use eonix_hal::{
-    arch_exported::bootstrap::shutdown,
-    context::TaskContext,
-    processor::{halt, CPU, CPU_COUNT},
-    traits::{context::RawTaskContext, trap::IrqState},
-    trap::disable_irqs_save,
-};
+use alloc::ffi::CString;
+use core::hint::spin_loop;
+use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+
+use eonix_hal::arch_exported::bootstrap::shutdown;
+use eonix_hal::context::TaskContext;
+use eonix_hal::processor::{halt, CPU, CPU_COUNT};
+use eonix_hal::symbol_addr;
+use eonix_hal::traits::context::RawTaskContext;
+use eonix_hal::traits::trap::IrqState;
+use eonix_hal::trap::disable_irqs_save;
 use eonix_mm::address::PRange;
-use eonix_runtime::{executor::Stack, scheduler::RUNTIME};
-use kernel::{
-    mem::GlobalPageAlloc,
-    task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder},
-    vfs::{
-        dentry::Dentry,
-        inode::Mode,
-        mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY},
-        FsContext,
-    },
-    CharDevice,
-};
+use eonix_runtime::executor::Stack;
+use eonix_runtime::scheduler::RUNTIME;
+use kernel::mem::GlobalPageAlloc;
+use kernel::task::{KernelStack, ProcessList, ProgramLoader};
+use kernel::vfs::dentry::Dentry;
+use kernel::vfs::mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY};
+use kernel::vfs::types::Permission;
+use kernel::vfs::FsContext;
+use kernel::CharDevice;
 use kernel_init::setup_memory;
 use path::Path;
 use prelude::*;
@@ -135,7 +134,7 @@ fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! {
         bottom
     };
     ctx.set_interrupt_enabled(true);
-    ctx.set_program_counter(standard_main as usize);
+    ctx.set_program_counter(symbol_addr!(standard_main));
     ctx.set_stack_pointer(stack_bottom);
 
     unsafe {
@@ -161,7 +160,7 @@ fn kernel_ap_main(_stack_range: PRange) -> ! {
         bottom
     };
     ctx.set_interrupt_enabled(true);
-    ctx.set_program_counter(standard_main as usize);
+    ctx.set_program_counter(symbol_addr!(standard_main));
     ctx.set_stack_pointer(stack_bottom);
 
     unsafe {
@@ -192,16 +191,16 @@ async fn init_process(early_kstack: PRange) {
     {
         // We might want the serial initialized as soon as possible.
         driver::serial::init().unwrap();
-        driver::e1000e::register_e1000e_driver();
-        driver::ahci::register_ahci_driver();
+        driver::e1000e::register_e1000e_driver().await;
+        driver::ahci::register_ahci_driver().await;
     }
 
     #[cfg(target_arch = "riscv64")]
     {
         driver::serial::init().unwrap();
         driver::virtio::init_virtio_devices();
-        driver::e1000e::register_e1000e_driver();
-        driver::ahci::register_ahci_driver();
+        driver::e1000e::register_e1000e_driver().await;
+        driver::ahci::register_ahci_driver().await;
         driver::goldfish_rtc::probe();
     }
 
@@ -209,21 +208,26 @@ async fn init_process(early_kstack: PRange) {
     {
         driver::serial::init().unwrap();
         driver::virtio::init_virtio_devices();
-        driver::e1000e::register_e1000e_driver();
-        driver::ahci::register_ahci_driver();
+        driver::e1000e::register_e1000e_driver().await;
+        driver::ahci::register_ahci_driver().await;
     }
 
     fs::tmpfs::init();
-    fs::procfs::init();
+    fs::procfs::init().await;
     fs::fat32::init();
-    fs::ext4::init();
+    // fs::ext4::init();
 
     let load_info = {
         // mount fat32 /mnt directory
         let fs_context = FsContext::global();
-        let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
+        let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true)
+            .await
+            .unwrap();
 
-        mnt_dir.mkdir(Mode::new(0o755)).unwrap();
+        mnt_dir
+            .mkdir(Permission::new(0o755))
+            .await
+            .expect("Failed to create /mnt directory");
 
         do_mount(
             &mnt_dir,
@@ -232,6 +236,7 @@ async fn init_process(early_kstack: PRange) {
             "fat32",
             MS_RDONLY | MS_NOATIME | MS_NODEV | MS_NOSUID,
        )
+        .await
        .unwrap();
 
         let init_names = [&b"/init"[..], &b"/sbin/init"[..], &b"/mnt/initsh"[..]];
 
         let mut init_name = None;
         let mut init = None;
         for name in init_names {
-            if let Ok(dentry) = Dentry::open(fs_context, Path::new(name).unwrap(), true) {
+            if let Ok(dentry) = Dentry::open(fs_context, Path::new(name).unwrap(), true).await {
                 if dentry.is_valid() {
                     init_name = Some(CString::new(name).unwrap());
                     init = Some(dentry);
@@ -261,27 +266,12 @@ async fn init_process(early_kstack: PRange) {
         ];
 
         ProgramLoader::parse(fs_context, init_name, init.clone(), argv, envp)
+            .await
             .expect("Failed to parse init program")
             .load()
             .await
            .expect("Failed to load init program")
    };
 
-    let thread_builder = ThreadBuilder::new()
-        .name(Arc::from(&b"busybox"[..]))
-        .entry(load_info.entry_ip, load_info.sp);
-
-    let mut process_list = ProcessList::get().write().await;
-    let (thread, process) = ProcessBuilder::new()
-        .pid(alloc_pid())
-        .mm_list(load_info.mm_list)
-        .thread_builder(thread_builder)
-        .build(&mut process_list);
-
-    process_list.set_init_process(process);
-
-    // TODO!!!: Remove this.
-    thread.files.open_console();
-
-    RUNTIME.spawn(thread.run());
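+    // `ProcessList::sys_init` now builds, registers and spawns the init
+    // process from `load_info`, replacing the manual sequence removed above.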
program") }; - let thread_builder = ThreadBuilder::new() - .name(Arc::from(&b"busybox"[..])) - .entry(load_info.entry_ip, load_info.sp); - - let mut process_list = ProcessList::get().write().await; - let (thread, process) = ProcessBuilder::new() - .pid(alloc_pid()) - .mm_list(load_info.mm_list) - .thread_builder(thread_builder) - .build(&mut process_list); - - process_list.set_init_process(process); - - // TODO!!!: Remove this. - thread.files.open_console(); - - RUNTIME.spawn(thread.run()); + ProcessList::sys_init(load_info).await; } diff --git a/src/panic.rs b/src/panic.rs index 3c9c5f34..4a9ef92c 100644 --- a/src/panic.rs +++ b/src/panic.rs @@ -24,6 +24,12 @@ pub fn stack_trace() { UnwindReasonCode::NO_REASON } + println_fatal!("--------------8< CUT HERE 8<--------------"); + println_fatal!("Stacktrace:"); + println_fatal!(); + let mut data = CallbackData { counter: 0 }; _Unwind_Backtrace(callback, &raw mut data as *mut c_void); + + println_fatal!("--------------8< CUT HERE 8<--------------"); } diff --git a/src/path.rs b/src/path.rs index 8b740095..47b9a4b6 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,34 +1,32 @@ -use crate::{kernel::constants::ENOENT, prelude::*}; use core::fmt::{self, Debug, Formatter}; -pub struct Path<'lt> { - all: &'lt [u8], +use crate::kernel::constants::ENOENT; +use crate::prelude::*; + +#[repr(transparent)] +pub struct Path { + all: [u8], } pub struct PathIterator<'lt> { rem: &'lt [u8], } -#[allow(dead_code)] -impl<'lt> Path<'lt> { - pub fn new(all: &'lt [u8]) -> KResult { +impl Path { + pub fn new(all: &[u8]) -> KResult<&Self> { if all.is_empty() { Err(ENOENT) } else { - Ok(Self { all }) + Ok(unsafe { &*(all as *const [u8] as *const Path) }) } } - pub fn from_str(all: &'lt str) -> KResult { - Self::new(all.as_bytes()) - } - pub fn is_absolute(&self) -> bool { self.all.starts_with(&['/' as u8]) } - pub fn iter(&self) -> PathIterator<'lt> { - PathIterator::new(self.all) + pub fn iter(&self) -> PathIterator<'_> { + PathIterator::new(&self.all) } } @@ -46,11 +44,17 @@ pub enum PathComponent<'lt> { Parent, } +impl PathIterator<'_> { + pub fn is_empty(&self) -> bool { + self.rem.is_empty() + } +} + impl<'lt> Iterator for PathIterator<'lt> { type Item = PathComponent<'lt>; fn next(&mut self) -> Option { - if self.rem.is_empty() { + if self.is_empty() { return None; } @@ -71,16 +75,16 @@ impl<'lt> Iterator for PathIterator<'lt> { self.rem = rem; match cur { - cur if cur.is_empty() => Some(PathComponent::TrailingEmpty), - cur if cur == b"." => Some(PathComponent::Current), - cur if cur == b".." => Some(PathComponent::Parent), - cur => Some(PathComponent::Name(cur)), + b"" => Some(PathComponent::TrailingEmpty), + b"." => Some(PathComponent::Current), + b".." => Some(PathComponent::Parent), + name => Some(PathComponent::Name(name)), } } } -impl Debug for Path<'_> { +impl Debug for Path { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "Path({:?})", self.all) + write!(f, "Path({:?})", &self.all) } } diff --git a/src/prelude.rs b/src/prelude.rs index b3dbe2ce..880489da 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -18,34 +18,6 @@ pub(crate) use crate::kernel::console::{ pub(crate) use alloc::{boxed::Box, string::String, vec, vec::Vec}; -pub(crate) use core::{any::Any, fmt::Write, marker::PhantomData, str}; +pub(crate) use core::{fmt::Write, marker::PhantomData, str}; pub use crate::sync::Spin; - -#[allow(dead_code)] -pub trait AsAny: Send + Sync { - fn as_any(&self) -> &dyn Any; - fn as_any_mut(&mut self) -> &mut dyn Any; -} - -macro_rules! 
-    ($t:ty) => {
-        impl AsAny for $t {
-            fn as_any(&self) -> &dyn Any {
-                self
-            }
-
-            fn as_any_mut(&mut self) -> &mut dyn Any {
-                self
-            }
-        }
-    };
-}
-
-macro_rules! addr_of_mut_field {
-    ($pointer:expr, $field:ident) => {
-        core::ptr::addr_of_mut!((*$pointer).$field)
-    };
-}
-
-pub(crate) use {addr_of_mut_field, impl_any};
diff --git a/src/rcu.rs b/src/rcu.rs
index c1645d33..b06db9e2 100644
--- a/src/rcu.rs
+++ b/src/rcu.rs
@@ -1,21 +1,35 @@
 use crate::{kernel::task::block_on, prelude::*};
 use alloc::sync::Arc;
+use arcref::ArcRef;
 use core::{
     ops::Deref,
     ptr::NonNull,
     sync::atomic::{AtomicPtr, Ordering},
 };
+use eonix_preempt::PreemptGuard;
 use eonix_runtime::scheduler::RUNTIME;
-use eonix_sync::{Mutex, RwLock, RwLockReadGuard};
+use eonix_sync::{RwLock, RwLockReadGuard};
 use pointers::BorrowedArc;
 
+/// The RCU Read Lock. Holding a reference to an instance of the struct assures
+/// you that any RCU protected data would not be dropped.
+///
+/// The struct cannot be created directly. Instead, use [`rcu_read_lock()`].
+#[derive(Debug)]
+pub struct RCUReadLock();
+
+pub struct RCUReadGuardNew {
+    guard: RwLockReadGuard<'static, RCUReadLock>,
+    _disable_preempt: PreemptGuard<()>,
+}
+
 pub struct RCUReadGuard<'data, T: 'data> {
     value: T,
-    _guard: RwLockReadGuard<'data, ()>,
+    _guard: RwLockReadGuard<'static, RCUReadLock>,
     _phantom: PhantomData<&'data T>,
 }
 
-static GLOBAL_RCU_SEM: RwLock<()> = RwLock::new(());
+static GLOBAL_RCU_SEM: RwLock<RCUReadLock> = RwLock::new(RCUReadLock());
 
 impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> {
     fn lock(value: BorrowedArc<'data, T>) -> Self {
@@ -25,14 +39,6 @@ impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> {
             _phantom: PhantomData,
         }
     }
-
-    pub fn borrow(&self) -> BorrowedArc<'data, T> {
-        unsafe {
-            BorrowedArc::from_raw(NonNull::new_unchecked(
-                &raw const *self.value.borrow() as *mut T
-            ))
-        }
-    }
 }
 
 impl<'data, T: 'data> Deref for RCUReadGuard<'data, T> {
@@ -63,17 +69,14 @@ pub trait RCUNode<T> {
 
 pub struct RCUList<T: RCUNode<T>> {
     head: AtomicPtr<T>,
-
-    reader_lock: RwLock<()>,
-    update_lock: Mutex<()>,
+    update_lock: Spin<()>,
 }
 
 impl<T: RCUNode<T>> RCUList<T> {
     pub const fn new() -> Self {
         Self {
             head: AtomicPtr::new(core::ptr::null_mut()),
-            reader_lock: RwLock::new(()),
-            update_lock: Mutex::new(()),
+            update_lock: Spin::new(()),
         }
     }
 
@@ -117,7 +120,6 @@ impl<T: RCUNode<T>> RCUList<T> {
             unsafe { Arc::from_raw(me) };
         }
 
-        let _lck = self.reader_lock.write();
         node.rcu_prev()
             .store(core::ptr::null_mut(), Ordering::Release);
         node.rcu_next()
@@ -152,7 +154,6 @@ impl<T: RCUNode<T>> RCUList<T> {
             unsafe { Arc::from_raw(old) };
         }
 
-        let _lck = self.reader_lock.write();
         old_node
             .rcu_prev()
             .store(core::ptr::null_mut(), Ordering::Release);
@@ -161,36 +162,36 @@ impl<T: RCUNode<T>> RCUList<T> {
             .store(core::ptr::null_mut(), Ordering::Release);
     }
 
-    pub fn iter(&self) -> RCUIterator<T> {
-        let _lck = block_on(self.reader_lock.read());
-
+    pub fn iter<'a, 'r>(&'a self, _lock: &'r RCUReadLock) -> RCUIterator<'a, 'r, T> {
         RCUIterator {
-            // SAFETY: We have a read lock, so the node is still alive.
-            cur: NonNull::new(self.head.load(Ordering::SeqCst)),
-            _lock: _lck,
+            cur: NonNull::new(self.head.load(Ordering::Acquire)),
+            _phantom: PhantomData,
         }
     }
 }
 
-pub struct RCUIterator<'lt, T: RCUNode<T>> {
+pub struct RCUIterator<'list, 'rcu, T: RCUNode<T>> {
     cur: Option<NonNull<T>>,
-    _lock: RwLockReadGuard<'lt, ()>,
+    _phantom: PhantomData<(&'list (), &'rcu ())>,
 }
 
-impl<'lt, T: RCUNode<T>> Iterator for RCUIterator<'lt, T> {
-    type Item = BorrowedArc<'lt, T>;
+impl<'rcu, T: RCUNode<T>> Iterator for RCUIterator<'_, 'rcu, T> {
+    type Item = ArcRef<'rcu, T>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        match self.cur {
-            None => None,
-            Some(pointer) => {
-                // SAFETY: We have a read lock, so the node is still alive.
-                let reference = unsafe { pointer.as_ref() };
+        self.cur.map(|pointer| {
+            let reference = unsafe {
+                // SAFETY: We have the read lock so the node is still alive.
+                pointer.as_ref()
+            };
+
+            self.cur = NonNull::new(reference.rcu_next().load(Ordering::Acquire));
 
-                self.cur = NonNull::new(reference.rcu_next().load(Ordering::SeqCst));
-                Some(unsafe { BorrowedArc::from_raw(pointer) })
+            unsafe {
+                // SAFETY: We have the read lock so the node is still alive.
+                ArcRef::new_unchecked(pointer.as_ptr())
             }
-        }
+        })
     }
 }
 
@@ -228,15 +229,16 @@ where
     }
 
     pub fn load<'lt>(&self) -> Option<RCUReadGuard<'lt, BorrowedArc<'lt, T>>> {
+        // BUG: We should acquire the lock before loading the pointer
         NonNull::new(self.0.load(Ordering::Acquire))
             .map(|p| RCUReadGuard::lock(unsafe { BorrowedArc::from_raw(p) }))
     }
 
-    pub fn load_protected<'a, U: 'a>(
-        &self,
-        _guard: &RCUReadGuard<'a, U>,
-    ) -> Option<BorrowedArc<'a, T>> {
-        NonNull::new(self.0.load(Ordering::Acquire)).map(|p| unsafe { BorrowedArc::from_raw(p) })
+    pub fn dereference<'r, 'a: 'r>(&self, _lock: &'a RCUReadLock) -> Option<ArcRef<'r, T>> {
+        NonNull::new(self.0.load(Ordering::Acquire)).map(|p| unsafe {
+            // SAFETY: We have a read lock, so the node is still alive.
+            ArcRef::new_unchecked(p.as_ptr())
+        })
     }
 
     /// # Safety
@@ -289,3 +291,18 @@ where
         }
     }
 }
+
+impl Deref for RCUReadGuardNew {
+    type Target = RCUReadLock;
+
+    fn deref(&self) -> &Self::Target {
+        &self.guard
+    }
+}
+
+pub fn rcu_read_lock() -> RCUReadGuardNew {
+    RCUReadGuardNew {
+        guard: block_on(GLOBAL_RCU_SEM.read()),
+        _disable_preempt: PreemptGuard::new(()),
+    }
+}
diff --git a/src/sync/arcswap.rs b/src/sync/arcswap.rs
index fb8219b2..7421659f 100644
--- a/src/sync/arcswap.rs
+++ b/src/sync/arcswap.rs
@@ -1,9 +1,8 @@
 use alloc::sync::Arc;
-use core::{
-    fmt::{self, Debug, Formatter},
-    ptr::NonNull,
-    sync::atomic::{AtomicPtr, Ordering},
-};
+use core::fmt::{self, Debug, Formatter};
+use core::ptr::NonNull;
+use core::sync::atomic::{AtomicPtr, Ordering};
+
 use pointers::BorrowedArc;
 
 unsafe impl<T> Send for ArcSwap<T> where T: Send + Sync {}
@@ -33,7 +32,7 @@ impl<T> ArcSwap<T> {
         }
     }
 
-    pub fn borrow(&self) -> BorrowedArc<T> {
+    pub fn borrow(&self) -> BorrowedArc<'_, T> {
         unsafe {
             BorrowedArc::from_raw(
                 NonNull::new(self.pointer.load(Ordering::Acquire))
diff --git a/user-programs/init_script_riscv64.sh b/user-programs/init_script_riscv64.sh
index 52b2628c..f67e2a27 100644
--- a/user-programs/init_script_riscv64.sh
+++ b/user-programs/init_script_riscv64.sh
@@ -1,60 +1,78 @@
 #!/mnt/busybox sh
 
 BUSYBOX=/mnt/busybox
+TERMINAL=/dev/ttyS0
+VERBOSE=
 
-freeze() {
-    echo "an error occurred while executing '''$@''', freezing..." >&2
+error() {
+    printf "\033[91merror: \033[0m%s\n" "$1" >&2
+}
+
+warn() {
+    printf "\033[93mwarn : \033[0m%s\n" "$1" >&2
+}
+
+info() {
+    printf "\033[92minfo : \033[0m%s\n" "$1" >&2
+}
+
+die() {
+    error "$1" && freeze
+}
+
+freeze() {
+    info "freezing..." >&2
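+    # Busy-loop forever; the exit below is never reached.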
 
     while true; do
-        true
+        :
     done
+
+    exit 1
 }
 
-do_or_freeze() {
-    if $@; then
-        return
-    fi
+unrecoverable() {
+    die "unrecoverable error occurred. check the message above."
+}
 
-    freeze $@
+busybox() {
+    $BUSYBOX "$@"
 }
 
-do_or_freeze $BUSYBOX mkdir -p /dev
+trap unrecoverable EXIT
 
-do_or_freeze $BUSYBOX mknod -m 666 /dev/console c 5 1
-do_or_freeze $BUSYBOX mknod -m 666 /dev/null c 1 3
-do_or_freeze $BUSYBOX mknod -m 666 /dev/zero c 1 5
-do_or_freeze $BUSYBOX mknod -m 666 /dev/vda b 8 0
-do_or_freeze $BUSYBOX mknod -m 666 /dev/vda1 b 8 1
-do_or_freeze $BUSYBOX mknod -m 666 /dev/vdb b 8 16
-do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS0 c 4 64
-do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS1 c 4 65
+set -euo pipefail
 
-echo -n -e "deploying busybox... " >&2
+if [ -n "$VERBOSE" ]; then
+    set -x
+fi
 
-do_or_freeze $BUSYBOX mkdir -p /bin
-do_or_freeze $BUSYBOX --install -s /bin
-do_or_freeze $BUSYBOX mkdir -p /lib
+busybox mkdir -p /dev
 
-export PATH="/bin"
+busybox mknod -m 666 /dev/console c 5 1
+busybox mknod -m 666 /dev/null c 1 3
+busybox mknod -m 666 /dev/zero c 1 5
+busybox mknod -m 666 /dev/vda b 8 0
+busybox mknod -m 666 /dev/vda1 b 8 1
+busybox mknod -m 666 /dev/vdb b 8 16
+busybox mknod -m 666 /dev/ttyS0 c 4 64
+busybox mknod -m 666 /dev/ttyS1 c 4 65
 
-echo ok >&2
+exec < "$TERMINAL"
+exec > "$TERMINAL" 2>&1
 
-do_or_freeze mkdir -p /etc /root /proc
-do_or_freeze mount -t procfs proc proc
+info "deploying busybox..."
 
 # Check if the device /dev/vdb is available and can be read
-if dd if=/dev/vdb of=/dev/null bs=512 count=1; then
-    echo -n -e "Mounting the ext4 image... " >&2
-    do_or_freeze mkdir -p /mnt1
-    do_or_freeze mount -t ext4 /dev/vdb /mnt1
-    echo ok >&2
-fi
+busybox mkdir -p /bin /lib
+busybox --install -s /bin
+
+info "done"
 
 export PATH="/bin"
 
-cp /mnt/ld-musl-i386.so.1 /lib/ld-musl-i386.so.1
-ln -s /lib/ld-musl-i386.so.1 /lib/libc.so
+mkdir -p /etc /root /proc
+mount -t procfs proc proc
 
 cat > /etc/passwd <<EOF
 root:x:0:0:root:/root:/bin/sh
 EOF
 
 cat > /etc/group <<EOF
 root:x:0:
 EOF
 
-exec sh < /dev/ttyS0 > /dev/ttyS0 2> /dev/ttyS0
+exec sh -l
 
 # We don't have a working init yet, so we use busybox sh directly for now.
 # exec /mnt/init /bin/sh -c 'exec sh -l < /dev/ttyS0 > /dev/ttyS0 2> /dev/ttyS0'