From f6c26c956c97da2506c4e69b9dc4f121da39af0b Mon Sep 17 00:00:00 2001 From: Heinz Date: Wed, 23 Jul 2025 23:40:26 +0800 Subject: [PATCH 01/54] perf: replace the annoyed ext4 crate with a new choice (called another ext4 crate --- Cargo.lock | 41 +++++++++++----------- Cargo.toml | 2 +- src/fs/ext4.rs | 94 ++++++++++++++++++++++++++++++-------------------- 3 files changed, 78 insertions(+), 59 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b8b8e1b9..04b25a21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,15 +19,24 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c330e503236d0b06386ae6cc42a513ef1ccc23c52b603c1b52f018564faf44" +[[package]] +name = "another_ext4" +version = "0.1.0" +source = "git+https://github.com/SMS-Derfflinger/another_ext4?branch=main#ed6d91718db721eb4a744483c289cc44a6f34bf4" +dependencies = [ + "bitflags", + "log", +] + [[package]] name = "atomic_unique_refcell" version = "0.1.0" [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bit_field" @@ -51,9 +60,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "critical-section" @@ -133,6 +142,7 @@ version = "0.1.0" dependencies = [ "acpi", "align_ext", + "another_ext4", "atomic_unique_refcell", "bitflags", "buddy_allocator", @@ -144,7 +154,6 @@ dependencies = [ "eonix_preempt", "eonix_runtime", "eonix_sync", - "ext4_rs", "intrusive-collections", "intrusive_list", "itertools", @@ -247,16 +256,6 @@ dependencies = [ 
"intrusive-collections", ] -[[package]] -name = "ext4_rs" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1a97344bde15b0ace15e265dab27228d4bdc37a0bfa8548c5645d7cfa6a144" -dependencies = [ - "bitflags", - "log", -] - [[package]] name = "fdt" version = "0.1.5" @@ -392,9 +391,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.101" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", @@ -459,18 +458,18 @@ checksum = "2fe21bcc34ca7fe6dd56cc2cb1261ea59d6b93620215aefb5ea6032265527784" [[package]] name = "zerocopy" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index edc2c319..ceea1cf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ itertools = { version = "0.13.0", default-features = false } acpi = "5.2.0" align_ext = "0.1.0" xmas-elf = "0.10.0" -ext4_rs = "1.3.2" +another_ext4 = { git = "https://github.com/SMS-Derfflinger/another_ext4", branch = "main" } [target.'cfg(target_arch = "riscv64")'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 3ffc2fe0..56928623 100644 --- a/src/fs/ext4.rs +++ 
b/src/fs/ext4.rs @@ -22,10 +22,11 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; +use another_ext4::{ + Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, +}; use eonix_runtime::task::Task; use eonix_sync::RwLock; -use ext4_rs::{BlockDevice as Ext4BlockDeviceTrait, Ext4Error}; -use ext4_rs::{Errno, Ext4}; pub struct Ext4BlockDevice { device: Arc, @@ -38,19 +39,22 @@ impl Ext4BlockDevice { } impl Ext4BlockDeviceTrait for Ext4BlockDevice { - fn read_offset(&self, offset: usize) -> Vec { - let mut buffer = vec![0u8; 4096]; + fn read_block(&self, block_id: PBlockId) -> Block { + let mut buffer = [0u8; 4096]; let mut byte_buffer = ByteBuffer::new(buffer.as_mut_slice()); let _ = self .device - .read_some(offset, &mut byte_buffer) + .read_some((block_id as usize) * 4096, &mut byte_buffer) .expect("Failed to read from block device"); - buffer + Block { + id: block_id, + data: buffer, + } } - fn write_offset(&self, _offset: usize, _data: &[u8]) { + fn write_block(&self, block: &another_ext4::Block) { todo!() } } @@ -115,7 +119,7 @@ impl Ext4Fs { impl Ext4Fs { pub fn create(device: Arc) -> KResult<(Arc, Arc)> { let ext4_device = Ext4BlockDevice::new(device.clone()); - let ext4 = Ext4::open(Arc::new(ext4_device)); + let ext4 = Ext4::load(Arc::new(ext4_device)).unwrap(); let ext4fs = Arc::new(Self { inner: ext4, @@ -125,28 +129,28 @@ impl Ext4Fs { let root_inode = { let mut icache = Task::block_on(ext4fs.icache.write()); - let root_inode = ext4fs.inner.get_inode_ref(2); + let root_inode = ext4fs.inner.read_root_inode(); ext4fs.get_or_insert( &mut icache, InodeData { - ino: root_inode.inode_num as Ino, + ino: root_inode.id as Ino, size: AtomicU64::new(root_inode.inode.size()), - nlink: AtomicNlink::new(root_inode.inode.links_count() as _), - uid: AtomicU32::new(root_inode.inode.uid() as _), - gid: AtomicU32::new(root_inode.inode.gid() as _), - mode: AtomicU32::new(root_inode.inode.mode() as _), + nlink: 
AtomicNlink::new(root_inode.inode.link_count() as u64), + uid: AtomicU32::new(root_inode.inode.uid()), + gid: AtomicU32::new(root_inode.inode.gid()), + mode: AtomicU32::new(root_inode.inode.mode().bits() as u32), atime: Spin::new(Instant::new( root_inode.inode.atime() as _, - root_inode.inode.i_atime_extra() as _, + root_inode.inode.atime_extra() as _, )), ctime: Spin::new(Instant::new( root_inode.inode.ctime() as _, - root_inode.inode.i_ctime_extra() as _, + root_inode.inode.ctime_extra() as _, )), mtime: Spin::new(Instant::new( root_inode.inode.mtime() as _, - root_inode.inode.i_mtime_extra() as _, + root_inode.inode.mtime_extra() as _, )), rwsem: RwLock::new(()), vfs: Arc::downgrade(&ext4fs) as _, @@ -187,12 +191,12 @@ impl Inode for FileInode { let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let mut temp_buf = vec![0u8; buffer.total()]; - match ext4fs.inner.read_at(self.ino as u32, offset, &mut temp_buf) { + match ext4fs.inner.read(self.ino as u32, offset, &mut temp_buf) { Ok(bytes_read) => { let _ = buffer.fill(&temp_buf[..bytes_read])?; Ok(buffer.wrote()) } - Err(e) => Err(e.error() as u32), + Err(e) => Err(e.code() as u32), } } } @@ -204,13 +208,14 @@ impl Inode for DirInode { let name = dentry.get_name(); let name = String::from_utf8_lossy(&name); - let lookup_result = ext4fs.inner.fuse_lookup(self.ino, &name); + let lookup_result = ext4fs.inner.lookup(self.ino as u32, &name); - const EXT4_ERROR_ENOENT: Ext4Error = Ext4Error::new(Errno::ENOENT); + // TODO: wtf + //const EXT4_ERROR_ENOENT: Ext4Error_ = Ext4Error_::new(ErrCode::ENOENT); let attr = match lookup_result { - Ok(attr) => attr, - Err(EXT4_ERROR_ENOENT) => return Ok(None), - Err(error) => return Err(error.error() as u32), + Ok(inode_id) => ext4fs.inner.getattr(inode_id).unwrap(), + //Err(EXT4_ERROR_ENOENT) => return Ok(None), + Err(error) => return Err(error.code() as u32), }; // Fast path: if the inode is already in the cache, return it. 
@@ -219,9 +224,19 @@ impl Inode for DirInode { return Ok(Some(inode)); } - let extra_perm = attr.perm.bits() as u32 & 0o7000; - let perm = attr.perm.bits() as u32 & 0o0700; - let real_perm = extra_perm | perm | perm >> 3 | perm >> 6; + let file_type_bits = match attr.ftype { + FileType::RegularFile => InodeMode::FILE.bits(), + FileType::Directory => InodeMode::DIRECTORY.bits(), + FileType::CharacterDev => InodeMode::CHARDEV.bits(), + FileType::BlockDev => InodeMode::BLOCKDEV.bits(), + FileType::Fifo => InodeMode::FIFO.bits(), + FileType::Socket => InodeMode::SOCKET.bits(), + FileType::SymLink => InodeMode::SOFTLINK.bits(), + FileType::Unknown => 0, + }; + + let perm_bits = attr.perm.bits() & InodeMode::PERM_MASK.bits(); + let mode = file_type_bits | perm_bits; // Create a new inode based on the attributes. let mut icache = Task::block_on(ext4fs.icache.write()); @@ -230,10 +245,10 @@ impl Inode for DirInode { InodeData { ino: attr.ino as Ino, size: AtomicU64::new(attr.size), - nlink: AtomicNlink::new(attr.nlink as _), + nlink: AtomicNlink::new(attr.links as _), uid: AtomicU32::new(attr.uid), gid: AtomicU32::new(attr.gid), - mode: AtomicU32::new(attr.kind.bits() as u32 | real_perm), + mode: AtomicU32::new(mode as u32), atime: Spin::new(Instant::new(attr.atime as _, 0)), ctime: Spin::new(Instant::new(attr.ctime as _, 0)), mtime: Spin::new(Instant::new(attr.mtime as _, 0)), @@ -255,18 +270,23 @@ impl Inode for DirInode { let entries = ext4fs .inner - .fuse_readdir(self.ino as u64, 0, offset as i64) - .map_err(|err| err.error() as u32)?; - let mut current_offset = 0; + .listdir(self.ino as u32) + .map_err(|err| err.code() as u32)?; - for entry in entries { - let name_len = entry.name_len as usize; - let name = &entry.name[..name_len]; + let entries_to_process = if offset < entries.len() { + &entries[offset..] 
+ } else { + &entries[0..0] + }; + let mut current_offset = 0; + for entry in entries_to_process { + let name_string = entry.name(); + let name = name_string.as_bytes(); + let inode = entry.inode() as Ino; - if callback(name, entry.inode as Ino)?.is_break() { + if callback(name, inode)?.is_break() { break; } - current_offset += 1; } Ok(current_offset) From f05037374c20c896a0249ffda6edebb7bd562d9c Mon Sep 17 00:00:00 2001 From: Heinz Date: Fri, 25 Jul 2025 22:47:02 +0800 Subject: [PATCH 02/54] feat(fs): impl write, create and mkdir for ext4 fs --- src/driver/virtio/virtio_blk.rs | 18 +++- src/fs/ext4.rs | 158 ++++++++++++++++++++++++++++++-- src/kernel/block.rs | 97 ++++++++++++++++++++ 3 files changed, 266 insertions(+), 7 deletions(-) diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs index 57026d2a..3b15063d 100644 --- a/src/driver/virtio/virtio_blk.rs +++ b/src/driver/virtio/virtio_blk.rs @@ -18,7 +18,23 @@ impl BlockRequestQueue for Spin>> { fn submit(&self, req: BlockDeviceRequest) -> KResult<()> { match req { - BlockDeviceRequest::Write { .. } => todo!(), + BlockDeviceRequest::Write { + sector, + count, + buffer, + } => { + let mut dev = self.lock(); + for ((start, len), buffer_page) in + Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter()) + { + let buffer = unsafe { + // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us. 
+ &buffer_page.as_memblk().as_bytes()[..len as usize * 512] + }; + + dev.write_blocks(start, buffer).map_err(|_| EIO)?; + } + } BlockDeviceRequest::Read { sector, count, diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 56928623..f7ca7578 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,14 +1,14 @@ -use core::sync::atomic::{AtomicU32, AtomicU64}; +use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use crate::{ - io::{Buffer, ByteBuffer}, + io::{Buffer, ByteBuffer, Stream}, kernel::{ block::BlockDevice, - constants::EIO, + constants::{EIO, S_IFDIR, S_IFREG}, timer::Instant, vfs::{ dentry::Dentry, - inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData}, + inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData, Mode, WriteOffset}, mount::{register_filesystem, Mount, MountCreator}, s_isdir, s_isreg, vfs::Vfs, @@ -20,7 +20,7 @@ use crate::{ }; use alloc::{ collections::btree_map::{BTreeMap, Entry}, - sync::Arc, + sync::{Arc, Weak}, }; use another_ext4::{ Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, @@ -55,7 +55,9 @@ impl Ext4BlockDeviceTrait for Ext4BlockDevice { } fn write_block(&self, block: &another_ext4::Block) { - todo!() + let _ = self + .device + .write_some((block.id as usize) * 4096, &block.data); } } @@ -85,6 +87,20 @@ impl Ext4Fs { icache.get(&ino).cloned().map(Ext4Inode::into_inner) } + fn update_modify_inode(&self, ino: u64, size: u64, mtime: u32) { + let _ = self.inner.setattr( + ino as u32, + None, + None, + None, + Some(size), + None, + Some(mtime), + None, + None, + ); + } + fn get_or_insert( &self, icache: &mut BTreeMap, @@ -185,6 +201,21 @@ define_struct_inode! 
{ struct DirInode; } +impl FileInode { + pub fn new(ino: Ino, vfs: Weak, mode: Mode) -> Arc { + Arc::new_cyclic(|_| FileInode { + idata: { + let inode_data = InodeData::new(ino, vfs); + inode_data + .mode + .store(S_IFREG | (mode & 0o777), Ordering::Relaxed); + inode_data.nlink.store(1, Ordering::Relaxed); + inode_data + }, + }) + } +} + impl Inode for FileInode { fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { let vfs = self.vfs.upgrade().ok_or(EIO)?; @@ -199,6 +230,68 @@ impl Inode for FileInode { Err(e) => Err(e.code() as u32), } } + + fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + let _lock = Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let mut temp_buf = vec![0u8; 4096]; + let mut total_written = 0; + + let offset = match offset { + WriteOffset::Position(offset) => offset, + // TODO: here need to add some operate + WriteOffset::End(end) => *end, + }; + + while let Some(data) = stream.poll_data(&mut temp_buf)? 
{ + let written = ext4fs + .inner + .write(self.ino as u32, offset + total_written, data) + .unwrap(); + total_written += written; + if written < data.len() { + break; + } + } + + let mtime = Instant::now(); + *self.mtime.lock() = mtime; + let new_size = (offset + total_written) as u64; + self.size + .store(offset as u64 + total_written as u64, Ordering::Relaxed); + ext4fs.update_modify_inode(self.ino, new_size, mtime.since_epoch().as_secs() as u32); + + Ok(total_written) + } + + // TODO + fn truncate(&self, length: usize) -> KResult<()> { + Ok(()) + } +} + +impl DirInode { + fn new(idata: InodeData) -> Arc { + let inode = Arc::new(Self { idata }); + + inode + } + + fn link(&self, file: &dyn Inode) { + let now = Instant::now(); + + // SAFETY: Only `unlink` will do something based on `nlink` count + // No need to synchronize here + file.nlink.fetch_add(1, Ordering::Relaxed); + *self.ctime.lock() = now; + + // SAFETY: `rwsem` has done the synchronization + self.size.fetch_add(1, Ordering::Relaxed); + *self.mtime.lock() = now; + } } impl Inode for DirInode { @@ -291,6 +384,59 @@ impl Inode for DirInode { } Ok(current_offset) } + + fn creat(&self, at: &Arc, mode: Mode) -> KResult<()> { + let _lock = Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let name = at.get_name(); + let name = String::from_utf8_lossy(&name); + + let new_ino = ext4fs + .inner + .create( + self.ino as u32, + &name, + InodeMode::from_bits_retain((mode | S_IFREG) as u16), + ) + .unwrap(); + + let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode); + let now = Instant::now(); + + *self.ctime.lock() = now; + // SAFETY: `rwsem` has done the synchronization + self.size.fetch_add(1, Ordering::Relaxed); + *self.mtime.lock() = now; + + at.save_reg(file) + } + + fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { + let _lock = Task::block_on(self.rwsem.write()); + + let vfs = 
self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let name = at.get_name(); + let name = String::from_utf8_lossy(&name); + + let new_ino = ext4fs + .inner + .mkdir( + self.ino as u32, + &name, + InodeMode::from_bits_retain((mode | S_IFDIR) as u16), + ) + .unwrap(); + + let newdir = DirInode::new(InodeData::new(new_ino as u64, self.vfs.clone())); + + self.link(newdir.as_ref()); + at.save_dir(newdir) + } } struct Ext4MountCreator; diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 4a10e4c7..069ae4ca 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -285,6 +285,103 @@ impl BlockDevice { Ok(FillResult::Partial(nfilled)) } } + + /// Write some data to the block device, may involve some copy and fragmentation + /// + /// # Arguments + /// `offset` - offset in bytes + /// `data` - data to write + /// + pub fn write_some(&self, offset: usize, data: &[u8]) -> KResult { + let mut sector_start = offset as u64 / 512; + let mut first_sector_offset = offset as u64 % 512; + let mut remaining_data = data; + let mut nwritten = 0; + + while !remaining_data.is_empty() { + let pages: &[Page]; + let page: Option; + let page_vec: Option>; + + // Calculate sectors needed for this write + let write_end = first_sector_offset + remaining_data.len() as u64; + let sector_count = ((write_end + 511) / 512).min(self.queue().max_request_pages()); + + match sector_count { + count if count <= 8 => { + let _page = Page::alloc(); + page = Some(_page); + pages = core::slice::from_ref(page.as_ref().unwrap()); + } + count if count <= 16 => { + let _pages = Page::alloc_order(1); + page = Some(_pages); + pages = core::slice::from_ref(page.as_ref().unwrap()); + } + count => { + let npages = (count + 15) / 16; + let mut _page_vec = Vec::with_capacity(npages as usize); + for _ in 0..npages { + _page_vec.push(Page::alloc_order(1)); + } + page_vec = Some(_page_vec); + pages = page_vec.as_ref().unwrap().as_slice(); + } + } + + if first_sector_offset 
!= 0 || remaining_data.len() < (sector_count * 512) as usize { + let read_req = BlockDeviceRequest::Read { + sector: sector_start, + count: sector_count, + buffer: pages, + }; + self.commit_request(read_req)?; + } + + let mut data_offset = 0; + let mut page_offset = first_sector_offset as usize; + + for page in pages.iter() { + // SAFETY: We own the page and can modify it + let page_data = unsafe { + let memblk = page.as_memblk(); + core::slice::from_raw_parts_mut(memblk.addr().get() as *mut u8, memblk.len()) + }; + + let copy_len = + (remaining_data.len() - data_offset).min(page_data.len() - page_offset); + + if copy_len == 0 { + break; + } + + page_data[page_offset..page_offset + copy_len] + .copy_from_slice(&remaining_data[data_offset..data_offset + copy_len]); + + data_offset += copy_len; + page_offset = 0; // Only first page has offset + + if data_offset >= remaining_data.len() { + break; + } + } + + let write_req = BlockDeviceRequest::Write { + sector: sector_start, + count: sector_count, + buffer: pages, + }; + self.commit_request(write_req)?; + + let bytes_written = data_offset; + nwritten += bytes_written; + remaining_data = &remaining_data[bytes_written..]; + sector_start += sector_count; + first_sector_offset = 0; + } + + Ok(nwritten) + } } pub enum BlockDeviceRequest<'lt> { From d59a550880297abbe5447b10e2214f4ad822cdec Mon Sep 17 00:00:00 2001 From: Heinz Date: Sat, 26 Jul 2025 16:33:52 +0800 Subject: [PATCH 03/54] feat(fs): impl remove file and dir. 
--- src/fs/ext4.rs | 61 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index f7ca7578..b321fbf1 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -4,10 +4,10 @@ use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ block::BlockDevice, - constants::{EIO, S_IFDIR, S_IFREG}, + constants::{EIO, EISDIR, S_IFDIR, S_IFREG}, timer::Instant, vfs::{ - dentry::Dentry, + dentry::{dcache, Dentry}, inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData, Mode, WriteOffset}, mount::{register_filesystem, Mount, MountCreator}, s_isdir, s_isreg, @@ -26,7 +26,7 @@ use another_ext4::{ Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, }; use eonix_runtime::task::Task; -use eonix_sync::RwLock; +use eonix_sync::{AsProofMut, ProofMut, RwLock}; pub struct Ext4BlockDevice { device: Arc, @@ -292,6 +292,37 @@ impl DirInode { self.size.fetch_add(1, Ordering::Relaxed); *self.mtime.lock() = now; } + + fn unlink( + &self, + file: &Arc, + decrease_size: bool, + _dir_lock: ProofMut<()>, + _file_lock: ProofMut<()>, + ) -> KResult<()> { + let now = Instant::now(); + + // SAFETY: `file_lock` has done the synchronization + if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { + return Err(EISDIR); + } + + if decrease_size { + // SAFETY: `dir_lock` has done the synchronization + self.size.fetch_sub(1, Ordering::Relaxed); + } + + *self.mtime.lock() = now; + + // The last reference to the inode is held by some dentry + // and will be released when the dentry is released + + // SAFETY: `file_lock` has done the synchronization + file.nlink.fetch_sub(1, Ordering::Relaxed); + *file.ctime.lock() = now; + + Ok(()) + } } impl Inode for DirInode { @@ -437,6 +468,30 @@ impl Inode for DirInode { self.link(newdir.as_ref()); at.save_dir(newdir) } + + fn unlink(&self, at: &Arc) -> KResult<()> { + let dir_lock = Task::block_on(self.rwsem.write()); + + let vfs = 
self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let file = at.get_inode()?; + + let name = at.get_name(); + let name = String::from_utf8_lossy(&name); + let file_lock = Task::block_on(file.rwsem.write()); + + if file.is_dir() { + let _ = ext4fs.inner.rmdir(self.ino as u32, &name); + } else { + let _ = ext4fs.inner.unlink(self.ino as u32, &name); + } + + self.unlink(&file, true, dir_lock.prove_mut(), file_lock.prove_mut())?; + dcache::d_remove(at); + + Ok(()) + } } struct Ext4MountCreator; From 5c4016615ac79f980c070514acada82ade8f188f Mon Sep 17 00:00:00 2001 From: Heinz Date: Sat, 26 Jul 2025 18:06:31 +0800 Subject: [PATCH 04/54] fix(fs): fix some informations --- src/fs/ext4.rs | 121 ++++++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index b321fbf1..cb4bc136 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -4,7 +4,7 @@ use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ block::BlockDevice, - constants::{EIO, EISDIR, S_IFDIR, S_IFREG}, + constants::{EIO, S_IFDIR, S_IFREG}, timer::Instant, vfs::{ dentry::{dcache, Dentry}, @@ -26,7 +26,7 @@ use another_ext4::{ Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, }; use eonix_runtime::task::Task; -use eonix_sync::{AsProofMut, ProofMut, RwLock}; +use eonix_sync::RwLock; pub struct Ext4BlockDevice { device: Arc, @@ -87,18 +87,27 @@ impl Ext4Fs { icache.get(&ino).cloned().map(Ext4Inode::into_inner) } - fn update_modify_inode(&self, ino: u64, size: u64, mtime: u32) { + fn modify_inode_stat(&self, ino: u32, size: Option, mtime: u32) { + let _ = self + .inner + .setattr(ino, None, None, None, size, None, Some(mtime), None, None); + } + + fn create_inode_stat(&self, parent: u32, child: u32, mtime: u32) { let _ = self.inner.setattr( - ino as u32, + parent, + None, None, None, None, - Some(size), None, Some(mtime), None, None, ); + let _ = self + .inner + 
.setattr(child, None, None, None, None, None, Some(mtime), None, None); } fn get_or_insert( @@ -262,7 +271,11 @@ impl Inode for FileInode { let new_size = (offset + total_written) as u64; self.size .store(offset as u64 + total_written as u64, Ordering::Relaxed); - ext4fs.update_modify_inode(self.ino, new_size, mtime.since_epoch().as_secs() as u32); + ext4fs.modify_inode_stat( + self.ino as u32, + Some(new_size), + mtime.since_epoch().as_secs() as u32, + ); Ok(total_written) } @@ -274,54 +287,44 @@ impl Inode for FileInode { } impl DirInode { - fn new(idata: InodeData) -> Arc { - let inode = Arc::new(Self { idata }); - - inode + fn new(ino: Ino, vfs: Weak, mode: Mode) -> Arc { + Arc::new_cyclic(|_| DirInode { + idata: { + let inode_data = InodeData::new(ino, vfs); + inode_data + .mode + .store(S_IFDIR | (mode & 0o777), Ordering::Relaxed); + inode_data.nlink.store(2, Ordering::Relaxed); + inode_data.size.store(4096, Ordering::Relaxed); + inode_data + }, + }) } - fn link(&self, file: &dyn Inode) { - let now = Instant::now(); + fn update_time(&self, time: Instant) { + *self.ctime.lock() = time; + *self.mtime.lock() = time; + } - // SAFETY: Only `unlink` will do something based on `nlink` count - // No need to synchronize here - file.nlink.fetch_add(1, Ordering::Relaxed); - *self.ctime.lock() = now; + fn update_child_time(&self, child: &dyn Inode, time: Instant) { + self.update_time(time); + *child.ctime.lock() = time; + *child.mtime.lock() = time; + } - // SAFETY: `rwsem` has done the synchronization + fn link_file(&self) { + // TODO self.size.fetch_add(1, Ordering::Relaxed); - *self.mtime.lock() = now; } - fn unlink( - &self, - file: &Arc, - decrease_size: bool, - _dir_lock: ProofMut<()>, - _file_lock: ProofMut<()>, - ) -> KResult<()> { - let now = Instant::now(); - - // SAFETY: `file_lock` has done the synchronization - if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { - return Err(EISDIR); - } - - if decrease_size { - // SAFETY: `dir_lock` has done the 
synchronization - self.size.fetch_sub(1, Ordering::Relaxed); - } - - *self.mtime.lock() = now; - - // The last reference to the inode is held by some dentry - // and will be released when the dentry is released - - // SAFETY: `file_lock` has done the synchronization - file.nlink.fetch_sub(1, Ordering::Relaxed); - *file.ctime.lock() = now; + fn link_dir(&self) { + // TODO + self.nlink.fetch_add(1, Ordering::Relaxed); + self.size.fetch_add(1, Ordering::Relaxed); + } - Ok(()) + fn unlink_dir(&self) { + self.nlink.fetch_sub(1, Ordering::Relaxed); } } @@ -436,11 +439,10 @@ impl Inode for DirInode { let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode); let now = Instant::now(); + self.update_child_time(file.as_ref(), now); + self.link_file(); - *self.ctime.lock() = now; - // SAFETY: `rwsem` has done the synchronization - self.size.fetch_add(1, Ordering::Relaxed); - *self.mtime.lock() = now; + ext4fs.create_inode_stat(self.ino as u32, new_ino, now.since_epoch().as_secs() as u32); at.save_reg(file) } @@ -463,14 +465,18 @@ impl Inode for DirInode { ) .unwrap(); - let newdir = DirInode::new(InodeData::new(new_ino as u64, self.vfs.clone())); + let new_dir = DirInode::new(new_ino as u64, self.vfs.clone(), mode); + let now = Instant::now(); + self.update_child_time(new_dir.as_ref(), now); + self.link_dir(); + + ext4fs.create_inode_stat(self.ino as u32, new_ino, now.since_epoch().as_secs() as u32); - self.link(newdir.as_ref()); - at.save_dir(newdir) + at.save_dir(new_dir) } fn unlink(&self, at: &Arc) -> KResult<()> { - let dir_lock = Task::block_on(self.rwsem.write()); + let _dir_lock = Task::block_on(self.rwsem.write()); let vfs = self.vfs.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); @@ -479,15 +485,18 @@ impl Inode for DirInode { let name = at.get_name(); let name = String::from_utf8_lossy(&name); - let file_lock = Task::block_on(file.rwsem.write()); + let _file_lock = Task::block_on(file.rwsem.write()); if file.is_dir() { let _ = 
ext4fs.inner.rmdir(self.ino as u32, &name); + self.unlink_dir(); } else { let _ = ext4fs.inner.unlink(self.ino as u32, &name); } + let now = Instant::now(); + self.update_time(now); + ext4fs.modify_inode_stat(self.ino as u32, None, now.since_epoch().as_secs() as u32); - self.unlink(&file, true, dir_lock.prove_mut(), file_lock.prove_mut())?; dcache::d_remove(at); Ok(()) From 1d1a0257ba9c895b5eb54d15ddf7d3f397fb00df Mon Sep 17 00:00:00 2001 From: Heinz Date: Tue, 29 Jul 2025 23:12:06 +0800 Subject: [PATCH 05/54] feat(fs): impl rename --- src/fs/ext4.rs | 115 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 4 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index cb4bc136..39df849f 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -4,11 +4,14 @@ use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ block::BlockDevice, - constants::{EIO, S_IFDIR, S_IFREG}, + constants::{EEXIST, EINVAL, EIO, ENOSYS, S_IFDIR, S_IFREG}, timer::Instant, vfs::{ dentry::{dcache, Dentry}, - inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData, Mode, WriteOffset}, + inode::{ + define_struct_inode, AtomicNlink, Ino, Inode, InodeData, Mode, RenameData, + WriteOffset, + }, mount::{register_filesystem, Mount, MountCreator}, s_isdir, s_isreg, vfs::Vfs, @@ -27,6 +30,7 @@ use another_ext4::{ }; use eonix_runtime::task::Task; use eonix_sync::RwLock; +use xmas_elf::dynamic::FLAG_1_NOW; pub struct Ext4BlockDevice { device: Arc, @@ -110,6 +114,20 @@ impl Ext4Fs { .setattr(child, None, None, None, None, None, Some(mtime), None, None); } + fn chmod_stat(&self, ino: u32, new_mode: u16, ctime: u32) { + let _ = self.inner.setattr( + ino, + Some(InodeMode::from_bits_retain(new_mode.try_into().unwrap())), + None, + None, + None, + None, + None, + Some(ctime), + None, + ); + } + fn get_or_insert( &self, icache: &mut BTreeMap, @@ -280,6 +298,28 @@ impl Inode for FileInode { Ok(total_written) } + fn chmod(&self, mode: Mode) -> KResult<()> { + let _lock = 
Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + let old_mode = self.mode.load(Ordering::Relaxed); + let new_mode = (old_mode & !0o777) | (mode & 0o777); + + let now = Instant::now(); + ext4fs.chmod_stat( + self.ino as u32, + new_mode as u16, + now.since_epoch().as_secs() as u32, + ); + + // SAFETY: `rwsem` has done the synchronization + self.mode.store(new_mode, Ordering::Relaxed); + *self.ctime.lock() = now; + + Ok(()) + } + // TODO fn truncate(&self, length: usize) -> KResult<()> { Ok(()) @@ -313,12 +353,10 @@ impl DirInode { } fn link_file(&self) { - // TODO self.size.fetch_add(1, Ordering::Relaxed); } fn link_dir(&self) { - // TODO self.nlink.fetch_add(1, Ordering::Relaxed); self.size.fetch_add(1, Ordering::Relaxed); } @@ -501,6 +539,75 @@ impl Inode for DirInode { Ok(()) } + + fn chmod(&self, mode: Mode) -> KResult<()> { + let _lock = Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + let old_mode = self.mode.load(Ordering::Relaxed); + let new_mode = (old_mode & !0o777) | (mode & 0o777); + + let now = Instant::now(); + ext4fs.chmod_stat( + self.ino as u32, + new_mode as u16, + now.since_epoch().as_secs() as u32, + ); + + // SAFETY: `rwsem` has done the synchronization + self.mode.store(new_mode, Ordering::Relaxed); + *self.ctime.lock() = now; + + Ok(()) + } + + fn rename(&self, rename_data: RenameData) -> KResult<()> { + let RenameData { + old_dentry, + new_dentry, + new_parent, + is_exchange, + no_replace, + vfs, + } = rename_data; + + if is_exchange { + println_warn!("Ext4Fs does not support exchange rename for now"); + return Err(ENOSYS); + } + + // TODO: may need another lock + let _lock = Task::block_on(self.rwsem.write()); + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let old_file = old_dentry.get_inode()?; + let new_file 
= new_dentry.get_inode(); + if no_replace && new_file.is_ok() { + return Err(EEXIST); + } + + let name = old_dentry.name(); + let name = core::str::from_utf8(&*name).map_err(|_| EINVAL)?; + let new_name = new_dentry.name(); + let new_name = core::str::from_utf8(&*new_name).map_err(|_| EINVAL)?; + + ext4fs + .inner + .rename(self.ino as u32, name, new_parent.ino as u32, new_name) + .map_err(|err| err.code() as u32)?; + + // TODO: may need more operations + let now = Instant::now(); + *self.mtime.lock() = now; + *old_file.ctime.lock() = now; + self.size.fetch_sub(1, Ordering::Relaxed); + + Task::block_on(dcache::d_exchange(old_dentry, new_dentry)); + + Ok(()) + } } struct Ext4MountCreator; From 22458ed33cd225123a8a23b74ae86c32984d2943 Mon Sep 17 00:00:00 2001 From: Heinz Date: Tue, 29 Jul 2025 23:34:09 +0800 Subject: [PATCH 06/54] fix(fs): fix rename's metadata --- src/fs/ext4.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 39df849f..25810bdf 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -30,7 +30,6 @@ use another_ext4::{ }; use eonix_runtime::task::Task; use eonix_sync::RwLock; -use xmas_elf::dynamic::FLAG_1_NOW; pub struct Ext4BlockDevice { device: Arc, @@ -600,9 +599,24 @@ impl Inode for DirInode { // TODO: may need more operations let now = Instant::now(); - *self.mtime.lock() = now; *old_file.ctime.lock() = now; - self.size.fetch_sub(1, Ordering::Relaxed); + *self.mtime.lock() = now; + + let same_parent = Arc::as_ptr(&new_parent) == &raw const *self; + if !same_parent { + *new_parent.mtime.lock() = now; + if old_file.is_dir() { + self.nlink.fetch_sub(1, Ordering::Relaxed); + new_parent.nlink.fetch_add(1, Ordering::Relaxed); + } + } + + if let Ok(replaced_file) = new_dentry.get_inode() { + if !no_replace { + *replaced_file.ctime.lock() = now; + replaced_file.nlink.fetch_sub(1, Ordering::Relaxed); + } + } Task::block_on(dcache::d_exchange(old_dentry, new_dentry)); From 
806c4fe0acfd8172c62e0111aee4544a8b6a05f5 Mon Sep 17 00:00:00 2001 From: Heinz Date: Thu, 31 Jul 2025 14:49:30 +0800 Subject: [PATCH 07/54] fix(fs): fix ext4's write offset update --- src/fs/ext4.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 25810bdf..1923d218 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -266,10 +266,14 @@ impl Inode for FileInode { let mut temp_buf = vec![0u8; 4096]; let mut total_written = 0; + let mut store_new_end = None; let offset = match offset { WriteOffset::Position(offset) => offset, // TODO: here need to add some operate - WriteOffset::End(end) => *end, + WriteOffset::End(end) => { + store_new_end = Some(end); + self.size.load(Ordering::Relaxed) as usize + } }; while let Some(data) = stream.poll_data(&mut temp_buf)? { @@ -283,6 +287,9 @@ impl Inode for FileInode { } } + if let Some(store_end) = store_new_end { + *store_end = offset + total_written; + } let mtime = Instant::now(); *self.mtime.lock() = mtime; let new_size = (offset + total_written) as u64; From db1caebde5063a6cb0c2c5f93c4689d5ece5e0e5 Mon Sep 17 00:00:00 2001 From: Heinz Date: Mon, 4 Aug 2025 22:59:58 +0800 Subject: [PATCH 08/54] feat(fs): partial work for ext4's page cache Fix page cache's bug, add size check in read function. Add page cache's base operations for ext4, but the cachepage will not be dropped until kernel stop, so we need to call fsync function manually, consider use some strategy such as LRU. 
--- src/fs/ext4.rs | 53 +++++++++++++++++------------ src/fs/fat32.rs | 8 ++--- src/fs/tmpfs.rs | 10 +++--- src/kernel/mem.rs | 2 +- src/kernel/mem/page_cache.rs | 66 +++++++++++++++++++++++++++++++----- src/kernel/vfs/inode.rs | 2 +- 6 files changed, 101 insertions(+), 40 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 0853e69a..763f1caa 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,6 +1,6 @@ use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; -use crate::kernel::mem::{PageCache, PageCacheBackend}; +use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ @@ -83,7 +83,7 @@ impl Vfs for Ext4Fs { } fn is_read_only(&self) -> bool { - true + false } } @@ -257,12 +257,12 @@ impl FileInode { } impl PageCacheBackend for FileInode { - fn read_page(&self, page: &mut crate::kernel::mem::CachePage, offset: usize) -> KResult { + fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { self.read_direct(page, offset) } - fn write_page(&self, page: &crate::kernel::mem::CachePage, offset: usize) -> KResult { - todo!() + fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult { + self.write_direct(page, offset) } fn size(&self) -> usize { @@ -296,12 +296,6 @@ impl Inode for FileInode { fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { let _lock = Task::block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let ext4fs = vfs.as_any().downcast_ref::().unwrap(); - - let mut temp_buf = vec![0u8; 4096]; - let mut total_written = 0; - let mut store_new_end = None; let offset = match offset { WriteOffset::Position(offset) => offset, @@ -312,6 +306,31 @@ impl Inode for FileInode { } }; + let total_written = Task::block_on(self.page_cache.write(stream, offset))?; + let cursor_end = offset + total_written; + if let Some(store_end) = store_new_end { + *store_end = cursor_end; + } + + let mtime = 
Instant::now(); + *self.mtime.lock() = mtime; + self.size.store(cursor_end as u64, Ordering::Relaxed); + + // TODO: change this with some update strategy such as LRU + let _ = Task::block_on(self.page_cache.fsync()); + + Ok(total_written) + } + + fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { + //let _lock = Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let mut temp_buf = vec![0u8; 4096]; + let mut total_written = 0; + while let Some(data) = stream.poll_data(&mut temp_buf)? { let written = ext4fs .inner @@ -323,18 +342,10 @@ impl Inode for FileInode { } } - if let Some(store_end) = store_new_end { - *store_end = offset + total_written; - } - let mtime = Instant::now(); - *self.mtime.lock() = mtime; - let new_size = (offset + total_written) as u64; - self.size - .store(offset as u64 + total_written as u64, Ordering::Relaxed); ext4fs.modify_inode_stat( self.ino as u32, - Some(new_size), - mtime.since_epoch().as_secs() as u32, + Some(self.size() as u64), + self.mtime.lock().since_epoch().as_secs() as u32, ); Ok(total_written) diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 852d8673..fb4a3e2e 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -3,7 +3,7 @@ mod file; use crate::io::Stream; use crate::kernel::constants::EIO; -use crate::kernel::mem::AsMemoryBlock; +use crate::kernel::mem::{AsMemoryBlock, CachePageStream}; use crate::kernel::vfs::inode::WriteOffset; use crate::{ io::{Buffer, ByteBuffer, UninitBuffer}, @@ -308,11 +308,11 @@ impl Inode for FileInode { Ok(buffer.wrote()) } - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { todo!() } - fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write_direct(&self, _stream: &mut dyn Stream, _offset: usize) -> KResult { todo!() } } @@ -322,7 +322,7 @@ 
impl PageCacheBackend for FileInode { self.read_direct(page, offset) } - fn write_page(&self, page: &CachePage, offset: usize) -> KResult { + fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { todo!() } diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs index 334e2781..13a01de5 100644 --- a/src/fs/tmpfs.rs +++ b/src/fs/tmpfs.rs @@ -1,6 +1,6 @@ use crate::io::Stream; use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR}; -use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend}; +use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; use crate::kernel::timer::Instant; use crate::kernel::vfs::inode::InodeData; use crate::kernel::vfs::inode::RenameData; @@ -496,7 +496,7 @@ impl PageCacheBackend for FileInode { Ok(PAGE_SIZE) } - fn write_page(&self, _page: &CachePage, _offset: usize) -> KResult { + fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { Ok(PAGE_SIZE) } @@ -511,13 +511,13 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let lock = Task::block_on(self.rwsem.write()); + let _lock = Task::block_on(self.rwsem.write()); Task::block_on(self.pages.read(buffer, offset)) } fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { // TODO: We don't need that strong guarantee, find some way to avoid locks - let lock = Task::block_on(self.rwsem.write()); + let _lock = Task::block_on(self.rwsem.write()); let mut store_new_end = None; let offset = match offset { @@ -545,7 +545,7 @@ impl Inode for FileInode { } fn truncate(&self, length: usize) -> KResult<()> { - let lock = Task::block_on(self.rwsem.write()); + let _lock = Task::block_on(self.rwsem.write()); Task::block_on(self.pages.resize(length))?; self.size.store(length as u64, Ordering::Relaxed); *self.mtime.lock() = Instant::now(); diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs index ce705cff..efd06824 100644 --- 
a/src/kernel/mem.rs +++ b/src/kernel/mem.rs @@ -12,5 +12,5 @@ pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess}; pub(self) use mm_area::MMArea; pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission}; pub use page_alloc::{GlobalPageAlloc, RawPage}; -pub use page_cache::{CachePage, PageCache, PageCacheBackend}; +pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackend}; pub use paging::{Page, PageBuffer}; diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 863e538e..fa475f9b 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -125,27 +125,32 @@ impl PageCache { pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult { let mut pages = self.pages.lock().await; + let size = self.backend.upgrade().unwrap().size(); loop { + if offset >= size { + break; + } let page_id = offset >> PAGE_SIZE_BITS; let page = pages.get(&page_id); match page { Some(page) => { let inner_offset = offset % PAGE_SIZE; + let available_in_file = size.saturating_sub(offset); // TODO: still cause unnecessary IO if valid_size < PAGESIZE // and fill result is Done - if page.valid_size() == 0 - || buffer - .fill(&page.valid_data()[inner_offset..])? 
- .should_stop() + let page_data = &page.valid_data()[inner_offset..]; + let read_size = page_data.len().min(available_in_file); + + if read_size == 0 + || buffer.fill(&page_data[..read_size])?.should_stop() || buffer.available() == 0 { break; } - - offset += PAGE_SIZE - inner_offset; + offset += read_size; } None => { let mut new_page = CachePage::new(); @@ -217,7 +222,7 @@ impl PageCache { self.backend .upgrade() .unwrap() - .write_page(page, page_id << PAGE_SIZE_BITS)?; + .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)?; page.clear_dirty(); } } @@ -293,6 +298,51 @@ impl PageCache { } } +pub struct CachePageStream { + page: CachePage, + cur: usize, +} + +impl CachePageStream { + pub fn new(page: CachePage) -> Self { + Self { page, cur: 0 } + } + + pub fn remaining(&self) -> usize { + self.page.valid_size().saturating_sub(self.cur) + } + + pub fn is_drained(&self) -> bool { + self.cur >= self.page.valid_size() + } +} + +impl Stream for CachePageStream { + fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult> { + if self.cur >= self.page.valid_size() { + return Ok(None); + } + + let page_data = &self.page.all()[self.cur..self.page.valid_size()]; + let to_read = buf.len().min(page_data.len()); + + buf[..to_read].copy_from_slice(&page_data[..to_read]); + self.cur += to_read; + + Ok(Some(&mut buf[..to_read])) + } + + fn ignore(&mut self, len: usize) -> KResult> { + if self.cur >= self.page.valid_size() { + return Ok(None); + } + + let to_ignore = len.min(self.page.valid_size() - self.cur); + self.cur += to_ignore; + Ok(Some(to_ignore)) + } +} + // with this trait, "page cache" and "block cache" are unified, // for fs, offset is file offset (floor algin to PAGE_SIZE) // for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE) @@ -300,7 +350,7 @@ impl PageCache { pub trait PageCacheBackend { fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult; - fn write_page(&self, page: &CachePage, offset: usize) -> KResult; + 
fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult; fn size(&self) -> usize; } diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs index 2b52043d..0f73c910 100644 --- a/src/kernel/vfs/inode.rs +++ b/src/kernel/vfs/inode.rs @@ -136,7 +136,7 @@ pub trait Inode: Send + Sync + InodeInner + Any { Err(if self.is_dir() { EISDIR } else { EINVAL }) } - fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { Err(if self.is_dir() { EISDIR } else { EINVAL }) } From a2c50b9a112948f448b5142dee375303c2e3e15a Mon Sep 17 00:00:00 2001 From: Heinz Date: Tue, 5 Aug 2025 22:13:54 +0800 Subject: [PATCH 09/54] feat(fs): temporary cache write back strategy for ext4 temporary write back by timer, when write function is called, check if the time since the last write back is greater than 10 seconds. If it is, then write back. --- src/fs/ext4.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 763f1caa..9d315980 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,6 +1,7 @@ use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; +use crate::kernel::timer::Ticks; use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ @@ -223,6 +224,7 @@ impl Ext4Inode { define_struct_inode! 
{ struct FileInode { + last_sync: AtomicU64, page_cache: PageCache, } } @@ -235,6 +237,7 @@ impl FileInode { fn with_idata(idata: InodeData) -> Arc { let inode = Arc::new_cyclic(|weak_self: &Weak| Self { idata, + last_sync: AtomicU64::new(0), page_cache: PageCache::new(weak_self.clone()), }); @@ -251,9 +254,22 @@ impl FileInode { inode_data.nlink.store(1, Ordering::Relaxed); inode_data }, + last_sync: AtomicU64::new(0), page_cache: PageCache::new(weak_self.clone()), }) } + + fn sync_if_needed(&self) { + let now = Ticks::now().in_secs(); + let last = self.last_sync.load(Ordering::Relaxed); + + // TODO: this is a temporary implement, + // consider change this with some update strategy such as LRU future + if now - last > 10 { + self.last_sync.store(now, Ordering::Relaxed); + let _ = Task::block_on(self.page_cache.fsync()); + } + } } impl PageCacheBackend for FileInode { @@ -316,8 +332,7 @@ impl Inode for FileInode { *self.mtime.lock() = mtime; self.size.store(cursor_end as u64, Ordering::Relaxed); - // TODO: change this with some update strategy such as LRU - let _ = Task::block_on(self.page_cache.fsync()); + self.sync_if_needed(); Ok(total_written) } From e89a28610421c25b8a86203e10445fb811d1d13c Mon Sep 17 00:00:00 2001 From: greatbridf Date: Wed, 6 Aug 2025 01:56:03 +0800 Subject: [PATCH 10/54] runtime: rework the whole runtime arch. (partial) Remove old Scheduler. Add Runtime as replacement. Use stackless coroutine as the low level tasking mechanism and build the stackful tasks on top of it. Redesign of the task state system. Rework the executor. Remove Run trait and anything related. 
Signed-off-by: greatbridf --- crates/eonix_runtime/src/executor.rs | 158 +++++------ crates/eonix_runtime/src/executor/builder.rs | 6 +- .../src/executor/execute_status.rs | 4 - crates/eonix_runtime/src/lib.rs | 1 - crates/eonix_runtime/src/run.rs | 34 --- crates/eonix_runtime/src/run/future_run.rs | 34 --- crates/eonix_runtime/src/scheduler.rs | 265 +++++++----------- crates/eonix_runtime/src/task.rs | 182 ++++-------- crates/eonix_runtime/src/task/adapter.rs | 3 +- crates/eonix_runtime/src/task/task_state.rs | 29 +- 10 files changed, 241 insertions(+), 475 deletions(-) delete mode 100644 crates/eonix_runtime/src/executor/execute_status.rs delete mode 100644 crates/eonix_runtime/src/run.rs delete mode 100644 crates/eonix_runtime/src/run/future_run.rs diff --git a/crates/eonix_runtime/src/executor.rs b/crates/eonix_runtime/src/executor.rs index 12eb9556..7be70eb9 100644 --- a/crates/eonix_runtime/src/executor.rs +++ b/crates/eonix_runtime/src/executor.rs @@ -1,125 +1,105 @@ -mod builder; -mod execute_status; +// mod builder; mod output_handle; mod stack; -use crate::{ - run::{Contexted, Run, RunState}, - scheduler::Scheduler, - task::Task, +use alloc::{ + boxed::Box, + sync::{Arc, Weak}, }; -use alloc::sync::Weak; use core::{ + marker::PhantomData, pin::Pin, - sync::atomic::{compiler_fence, fence, AtomicBool, Ordering}, - task::Waker, + task::{Context, Poll}, }; use eonix_sync::Spin; -pub use builder::ExecutorBuilder; -pub use execute_status::ExecuteStatus; pub use output_handle::OutputHandle; pub use stack::Stack; -/// An `Executor` executes a `Run` object in a separate thread of execution -/// where we have a dedicated stack and context. -pub trait Executor: Send { - fn progress(&self) -> ExecuteStatus; +/// An `Executor` executes a Future object in a separate thread of execution. +/// +/// When the Future is finished, the `Executor` will call the `OutputHandle` to commit the output. +/// Then the `Executor` will release the resources associated with the Future. 
+pub struct Executor(Option>>); + +trait TypeErasedExecutor: Send { + /// # Returns + /// Whether the executor has finished. + fn run(self: Pin<&mut Self>, cx: &mut Context<'_>) -> bool; } -struct RealExecutor +struct RealExecutor<'a, F> where - R: Run + Send + Contexted + 'static, - R::Output: Send, + F: Future + Send + 'a, + F::Output: Send + 'a, { - _stack: S, - runnable: R, - output_handle: Weak>>, - finished: AtomicBool, + future: F, + output_handle: Weak>>, + _phantom: PhantomData<&'a ()>, } -impl RealExecutor +impl TypeErasedExecutor for RealExecutor<'_, F> where - R: Run + Send + Contexted + 'static, - R::Output: Send, + F: Future + Send, + F::Output: Send, { - extern "C" fn execute(self: Pin<&Self>) -> ! { - // We get here with preempt count == 1. - eonix_preempt::enable(); - - { - let waker = Waker::from(Task::current().clone()); + fn run(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> bool { + if self.output_handle.as_ptr().is_null() { + return true; + } - let output_data = loop { - // TODO!!!!!!: CHANGE THIS. - let runnable_pointer = &raw const self.get_ref().runnable; + let future = unsafe { + // SAFETY: We don't move the future. + self.as_mut().map_unchecked_mut(|me| &mut me.future) + }; - // SAFETY: We don't move the runnable object and we MIGHT not be using the - // part that is used in `pinned_run` in the runnable...? - let mut pinned_runnable = - unsafe { Pin::new_unchecked(&mut *(runnable_pointer as *mut R)) }; + match future.poll(cx) { + Poll::Ready(output) => { + if let Some(output_handle) = self.output_handle.upgrade() { + output_handle.lock().commit_output(output); - match pinned_runnable.as_mut().run(&waker) { - RunState::Finished(output) => break output, - RunState::Running => Task::park(), + unsafe { + // SAFETY: `output_handle` is Unpin. 
+ self.get_unchecked_mut().output_handle = Weak::new(); + } } - }; - if let Some(output_handle) = self.output_handle.upgrade() { - output_handle.lock().commit_output(output_data); + true } - } - - // SAFETY: We are on the same CPU as the task. - self.finished.store(true, Ordering::Relaxed); - - unsafe { - // SAFETY: `preempt::count()` == 1. - eonix_preempt::disable(); - Scheduler::goto_scheduler_noreturn() + Poll::Pending => false, } } } -impl Executor for RealExecutor -where - S: Send, - R: Run + Contexted + Send, - R::Output: Send, -{ - fn progress(&self) -> ExecuteStatus { - // TODO!!!: If the task comes from another cpu, we need to sync. - // - // The other cpu should see the changes of kernel stack of the target thread - // made in this cpu. - // - // Can we find a better way other than `fence`s? - // - // An alternative way is to use an atomic variable to store the cpu id of - // the current task. Then we can use acquire release swap to ensure that the - // other cpu sees the changes. - fence(Ordering::SeqCst); - compiler_fence(Ordering::SeqCst); - - // TODO!!!: We should load the context only if the previous task is - // different from the current task. - - self.runnable.load_running_context(); - - unsafe { - // SAFETY: We are in the scheduler context and we are not preempted. - Scheduler::go_from_scheduler(&Task::current().execution_context); - } - - self.runnable.restore_running_context(); +impl Executor { + pub fn new(future: F) -> (Self, Arc>>) + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + let output_handle = OutputHandle::new(); + + // TODO: accept futures with non 'static lifetimes. 
+ ( + Executor(Some(Box::pin(RealExecutor { + future, + output_handle: Arc::downgrade(&output_handle), + _phantom: PhantomData, + }))), + output_handle, + ) + } - compiler_fence(Ordering::SeqCst); - fence(Ordering::SeqCst); + pub fn run(&mut self, cx: &mut Context<'_>) -> bool { + if let Some(executor) = self.0.as_mut() { + let finished = executor.as_mut().run(cx); + if finished { + self.0.take(); + } - if self.finished.load(Ordering::Acquire) { - ExecuteStatus::Finished + finished } else { - ExecuteStatus::Executing + true } } } diff --git a/crates/eonix_runtime/src/executor/builder.rs b/crates/eonix_runtime/src/executor/builder.rs index eb073dc8..2729270b 100644 --- a/crates/eonix_runtime/src/executor/builder.rs +++ b/crates/eonix_runtime/src/executor/builder.rs @@ -1,8 +1,5 @@ use super::{Executor, OutputHandle, RealExecutor, Stack}; -use crate::{ - context::ExecutionContext, - run::{Contexted, Run}, -}; +use crate::context::ExecutionContext; use alloc::{boxed::Box, sync::Arc}; use core::{pin::Pin, sync::atomic::AtomicBool}; use eonix_sync::Spin; @@ -15,7 +12,6 @@ pub struct ExecutorBuilder { impl ExecutorBuilder where S: Stack, - R: Run + Contexted + Send + 'static, R::Output: Send, { pub fn new() -> Self { diff --git a/crates/eonix_runtime/src/executor/execute_status.rs b/crates/eonix_runtime/src/executor/execute_status.rs deleted file mode 100644 index 9c95aa6f..00000000 --- a/crates/eonix_runtime/src/executor/execute_status.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub enum ExecuteStatus { - Executing, - Finished, -} diff --git a/crates/eonix_runtime/src/lib.rs b/crates/eonix_runtime/src/lib.rs index 1df43fa4..458e01d9 100644 --- a/crates/eonix_runtime/src/lib.rs +++ b/crates/eonix_runtime/src/lib.rs @@ -3,7 +3,6 @@ pub mod context; pub mod executor; mod ready_queue; -pub mod run; pub mod scheduler; pub mod task; diff --git a/crates/eonix_runtime/src/run.rs b/crates/eonix_runtime/src/run.rs deleted file mode 100644 index 368b567b..00000000 --- 
a/crates/eonix_runtime/src/run.rs +++ /dev/null @@ -1,34 +0,0 @@ -mod future_run; - -use core::{pin::Pin, task::Waker}; -pub use future_run::FutureRun; - -pub enum RunState { - Running, - Finished(Output), -} - -pub trait Contexted { - /// # Safety - /// This function should be called in a preemption disabled context. - fn load_running_context(&self) {} - - /// # Safety - /// This function should be called in a preemption disabled context. - fn restore_running_context(&self) {} -} - -pub trait Run { - type Output; - - fn run(self: Pin<&mut Self>, waker: &Waker) -> RunState; - - fn join(mut self: Pin<&mut Self>, waker: &Waker) -> Self::Output { - loop { - match self.as_mut().run(waker) { - RunState::Running => continue, - RunState::Finished(output) => break output, - } - } - } -} diff --git a/crates/eonix_runtime/src/run/future_run.rs b/crates/eonix_runtime/src/run/future_run.rs deleted file mode 100644 index 813f8d2f..00000000 --- a/crates/eonix_runtime/src/run/future_run.rs +++ /dev/null @@ -1,34 +0,0 @@ -use super::{Contexted, Run, RunState}; -use core::{ - pin::Pin, - task::{Context, Poll, Waker}, -}; - -pub struct FutureRun(F); - -impl FutureRun -where - F: Future, -{ - pub const fn new(future: F) -> Self { - Self(future) - } -} - -impl Contexted for FutureRun where F: Future {} -impl Run for FutureRun -where - F: Future + 'static, -{ - type Output = F::Output; - - fn run(self: Pin<&mut Self>, waker: &Waker) -> RunState { - let mut future = unsafe { self.map_unchecked_mut(|me| &mut me.0) }; - let mut context = Context::from_waker(waker); - - match future.as_mut().poll(&mut context) { - Poll::Ready(output) => RunState::Finished(output), - Poll::Pending => RunState::Running, - } - } -} diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs index 9031d3a1..dcd8cfc1 100644 --- a/crates/eonix_runtime/src/scheduler.rs +++ b/crates/eonix_runtime/src/scheduler.rs @@ -1,20 +1,16 @@ use crate::{ - context::ExecutionContext, - 
executor::{ExecuteStatus, OutputHandle, Stack}, - ready_queue::{cpu_rq, local_rq}, - run::{Contexted, Run}, - task::{Task, TaskAdapter, TaskHandle}, + executor::OutputHandle, + ready_queue::{cpu_rq, local_rq, ReadyQueue}, + task::{Task, TaskAdapter, TaskHandle, TaskState}, }; use alloc::sync::Arc; use core::{ - mem::forget, + ops::{Deref, DerefMut}, ptr::NonNull, - sync::atomic::{compiler_fence, Ordering}, + sync::atomic::Ordering, task::Waker, }; use eonix_hal::processor::halt; -use eonix_log::println_trace; -use eonix_preempt::assert_preempt_count_eq; use eonix_sync::{LazyLock, Spin, SpinIrq as _}; use intrusive_collections::RBTree; use pointers::BorrowedArc; @@ -22,13 +18,12 @@ use pointers::BorrowedArc; #[eonix_percpu::define_percpu] static CURRENT_TASK: Option> = None; -#[eonix_percpu::define_percpu] -static LOCAL_SCHEDULER_CONTEXT: ExecutionContext = ExecutionContext::new(); - static TASKS: LazyLock>> = LazyLock::new(|| Spin::new(RBTree::new(TaskAdapter::new()))); -pub struct Scheduler; +pub static RUNTIME: Runtime = Runtime(); + +pub struct Runtime(); pub struct JoinHandle(Arc>>) where @@ -68,74 +63,7 @@ where } } -impl Scheduler { - /// `Scheduler` might be used in various places. Do not hold it for a long time. - /// - /// # Safety - /// The locked returned by this function should be locked with `lock_irq` to prevent from - /// rescheduling during access to the scheduler. Disabling preemption will do the same. - /// - /// Drop the lock before calling `schedule`. - pub fn get() -> &'static Self { - static GLOBAL_SCHEDULER: Scheduler = Scheduler; - &GLOBAL_SCHEDULER - } - - pub fn init_local_scheduler() - where - S: Stack, - { - let stack = S::new(); - - unsafe { - eonix_preempt::disable(); - // SAFETY: Preemption is disabled. 
- let context: &mut ExecutionContext = LOCAL_SCHEDULER_CONTEXT.as_mut(); - context.set_ip(local_scheduler as _); - context.set_sp(stack.get_bottom().addr().get() as usize); - context.set_interrupt(true); - eonix_preempt::enable(); - } - - // We don't need to keep the stack around. - forget(stack); - } - - /// # Safety - /// This function must not be called inside of the scheulder context. - /// - /// The caller must ensure that `preempt::count` == 1. - pub unsafe fn go_from_scheduler(to: &ExecutionContext) { - // SAFETY: Preemption is disabled. - unsafe { LOCAL_SCHEDULER_CONTEXT.as_ref() }.switch_to(to); - } - - /// # Safety - /// This function must not be called inside of the scheulder context. - /// - /// The caller must ensure that `preempt::count` == 1. - pub unsafe fn goto_scheduler(from: &ExecutionContext) { - // SAFETY: Preemption is disabled. - from.switch_to(unsafe { LOCAL_SCHEDULER_CONTEXT.as_ref() }); - } - - /// # Safety - /// This function must not be called inside of the scheulder context. - /// - /// The caller must ensure that `preempt::count` == 1. - pub unsafe fn goto_scheduler_noreturn() -> ! { - // SAFETY: Preemption is disabled. 
- unsafe { LOCAL_SCHEDULER_CONTEXT.as_ref().switch_noreturn() } - } - - fn add_task(task: Arc) { - TASKS.lock().insert(task); - } - - fn remove_task(task: &Task) { - unsafe { TASKS.lock().cursor_mut_from_ptr(task as *const _).remove() }; - } - +impl Runtime { fn select_cpu_for_task(&self, task: &Task) -> usize { task.cpu.load(Ordering::Relaxed) as _ } @@ -165,112 +93,127 @@ impl Scheduler { } } - pub fn spawn(&self, runnable: R) -> JoinHandle + pub fn spawn(&self, future: F) -> JoinHandle where - S: Stack + 'static, - R: Run + Contexted + Send + 'static, - R::Output: Send + 'static, + F: Future + Send + 'static, + F::Output: Send + 'static, { let TaskHandle { task, output_handle, - } = Task::new::(runnable); + } = Task::new(future); - Self::add_task(task.clone()); + self.add_task(task.clone()); self.activate(&task); JoinHandle(output_handle) } - /// Go to idle task. Call this with `preempt_count == 1`. - /// The preempt count will be decremented by this function. - /// - /// # Safety - /// We might never return from here. - /// Drop all variables that take ownership of some resource before calling this function. - pub fn schedule() { - assert_preempt_count_eq!(1, "Scheduler::schedule"); + // /// Go to idle task. Call this with `preempt_count == 1`. + // /// The preempt count will be decremented by this function. + // /// + // /// # Safety + // /// We might never return from here. + // /// Drop all variables that take ownership of some resource before calling this function. + // pub fn schedule() { + // assert_preempt_count_eq!(1, "Scheduler::schedule"); + + // // Make sure all works are done before scheduling. + // compiler_fence(Ordering::SeqCst); + + // // TODO!!!!!: Use of reference here needs further consideration. + // // + // // Since we might never return to here, we can't take ownership of `current()`. + // // Is it safe to believe that `current()` will never change across calls? + // unsafe { + // // SAFETY: Preemption is disabled. 
+ // Scheduler::goto_scheduler(&Task::current().execution_context); + // } + // eonix_preempt::enable(); + // } +} - // Make sure all works are done before scheduling. - compiler_fence(Ordering::SeqCst); +impl Runtime { + fn add_task(&self, task: Arc) { + TASKS.lock_irq().insert(task); + } - // TODO!!!!!: Use of reference here needs further consideration. - // - // Since we might never return to here, we can't take ownership of `current()`. - // Is it safe to believe that `current()` will never change across calls? + fn remove_task(&self, task: &impl Deref>) { unsafe { - // SAFETY: Preemption is disabled. - Scheduler::goto_scheduler(&Task::current().execution_context); + TASKS + .lock_irq() + .cursor_mut_from_ptr(Arc::as_ptr(task)) + .remove(); } - eonix_preempt::enable(); } -} - -extern "C" fn local_scheduler() -> ! { - loop { - assert_preempt_count_eq!(1, "Scheduler::idle_task"); - let mut rq = local_rq().lock_irq(); - let previous_task = CURRENT_TASK + fn current(&self) -> Option> { + CURRENT_TASK .get() - .map(|ptr| unsafe { Arc::from_raw(ptr.as_ptr()) }); - let next_task = rq.get(); + .map(|ptr| unsafe { BorrowedArc::from_raw(ptr) }) + } + + fn remove_and_enqueue_current(&self, rq: &mut impl DerefMut) { + let Some(current) = self.current() else { + return; + }; + + match current.state.cmpxchg(TaskState::RUNNING, TaskState::READY) { + Ok(_) => { + let current = unsafe { + Arc::from_raw( + CURRENT_TASK + .get() + .expect("Current task should be present") + .as_ptr(), + ) + }; + + rq.put(current); + } + Err(old) => { + assert_eq!( + old, + TaskState::PARKED, + "Current task should be in PARKED state" + ); + } + } + } - match (previous_task, next_task) { - (None, None) => { - // Nothing to do, halt the cpu and rerun the loop. + /// Enter the runtime with an "init" future and run till its completion. + /// + /// The "init" future has the highest priority and when it completes, + /// the runtime will exit immediately and yield its output. 
+ pub fn enter(&self) { + loop { + let mut rq = local_rq().lock_irq(); + + self.remove_and_enqueue_current(&mut rq); + + let Some(next) = rq.get() else { drop(rq); halt(); continue; - } - (None, Some(next)) => { - CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); - } - (Some(previous), None) => { - if previous.state.is_running() { - // Previous thread is `Running`, return to the current running thread. - println_trace!( - "trace_scheduler", - "Returning to task id({}) without doing context switch", - previous.id - ); - CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _)); - } else { - // Nothing to do, halt the cpu and rerun the loop. - CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _)); - drop(rq); - halt(); - continue; - } - } - (Some(previous), Some(next)) => { - println_trace!( - "trace_scheduler", - "Switching from task id({}) to task id({})", - previous.id, - next.id - ); + }; - debug_assert_ne!(previous.id, next.id, "Switching to the same task"); + let old_state = next.state.swap(TaskState::RUNNING); + assert_eq!( + old_state, + TaskState::READY, + "Next task should be in READY state" + ); - if previous.state.is_running() || !previous.state.try_park() { - rq.put(previous); - } else { - previous.on_rq.store(false, Ordering::Release); - } + CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); + drop(rq); - CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); - } - } + // TODO: MAYBE we can move the release of finished tasks to some worker thread. + if Task::current().run() { + Task::current().state.set(TaskState::DEAD); + CURRENT_TASK.set(None); - drop(rq); - // TODO: We can move the release of finished tasks to some worker thread. 
- if let ExecuteStatus::Finished = Task::current().run() { - let current = CURRENT_TASK - .swap(None) - .map(|ptr| unsafe { Arc::from_raw(ptr.as_ptr()) }) - .expect("Current task should be present"); - Scheduler::remove_task(¤t); + self.remove_task(&Task::current()); + } } } } diff --git a/crates/eonix_runtime/src/task.rs b/crates/eonix_runtime/src/task.rs index 66746ba2..c193d774 100644 --- a/crates/eonix_runtime/src/task.rs +++ b/crates/eonix_runtime/src/task.rs @@ -2,25 +2,22 @@ mod adapter; mod task_state; use crate::{ - context::ExecutionContext, - executor::{ExecuteStatus, Executor, ExecutorBuilder, OutputHandle, Stack}, - run::{Contexted, Run}, - scheduler::Scheduler, + executor::{Executor, OutputHandle}, + ready_queue::{cpu_rq, ReadyQueue}, }; -use alloc::{boxed::Box, sync::Arc, task::Wake}; +use alloc::{sync::Arc, task::Wake}; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ - pin::{pin, Pin}, - sync::atomic::{AtomicBool, AtomicU32, Ordering}, - task::{Context, Poll, Waker}, + ops::DerefMut, + sync::atomic::{AtomicU32, Ordering}, + task::{Context, Waker}, }; use eonix_hal::processor::CPU; -use eonix_preempt::assert_preempt_enabled; -use eonix_sync::Spin; -use intrusive_collections::RBTreeAtomicLink; -use task_state::TaskState; +use eonix_sync::{Spin, SpinIrq}; +use intrusive_collections::{LinkedListAtomicLink, RBTreeAtomicLink}; -pub use adapter::TaskAdapter; +pub use adapter::{TaskAdapter, TaskRqAdapter}; +pub(crate) use task_state::TaskState; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct TaskId(u32); @@ -33,159 +30,70 @@ where pub(crate) output_handle: Arc>>, } -/// A `Task` represents a schedulable unit. 
-/// -/// Initial: state = Running, unparked = false -/// -/// Task::park() => swap state <- Parking, assert prev == Running -/// => swap unparked <- false -/// -> true => store state <- Running => return -/// -> false => goto scheduler => get rq lock => load state -/// -> Running => enqueue -/// -> Parking => cmpxchg Parking -> Parked -/// -> Running => enqueue -/// -> Parking => on_rq <- false -/// -> Parked => ??? -/// -/// Task::unpark() => swap unparked <- true -/// -> true => return -/// -> false => swap state <- Running -/// -> Running => return -/// -> Parking | Parked => Scheduler::activate pub struct Task { /// Unique identifier of the task. pub id: TaskId, - /// Whether the task is on some run queue (a.k.a ready). - pub(crate) on_rq: AtomicBool, - /// Whether someone has called `unpark` on this task. - pub(crate) unparked: AtomicBool, /// The last cpu that the task was executed on. /// If `on_rq` is `false`, we can't assume that this task is still on the cpu. pub(crate) cpu: AtomicU32, /// Task state. pub(crate) state: TaskState, - /// Task execution context. - pub(crate) execution_context: ExecutionContext, /// Executor object. - executor: AtomicUniqueRefCell>>>, + executor: AtomicUniqueRefCell, /// Link in the global task list. link_task_list: RBTreeAtomicLink, + /// Link in the ready queue. 
+ link_ready_queue: LinkedListAtomicLink, } impl Task { - pub fn new(runnable: R) -> TaskHandle + pub fn new(future: F) -> TaskHandle where - S: Stack + 'static, - R: Run + Contexted + Send + 'static, - R::Output: Send + 'static, + F: Future + Send + 'static, + F::Output: Send + 'static, { static ID: AtomicU32 = AtomicU32::new(0); - let (executor, execution_context, output) = ExecutorBuilder::new() - .stack(S::new()) - .runnable(runnable) - .build(); + let (executor, output_handle) = Executor::new(future); let task = Arc::new(Self { id: TaskId(ID.fetch_add(1, Ordering::Relaxed)), - on_rq: AtomicBool::new(false), - unparked: AtomicBool::new(false), cpu: AtomicU32::new(CPU::local().cpuid() as u32), state: TaskState::new(TaskState::RUNNING), - executor: AtomicUniqueRefCell::new(Some(executor)), - execution_context, + executor: AtomicUniqueRefCell::new(executor), link_task_list: RBTreeAtomicLink::new(), + link_ready_queue: LinkedListAtomicLink::new(), }); TaskHandle { task, - output_handle: output, + output_handle, } } - pub fn run(&self) -> ExecuteStatus { + /// # Returns + /// Whether the task has finished. + pub fn run(self: &Arc) -> bool { let mut executor_borrow = self.executor.borrow(); + let waker = Waker::from(self.clone()); + let mut cx = Context::from_waker(&waker); - let executor = executor_borrow - .as_ref() - .expect("Executor should be present") - .as_ref() - .get_ref(); - - if let ExecuteStatus::Finished = executor.progress() { - executor_borrow.take(); - ExecuteStatus::Finished - } else { - ExecuteStatus::Executing - } - } - - pub fn unpark(self: &Arc) { - if self.unparked.swap(true, Ordering::Release) { - return; - } - - eonix_preempt::disable(); - - match self.state.swap(TaskState::RUNNING) { - TaskState::RUNNING => {} - TaskState::PARKED | TaskState::PARKING => { - // We are waking up from sleep or someone else is parking this task. - // Try to wake it up. 
- Scheduler::get().activate(self); - } - _ => unreachable!(), - } - - eonix_preempt::enable(); + executor_borrow.run(&mut cx) } - pub fn park() { - eonix_preempt::disable(); - Self::park_preempt_disabled(); - } - - /// Park the current task with `preempt::count() == 1`. - pub fn park_preempt_disabled() { - let task = Task::current(); - - let old_state = task.state.swap(TaskState::PARKING); - assert_eq!( - old_state, - TaskState::RUNNING, - "Parking a task that is not running." - ); - - if task.unparked.swap(false, Ordering::AcqRel) { - // Someone has called `unpark` on this task previously. - task.state.swap(TaskState::RUNNING); - } else { - unsafe { - // SAFETY: Preemption is disabled. - Scheduler::goto_scheduler(&Task::current().execution_context) - }; - assert!(task.unparked.swap(false, Ordering::Acquire)); - } - - eonix_preempt::enable(); - } - - pub fn block_on(future: F) -> F::Output - where - F: Future, - { - assert_preempt_enabled!("block_on() must be called with preemption enabled"); - - let waker = Waker::from(Task::current().clone()); - let mut context = Context::from_waker(&waker); - let mut future = pin!(future); - + /// Get the stabilized lock for the task's run queue. 
+ fn rq(&self) -> Option + 'static> { loop { - if let Poll::Ready(output) = future.as_mut().poll(&mut context) { - break output; + let cpu = self.cpu.load(Ordering::Relaxed); + let rq = cpu_rq(cpu as usize).lock_irq(); + + if cpu == self.cpu.load(Ordering::Acquire) { + if self.link_ready_queue.is_linked() { + return Some(rq); + } else { + return None; + } } - - Task::park(); } } } @@ -196,6 +104,20 @@ impl Wake for Task { } fn wake_by_ref(self: &Arc) { - self.unpark(); + if self + .state + .cmpxchg(TaskState::PARKED, TaskState::READY) + .is_err() + { + return; + } + + if let Some(mut rq) = self.rq() { + if self.state.get() != TaskState::PARKED { + return; + } + + rq.put(self.clone()); + } } } diff --git a/crates/eonix_runtime/src/task/adapter.rs b/crates/eonix_runtime/src/task/adapter.rs index de1d0bad..3b5d1583 100644 --- a/crates/eonix_runtime/src/task/adapter.rs +++ b/crates/eonix_runtime/src/task/adapter.rs @@ -1,8 +1,9 @@ use super::{Task, TaskId}; use alloc::sync::Arc; -use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTreeAtomicLink}; +use intrusive_collections::{intrusive_adapter, KeyAdapter, LinkedListAtomicLink, RBTreeAtomicLink}; intrusive_adapter!(pub TaskAdapter = Arc: Task { link_task_list: RBTreeAtomicLink }); +intrusive_adapter!(pub TaskRqAdapter = Arc: Task { link_ready_queue: LinkedListAtomicLink }); impl<'a> KeyAdapter<'a> for TaskAdapter { type Key = TaskId; diff --git a/crates/eonix_runtime/src/task/task_state.rs b/crates/eonix_runtime/src/task/task_state.rs index b22ad889..ec9d67ad 100644 --- a/crates/eonix_runtime/src/task/task_state.rs +++ b/crates/eonix_runtime/src/task/task_state.rs @@ -4,32 +4,29 @@ use core::sync::atomic::{AtomicU32, Ordering}; pub struct TaskState(AtomicU32); impl TaskState { - pub const RUNNING: u32 = 0; - pub const PARKING: u32 = 1; + pub const READY: u32 = 0; + pub const RUNNING: u32 = 1; pub const PARKED: u32 = 2; + pub const DEAD: u32 = 1 << 31; pub(crate) const fn new(state: u32) -> Self { 
Self(AtomicU32::new(state)) } pub(crate) fn swap(&self, state: u32) -> u32 { - self.0.swap(state, Ordering::AcqRel) + self.0.swap(state, Ordering::SeqCst) } - pub(crate) fn try_park(&self) -> bool { - match self.0.compare_exchange( - TaskState::PARKING, - TaskState::PARKED, - Ordering::AcqRel, - Ordering::Acquire, - ) { - Ok(_) => true, - Err(TaskState::RUNNING) => false, - Err(_) => unreachable!("Invalid task state while trying to park."), - } + pub(crate) fn set(&self, state: u32) { + self.0.store(state, Ordering::SeqCst); } - pub(crate) fn is_running(&self) -> bool { - self.0.load(Ordering::Acquire) == Self::RUNNING + pub(crate) fn get(&self) -> u32 { + self.0.load(Ordering::SeqCst) + } + + pub(crate) fn cmpxchg(&self, current: u32, new: u32) -> Result { + self.0 + .compare_exchange(current, new, Ordering::SeqCst, Ordering::SeqCst) } } From e23c9eb1f24c572e19c9d275acf62e4ce0e2a1e0 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Thu, 7 Aug 2025 16:59:47 +0800 Subject: [PATCH 11/54] runtime: new task sleep-wakeup method and some adaption We use RUNNING to indicate that the task is on the cpu, and use READY to indicate that the task could be further run again and therefore put into the ready queue after one poll() call. When the task is acquired from the ready queue and put onto cpu, it's marked as RUNNING only, making it put suspended after we got the Poll::Pending from the poll() call. If we (or others) call Waker::wake() within the run, we'll set the READY flag then. And when we return from the poll call, we could find it by a CAS and put it back to the ready queue again. We've also done some adaption work to the rest of the kernel, mainly to remove *SOME* of the Task::block_on calls. But to completely remove it is not possible for now. We should solve that in further few commits. 
Signed-off-by: greatbridf --- crates/eonix_runtime/src/executor.rs | 39 +++--- crates/eonix_runtime/src/scheduler.rs | 129 +++++++++----------- crates/eonix_runtime/src/task.rs | 52 ++++---- crates/eonix_runtime/src/task/task_state.rs | 19 +-- src/driver/serial.rs | 6 +- src/driver/virtio/riscv64.rs | 12 +- src/kernel/interrupt.rs | 13 +- src/kernel/mem/page_alloc/raw_page.rs | 1 - src/kernel/syscall/mm.rs | 2 +- src/kernel/task.rs | 2 +- src/kernel/task/clone.rs | 9 +- src/kernel/task/process.rs | 12 +- src/kernel/task/process_list.rs | 10 +- src/kernel/task/session.rs | 4 +- src/kernel/task/signal.rs | 13 +- src/kernel/task/thread.rs | 108 +++++----------- src/kernel/terminal.rs | 18 +-- src/lib.rs | 63 +++++----- 18 files changed, 210 insertions(+), 302 deletions(-) diff --git a/crates/eonix_runtime/src/executor.rs b/crates/eonix_runtime/src/executor.rs index 7be70eb9..3b858a47 100644 --- a/crates/eonix_runtime/src/executor.rs +++ b/crates/eonix_runtime/src/executor.rs @@ -23,9 +23,7 @@ pub use stack::Stack; pub struct Executor(Option>>); trait TypeErasedExecutor: Send { - /// # Returns - /// Whether the executor has finished. 
- fn run(self: Pin<&mut Self>, cx: &mut Context<'_>) -> bool; + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()>; } struct RealExecutor<'a, F> @@ -43,9 +41,9 @@ where F: Future + Send, F::Output: Send, { - fn run(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> bool { + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { if self.output_handle.as_ptr().is_null() { - return true; + return Poll::Ready(()); } let future = unsafe { @@ -53,21 +51,16 @@ where self.as_mut().map_unchecked_mut(|me| &mut me.future) }; - match future.poll(cx) { - Poll::Ready(output) => { - if let Some(output_handle) = self.output_handle.upgrade() { - output_handle.lock().commit_output(output); + future.poll(cx).map(|output| { + if let Some(output_handle) = self.output_handle.upgrade() { + output_handle.lock().commit_output(output); - unsafe { - // SAFETY: `output_handle` is Unpin. - self.get_unchecked_mut().output_handle = Weak::new(); - } + unsafe { + // SAFETY: `output_handle` is Unpin. + self.get_unchecked_mut().output_handle = Weak::new(); } - - true } - Poll::Pending => false, - } + }) } } @@ -79,7 +72,6 @@ impl Executor { { let output_handle = OutputHandle::new(); - // TODO: accept futures with non 'static lifetimes. 
( Executor(Some(Box::pin(RealExecutor { future, @@ -90,16 +82,13 @@ impl Executor { ) } - pub fn run(&mut self, cx: &mut Context<'_>) -> bool { + pub fn poll(&mut self, cx: &mut Context<'_>) -> Poll<()> { if let Some(executor) = self.0.as_mut() { - let finished = executor.as_mut().run(cx); - if finished { + executor.as_mut().poll(cx).map(|_| { self.0.take(); - } - - finished + }) } else { - true + Poll::Ready(()) } } } diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs index dcd8cfc1..c9c73ea5 100644 --- a/crates/eonix_runtime/src/scheduler.rs +++ b/crates/eonix_runtime/src/scheduler.rs @@ -1,14 +1,13 @@ use crate::{ executor::OutputHandle, - ready_queue::{cpu_rq, local_rq, ReadyQueue}, + ready_queue::{local_rq, ReadyQueue}, task::{Task, TaskAdapter, TaskHandle, TaskState}, }; -use alloc::sync::Arc; +use alloc::{sync::Arc, task::Wake}; use core::{ ops::{Deref, DerefMut}, ptr::NonNull, - sync::atomic::Ordering, - task::Waker, + task::{Context, Poll, Waker}, }; use eonix_hal::processor::halt; use eonix_sync::{LazyLock, Spin, SpinIrq as _}; @@ -64,35 +63,6 @@ where } impl Runtime { - fn select_cpu_for_task(&self, task: &Task) -> usize { - task.cpu.load(Ordering::Relaxed) as _ - } - - pub fn activate(&self, task: &Arc) { - // Only one cpu can be activating the task at a time. - // TODO: Add some checks. - - if task.on_rq.swap(true, Ordering::Acquire) { - // Lock the rq and check whether the task is on the rq again. - let cpuid = task.cpu.load(Ordering::Acquire); - let mut rq = cpu_rq(cpuid as _).lock_irq(); - - if !task.on_rq.load(Ordering::Acquire) { - // Task has just got off the rq. Put it back. - rq.put(task.clone()); - } else { - // Task is already on the rq. Do nothing. - return; - } - } else { - // Task not on some rq. Select one and put it here. 
- let cpu = self.select_cpu_for_task(&task); - let mut rq = cpu_rq(cpu).lock_irq(); - task.cpu.store(cpu as _, Ordering::Release); - rq.put(task.clone()); - } - } - pub fn spawn(&self, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -104,36 +74,11 @@ impl Runtime { } = Task::new(future); self.add_task(task.clone()); - self.activate(&task); + task.wake_by_ref(); JoinHandle(output_handle) } - // /// Go to idle task. Call this with `preempt_count == 1`. - // /// The preempt count will be decremented by this function. - // /// - // /// # Safety - // /// We might never return from here. - // /// Drop all variables that take ownership of some resource before calling this function. - // pub fn schedule() { - // assert_preempt_count_eq!(1, "Scheduler::schedule"); - - // // Make sure all works are done before scheduling. - // compiler_fence(Ordering::SeqCst); - - // // TODO!!!!!: Use of reference here needs further consideration. - // // - // // Since we might never return to here, we can't take ownership of `current()`. - // // Is it safe to believe that `current()` will never change across calls? - // unsafe { - // // SAFETY: Preemption is disabled. 
- // Scheduler::goto_scheduler(&Task::current().execution_context); - // } - // eonix_preempt::enable(); - // } -} - -impl Runtime { fn add_task(&self, task: Arc) { TASKS.lock_irq().insert(task); } @@ -158,12 +103,18 @@ impl Runtime { return; }; - match current.state.cmpxchg(TaskState::RUNNING, TaskState::READY) { - Ok(_) => { + match current.state.update(|state| match state { + TaskState::READY_RUNNING => Some(TaskState::READY), + TaskState::RUNNING => Some(TaskState::BLOCKED), + _ => { + unreachable!("Current task should be at least in RUNNING state, but got {state:?}") + } + }) { + Ok(TaskState::READY_RUNNING) => { let current = unsafe { Arc::from_raw( CURRENT_TASK - .get() + .swap(None) .expect("Current task should be present") .as_ptr(), ) @@ -171,14 +122,40 @@ impl Runtime { rq.put(current); } - Err(old) => { - assert_eq!( - old, - TaskState::PARKED, - "Current task should be in PARKED state" - ); + Ok(_) => {} + _ => unreachable!(), + } + } + + pub fn block_till_woken(set_waker: impl FnOnce(&Waker)) -> impl Future { + struct BlockTillWoken { + set_waker: Option, + slept: bool, + } + + impl Future for BlockTillWoken { + type Output = (); + + fn poll(self: core::pin::Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + if self.slept { + Poll::Ready(()) + } else { + let (set_waker, slept) = unsafe { + let me = self.get_unchecked_mut(); + (me.set_waker.take().unwrap(), &mut me.slept) + }; + + set_waker(cx.waker()); + *slept = true; + Poll::Pending + } } } + + BlockTillWoken { + set_waker: Some(set_waker), + slept: false, + } } /// Enter the runtime with an "init" future and run till its completion. @@ -204,15 +181,23 @@ impl Runtime { "Next task should be in READY state" ); - CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); + unsafe { + CURRENT_TASK.set(Some(NonNull::new_unchecked(Arc::into_raw(next) as *mut _))); + } + drop(rq); // TODO: MAYBE we can move the release of finished tasks to some worker thread. 
- if Task::current().run() { - Task::current().state.set(TaskState::DEAD); - CURRENT_TASK.set(None); + if Task::current().poll().is_ready() { + let old_state = Task::current().state.swap(TaskState::DEAD); + assert!( + old_state & TaskState::RUNNING != 0, + "Current task should be at least in RUNNING state" + ); self.remove_task(&Task::current()); + + CURRENT_TASK.set(None); } } } diff --git a/crates/eonix_runtime/src/task.rs b/crates/eonix_runtime/src/task.rs index c193d774..8f4062d8 100644 --- a/crates/eonix_runtime/src/task.rs +++ b/crates/eonix_runtime/src/task.rs @@ -10,7 +10,7 @@ use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ ops::DerefMut, sync::atomic::{AtomicU32, Ordering}, - task::{Context, Waker}, + task::{Context, Poll, Waker}, }; use eonix_hal::processor::CPU; use eonix_sync::{Spin, SpinIrq}; @@ -59,7 +59,7 @@ impl Task { let task = Arc::new(Self { id: TaskId(ID.fetch_add(1, Ordering::Relaxed)), cpu: AtomicU32::new(CPU::local().cpuid() as u32), - state: TaskState::new(TaskState::RUNNING), + state: TaskState::new(TaskState::BLOCKED), executor: AtomicUniqueRefCell::new(executor), link_task_list: RBTreeAtomicLink::new(), link_ready_queue: LinkedListAtomicLink::new(), @@ -71,31 +71,35 @@ impl Task { } } - /// # Returns - /// Whether the task has finished. - pub fn run(self: &Arc) -> bool { + pub fn poll(self: &Arc) -> Poll<()> { let mut executor_borrow = self.executor.borrow(); let waker = Waker::from(self.clone()); let mut cx = Context::from_waker(&waker); - executor_borrow.run(&mut cx) + executor_borrow.poll(&mut cx) } /// Get the stabilized lock for the task's run queue. - fn rq(&self) -> Option + 'static> { + pub fn rq(&self) -> impl DerefMut + 'static { loop { let cpu = self.cpu.load(Ordering::Relaxed); let rq = cpu_rq(cpu as usize).lock_irq(); - if cpu == self.cpu.load(Ordering::Acquire) { - if self.link_ready_queue.is_linked() { - return Some(rq); - } else { - return None; - } + // We stabilize the task cpu with the cpu rq here for now. 
+ if cpu != self.cpu.load(Ordering::Acquire) { + continue; } + + return rq; } } + + pub fn block_on(future: F) -> F::Output + where + F: Future, + { + todo!() + } } impl Wake for Task { @@ -104,20 +108,18 @@ impl Wake for Task { } fn wake_by_ref(self: &Arc) { - if self - .state - .cmpxchg(TaskState::PARKED, TaskState::READY) - .is_err() - { + let Ok(old) = self.state.update(|state| match state { + TaskState::BLOCKED => Some(TaskState::READY), + TaskState::RUNNING => Some(TaskState::READY | TaskState::RUNNING), + TaskState::READY | TaskState::READY_RUNNING => None, + state => unreachable!("Waking a {state:?} task"), + }) else { return; - } - - if let Some(mut rq) = self.rq() { - if self.state.get() != TaskState::PARKED { - return; - } + }; - rq.put(self.clone()); + if old == TaskState::BLOCKED { + // If the task was blocked, we need to put it back to the ready queue. + self.rq().put(self.clone()); } } } diff --git a/crates/eonix_runtime/src/task/task_state.rs b/crates/eonix_runtime/src/task/task_state.rs index ec9d67ad..074acfb4 100644 --- a/crates/eonix_runtime/src/task/task_state.rs +++ b/crates/eonix_runtime/src/task/task_state.rs @@ -4,9 +4,10 @@ use core::sync::atomic::{AtomicU32, Ordering}; pub struct TaskState(AtomicU32); impl TaskState { - pub const READY: u32 = 0; - pub const RUNNING: u32 = 1; - pub const PARKED: u32 = 2; + pub const BLOCKED: u32 = 0; + pub const READY: u32 = 1; + pub const RUNNING: u32 = 2; + pub const READY_RUNNING: u32 = TaskState::READY | TaskState::RUNNING; pub const DEAD: u32 = 1 << 31; pub(crate) const fn new(state: u32) -> Self { @@ -17,16 +18,8 @@ impl TaskState { self.0.swap(state, Ordering::SeqCst) } - pub(crate) fn set(&self, state: u32) { - self.0.store(state, Ordering::SeqCst); - } - - pub(crate) fn get(&self) -> u32 { - self.0.load(Ordering::SeqCst) - } - - pub(crate) fn cmpxchg(&self, current: u32, new: u32) -> Result { + pub(crate) fn update(&self, func: impl FnMut(u32) -> Option) -> Result { self.0 - 
.compare_exchange(current, new, Ordering::SeqCst, Ordering::SeqCst) + .fetch_update(Ordering::SeqCst, Ordering::SeqCst, func) } } diff --git a/src/driver/serial.rs b/src/driver/serial.rs index d7fabbbd..d69965f4 100644 --- a/src/driver/serial.rs +++ b/src/driver/serial.rs @@ -3,14 +3,14 @@ mod io; use crate::{ kernel::{ block::make_device, console::set_console, constants::EIO, interrupt::register_irq_handler, - task::KernelStack, CharDevice, CharDeviceType, Terminal, TerminalDevice, + CharDevice, CharDeviceType, Terminal, TerminalDevice, }, prelude::*, }; use alloc::{collections::vec_deque::VecDeque, format, sync::Arc}; use bitflags::bitflags; use core::pin::pin; -use eonix_runtime::{run::FutureRun, scheduler::Scheduler}; +use eonix_runtime::scheduler::RUNTIME; use eonix_sync::{SpinIrq as _, WaitList}; use io::SerialIO; @@ -161,7 +161,7 @@ impl Serial { })?; } - Scheduler::get().spawn::(FutureRun::new(Self::worker(port.clone()))); + RUNTIME.spawn(Self::worker(port.clone())); let _ = set_console(terminal.clone()); eonix_log::set_console(terminal.clone()); diff --git a/src/driver/virtio/riscv64.rs b/src/driver/virtio/riscv64.rs index 66f150c3..ad132569 100644 --- a/src/driver/virtio/riscv64.rs +++ b/src/driver/virtio/riscv64.rs @@ -1,23 +1,15 @@ use super::virtio_blk::HAL; -use crate::kernel::{ - block::{make_device, BlockDevice}, - mem::{AsMemoryBlock, MemoryBlock, Page}, -}; +use crate::kernel::block::{make_device, BlockDevice}; use alloc::{sync::Arc, vec::Vec}; -use core::num::NonZero; use eonix_hal::arch_exported::fdt::FDT; use eonix_hal::mm::ArchPhysAccess; use eonix_log::{println_info, println_warn}; -use eonix_mm::{ - address::{Addr, PAddr, PhysAccess}, - paging::PFN, -}; +use eonix_mm::address::{PAddr, PhysAccess}; use eonix_runtime::task::Task; use eonix_sync::Spin; use virtio_drivers::{ device::blk::VirtIOBlk, transport::{mmio::MmioTransport, Transport}, - Hal, }; pub fn init() { diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs index 
1a84d534..4b55f182 100644 --- a/src/kernel/interrupt.rs +++ b/src/kernel/interrupt.rs @@ -1,5 +1,5 @@ use super::mem::handle_kernel_page_fault; -use super::timer::{should_reschedule, timer_interrupt}; +use super::timer::timer_interrupt; use crate::kernel::constants::EINVAL; use crate::prelude::*; use alloc::sync::Arc; @@ -7,7 +7,6 @@ use eonix_hal::traits::fault::Fault; use eonix_hal::traits::trap::{RawTrapContext, TrapType}; use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; -use eonix_runtime::scheduler::Scheduler; use eonix_sync::SpinIrq as _; static IRQ_HANDLERS: Spin<[Vec>; 16]> = @@ -51,15 +50,7 @@ pub fn interrupt_handler(trap_ctx: &mut TrapContext) { TrapType::Syscall { no, .. } => unreachable!("Syscall {} in kernel space.", no), TrapType::Fault(fault) => default_fault_handler(fault, trap_ctx), TrapType::Irq { callback } => callback(default_irq_handler), - TrapType::Timer { callback } => { - callback(timer_interrupt); - - if eonix_preempt::count() == 0 && should_reschedule() { - // To make scheduler satisfied. 
- eonix_preempt::disable(); - Scheduler::schedule(); - } - } + TrapType::Timer { callback } => callback(timer_interrupt), } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 4b420255..54d4d590 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -6,7 +6,6 @@ use core::{ sync::atomic::{AtomicU32, AtomicUsize, Ordering}, }; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::paging::PAGE_SIZE; use eonix_mm::{ address::{PAddr, PhysAccess as _}, paging::{RawPage as RawPageTrait, PFN}, diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index b639650d..dd263e6b 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -1,6 +1,6 @@ use super::FromSyscallArg; use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; -use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT, ENOMEM}; +use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; use crate::kernel::mem::FileMapping; use crate::kernel::task::Thread; use crate::kernel::vfs::filearray::FD; diff --git a/src/kernel/task.rs b/src/kernel/task.rs index e8d36e51..1b47923e 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -18,4 +18,4 @@ pub use process_group::ProcessGroup; pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; -pub use thread::{new_thread_runnable, yield_now, Thread, ThreadBuilder}; +pub use thread::{yield_now, Thread, ThreadBuilder}; diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index c8efe5e8..48e34f96 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,10 +1,7 @@ use crate::{ kernel::{ syscall::procops::parse_user_tls, - task::{ - alloc_pid, new_thread_runnable, KernelStack, ProcessBuilder, ProcessList, Thread, - ThreadBuilder, - }, + task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}, user::UserPointerMut, }, KResult, @@ -12,7 +9,7 @@ use crate::{ use 
bitflags::bitflags; use core::num::NonZero; use eonix_hal::processor::UserTLS; -use eonix_runtime::{scheduler::Scheduler, task::Task}; +use eonix_runtime::{scheduler::RUNTIME, task::Task}; use eonix_sync::AsProof; use posix_types::signal::Signal; @@ -166,7 +163,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)? } - Scheduler::get().spawn::(new_thread_runnable(new_thread)); + RUNTIME.spawn(new_thread.run()); Ok(new_pid) } diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 53499a06..bf2edc95 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -350,7 +350,11 @@ impl Process { trace_continue: bool, ) -> KResult> { let wait_object = { - let mut waits = self.wait_list.entry(wait_id, trace_stop, trace_continue); + let mut waits = self + .wait_list + .entry(wait_id, trace_stop, trace_continue) + .await; + loop { if let Some(object) = waits.get() { break object; @@ -377,7 +381,7 @@ impl Process { Ok(Some(wait_object)) } else { let mut procs = ProcessList::get().write().await; - procs.remove_process(wait_object.pid); + procs.remove_process(wait_object.pid).await; assert!(self .inner .access_mut(procs.prove_mut()) @@ -572,9 +576,9 @@ impl WaitList { /// # Safety /// Locks `ProcessList` and `WaitList` at the same time. When `wait` is called, /// releases the lock on `ProcessList` and `WaitList` and waits on `cv_wait_procs`. 
- pub fn entry(&self, wait_id: WaitId, want_stop: bool, want_continue: bool) -> Entry { + pub async fn entry(&self, wait_id: WaitId, want_stop: bool, want_continue: bool) -> Entry { Entry { - process_list: Task::block_on(ProcessList::get().read()), + process_list: ProcessList::get().read().await, wait_procs: self.wait_procs.lock(), cv: &self.cv_wait_procs, want_stop, diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs index 2832dae5..5293b4b7 100644 --- a/src/kernel/task/process_list.rs +++ b/src/kernel/task/process_list.rs @@ -9,7 +9,6 @@ use alloc::{ collections::btree_map::BTreeMap, sync::{Arc, Weak}, }; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, AsProofMut as _, RwLock}; pub struct ProcessList { @@ -54,7 +53,7 @@ impl ProcessList { self.threads.insert(thread.tid, thread.clone()); } - pub fn remove_process(&mut self, pid: u32) { + pub async fn remove_process(&mut self, pid: u32) { // Thread group leader has the same tid as the pid. if let Some(thread) = self.threads.remove(&pid) { self.processes.remove(&pid); @@ -64,7 +63,7 @@ impl ProcessList { let pgroup = unsafe { thread.process.pgroup.swap(None) }.unwrap(); let _parent = unsafe { thread.process.parent.swap(None) }.unwrap(); pgroup.remove_member(pid, self.prove_mut()); - Task::block_on(rcu_sync()); + rcu_sync().await; if Arc::strong_count(&pgroup) == 1 { self.pgroups.remove(&pgroup.pgid); @@ -154,10 +153,9 @@ impl ProcessList { // If we are the session leader, we should drop the control terminal. 
if process.session(self.prove()).sid == process.pid { - if let Some(terminal) = - Task::block_on(process.session(self.prove()).drop_control_terminal()) + if let Some(terminal) = process.session(self.prove()).drop_control_terminal().await { - terminal.drop_session(); + terminal.drop_session().await; } } diff --git a/src/kernel/task/session.rs b/src/kernel/task/session.rs index 261a60c0..a7b57afd 100644 --- a/src/kernel/task/session.rs +++ b/src/kernel/task/session.rs @@ -87,14 +87,14 @@ impl Session { ) -> KResult<()> { let mut job_control = self.job_control.write().await; if let Some(_) = job_control.control_terminal.as_ref() { - if let Some(session) = terminal.session().as_ref() { + if let Some(session) = terminal.session().await.as_ref() { if session.sid == self.sid { return Ok(()); } } return Err(EPERM); } - terminal.set_session(self, forced)?; + terminal.set_session(self, forced).await?; job_control.control_terminal = Some(terminal.clone()); job_control.foreground = Arc::downgrade(&Thread::current().process.pgroup(procs)); Ok(()) diff --git a/src/kernel/task/signal.rs b/src/kernel/task/signal.rs index 5cff2fe6..b6ed34bf 100644 --- a/src/kernel/task/signal.rs +++ b/src/kernel/task/signal.rs @@ -9,7 +9,7 @@ use core::{cmp::Reverse, task::Waker}; use eonix_hal::fpu::FpuState; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; -use eonix_runtime::task::Task; +use eonix_runtime::scheduler::Runtime; use eonix_sync::AsProof as _; use intrusive_collections::UnsafeRef; use posix_types::signal::{SigSet, Signal}; @@ -226,15 +226,12 @@ impl SignalList { // `SIGSTOP` can only be waken up by `SIGCONT` or `SIGKILL`. // SAFETY: Preempt disabled above. 
- { + Runtime::block_till_woken(|waker| { let mut inner = self.inner.lock(); - let waker = Waker::from(Task::current().clone()); - - let old_waker = inner.stop_waker.replace(waker); + let old_waker = inner.stop_waker.replace(waker.clone()); assert!(old_waker.is_none(), "We should not have a waker here"); - } - - Task::park_preempt_disabled(); + }) + .await; if let Some(parent) = thread.process.parent.load() { parent.notify( diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index cccbb918..e3b3a967 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -17,10 +17,10 @@ use alloc::sync::Arc; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ future::Future, - pin::Pin, + pin::{pin, Pin}, ptr::NonNull, sync::atomic::{AtomicBool, Ordering}, - task::{Context, Poll, Waker}, + task::{Context, Poll}, }; use eonix_hal::{ fpu::FpuState, @@ -33,7 +33,6 @@ use eonix_hal::{ trap::{disable_irqs_save, TrapContext}, }; use eonix_mm::address::{Addr as _, VAddr}; -use eonix_runtime::run::{Contexted, Run, RunState}; use eonix_sync::AsProofMut as _; use pointers::BorrowedArc; use posix_types::signal::Signal; @@ -41,11 +40,6 @@ use posix_types::signal::Signal; #[eonix_percpu::define_percpu] static CURRENT_THREAD: Option> = None; -pub struct ThreadRunnable { - thread: Arc, - future: F, -} - pub struct ThreadBuilder { tid: Option, name: Option>, @@ -421,28 +415,44 @@ impl Thread { } } - pub async fn run(self: Arc) { - struct ContextedRun<'a, F: Future>(F, &'a Thread); + pub fn run(self: Arc) -> impl Future + Send + 'static { + async fn real_run_with_context(me: &Arc) { + let mut future = pin!(me.real_run()); + + core::future::poll_fn(|cx| { + me.process.mm_list.activate(); - impl Future for ContextedRun<'_, F> { - type Output = F::Output; + CURRENT_THREAD.set(NonNull::new(Arc::as_ptr(me) as *mut _)); + + unsafe { + // SAFETY: Preemption is disabled. 
+ me.load_thread_area32(); + } + + unsafe { + let trap_ctx_ptr: *const TrapContext = &raw const *me.trap_ctx.borrow(); + // SAFETY: + CPU::local() + .as_mut() + .load_interrupt_stack(trap_ctx_ptr as u64); + } - fn poll(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { let irq_state = disable_irqs_save(); - let (future, _) = unsafe { - // SAFETY: We construct a pinned future and `&Thread` is `Unpin`. - let me = self.as_mut().get_unchecked_mut(); - (Pin::new_unchecked(&mut me.0), me.1) - }; - let retval = future.poll(ctx); + let result = future.as_mut().poll(cx); irq_state.restore(); - retval - } + + me.process.mm_list.deactivate(); + + CURRENT_THREAD.set(None); + + result + }) + .await } - ContextedRun(self.real_run(), &self).await + async move { real_run_with_context(&self).await } } } @@ -467,57 +477,3 @@ pub async fn yield_now() { Yield { yielded: false }.await; } - -pub fn new_thread_runnable( - thread: Arc, -) -> ThreadRunnable + Send + 'static> { - ThreadRunnable { - thread: thread.clone(), - future: thread.run(), - } -} - -impl Contexted for ThreadRunnable { - fn load_running_context(&self) { - self.thread.process.mm_list.activate(); - - let raw_ptr: *const Thread = &raw const *self.thread; - CURRENT_THREAD.set(NonNull::new(raw_ptr as *mut _)); - - unsafe { - // SAFETY: Preemption is disabled. 
- self.thread.load_thread_area32(); - } - - unsafe { - let trap_ctx_ptr: *const TrapContext = &raw const *self.thread.trap_ctx.borrow(); - // SAFETY: - CPU::local() - .as_mut() - .load_interrupt_stack(trap_ctx_ptr as u64); - } - } - - fn restore_running_context(&self) { - self.thread.process.mm_list.deactivate(); - - CURRENT_THREAD.set(None); - } -} - -impl Run for ThreadRunnable { - type Output = F::Output; - - fn run(mut self: Pin<&mut Self>, waker: &Waker) -> RunState { - let mut ctx = Context::from_waker(waker); - - match unsafe { - self.as_mut() - .map_unchecked_mut(|me| &mut me.future) - .poll(&mut ctx) - } { - Poll::Ready(output) => RunState::Finished(output), - Poll::Pending => RunState::Running, - } - } -} diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 31c08ea2..5532a2e1 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -623,12 +623,12 @@ impl Terminal { ptr.write(window_size) } TerminalIORequest::GetTermios(ptr) => { - let termios = Task::block_on(self.inner.lock()).termio.get_user(); + let termios = self.inner.lock().await.termio.get_user(); ptr.write(termios) } TerminalIORequest::SetTermios(ptr) => { let user_termios = ptr.read()?; - let mut inner = Task::block_on(self.inner.lock()); + let mut inner = self.inner.lock().await; // TODO: We ignore unknown bits for now. inner.termio.iflag = TermioIFlags::from_bits_truncate(user_termios.iflag as u16); @@ -644,13 +644,13 @@ impl Terminal { } /// Assign the `session` to this terminal. Drop the previous session if `forced` is true. 
- pub fn set_session(&self, session: &Arc, forced: bool) -> KResult<()> { - let mut inner = Task::block_on(self.inner.lock()); + pub async fn set_session(&self, session: &Arc, forced: bool) -> KResult<()> { + let mut inner = self.inner.lock().await; if let Some(session) = inner.session.upgrade() { if !forced { Err(EPERM) } else { - Task::block_on(session.drop_control_terminal()); + session.drop_control_terminal().await; inner.session = Arc::downgrade(&session); Ok(()) } @@ -661,12 +661,12 @@ impl Terminal { } } - pub fn drop_session(&self) { - Task::block_on(self.inner.lock()).session = Weak::new(); + pub async fn drop_session(&self) { + self.inner.lock().await.session = Weak::new(); } - pub fn session(&self) -> Option> { - Task::block_on(self.inner.lock()).session.upgrade() + pub async fn session(&self) -> Option> { + self.inner.lock().await.session.upgrade() } } diff --git a/src/lib.rs b/src/lib.rs index 6fd82c40..2900772a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,19 +24,19 @@ use crate::kernel::task::alloc_pid; use alloc::{ffi::CString, sync::Arc}; use core::{ hint::spin_loop, - sync::atomic::{AtomicBool, Ordering}, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, }; use eonix_hal::{ - arch_exported::bootstrap::shutdown, processor::CPU, traits::trap::IrqState, + arch_exported::bootstrap::shutdown, + processor::{halt, CPU, CPU_COUNT}, + traits::trap::IrqState, trap::disable_irqs_save, }; use eonix_mm::address::PRange; -use eonix_runtime::{run::FutureRun, scheduler::Scheduler, task::Task}; +use eonix_runtime::scheduler::RUNTIME; use kernel::{ mem::GlobalPageAlloc, - task::{ - new_thread_runnable, KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder, - }, + task::{ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, @@ -80,6 +80,25 @@ fn panic(info: &core::panic::PanicInfo) -> ! 
{ } static BSP_OK: AtomicBool = AtomicBool::new(false); +static CPU_SHUTTING_DOWN: AtomicUsize = AtomicUsize::new(0); + +fn shutdown_system() -> ! { + let cpu_count = CPU_COUNT.load(Ordering::Relaxed); + + if CPU_SHUTTING_DOWN.fetch_add(1, Ordering::AcqRel) + 1 == cpu_count { + println_info!("All CPUs are shutting down. Gracefully powering off..."); + shutdown(); + } else { + println_info!( + "CPU {} is shutting down. Waiting for other CPUs...", + CPU::local().cpuid() + ); + + loop { + halt(); + } + } +} #[eonix_hal::main] fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! { @@ -90,22 +109,14 @@ fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! { driver::sbi_console::init_console(); } - // To satisfy the `Scheduler` "preempt count == 0" assertion. - eonix_preempt::disable(); - - // We need root dentry to be present in constructor of `FsContext`. - // So call `init_vfs` first, then `init_multitasking`. - Scheduler::init_local_scheduler::(); - - Scheduler::get().spawn::(FutureRun::new(init_process(data.get_early_stack()))); - BSP_OK.store(true, Ordering::Release); + RUNTIME.spawn(init_process(data.get_early_stack())); + drop(data); - unsafe { - // SAFETY: `preempt::count()` == 1. - Scheduler::goto_scheduler_noreturn() - } + + RUNTIME.enter(); + shutdown_system(); } #[eonix_hal::ap_main] @@ -115,16 +126,10 @@ fn kernel_ap_main(_stack_range: PRange) -> ! { spin_loop(); } - Scheduler::init_local_scheduler::(); println_debug!("AP{} started", CPU::local().cpuid()); - eonix_preempt::disable(); - - // TODO!!!!!: Free the stack after having switched to idle task. - unsafe { - // SAFETY: `preempt::count()` == 1. 
- Scheduler::goto_scheduler_noreturn() - } + RUNTIME.enter(); + shutdown_system(); } async fn init_process(early_kstack: PRange) { @@ -223,7 +228,7 @@ async fn init_process(early_kstack: PRange) { .name(Arc::from(&b"busybox"[..])) .entry(load_info.entry_ip, load_info.sp); - let mut process_list = Task::block_on(ProcessList::get().write()); + let mut process_list = ProcessList::get().write().await; let (thread, process) = ProcessBuilder::new() .pid(alloc_pid()) .mm_list(load_info.mm_list) @@ -235,5 +240,5 @@ async fn init_process(early_kstack: PRange) { // TODO!!!: Remove this. thread.files.open_console(); - Scheduler::get().spawn::(new_thread_runnable(thread)); + RUNTIME.spawn(thread.run()); } From fb9a175e70c182681ad3467f70e18b7c2fa08f8a Mon Sep 17 00:00:00 2001 From: greatbridf Date: Fri, 8 Aug 2025 00:44:41 +0800 Subject: [PATCH 12/54] runtime: add trace logs and fix a few bugs Add tracing logs in Runtime::enter and other critical points. Pass trace_scheduler feature down to eonix_runtime crate, fixing the problem that the feature is not working. When the task is blocked, we set CURRENT_TASK to None as well. In early initialization stage, the stack is placed in identically mapped physical address. VirtIO driver might try converting the given buffer paths back to physical ones, which will generate errors. So BSP and AP should allocate another stack and switch to it. We use TaskContext for the fix. 
Signed-off-by: greatbridf --- Cargo.toml | 2 +- crates/eonix_runtime/src/scheduler.rs | 42 ++++++++++++++++++++------- src/lib.rs | 42 +++++++++++++++++++++++---- 3 files changed, 70 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 15df5f15..5231dbb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ virtio-drivers = { version = "0.11.0" } default = [] trace_pci = [] trace_syscall = [] -trace_scheduler = [] +trace_scheduler = ["eonix_runtime/trace_scheduler"] log_trace = ["trace_pci", "trace_syscall", "trace_scheduler"] log_debug = [] smp = [] diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs index c9c73ea5..3f72fbf4 100644 --- a/crates/eonix_runtime/src/scheduler.rs +++ b/crates/eonix_runtime/src/scheduler.rs @@ -10,6 +10,7 @@ use core::{ task::{Context, Poll, Waker}, }; use eonix_hal::processor::halt; +use eonix_log::println_trace; use eonix_sync::{LazyLock, Spin, SpinIrq as _}; use intrusive_collections::RBTree; use pointers::BorrowedArc; @@ -99,7 +100,10 @@ impl Runtime { } fn remove_and_enqueue_current(&self, rq: &mut impl DerefMut) { - let Some(current) = self.current() else { + let Some(current) = CURRENT_TASK + .swap(None) + .map(|cur| unsafe { Arc::from_raw(cur.as_ptr()) }) + else { return; }; @@ -111,18 +115,23 @@ impl Runtime { } }) { Ok(TaskState::READY_RUNNING) => { - let current = unsafe { - Arc::from_raw( - CURRENT_TASK - .swap(None) - .expect("Current task should be present") - .as_ptr(), - ) - }; + println_trace!( + "trace_scheduler", + "Re-enqueueing task {:?} (CPU{})", + current.id, + eonix_hal::processor::CPU::local().cpuid(), + ); rq.put(current); } - Ok(_) => {} + Ok(_) => { + println_trace!( + "trace_scheduler", + "Current task {:?} (CPU{}) is blocked, not re-enqueueing", + current.id, + eonix_hal::processor::CPU::local().cpuid(), + ); + } _ => unreachable!(), } } @@ -174,6 +183,13 @@ impl Runtime { continue; }; + println_trace!( + "trace_scheduler", + "Switching to 
task {:?} (CPU{})", + next.id, + eonix_hal::processor::CPU::local().cpuid(), + ); + let old_state = next.state.swap(TaskState::RUNNING); assert_eq!( old_state, @@ -195,6 +211,12 @@ impl Runtime { "Current task should be at least in RUNNING state" ); + println_trace!( + "trace_scheduler", + "Task {:?} finished execution, removing...", + Task::current().id, + ); + self.remove_task(&Task::current()); CURRENT_TASK.set(None); diff --git a/src/lib.rs b/src/lib.rs index 2900772a..beebe7c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,15 +28,16 @@ use core::{ }; use eonix_hal::{ arch_exported::bootstrap::shutdown, + context::TaskContext, processor::{halt, CPU, CPU_COUNT}, - traits::trap::IrqState, + traits::{context::RawTaskContext, trap::IrqState}, trap::disable_irqs_save, }; use eonix_mm::address::PRange; -use eonix_runtime::scheduler::RUNTIME; +use eonix_runtime::{executor::Stack, scheduler::RUNTIME}; use kernel::{ mem::GlobalPageAlloc, - task::{ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, + task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, @@ -115,8 +116,21 @@ fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! { drop(data); - RUNTIME.enter(); - shutdown_system(); + let mut ctx = TaskContext::new(); + let stack_bottom = { + let stack = KernelStack::new(); + let bottom = stack.get_bottom().addr().get(); + core::mem::forget(stack); + + bottom + }; + ctx.set_interrupt_enabled(true); + ctx.set_program_counter(standard_main as usize); + ctx.set_stack_pointer(stack_bottom); + + unsafe { + TaskContext::switch_to_noreturn(&mut ctx); + } } #[eonix_hal::ap_main] @@ -128,6 +142,24 @@ fn kernel_ap_main(_stack_range: PRange) -> ! 
{ println_debug!("AP{} started", CPU::local().cpuid()); + let mut ctx = TaskContext::new(); + let stack_bottom = { + let stack = KernelStack::new(); + let bottom = stack.get_bottom().addr().get(); + core::mem::forget(stack); + + bottom + }; + ctx.set_interrupt_enabled(true); + ctx.set_program_counter(standard_main as usize); + ctx.set_stack_pointer(stack_bottom); + + unsafe { + TaskContext::switch_to_noreturn(&mut ctx); + } +} + +fn standard_main() -> ! { RUNTIME.enter(); shutdown_system(); } From 3ab454f6df811741e346cbb7951502151b170f3a Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 02:00:39 +0800 Subject: [PATCH 13/54] riscv64, trap: remove load_interrupt_stack impl This is used only by Thread when we enter user execution context, when we need to save the "interrupt stack" to the local CPU so we can get the information needed to capture the trap. We need to support nested captured trap returns. So instead of setting that manually, we save the needed information when trap_return() is called (since we have precisely the trap context needed) and restore it after the trap is captured. 
Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/cpu.rs | 6 +----- crates/eonix_hal/src/arch/riscv64/trap/mod.rs | 8 ++++++++ src/kernel/task/thread.rs | 8 -------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/cpu.rs b/crates/eonix_hal/src/arch/riscv64/cpu.rs index 8d62e741..7e6e3ac0 100644 --- a/crates/eonix_hal/src/arch/riscv64/cpu.rs +++ b/crates/eonix_hal/src/arch/riscv64/cpu.rs @@ -59,11 +59,7 @@ impl CPU { sscratch::write(TRAP_SCRATCH.as_ptr() as usize); } - pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) { - TRAP_SCRATCH - .as_mut() - .set_trap_context(NonNull::new(sp as *mut _).unwrap()); - } + pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) {} pub fn set_tls32(self: Pin<&mut Self>, _user_tls: &UserTLS) { // nothing diff --git a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs index 2d359759..58566ebe 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs @@ -283,9 +283,15 @@ impl TrapReturn for TrapContext { unsafe fn trap_return(&mut self) { let irq_states = disable_irqs_save(); + let old_handler = core::mem::replace(&mut TRAP_SCRATCH.as_mut().handler, captured_trap_handler); + let old_trap_context = core::mem::replace( + &mut TRAP_SCRATCH.as_mut().trap_context, + Some(NonNull::from(&mut *self)), + ); + let mut to_ctx = TaskContext::new(); to_ctx.set_program_counter(captured_trap_return as usize); to_ctx.set_stack_pointer(&raw mut *self as usize); @@ -296,6 +302,8 @@ impl TrapReturn for TrapContext { } TRAP_SCRATCH.as_mut().handler = old_handler; + TRAP_SCRATCH.as_mut().trap_context = old_trap_context; + irq_states.restore(); } } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index e3b3a967..ef71657f 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -429,14 +429,6 @@ impl Thread { me.load_thread_area32(); } - 
unsafe { - let trap_ctx_ptr: *const TrapContext = &raw const *me.trap_ctx.borrow(); - // SAFETY: - CPU::local() - .as_mut() - .load_interrupt_stack(trap_ctx_ptr as u64); - } - let irq_state = disable_irqs_save(); let result = future.as_mut().poll(cx); From 6b152c74dd16cf54ea60d9d074b1e4baadbc0839 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 02:06:15 +0800 Subject: [PATCH 14/54] riscv64, trap: fix kernel space trap returns On riscv64 platforms, we load the kernel tp only if we've come from U mode to reduce overhead. But we would restore the tp saved in TrapContext even if we are returning to kernel space, which causes problems because the default tp is zero. We should save kernel tp register to the field in TrapContext structs when we set privilege mode to kernel. Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs index 776fab2a..369eef3e 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs @@ -224,7 +224,15 @@ impl RawTrapContext for TrapContext { fn set_user_mode(&mut self, user: bool) { match user { true => self.sstatus.set_spp(SPP::User), - false => self.sstatus.set_spp(SPP::Supervisor), + false => { + unsafe { + core::arch::asm!( + "mv {}, tp", + out(reg) self.regs.tp, + ); + }; + self.sstatus.set_spp(SPP::Supervisor); + } } } From 33ff3156a046af1843ab7afe73fc6daa79cd9557 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 02:52:57 +0800 Subject: [PATCH 15/54] task: brand new block_on and stackful wrapper We provide a simple block_on to constantly poll the given future and block the current execution thread as before. We also introduce a new future wrapper named `stackful` to convert any future into a stackful one. 
We allocate a stack and keep polling the future on the stack by constructing a TrapContext and calling trap_return() to get into the stackful environment. Then we capture the timer interrupt to make preemption work. Signed-off-by: greatbridf --- crates/eonix_runtime/src/task.rs | 7 -- src/kernel/task.rs | 137 +++++++++++++++++++++++++++++++ src/kernel/task/clone.rs | 7 +- src/lib.rs | 4 +- 4 files changed, 143 insertions(+), 12 deletions(-) diff --git a/crates/eonix_runtime/src/task.rs b/crates/eonix_runtime/src/task.rs index 8f4062d8..7b89d3fe 100644 --- a/crates/eonix_runtime/src/task.rs +++ b/crates/eonix_runtime/src/task.rs @@ -93,13 +93,6 @@ impl Task { return rq; } } - - pub fn block_on(future: F) -> F::Output - where - F: Future, - { - todo!() - } } impl Wake for Task { diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 1b47923e..e2bbcb3f 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -19,3 +19,140 @@ pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; pub use thread::{yield_now, Thread, ThreadBuilder}; + +fn do_block_on(mut future: core::pin::Pin<&mut F>) -> F::Output +where + F: core::future::Future, +{ + let waker = core::task::Waker::noop(); + let mut cx = core::task::Context::from_waker(&waker); + + loop { + match future.as_mut().poll(&mut cx) { + core::task::Poll::Ready(output) => return output, + core::task::Poll::Pending => {} + } + } +} + +/// Constantly poll the given future until it is ready, blocking the current thread. +/// +/// # Warning +/// This function will block the current thread and should not be used in async +/// contexts as it might cause infinite blocking or deadlocks. The following is +/// a bad example: +/// +/// ```ignore +/// block_on(async { +/// // This will block the current thread forever. +/// loop { +/// println_debug!("This will never end!"); +/// } +/// }); +/// +/// // The code below will never be reached. 
+/// println_debug!("You'll never see this message!"); +/// ``` +/// +/// Use [`stackful`] instead to run async (or computational) code in a separate +/// stackful (and preemptive) context or `RUNTIME.spawn` to run async code in +/// the runtime's executor. +pub fn block_on(future: F) -> F::Output +where + F: core::future::Future, +{ + do_block_on(core::pin::pin!(future)) +} + +/// Run the given future in a stackful context, allowing it to be preempted by +/// timer interrupts. +/// +/// ```ignore +/// RUNTIME.spawn(stackful(async { +/// // Some simulated computation heavy task. +/// loop { +/// println_debug!("Hello from stackful future!"); +/// } +/// })); +/// ``` +pub async fn stackful(mut future: F) -> F::Output +where + F: core::future::Future, +{ + use core::cell::UnsafeCell; + use eonix_hal::traits::fault::Fault; + use eonix_hal::traits::trap::RawTrapContext; + use eonix_hal::traits::trap::TrapReturn; + use eonix_hal::trap::TrapContext; + use eonix_log::println_debug; + use eonix_runtime::executor::Stack; + + use crate::kernel::{ + interrupt::{default_fault_handler, default_irq_handler}, + timer::{should_reschedule, timer_interrupt}, + }; + + let stack = KernelStack::new(); + + fn execute( + future: core::pin::Pin<&mut F>, + output_ptr: core::ptr::NonNull>, + ) -> ! + where + F: core::future::Future, + { + let output = do_block_on(future); + + unsafe { + output_ptr.write(Some(output)); + } + + unsafe { + core::arch::asm!("ebreak"); + } + + unreachable!() + } + + let sp = stack.get_bottom(); + let output = UnsafeCell::new(None); + + let mut trap_ctx = TrapContext::new(); + + trap_ctx.set_user_mode(false); + trap_ctx.set_interrupt_enabled(true); + let _ = trap_ctx.set_user_call_frame( + execute:: as usize, + Some(sp.addr().get()), + None, + &[(&raw mut future) as usize, output.get() as usize], + |_, _| Ok::<(), u32>(()), + ); + + loop { + unsafe { + trap_ctx.trap_return(); + } + + match trap_ctx.trap_type() { + eonix_hal::traits::trap::TrapType::Syscall { .. 
} => {} + eonix_hal::traits::trap::TrapType::Fault(fault) => { + // Breakpoint + if let Fault::Unknown(3) = &fault { + println_debug!("Breakpoint hit, returning output"); + break output.into_inner().unwrap(); + } + + default_fault_handler(fault, &mut trap_ctx) + } + eonix_hal::traits::trap::TrapType::Irq { callback } => callback(default_irq_handler), + eonix_hal::traits::trap::TrapType::Timer { callback } => { + callback(timer_interrupt); + + if should_reschedule() { + yield_now().await; + } + } + } + } +} diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index 48e34f96..2a16ce56 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,3 +1,4 @@ +use super::{block_on, stackful}; use crate::{ kernel::{ syscall::procops::parse_user_tls, @@ -9,7 +10,7 @@ use crate::{ use bitflags::bitflags; use core::num::NonZero; use eonix_hal::processor::UserTLS; -use eonix_runtime::{scheduler::RUNTIME, task::Task}; +use eonix_runtime::scheduler::RUNTIME; use eonix_sync::AsProof; use posix_types::signal::Signal; @@ -131,7 +132,7 @@ impl CloneArgs { } pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { - let mut procs = Task::block_on(ProcessList::get().write()); + let mut procs = block_on(ProcessList::get().write()); let thread_builder = ThreadBuilder::new().clone_from(&thread, &clone_args)?; let current_process = thread.process.clone(); @@ -163,7 +164,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)? 
} - RUNTIME.spawn(new_thread.run()); + RUNTIME.spawn(stackful(new_thread.run())); Ok(new_pid) } diff --git a/src/lib.rs b/src/lib.rs index beebe7c1..cbe7bc5d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,7 +37,7 @@ use eonix_mm::address::PRange; use eonix_runtime::{executor::Stack, scheduler::RUNTIME}; use kernel::{ mem::GlobalPageAlloc, - task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, + task::{stackful, KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, @@ -272,5 +272,5 @@ async fn init_process(early_kstack: PRange) { // TODO!!!: Remove this. thread.files.open_console(); - RUNTIME.spawn(thread.run()); + RUNTIME.spawn(stackful(thread.run())); } From 5ada0d063410c21ad08a9cbda3a4b93993bae910 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 02:55:56 +0800 Subject: [PATCH 16/54] build, Makefile: remove --feature if none is present If we don't pass in FEATURES or SMP, we will have no feature enabled. In this scenario, the dangling --feature argument will cause cargo to panic. We provide the features and the --feature together to avoid this... 
Signed-off-by: greatbridf --- Makefile.src | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile.src b/Makefile.src index 2701ecd6..ab13f5b8 100644 --- a/Makefile.src +++ b/Makefile.src @@ -22,7 +22,15 @@ KERNEL_CARGO_MANIFESTS += $(shell find src macros crates -name Cargo.toml -type KERNEL_DEPS := $(KERNEL_SOURCES) $(KERNEL_CARGO_MANIFESTS) QEMU_ARGS ?= -no-reboot -CARGO_FLAGS := --profile $(PROFILE) --features $(FEATURES)$(if $(SMP),$(COMMA)smp,) +CARGO_FLAGS := --profile $(PROFILE) + +ifneq ($(SMP),) +CARGO_FLAGS += --features smp +endif + +ifneq ($(FEATURES),) +CARGO_FLAGS += --features $(FEATURES) +endif ifeq ($(HOST),darwin) QEMU_ACCEL ?= -accel tcg From 21dd5ea1c754e5ab9a334960dc5ddfb5f04106e6 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 03:00:04 +0800 Subject: [PATCH 17/54] rcu: provide call_rcu() to call rcu drop asynchronously We can pass a function to be called after a successful rcu_sync call. Signed-off-by: greatbridf --- src/kernel/task/process.rs | 24 +++++++++++++----------- src/rcu.rs | 21 ++++++++++++++----- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index bf2edc95..fb53f4fa 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -4,10 +4,11 @@ use super::{ }; use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; use crate::kernel::task::{CloneArgs, CloneFlags}; +use crate::rcu::call_rcu; use crate::{ kernel::mem::MMList, prelude::*, - rcu::{rcu_sync, RCUPointer, RCUReadGuard}, + rcu::{RCUPointer, RCUReadGuard}, sync::CondVar, }; use alloc::{ @@ -408,12 +409,14 @@ impl Process { .session(session.clone()) .build(&mut process_list); - { - let _old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap(); - let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); - old_pgroup.remove_member(self.pid, process_list.prove_mut()); - Task::block_on(rcu_sync()); - }
+ let old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap(); + let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); + old_pgroup.remove_member(self.pid, process_list.prove_mut()); + + call_rcu(move || { + drop(old_session); + drop(old_pgroup); + }); Ok(pgroup.pgid) } @@ -459,10 +462,9 @@ impl Process { }; pgroup.remove_member(self.pid, procs.prove_mut()); - { - let _old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap(); - Task::block_on(rcu_sync()); - } + + let old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap(); + call_rcu(move || drop(old_pgroup)); Ok(()) } diff --git a/src/rcu.rs b/src/rcu.rs index f018d3f3..32ff7657 100644 --- a/src/rcu.rs +++ b/src/rcu.rs @@ -1,11 +1,11 @@ -use crate::prelude::*; +use crate::{kernel::task::block_on, prelude::*}; use alloc::sync::Arc; use core::{ ops::Deref, ptr::NonNull, sync::atomic::{AtomicPtr, Ordering}, }; -use eonix_runtime::task::Task; +use eonix_runtime::scheduler::RUNTIME; use eonix_sync::{Mutex, RwLock, RwLockReadGuard}; use pointers::BorrowedArc; @@ -21,7 +21,7 @@ impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> { fn lock(value: BorrowedArc<'data, T>) -> Self { Self { value, - _guard: Task::block_on(GLOBAL_RCU_SEM.read()), + _guard: block_on(GLOBAL_RCU_SEM.read()), _phantom: PhantomData, } } @@ -48,6 +48,14 @@ pub async fn rcu_sync() { let _ = GLOBAL_RCU_SEM.write().await; } +pub fn call_rcu(func: impl FnOnce() + Send + 'static) { + RUNTIME.spawn(async move { + // Wait for all readers to finish. + rcu_sync().await; + func(); + }); +} + pub trait RCUNode { fn rcu_prev(&self) -> &AtomicPtr; fn rcu_next(&self) -> &AtomicPtr; @@ -154,7 +162,7 @@ impl> RCUList { } pub fn iter(&self) -> RCUIterator { - let _lck = Task::block_on(self.reader_lock.read()); + let _lck = block_on(self.reader_lock.read()); RCUIterator { // SAFETY: We have a read lock, so the node is still alive. 
@@ -264,7 +272,10 @@ impl Drop for RCUPointer { if let Some(arc) = unsafe { self.swap(None) } { // We only wait if there are other references. if Arc::strong_count(&arc) == 1 { - Task::block_on(rcu_sync()); + call_rcu(move || { + let _ = arc; + todo!(); + }); } } } From 874a4fa000dfbe95a12781a4fa72cc00082baa60 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 03:01:37 +0800 Subject: [PATCH 18/54] task: migrate all Task::block_on calls to task::block_on Simple renamings... Further work is needed to make the system work. Signed-off-by: greatbridf --- src/driver/ahci/mod.rs | 4 +-- src/driver/ahci/port.rs | 6 ++-- src/driver/virtio/riscv64.rs | 8 +++-- src/fs/ext4.rs | 11 +++---- src/fs/fat32.rs | 10 +++--- src/fs/procfs.rs | 10 +++--- src/fs/tmpfs.rs | 46 ++++++++++++++-------------- src/kernel/chardev.rs | 11 +++---- src/kernel/mem/mm_area.rs | 4 +-- src/kernel/mem/mm_list.rs | 14 ++++----- src/kernel/mem/mm_list/page_fault.rs | 5 ++- src/kernel/syscall/file_rw.rs | 26 ++++++++-------- src/kernel/syscall/mm.rs | 22 ++++++------- src/kernel/syscall/procops.rs | 41 ++++++++++++------------- src/kernel/task/process.rs | 14 ++++----- src/kernel/terminal.rs | 5 ++- src/kernel/vfs/dentry/dcache.rs | 4 +-- src/kernel/vfs/file.rs | 19 ++++++------ src/kernel/vfs/filearray.rs | 11 ++++++- src/kernel/vfs/inode.rs | 4 +-- 20 files changed, 137 insertions(+), 138 deletions(-) diff --git a/src/driver/ahci/mod.rs b/src/driver/ahci/mod.rs index e988c9c3..c3b1cfa0 100644 --- a/src/driver/ahci/mod.rs +++ b/src/driver/ahci/mod.rs @@ -6,6 +6,7 @@ use crate::{ constants::{EINVAL, EIO}, interrupt::register_irq_handler, pcie::{self, Header, PCIDevice, PCIDriver, PciError}, + task::block_on, }, prelude::*, }; @@ -13,7 +14,6 @@ use alloc::{format, sync::Arc}; use control::AdapterControl; use defs::*; use eonix_mm::address::{AddrOps as _, PAddr}; -use eonix_runtime::task::Task; use eonix_sync::SpinIrq as _; use port::AdapterPort; @@ -133,7 +133,7 @@ impl Device<'static> { 
port, )?; - Task::block_on(port.partprobe())?; + block_on(port.partprobe())?; Ok(()) })() { diff --git a/src/driver/ahci/port.rs b/src/driver/ahci/port.rs index 27333d5d..f558f6e1 100644 --- a/src/driver/ahci/port.rs +++ b/src/driver/ahci/port.rs @@ -9,11 +9,11 @@ use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; use crate::kernel::constants::{EINVAL, EIO}; use crate::kernel::mem::paging::Page; use crate::kernel::mem::AsMemoryBlock as _; +use crate::kernel::task::block_on; use crate::prelude::*; use alloc::collections::vec_deque::VecDeque; use core::pin::pin; use eonix_mm::address::{Addr as _, PAddr}; -use eonix_runtime::task::Task; use eonix_sync::{SpinIrq as _, WaitList}; /// An `AdapterPort` is an HBA device in AHCI mode. @@ -156,7 +156,7 @@ impl AdapterPort<'_> { wait.as_mut().add_to_wait_list(); drop(free_list); - Task::block_on(wait); + block_on(wait); } } @@ -222,7 +222,7 @@ impl AdapterPort<'_> { self.stats.inc_cmd_sent(); - if let Err(_) = Task::block_on(slot.wait_finish()) { + if let Err(_) = block_on(slot.wait_finish()) { self.stats.inc_cmd_error(); return Err(EIO); }; diff --git a/src/driver/virtio/riscv64.rs b/src/driver/virtio/riscv64.rs index ad132569..9bdbf6ce 100644 --- a/src/driver/virtio/riscv64.rs +++ b/src/driver/virtio/riscv64.rs @@ -1,11 +1,13 @@ use super::virtio_blk::HAL; -use crate::kernel::block::{make_device, BlockDevice}; +use crate::kernel::{ + block::{make_device, BlockDevice}, + task::block_on, +}; use alloc::{sync::Arc, vec::Vec}; use eonix_hal::arch_exported::fdt::FDT; use eonix_hal::mm::ArchPhysAccess; use eonix_log::{println_info, println_warn}; use eonix_mm::address::{PAddr, PhysAccess}; -use eonix_runtime::task::Task; use eonix_sync::Spin; use virtio_drivers::{ device::blk::VirtIOBlk, @@ -47,7 +49,7 @@ pub fn init() { ) .expect("Failed to register VirtIO Block device"); - Task::block_on(block_device.partprobe()) + block_on(block_device.partprobe()) .expect("Failed to probe partitions for VirtIO Block device"); 
disk_id += 1; diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index a2138a09..b4953491 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,6 +1,7 @@ use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use crate::kernel::mem::{PageCache, PageCacheBackend}; +use crate::kernel::task::block_on; use crate::{ io::{Buffer, ByteBuffer}, kernel::{ @@ -24,7 +25,6 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; -use eonix_runtime::task::Task; use eonix_sync::RwLock; use ext4_rs::{BlockDevice as Ext4BlockDeviceTrait, Ext4Error}; use ext4_rs::{Errno, Ext4}; @@ -126,7 +126,7 @@ impl Ext4Fs { }); let root_inode = { - let mut icache = Task::block_on(ext4fs.icache.write()); + let mut icache = block_on(ext4fs.icache.write()); let root_inode = ext4fs.inner.get_inode_ref(2); ext4fs.get_or_insert( @@ -216,7 +216,7 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - Task::block_on(self.page_cache.read(buffer, offset)) + block_on(self.page_cache.read(buffer, offset)) } fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { @@ -251,8 +251,7 @@ impl Inode for DirInode { }; // Fast path: if the inode is already in the cache, return it. - if let Some(inode) = ext4fs.try_get(&Task::block_on(ext4fs.icache.read()), attr.ino as u64) - { + if let Some(inode) = ext4fs.try_get(&block_on(ext4fs.icache.read()), attr.ino as u64) { return Ok(Some(inode)); } @@ -261,7 +260,7 @@ impl Inode for DirInode { let real_perm = extra_perm | perm | perm >> 3 | perm >> 6; // Create a new inode based on the attributes. 
- let mut icache = Task::block_on(ext4fs.icache.write()); + let mut icache = block_on(ext4fs.icache.write()); let inode = ext4fs.get_or_insert( &mut icache, InodeData { diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 852d8673..f328dc74 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -4,6 +4,7 @@ mod file; use crate::io::Stream; use crate::kernel::constants::EIO; use crate::kernel::mem::AsMemoryBlock; +use crate::kernel::task::block_on; use crate::kernel::vfs::inode::WriteOffset; use crate::{ io::{Buffer, ByteBuffer, UninitBuffer}, @@ -32,7 +33,6 @@ use alloc::{ }; use core::{ops::ControlFlow, sync::atomic::Ordering}; use dir::Dirs as _; -use eonix_runtime::task::Task; use eonix_sync::RwLock; use file::ClusterRead; @@ -266,13 +266,13 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - Task::block_on(self.page_cache.read(buffer, offset)) + block_on(self.page_cache.read(buffer, offset)) } fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { let vfs = self.vfs.upgrade().ok_or(EIO)?; let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = Task::block_on(vfs.fat.read()); + let fat = block_on(vfs.fat.read()); if self.size.load(Ordering::Relaxed) as usize == 0 { return Ok(0); @@ -354,7 +354,7 @@ impl Inode for DirInode { fn lookup(&self, dentry: &Arc) -> KResult>> { let vfs = self.vfs.upgrade().ok_or(EIO)?; let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = Task::block_on(vfs.fat.read()); + let fat = block_on(vfs.fat.read()); let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) .read(vfs, 0) @@ -385,7 +385,7 @@ impl Inode for DirInode { ) -> KResult { let vfs = self.vfs.upgrade().ok_or(EIO)?; let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = Task::block_on(vfs.fat.read()); + let fat = block_on(vfs.fat.read()); let cluster_iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) .read(vfs, offset) diff --git a/src/fs/procfs.rs 
b/src/fs/procfs.rs index 85c0ecbb..82f597b8 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,4 +1,5 @@ use crate::kernel::constants::{EACCES, ENOTDIR}; +use crate::kernel::task::block_on; use crate::kernel::timer::Instant; use crate::{ io::Buffer, @@ -17,7 +18,6 @@ use crate::{ }; use alloc::sync::{Arc, Weak}; use core::{ops::ControlFlow, sync::atomic::Ordering}; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, AsProofMut as _, LazyLock, Locked}; use itertools::Itertools; @@ -134,7 +134,7 @@ impl DirInode { impl Inode for DirInode { fn lookup(&self, dentry: &Arc) -> KResult>> { - let lock = Task::block_on(self.rwsem.read()); + let lock = block_on(self.rwsem.read()); Ok(self .entries .access(lock.prove()) @@ -147,7 +147,7 @@ impl Inode for DirInode { offset: usize, callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, ) -> KResult { - let lock = Task::block_on(self.rwsem.read()); + let lock = block_on(self.rwsem.read()); self.entries .access(lock.prove()) .iter() @@ -234,7 +234,7 @@ pub fn creat( let inode = FileInode::new(ino, Arc::downgrade(&fs), file); { - let lock = Task::block_on(parent.idata.rwsem.write()); + let lock = block_on(parent.idata.rwsem.write()); parent .entries .access_mut(lock.prove_mut()) @@ -258,7 +258,7 @@ pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult { parent .entries - .access_mut(Task::block_on(inode.rwsem.write()).prove_mut()) + .access_mut(block_on(inode.rwsem.write()).prove_mut()) .push((Arc::from(name), ProcFsNode::Dir(inode.clone()))); Ok(ProcFsNode::Dir(inode)) diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs index 334e2781..840f97b1 100644 --- a/src/fs/tmpfs.rs +++ b/src/fs/tmpfs.rs @@ -1,6 +1,7 @@ use crate::io::Stream; use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR}; use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend}; +use crate::kernel::task::block_on; use crate::kernel::timer::Instant; use crate::kernel::vfs::inode::InodeData; use 
crate::kernel::vfs::inode::RenameData; @@ -21,7 +22,6 @@ use alloc::sync::{Arc, Weak}; use core::fmt::Debug; use core::{ops::ControlFlow, sync::atomic::Ordering}; use eonix_mm::paging::PAGE_SIZE; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, ProofMut}; use itertools::Itertools; @@ -138,7 +138,7 @@ impl Inode for DirectoryInode { offset: usize, callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, ) -> KResult { - let lock = Task::block_on(self.rwsem.read()); + let lock = block_on(self.rwsem.read()); self.entries .access(lock.prove()) .iter() @@ -153,7 +153,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let file = FileInode::new(ino, self.vfs.clone(), 0, mode); @@ -170,7 +170,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let file = NodeInode::new( @@ -188,7 +188,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let file = SymlinkInode::new(ino, self.vfs.clone(), target.into()); @@ -201,7 +201,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode); @@ -213,11 +213,11 @@ impl Inode for DirectoryInode { fn unlink(&self, at: &Arc) -> KResult<()> { let _vfs = acquire(&self.vfs)?; - let dir_lock = Task::block_on(self.rwsem.write()); + let dir_lock = block_on(self.rwsem.write()); let file = at.get_inode()?; let filename = 
at.get_name(); - let file_lock = Task::block_on(file.rwsem.write()); + let file_lock = block_on(file.rwsem.write()); let entries = self.entries.access_mut(dir_lock.prove_mut()); @@ -240,7 +240,7 @@ impl Inode for DirectoryInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _vfs = acquire(&self.vfs)?; - let _lock = Task::block_on(self.rwsem.write()); + let _lock = block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization let old = self.mode.load(Ordering::Relaxed); @@ -271,7 +271,7 @@ impl Inode for DirectoryInode { .downcast_ref::() .expect("vfs must be a TmpFs"); - let _rename_lock = Task::block_on(vfs.rename_lock.lock()); + let _rename_lock = block_on(vfs.rename_lock.lock()); let old_file = old_dentry.get_inode()?; let new_file = new_dentry.get_inode(); @@ -284,7 +284,7 @@ impl Inode for DirectoryInode { if same_parent { // Same directory rename // Remove from old location and add to new location - let parent_lock = Task::block_on(self.rwsem.write()); + let parent_lock = block_on(self.rwsem.write()); let entries = self.entries.access_mut(parent_lock.prove_mut()); fn rename_old( @@ -328,7 +328,7 @@ impl Inode for DirectoryInode { if let Some(new_idx) = new_entry_idx { // Replace existing file (i.e. 
rename the old and unlink the new) let new_file = new_file.unwrap(); - let _new_file_lock = Task::block_on(new_file.rwsem.write()); + let _new_file_lock = block_on(new_file.rwsem.write()); // SAFETY: `new_file_lock` has done the synchronization if new_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { @@ -364,8 +364,8 @@ impl Inode for DirectoryInode { .downcast_ref::() .expect("new parent must be a DirectoryInode"); - let old_parent_lock = Task::block_on(self.rwsem.write()); - let new_parent_lock = Task::block_on(new_parent_inode.rwsem.write()); + let old_parent_lock = block_on(self.rwsem.write()); + let new_parent_lock = block_on(new_parent_inode.rwsem.write()); let old_ino = old_file.ino; let new_ino = new_file.as_ref().ok().map(|f| f.ino); @@ -391,7 +391,7 @@ impl Inode for DirectoryInode { if has_new { // Replace existing file (i.e. move the old and unlink the new) let new_file = new_file.unwrap(); - let new_file_lock = Task::block_on(new_file.rwsem.write()); + let new_file_lock = block_on(new_file.rwsem.write()); if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 && new_file.mode.load(Ordering::Relaxed) & S_IFDIR == 0 @@ -424,7 +424,7 @@ impl Inode for DirectoryInode { *old_file.ctime.lock() = now; } - Task::block_on(dcache::d_exchange(old_dentry, new_dentry)); + block_on(dcache::d_exchange(old_dentry, new_dentry)); Ok(()) } @@ -511,13 +511,13 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let lock = Task::block_on(self.rwsem.write()); - Task::block_on(self.pages.read(buffer, offset)) + let _lock = block_on(self.rwsem.write()); + block_on(self.pages.read(buffer, offset)) } fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { // TODO: We don't need that strong guarantee, find some way to avoid locks - let lock = Task::block_on(self.rwsem.write()); + let _lock = block_on(self.rwsem.write()); let mut store_new_end = None; let offset = match offset { @@ -530,7 +530,7 @@ impl Inode 
for FileInode { } }; - let wrote = Task::block_on(self.pages.write(stream, offset))?; + let wrote = block_on(self.pages.write(stream, offset))?; let cursor_end = offset + wrote; if let Some(store_end) = store_new_end { @@ -545,8 +545,8 @@ impl Inode for FileInode { } fn truncate(&self, length: usize) -> KResult<()> { - let lock = Task::block_on(self.rwsem.write()); - Task::block_on(self.pages.resize(length))?; + let _lock = block_on(self.rwsem.write()); + block_on(self.pages.resize(length))?; self.size.store(length as u64, Ordering::Relaxed); *self.mtime.lock() = Instant::now(); Ok(()) @@ -554,7 +554,7 @@ impl Inode for FileInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _vfs = acquire(&self.vfs)?; - let _lock = Task::block_on(self.rwsem.write()); + let _lock = block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization let old = self.mode.load(Ordering::Relaxed); diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs index cd23fc14..4e0d9d0b 100644 --- a/src/kernel/chardev.rs +++ b/src/kernel/chardev.rs @@ -2,7 +2,7 @@ use super::{ block::make_device, console::get_console, constants::{EEXIST, EIO}, - task::{ProcessList, Thread}, + task::{block_on, ProcessList, Thread}, terminal::Terminal, vfs::{ file::{File, FileType, TerminalFile}, @@ -18,7 +18,6 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; -use eonix_runtime::task::Task; use eonix_sync::AsProof as _; use posix_types::open::OpenFlags; @@ -43,7 +42,7 @@ static CHAR_DEVICES: Spin>> = Spin::new(BTreeMap impl CharDevice { pub fn read(&self, buffer: &mut dyn Buffer) -> KResult { match &self.device { - CharDeviceType::Terminal(terminal) => Task::block_on(terminal.read(buffer)), + CharDeviceType::Terminal(terminal) => block_on(terminal.read(buffer)), CharDeviceType::Virtual(device) => device.read(buffer), } } @@ -75,13 +74,13 @@ impl CharDevice { pub fn open(self: &Arc, flags: OpenFlags) -> KResult> { Ok(match &self.device { CharDeviceType::Terminal(terminal) => 
{ - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); let current = Thread::current(); let session = current.process.session(procs.prove()); // We only set the control terminal if the process is the session leader. if session.sid == Thread::current().process.pid { // Silently fail if we can't set the control terminal. - dont_check!(Task::block_on(session.set_control_terminal( + dont_check!(block_on(session.set_control_terminal( &terminal, false, procs.prove() @@ -123,7 +122,7 @@ struct ConsoleDevice; impl VirtualCharDevice for ConsoleDevice { fn read(&self, buffer: &mut dyn Buffer) -> KResult { let console_terminal = get_console().ok_or(EIO)?; - Task::block_on(console_terminal.read(buffer)) + block_on(console_terminal.read(buffer)) } fn write(&self, stream: &mut dyn Stream) -> KResult { diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index 956ae7e4..528d79ad 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -2,6 +2,7 @@ use super::mm_list::EMPTY_PAGE; use super::paging::AllocZeroed as _; use super::{AsMemoryBlock, Mapping, Page, Permission}; use crate::kernel::constants::EINVAL; +use crate::kernel::task::block_on; use crate::prelude::KResult; use core::borrow::Borrow; use core::cell::UnsafeCell; @@ -9,7 +10,6 @@ use core::cmp; use eonix_mm::address::{AddrOps as _, VAddr, VRange}; use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; use eonix_mm::paging::{PAGE_SIZE, PFN}; -use eonix_runtime::task::Task; #[derive(Debug)] pub struct MMArea { @@ -209,7 +209,7 @@ impl MMArea { } if attr.contains(PageAttribute::MAPPED) { - Task::block_on(self.handle_mmap(&mut pfn, &mut attr, offset, write))?; + block_on(self.handle_mmap(&mut pfn, &mut attr, offset, write))?; } attr.insert(PageAttribute::ACCESSED); diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index 1d142546..6593624b 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -7,6 
+7,7 @@ use super::paging::AllocZeroed as _; use super::{AsMemoryBlock, MMArea, Page}; use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; use crate::kernel::mem::page_alloc::RawPagePtr; +use crate::kernel::task::block_on; use crate::{prelude::*, sync::ArcSwap}; use alloc::collections::btree_set::BTreeSet; use core::fmt; @@ -23,7 +24,6 @@ use eonix_mm::{ page_table::{PageTable, RawAttribute, PTE}, paging::PAGE_SIZE, }; -use eonix_runtime::task::Task; use eonix_sync::{LazyLock, Mutex}; pub use mapping::{FileMapping, Mapping}; @@ -507,7 +507,7 @@ impl MMList { const VDSO_SIZE: usize = 0x1000; let inner = self.inner.borrow(); - let inner = Task::block_on(inner.lock()); + let inner = block_on(inner.lock()); let mut pte_iter = inner .page_table @@ -538,7 +538,7 @@ impl MMList { is_shared: bool, ) -> KResult { let inner = self.inner.borrow(); - let mut inner = Task::block_on(inner.lock()); + let mut inner = block_on(inner.lock()); if hint == VAddr::NULL { let at = inner.find_available(hint, len).ok_or(ENOMEM)?; @@ -565,14 +565,14 @@ impl MMList { permission: Permission, is_shared: bool, ) -> KResult { - Task::block_on(self.inner.borrow().lock()) + block_on(self.inner.borrow().lock()) .mmap(at, len, mapping.clone(), permission, is_shared) .map(|_| at) } pub fn set_break(&self, pos: Option) -> VAddr { let inner = self.inner.borrow(); - let mut inner = Task::block_on(inner.lock()); + let mut inner = block_on(inner.lock()); // SAFETY: `set_break` is only called in syscalls, where program break should be valid. assert!(inner.break_start.is_some() && inner.break_pos.is_some()); @@ -631,7 +631,7 @@ impl MMList { /// This should be called only **once** for every thread. 
pub fn register_break(&self, start: VAddr) { let inner = self.inner.borrow(); - let mut inner = Task::block_on(inner.lock()); + let mut inner = block_on(inner.lock()); assert!(inner.break_start.is_none() && inner.break_pos.is_none()); inner.break_start = Some(start.into()); @@ -651,7 +651,7 @@ impl MMList { } let inner = self.inner.borrow(); - let inner = Task::block_on(inner.lock()); + let inner = block_on(inner.lock()); let mut offset = 0; let mut remaining = len; diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs index bb62b388..089fdf06 100644 --- a/src/kernel/mem/mm_list/page_fault.rs +++ b/src/kernel/mem/mm_list/page_fault.rs @@ -1,10 +1,9 @@ use super::{MMList, VAddr}; -use crate::kernel::task::Thread; +use crate::kernel::task::{block_on, Thread}; use eonix_hal::mm::flush_tlb; use eonix_hal::traits::fault::PageFaultErrorCode; use eonix_mm::address::{Addr as _, AddrOps as _, VRange}; use eonix_mm::paging::PAGE_SIZE; -use eonix_runtime::task::Task; use posix_types::signal::Signal; #[repr(C)] @@ -149,7 +148,7 @@ pub fn handle_kernel_page_fault( let mms = &Thread::current().process.mm_list; let inner = mms.inner.borrow(); - let inner = Task::block_on(inner.lock()); + let inner = block_on(inner.lock()); let area = match inner.areas.get(&VRange::from(addr)) { Some(area) => area, diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 5683b27e..97d47c48 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,11 +1,9 @@ -use core::time::Duration; - use super::FromSyscallArg; use crate::io::IntoStream; use crate::kernel::constants::{ EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, S_IFBLK, S_IFCHR, }; -use crate::kernel::task::Thread; +use crate::kernel::task::{block_on, Thread}; use crate::kernel::timer::sleep; use crate::kernel::vfs::filearray::FD; use crate::{ @@ -24,7 +22,7 @@ use crate::{ prelude::*, }; use alloc::sync::Arc; -use 
eonix_runtime::task::Task; +use core::time::Duration; use posix_types::ctypes::{Long, PtrT}; use posix_types::namei::RenameFlags; use posix_types::open::{AtFlags, OpenFlags}; @@ -77,14 +75,14 @@ fn dentry_from( fn read(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - Task::block_on(thread.files.get(fd).ok_or(EBADF)?.read(&mut buffer, None)) + block_on(thread.files.get(fd).ok_or(EBADF)?.read(&mut buffer, None)) } #[eonix_macros::define_syscall(SYS_PREAD64)] fn pread64(fd: FD, buffer: *mut u8, bufsize: usize, offset: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - Task::block_on( + block_on( thread .files .get(fd) @@ -98,7 +96,7 @@ fn write(fd: FD, buffer: *const u8, count: usize) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); - Task::block_on(thread.files.get(fd).ok_or(EBADF)?.write(&mut stream, None)) + block_on(thread.files.get(fd).ok_or(EBADF)?.write(&mut stream, None)) } #[eonix_macros::define_syscall(SYS_PWRITE64)] @@ -106,7 +104,7 @@ fn pwrite64(fd: FD, buffer: *const u8, count: usize, offset: usize) -> KResult KResult { let mut tot = 0usize; for mut buffer in iov_buffers.into_iter() { // TODO!!!: `readv` - let nread = Task::block_on(file.read(&mut buffer, None))?; + let nread = block_on(file.read(&mut buffer, None))?; tot += nread; if nread != buffer.total() { @@ -426,7 +424,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { let mut tot = 0usize; for mut stream in iov_streams.into_iter() { - let nread = Task::block_on(file.write(&mut stream, None))?; + let nread = block_on(file.write(&mut stream, None))?; tot += nread; if nread == 0 || !stream.is_drained() { @@ -477,7 +475,7 @@ fn sendfile64(out_fd: FD, in_fd: FD, offset: *mut u8, count: usize) -> KResult K let mut fd = fds.read()?; let file = thread.files.get(fd.fd).ok_or(EBADF)?; - fd.revents = 
Task::block_on(file.poll(PollEvent::from_bits_retain(fd.events)))?.bits(); + fd.revents = block_on(file.poll(PollEvent::from_bits_retain(fd.events)))?.bits(); fds.write(fd)?; Ok(1) @@ -550,11 +548,11 @@ fn pselect6( } let timeout = UserPointerMut::new(timeout)?; - + // Read here to check for invalid pointers. let _timeout_value = timeout.read()?; - Task::block_on(sleep(Duration::from_millis(10))); + block_on(sleep(Duration::from_millis(10))); timeout.write(TimeSpec { tv_sec: 0, diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index dd263e6b..b6ba5fdc 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -2,7 +2,7 @@ use super::FromSyscallArg; use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; use crate::kernel::mem::FileMapping; -use crate::kernel::task::Thread; +use crate::kernel::task::{block_on, Thread}; use crate::kernel::vfs::filearray::FD; use crate::{ kernel::{ @@ -14,7 +14,6 @@ use crate::{ use align_ext::AlignExt; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr}; use eonix_mm::paging::PAGE_SIZE; -use eonix_runtime::task::Task; use posix_types::syscall_no::*; impl FromSyscallArg for UserMmapProtocol { @@ -67,11 +66,8 @@ fn do_mmap2( Mapping::Anonymous } else { // The mode is unimportant here, since we are checking prot in mm_area. - let shared_area = Task::block_on(SHM_MANAGER.lock()).create_shared_area( - len, - thread.process.pid, - 0x777, - ); + let shared_area = + block_on(SHM_MANAGER.lock()).create_shared_area(len, thread.process.pid, 0x777); Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) } } else { @@ -94,7 +90,7 @@ fn do_mmap2( // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether // `addr` is above user reachable memory. 
let addr = if flags.contains(UserMmapFlags::MAP_FIXED) { - Task::block_on(mm_list.unmap(addr, len)); + block_on(mm_list.unmap(addr, len)); mm_list.mmap_fixed(addr, len, mapping, permission, is_shared) } else { mm_list.mmap_hint(addr, len, mapping, permission, is_shared) @@ -137,7 +133,7 @@ fn munmap(addr: usize, len: usize) -> KResult { } let len = len.align_up(PAGE_SIZE); - Task::block_on(thread.process.mm_list.unmap(addr, len)).map(|_| 0) + block_on(thread.process.mm_list.unmap(addr, len)).map(|_| 0) } #[eonix_macros::define_syscall(SYS_BRK)] @@ -160,7 +156,7 @@ fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { let len = len.align_up(PAGE_SIZE); - Task::block_on(thread.process.mm_list.protect( + block_on(thread.process.mm_list.protect( addr, len, Permission { @@ -175,7 +171,7 @@ fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { let size = size.align_up(PAGE_SIZE); - let mut shm_manager = Task::block_on(SHM_MANAGER.lock()); + let mut shm_manager = block_on(SHM_MANAGER.lock()); let shmid = gen_shm_id(key)?; let mode = shmflg & 0o777; @@ -207,7 +203,7 @@ fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { #[eonix_macros::define_syscall(SYS_SHMAT)] fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { let mm_list = &thread.process.mm_list; - let shm_manager = Task::block_on(SHM_MANAGER.lock()); + let shm_manager = block_on(SHM_MANAGER.lock()); let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?; let mode = shmflg & 0o777; @@ -256,7 +252,7 @@ fn shmdt(addr: usize) -> KResult { let size = *shm_areas.get(&addr).ok_or(EINVAL)?; shm_areas.remove(&addr); drop(shm_areas); - return Task::block_on(thread.process.mm_list.unmap(addr, size)).map(|_| 0); + return block_on(thread.process.mm_list.unmap(addr, size)).map(|_| 0); } #[eonix_macros::define_syscall(SYS_SHMCTL)] diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 
c21aade5..62194691 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -8,8 +8,8 @@ use crate::kernel::constants::{ }; use crate::kernel::mem::PageBuffer; use crate::kernel::task::{ - do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, ProgramLoader, - RobustListHead, SignalAction, Thread, WaitId, WaitType, + block_on, do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, + ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, }; use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; @@ -27,7 +27,6 @@ use eonix_hal::processor::UserTLS; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; -use eonix_runtime::task::Task; use eonix_sync::AsProof as _; use posix_types::ctypes::PtrT; use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; @@ -59,7 +58,7 @@ fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - Task::block_on(sleep(duration)); + block_on(sleep(duration)); if let Some(rem) = rem { rem.write((0, 0))?; @@ -90,7 +89,7 @@ fn clock_nanosleep( }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - Task::block_on(sleep(duration)); + block_on(sleep(duration)); if let Some(rem) = rem { rem.write((0, 0))?; @@ -212,7 +211,7 @@ fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult KResult SyscallNoReturn { unsafe { - let mut procs = Task::block_on(ProcessList::get().write()); - Task::block_on(procs.do_exit(&thread, WaitType::Exited(status), false)); + let mut procs = block_on(ProcessList::get().write()); + block_on(procs.do_exit(&thread, WaitType::Exited(status), false)); } SyscallNoReturn @@ -249,8 +248,8 @@ fn exit(status: u32) -> SyscallNoReturn { #[eonix_macros::define_syscall(SYS_EXIT_GROUP)] fn 
exit_group(status: u32) -> SyscallNoReturn { unsafe { - let mut procs = Task::block_on(ProcessList::get().write()); - Task::block_on(procs.do_exit(&thread, WaitType::Exited(status), true)); + let mut procs = block_on(ProcessList::get().write()); + block_on(procs.do_exit(&thread, WaitType::Exited(status), true)); } SyscallNoReturn @@ -278,7 +277,7 @@ fn do_waitid( Some(options) => options, }; - let Some(wait_object) = Task::block_on(thread.process.wait( + let Some(wait_object) = block_on(thread.process.wait( wait_id, options.contains(UserWaitOptions::WNOHANG), options.contains(UserWaitOptions::WUNTRACED), @@ -377,7 +376,7 @@ fn getsid(pid: u32) -> KResult { if pid == 0 { Ok(thread.process.session_rcu().sid) } else { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); procs .try_find_process(pid) .map(|proc| proc.session(procs.prove()).sid) @@ -390,7 +389,7 @@ fn getpgid(pid: u32) -> KResult { if pid == 0 { Ok(thread.process.pgroup_rcu().pgid) } else { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); procs .try_find_process(pid) .map(|proc| proc.pgroup(procs.prove()).pgid) @@ -476,7 +475,7 @@ fn getrandom(buf: *mut u8, buflen: usize, _flags: u32) -> isize { #[eonix_macros::define_syscall(SYS_SCHED_YIELD)] fn sched_yield() -> KResult<()> { - Task::block_on(yield_now()); + block_on(yield_now()); Ok(()) } @@ -572,7 +571,7 @@ fn prctl(option: u32, arg2: usize) -> KResult<()> { #[eonix_macros::define_syscall(SYS_KILL)] fn kill(pid: i32, sig: u32) -> KResult<()> { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); match pid { // Send signal to every process for which the calling process has // permission to send signals. 
@@ -599,7 +598,7 @@ fn kill(pid: i32, sig: u32) -> KResult<()> { #[eonix_macros::define_syscall(SYS_TKILL)] fn tkill(tid: u32, sig: u32) -> KResult<()> { - Task::block_on(ProcessList::get().read()) + block_on(ProcessList::get().read()) .try_find_thread(tid) .ok_or(ESRCH)? .raise(Signal::try_from_raw(sig)?); @@ -608,7 +607,7 @@ fn tkill(tid: u32, sig: u32) -> KResult<()> { #[eonix_macros::define_syscall(SYS_TGKILL)] fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); let thread_to_kill = procs.try_find_thread(tid).ok_or(ESRCH)?; if thread_to_kill.process.pid != tgid { @@ -867,11 +866,11 @@ fn futex( match futex_op { FutexOp::FUTEX_WAIT => { - Task::block_on(futex_wait(uaddr, pid, val as u32, None))?; + block_on(futex_wait(uaddr, pid, val as u32, None))?; return Ok(0); } FutexOp::FUTEX_WAKE => { - return Task::block_on(futex_wake(uaddr, pid, val as u32)); + return block_on(futex_wake(uaddr, pid, val as u32)); } FutexOp::FUTEX_REQUEUE => { todo!() @@ -906,7 +905,7 @@ fn rt_sigreturn() -> KResult { "`rt_sigreturn` failed in thread {} with error {err}!", thread.tid ); - Task::block_on(thread.force_kill(Signal::SIGSEGV)); + block_on(thread.force_kill(Signal::SIGSEGV)); })?; Ok(SyscallNoReturn) @@ -927,7 +926,7 @@ fn sigreturn() -> KResult { "`sigreturn` failed in thread {} with error {err}!", thread.tid ); - Task::block_on(thread.force_kill(Signal::SIGSEGV)); + block_on(thread.force_kill(Signal::SIGSEGV)); })?; Ok(SyscallNoReturn) diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index fb53f4fa..3e69fc4b 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -1,3 +1,4 @@ +use super::block_on; use super::{ process_group::ProcessGroupBuilder, signal::RaiseResult, thread::ThreadBuilder, ProcessGroup, ProcessList, Session, Thread, @@ -17,7 +18,6 @@ use alloc::{ }; use core::sync::atomic::{AtomicU32, Ordering}; use 
eonix_mm::address::VAddr; -use eonix_runtime::task::Task; use eonix_sync::{ AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, SpinGuard, UnlockableGuard as _, UnlockedGuard as _, @@ -134,7 +134,7 @@ impl WaitId { } else if id == -1 { WaitId::Any } else if id == 0 { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); WaitId::Pgid(thread.process.pgroup(procs.prove()).pgid) } else { WaitId::Pid(id.cast_unsigned()) @@ -208,9 +208,9 @@ impl ProcessBuilder { pub fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { let mm_list = if clone_args.flags.contains(CloneFlags::CLONE_VM) { - Task::block_on(process.mm_list.new_shared()) + block_on(process.mm_list.new_shared()) } else { - Task::block_on(process.mm_list.new_cloned()) + block_on(process.mm_list.new_cloned()) }; if let Some(exit_signal) = clone_args.exit_signal { @@ -396,7 +396,7 @@ impl Process { /// Create a new session for the process. pub fn setsid(self: &Arc) -> KResult { - let mut process_list = Task::block_on(ProcessList::get().write()); + let mut process_list = block_on(ProcessList::get().write()); // If there exists a session that has the same sid as our pid, we can't create a new // session. The standard says that we should create a new process group and be the // only process in the new process group and session. @@ -474,7 +474,7 @@ impl Process { /// This function should be called on the process that issued the syscall in order to do /// permission checks. pub fn setpgid(self: &Arc, pid: u32, pgid: u32) -> KResult<()> { - let mut procs = Task::block_on(ProcessList::get().write()); + let mut procs = block_on(ProcessList::get().write()); // We may set pgid of either the calling process or a child process. 
if pid == self.pid { self.do_setpgid(pgid, &mut procs) @@ -609,7 +609,7 @@ impl Entry<'_, '_, '_> { WaitId::Any => true, WaitId::Pid(pid) => item.pid == pid, WaitId::Pgid(pgid) => { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); if let Some(process) = procs.try_find_process(item.pid) { return process.pgroup(procs.prove()).pgid == pgid; } diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 5532a2e1..134021a8 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -1,5 +1,5 @@ use super::{ - task::{ProcessList, Session, Thread}, + task::{block_on, ProcessList, Session, Thread}, user::{UserPointer, UserPointerMut}, }; use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; @@ -10,7 +10,6 @@ use alloc::{ }; use bitflags::bitflags; use eonix_log::ConsoleWrite; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, Mutex}; use posix_types::signal::Signal; @@ -449,7 +448,7 @@ impl Terminal { fn signal(&self, inner: &mut TerminalInner, signal: Signal) { if let Some(session) = inner.session.upgrade() { - Task::block_on(session.raise_foreground(signal)); + block_on(session.raise_foreground(signal)); } if !inner.termio.noflsh() { self.clear_read_buffer(inner); diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs index 9dfdbddc..06a4e14d 100644 --- a/src/kernel/vfs/dentry/dcache.rs +++ b/src/kernel/vfs/dentry/dcache.rs @@ -1,5 +1,6 @@ use super::{Dentry, Inode}; use crate::kernel::constants::ENOENT; +use crate::kernel::task::block_on; use crate::rcu::RCUPointer; use crate::{ kernel::vfs::{s_isdir, s_islnk}, @@ -8,7 +9,6 @@ use crate::{ }; use alloc::sync::Arc; use core::sync::atomic::Ordering; -use eonix_runtime::task::Task; use eonix_sync::Mutex; const DCACHE_HASH_BITS: u32 = 8; @@ -42,7 +42,7 @@ pub fn d_find_fast(dentry: &Dentry) -> Option> { /// /// Silently fail without any side effects pub fn d_try_revalidate(dentry: &Arc) { - let _lock = 
Task::block_on(D_EXCHANGE_LOCK.lock()); + let _lock = block_on(D_EXCHANGE_LOCK.lock()); (|| -> KResult<()> { let parent = dentry.parent().get_inode()?; diff --git a/src/kernel/vfs/file.rs b/src/kernel/vfs/file.rs index 49cb1d44..75e4df2f 100644 --- a/src/kernel/vfs/file.rs +++ b/src/kernel/vfs/file.rs @@ -8,7 +8,7 @@ use crate::{ kernel::{ constants::{TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, mem::{paging::Page, AsMemoryBlock as _}, - task::Thread, + task::{block_on, Thread}, terminal::{Terminal, TerminalIORequest}, user::{UserPointer, UserPointerMut}, vfs::inode::Inode, @@ -29,7 +29,6 @@ use core::{ ops::{ControlFlow, Deref}, sync::atomic::{AtomicU32, Ordering}, }; -use eonix_runtime::task::Task; use eonix_sync::Mutex; use posix_types::{open::OpenFlags, signal::Signal, stat::StatX}; @@ -159,7 +158,7 @@ impl Pipe { } fn close_read(&self) { - let mut inner = Task::block_on(self.inner.lock()); + let mut inner = block_on(self.inner.lock()); if inner.read_closed { return; } @@ -169,7 +168,7 @@ impl Pipe { } fn close_write(&self) { - let mut inner = Task::block_on(self.inner.lock()); + let mut inner = block_on(self.inner.lock()); if inner.write_closed { return; } @@ -318,7 +317,7 @@ impl InodeFile { } fn seek(&self, option: SeekOption) -> KResult { - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); let new_cursor = match option { SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, @@ -339,7 +338,7 @@ impl InodeFile { return Err(EBADF); } - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); if self.append { let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; @@ -367,7 +366,7 @@ impl InodeFile { let nread = self.dentry.read(buffer, offset)?; nread } else { - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); let nread = self.dentry.read(buffer, *cursor)?; @@ -379,7 
+378,7 @@ impl InodeFile { } fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); let nread = self.dentry.readdir(*cursor, |filename, ino| { // Filename length + 1 for padding '\0' @@ -409,7 +408,7 @@ impl InodeFile { } fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); let nread = self.dentry.readdir(*cursor, |filename, ino| { // + 1 for filename length padding '\0', + 1 for d_type. @@ -466,7 +465,7 @@ impl TerminalFile { } fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { - Task::block_on(self.terminal.ioctl(match request as u32 { + block_on(self.terminal.ioctl(match request as u32 { TCGETS => TerminalIORequest::GetTermios(UserPointerMut::new_vaddr(arg3)?), TCSETS => TerminalIORequest::SetTermios(UserPointer::new_vaddr(arg3)?), TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::new_vaddr(arg3)?), diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index f8b06a12..0fb9205d 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -29,7 +29,7 @@ use itertools::{ }; use posix_types::open::{FDFlags, OpenFlags}; -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct FD(u32); #[derive(Clone)] @@ -323,6 +323,15 @@ impl FD { pub const AT_FDCWD: FD = FD(-100i32 as u32); } +impl core::fmt::Debug for FD { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + &Self::AT_FDCWD => f.write_str("FD(AT_FDCWD)"), + FD(no) => f.debug_tuple("FD").field(&no).finish(), + } + } +} + impl FromSyscallArg for FD { fn from_arg(value: usize) -> Self { Self(value as u32) diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs index 2b52043d..3eb6c8dc 100644 --- a/src/kernel/vfs/inode.rs +++ 
b/src/kernel/vfs/inode.rs @@ -5,6 +5,7 @@ use crate::kernel::constants::{ STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR, S_IFMT, }; use crate::kernel::mem::PageCache; +use crate::kernel::task::block_on; use crate::kernel::timer::Instant; use crate::{io::Buffer, prelude::*}; use alloc::sync::{Arc, Weak}; @@ -14,7 +15,6 @@ use core::{ ptr::addr_of_mut, sync::atomic::{AtomicU32, AtomicU64, Ordering}, }; -use eonix_runtime::task::Task; use eonix_sync::RwLock; use posix_types::stat::StatX; @@ -280,7 +280,7 @@ pub trait Inode: Send + Sync + InodeInner + Any { f( uninit_mut.as_mut_ptr(), // SAFETY: `idata` is initialized and we will never move the lock. - &Task::block_on(unsafe { idata.assume_init_ref() }.rwsem.read()), + &block_on(unsafe { idata.assume_init_ref() }.rwsem.read()), ); // Safety: `uninit` is initialized From 661a15940badf992d7836eb93c7ab293590c6ad4 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 10 Aug 2025 00:23:12 +0800 Subject: [PATCH 19/54] riscv64, trap: rework to fix nested captured traps The previous implementation has some bugs inside that will cause kernel space nested traps to lose some required information: - In kernel mode, trap contexts are saved above the current stack frame without exception, which is not what we want. We expect to read the trap data in the CAPTURED context. - The capturer task context is not saved as well, which will mess up the nested traps completely. - We are reading page fault virtual addresses in TrapContext::trap_type, which won't work since if the inner trap is captured, and the outer trap interleaves with the trap_type() call, we will lose the stval data in the inner trap. The solution is to separate our "normal" trap handling procedure out of captured trap handling procedure. We swap the stvec CSR when we set up captured traps and restore it afterwards so the two approaches don't have to be told apart in trap entries.
Then, we can store the TrapContext pointer in sscratch without having to distinguish between trap handling types. In this way, we keep the procedure simple. The register stval is saved together with other registers to be used in page faults. Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 8 - crates/eonix_hal/src/arch/riscv64/cpu.rs | 13 +- .../src/arch/riscv64/trap/captured.rs | 177 +++++++++++ .../src/arch/riscv64/trap/default.rs | 134 ++++++++ crates/eonix_hal/src/arch/riscv64/trap/mod.rs | 291 ++---------------- .../src/arch/riscv64/trap/trap_context.rs | 97 +++--- 6 files changed, 389 insertions(+), 331 deletions(-) create mode 100644 crates/eonix_hal/src/arch/riscv64/trap/captured.rs create mode 100644 crates/eonix_hal/src/arch/riscv64/trap/default.rs diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 7c6a6ae0..0f1dff63 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -3,7 +3,6 @@ use super::{ console::write_str, cpu::{CPUID, CPU_COUNT}, time::set_next_timer, - trap::TRAP_SCRATCH, }; use crate::{ arch::{ @@ -234,13 +233,6 @@ fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) { } percpu_area.register(cpu.cpuid()); - - unsafe { - // SAFETY: Interrupts are disabled. 
- TRAP_SCRATCH - .as_mut() - .set_kernel_tp(PercpuArea::get_for(cpu.cpuid()).unwrap().cast()); - } } fn get_ap_start_addr() -> usize { diff --git a/crates/eonix_hal/src/arch/riscv64/cpu.rs b/crates/eonix_hal/src/arch/riscv64/cpu.rs index 7e6e3ac0..9c843eaf 100644 --- a/crates/eonix_hal/src/arch/riscv64/cpu.rs +++ b/crates/eonix_hal/src/arch/riscv64/cpu.rs @@ -1,9 +1,13 @@ use super::{ interrupt::InterruptControl, - trap::{setup_trap, TRAP_SCRATCH}, + trap::{setup_trap, TrapContext}, }; use crate::arch::fdt::{FdtExt, FDT}; -use core::{arch::asm, pin::Pin, ptr::NonNull, sync::atomic::AtomicUsize}; +use core::{ + arch::asm, cell::UnsafeCell, mem::MaybeUninit, pin::Pin, ptr::NonNull, + sync::atomic::AtomicUsize, +}; +use eonix_hal_traits::trap::RawTrapContext; use eonix_preempt::PreemptGuard; use eonix_sync_base::LazyLock; use riscv::register::{ @@ -17,6 +21,9 @@ pub static CPU_COUNT: AtomicUsize = AtomicUsize::new(0); #[eonix_percpu::define_percpu] pub static CPUID: usize = 0; +#[eonix_percpu::define_percpu] +static DEFAULT_TRAP_CONTEXT: MaybeUninit = MaybeUninit::uninit(); + #[eonix_percpu::define_percpu] static LOCAL_CPU: LazyLock = LazyLock::new(|| CPU::new(CPUID.get())); @@ -56,7 +63,7 @@ impl CPU { interrupt.init(); sstatus::set_sum(); - sscratch::write(TRAP_SCRATCH.as_ptr() as usize); + sscratch::write(DEFAULT_TRAP_CONTEXT.as_ptr() as usize); } pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) {} diff --git a/crates/eonix_hal/src/arch/riscv64/trap/captured.rs b/crates/eonix_hal/src/arch/riscv64/trap/captured.rs new file mode 100644 index 00000000..d4c00e9f --- /dev/null +++ b/crates/eonix_hal/src/arch/riscv64/trap/captured.rs @@ -0,0 +1,177 @@ +use crate::{arch::trap::Registers, context::TaskContext, trap::TrapContext}; +use core::{arch::naked_asm, mem::MaybeUninit}; +use eonix_hal_traits::context::RawTaskContext; + +static mut DIRTY_TASK_CONTEXT: MaybeUninit = MaybeUninit::uninit(); + +// If captured trap context is present, we use it directly. 
+// We need to restore the kernel tp from that TrapContext but sp is +// fine since we will use TaskContext::switch. +#[unsafe(naked)] +pub(super) unsafe extern "C" fn _captured_trap_entry() -> ! { + naked_asm!( + "csrrw t0, sscratch, t0", + "sd tp, {tp}(t0)", + "ld tp, {ra}(t0)", // Load kernel tp from trap_ctx.ra + "sd ra, {ra}(t0)", + "ld ra, {sp}(t0)", // Load capturer task context from trap_ctx.sp + "sd sp, {sp}(t0)", + "sd gp, {gp}(t0)", + "sd a0, {a0}(t0)", + "sd a1, {a1}(t0)", + "sd a2, {a2}(t0)", + "sd a3, {a3}(t0)", + "sd a4, {a4}(t0)", + "sd t1, {t1}(t0)", + "sd a5, {a5}(t0)", + "sd a6, {a6}(t0)", + "sd a7, {a7}(t0)", + "sd t3, {t3}(t0)", + "sd t4, {t4}(t0)", + "sd t5, {t5}(t0)", + "sd t2, {t2}(t0)", + "sd t6, {t6}(t0)", + "sd s0, {s0}(t0)", + "sd s1, {s1}(t0)", + "sd s2, {s2}(t0)", + "sd s3, {s3}(t0)", + "sd s4, {s4}(t0)", + "sd s5, {s5}(t0)", + "sd s6, {s6}(t0)", + "sd s7, {s7}(t0)", + "sd s8, {s8}(t0)", + "sd s9, {s9}(t0)", + "sd s10, {s10}(t0)", + "sd s11, {s11}(t0)", + "csrr t2, sstatus", + "csrr t3, sepc", + "csrr t4, scause", + "csrr t5, stval", + "csrrw t6, sscratch, t0", + "sd t6, {t0}(t0)", + "sd t2, {sstatus}(t0)", + "sd t3, {sepc}(t0)", + "sd t4, {scause}(t0)", + "sd t5, {stval}(t0)", + "la a0, {dirty_task_context}", + "mv a1, ra", + "j {task_context_switch}", + ra = const Registers::OFFSET_RA, + sp = const Registers::OFFSET_SP, + gp = const Registers::OFFSET_GP, + tp = const Registers::OFFSET_TP, + t1 = const Registers::OFFSET_T1, + t2 = const Registers::OFFSET_T2, + t0 = const Registers::OFFSET_T0, + a0 = const Registers::OFFSET_A0, + a1 = const Registers::OFFSET_A1, + a2 = const Registers::OFFSET_A2, + a3 = const Registers::OFFSET_A3, + a4 = const Registers::OFFSET_A4, + a5 = const Registers::OFFSET_A5, + a6 = const Registers::OFFSET_A6, + a7 = const Registers::OFFSET_A7, + t3 = const Registers::OFFSET_T3, + t4 = const Registers::OFFSET_T4, + t5 = const Registers::OFFSET_T5, + t6 = const Registers::OFFSET_T6, + s0 = const 
Registers::OFFSET_S0, + s1 = const Registers::OFFSET_S1, + s2 = const Registers::OFFSET_S2, + s3 = const Registers::OFFSET_S3, + s4 = const Registers::OFFSET_S4, + s5 = const Registers::OFFSET_S5, + s6 = const Registers::OFFSET_S6, + s7 = const Registers::OFFSET_S7, + s8 = const Registers::OFFSET_S8, + s9 = const Registers::OFFSET_S9, + s10 = const Registers::OFFSET_S10, + s11 = const Registers::OFFSET_S11, + sstatus = const TrapContext::OFFSET_SSTATUS, + sepc = const TrapContext::OFFSET_SEPC, + scause = const TrapContext::OFFSET_SCAUSE, + stval = const TrapContext::OFFSET_STVAL, + dirty_task_context = sym DIRTY_TASK_CONTEXT, + task_context_switch = sym TaskContext::switch, + ); +} + +#[unsafe(naked)] +pub(super) unsafe extern "C" fn _captured_trap_return(ctx: &mut TrapContext) -> ! { + naked_asm!( + "csrr t0, sscratch", + "ld t1, {sstatus}(t0)", + "ld t2, {sepc}(t0)", + "csrw sstatus, t1", + "csrw sepc, t2", + "mv t4, tp", + "mv t5, sp", + "ld tp, {tp}(t0)", + "ld ra, {ra}(t0)", + "ld sp, {sp}(t0)", + "sd t4, {ra}(t0)", // Store kernel tp to trap_ctx.ra + "sd t5, {sp}(t0)", // Store capturer task context to trap_ctx.sp + "ld gp, {gp}(t0)", + "ld a0, {a0}(t0)", + "ld a1, {a1}(t0)", + "ld a2, {a2}(t0)", + "ld a3, {a3}(t0)", + "ld a4, {a4}(t0)", + "ld t1, {t1}(t0)", + "ld a5, {a5}(t0)", + "ld a6, {a6}(t0)", + "ld a7, {a7}(t0)", + "ld t3, {t3}(t0)", + "ld t4, {t4}(t0)", + "ld t5, {t5}(t0)", + "ld t2, {t2}(t0)", + "ld t6, {t6}(t0)", + "ld s0, {s0}(t0)", + "ld s1, {s1}(t0)", + "ld s2, {s2}(t0)", + "ld s3, {s3}(t0)", + "ld s4, {s4}(t0)", + "ld s5, {s5}(t0)", + "ld s6, {s6}(t0)", + "ld s7, {s7}(t0)", + "ld s8, {s8}(t0)", + "ld s9, {s9}(t0)", + "ld s10, {s10}(t0)", + "ld s11, {s11}(t0)", + "ld t0, {t0}(t0)", + "sret", + ra = const Registers::OFFSET_RA, + sp = const Registers::OFFSET_SP, + gp = const Registers::OFFSET_GP, + tp = const Registers::OFFSET_TP, + t1 = const Registers::OFFSET_T1, + t2 = const Registers::OFFSET_T2, + t0 = const Registers::OFFSET_T0, + a0 = const 
Registers::OFFSET_A0, + a1 = const Registers::OFFSET_A1, + a2 = const Registers::OFFSET_A2, + a3 = const Registers::OFFSET_A3, + a4 = const Registers::OFFSET_A4, + a5 = const Registers::OFFSET_A5, + a6 = const Registers::OFFSET_A6, + a7 = const Registers::OFFSET_A7, + t3 = const Registers::OFFSET_T3, + t4 = const Registers::OFFSET_T4, + t5 = const Registers::OFFSET_T5, + t6 = const Registers::OFFSET_T6, + s0 = const Registers::OFFSET_S0, + s1 = const Registers::OFFSET_S1, + s2 = const Registers::OFFSET_S2, + s3 = const Registers::OFFSET_S3, + s4 = const Registers::OFFSET_S4, + s5 = const Registers::OFFSET_S5, + s6 = const Registers::OFFSET_S6, + s7 = const Registers::OFFSET_S7, + s8 = const Registers::OFFSET_S8, + s9 = const Registers::OFFSET_S9, + s10 = const Registers::OFFSET_S10, + s11 = const Registers::OFFSET_S11, + sstatus = const TrapContext::OFFSET_SSTATUS, + sepc = const TrapContext::OFFSET_SEPC, + ); +} diff --git a/crates/eonix_hal/src/arch/riscv64/trap/default.rs b/crates/eonix_hal/src/arch/riscv64/trap/default.rs new file mode 100644 index 00000000..4025b719 --- /dev/null +++ b/crates/eonix_hal/src/arch/riscv64/trap/default.rs @@ -0,0 +1,134 @@ +use super::Registers; +use crate::trap::TrapContext; +use core::arch::naked_asm; + +unsafe extern "C" { + fn _default_trap_handler(trap_context: &mut TrapContext); +} + +#[unsafe(naked)] +pub(super) unsafe extern "C" fn _default_trap_entry() -> ! 
{ + naked_asm!( + "csrrw t0, sscratch, t0", + "sd tp, {tp}(t0)", + "sd ra, {ra}(t0)", + "sd sp, {sp}(t0)", + "sd gp, {gp}(t0)", + "sd a0, {a0}(t0)", + "sd a1, {a1}(t0)", + "sd a2, {a2}(t0)", + "sd a3, {a3}(t0)", + "sd a4, {a4}(t0)", + "sd t1, {t1}(t0)", + "sd a5, {a5}(t0)", + "sd a6, {a6}(t0)", + "sd a7, {a7}(t0)", + "sd t3, {t3}(t0)", + "sd t4, {t4}(t0)", + "sd t5, {t5}(t0)", + "sd t2, {t2}(t0)", + "sd t6, {t6}(t0)", + "sd s0, {s0}(t0)", + "sd s1, {s1}(t0)", + "sd s2, {s2}(t0)", + "sd s3, {s3}(t0)", + "sd s4, {s4}(t0)", + "sd s5, {s5}(t0)", + "sd s6, {s6}(t0)", + "sd s7, {s7}(t0)", + "sd s8, {s8}(t0)", + "sd s9, {s9}(t0)", + "sd s10, {s10}(t0)", + "sd s11, {s11}(t0)", + "mv a0, t0", + "csrrw t0, sscratch, t0", + "sd t0, {t0}(a0)", + "csrr t0, sepc", + "csrr t1, scause", + "csrr t2, sstatus", + "csrr t3, stval", + "sd t0, {sepc}(a0)", + "sd t1, {scause}(a0)", + "sd t2, {sstatus}(a0)", + "sd t3, {stval}(a0)", + + "la t0, {default_trap_handler}", + "jalr t0", + + "csrr t0, sscratch", + "ld t1, {sepc}(t0)", + "ld t2, {sstatus}(t0)", + "ld tp, {tp}(t0)", + "ld ra, {ra}(t0)", + "ld sp, {sp}(t0)", + "ld gp, {gp}(t0)", + "ld a0, {a0}(t0)", + "ld a1, {a1}(t0)", + "ld a2, {a2}(t0)", + "ld a3, {a3}(t0)", + "ld a4, {a4}(t0)", + + "csrw sepc, t1", + "csrw sstatus, t2", + + "ld t1, {t1}(t0)", + "ld a5, {a5}(t0)", + "ld a6, {a6}(t0)", + "ld a7, {a7}(t0)", + "ld t3, {t3}(t0)", + "ld t4, {t4}(t0)", + "ld t5, {t5}(t0)", + "ld t2, {t2}(t0)", + "ld t6, {t6}(t0)", + "ld s0, {s0}(t0)", + "ld s1, {s1}(t0)", + "ld s2, {s2}(t0)", + "ld s3, {s3}(t0)", + "ld s4, {s4}(t0)", + "ld s5, {s5}(t0)", + "ld s6, {s6}(t0)", + "ld s7, {s7}(t0)", + "ld s8, {s8}(t0)", + "ld s9, {s9}(t0)", + "ld s10, {s10}(t0)", + "ld s11, {s11}(t0)", + "ld t0, {t0}(t0)", + "sret", + tp = const Registers::OFFSET_TP, + ra = const Registers::OFFSET_RA, + sp = const Registers::OFFSET_SP, + gp = const Registers::OFFSET_GP, + t0 = const Registers::OFFSET_T0, + t1 = const Registers::OFFSET_T1, + t2 = const 
Registers::OFFSET_T2, + t3 = const Registers::OFFSET_T3, + t4 = const Registers::OFFSET_T4, + t5 = const Registers::OFFSET_T5, + t6 = const Registers::OFFSET_T6, + a0 = const Registers::OFFSET_A0, + a1 = const Registers::OFFSET_A1, + a2 = const Registers::OFFSET_A2, + a3 = const Registers::OFFSET_A3, + a4 = const Registers::OFFSET_A4, + a5 = const Registers::OFFSET_A5, + a6 = const Registers::OFFSET_A6, + a7 = const Registers::OFFSET_A7, + s0 = const Registers::OFFSET_S0, + s1 = const Registers::OFFSET_S1, + s2 = const Registers::OFFSET_S2, + s3 = const Registers::OFFSET_S3, + s4 = const Registers::OFFSET_S4, + s5 = const Registers::OFFSET_S5, + s6 = const Registers::OFFSET_S6, + s7 = const Registers::OFFSET_S7, + s8 = const Registers::OFFSET_S8, + s9 = const Registers::OFFSET_S9, + s10 = const Registers::OFFSET_S10, + s11 = const Registers::OFFSET_S11, + sepc = const TrapContext::OFFSET_SEPC, + scause = const TrapContext::OFFSET_SCAUSE, + sstatus = const TrapContext::OFFSET_SSTATUS, + stval = const TrapContext::OFFSET_STVAL, + default_trap_handler = sym _default_trap_handler, + ); +} diff --git a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs index 58566ebe..28689111 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs @@ -1,18 +1,22 @@ +mod captured; +mod default; mod trap_context; use super::config::platform::virt::*; use super::context::TaskContext; +use captured::{_captured_trap_entry, _captured_trap_return}; use core::arch::{global_asm, naked_asm}; use core::mem::{offset_of, size_of}; use core::num::NonZero; use core::ptr::NonNull; +use default::_default_trap_entry; use eonix_hal_traits::{ context::RawTaskContext, trap::{IrqState as IrqStateTrait, TrapReturn}, }; use riscv::register::sstatus::{self, Sstatus}; use riscv::register::stvec::TrapMode; -use riscv::register::{scause, sepc, stval}; +use riscv::register::{scause, sepc, sscratch, stval}; use 
riscv::{ asm::sfence_vma_all, register::stvec::{self, Stvec}, @@ -21,288 +25,35 @@ use sbi::SbiError; pub use trap_context::*; -#[repr(C)] -pub struct TrapScratch { - t1: u64, - t2: u64, - kernel_tp: Option>, - trap_context: Option>, - handler: unsafe extern "C" fn(), - capturer_context: TaskContext, -} - -#[eonix_percpu::define_percpu] -pub(crate) static TRAP_SCRATCH: TrapScratch = TrapScratch { - t1: 0, - t2: 0, - kernel_tp: None, - trap_context: None, - handler: default_trap_handler, - capturer_context: TaskContext::new(), -}; - -static mut DIRTY_TASK_CONTEXT: TaskContext = TaskContext::new(); - -#[unsafe(naked)] -unsafe extern "C" fn _raw_trap_entry() -> ! { - naked_asm!( - "csrrw t0, sscratch, t0", // Swap t0 and sscratch - "sd t1, 0(t0)", - "sd t2, 8(t0)", - "csrr t1, sstatus", - "andi t1, t1, 0x100", - "beqz t1, 2f", - // else SPP = 1, supervisor mode - "addi t1, sp, -{trap_context_size}", - "mv t2, tp", - "sd ra, {ra}(t1)", - "sd sp, {sp}(t1)", - "mv sp, t1", - "j 4f", - // SPP = 0, user mode - "2:", - "ld t1, 24(t0)", // Load captured TrapContext address - "mv t2, tp", - "ld tp, 16(t0)", // Restore kernel tp - // t0: &mut TrapScratch, t1: &mut TrapContext, t2: tp before trap - "3:", - "sd ra, {ra}(t1)", - "sd sp, {sp}(t1)", - "4:", - "sd gp, {gp}(t1)", - "sd t2, {tp}(t1)", - "ld ra, 0(t0)", - "ld t2, 8(t0)", - "sd ra, {t1}(t1)", // Save t1 - "sd t2, {t2}(t1)", // Save t2 - "ld ra, 32(t0)", // Load handler address - "csrrw t2, sscratch, t0", // Swap t0 and sscratch - "sd t2, {t0}(t1)", - "sd a0, {a0}(t1)", - "sd a1, {a1}(t1)", - "sd a2, {a2}(t1)", - "sd a3, {a3}(t1)", - "sd a4, {a4}(t1)", - "sd a5, {a5}(t1)", - "sd a6, {a6}(t1)", - "sd a7, {a7}(t1)", - "sd t3, {t3}(t1)", - "sd t4, {t4}(t1)", - "sd t5, {t5}(t1)", - "sd t6, {t6}(t1)", - "sd s0, {s0}(t1)", - "sd s1, {s1}(t1)", - "sd s2, {s2}(t1)", - "sd s3, {s3}(t1)", - "sd s4, {s4}(t1)", - "sd s5, {s5}(t1)", - "sd s6, {s6}(t1)", - "sd s7, {s7}(t1)", - "sd s8, {s8}(t1)", - "sd s9, {s9}(t1)", - "sd s10, 
{s10}(t1)", - "sd s11, {s11}(t1)", - "csrr t2, sstatus", - "csrr t3, sepc", - "csrr t4, scause", - "sd t2, {sstatus}(t1)", - "sd t3, {sepc}(t1)", - "sd t4, {scause}(t1)", - "ret", - trap_context_size = const size_of::(), - ra = const Registers::OFFSET_RA, - sp = const Registers::OFFSET_SP, - gp = const Registers::OFFSET_GP, - tp = const Registers::OFFSET_TP, - t1 = const Registers::OFFSET_T1, - t2 = const Registers::OFFSET_T2, - t0 = const Registers::OFFSET_T0, - a0 = const Registers::OFFSET_A0, - a1 = const Registers::OFFSET_A1, - a2 = const Registers::OFFSET_A2, - a3 = const Registers::OFFSET_A3, - a4 = const Registers::OFFSET_A4, - a5 = const Registers::OFFSET_A5, - a6 = const Registers::OFFSET_A6, - a7 = const Registers::OFFSET_A7, - t3 = const Registers::OFFSET_T3, - t4 = const Registers::OFFSET_T4, - t5 = const Registers::OFFSET_T5, - t6 = const Registers::OFFSET_T6, - s0 = const Registers::OFFSET_S0, - s1 = const Registers::OFFSET_S1, - s2 = const Registers::OFFSET_S2, - s3 = const Registers::OFFSET_S3, - s4 = const Registers::OFFSET_S4, - s5 = const Registers::OFFSET_S5, - s6 = const Registers::OFFSET_S6, - s7 = const Registers::OFFSET_S7, - s8 = const Registers::OFFSET_S8, - s9 = const Registers::OFFSET_S9, - s10 = const Registers::OFFSET_S10, - s11 = const Registers::OFFSET_S11, - sstatus = const TrapContext::OFFSET_SSTATUS, - sepc = const TrapContext::OFFSET_SEPC, - scause = const TrapContext::OFFSET_SCAUSE, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn _raw_trap_return(ctx: &mut TrapContext) -> ! 
{ - naked_asm!( - "ld ra, {ra}(a0)", - "ld sp, {sp}(a0)", - "ld gp, {gp}(a0)", - "ld tp, {tp}(a0)", - "ld t1, {t1}(a0)", - "ld t2, {t2}(a0)", - "ld t0, {t0}(a0)", - "ld a1, {a1}(a0)", - "ld a2, {a2}(a0)", - "ld a3, {a3}(a0)", - "ld a4, {a4}(a0)", - "ld a5, {a5}(a0)", - "ld a6, {a6}(a0)", - "ld a7, {a7}(a0)", - "ld t3, {t3}(a0)", - "ld t4, {sepc}(a0)", // Load sepc from TrapContext - "ld t5, {sstatus}(a0)", // Load sstatus from TrapContext - "ld s0, {s0}(a0)", - "ld s1, {s1}(a0)", - "ld s2, {s2}(a0)", - "ld s3, {s3}(a0)", - "ld s4, {s4}(a0)", - "ld s5, {s5}(a0)", - "ld s6, {s6}(a0)", - "ld s7, {s7}(a0)", - "ld s8, {s8}(a0)", - "ld s9, {s9}(a0)", - "ld s10, {s10}(a0)", - "ld s11, {s11}(a0)", - "csrw sepc, t4", // Restore sepc - "csrw sstatus, t5", // Restore sstatus - "ld t4, {t4}(a0)", - "ld t5, {t5}(a0)", - "ld t6, {t6}(a0)", - "ld a0, {a0}(a0)", - "sret", - ra = const Registers::OFFSET_RA, - sp = const Registers::OFFSET_SP, - gp = const Registers::OFFSET_GP, - tp = const Registers::OFFSET_TP, - t1 = const Registers::OFFSET_T1, - t2 = const Registers::OFFSET_T2, - t0 = const Registers::OFFSET_T0, - a0 = const Registers::OFFSET_A0, - a1 = const Registers::OFFSET_A1, - a2 = const Registers::OFFSET_A2, - a3 = const Registers::OFFSET_A3, - a4 = const Registers::OFFSET_A4, - a5 = const Registers::OFFSET_A5, - a6 = const Registers::OFFSET_A6, - a7 = const Registers::OFFSET_A7, - t3 = const Registers::OFFSET_T3, - t4 = const Registers::OFFSET_T4, - t5 = const Registers::OFFSET_T5, - t6 = const Registers::OFFSET_T6, - s0 = const Registers::OFFSET_S0, - s1 = const Registers::OFFSET_S1, - s2 = const Registers::OFFSET_S2, - s3 = const Registers::OFFSET_S3, - s4 = const Registers::OFFSET_S4, - s5 = const Registers::OFFSET_S5, - s6 = const Registers::OFFSET_S6, - s7 = const Registers::OFFSET_S7, - s8 = const Registers::OFFSET_S8, - s9 = const Registers::OFFSET_S9, - s10 = const Registers::OFFSET_S10, - s11 = const Registers::OFFSET_S11, - sstatus = const 
TrapContext::OFFSET_SSTATUS, - sepc = const TrapContext::OFFSET_SEPC, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn default_trap_handler() { - unsafe extern "C" { - fn _default_trap_handler(trap_context: &mut TrapContext); - } - - naked_asm!( - "andi sp, sp, -16", // Align stack pointer to 16 bytes - "addi sp, sp, -16", - "mv a0, t1", // TrapContext pointer in t1 - "sd a0, 0(sp)", // Save TrapContext pointer - "", - "call {default_handler}", - "", - "ld a0, 0(sp)", // Restore TrapContext pointer - "j {trap_return}", - default_handler = sym _default_trap_handler, - trap_return = sym _raw_trap_return, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn captured_trap_handler() { - naked_asm!( - "la a0, {dirty_task_context}", - "addi a1, t0, {capturer_context_offset}", - "j {switch}", - dirty_task_context = sym DIRTY_TASK_CONTEXT, - capturer_context_offset = const offset_of!(TrapScratch, capturer_context), - switch = sym TaskContext::switch, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn captured_trap_return(trap_context: usize) -> ! 
{ - naked_asm!( - "mv a0, sp", - "j {raw_trap_return}", - raw_trap_return = sym _raw_trap_return, - ); -} - -impl TrapScratch { - pub fn set_trap_context(&mut self, ctx: NonNull) { - self.trap_context = Some(ctx); - } - - pub fn clear_trap_context(&mut self) { - self.trap_context = None; - } - - pub fn set_kernel_tp(&mut self, tp: NonNull) { - self.kernel_tp = Some(NonZero::new(tp.addr().get() as u64).unwrap()); - } -} - impl TrapReturn for TrapContext { type TaskContext = TaskContext; unsafe fn trap_return(&mut self) { let irq_states = disable_irqs_save(); - let old_handler = - core::mem::replace(&mut TRAP_SCRATCH.as_mut().handler, captured_trap_handler); + let old_stvec = stvec::read(); + stvec::write({ + let mut stvec_val = Stvec::from_bits(0); + stvec_val.set_address(_captured_trap_entry as usize); + stvec_val.set_trap_mode(TrapMode::Direct); + stvec_val + }); - let old_trap_context = core::mem::replace( - &mut TRAP_SCRATCH.as_mut().trap_context, - Some(NonNull::from(&mut *self)), - ); + let old_trap_ctx = sscratch::read(); + sscratch::write(&raw mut *self as usize); + let mut from_ctx = TaskContext::new(); let mut to_ctx = TaskContext::new(); - to_ctx.set_program_counter(captured_trap_return as usize); - to_ctx.set_stack_pointer(&raw mut *self as usize); + to_ctx.set_program_counter(_captured_trap_return as usize); + to_ctx.set_stack_pointer(&raw mut from_ctx as usize); to_ctx.set_interrupt_enabled(false); unsafe { - TaskContext::switch(&mut TRAP_SCRATCH.as_mut().capturer_context, &mut to_ctx); + TaskContext::switch(&mut from_ctx, &mut to_ctx); } - TRAP_SCRATCH.as_mut().handler = old_handler; - TRAP_SCRATCH.as_mut().trap_context = old_trap_context; + sscratch::write(old_trap_ctx); + stvec::write(old_stvec); irq_states.restore(); } @@ -319,7 +70,7 @@ fn setup_trap_handler(trap_entry_addr: usize) { } pub fn setup_trap() { - setup_trap_handler(_raw_trap_entry as usize); + setup_trap_handler(_default_trap_entry as usize); } #[derive(Debug, Clone, Copy, PartialEq, 
Eq)] diff --git a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs index 369eef3e..73ed34b2 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs @@ -1,5 +1,5 @@ use crate::{arch::time::set_next_timer, processor::CPU}; -use core::arch::asm; +use core::{arch::asm, mem::offset_of}; use eonix_hal_traits::{ fault::{Fault, PageFaultErrorCode}, trap::{RawTrapContext, TrapType}, @@ -18,24 +18,23 @@ use riscv::{ #[repr(C)] #[derive(Default, Clone, Copy)] pub struct Registers { + tp: u64, ra: u64, sp: u64, gp: u64, - tp: u64, - t1: u64, - t2: u64, - t0: u64, a0: u64, a1: u64, a2: u64, a3: u64, a4: u64, + t1: u64, a5: u64, a6: u64, a7: u64, t3: u64, t4: u64, t5: u64, + t2: u64, t6: u64, s0: u64, s1: u64, @@ -49,10 +48,11 @@ pub struct Registers { s9: u64, s10: u64, s11: u64, + t0: u64, } /// Saved CPU context when a trap (interrupt or exception) occurs on RISC-V 64. 
-#[repr(C)] +#[repr(C, align(16))] #[derive(Clone, Copy)] pub struct TrapContext { regs: Registers, @@ -60,46 +60,48 @@ pub struct TrapContext { sstatus: Sstatus, sepc: usize, scause: Scause, + stval: usize, } impl Registers { - pub const OFFSET_RA: usize = 0 * 8; - pub const OFFSET_SP: usize = 1 * 8; - pub const OFFSET_GP: usize = 2 * 8; - pub const OFFSET_TP: usize = 3 * 8; - pub const OFFSET_T1: usize = 4 * 8; - pub const OFFSET_T2: usize = 5 * 8; - pub const OFFSET_T0: usize = 6 * 8; - pub const OFFSET_A0: usize = 7 * 8; - pub const OFFSET_A1: usize = 8 * 8; - pub const OFFSET_A2: usize = 9 * 8; - pub const OFFSET_A3: usize = 10 * 8; - pub const OFFSET_A4: usize = 11 * 8; - pub const OFFSET_A5: usize = 12 * 8; - pub const OFFSET_A6: usize = 13 * 8; - pub const OFFSET_A7: usize = 14 * 8; - pub const OFFSET_T3: usize = 15 * 8; - pub const OFFSET_T4: usize = 16 * 8; - pub const OFFSET_T5: usize = 17 * 8; - pub const OFFSET_T6: usize = 18 * 8; - pub const OFFSET_S0: usize = 19 * 8; - pub const OFFSET_S1: usize = 20 * 8; - pub const OFFSET_S2: usize = 21 * 8; - pub const OFFSET_S3: usize = 22 * 8; - pub const OFFSET_S4: usize = 23 * 8; - pub const OFFSET_S5: usize = 24 * 8; - pub const OFFSET_S6: usize = 25 * 8; - pub const OFFSET_S7: usize = 26 * 8; - pub const OFFSET_S8: usize = 27 * 8; - pub const OFFSET_S9: usize = 28 * 8; - pub const OFFSET_S10: usize = 29 * 8; - pub const OFFSET_S11: usize = 30 * 8; + pub const OFFSET_TP: usize = offset_of!(Registers, tp); + pub const OFFSET_SP: usize = offset_of!(Registers, sp); + pub const OFFSET_RA: usize = offset_of!(Registers, ra); + pub const OFFSET_GP: usize = offset_of!(Registers, gp); + pub const OFFSET_T1: usize = offset_of!(Registers, t1); + pub const OFFSET_T2: usize = offset_of!(Registers, t2); + pub const OFFSET_T0: usize = offset_of!(Registers, t0); + pub const OFFSET_A0: usize = offset_of!(Registers, a0); + pub const OFFSET_A1: usize = offset_of!(Registers, a1); + pub const OFFSET_A2: usize = 
offset_of!(Registers, a2); + pub const OFFSET_A3: usize = offset_of!(Registers, a3); + pub const OFFSET_A4: usize = offset_of!(Registers, a4); + pub const OFFSET_A5: usize = offset_of!(Registers, a5); + pub const OFFSET_A6: usize = offset_of!(Registers, a6); + pub const OFFSET_A7: usize = offset_of!(Registers, a7); + pub const OFFSET_T3: usize = offset_of!(Registers, t3); + pub const OFFSET_T4: usize = offset_of!(Registers, t4); + pub const OFFSET_T5: usize = offset_of!(Registers, t5); + pub const OFFSET_T6: usize = offset_of!(Registers, t6); + pub const OFFSET_S0: usize = offset_of!(Registers, s0); + pub const OFFSET_S1: usize = offset_of!(Registers, s1); + pub const OFFSET_S2: usize = offset_of!(Registers, s2); + pub const OFFSET_S3: usize = offset_of!(Registers, s3); + pub const OFFSET_S4: usize = offset_of!(Registers, s4); + pub const OFFSET_S5: usize = offset_of!(Registers, s5); + pub const OFFSET_S6: usize = offset_of!(Registers, s6); + pub const OFFSET_S7: usize = offset_of!(Registers, s7); + pub const OFFSET_S8: usize = offset_of!(Registers, s8); + pub const OFFSET_S9: usize = offset_of!(Registers, s9); + pub const OFFSET_S10: usize = offset_of!(Registers, s10); + pub const OFFSET_S11: usize = offset_of!(Registers, s11); } impl TrapContext { - pub const OFFSET_SSTATUS: usize = 31 * 8; - pub const OFFSET_SEPC: usize = 32 * 8; - pub const OFFSET_SCAUSE: usize = 33 * 8; + pub const OFFSET_SSTATUS: usize = offset_of!(TrapContext, sstatus); + pub const OFFSET_SEPC: usize = offset_of!(TrapContext, sepc); + pub const OFFSET_SCAUSE: usize = offset_of!(TrapContext, scause); + pub const OFFSET_STVAL: usize = offset_of!(TrapContext, stval); fn syscall_no(&self) -> usize { self.regs.a7 as usize @@ -131,6 +133,7 @@ impl RawTrapContext for TrapContext { sstatus, sepc: 0, scause: Scause::from_bits(0), + stval: 0, } } @@ -176,16 +179,10 @@ impl RawTrapContext for TrapContext { }, exception @ (Exception::InstructionPageFault | Exception::LoadPageFault - | 
Exception::StorePageFault) => { - #[inline(always)] - fn get_page_fault_address() -> VAddr { - VAddr::from(stval::read()) - } - TrapType::Fault(Fault::PageFault { - error_code: self.get_page_fault_error_code(exception), - address: get_page_fault_address(), - }) - } + | Exception::StorePageFault) => TrapType::Fault(Fault::PageFault { + error_code: self.get_page_fault_error_code(exception), + address: VAddr::from(self.stval), + }), // breakpoint and supervisor env call _ => TrapType::Fault(Fault::Unknown(e)), } From 9c900be22500f03319d30bdd42f1709b1cc27dc5 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 10 Aug 2025 00:36:27 +0800 Subject: [PATCH 20/54] task, thread: working version of threads We've got everything done in order to make the system run. Add Thread::contexted to load the context needed for the thread to run. Wrap the Thread::real_run() with contexted(stackful(...)) in Thread::run(). We would use this for now. Later, we will make the thread completely asynchronous. This way we don't have to change its interface then. 
Signed-off-by: greatbridf --- src/kernel/task.rs | 88 +++++++++++++++++++++++++++++---------- src/kernel/task/clone.rs | 4 +- src/kernel/task/thread.rs | 64 +++++++++++++++++----------- src/lib.rs | 4 +- 4 files changed, 109 insertions(+), 51 deletions(-) diff --git a/src/kernel/task.rs b/src/kernel/task.rs index e2bbcb3f..2cdb8c22 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -79,29 +79,68 @@ pub async fn stackful(mut future: F) -> F::Output where F: core::future::Future, { + use crate::kernel::{ + interrupt::{default_fault_handler, default_irq_handler}, + timer::{should_reschedule, timer_interrupt}, + }; + use alloc::sync::Arc; + use alloc::task::Wake; use core::cell::UnsafeCell; + use core::future::Future; + use core::pin::Pin; + use core::ptr::NonNull; + use core::sync::atomic::AtomicBool; + use core::sync::atomic::Ordering; + use core::task::Context; + use core::task::Poll; + use core::task::Waker; use eonix_hal::traits::fault::Fault; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::traits::trap::TrapReturn; + use eonix_hal::traits::trap::TrapType; use eonix_hal::trap::TrapContext; - use eonix_log::println_debug; + use eonix_preempt::assert_preempt_enabled; use eonix_runtime::executor::Stack; - - use crate::kernel::{ - interrupt::{default_fault_handler, default_irq_handler}, - timer::{should_reschedule, timer_interrupt}, - }; + use thread::wait_for_wakeups; let stack = KernelStack::new(); - fn execute( - future: core::pin::Pin<&mut F>, - output_ptr: core::ptr::NonNull>, - ) -> ! + fn execute(mut future: Pin<&mut F>, output_ptr: NonNull>) -> ! 
where - F: core::future::Future, + F: Future, { - let output = do_block_on(future); + struct WokenUp(AtomicBool); + + impl Wake for WokenUp { + fn wake(self: Arc) { + self.wake_by_ref(); + } + + fn wake_by_ref(self: &Arc) { + self.0.swap(true, Ordering::AcqRel); + } + } + + let woken_up = Arc::new(WokenUp(AtomicBool::new(false))); + let waker = Waker::from(woken_up.clone()); + let mut cx = Context::from_waker(&waker); + + let output = loop { + match future.as_mut().poll(&mut cx) { + Poll::Ready(output) => break output, + Poll::Pending => { + if woken_up.0.swap(false, Ordering::Acquire) { + continue; + } + + assert_preempt_enabled!("Blocking in stackful futures is not allowed."); + + unsafe { + core::arch::asm!("ebreak"); + } + } + } + }; unsafe { output_ptr.write(Some(output)); @@ -115,7 +154,7 @@ where } let sp = stack.get_bottom(); - let output = UnsafeCell::new(None); + let mut output = UnsafeCell::new(None); let mut trap_ctx = TrapContext::new(); @@ -135,21 +174,26 @@ where } match trap_ctx.trap_type() { - eonix_hal::traits::trap::TrapType::Syscall { .. } => {} - eonix_hal::traits::trap::TrapType::Fault(fault) => { + TrapType::Syscall { .. 
} => {} + TrapType::Fault(fault) => { // Breakpoint if let Fault::Unknown(3) = &fault { - println_debug!("Breakpoint hit, returning output"); - break output.into_inner().unwrap(); + if let Some(output) = output.get_mut().take() { + break output; + } else { + wait_for_wakeups().await; + } + + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 2); + } else { + default_fault_handler(fault, &mut trap_ctx) } - - default_fault_handler(fault, &mut trap_ctx) } - eonix_hal::traits::trap::TrapType::Irq { callback } => callback(default_irq_handler), - eonix_hal::traits::trap::TrapType::Timer { callback } => { + TrapType::Irq { callback } => callback(default_irq_handler), + TrapType::Timer { callback } => { callback(timer_interrupt); - if should_reschedule() { + if eonix_preempt::count() == 0 && should_reschedule() { yield_now().await; } } diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index 2a16ce56..574cdfc9 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,4 +1,4 @@ -use super::{block_on, stackful}; +use super::block_on; use crate::{ kernel::{ syscall::procops::parse_user_tls, @@ -164,7 +164,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)? 
} - RUNTIME.spawn(stackful(new_thread.run())); + RUNTIME.spawn(new_thread.run()); Ok(new_pid) } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index ef71657f..be3a6077 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -1,6 +1,6 @@ use super::{ signal::{RaiseResult, SignalList}, - Process, ProcessList, WaitType, + stackful, Process, ProcessList, WaitType, }; use crate::{ kernel::{ @@ -16,8 +16,8 @@ use crate::{ use alloc::sync::Arc; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ - future::Future, - pin::{pin, Pin}, + future::{poll_fn, Future}, + pin::Pin, ptr::NonNull, sync::atomic::{AtomicBool, Ordering}, task::{Context, Poll}, @@ -28,9 +28,9 @@ use eonix_hal::{ traits::{ fault::Fault, fpu::RawFpuState as _, - trap::{IrqState as _, RawTrapContext, TrapReturn, TrapType}, + trap::{RawTrapContext, TrapReturn, TrapType}, }, - trap::{disable_irqs_save, TrapContext}, + trap::TrapContext, }; use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::AsProofMut as _; @@ -415,36 +415,39 @@ impl Thread { } } - pub fn run(self: Arc) -> impl Future + Send + 'static { - async fn real_run_with_context(me: &Arc) { - let mut future = pin!(me.real_run()); + async fn contexted(&self, future: F) -> F::Output + where + F: Future, + { + let mut future = core::pin::pin!(future); - core::future::poll_fn(|cx| { - me.process.mm_list.activate(); + core::future::poll_fn(|cx| { + self.process.mm_list.activate(); - CURRENT_THREAD.set(NonNull::new(Arc::as_ptr(me) as *mut _)); + CURRENT_THREAD.set(NonNull::new(&raw const *self as *mut _)); - unsafe { - // SAFETY: Preemption is disabled. - me.load_thread_area32(); - } + unsafe { + eonix_preempt::disable(); - let irq_state = disable_irqs_save(); + // SAFETY: Preemption is disabled. 
+ self.load_thread_area32(); - let result = future.as_mut().poll(cx); + eonix_preempt::enable(); + } - irq_state.restore(); + let result = future.as_mut().poll(cx); - me.process.mm_list.deactivate(); + self.process.mm_list.deactivate(); - CURRENT_THREAD.set(None); + CURRENT_THREAD.set(None); - result - }) - .await - } + result + }) + .await + } - async move { real_run_with_context(&self).await } + pub fn run(self: Arc) -> impl Future + Send + 'static { + async move { self.contexted(stackful(self.real_run())).await } } } @@ -469,3 +472,14 @@ pub async fn yield_now() { Yield { yielded: false }.await; } + +pub fn wait_for_wakeups() -> impl Future { + let mut waited = false; + poll_fn(move |_| match waited { + true => Poll::Ready(()), + false => { + waited = true; + Poll::Pending + } + }) +} diff --git a/src/lib.rs b/src/lib.rs index cbe7bc5d..beebe7c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,7 +37,7 @@ use eonix_mm::address::PRange; use eonix_runtime::{executor::Stack, scheduler::RUNTIME}; use kernel::{ mem::GlobalPageAlloc, - task::{stackful, KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, + task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, @@ -272,5 +272,5 @@ async fn init_process(early_kstack: PRange) { // TODO!!!: Remove this. thread.files.open_console(); - RUNTIME.spawn(stackful(thread.run())); + RUNTIME.spawn(thread.run()); } From 30bfc5a0db6d705f15800d4eaccc6a3d62726786 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 10 Aug 2025 03:44:38 +0800 Subject: [PATCH 21/54] loongarch64, trap: rework to fix nested captured traps Similar to 661a15940badf992d7836eb93c7ab293590c6ad4: - Save previous {trap, task}_ctx and restore them afterwards. - Set kernel tp when setting trap context user mode. - Add the program counter with 4 bytes on breakpoints. 
Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/loongarch64/trap/mod.rs | 13 +++++++++++-- .../src/arch/loongarch64/trap/trap_context.rs | 14 ++++++++++++-- src/driver/virtio/loongarch64.rs | 4 ++-- src/kernel/task.rs | 12 ++++++++++++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs b/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs index f529bf61..4a6c4754 100644 --- a/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs +++ b/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs @@ -278,11 +278,18 @@ impl TrapReturn for TrapContext { to_ctx.set_interrupt_enabled(false); unsafe { + let mut old_trap_ctx: usize; + let mut old_task_ctx: usize; + asm!( + "csrrd {old_trap_ctx}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", + "csrrd {old_task_ctx}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", "csrwr {captured_trap_context}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", "csrwr {capturer_task_context}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", captured_trap_context = inout(reg) &raw mut *self => _, capturer_task_context = inout(reg) &raw mut capturer_ctx => _, + old_trap_ctx = out(reg) old_trap_ctx, + old_task_ctx = out(reg) old_task_ctx, CSR_CAPTURED_TRAP_CONTEXT_ADDR = const CSR_CAPTURED_TRAP_CONTEXT_ADDR, CSR_CAPTURER_TASK_CONTEXT_ADDR = const CSR_CAPTURER_TASK_CONTEXT_ADDR, options(nomem, nostack, preserves_flags), @@ -291,8 +298,10 @@ impl TrapReturn for TrapContext { TaskContext::switch(&mut capturer_ctx, &mut to_ctx); asm!( - "csrwr $zero, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", - "csrwr $zero, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", + "csrwr {old_trap_ctx}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", + "csrwr {old_task_ctx}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", + old_trap_ctx = inout(reg) old_trap_ctx, + old_task_ctx = inout(reg) old_task_ctx, CSR_CAPTURED_TRAP_CONTEXT_ADDR = const CSR_CAPTURED_TRAP_CONTEXT_ADDR, CSR_CAPTURER_TASK_CONTEXT_ADDR = const CSR_CAPTURER_TASK_CONTEXT_ADDR, options(nomem, nostack, preserves_flags), diff --git 
a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs index 83e28cfa..70a2bdc2 100644 --- a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs @@ -1,4 +1,4 @@ -use crate::processor::CPU; +use crate::{arch::trap::CSR_KERNEL_TP, processor::CPU}; use core::{arch::asm, mem::offset_of}; use eonix_hal_traits::{ fault::{Fault, PageFaultErrorCode}, @@ -226,7 +226,17 @@ impl RawTrapContext for TrapContext { fn set_user_mode(&mut self, user: bool) { match user { true => self.prmd |= 0x3, - false => self.prmd &= !0x3, + false => { + unsafe { + asm!( + "csrrd {tp}, {CSR_KERNEL_TP}", + tp = out(reg) self.regs.tp, + CSR_KERNEL_TP = const CSR_KERNEL_TP, + options(nomem, nostack, preserves_flags), + ) + } + self.prmd &= !0x3; + } } } diff --git a/src/driver/virtio/loongarch64.rs b/src/driver/virtio/loongarch64.rs index bcd7e713..996683bd 100644 --- a/src/driver/virtio/loongarch64.rs +++ b/src/driver/virtio/loongarch64.rs @@ -3,13 +3,13 @@ use crate::kernel::{ block::{make_device, BlockDevice}, constants::EIO, pcie::{self, PCIDevice, PCIDriver, PciError, SegmentGroup}, + task::block_on, }; use alloc::sync::Arc; use core::sync::atomic::{AtomicUsize, Ordering}; use eonix_hal::{fence::memory_barrier, mm::ArchPhysAccess}; use eonix_log::println_warn; use eonix_mm::address::PhysAccess; -use eonix_runtime::task::Task; use eonix_sync::Spin; use virtio_drivers::{ device::blk::VirtIOBlk, @@ -134,7 +134,7 @@ impl PCIDriver for VirtIODriver { Arc::new(Spin::new(virtio_block)), )?; - Task::block_on(block_device.partprobe()).map_err(|err| { + block_on(block_device.partprobe()).map_err(|err| { println_warn!( "Failed to probe partitions for VirtIO Block device: {}", err diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 2cdb8c22..9c900a64 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -136,7 +136,11 @@ where 
assert_preempt_enabled!("Blocking in stackful futures is not allowed."); unsafe { + #[cfg(target_arch = "riscv64")] core::arch::asm!("ebreak"); + + #[cfg(target_arch = "loongarch64")] + core::arch::asm!("break 1"); } } } @@ -147,7 +151,11 @@ where } unsafe { + #[cfg(target_arch = "riscv64")] core::arch::asm!("ebreak"); + + #[cfg(target_arch = "loongarch64")] + core::arch::asm!("break 1"); } unreachable!() @@ -184,7 +192,11 @@ where wait_for_wakeups().await; } + #[cfg(target_arch = "riscv64")] trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 2); + + #[cfg(target_arch = "loongarch64")] + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 4); } else { default_fault_handler(fault, &mut trap_ctx) } From a6221725870605955b606ca5329b384cdefe5414 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 10 Aug 2025 03:48:24 +0800 Subject: [PATCH 22/54] trap: introduce Breakpoint fault type TODO: hide changes to the program counter in the HAL crate. Signed-off-by: greatbridf --- crates/eonix_hal/eonix_hal_traits/src/trap.rs | 1 + .../src/arch/loongarch64/trap/trap_context.rs | 1 + .../src/arch/riscv64/trap/trap_context.rs | 1 + src/kernel/interrupt.rs | 1 + src/kernel/task.rs | 33 ++++++++----------- src/kernel/task/thread.rs | 1 + 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/crates/eonix_hal/eonix_hal_traits/src/trap.rs b/crates/eonix_hal/eonix_hal_traits/src/trap.rs index d698dbaa..e51a9eb4 100644 --- a/crates/eonix_hal/eonix_hal_traits/src/trap.rs +++ b/crates/eonix_hal/eonix_hal_traits/src/trap.rs @@ -66,6 +66,7 @@ where { Syscall { no: usize, args: [usize; 6] }, Fault(Fault), + Breakpoint, Irq { callback: FIrq }, Timer { callback: FTimer }, } diff --git a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs index 70a2bdc2..56bf59b6 100644 --- a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs 
@@ -173,6 +173,7 @@ impl RawTrapContext for TrapContext { | Exception::MemoryAccessAddressError | Exception::PagePrivilegeIllegal, ) => TrapType::Fault(Fault::BadAccess), + Trap::Exception(Exception::Breakpoint) => TrapType::Breakpoint, Trap::Exception(Exception::InstructionNotExist) => TrapType::Fault(Fault::InvalidOp), Trap::Exception(Exception::Syscall) => TrapType::Syscall { no: self.syscall_no(), diff --git a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs index 73ed34b2..ab4ca9a2 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs @@ -166,6 +166,7 @@ impl RawTrapContext for TrapContext { } Trap::Exception(e) => { match Exception::from_number(e).unwrap() { + Exception::Breakpoint => TrapType::Breakpoint, Exception::InstructionMisaligned | Exception::LoadMisaligned | Exception::InstructionFault diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs index 4b55f182..9394f64c 100644 --- a/src/kernel/interrupt.rs +++ b/src/kernel/interrupt.rs @@ -48,6 +48,7 @@ pub fn default_fault_handler(fault_type: Fault, trap_ctx: &mut TrapContext) { pub fn interrupt_handler(trap_ctx: &mut TrapContext) { match trap_ctx.trap_type() { TrapType::Syscall { no, .. 
} => unreachable!("Syscall {} in kernel space.", no), + TrapType::Breakpoint => unreachable!("Breakpoint in kernel space."), TrapType::Fault(fault) => default_fault_handler(fault, trap_ctx), TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => callback(timer_interrupt), diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 9c900a64..35d293e3 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -94,7 +94,6 @@ where use core::task::Context; use core::task::Poll; use core::task::Waker; - use eonix_hal::traits::fault::Fault; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::traits::trap::TrapReturn; use eonix_hal::traits::trap::TrapType; @@ -183,24 +182,7 @@ where match trap_ctx.trap_type() { TrapType::Syscall { .. } => {} - TrapType::Fault(fault) => { - // Breakpoint - if let Fault::Unknown(3) = &fault { - if let Some(output) = output.get_mut().take() { - break output; - } else { - wait_for_wakeups().await; - } - - #[cfg(target_arch = "riscv64")] - trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 2); - - #[cfg(target_arch = "loongarch64")] - trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 4); - } else { - default_fault_handler(fault, &mut trap_ctx) - } - } + TrapType::Fault(fault) => default_fault_handler(fault, &mut trap_ctx), TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => { callback(timer_interrupt); @@ -209,6 +191,19 @@ where yield_now().await; } } + TrapType::Breakpoint => { + if let Some(output) = output.get_mut().take() { + break output; + } else { + wait_for_wakeups().await; + } + + #[cfg(target_arch = "riscv64")] + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 2); + + #[cfg(target_arch = "loongarch64")] + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 4); + } } } } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index be3a6077..50600436 100644 --- a/src/kernel/task/thread.rs +++ 
b/src/kernel/task/thread.rs @@ -391,6 +391,7 @@ impl Thread { self.signal_list.raise(Signal::SIGILL); } TrapType::Fault(Fault::Unknown(_)) => unimplemented!("Unhandled fault"), + TrapType::Breakpoint => unimplemented!("Breakpoint in user space"), TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => { callback(timer_interrupt); From 21b765092ffa27bcf7ed2e53a29ff64420e8f9c7 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Mon, 11 Aug 2025 00:55:18 +0800 Subject: [PATCH 23/54] task: fix stackful waker implementation The current implementation use the WokenUp object to detect whether the stackful task is woken up somewhere. This is WRONG since we might lose wakeups as the runtime have no idea what we have done. If someone wakes us up, the task won't be enqueued so we will never have a second chance to get to the foreground. The fix is to use Arc to create a waker and check whether the task is ready each time we get back to the stackful poll loop. Signed-off-by: greatbridf --- crates/eonix_runtime/src/task.rs | 4 ++++ crates/eonix_runtime/src/task/task_state.rs | 4 ++++ src/kernel/task.rs | 24 ++++----------------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/crates/eonix_runtime/src/task.rs b/crates/eonix_runtime/src/task.rs index 7b89d3fe..8a1d6ef4 100644 --- a/crates/eonix_runtime/src/task.rs +++ b/crates/eonix_runtime/src/task.rs @@ -93,6 +93,10 @@ impl Task { return rq; } } + + pub fn is_ready(&self) -> bool { + self.state.is_ready() + } } impl Wake for Task { diff --git a/crates/eonix_runtime/src/task/task_state.rs b/crates/eonix_runtime/src/task/task_state.rs index 074acfb4..473310d7 100644 --- a/crates/eonix_runtime/src/task/task_state.rs +++ b/crates/eonix_runtime/src/task/task_state.rs @@ -22,4 +22,8 @@ impl TaskState { self.0 .fetch_update(Ordering::SeqCst, Ordering::SeqCst, func) } + + pub(crate) fn is_ready(&self) -> bool { + self.0.load(Ordering::SeqCst) & Self::READY == Self::READY + } } diff --git 
a/src/kernel/task.rs b/src/kernel/task.rs index 35d293e3..13e2ec93 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -10,6 +10,7 @@ mod signal; mod thread; pub use clone::{do_clone, CloneArgs, CloneFlags}; +use eonix_runtime::task::Task; pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; pub use kernel_stack::KernelStack; pub use loader::ProgramLoader; @@ -18,7 +19,7 @@ pub use process_group::ProcessGroup; pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; -pub use thread::{yield_now, Thread, ThreadBuilder}; +pub use thread::{yield_now, Thread, ThreadAlloc, ThreadBuilder}; fn do_block_on(mut future: core::pin::Pin<&mut F>) -> F::Output where @@ -83,14 +84,10 @@ where interrupt::{default_fault_handler, default_irq_handler}, timer::{should_reschedule, timer_interrupt}, }; - use alloc::sync::Arc; - use alloc::task::Wake; use core::cell::UnsafeCell; use core::future::Future; use core::pin::Pin; use core::ptr::NonNull; - use core::sync::atomic::AtomicBool; - use core::sync::atomic::Ordering; use core::task::Context; use core::task::Poll; use core::task::Waker; @@ -108,27 +105,14 @@ where where F: Future, { - struct WokenUp(AtomicBool); - - impl Wake for WokenUp { - fn wake(self: Arc) { - self.wake_by_ref(); - } - - fn wake_by_ref(self: &Arc) { - self.0.swap(true, Ordering::AcqRel); - } - } - - let woken_up = Arc::new(WokenUp(AtomicBool::new(false))); - let waker = Waker::from(woken_up.clone()); + let waker = Waker::from(Task::current().clone()); let mut cx = Context::from_waker(&waker); let output = loop { match future.as_mut().poll(&mut cx) { Poll::Ready(output) => break output, Poll::Pending => { - if woken_up.0.swap(false, Ordering::Acquire) { + if Task::current().is_ready() { continue; } From dee96a3a6aac134ee95ce4217c7067c3236eb203 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Mon, 11 Aug 2025 00:59:52 +0800 Subject: [PATCH 24/54] syscall: migrate all syscalls to async... 
We introduced a per-thread allocator inside the future object to allocate space for the syscalls. This ensures performance and saves memory. The allocator takes up 8K for now and is enough for current use. Signed-off-by: greatbridf --- Cargo.lock | 11 +- Cargo.toml | 3 + crates/eonix_hal/src/arch/loongarch64/mm.rs | 2 + crates/eonix_hal/src/arch/riscv64/mm.rs | 2 + crates/eonix_mm/src/page_table/page_table.rs | 2 +- crates/posix_types/src/result.rs | 10 + macros/src/lib.rs | 61 ++-- src/fs/fat32.rs | 6 +- src/io.rs | 6 +- src/kernel/block.rs | 15 - src/kernel/interrupt.rs | 3 +- src/kernel/mem/mm_area.rs | 8 +- src/kernel/mem/mm_list.rs | 30 +- src/kernel/mem/mm_list/page_fault.rs | 20 +- src/kernel/mem/page_cache.rs | 2 + src/kernel/syscall.rs | 165 ++++++++- src/kernel/syscall/file_rw.rs | 259 ++++++++------ src/kernel/syscall/mm.rs | 105 +++--- src/kernel/syscall/net.rs | 2 +- src/kernel/syscall/procops.rs | 348 +++++++++---------- src/kernel/syscall/sysinfo.rs | 17 +- src/kernel/task/clone.rs | 20 +- src/kernel/task/futex.rs | 13 +- src/kernel/task/loader/elf.rs | 189 +++++----- src/kernel/task/loader/mod.rs | 4 +- src/kernel/task/process.rs | 57 ++- src/kernel/task/process_list.rs | 7 +- src/kernel/task/signal.rs | 6 +- src/kernel/task/signal/signal_action.rs | 5 +- src/kernel/task/thread.rs | 64 +++- src/kernel/terminal.rs | 16 +- src/kernel/user.rs | 8 +- src/kernel/user/dataflow.rs | 115 +++--- src/kernel/vfs/file.rs | 10 +- src/lib.rs | 2 + 35 files changed, 948 insertions(+), 645 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2caa0bad..484f2796 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -152,6 +152,7 @@ dependencies = [ "pointers", "posix_types", "slab_allocator", + "stalloc", "virtio-drivers", "xmas-elf", ] @@ -401,11 +402,17 @@ dependencies = [ "intrusive_list", ] +[[package]] +name = "stalloc" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a37f0ead4094eeb54c6893316aa139e48b252f1c07511e5124fa1f9414df5b6c" + [[package]] name = "syn" -version = "2.0.103" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 5231dbb2..25768c83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,9 @@ acpi = "5.2.0" align_ext = "0.1.0" xmas-elf = "0.10.0" ext4_rs = "1.3.2" +stalloc = { version = "0.6.1", default-features = false, features = [ + "allocator-api", +] } [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/crates/eonix_hal/src/arch/loongarch64/mm.rs b/crates/eonix_hal/src/arch/loongarch64/mm.rs index 91a2aae5..d5b00a6b 100644 --- a/crates/eonix_hal/src/arch/loongarch64/mm.rs +++ b/crates/eonix_hal/src/arch/loongarch64/mm.rs @@ -87,6 +87,8 @@ impl PagingMode for PagingMode48 { pub type ArchPagingMode = PagingMode48; +unsafe impl Send for RawPageTable48<'_> {} + impl<'a> RawPageTable<'a> for RawPageTable48<'a> { type Entry = PTE64; diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 74ebc349..46dd9437 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -88,6 +88,8 @@ impl PagingMode for PagingModeSv48 { pub type ArchPagingMode = PagingModeSv48; +unsafe impl Send for RawPageTableSv48<'_> {} + impl<'a> RawPageTable<'a> for RawPageTableSv48<'a> { type Entry = PTE64; diff --git a/crates/eonix_mm/src/page_table/page_table.rs b/crates/eonix_mm/src/page_table/page_table.rs index 24253dc9..8318049f 100644 --- a/crates/eonix_mm/src/page_table/page_table.rs +++ b/crates/eonix_mm/src/page_table/page_table.rs @@ -11,7 +11,7 @@ use crate::{ }; use 
core::{marker::PhantomData, ptr::NonNull}; -pub trait RawPageTable<'a>: 'a { +pub trait RawPageTable<'a>: Send + 'a { type Entry: PTE + 'a; /// Return the entry at the given index. diff --git a/crates/posix_types/src/result.rs b/crates/posix_types/src/result.rs index fb251472..a10ff0ad 100644 --- a/crates/posix_types/src/result.rs +++ b/crates/posix_types/src/result.rs @@ -13,3 +13,13 @@ impl From for u32 { } } } + +impl core::fmt::Debug for PosixError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::EFAULT => write!(f, "EFAULT"), + Self::EXDEV => write!(f, "EXDEV"), + Self::EINVAL => write!(f, "EINVAL"), + } + } +} diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 022160a2..722fa5da 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -18,6 +18,11 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { let args = item.sig.inputs.iter(); let ty_ret = item.sig.output; + assert!( + item.sig.asyncness.is_some(), + "Syscall must be async function" + ); + let args_mapped = item .sig .inputs @@ -100,36 +105,50 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { }; #[link_section = #syscall_fn_section] - fn #helper_fn ( - thd: &crate::kernel::task::Thread, + fn #helper_fn <'thd, 'alloc>( + thd: &'thd crate::kernel::task::Thread, + thd_alloc: crate::kernel::task::ThreadAlloc<'alloc>, args: [usize; 6] - ) -> Option { + ) -> core::pin::Pin> + Send + 'thd, + crate::kernel::task::ThreadAlloc<'alloc> + >> { use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; + use alloc::boxed::Box; #(#args_mapped)* - eonix_log::println_trace!( - "trace_syscall", - "tid{}: {}({}) => {{", - thd.tid, - #syscall_name_str, - format_args!(#trace_format_string, #trace_format_args), - ); - - let retval = #real_fn(thd, #(#args_call),*).into_retval(); - - eonix_log::println_trace!( - "trace_syscall", - "}} => {:x?}", - retval, - ); - - retval + unsafe { + 
core::pin::Pin::new_unchecked( + Box::new_in( + async move { + eonix_log::println_trace!( + "trace_syscall", + "tid{}: {}({}) => {{", + thd.tid, + #syscall_name_str, + format_args!(#trace_format_string, #trace_format_args), + ); + + let retval = #real_fn(thd, #(#args_call),*).await.into_retval(); + + eonix_log::println_trace!( + "trace_syscall", + "}} => {:x?}", + retval, + ); + + retval + }, + thd_alloc + ) + ) + } } #(#attrs)* #[link_section = #syscall_fn_section] - #vis fn #real_fn( + #vis async fn #real_fn( thread: &crate::kernel::task::Thread, #(#args),* ) #ty_ret #body diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index f328dc74..9f0adac5 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -308,11 +308,11 @@ impl Inode for FileInode { Ok(buffer.wrote()) } - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { todo!() } - fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write_direct(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { todo!() } } @@ -322,7 +322,7 @@ impl PageCacheBackend for FileInode { self.read_direct(page, offset) } - fn write_page(&self, page: &CachePage, offset: usize) -> KResult { + fn write_page(&self, _page: &CachePage, _offset: usize) -> KResult { todo!() } diff --git a/src/io.rs b/src/io.rs index f1eae9b9..85675dea 100644 --- a/src/io.rs +++ b/src/io.rs @@ -30,7 +30,7 @@ impl FillResult { } } -pub trait Buffer { +pub trait Buffer: Send { fn total(&self) -> usize; fn wrote(&self) -> usize; @@ -49,7 +49,7 @@ pub trait Buffer { } } -pub trait Stream { +pub trait Stream: Send { fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult>; fn ignore(&mut self, len: usize) -> KResult>; } @@ -131,6 +131,8 @@ pub struct UninitBuffer<'lt, T: Copy + Sized> { buffer: ByteBuffer<'lt>, } +unsafe impl<'lt, T: Copy> Send for UninitBuffer<'lt, T> {} + impl<'lt, T: Copy + Sized> UninitBuffer<'lt, 
T> { pub fn new() -> Self { let mut data = Box::new(MaybeUninit::uninit()); diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 4a10e4c7..ccd43c68 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -48,21 +48,6 @@ enum BlockDeviceType { }, } -#[derive(Debug, Clone)] -pub enum FileSystemType { - Ext4, - Fat32, -} - -impl FileSystemType { - pub fn as_str(&self) -> &'static str { - match self { - FileSystemType::Ext4 => "ext4", - FileSystemType::Fat32 => "fat32", - } - } -} - pub struct BlockDevice { /// Unique device identifier, major and minor numbers devid: DevId, diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs index 9394f64c..742727cb 100644 --- a/src/kernel/interrupt.rs +++ b/src/kernel/interrupt.rs @@ -1,4 +1,5 @@ use super::mem::handle_kernel_page_fault; +use super::task::block_on; use super::timer::timer_interrupt; use crate::kernel::constants::EINVAL; use crate::prelude::*; @@ -36,7 +37,7 @@ pub fn default_fault_handler(fault_type: Fault, trap_ctx: &mut TrapContext) { } => { let fault_pc = VAddr::from(trap_ctx.get_program_counter()); - if let Some(new_pc) = handle_kernel_page_fault(fault_pc, vaddr, error_code) { + if let Some(new_pc) = block_on(handle_kernel_page_fault(fault_pc, vaddr, error_code)) { trap_ctx.set_program_counter(new_pc.addr()); } } diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index 528d79ad..731c5303 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -2,7 +2,6 @@ use super::mm_list::EMPTY_PAGE; use super::paging::AllocZeroed as _; use super::{AsMemoryBlock, Mapping, Page, Permission}; use crate::kernel::constants::EINVAL; -use crate::kernel::task::block_on; use crate::prelude::KResult; use core::borrow::Borrow; use core::cell::UnsafeCell; @@ -19,6 +18,9 @@ pub struct MMArea { pub is_shared: bool, } +unsafe impl Send for MMArea {} +unsafe impl Sync for MMArea {} + impl Clone for MMArea { fn clone(&self) -> Self { Self { @@ -200,7 +202,7 @@ impl MMArea { Ok(()) } - 
pub fn handle(&self, pte: &mut impl PTE, offset: usize, write: bool) -> KResult<()> { + pub async fn handle(&self, pte: &mut impl PTE, offset: usize, write: bool) -> KResult<()> { let mut attr = pte.get_attr().as_page_attr().expect("Not a page attribute"); let mut pfn = pte.get_pfn(); @@ -209,7 +211,7 @@ impl MMArea { } if attr.contains(PageAttribute::MAPPED) { - block_on(self.handle_mmap(&mut pfn, &mut attr, offset, write))?; + self.handle_mmap(&mut pfn, &mut attr, offset, write).await?; } attr.insert(PageAttribute::ACCESSED); diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index 6593624b..ad1e45c2 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -7,7 +7,6 @@ use super::paging::AllocZeroed as _; use super::{AsMemoryBlock, MMArea, Page}; use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; use crate::kernel::mem::page_alloc::RawPagePtr; -use crate::kernel::task::block_on; use crate::{prelude::*, sync::ArcSwap}; use alloc::collections::btree_set::BTreeSet; use core::fmt; @@ -488,7 +487,7 @@ impl MMList { Ok(()) } - pub fn map_vdso(&self) -> KResult<()> { + pub async fn map_vdso(&self) -> KResult<()> { unsafe extern "C" { fn VDSO_PADDR(); } @@ -507,7 +506,7 @@ impl MMList { const VDSO_SIZE: usize = 0x1000; let inner = self.inner.borrow(); - let inner = block_on(inner.lock()); + let inner = inner.lock().await; let mut pte_iter = inner .page_table @@ -529,7 +528,7 @@ impl MMList { Ok(()) } - pub fn mmap_hint( + pub async fn mmap_hint( &self, hint: VAddr, len: usize, @@ -538,7 +537,7 @@ impl MMList { is_shared: bool, ) -> KResult { let inner = self.inner.borrow(); - let mut inner = block_on(inner.lock()); + let mut inner = inner.lock().await; if hint == VAddr::NULL { let at = inner.find_available(hint, len).ok_or(ENOMEM)?; @@ -557,7 +556,7 @@ impl MMList { } } - pub fn mmap_fixed( + pub async fn mmap_fixed( &self, at: VAddr, len: usize, @@ -565,14 +564,17 @@ impl MMList { permission: Permission, is_shared: bool, ) 
-> KResult { - block_on(self.inner.borrow().lock()) + self.inner + .borrow() + .lock() + .await .mmap(at, len, mapping.clone(), permission, is_shared) .map(|_| at) } - pub fn set_break(&self, pos: Option) -> VAddr { + pub async fn set_break(&self, pos: Option) -> VAddr { let inner = self.inner.borrow(); - let mut inner = block_on(inner.lock()); + let mut inner = inner.lock().await; // SAFETY: `set_break` is only called in syscalls, where program break should be valid. assert!(inner.break_start.is_some() && inner.break_pos.is_some()); @@ -629,9 +631,9 @@ impl MMList { } /// This should be called only **once** for every thread. - pub fn register_break(&self, start: VAddr) { + pub async fn register_break(&self, start: VAddr) { let inner = self.inner.borrow(); - let mut inner = block_on(inner.lock()); + let mut inner = inner.lock().await; assert!(inner.break_start.is_none() && inner.break_pos.is_none()); inner.break_start = Some(start.into()); @@ -640,7 +642,7 @@ impl MMList { /// Access the memory area with the given function. /// The function will be called with the offset of the area and the slice of the area. - pub fn access_mut(&self, start: VAddr, len: usize, func: F) -> KResult<()> + pub async fn access_mut(&self, start: VAddr, len: usize, func: F) -> KResult<()> where F: Fn(usize, &mut [u8]), { @@ -651,7 +653,7 @@ impl MMList { } let inner = self.inner.borrow(); - let inner = block_on(inner.lock()); + let inner = inner.lock().await; let mut offset = 0; let mut remaining = len; @@ -676,7 +678,7 @@ impl MMList { let page_end = page_start + 0x1000; // Prepare for the worst case that we might write to the page... 
- area.handle(pte, page_start - area_start, true)?; + area.handle(pte, page_start - area_start, true).await?; let start_offset; if page_start < current { diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs index 089fdf06..6f14583d 100644 --- a/src/kernel/mem/mm_list/page_fault.rs +++ b/src/kernel/mem/mm_list/page_fault.rs @@ -1,5 +1,5 @@ use super::{MMList, VAddr}; -use crate::kernel::task::{block_on, Thread}; +use crate::kernel::task::Thread; use eonix_hal::mm::flush_tlb; use eonix_hal::traits::fault::PageFaultErrorCode; use eonix_mm::address::{Addr as _, AddrOps as _, VRange}; @@ -94,6 +94,7 @@ impl MMList { addr.floor() - area.range().start(), error.contains(PageFaultErrorCode::Write), ) + .await .map_err(|_| Signal::SIGBUS)?; flush_tlb(addr.floor().addr()); @@ -128,7 +129,7 @@ fn kernel_page_fault_die(vaddr: VAddr, pc: VAddr) -> ! { ) } -pub fn handle_kernel_page_fault( +pub async fn handle_kernel_page_fault( fault_pc: VAddr, addr: VAddr, error: PageFaultErrorCode, @@ -148,7 +149,7 @@ pub fn handle_kernel_page_fault( let mms = &Thread::current().process.mm_list; let inner = mms.inner.borrow(); - let inner = block_on(inner.lock()); + let inner = inner.lock().await; let area = match inner.areas.get(&VRange::from(addr)) { Some(area) => area, @@ -163,11 +164,14 @@ pub fn handle_kernel_page_fault( .next() .expect("If we can find the mapped area, we should be able to find the PTE"); - if let Err(_) = area.handle( - pte, - addr.floor() - area.range().start(), - error.contains(PageFaultErrorCode::Write), - ) { + if let Err(_) = area + .handle( + pte, + addr.floor() - area.range().start(), + error.contains(PageFaultErrorCode::Write), + ) + .await + { return Some(try_page_fault_fix(fault_pc, addr)); } diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 863e538e..e0567d21 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -26,6 +26,8 @@ unsafe impl Sync for PageCache {} 
#[derive(Clone, Copy)] pub struct CachePage(RawPagePtr); +unsafe impl Send for CachePage {} + impl Buffer for CachePage { fn total(&self) -> usize { PAGE_SIZE diff --git a/src/kernel/syscall.rs b/src/kernel/syscall.rs index 0276ebdf..4131f3c4 100644 --- a/src/kernel/syscall.rs +++ b/src/kernel/syscall.rs @@ -1,5 +1,10 @@ +use super::task::ThreadAlloc; use crate::kernel::task::Thread; +use alloc::boxed::Box; +use core::{future::Future, marker::PhantomData, ops::Deref, pin::Pin}; +use eonix_mm::address::{Addr, VAddr}; use eonix_sync::LazyLock; +use posix_types::ctypes::PtrT; pub mod file_rw; pub mod mm; @@ -12,15 +17,33 @@ const MAX_SYSCALL_NO: usize = 512; #[derive(Debug, Clone, Copy)] pub struct SyscallNoReturn; +#[derive(Clone, Copy)] +pub struct User(VAddr, PhantomData); + +#[derive(Clone, Copy)] +pub struct UserMut(VAddr, PhantomData); + #[repr(C)] pub(self) struct RawSyscallHandler { no: usize, - handler: fn(&Thread, [usize; 6]) -> Option, + handler: for<'thd, 'alloc> fn( + &'thd Thread, + ThreadAlloc<'alloc>, + [usize; 6], + ) -> Pin< + Box> + Send + 'thd, ThreadAlloc<'alloc>>, + >, name: &'static str, } pub struct SyscallHandler { - pub handler: fn(&Thread, [usize; 6]) -> Option, + pub handler: for<'thd, 'alloc> fn( + &'thd Thread, + ThreadAlloc<'alloc>, + [usize; 6], + ) -> Pin< + Box> + Send + 'thd, ThreadAlloc<'alloc>>, + >, pub name: &'static str, } @@ -80,6 +103,18 @@ impl SyscallRetVal for SyscallNoReturn { } } +impl SyscallRetVal for User { + fn into_retval(self) -> Option { + Some(self.0.addr()) + } +} + +impl SyscallRetVal for UserMut { + fn into_retval(self) -> Option { + Some(self.0.addr()) + } +} + #[cfg(not(target_arch = "x86_64"))] impl SyscallRetVal for u64 { fn into_retval(self) -> Option { @@ -112,15 +147,129 @@ impl FromSyscallArg for usize { } } -impl FromSyscallArg for *const T { - fn from_arg(value: usize) -> *const T { - value as *const T +impl FromSyscallArg for PtrT { + fn from_arg(value: usize) -> Self { + 
PtrT::new(value).expect("Invalid user pointer value") + } +} + +impl FromSyscallArg for User { + fn from_arg(value: usize) -> User { + User(VAddr::from(value), PhantomData) + } +} + +impl FromSyscallArg for UserMut { + fn from_arg(value: usize) -> UserMut { + UserMut(VAddr::from(value), PhantomData) + } +} + +impl User { + pub const fn new(addr: VAddr) -> Self { + Self(addr, PhantomData) + } + + pub const fn with_addr(addr: usize) -> Self { + Self::new(VAddr::from(addr)) + } + + pub const fn null() -> Self { + Self(VAddr::NULL, PhantomData) + } + + pub fn is_null(&self) -> bool { + self.0.addr() == 0 + } + + pub const fn cast(self) -> User { + User(self.0, PhantomData) + } + + pub fn offset(self, off: isize) -> Self { + Self( + VAddr::from( + self.0 + .addr() + .checked_add_signed(off) + .expect("offset overflow"), + ), + PhantomData, + ) + } + + pub const unsafe fn as_mut(self) -> UserMut { + UserMut(self.0, PhantomData) + } +} + +impl UserMut { + pub const fn new(addr: VAddr) -> Self { + Self(addr, PhantomData) + } + + pub const fn with_addr(addr: usize) -> Self { + Self::new(VAddr::from(addr)) + } + + pub const fn null() -> Self { + Self(VAddr::NULL, PhantomData) + } + + pub fn is_null(&self) -> bool { + self.0.addr() == 0 + } + + pub const fn cast(self) -> UserMut { + UserMut(self.0, PhantomData) + } + + pub fn offset(self, off: isize) -> Self { + Self( + VAddr::from( + self.0 + .addr() + .checked_add_signed(off) + .expect("offset overflow"), + ), + PhantomData, + ) + } + + pub const fn as_const(self) -> User { + User(self.0, PhantomData) + } + + pub const fn vaddr(&self) -> VAddr { + self.0 + } +} + +impl Deref for User { + type Target = VAddr; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Deref for UserMut { + type Target = VAddr; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl core::fmt::Debug for User { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "User({:#x?})", self.0.addr()) } } 
-impl FromSyscallArg for *mut T { - fn from_arg(value: usize) -> *mut T { - value as *mut T +impl core::fmt::Debug for UserMut { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "UserMut({:#x?})", self.0.addr()) } } diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 97d47c48..ef222123 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,18 +1,16 @@ -use super::FromSyscallArg; +use super::{FromSyscallArg, User}; use crate::io::IntoStream; use crate::kernel::constants::{ EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, S_IFBLK, S_IFCHR, }; -use crate::kernel::task::{block_on, Thread}; +use crate::kernel::syscall::UserMut; +use crate::kernel::task::Thread; use crate::kernel::timer::sleep; use crate::kernel::vfs::filearray::FD; use crate::{ io::{Buffer, BufferFill}, kernel::{ - user::{ - dataflow::{CheckedUserPointer, UserBuffer, UserString}, - UserPointer, UserPointerMut, - }, + user::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}, vfs::{ dentry::Dentry, file::{PollEvent, SeekOption}, @@ -47,7 +45,7 @@ impl FromSyscallArg for AtFlags { fn dentry_from( thread: &Thread, dirfd: FD, - pathname: *const u8, + pathname: User, follow_symlink: bool, ) -> KResult> { let path = UserString::new(pathname)?; @@ -72,83 +70,91 @@ fn dentry_from( } #[eonix_macros::define_syscall(SYS_READ)] -fn read(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { +async fn read(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - block_on(thread.files.get(fd).ok_or(EBADF)?.read(&mut buffer, None)) + thread + .files + .get(fd) + .ok_or(EBADF)? 
+ .read(&mut buffer, None) + .await } #[eonix_macros::define_syscall(SYS_PREAD64)] -fn pread64(fd: FD, buffer: *mut u8, bufsize: usize, offset: usize) -> KResult { +async fn pread64(fd: FD, buffer: UserMut, bufsize: usize, offset: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - block_on( - thread - .files - .get(fd) - .ok_or(EBADF)? - .read(&mut buffer, Some(offset)), - ) + thread + .files + .get(fd) + .ok_or(EBADF)? + .read(&mut buffer, Some(offset)) + .await } #[eonix_macros::define_syscall(SYS_WRITE)] -fn write(fd: FD, buffer: *const u8, count: usize) -> KResult { +async fn write(fd: FD, buffer: User, count: usize) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); - block_on(thread.files.get(fd).ok_or(EBADF)?.write(&mut stream, None)) + thread + .files + .get(fd) + .ok_or(EBADF)? + .write(&mut stream, None) + .await } #[eonix_macros::define_syscall(SYS_PWRITE64)] -fn pwrite64(fd: FD, buffer: *const u8, count: usize, offset: usize) -> KResult { +async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); - block_on( - thread - .files - .get(fd) - .ok_or(EBADF)? - .write(&mut stream, Some(offset)), - ) + thread + .files + .get(fd) + .ok_or(EBADF)? 
+ .write(&mut stream, Some(offset)) + .await } #[eonix_macros::define_syscall(SYS_OPENAT)] -fn openat(dirfd: FD, pathname: *const u8, flags: OpenFlags, mode: u32) -> KResult { +async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mode: u32) -> KResult { let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink())?; thread.files.open(&dentry, flags, mode) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_OPEN)] -fn open(path: *const u8, flags: OpenFlags, mode: u32) -> KResult { - sys_openat(thread, FD::AT_FDCWD, path, flags, mode) +async fn open(path: User, flags: OpenFlags, mode: u32) -> KResult { + sys_openat(thread, FD::AT_FDCWD, path, flags, mode).await } #[eonix_macros::define_syscall(SYS_CLOSE)] -fn close(fd: FD) -> KResult<()> { +async fn close(fd: FD) -> KResult<()> { thread.files.close(fd) } #[eonix_macros::define_syscall(SYS_DUP)] -fn dup(fd: FD) -> KResult { +async fn dup(fd: FD) -> KResult { thread.files.dup(fd) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_DUP2)] -fn dup2(old_fd: FD, new_fd: FD) -> KResult { +async fn dup2(old_fd: FD, new_fd: FD) -> KResult { thread.files.dup_to(old_fd, new_fd, OpenFlags::empty()) } #[eonix_macros::define_syscall(SYS_DUP3)] -fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { +async fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { thread.files.dup_to(old_fd, new_fd, flags) } #[eonix_macros::define_syscall(SYS_PIPE2)] -fn pipe2(pipe_fd: *mut [FD; 2], flags: OpenFlags) -> KResult<()> { - let mut buffer = UserBuffer::new(pipe_fd as *mut u8, core::mem::size_of::<[FD; 2]>())?; +async fn pipe2(pipe_fd: UserMut<[FD; 2]>, flags: OpenFlags) -> KResult<()> { + let mut buffer = UserBuffer::new(pipe_fd.cast(), core::mem::size_of::<[FD; 2]>())?; let (read_fd, write_fd) = thread.files.pipe(flags)?; buffer.copy(&[read_fd, write_fd])?.ok_or(EFAULT) @@ -156,13 +162,13 @@ fn pipe2(pipe_fd: *mut [FD; 2], flags: OpenFlags) -> KResult<()> { 
#[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_PIPE)] -fn pipe(pipe_fd: *mut [FD; 2]) -> KResult<()> { - sys_pipe2(thread, pipe_fd, OpenFlags::empty()) +async fn pipe(pipe_fd: UserMut<[FD; 2]>) -> KResult<()> { + sys_pipe2(thread, pipe_fd, OpenFlags::empty()).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETDENTS)] -fn getdents(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { +async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread.files.get(fd).ok_or(EBADF)?.getdents(&mut buffer)?; @@ -170,7 +176,7 @@ fn getdents(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_GETDENTS64)] -fn getdents64(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { +async fn getdents64(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread.files.get(fd).ok_or(EBADF)?.getdents64(&mut buffer)?; @@ -182,7 +188,12 @@ fn getdents64(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { eonix_macros::define_syscall(SYS_NEWFSTATAT) )] #[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTATAT64))] -fn newfstatat(dirfd: FD, pathname: *const u8, statbuf: *mut Stat, flags: AtFlags) -> KResult<()> { +async fn newfstatat( + dirfd: FD, + pathname: User, + statbuf: UserMut, + flags: AtFlags, +) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -205,23 +216,17 @@ fn newfstatat(dirfd: FD, pathname: *const u8, statbuf: *mut Stat, flags: AtFlags eonix_macros::define_syscall(SYS_NEWFSTAT) )] #[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTAT64))] -fn newfstat(fd: FD, statbuf: *mut Stat) -> KResult<()> { - sys_newfstatat( - thread, - fd, - core::ptr::null(), - statbuf, - AtFlags::AT_EMPTY_PATH, - ) +async fn newfstat(fd: FD, statbuf: UserMut) -> 
KResult<()> { + sys_newfstatat(thread, fd, User::null(), statbuf, AtFlags::AT_EMPTY_PATH).await } #[eonix_macros::define_syscall(SYS_STATX)] -fn statx( +async fn statx( dirfd: FD, - pathname: *const u8, + pathname: User, flags: AtFlags, mask: u32, - buffer: *mut StatX, + buffer: UserMut, ) -> KResult<()> { if !flags.statx_default_sync() { unimplemented!("statx with no default sync flags: {:?}", flags); @@ -244,7 +249,7 @@ fn statx( } #[eonix_macros::define_syscall(SYS_MKDIRAT)] -fn mkdirat(dirfd: FD, pathname: *const u8, mode: u32) -> KResult<()> { +async fn mkdirat(dirfd: FD, pathname: User, mode: u32) -> KResult<()> { let umask = *thread.fs_context.umask.lock(); let mode = mode & !umask & 0o777; @@ -254,19 +259,19 @@ fn mkdirat(dirfd: FD, pathname: *const u8, mode: u32) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKDIR)] -fn mkdir(pathname: *const u8, mode: u32) -> KResult<()> { - sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode) +async fn mkdir(pathname: User, mode: u32) -> KResult<()> { + sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode).await } #[eonix_macros::define_syscall(SYS_FTRUNCATE64)] -fn truncate64(fd: FD, length: usize) -> KResult<()> { +async fn truncate64(fd: FD, length: usize) -> KResult<()> { let file = thread.files.get(fd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.truncate(length) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_TRUNCATE)] -fn truncate(pathname: *const u8, length: usize) -> KResult<()> { +async fn truncate(pathname: User, length: usize) -> KResult<()> { let path = UserString::new(pathname)?; let path = Path::new(path.as_cstr().to_bytes())?; @@ -276,18 +281,18 @@ fn truncate(pathname: *const u8, length: usize) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_UNLINKAT)] -fn unlinkat(dirfd: FD, pathname: *const u8) -> KResult<()> { +async fn unlinkat(dirfd: FD, pathname: User) -> KResult<()> { dentry_from(thread, dirfd, pathname, false)?.unlink() } #[cfg(target_arch = 
"x86_64")] #[eonix_macros::define_syscall(SYS_UNLINK)] -fn unlink(pathname: *const u8) -> KResult<()> { +async fn unlink(pathname: User) -> KResult<()> { sys_unlinkat(thread, FD::AT_FDCWD, pathname) } #[eonix_macros::define_syscall(SYS_SYMLINKAT)] -fn symlinkat(target: *const u8, dirfd: FD, linkpath: *const u8) -> KResult<()> { +async fn symlinkat(target: User, dirfd: FD, linkpath: User) -> KResult<()> { let target = UserString::new(target)?; let dentry = dentry_from(thread, dirfd, linkpath, false)?; @@ -296,12 +301,12 @@ fn symlinkat(target: *const u8, dirfd: FD, linkpath: *const u8) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SYMLINK)] -fn symlink(target: *const u8, linkpath: *const u8) -> KResult<()> { +async fn symlink(target: User, linkpath: User) -> KResult<()> { sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath) } #[eonix_macros::define_syscall(SYS_MKNODAT)] -fn mknodat(dirfd: FD, pathname: *const u8, mode: u32, dev: u32) -> KResult<()> { +async fn mknodat(dirfd: FD, pathname: User, mode: u32, dev: u32) -> KResult<()> { let dentry = dentry_from(thread, dirfd, pathname, true)?; let umask = *thread.fs_context.umask.lock(); @@ -312,12 +317,17 @@ fn mknodat(dirfd: FD, pathname: *const u8, mode: u32, dev: u32) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKNOD)] -fn mknod(pathname: *const u8, mode: u32, dev: u32) -> KResult<()> { - sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev) +async fn mknod(pathname: User, mode: u32, dev: u32) -> KResult<()> { + sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev).await } #[eonix_macros::define_syscall(SYS_READLINKAT)] -fn readlinkat(dirfd: FD, pathname: *const u8, buffer: *mut u8, bufsize: usize) -> KResult { +async fn readlinkat( + dirfd: FD, + pathname: User, + buffer: UserMut, + bufsize: usize, +) -> KResult { let dentry = dentry_from(thread, dirfd, pathname, false)?; let mut buffer = UserBuffer::new(buffer, bufsize)?; @@ -326,11 
+336,11 @@ fn readlinkat(dirfd: FD, pathname: *const u8, buffer: *mut u8, bufsize: usize) - #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_READLINK)] -fn readlink(pathname: *const u8, buffer: *mut u8, bufsize: usize) -> KResult { - sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize) +async fn readlink(pathname: User, buffer: UserMut, bufsize: usize) -> KResult { + sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize).await } -fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { +async fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; Ok(match whence { @@ -343,17 +353,23 @@ fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { #[cfg(not(target_arch = "x86_64"))] #[eonix_macros::define_syscall(SYS_LSEEK)] -fn lseek(fd: FD, offset: u64, whence: u32) -> KResult { - do_lseek(thread, fd, offset, whence) +async fn lseek(fd: FD, offset: u64, whence: u32) -> KResult { + do_lseek(thread, fd, offset, whence).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_LLSEEK)] -fn llseek(fd: FD, offset_high: u32, offset_low: u32, result: *mut u64, whence: u32) -> KResult<()> { - let mut result = UserBuffer::new(result as *mut u8, core::mem::size_of::())?; +async fn llseek( + fd: FD, + offset_high: u32, + offset_low: u32, + result: UserMut, + whence: u32, +) -> KResult<()> { + let mut result = UserBuffer::new(result.cast(), core::mem::size_of::())?; let offset = ((offset_high as u64) << 32) | (offset_low as u64); - let new_offset = do_lseek(thread, fd, offset, whence)?; + let new_offset = do_lseek(thread, fd, offset, whence).await?; result.copy(&new_offset)?.ok_or(EFAULT) } @@ -366,7 +382,7 @@ struct IoVec { } #[eonix_macros::define_syscall(SYS_READV)] -fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { +async fn readv(fd: FD, iov_user: User, iovcnt: u32) -> KResult { let file = 
thread.files.get(fd).ok_or(EBADF)?; let mut iov_user = UserPointer::new(iov_user)?; @@ -381,14 +397,16 @@ fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { Ok(IoVec { len: Long::ZERO, .. }) => None, - Ok(IoVec { base, len }) => Some(UserBuffer::new(base.addr() as *mut u8, len.get())), + Ok(IoVec { base, len }) => { + Some(UserBuffer::new(UserMut::with_addr(base.addr()), len.get())) + } }) .collect::>>()?; let mut tot = 0usize; for mut buffer in iov_buffers.into_iter() { // TODO!!!: `readv` - let nread = block_on(file.read(&mut buffer, None))?; + let nread = file.read(&mut buffer, None).await?; tot += nread; if nread != buffer.total() { @@ -400,7 +418,7 @@ fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_WRITEV)] -fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { +async fn writev(fd: FD, iov_user: User, iovcnt: u32) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; let mut iov_user = UserPointer::new(iov_user)?; @@ -416,7 +434,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { len: Long::ZERO, .. 
}) => None, Ok(IoVec { base, len }) => Some( - CheckedUserPointer::new(base.addr() as *mut u8, len.get()) + CheckedUserPointer::new(User::with_addr(base.addr()), len.get()) .map(|ptr| ptr.into_stream()), ), }) @@ -424,7 +442,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { let mut tot = 0usize; for mut stream in iov_streams.into_iter() { - let nread = block_on(file.write(&mut stream, None))?; + let nread = file.write(&mut stream, None).await?; tot += nread; if nread == 0 || !stream.is_drained() { @@ -436,7 +454,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_FACCESSAT)] -fn faccessat(dirfd: FD, pathname: *const u8, _mode: u32, flags: AtFlags) -> KResult<()> { +async fn faccessat(dirfd: FD, pathname: User, _mode: u32, flags: AtFlags) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -462,12 +480,12 @@ fn faccessat(dirfd: FD, pathname: *const u8, _mode: u32, flags: AtFlags) -> KRes #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_ACCESS)] -fn access(pathname: *const u8, mode: u32) -> KResult<()> { - sys_faccessat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()) +async fn access(pathname: User, mode: u32) -> KResult<()> { + sys_faccessat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()).await } #[eonix_macros::define_syscall(SYS_SENDFILE64)] -fn sendfile64(out_fd: FD, in_fd: FD, offset: *mut u8, count: usize) -> KResult { +async fn sendfile64(out_fd: FD, in_fd: FD, offset: UserMut, count: usize) -> KResult { let in_file = thread.files.get(in_fd).ok_or(EBADF)?; let out_file = thread.files.get(out_fd).ok_or(EBADF)?; @@ -475,18 +493,18 @@ fn sendfile64(out_fd: FD, in_fd: FD, offset: *mut u8, count: usize) -> KResult KResult { +async fn ioctl(fd: FD, request: usize, arg3: usize) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; file.ioctl(request, arg3) 
} #[eonix_macros::define_syscall(SYS_FCNTL64)] -fn fcntl64(fd: FD, cmd: u32, arg: usize) -> KResult { +async fn fcntl64(fd: FD, cmd: u32, arg: usize) -> KResult { thread.files.fcntl(fd, cmd, arg) } @@ -498,7 +516,12 @@ struct UserPollFd { revents: u16, } -fn do_poll(thread: &Thread, fds: *mut UserPollFd, nfds: u32, _timeout: u32) -> KResult { +async fn do_poll( + thread: &Thread, + fds: UserMut, + nfds: u32, + _timeout: u32, +) -> KResult { match nfds { 0 => Ok(0), 2.. => unimplemented!("Poll with {} fds", nfds), @@ -511,7 +534,10 @@ fn do_poll(thread: &Thread, fds: *mut UserPollFd, nfds: u32, _timeout: u32) -> K let mut fd = fds.read()?; let file = thread.files.get(fd.fd).ok_or(EBADF)?; - fd.revents = block_on(file.poll(PollEvent::from_bits_retain(fd.events)))?.bits(); + fd.revents = file + .poll(PollEvent::from_bits_retain(fd.events)) + .await? + .bits(); fds.write(fd)?; Ok(1) @@ -520,24 +546,24 @@ fn do_poll(thread: &Thread, fds: *mut UserPollFd, nfds: u32, _timeout: u32) -> K } #[eonix_macros::define_syscall(SYS_PPOLL)] -fn ppoll( - fds: *mut UserPollFd, +async fn ppoll( + fds: UserMut, nfds: u32, - _timeout_ptr: *const TimeSpec, - _sigmask: *const SigSet, + _timeout_ptr: User, + _sigmask: User, ) -> KResult { // TODO: Implement ppoll with signal mask and timeout - do_poll(thread, fds, nfds, 0) + do_poll(thread, fds, nfds, 0).await } #[eonix_macros::define_syscall(SYS_PSELECT6)] -fn pselect6( +async fn pselect6( nfds: u32, - _readfds: *mut FDSet, - _writefds: *mut FDSet, - _exceptfds: *mut FDSet, - timeout: *mut TimeSpec, - _sigmask: *const (), + _readfds: UserMut, + _writefds: UserMut, + _exceptfds: UserMut, + timeout: UserMut, + _sigmask: User<()>, ) -> KResult { // According to [pthread6(2)](https://linux.die.net/man/2/pselect6): // Some code calls select() with all three sets empty, nfds zero, and @@ -552,7 +578,7 @@ fn pselect6( // Read here to check for invalid pointers. 
let _timeout_value = timeout.read()?; - block_on(sleep(Duration::from_millis(10))); + sleep(Duration::from_millis(10)).await; timeout.write(TimeSpec { tv_sec: 0, @@ -564,12 +590,18 @@ fn pselect6( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_POLL)] -fn poll(fds: *mut UserPollFd, nfds: u32, timeout: u32) -> KResult { - do_poll(thread, fds, nfds, timeout) +async fn poll(fds: UserMut, nfds: u32, timeout: u32) -> KResult { + do_poll(thread, fds, nfds, timeout).await } #[eonix_macros::define_syscall(SYS_FCHOWNAT)] -fn fchownat(dirfd: FD, pathname: *const u8, uid: u32, gid: u32, flags: AtFlags) -> KResult<()> { +async fn fchownat( + dirfd: FD, + pathname: User, + uid: u32, + gid: u32, + flags: AtFlags, +) -> KResult<()> { let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow())?; if !dentry.is_valid() { return Err(ENOENT); @@ -579,7 +611,7 @@ fn fchownat(dirfd: FD, pathname: *const u8, uid: u32, gid: u32, flags: AtFlags) } #[eonix_macros::define_syscall(SYS_FCHMODAT)] -fn fchmodat(dirfd: FD, pathname: *const u8, mode: u32, flags: AtFlags) -> KResult<()> { +async fn fchmodat(dirfd: FD, pathname: User, mode: u32, flags: AtFlags) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -595,15 +627,15 @@ fn fchmodat(dirfd: FD, pathname: *const u8, mode: u32, flags: AtFlags) -> KResul } #[eonix_macros::define_syscall(SYS_FCHMOD)] -fn chmod(pathname: *const u8, mode: u32) -> KResult<()> { - sys_fchmodat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()) +async fn chmod(pathname: User, mode: u32) -> KResult<()> { + sys_fchmodat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()).await } #[eonix_macros::define_syscall(SYS_UTIMENSAT)] -fn utimensat( +async fn utimensat( dirfd: FD, - pathname: *const u8, - times: *const TimeSpec, + pathname: User, + times: User, flags: AtFlags, ) -> KResult<()> { let dentry = if flags.at_empty_path() { @@ -630,11 
+662,11 @@ fn utimensat( } #[eonix_macros::define_syscall(SYS_RENAMEAT2)] -fn renameat2( +async fn renameat2( old_dirfd: FD, - old_pathname: *const u8, + old_pathname: User, new_dirfd: FD, - new_pathname: *const u8, + new_pathname: User, flags: u32, ) -> KResult<()> { let flags = RenameFlags::from_bits(flags).ok_or(EINVAL)?; @@ -652,7 +684,7 @@ fn renameat2( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_RENAME)] -fn rename(old_pathname: *const u8, new_pathname: *const u8) -> KResult<()> { +async fn rename(old_pathname: User, new_pathname: User) -> KResult<()> { sys_renameat2( thread, FD::AT_FDCWD, @@ -661,6 +693,7 @@ fn rename(old_pathname: *const u8, new_pathname: *const u8) -> KResult<()> { new_pathname, 0, ) + .await } pub fn keep_alive() {} diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index b6ba5fdc..547635d4 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -2,7 +2,7 @@ use super::FromSyscallArg; use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; use crate::kernel::mem::FileMapping; -use crate::kernel::task::{block_on, Thread}; +use crate::kernel::task::Thread; use crate::kernel::vfs::filearray::FD; use crate::{ kernel::{ @@ -39,7 +39,7 @@ fn check_impl(condition: bool, err: u32) -> KResult<()> { } } -fn do_mmap2( +async fn do_mmap2( thread: &Thread, addr: usize, len: usize, @@ -67,7 +67,10 @@ fn do_mmap2( } else { // The mode is unimportant here, since we are checking prot in mm_area. let shared_area = - block_on(SHM_MANAGER.lock()).create_shared_area(len, thread.process.pid, 0x777); + SHM_MANAGER + .lock() + .await + .create_shared_area(len, thread.process.pid, 0x777); Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) } } else { @@ -90,10 +93,14 @@ fn do_mmap2( // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether // `addr` is above user reachable memory. 
let addr = if flags.contains(UserMmapFlags::MAP_FIXED) { - block_on(mm_list.unmap(addr, len)); - mm_list.mmap_fixed(addr, len, mapping, permission, is_shared) + mm_list.unmap(addr, len).await?; + mm_list + .mmap_fixed(addr, len, mapping, permission, is_shared) + .await } else { - mm_list.mmap_hint(addr, len, mapping, permission, is_shared) + mm_list + .mmap_hint(addr, len, mapping, permission, is_shared) + .await }; addr.map(|addr| addr.addr()) @@ -101,7 +108,7 @@ fn do_mmap2( #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] #[eonix_macros::define_syscall(SYS_MMAP)] -fn mmap( +async fn mmap( addr: usize, len: usize, prot: UserMmapProtocol, @@ -109,12 +116,12 @@ fn mmap( fd: FD, offset: usize, ) -> KResult { - do_mmap2(thread, addr, len, prot, flags, fd, offset) + do_mmap2(thread, addr, len, prot, flags, fd, offset).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MMAP2)] -fn mmap2( +async fn mmap2( addr: usize, len: usize, prot: UserMmapProtocol, @@ -122,33 +129,33 @@ fn mmap2( fd: FD, pgoffset: usize, ) -> KResult { - do_mmap2(thread, addr, len, prot, flags, fd, pgoffset) + do_mmap2(thread, addr, len, prot, flags, fd, pgoffset).await } #[eonix_macros::define_syscall(SYS_MUNMAP)] -fn munmap(addr: usize, len: usize) -> KResult { +async fn munmap(addr: usize, len: usize) -> KResult<()> { let addr = VAddr::from(addr); if !addr.is_page_aligned() || len == 0 { return Err(EINVAL); } let len = len.align_up(PAGE_SIZE); - block_on(thread.process.mm_list.unmap(addr, len)).map(|_| 0) + thread.process.mm_list.unmap(addr, len).await } #[eonix_macros::define_syscall(SYS_BRK)] -fn brk(addr: usize) -> KResult { +async fn brk(addr: usize) -> KResult { let vaddr = if addr == 0 { None } else { Some(VAddr::from(addr)) }; - Ok(thread.process.mm_list.set_break(vaddr).addr()) + Ok(thread.process.mm_list.set_break(vaddr).await.addr()) } #[eonix_macros::define_syscall(SYS_MADVISE)] -fn madvise(_addr: usize, _len: usize, _advice: u32) -> KResult<()> 
{ +async fn madvise(_addr: usize, _len: usize, _advice: u32) -> KResult<()> { Ok(()) } #[eonix_macros::define_syscall(SYS_MPROTECT)] -fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { +async fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { let addr = VAddr::from(addr); if !addr.is_page_aligned() || len == 0 { return Err(EINVAL); @@ -156,22 +163,26 @@ fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { let len = len.align_up(PAGE_SIZE); - block_on(thread.process.mm_list.protect( - addr, - len, - Permission { - read: prot.contains(UserMmapProtocol::PROT_READ), - write: prot.contains(UserMmapProtocol::PROT_WRITE), - execute: prot.contains(UserMmapProtocol::PROT_EXEC), - }, - )) + thread + .process + .mm_list + .protect( + addr, + len, + Permission { + read: prot.contains(UserMmapProtocol::PROT_READ), + write: prot.contains(UserMmapProtocol::PROT_WRITE), + execute: prot.contains(UserMmapProtocol::PROT_EXEC), + }, + ) + .await } #[eonix_macros::define_syscall(SYS_SHMGET)] -fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { +async fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { let size = size.align_up(PAGE_SIZE); - let mut shm_manager = block_on(SHM_MANAGER.lock()); + let mut shm_manager = SHM_MANAGER.lock().await; let shmid = gen_shm_id(key)?; let mode = shmflg & 0o777; @@ -197,16 +208,17 @@ fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { return Ok(shmid); } - return Err(ENOENT); + Err(ENOENT) } #[eonix_macros::define_syscall(SYS_SHMAT)] -fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { +async fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { let mm_list = &thread.process.mm_list; - let shm_manager = block_on(SHM_MANAGER.lock()); + let shm_manager = SHM_MANAGER.lock().await; let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?; - let mode = shmflg & 0o777; + // Why is this not used? 
+ let _mode = shmflg & 0o777; let shmflg = ShmFlags::from_bits_truncate(shmflg); let mut permission = Permission { @@ -235,9 +247,13 @@ fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { return Err(EINVAL); } let addr = VAddr::from(addr.align_down(PAGE_SIZE)); - mm_list.mmap_fixed(addr, size, mapping, permission, true) + mm_list + .mmap_fixed(addr, size, mapping, permission, true) + .await } else { - mm_list.mmap_hint(VAddr::NULL, size, mapping, permission, true) + mm_list + .mmap_hint(VAddr::NULL, size, mapping, permission, true) + .await }?; thread.process.shm_areas.lock().insert(addr, size); @@ -246,22 +262,29 @@ fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_SHMDT)] -fn shmdt(addr: usize) -> KResult { +async fn shmdt(addr: usize) -> KResult<()> { let addr = VAddr::from(addr); - let mut shm_areas = thread.process.shm_areas.lock(); - let size = *shm_areas.get(&addr).ok_or(EINVAL)?; - shm_areas.remove(&addr); - drop(shm_areas); - return block_on(thread.process.mm_list.unmap(addr, size)).map(|_| 0); + + let size = { + let mut shm_areas = thread.process.shm_areas.lock(); + let size = *shm_areas.get(&addr).ok_or(EINVAL)?; + shm_areas.remove(&addr); + + size + }; + + thread.process.mm_list.unmap(addr, size).await } #[eonix_macros::define_syscall(SYS_SHMCTL)] -fn shmctl(shmid: u32, op: i32, shmid_ds: usize) -> KResult { +async fn shmctl(_shmid: u32, _op: i32, _shmid_ds: usize) -> KResult { + // TODO Ok(0) } #[eonix_macros::define_syscall(SYS_MEMBARRIER)] -fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> { +async fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> { + // TODO Ok(()) } diff --git a/src/kernel/syscall/net.rs b/src/kernel/syscall/net.rs index 82ec9152..41ac58e6 100644 --- a/src/kernel/syscall/net.rs +++ b/src/kernel/syscall/net.rs @@ -3,7 +3,7 @@ use crate::prelude::*; use posix_types::syscall_no::*; #[eonix_macros::define_syscall(SYS_SOCKET)] -fn socket(_domain: u32, _socket_type: 
u32, _protocol: u32) -> KResult { +async fn socket(_domain: u32, _socket_type: u32, _protocol: u32) -> KResult { Err(EINVAL) } diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 62194691..1dee462d 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -7,26 +7,26 @@ use crate::kernel::constants::{ ENOSYS, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, }; use crate::kernel::mem::PageBuffer; +use crate::kernel::syscall::{User, UserMut}; use crate::kernel::task::{ - block_on, do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, - ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, + do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, ProgramLoader, + RobustListHead, SignalAction, Thread, WaitId, WaitType, }; use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; -use crate::kernel::user::dataflow::UserString; +use crate::kernel::user::UserString; use crate::kernel::user::{UserPointer, UserPointerMut}; use crate::kernel::vfs::{self, dentry::Dentry}; use crate::path::Path; -use crate::{kernel::user::dataflow::UserBuffer, prelude::*}; +use crate::{kernel::user::UserBuffer, prelude::*}; use alloc::borrow::ToOwned; use alloc::ffi::CString; use bitflags::bitflags; -use core::ptr::NonNull; use core::time::Duration; use eonix_hal::processor::UserTLS; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; -use eonix_mm::address::{Addr as _, VAddr}; +use eonix_mm::address::Addr as _; use eonix_sync::AsProof as _; use posix_types::ctypes::PtrT; use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; @@ -49,7 +49,7 @@ bitflags! 
{ } #[eonix_macros::define_syscall(SYS_NANOSLEEP)] -fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { +async fn nanosleep(req: User<(u32, u32)>, rem: UserMut<(u32, u32)>) -> KResult { let req = UserPointer::new(req)?.read()?; let rem = if rem.is_null() { None @@ -58,7 +58,7 @@ fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - block_on(sleep(duration)); + sleep(duration).await; if let Some(rem) = rem { rem.write((0, 0))?; @@ -68,11 +68,11 @@ fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { } #[eonix_macros::define_syscall(SYS_CLOCK_NANOSLEEP)] -fn clock_nanosleep( +async fn clock_nanosleep( clock_id: u32, - flags: u32, - req: *const (u32, u32), - rem: *mut (u32, u32), + _flags: u32, + req: User<(u32, u32)>, + rem: UserMut<(u32, u32)>, ) -> KResult { if clock_id != CLOCK_REALTIME && clock_id != CLOCK_REALTIME_COARSE @@ -89,7 +89,7 @@ fn clock_nanosleep( }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - block_on(sleep(duration)); + sleep(duration).await; if let Some(rem) = rem { rem.write((0, 0))?; @@ -99,7 +99,7 @@ fn clock_nanosleep( } #[eonix_macros::define_syscall(SYS_UMASK)] -fn umask(mask: u32) -> KResult { +async fn umask(mask: u32) -> KResult { let mut umask = thread.fs_context.umask.lock(); let old = *umask; @@ -108,7 +108,7 @@ fn umask(mask: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_GETCWD)] -fn getcwd(buffer: *mut u8, bufsize: usize) -> KResult { +async fn getcwd(buffer: UserMut, bufsize: usize) -> KResult { let mut user_buffer = UserBuffer::new(buffer, bufsize)?; let mut buffer = PageBuffer::new(); @@ -121,7 +121,7 @@ fn getcwd(buffer: *mut u8, bufsize: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_CHDIR)] -fn chdir(path: *const u8) -> KResult<()> { +async fn chdir(path: User) -> KResult<()> { let path = UserString::new(path)?; let 
path = Path::new(path.as_cstr().to_bytes())?; @@ -139,7 +139,7 @@ fn chdir(path: *const u8) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_UMOUNT)] -fn umount(source: *const u8) -> KResult<()> { +async fn umount(source: User) -> KResult<()> { let source = UserString::new(source)?; if source.as_cstr().to_str().unwrap() == "./mnt" { return Ok(()); @@ -148,7 +148,7 @@ fn umount(source: *const u8) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_MOUNT)] -fn mount(source: *const u8, target: *const u8, fstype: *const u8, flags: usize) -> KResult<()> { +async fn mount(source: User, target: User, fstype: User, flags: usize) -> KResult<()> { let source = UserString::new(source)?; if source.as_cstr().to_str().unwrap() == "/dev/vda2" { return Ok(()); @@ -184,7 +184,7 @@ fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult> break; } - let user_string = UserString::new(ptr.addr() as *const u8)?; + let user_string = UserString::new(User::with_addr(ptr.addr()))?; strings.push(user_string.as_cstr().to_owned()); ptr_strings = ptr_strings.offset(1)?; } @@ -193,7 +193,7 @@ fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult> } #[eonix_macros::define_syscall(SYS_EXECVE)] -fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult { +async fn execve(exec: User, argv: User, envp: User) -> KResult { let exec = UserString::new(exec)?; let exec = exec.as_cstr().to_owned(); @@ -207,11 +207,12 @@ fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult KResult SyscallNoReturn { +async fn exit(status: u32) -> SyscallNoReturn { + let mut procs = ProcessList::get().write().await; + unsafe { - let mut procs = block_on(ProcessList::get().write()); - block_on(procs.do_exit(&thread, WaitType::Exited(status), false)); + procs + .do_exit(&thread, WaitType::Exited(status), false) + .await; } SyscallNoReturn } #[eonix_macros::define_syscall(SYS_EXIT_GROUP)] -fn exit_group(status: u32) -> SyscallNoReturn { +async fn 
exit_group(status: u32) -> SyscallNoReturn { + let mut procs = ProcessList::get().write().await; + unsafe { - let mut procs = block_on(ProcessList::get().write()); - block_on(procs.do_exit(&thread, WaitType::Exited(status), true)); + procs.do_exit(&thread, WaitType::Exited(status), true).await; } SyscallNoReturn } enum WaitInfo { - SigInfo(NonNull), - Status(NonNull), + SigInfo(UserMut), + Status(UserMut), None, } -fn do_waitid( +async fn do_waitid( thread: &Thread, wait_id: WaitId, info: WaitInfo, options: u32, - rusage: *mut RUsage, + rusage: UserMut, ) -> KResult { if !rusage.is_null() { unimplemented!("waitid with rusage pointer"); @@ -277,12 +282,15 @@ fn do_waitid( Some(options) => options, }; - let Some(wait_object) = block_on(thread.process.wait( - wait_id, - options.contains(UserWaitOptions::WNOHANG), - options.contains(UserWaitOptions::WUNTRACED), - options.contains(UserWaitOptions::WCONTINUED), - ))? + let Some(wait_object) = thread + .process + .wait( + wait_id, + options.contains(UserWaitOptions::WNOHANG), + options.contains(UserWaitOptions::WUNTRACED), + options.contains(UserWaitOptions::WCONTINUED), + ) + .await? 
else { return Ok(0); }; @@ -298,11 +306,11 @@ fn do_waitid( siginfo.si_status = status; siginfo.si_code = code; - UserPointerMut::new(siginfo_ptr.as_ptr())?.write(siginfo)?; + UserPointerMut::new(siginfo_ptr)?.write(siginfo)?; Ok(0) } WaitInfo::Status(status_ptr) => { - UserPointerMut::new(status_ptr.as_ptr())?.write(wait_object.code.to_wstatus())?; + UserPointerMut::new(status_ptr)?.write(wait_object.code.to_wstatus())?; Ok(wait_object.pid) } WaitInfo::None => Ok(wait_object.pid), @@ -310,18 +318,16 @@ fn do_waitid( } #[eonix_macros::define_syscall(SYS_WAITID)] -fn waitid( +async fn waitid( id_type: u32, id: u32, - info: *mut SigInfo, + info: UserMut, options: u32, - rusage: *mut RUsage, + rusage: UserMut, ) -> KResult { let wait_id = WaitId::from_type_and_id(id_type, id)?; - if let Some(info) = NonNull::new(info) { - do_waitid(thread, wait_id, WaitInfo::SigInfo(info), options, rusage) - } else { + if info.is_null() { /* * According to POSIX.1-2008, an application calling waitid() must * ensure that infop points to a siginfo_t structure (i.e., that it @@ -332,34 +338,41 @@ fn waitid( */ unimplemented!("waitid with null info pointer"); } + + do_waitid(thread, wait_id, WaitInfo::SigInfo(info), options, rusage).await } #[eonix_macros::define_syscall(SYS_WAIT4)] -fn wait4(wait_id: i32, arg1: *mut u32, options: u32, rusage: *mut RUsage) -> KResult { - let waitinfo = if let Some(status) = NonNull::new(arg1) { - WaitInfo::Status(status) - } else { +async fn wait4( + wait_id: i32, + arg1: UserMut, + options: u32, + rusage: UserMut, +) -> KResult { + let waitinfo = if arg1.is_null() { WaitInfo::None + } else { + WaitInfo::Status(arg1) }; let wait_id = WaitId::from_id(wait_id, thread); - do_waitid(thread, wait_id, waitinfo, options, rusage) + do_waitid(thread, wait_id, waitinfo, options, rusage).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_WAITPID)] -fn waitpid(waitpid: i32, arg1: *mut u32, options: u32) -> KResult { - sys_wait4(thread, waitpid, 
arg1, options, core::ptr::null_mut()) +async fn waitpid(waitpid: i32, arg1: UserMut, options: u32) -> KResult { + sys_wait4(thread, waitpid, arg1, options, core::ptr::null_mut()).await } #[eonix_macros::define_syscall(SYS_SETSID)] -fn setsid() -> KResult { - thread.process.setsid() +async fn setsid() -> KResult { + thread.process.setsid().await } #[eonix_macros::define_syscall(SYS_SETPGID)] -fn setpgid(pid: u32, pgid: i32) -> KResult<()> { +async fn setpgid(pid: u32, pgid: i32) -> KResult<()> { let pid = if pid == 0 { thread.process.pid } else { pid }; let pgid = match pgid { @@ -368,15 +381,15 @@ fn setpgid(pid: u32, pgid: i32) -> KResult<()> { _ => return Err(EINVAL), }; - thread.process.setpgid(pid, pgid) + thread.process.setpgid(pid, pgid).await } #[eonix_macros::define_syscall(SYS_GETSID)] -fn getsid(pid: u32) -> KResult { +async fn getsid(pid: u32) -> KResult { if pid == 0 { Ok(thread.process.session_rcu().sid) } else { - let procs = block_on(ProcessList::get().read()); + let procs = ProcessList::get().read().await; procs .try_find_process(pid) .map(|proc| proc.session(procs.prove()).sid) @@ -385,11 +398,11 @@ fn getsid(pid: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_GETPGID)] -fn getpgid(pid: u32) -> KResult { +async fn getpgid(pid: u32) -> KResult { if pid == 0 { Ok(thread.process.pgroup_rcu().pgid) } else { - let procs = block_on(ProcessList::get().read()); + let procs = ProcessList::get().read().await; procs .try_find_process(pid) .map(|proc| proc.pgroup(procs.prove()).pgid) @@ -398,12 +411,12 @@ fn getpgid(pid: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_GETPID)] -fn getpid() -> KResult { +async fn getpid() -> KResult { Ok(thread.process.pid) } #[eonix_macros::define_syscall(SYS_GETPPID)] -fn getppid() -> KResult { +async fn getppid() -> KResult { Ok(thread.process.parent_rcu().map_or(0, |x| x.pid)) } @@ -419,78 +432,61 @@ fn do_getuid(_thread: &Thread) -> KResult { #[cfg(target_arch = "x86_64")] 
#[eonix_macros::define_syscall(SYS_GETUID32)] -fn getuid32() -> KResult { +async fn getuid32() -> KResult { do_getuid(thread) } #[eonix_macros::define_syscall(SYS_GETUID)] -fn getuid() -> KResult { +async fn getuid() -> KResult { do_getuid(thread) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETEUID32)] -fn geteuid32() -> KResult { +async fn geteuid32() -> KResult { do_geteuid(thread) } #[eonix_macros::define_syscall(SYS_GETEUID)] -fn geteuid() -> KResult { +async fn geteuid() -> KResult { do_geteuid(thread) } #[eonix_macros::define_syscall(SYS_GETEGID)] -fn getegid() -> KResult { +async fn getegid() -> KResult { // All users are root for now. Ok(0) } #[eonix_macros::define_syscall(SYS_GETGID)] -fn getgid() -> KResult { - sys_getegid(thread) +async fn getgid() -> KResult { + sys_getegid(thread).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETGID32)] -fn getgid32() -> KResult { - sys_getegid(thread) -} - -#[eonix_macros::define_syscall(SYS_GETRANDOM)] -fn getrandom(buf: *mut u8, buflen: usize, _flags: u32) -> isize { - if buf.is_null() || buflen == 0 { - return -14; - } - - static mut SEED: u64 = 1; - unsafe { - for i in 0..buflen { - SEED = SEED.wrapping_mul(1103515245).wrapping_add(12345); - *buf.add(i) = (SEED >> 8) as u8; - } - } - - buflen as isize +async fn getgid32() -> KResult { + sys_getegid(thread).await } #[eonix_macros::define_syscall(SYS_SCHED_YIELD)] -fn sched_yield() -> KResult<()> { - block_on(yield_now()); +async fn sched_yield() -> KResult<()> { + yield_now().await; Ok(()) } #[eonix_macros::define_syscall(SYS_SYNC)] -fn sync() -> KResult<()> { +async fn sync() -> KResult<()> { Ok(()) } #[eonix_macros::define_syscall(SYS_FSYNC)] -fn fsync() -> KResult<()> { +async fn fsync() -> KResult<()> { Ok(()) } #[eonix_macros::define_syscall(SYS_GETTID)] -fn gettid() -> KResult { +async fn gettid() -> KResult { Ok(thread.tid) } @@ -530,7 +526,7 @@ pub fn parse_user_tls(arch_tls: usize) -> KResult { 
#[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SET_THREAD_AREA)] -fn set_thread_area(arch_tls: usize) -> KResult<()> { +async fn set_thread_area(arch_tls: usize) -> KResult<()> { thread.set_user_tls(parse_user_tls(arch_tls)?)?; // SAFETY: Preemption is disabled on calling `load_thread_area32()`. @@ -544,16 +540,16 @@ fn set_thread_area(arch_tls: usize) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_SET_TID_ADDRESS)] -fn set_tid_address(tidptr: usize) -> KResult { +async fn set_tid_address(tidptr: UserMut) -> KResult { thread.clear_child_tid(Some(tidptr)); Ok(thread.tid) } #[eonix_macros::define_syscall(SYS_PRCTL)] -fn prctl(option: u32, arg2: usize) -> KResult<()> { +async fn prctl(option: u32, arg2: PtrT) -> KResult<()> { match option { PR_SET_NAME => { - let name = UserPointer::new(arg2 as *mut [u8; 16])?.read()?; + let name = UserPointer::<[u8; 16]>::new(User::with_addr(arg2.addr()))?.read()?; let len = name.iter().position(|&c| c == 0).unwrap_or(15); thread.set_name(name[..len].into()); Ok(()) @@ -562,7 +558,7 @@ fn prctl(option: u32, arg2: usize) -> KResult<()> { let name = thread.get_name(); let len = name.len().min(15); let name: [u8; 16] = core::array::from_fn(|i| if i < len { name[i] } else { 0 }); - UserPointerMut::new(arg2 as *mut [u8; 16])?.write(name)?; + UserPointerMut::<[u8; 16]>::new(UserMut::with_addr(arg2.addr()))?.write(name)?; Ok(()) } _ => Err(EINVAL), @@ -570,8 +566,8 @@ fn prctl(option: u32, arg2: usize) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_KILL)] -fn kill(pid: i32, sig: u32) -> KResult<()> { - let procs = block_on(ProcessList::get().read()); +async fn kill(pid: i32, sig: u32) -> KResult<()> { + let procs = ProcessList::get().read().await; match pid { // Send signal to every process for which the calling process has // permission to send signals. 
@@ -597,8 +593,10 @@ fn kill(pid: i32, sig: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_TKILL)] -fn tkill(tid: u32, sig: u32) -> KResult<()> { - block_on(ProcessList::get().read()) +async fn tkill(tid: u32, sig: u32) -> KResult<()> { + ProcessList::get() + .read() + .await .try_find_thread(tid) .ok_or(ESRCH)? .raise(Signal::try_from_raw(sig)?); @@ -606,8 +604,8 @@ fn tkill(tid: u32, sig: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_TGKILL)] -fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { - let procs = block_on(ProcessList::get().read()); +async fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { + let procs = ProcessList::get().read().await; let thread_to_kill = procs.try_find_thread(tid).ok_or(ESRCH)?; if thread_to_kill.process.pid != tgid { @@ -619,10 +617,10 @@ fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_RT_SIGPROCMASK)] -fn rt_sigprocmask( +async fn rt_sigprocmask( how: u32, - set: *mut SigSet, - oldset: *mut SigSet, + set: UserMut, + oldset: UserMut, sigsetsize: usize, ) -> KResult<()> { if sigsetsize != size_of::() { @@ -635,7 +633,7 @@ fn rt_sigprocmask( } let new_mask = if !set.is_null() { - UserPointer::new(set)?.read()? + UserPointer::new(set.as_const())?.read()? 
} else { return Ok(()); }; @@ -657,27 +655,21 @@ struct TimeSpec32 { tv_nsec: i32, } -impl TimeSpec32 { - fn to_duration(&self) -> Duration { - Duration::new(self.tv_sec as u64, self.tv_nsec as u32) - } -} - #[eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT_TIME32)] -fn rt_sigtimedwait_time32( - _uthese: *const SigSet, - _uinfo: *mut SigInfo, - _uts: *const TimeSpec32, +async fn rt_sigtimedwait_time32( + _uthese: User, + _uinfo: UserMut, + _uts: User, ) -> KResult { // TODO Ok(0) } #[eonix_macros::define_syscall(SYS_RT_SIGACTION)] -fn rt_sigaction( +async fn rt_sigaction( signum: u32, - act: *const SigAction, - oldact: *mut SigAction, + act: User, + oldact: UserMut, sigsetsize: usize, ) -> KResult<()> { let signal = Signal::try_from_raw(signum)?; @@ -706,11 +698,11 @@ fn rt_sigaction( } #[eonix_macros::define_syscall(SYS_PRLIMIT64)] -fn prlimit64( +async fn prlimit64( pid: u32, resource: u32, - new_limit: *const RLimit, - old_limit: *mut RLimit, + new_limit: User, + old_limit: UserMut, ) -> KResult<()> { if pid != 0 { return Err(ENOSYS); @@ -742,13 +734,13 @@ fn prlimit64( } #[eonix_macros::define_syscall(SYS_GETRLIMIT)] -fn getrlimit(resource: u32, rlimit: *mut RLimit) -> KResult<()> { - sys_prlimit64(thread, 0, resource, core::ptr::null(), rlimit) +async fn getrlimit(resource: u32, rlimit: UserMut) -> KResult<()> { + sys_prlimit64(thread, 0, resource, User::null(), rlimit).await } #[eonix_macros::define_syscall(SYS_SETRLIMIT)] -fn setrlimit(resource: u32, rlimit: *const RLimit) -> KResult<()> { - sys_prlimit64(thread, 0, resource, rlimit, core::ptr::null_mut()) +async fn setrlimit(resource: u32, rlimit: User) -> KResult<()> { + sys_prlimit64(thread, 0, resource, rlimit, UserMut::null()).await } #[repr(C)] @@ -773,7 +765,7 @@ struct RUsage { } #[eonix_macros::define_syscall(SYS_GETRUSAGE)] -fn getrusage(who: u32, rusage: *mut RUsage) -> KResult<()> { +async fn getrusage(who: u32, rusage: UserMut) -> KResult<()> { if who != 0 { return Err(ENOSYS); } @@ -803,52 
+795,52 @@ fn getrusage(who: u32, rusage: *mut RUsage) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_VFORK)] -fn vfork() -> KResult { +async fn vfork() -> KResult { let clone_args = CloneArgs::for_vfork(); - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_FORK)] -fn fork() -> KResult { +async fn fork() -> KResult { let clone_args = CloneArgs::for_fork(); - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } // Some old platforms including x86_32, riscv and arm have the last two arguments // swapped, so we need to define two versions of `clone` syscall. #[cfg(not(target_arch = "loongarch64"))] #[eonix_macros::define_syscall(SYS_CLONE)] -fn clone( +async fn clone( clone_flags: usize, new_sp: usize, - parent_tidptr: usize, + parent_tidptr: UserMut, tls: usize, - child_tidptr: usize, + child_tidptr: UserMut, ) -> KResult { let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?; - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } #[cfg(target_arch = "loongarch64")] #[eonix_macros::define_syscall(SYS_CLONE)] -fn clone( +async fn clone( clone_flags: usize, new_sp: usize, - parent_tidptr: usize, - child_tidptr: usize, + parent_tidptr: UserMut, + child_tidptr: UserMut, tls: usize, ) -> KResult { let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?; - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } #[eonix_macros::define_syscall(SYS_FUTEX)] -fn futex( +async fn futex( uaddr: usize, op: u32, val: u32, @@ -866,11 +858,11 @@ fn futex( match futex_op { FutexOp::FUTEX_WAIT => { - block_on(futex_wait(uaddr, pid, val as u32, None))?; + futex_wait(uaddr, pid, val as u32, None).await?; return Ok(0); } FutexOp::FUTEX_WAKE => { - return block_on(futex_wake(uaddr, pid, val as u32)); + return futex_wake(uaddr, pid, val as u32).await; } 
FutexOp::FUTEX_REQUEUE => { todo!() @@ -882,60 +874,56 @@ fn futex( } #[eonix_macros::define_syscall(SYS_SET_ROBUST_LIST)] -fn set_robust_list(head: usize, len: usize) -> KResult<()> { +async fn set_robust_list(head: User, len: usize) -> KResult<()> { if len != size_of::() { return Err(EINVAL); } - thread.set_robust_list(Some(VAddr::from(head))); + thread.set_robust_list(Some(head)); Ok(()) } #[eonix_macros::define_syscall(SYS_RT_SIGRETURN)] -fn rt_sigreturn() -> KResult { - thread - .signal_list - .restore( - &mut thread.trap_ctx.borrow(), - &mut thread.fpu_state.borrow(), - false, - ) - .inspect_err(|err| { - println_warn!( - "`rt_sigreturn` failed in thread {} with error {err}!", - thread.tid - ); - block_on(thread.force_kill(Signal::SIGSEGV)); - })?; +async fn rt_sigreturn() -> KResult { + if let Err(err) = thread.signal_list.restore( + &mut thread.trap_ctx.borrow(), + &mut thread.fpu_state.borrow(), + false, + ) { + println_warn!( + "`rt_sigreturn` failed in thread {} with error {err}!", + thread.tid + ); + thread.force_kill(Signal::SIGSEGV).await; + return Err(err); + } Ok(SyscallNoReturn) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SIGRETURN)] -fn sigreturn() -> KResult { - thread - .signal_list - .restore( - &mut thread.trap_ctx.borrow(), - &mut thread.fpu_state.borrow(), - true, - ) - .inspect_err(|err| { - println_warn!( - "`sigreturn` failed in thread {} with error {err}!", - thread.tid - ); - block_on(thread.force_kill(Signal::SIGSEGV)); - })?; +async fn sigreturn() -> KResult { + if let Err(err) = thread.signal_list.restore( + &mut thread.trap_ctx.borrow(), + &mut thread.fpu_state.borrow(), + true, + ) { + println_warn!( + "`sigreturn` failed in thread {} with error {err}!", + thread.tid + ); + thread.force_kill(Signal::SIGSEGV).await; + return Err(err); + } Ok(SyscallNoReturn) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_ARCH_PRCTL)] -fn arch_prctl(option: u32, addr: u32) -> KResult { - 
sys_arch_prctl(thread, option, addr) +async fn arch_prctl(option: u32, addr: u32) -> KResult { + sys_arch_prctl(thread, option, addr).await } pub fn keep_alive() {} diff --git a/src/kernel/syscall/sysinfo.rs b/src/kernel/syscall/sysinfo.rs index 5092c8a6..69316b2a 100644 --- a/src/kernel/syscall/sysinfo.rs +++ b/src/kernel/syscall/sysinfo.rs @@ -2,6 +2,7 @@ use crate::{ io::Buffer as _, kernel::{ constants::{CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINTR, EINVAL}, + syscall::UserMut, task::Thread, timer::{Instant, Ticks}, user::{UserBuffer, UserPointerMut}, @@ -30,7 +31,7 @@ fn copy_cstr_to_array(cstr: &[u8], array: &mut [u8]) { } #[eonix_macros::define_syscall(SYS_NEWUNAME)] -fn newuname(buffer: *mut NewUTSName) -> KResult<()> { +async fn newuname(buffer: UserMut) -> KResult<()> { let buffer = UserPointerMut::new(buffer)?; let mut uname = NewUTSName { sysname: [0; 65], @@ -62,7 +63,7 @@ fn newuname(buffer: *mut NewUTSName) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_GETTIMEOFDAY)] -fn gettimeofday(timeval: *mut TimeVal, timezone: *mut ()) -> KResult<()> { +async fn gettimeofday(timeval: UserMut, timezone: UserMut<()>) -> KResult<()> { if !timezone.is_null() { return Err(EINVAL); } @@ -81,7 +82,7 @@ fn gettimeofday(timeval: *mut TimeVal, timezone: *mut ()) -> KResult<()> { Ok(()) } -fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> { +fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: UserMut) -> KResult<()> { let timespec = UserPointerMut::new(timespec)?; match clock_id { @@ -106,13 +107,13 @@ fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: *mut TimeSpec) #[cfg(not(target_arch = "x86_64"))] #[eonix_macros::define_syscall(SYS_CLOCK_GETTIME)] -fn clock_gettime(clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> { +async fn clock_gettime(clock_id: u32, timespec: UserMut) -> KResult<()> { do_clock_gettime64(thread, clock_id, timespec) } #[cfg(target_arch = 
"x86_64")] #[eonix_macros::define_syscall(SYS_CLOCK_GETTIME64)] -fn clock_gettime64(clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> { +async fn clock_gettime64(clock_id: u32, timespec: UserMut) -> KResult<()> { do_clock_gettime64(thread, clock_id, timespec) } @@ -135,7 +136,7 @@ struct Sysinfo { } #[eonix_macros::define_syscall(SYS_SYSINFO)] -fn sysinfo(info: *mut Sysinfo) -> KResult<()> { +async fn sysinfo(info: UserMut) -> KResult<()> { let info = UserPointerMut::new(info)?; info.write(Sysinfo { uptime: Ticks::since_boot().as_secs() as u32, @@ -164,7 +165,7 @@ struct TMS { } #[eonix_macros::define_syscall(SYS_TIMES)] -fn times(tms: *mut TMS) -> KResult<()> { +async fn times(tms: UserMut) -> KResult<()> { let tms = UserPointerMut::new(tms)?; tms.write(TMS { tms_utime: 0, @@ -175,7 +176,7 @@ fn times(tms: *mut TMS) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_GETRANDOM)] -fn get_random(buf: *mut u8, len: usize, flags: u32) -> KResult { +async fn get_random(buf: UserMut, len: usize, flags: u32) -> KResult { if flags != 0 { return Err(EINVAL); } diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index 574cdfc9..e0d578c1 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,7 +1,6 @@ -use super::block_on; use crate::{ kernel::{ - syscall::procops::parse_user_tls, + syscall::{procops::parse_user_tls, UserMut}, task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}, user::UserPointerMut, }, @@ -49,9 +48,9 @@ pub struct CloneArgs { pub flags: CloneFlags, pub sp: Option>, // Stack pointer for the new thread. pub exit_signal: Option, // Signal to send to the parent on exit. - pub set_tid_ptr: Option, // Pointer to set child TID in user space. - pub clear_tid_ptr: Option, // Pointer to clear child TID in user space. - pub parent_tid_ptr: Option, // Pointer to parent TID in user space. + pub set_tid_ptr: Option>, // Pointer to set child TID in user space. 
+ pub clear_tid_ptr: Option>, // Pointer to clear child TID in user space. + pub parent_tid_ptr: Option>, // Pointer to parent TID in user space. pub tls: Option, // Pointer to TLS information. } @@ -61,8 +60,8 @@ impl CloneArgs { pub fn for_clone( flags: usize, sp: usize, - child_tid_ptr: usize, - parent_tid_ptr: usize, + child_tid_ptr: UserMut, + parent_tid_ptr: UserMut, tls: usize, ) -> KResult { let clone_flags = CloneFlags::from_bits_truncate(flags & !Self::MASK); @@ -131,8 +130,8 @@ impl CloneArgs { } } -pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { - let mut procs = block_on(ProcessList::get().write()); +pub async fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { + let mut procs = ProcessList::get().write().await; let thread_builder = ThreadBuilder::new().clone_from(&thread, &clone_args)?; let current_process = thread.process.clone(); @@ -152,6 +151,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { let (new_thread, _) = ProcessBuilder::new() .clone_from(current_process, &clone_args) + .await .pid(new_pid) .pgroup(current_pgroup) .session(current_session) @@ -161,7 +161,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { }; if let Some(parent_tid_ptr) = clone_args.parent_tid_ptr { - UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)? + UserPointerMut::new(parent_tid_ptr)?.write(new_pid)? 
} RUNTIME.spawn(new_thread.run()); diff --git a/src/kernel/task/futex.rs b/src/kernel/task/futex.rs index af42a396..a04d7091 100644 --- a/src/kernel/task/futex.rs +++ b/src/kernel/task/futex.rs @@ -9,6 +9,7 @@ use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicL use crate::{ kernel::{ constants::{EAGAIN, EINVAL}, + syscall::User, user::UserPointer, }, prelude::KResult, @@ -174,7 +175,7 @@ pub async fn futex_wait( let (_, futex_bucket_ref) = FUTEX_TABLE.get_bucket(&futex_key); let mut futex_bucket = futex_bucket_ref.lock().await; - let val = UserPointer::new(uaddr as *const u32)?.read()?; + let val = UserPointer::new(User::::with_addr(uaddr))?.read()?; if val != expected_val { return Err(EAGAIN); @@ -238,20 +239,20 @@ async fn futex_requeue( pid: Option, wake_count: u32, requeue_uaddr: usize, - requeue_count: u32, + _requeue_count: u32, ) -> KResult { let futex_key = FutexKey::new(uaddr, pid); let futex_requeue_key = FutexKey::new(requeue_uaddr, pid); - let (bucket_idx0, bucket_ref0) = FUTEX_TABLE.get_bucket(&futex_key); - let (bucket_idx1, bucket_ref1) = FUTEX_TABLE.get_bucket(&futex_requeue_key); + let (bucket_idx0, _bucket_ref0) = FUTEX_TABLE.get_bucket(&futex_key); + let (bucket_idx1, _bucket_ref1) = FUTEX_TABLE.get_bucket(&futex_requeue_key); if bucket_idx0 == bucket_idx1 { // If the keys are the same, we can just wake up the waiters. return futex_wake(uaddr, pid, wake_count).await; } - let (futex_bucket, futex_requeue_bucket) = + let (_futex_bucket, _futex_requeue_bucket) = double_lock_bucket(futex_key, futex_requeue_key).await; todo!() @@ -299,7 +300,7 @@ impl RobustListHead { futex_wake(futex_addr, None, usize::MAX as u32).await?; // Move to the next entry in the robust list. 
- let robust_list = UserPointer::new(entry_ptr as *const RobustList)?.read()?; + let robust_list = UserPointer::new(User::::with_addr(entry_ptr))?.read()?; entry_ptr = robust_list.next; diff --git a/src/kernel/task/loader/elf.rs b/src/kernel/task/loader/elf.rs index 073026a9..859e0010 100644 --- a/src/kernel/task/loader/elf.rs +++ b/src/kernel/task/loader/elf.rs @@ -215,20 +215,20 @@ impl Elf { }) } - fn load(&self, args: Vec, envs: Vec) -> KResult { + async fn load(&self, args: Vec, envs: Vec) -> KResult { let mm_list = MMList::new(); // Load Segments - let (elf_base, data_segment_end) = self.load_segments(&mm_list)?; + let (elf_base, data_segment_end) = self.load_segments(&mm_list).await?; // Load ldso (if any) - let ldso_load_info = self.load_ldso(&mm_list)?; + let ldso_load_info = self.load_ldso(&mm_list).await?; // Load vdso - self.load_vdso(&mm_list)?; + self.load_vdso(&mm_list).await?; // Heap - mm_list.register_break(data_segment_end + 0x10000); + mm_list.register_break(data_segment_end + 0x10000).await; let aux_vec = self.init_aux_vec( elf_base, @@ -238,7 +238,9 @@ impl Elf { )?; // Map stack - let sp = self.create_and_init_stack(&mm_list, args, envs, aux_vec)?; + let sp = self + .create_and_init_stack(&mm_list, args, envs, aux_vec) + .await?; let entry_ip = if let Some(ldso_load_info) = ldso_load_info { // Normal shared object(DYN) @@ -258,26 +260,30 @@ impl Elf { }) } - fn create_and_init_stack( + async fn create_and_init_stack( &self, mm_list: &MMList, args: Vec, envs: Vec, aux_vec: AuxVec, ) -> KResult { - mm_list.mmap_fixed( - VAddr::from(E::STACK_BASE_ADDR - INIT_STACK_SIZE), - INIT_STACK_SIZE, - Mapping::Anonymous, - Permission { - read: true, - write: true, - execute: false, - }, - false, - )?; + mm_list + .mmap_fixed( + VAddr::from(E::STACK_BASE_ADDR - INIT_STACK_SIZE), + INIT_STACK_SIZE, + Mapping::Anonymous, + Permission { + read: true, + write: true, + execute: false, + }, + false, + ) + .await?; - StackInitializer::new(&mm_list, 
E::STACK_BASE_ADDR, args, envs, aux_vec).init() + StackInitializer::new(&mm_list, E::STACK_BASE_ADDR, args, envs, aux_vec) + .init() + .await } fn init_aux_vec(&self, elf_base: VAddr, ldso_base: Option) -> KResult> { @@ -309,7 +315,7 @@ impl Elf { Ok(aux_vec) } - fn load_segments(&self, mm_list: &MMList) -> KResult<(VAddr, VAddr)> { + async fn load_segments(&self, mm_list: &MMList) -> KResult<(VAddr, VAddr)> { let base: VAddr = if self.is_shared_object() { E::DYN_BASE_ADDR } else { 0 }.into(); let mut segments_end = VAddr::NULL; @@ -318,7 +324,7 @@ impl Elf { let type_ = program_header.type_().map_err(|_| ENOEXEC)?; if type_ == program::Type::Load { - let segment_end = self.load_segment(program_header, mm_list, base)?; + let segment_end = self.load_segment(program_header, mm_list, base).await?; if segment_end > segments_end { segments_end = segment_end; @@ -329,7 +335,7 @@ impl Elf { Ok((base, segments_end)) } - fn load_segment( + async fn load_segment( &self, program_header: &E::Ph, mm_list: &MMList, @@ -353,33 +359,37 @@ impl Elf { if file_len != 0 { let real_file_length = load_vaddr_end - vmap_start; - mm_list.mmap_fixed( - vmap_start, - file_len, - Mapping::File(FileMapping::new( - self.file.get_inode()?, - file_offset, - real_file_length, - )), - permission, - false, - )?; + mm_list + .mmap_fixed( + vmap_start, + file_len, + Mapping::File(FileMapping::new( + self.file.get_inode()?, + file_offset, + real_file_length, + )), + permission, + false, + ) + .await?; } if vmem_len > file_len { - mm_list.mmap_fixed( - vmap_start + file_len, - vmem_len - file_len, - Mapping::Anonymous, - permission, - false, - )?; + mm_list + .mmap_fixed( + vmap_start + file_len, + vmem_len - file_len, + Mapping::Anonymous, + permission, + false, + ) + .await?; } Ok(vmap_start + vmem_len) } - fn load_ldso(&self, mm_list: &MMList) -> KResult> { + async fn load_ldso(&self, mm_list: &MMList) -> KResult> { let ldso_path = self.ldso_path()?; if let Some(ldso_path) = ldso_path { @@ -393,7 
+403,7 @@ impl Elf { let type_ = program_header.type_().map_err(|_| ENOEXEC)?; if type_ == program::Type::Load { - ldso_elf.load_segment(program_header, mm_list, base)?; + ldso_elf.load_segment(program_header, mm_list, base).await?; } } @@ -406,8 +416,8 @@ impl Elf { Ok(None) } - fn load_vdso(&self, mm_list: &MMList) -> KResult<()> { - mm_list.map_vdso() + async fn load_vdso(&self, mm_list: &MMList) -> KResult<()> { + mm_list.map_vdso().await } fn ldso_path(&self) -> KResult> { @@ -449,10 +459,10 @@ impl ELF { } } - pub fn load(&self, args: Vec, envs: Vec) -> KResult { + pub async fn load(&self, args: Vec, envs: Vec) -> KResult { match &self { - ELF::Elf32(elf32) => elf32.load(args, envs), - ELF::Elf64(elf64) => elf64.load(args, envs), + ELF::Elf32(elf32) => elf32.load(args, envs).await, + ELF::Elf64(elf64) => elf64.load(args, envs).await, } } } @@ -483,21 +493,21 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { } // return sp after stack init - fn init(mut self) -> KResult { - let env_pointers = self.push_envs()?; - let arg_pointers = self.push_args()?; + async fn init(mut self) -> KResult { + let env_pointers = self.push_envs().await?; + let arg_pointers = self.push_args().await?; self.stack_alignment(); - self.push_aux_vec()?; - self.push_pointers(env_pointers)?; - self.push_pointers(arg_pointers)?; - self.push_argc(T::from_usize(self.args.len()))?; + self.push_aux_vec().await?; + self.push_pointers(env_pointers).await?; + self.push_pointers(arg_pointers).await?; + self.push_argc(T::from_usize(self.args.len())).await?; assert_eq!(self.sp.align_down(16), self.sp); Ok(VAddr::from(self.sp)) } - fn push_envs(&mut self) -> KResult> { + async fn push_envs(&mut self) -> KResult> { let mut addrs = Vec::with_capacity(self.envs.len()); for string in self.envs.iter().rev() { let len = string.as_bytes_with_nul().len(); @@ -505,14 +515,15 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.mm_list .access_mut(VAddr::from(self.sp), len, 
|offset, data| { data.copy_from_slice(&string.as_bytes_with_nul()[offset..offset + data.len()]) - })?; + }) + .await?; addrs.push(T::from_usize(self.sp)); } addrs.reverse(); Ok(addrs) } - fn push_args(&mut self) -> KResult> { + async fn push_args(&mut self) -> KResult> { let mut addrs = Vec::with_capacity(self.args.len()); for string in self.args.iter().rev() { let len = string.as_bytes_with_nul().len(); @@ -520,7 +531,8 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.mm_list .access_mut(VAddr::from(self.sp), len, |offset, data| { data.copy_from_slice(&string.as_bytes_with_nul()[offset..offset + data.len()]) - })?; + }) + .await?; addrs.push(T::from_usize(self.sp)); } addrs.reverse(); @@ -538,27 +550,29 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.sp = align_sp + all_size; } - fn push_pointers(&mut self, mut pointers: Vec) -> KResult<()> { + async fn push_pointers(&mut self, mut pointers: Vec) -> KResult<()> { pointers.push(T::from_usize(0)); self.sp -= pointers.len() * size_of::(); - self.mm_list.access_mut( - VAddr::from(self.sp), - pointers.len() * size_of::(), - |offset, data| { - data.copy_from_slice(unsafe { - core::slice::from_raw_parts( - pointers.as_ptr().byte_add(offset) as *const u8, - data.len(), - ) - }) - }, - )?; + self.mm_list + .access_mut( + VAddr::from(self.sp), + pointers.len() * size_of::(), + |offset, data| { + data.copy_from_slice(unsafe { + core::slice::from_raw_parts( + pointers.as_ptr().byte_add(offset) as *const u8, + data.len(), + ) + }) + }, + ) + .await?; Ok(()) } - fn push_argc(&mut self, val: T) -> KResult<()> { + async fn push_argc(&mut self, val: T) -> KResult<()> { self.sp -= size_of::(); self.mm_list @@ -566,12 +580,13 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { data.copy_from_slice(unsafe { core::slice::from_raw_parts(&val as *const _ as *const u8, data.len()) }) - })?; + }) + .await?; Ok(()) } - fn push_aux_vec(&mut self) -> KResult<()> { + async fn 
push_aux_vec(&mut self) -> KResult<()> { let mut longs: Vec = vec![]; // Write Auxiliary vectors @@ -593,18 +608,20 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.sp -= longs.len() * size_of::(); - self.mm_list.access_mut( - VAddr::from(self.sp), - longs.len() * size_of::(), - |offset, data| { - data.copy_from_slice(unsafe { - core::slice::from_raw_parts( - longs.as_ptr().byte_add(offset) as *const u8, - data.len(), - ) - }) - }, - )?; + self.mm_list + .access_mut( + VAddr::from(self.sp), + longs.len() * size_of::(), + |offset, data| { + data.copy_from_slice(unsafe { + core::slice::from_raw_parts( + longs.as_ptr().byte_add(offset) as *const u8, + data.len(), + ) + }) + }, + ) + .await?; Ok(()) } diff --git a/src/kernel/task/loader/mod.rs b/src/kernel/task/loader/mod.rs index fc9374be..4e3f4db1 100644 --- a/src/kernel/task/loader/mod.rs +++ b/src/kernel/task/loader/mod.rs @@ -106,9 +106,9 @@ impl ProgramLoader { }) } - pub fn load(self) -> KResult { + pub async fn load(self) -> KResult { match self.object { - Object::ELF(elf) => elf.load(self.args, self.envs), + Object::ELF(elf) => elf.load(self.args, self.envs).await, } } } diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 3e69fc4b..421e4b8b 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -1,4 +1,3 @@ -use super::block_on; use super::{ process_group::ProcessGroupBuilder, signal::RaiseResult, thread::ThreadBuilder, ProcessGroup, ProcessList, Session, Thread, @@ -109,6 +108,7 @@ pub struct DrainExited<'waitlist> { wait_procs: SpinGuard<'waitlist, VecDeque>, } +#[derive(Debug, Clone, Copy)] pub enum WaitId { Any, Pid(u32), @@ -121,23 +121,17 @@ impl WaitId { P_ALL => Ok(WaitId::Any), P_PID => Ok(WaitId::Pid(id)), P_PGID => Ok(WaitId::Pgid(id)), - P_PIDFD => { - panic!("PDIFD type is unsupported") - } + P_PIDFD => panic!("P_PIDFD type is not supported"), _ => Err(EINVAL), } } pub fn from_id(id: i32, thread: &Thread) -> Self { - if id < -1 { - 
WaitId::Pgid((-id).cast_unsigned()) - } else if id == -1 { - WaitId::Any - } else if id == 0 { - let procs = block_on(ProcessList::get().read()); - WaitId::Pgid(thread.process.pgroup(procs.prove()).pgid) - } else { - WaitId::Pid(id.cast_unsigned()) + match id { + ..-1 => WaitId::Pgid((-id).cast_unsigned()), + -1 => WaitId::Any, + 0 => WaitId::Pgid(thread.process.pgroup_rcu().pgid), + _ => WaitId::Pid(id.cast_unsigned()), } } } @@ -206,11 +200,11 @@ impl ProcessBuilder { } } - pub fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { + pub async fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { let mm_list = if clone_args.flags.contains(CloneFlags::CLONE_VM) { - block_on(process.mm_list.new_shared()) + process.mm_list.new_shared().await } else { - block_on(process.mm_list.new_cloned()) + process.mm_list.new_cloned().await }; if let Some(exit_signal) = clone_args.exit_signal { @@ -351,12 +345,18 @@ impl Process { trace_continue: bool, ) -> KResult> { let wait_object = { - let mut waits = self - .wait_list - .entry(wait_id, trace_stop, trace_continue) - .await; + let mut unlocked_waits = None; loop { + let mut waits = match unlocked_waits { + Some(wait) => wait.await?, + None => { + self.wait_list + .entry(wait_id, trace_stop, trace_continue) + .await + } + }; + if let Some(object) = waits.get() { break object; } @@ -374,7 +374,7 @@ impl Process { return Ok(None); } - waits = waits.wait(no_block).await?; + unlocked_waits = Some(waits.wait(no_block)); } }; @@ -395,8 +395,8 @@ impl Process { } /// Create a new session for the process. - pub fn setsid(self: &Arc) -> KResult { - let mut process_list = block_on(ProcessList::get().write()); + pub async fn setsid(self: &Arc) -> KResult { + let mut process_list = ProcessList::get().write().await; // If there exists a session that has the same sid as our pid, we can't create a new // session. 
The standard says that we should create a new process group and be the // only process in the new process group and session. @@ -473,8 +473,8 @@ impl Process { /// /// This function should be called on the process that issued the syscall in order to do /// permission checks. - pub fn setpgid(self: &Arc, pid: u32, pgid: u32) -> KResult<()> { - let mut procs = block_on(ProcessList::get().write()); + pub async fn setpgid(self: &Arc, pid: u32, pgid: u32) -> KResult<()> { + let mut procs = ProcessList::get().write().await; // We may set pgid of either the calling process or a child process. if pid == self.pid { self.do_setpgid(pgid, &mut procs) @@ -609,9 +609,8 @@ impl Entry<'_, '_, '_> { WaitId::Any => true, WaitId::Pid(pid) => item.pid == pid, WaitId::Pgid(pgid) => { - let procs = block_on(ProcessList::get().read()); - if let Some(process) = procs.try_find_process(item.pid) { - return process.pgroup(procs.prove()).pgid == pgid; + if let Some(process) = self.process_list.try_find_process(item.pid) { + return process.pgroup(self.process_list.prove()).pgid == pgid; } false } @@ -625,7 +624,7 @@ impl Entry<'_, '_, '_> { } } - pub fn wait(self, no_block: bool) -> impl core::future::Future> { + pub fn wait(self, no_block: bool) -> impl core::future::Future> + Send { let wait_procs = self.wait_procs.unlock(); async move { diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs index 5293b4b7..856030ba 100644 --- a/src/kernel/task/process_list.rs +++ b/src/kernel/task/process_list.rs @@ -9,6 +9,7 @@ use alloc::{ collections::btree_map::BTreeMap, sync::{Arc, Weak}, }; +use eonix_mm::address::Addr; use eonix_sync::{AsProof as _, AsProofMut as _, RwLock}; pub struct ProcessList { @@ -134,11 +135,9 @@ impl ProcessList { } if let Some(clear_ctid) = thread.get_clear_ctid() { - let _ = UserPointerMut::new(clear_ctid as *mut u32) - .unwrap() - .write(0u32); + let _ = UserPointerMut::new(clear_ctid).unwrap().write(0u32); - let _ = futex_wake(clear_ctid, None, 
1).await; + let _ = futex_wake(clear_ctid.addr(), None, 1).await; } if let Some(robust_list) = thread.get_robust_list() { diff --git a/src/kernel/task/signal.rs b/src/kernel/task/signal.rs index b6ed34bf..d9970cad 100644 --- a/src/kernel/task/signal.rs +++ b/src/kernel/task/signal.rs @@ -293,15 +293,15 @@ impl SignalList { let old_fpu_state_vaddr = old_trap_ctx_vaddr + size_of::(); let old_mask_vaddr = old_fpu_state_vaddr + size_of::(); - *trap_ctx = UserPointer::::new_vaddr(old_trap_ctx_vaddr)?.read()?; + *trap_ctx = UserPointer::::with_addr(old_trap_ctx_vaddr)?.read()?; // Make sure that at least we won't crash the kernel. if !trap_ctx.is_user_mode() || !trap_ctx.is_interrupt_enabled() { return Err(EFAULT)?; } - *fpu_state = UserPointer::::new_vaddr(old_fpu_state_vaddr)?.read()?; - self.inner.lock().mask = UserPointer::::new_vaddr(old_mask_vaddr)?.read()?; + *fpu_state = UserPointer::::with_addr(old_fpu_state_vaddr)?.read()?; + self.inner.lock().mask = UserPointer::::with_addr(old_mask_vaddr)?.read()?; Ok(()) } diff --git a/src/kernel/task/signal/signal_action.rs b/src/kernel/task/signal/signal_action.rs index 98682547..708f9802 100644 --- a/src/kernel/task/signal/signal_action.rs +++ b/src/kernel/task/signal/signal_action.rs @@ -3,6 +3,7 @@ use crate::{ io::BufferFill as _, kernel::{ constants::{EFAULT, EINVAL}, + syscall::UserMut, user::UserBuffer, }, }; @@ -152,7 +153,7 @@ impl SignalAction { let saved_data_addr = (current_sp - SAVED_DATA_SIZE).floor_to(16); let mut saved_data_buffer = - UserBuffer::new(saved_data_addr.addr() as *mut u8, SAVED_DATA_SIZE)?; + UserBuffer::new(UserMut::new(saved_data_addr), SAVED_DATA_SIZE)?; saved_data_buffer.copy(trap_ctx)?.ok_or(EFAULT)?; saved_data_buffer.copy(fpu_state)?.ok_or(EFAULT)?; @@ -200,7 +201,7 @@ impl SignalAction { Some(return_address), &[Long::new_val(signal.into_raw() as _).get()], |vaddr, data| -> Result<(), u32> { - let mut buffer = UserBuffer::new(vaddr.addr() as *mut u8, data.len())?; + let mut buffer = 
UserBuffer::new(UserMut::new(vaddr), data.len())?; for ch in data.iter() { buffer.copy(&ch)?.ok_or(EFAULT)?; } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 50600436..3132a9a9 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -5,7 +5,7 @@ use super::{ use crate::{ kernel::{ interrupt::default_irq_handler, - syscall::{syscall_handlers, SyscallHandler}, + syscall::{syscall_handlers, SyscallHandler, User, UserMut}, task::{clone::CloneArgs, futex::RobustListHead, CloneFlags}, timer::{should_reschedule, timer_interrupt}, user::{UserPointer, UserPointerMut}, @@ -13,7 +13,7 @@ use crate::{ }, prelude::*, }; -use alloc::sync::Arc; +use alloc::{alloc::Allocator, sync::Arc}; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ future::{poll_fn, Future}, @@ -36,10 +36,14 @@ use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::AsProofMut as _; use pointers::BorrowedArc; use posix_types::signal::Signal; +use stalloc::UnsafeStalloc; #[eonix_percpu::define_percpu] static CURRENT_THREAD: Option> = None; +#[derive(Clone, Copy)] +pub struct ThreadAlloc<'a>(pub &'a UnsafeStalloc<255, 32>); + pub struct ThreadBuilder { tid: Option, name: Option>, @@ -48,8 +52,8 @@ pub struct ThreadBuilder { fs_context: Option>, signal_list: Option, tls: Option, - set_child_tid: Option, - clear_child_tid: Option, + set_child_tid: Option>, + clear_child_tid: Option>, trap_ctx: Option, fpu_state: Option, @@ -65,11 +69,11 @@ struct ThreadInner { /// User pointer /// Store child thread's tid when child thread returns to user space. 
- set_child_tid: Option, + set_child_tid: Option>, - clear_child_tid: Option, + clear_child_tid: Option>, - robust_list_address: Option, + robust_list_address: Option>, } pub struct Thread { @@ -141,12 +145,12 @@ impl ThreadBuilder { self } - pub fn set_child_tid(mut self, set_child_tid: Option) -> Self { + pub fn set_child_tid(mut self, set_child_tid: Option>) -> Self { self.set_child_tid = set_child_tid; self } - pub fn clear_child_tid(mut self, clear_child_tid: Option) -> Self { + pub fn clear_child_tid(mut self, clear_child_tid: Option>) -> Self { self.clear_child_tid = clear_child_tid; self } @@ -285,13 +289,13 @@ impl Thread { Ok(()) } - pub fn set_robust_list(&self, robust_list_address: Option) { + pub fn set_robust_list(&self, robust_list_address: Option>) { self.inner.lock().robust_list_address = robust_list_address; } pub fn get_robust_list(&self) -> Option { let addr = self.inner.lock().robust_list_address?; - let user_pointer = UserPointer::new(addr.addr() as *const RobustListHead).ok()?; + let user_pointer = UserPointer::new(addr).ok()?; user_pointer.read().ok() } @@ -304,25 +308,30 @@ impl Thread { self.inner.lock().name.clone() } - pub fn clear_child_tid(&self, clear_child_tid: Option) { + pub fn clear_child_tid(&self, clear_child_tid: Option>) { self.inner.lock().clear_child_tid = clear_child_tid; } - pub fn get_set_ctid(&self) -> Option { + pub fn get_set_ctid(&self) -> Option> { self.inner.lock().set_child_tid } - pub fn get_clear_ctid(&self) -> Option { + pub fn get_clear_ctid(&self) -> Option> { self.inner.lock().clear_child_tid } - pub fn handle_syscall(&self, no: usize, args: [usize; 6]) -> Option { + pub async fn handle_syscall( + &self, + thd_alloc: ThreadAlloc<'_>, + no: usize, + args: [usize; 6], + ) -> Option { match syscall_handlers().get(no) { Some(Some(SyscallHandler { handler, name: _name, .. 
- })) => handler(self, args), + })) => handler(self, thd_alloc, args).await, _ => { println_warn!("Syscall {no}({no:#x}) isn't implemented."); self.raise(Signal::SIGSYS); @@ -347,12 +356,18 @@ impl Thread { async fn real_run(&self) { if let Some(set_ctid) = self.get_set_ctid() { - UserPointerMut::new(set_ctid as *mut u32) + UserPointerMut::new(set_ctid) .expect("set_child_tid pointer is invalid") .write(self.tid) .expect("set_child_tid write failed"); } + let stack_alloc = unsafe { + // SAFETY: The allocator will only be used within the context of this thread. + UnsafeStalloc::new() + }; + let thd_alloc = ThreadAlloc(&stack_alloc); + while !self.is_dead() { if self.signal_list.has_pending_signal() { self.signal_list @@ -401,7 +416,7 @@ impl Thread { } } TrapType::Syscall { no, args } => { - if let Some(retval) = self.handle_syscall(no, args) { + if let Some(retval) = self.handle_syscall(thd_alloc, no, args).await { let mut trap_ctx = self.trap_ctx.borrow(); trap_ctx.set_user_return_value(retval); @@ -452,6 +467,19 @@ impl Thread { } } +unsafe impl Allocator for ThreadAlloc<'_> { + fn allocate( + &self, + layout: core::alloc::Layout, + ) -> Result, alloc::alloc::AllocError> { + self.0.allocate(layout) + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: core::alloc::Layout) { + self.0.deallocate(ptr, layout); + } +} + pub async fn yield_now() { struct Yield { yielded: bool, diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 134021a8..86024338 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -1,5 +1,5 @@ use super::{ - task::{block_on, ProcessList, Session, Thread}, + task::{ProcessList, Session, Thread}, user::{UserPointer, UserPointerMut}, }; use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; @@ -446,18 +446,18 @@ impl Terminal { } } - fn signal(&self, inner: &mut TerminalInner, signal: Signal) { + async fn signal(&self, inner: &mut TerminalInner, signal: Signal) { if let Some(session) = inner.session.upgrade() { - 
block_on(session.raise_foreground(signal)); + session.raise_foreground(signal).await; } if !inner.termio.noflsh() { self.clear_read_buffer(inner); } } - fn echo_and_signal(&self, inner: &mut TerminalInner, ch: u8, signal: Signal) { + async fn echo_and_signal(&self, inner: &mut TerminalInner, ch: u8, signal: Signal) { self.echo_char(inner, ch); - self.signal(inner, signal); + self.signal(inner, signal).await; } fn do_commit_char(&self, inner: &mut TerminalInner, ch: u8) { @@ -481,13 +481,13 @@ impl Terminal { match ch { 0xff => {} ch if ch == inner.termio.vintr() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGINT) + return self.echo_and_signal(&mut inner, ch, Signal::SIGINT).await } ch if ch == inner.termio.vquit() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGQUIT) + return self.echo_and_signal(&mut inner, ch, Signal::SIGQUIT).await } ch if ch == inner.termio.vsusp() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGTSTP) + return self.echo_and_signal(&mut inner, ch, Signal::SIGTSTP).await } _ => {} } diff --git a/src/kernel/user.rs b/src/kernel/user.rs index b3701507..5e410c81 100644 --- a/src/kernel/user.rs +++ b/src/kernel/user.rs @@ -1,7 +1,3 @@ -pub mod dataflow; +mod dataflow; -#[allow(unused_imports)] -pub use dataflow::{UserBuffer, UserString}; - -pub type UserPointer<'a, T> = dataflow::UserPointer<'a, T, true>; -pub type UserPointerMut<'a, T> = dataflow::UserPointer<'a, T, false>; +pub use dataflow::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}; diff --git a/src/kernel/user/dataflow.rs b/src/kernel/user/dataflow.rs index 17dbd4c9..02e7d791 100644 --- a/src/kernel/user/dataflow.rs +++ b/src/kernel/user/dataflow.rs @@ -1,17 +1,20 @@ +use crate::{ + io::{Buffer, FillResult}, + prelude::*, +}; use crate::{ io::{IntoStream, Stream}, - kernel::constants::{EFAULT, EINVAL}, + kernel::{ + constants::{EFAULT, EINVAL}, + syscall::{User, UserMut}, + }, }; use core::{arch::asm, ffi::CStr, 
marker::PhantomData}; +use eonix_mm::address::Addr; use eonix_preempt::assert_preempt_enabled; -use crate::{ - io::{Buffer, FillResult}, - prelude::*, -}; - pub struct CheckedUserPointer<'a> { - ptr: *const u8, + ptr: User, len: usize, _phantom: PhantomData<&'a ()>, } @@ -27,7 +30,12 @@ pub struct UserString<'a> { len: usize, } -pub struct UserPointer<'a, T: Copy, const CONST: bool> { +pub struct UserPointer<'a, T: Copy> { + pointer: CheckedUserPointer<'a>, + _phantom: PhantomData, +} + +pub struct UserPointerMut<'a, T: Copy> { pointer: CheckedUserPointer<'a>, _phantom: PhantomData, } @@ -37,9 +45,9 @@ pub struct UserStream<'a> { cur: usize, } -impl UserPointer<'_, T, CONST> { - pub fn new(ptr: *const T) -> KResult { - let pointer = CheckedUserPointer::new(ptr as *const u8, core::mem::size_of::())?; +impl UserPointer<'_, T> { + pub fn new(ptr: User) -> KResult { + let pointer = CheckedUserPointer::new(ptr.cast(), core::mem::size_of::())?; Ok(Self { pointer, @@ -47,8 +55,8 @@ impl UserPointer<'_, T, CONST> { }) } - pub fn new_vaddr(vaddr: usize) -> KResult { - Self::new(vaddr as *mut T) + pub fn with_addr(vaddr: usize) -> KResult { + Self::new(User::with_addr(vaddr)) } /// # Might Sleep @@ -60,22 +68,48 @@ impl UserPointer<'_, T, CONST> { } pub fn offset(&self, offset: isize) -> KResult { - let new_vaddr = self.pointer.ptr as isize + offset * size_of::() as isize; - Self::new_vaddr(new_vaddr as usize) + let new_ptr = self.pointer.ptr.offset(offset * size_of::() as isize); + Self::new(new_ptr.cast()) } } -impl<'a, T: Copy> UserPointer<'a, T, false> { +impl<'a, T: Copy> UserPointerMut<'a, T> { + pub fn new(ptr: UserMut) -> KResult { + let pointer = CheckedUserPointer::new(ptr.cast().as_const(), core::mem::size_of::())?; + + Ok(Self { + pointer, + _phantom: PhantomData, + }) + } + + pub fn with_addr(vaddr: usize) -> KResult { + Self::new(UserMut::with_addr(vaddr)) + } + + /// # Might Sleep + pub fn read(&self) -> KResult { + let mut value = 
core::mem::MaybeUninit::::uninit(); + self.pointer + .read(value.as_mut_ptr() as *mut (), core::mem::size_of::())?; + Ok(unsafe { value.assume_init() }) + } + + pub fn offset(&self, offset: isize) -> KResult { + let new_ptr = self.pointer.ptr.offset(offset * size_of::() as isize); + Self::new(unsafe { new_ptr.cast().as_mut() }) + } + pub fn write(&self, value: T) -> KResult<()> { self.pointer - .write(&value as *const T as *mut (), core::mem::size_of::()) + .write(&raw const value as *mut (), core::mem::size_of::()) } } impl CheckedUserPointer<'_> { - pub fn new(ptr: *const u8, len: usize) -> KResult { + pub fn new(ptr: User, len: usize) -> KResult { const USER_MAX_ADDR: usize = 0x7ff_fff_fff_fff; - let end = (ptr as usize).checked_add(len); + let end = ptr.addr().checked_add(len); if ptr.is_null() || end.ok_or(EFAULT)? > USER_MAX_ADDR { Err(EFAULT) } else { @@ -89,19 +123,10 @@ impl CheckedUserPointer<'_> { pub fn forward(&mut self, offset: usize) { assert!(offset <= self.len); - self.ptr = self.ptr.wrapping_offset(offset as isize); + self.ptr = self.ptr.offset(offset as isize); self.len -= offset; } - pub fn get_const(&self) -> *const T { - self.ptr as *const T - } - - pub fn as_slice(&self) -> &[u8] { - // SAFETY: the pointer's validity is checked in `new` - unsafe { core::slice::from_raw_parts(self.ptr, self.len) } - } - /// # Might Sleep pub fn read(&self, buffer: *mut (), total: usize) -> KResult<()> { assert_preempt_enabled!("UserPointer::read"); @@ -126,7 +151,7 @@ impl CheckedUserPointer<'_> { ".quad 0x3", // type: load ".popsection", inout("rcx") total => error_bytes, - inout("rsi") self.ptr => _, + inout("rsi") self.ptr.addr() => _, inout("rdi") buffer => _, ); @@ -148,7 +173,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x3", // type: load ".popsection", inout("a0") total => error_bytes, - inout("a1") self.ptr => _, + inout("a1") self.ptr.addr() => _, inout("a2") buffer => _, out("t0") _, ); @@ -171,7 +196,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x3", 
// type: load ".popsection", inout("$a0") total => error_bytes, - inout("$a1") self.ptr => _, + inout("$a1") self.ptr.addr() => _, inout("$a2") buffer => _, out("$t0") _, ); @@ -210,7 +235,7 @@ impl CheckedUserPointer<'_> { ".popsection", inout("rcx") total => error_bytes, inout("rsi") data => _, - inout("rdi") self.ptr => _, + inout("rdi") self.ptr.addr() => _, ); #[cfg(target_arch = "riscv64")] @@ -232,7 +257,7 @@ impl CheckedUserPointer<'_> { ".popsection", inout("a0") total => error_bytes, inout("a1") data => _, - inout("a2") self.ptr => _, + inout("a2") self.ptr.addr() => _, out("t0") _, ); @@ -255,7 +280,7 @@ impl CheckedUserPointer<'_> { ".popsection", inout("$a0") total => error_bytes, inout("$a1") data => _, - inout("$a2") self.ptr => _, + inout("$a2") self.ptr.addr() => _, out("$t0") _, ); }; @@ -293,7 +318,7 @@ impl CheckedUserPointer<'_> { ".popsection", in("rax") 0, inout("rcx") self.len => error_bytes, - inout("rdi") self.ptr => _, + inout("rdi") self.ptr.addr() => _, options(att_syntax) ); @@ -313,7 +338,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x1", // type: store ".popsection", inout("a0") self.len => error_bytes, - inout("a1") self.ptr => _, + inout("a1") self.ptr.addr() => _, ); #[cfg(target_arch = "loongarch64")] @@ -332,7 +357,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x1", // type: store ".popsection", inout("$a0") self.len => error_bytes, - inout("$a1") self.ptr => _, + inout("$a1") self.ptr.addr() => _, ); }; @@ -345,8 +370,8 @@ impl CheckedUserPointer<'_> { } impl UserBuffer<'_> { - pub fn new(ptr: *mut u8, size: usize) -> KResult { - let ptr = CheckedUserPointer::new(ptr, size)?; + pub fn new(ptr: UserMut, size: usize) -> KResult { + let ptr = CheckedUserPointer::new(ptr.as_const(), size)?; Ok(Self { ptr, size, cur: 0 }) } @@ -388,7 +413,7 @@ impl<'lt> Buffer for UserBuffer<'lt> { impl<'lt> UserString<'lt> { /// # Might Sleep - pub fn new(ptr: *const u8) -> KResult { + pub fn new(ptr: User) -> KResult { 
assert_preempt_enabled!("UserString::new"); const MAX_LEN: usize = 4096; @@ -416,7 +441,7 @@ impl<'lt> UserString<'lt> { ".popsection", out("al") _, inout("rcx") MAX_LEN => result, - ptr = inout(reg) ptr.ptr => _, + ptr = inout(reg) ptr.ptr.addr() => _, options(att_syntax), ); @@ -439,7 +464,7 @@ impl<'lt> UserString<'lt> { ".popsection", out("t0") _, inout("a0") MAX_LEN => result, - inout("a1") ptr.ptr => _, + inout("a1") ptr.ptr.addr() => _, ); #[cfg(target_arch = "loongarch64")] @@ -461,7 +486,7 @@ impl<'lt> UserString<'lt> { ".popsection", out("$t0") _, inout("$a0") MAX_LEN => result, - inout("$a1") ptr.ptr => _, + inout("$a1") ptr.ptr.addr() => _, ); }; @@ -478,7 +503,7 @@ impl<'lt> UserString<'lt> { pub fn as_cstr(&self) -> &'lt CStr { unsafe { CStr::from_bytes_with_nul_unchecked(core::slice::from_raw_parts( - self.ptr.get_const(), + self.ptr.ptr.addr() as *const u8, self.len + 1, )) } diff --git a/src/kernel/vfs/file.rs b/src/kernel/vfs/file.rs index 75e4df2f..d3739146 100644 --- a/src/kernel/vfs/file.rs +++ b/src/kernel/vfs/file.rs @@ -466,11 +466,11 @@ impl TerminalFile { fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { block_on(self.terminal.ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::new_vaddr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::new_vaddr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::new_vaddr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::new_vaddr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::new_vaddr(arg3)?), + TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), + TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), + TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), + TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), + TIOCGWINSZ => 
TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), _ => return Err(EINVAL), })) } diff --git a/src/lib.rs b/src/lib.rs index beebe7c1..fe4796de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #![no_std] #![no_main] +#![feature(allocator_api)] #![feature(c_size_t)] #![feature(concat_idents)] #![feature(arbitrary_self_types)] @@ -253,6 +254,7 @@ async fn init_process(early_kstack: PRange) { ProgramLoader::parse(fs_context, init_name, init.clone(), argv, envp) .expect("Failed to parse init program") .load() + .await .expect("Failed to load init program") }; From 973f6f2c710f651add6756405998ab9b5bc5ebd8 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Tue, 12 Aug 2025 00:09:30 +0800 Subject: [PATCH 25/54] partial work: vfs asynchronize Signed-off-by: greatbridf --- .../eonix_sync_base/src/locked/proof.rs | 3 + src/kernel/syscall/file_rw.rs | 15 ++-- src/kernel/task/thread.rs | 2 +- src/kernel/vfs/file.rs | 70 ++++++++++--------- 4 files changed, 50 insertions(+), 40 deletions(-) diff --git a/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs b/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs index bac02301..52a5db49 100644 --- a/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs +++ b/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs @@ -25,6 +25,9 @@ where _phantom: PhantomData<&'pos ()>, } +unsafe impl Send for Proof<'_, T> {} +unsafe impl Send for ProofMut<'_, T> {} + /// A trait for types that can be converted to a proof of mutable access. 
/// /// This is used to prove that a mutable reference is valid for the lifetime `'pos` diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index ef222123..3c23b6e9 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -179,7 +179,12 @@ async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult async fn getdents64(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - thread.files.get(fd).ok_or(EBADF)?.getdents64(&mut buffer)?; + thread + .files + .get(fd) + .ok_or(EBADF)? + .getdents64(&mut buffer) + .await?; Ok(buffer.wrote()) } @@ -344,9 +349,9 @@ async fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult< let file = thread.files.get(fd).ok_or(EBADF)?; Ok(match whence { - SEEK_SET => file.seek(SeekOption::Set(offset as usize))?, - SEEK_CUR => file.seek(SeekOption::Current(offset as isize))?, - SEEK_END => file.seek(SeekOption::End(offset as isize))?, + SEEK_SET => file.seek(SeekOption::Set(offset as usize)).await?, + SEEK_CUR => file.seek(SeekOption::Current(offset as isize)).await?, + SEEK_END => file.seek(SeekOption::End(offset as isize)).await?, _ => return Err(EINVAL), } as u64) } @@ -500,7 +505,7 @@ async fn sendfile64(out_fd: FD, in_fd: FD, offset: UserMut, count: usize) -> async fn ioctl(fd: FD, request: usize, arg3: usize) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; - file.ioctl(request, arg3) + file.ioctl(request, arg3).await } #[eonix_macros::define_syscall(SYS_FCNTL64)] diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 3132a9a9..11348e51 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -42,7 +42,7 @@ use stalloc::UnsafeStalloc; static CURRENT_THREAD: Option> = None; #[derive(Clone, Copy)] -pub struct ThreadAlloc<'a>(pub &'a UnsafeStalloc<255, 32>); +pub struct ThreadAlloc<'a>(pub &'a UnsafeStalloc<1023, 32>); pub struct ThreadBuilder { tid: Option, diff 
--git a/src/kernel/vfs/file.rs b/src/kernel/vfs/file.rs index d3739146..6616cbcc 100644 --- a/src/kernel/vfs/file.rs +++ b/src/kernel/vfs/file.rs @@ -8,7 +8,7 @@ use crate::{ kernel::{ constants::{TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, mem::{paging::Page, AsMemoryBlock as _}, - task::{block_on, Thread}, + task::Thread, terminal::{Terminal, TerminalIORequest}, user::{UserPointer, UserPointerMut}, vfs::inode::Inode, @@ -157,8 +157,8 @@ impl Pipe { ) } - fn close_read(&self) { - let mut inner = block_on(self.inner.lock()); + async fn close_read(&self) { + let mut inner = self.inner.lock().await; if inner.read_closed { return; } @@ -167,8 +167,8 @@ impl Pipe { self.cv_write.notify_all(); } - fn close_write(&self) { - let mut inner = block_on(self.inner.lock()); + async fn close_write(&self) { + let mut inner = self.inner.lock().await; if inner.write_closed { return; } @@ -316,8 +316,8 @@ impl InodeFile { }) } - fn seek(&self, option: SeekOption) -> KResult { - let mut cursor = block_on(self.cursor.lock()); + async fn seek(&self, option: SeekOption) -> KResult { + let mut cursor = self.cursor.lock().await; let new_cursor = match option { SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, @@ -333,12 +333,12 @@ impl InodeFile { Ok(new_cursor) } - fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { + async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { if !self.write { return Err(EBADF); } - let mut cursor = block_on(self.cursor.lock()); + let mut cursor = self.cursor.lock().await; if self.append { let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; @@ -357,7 +357,7 @@ impl InodeFile { } } - fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { + async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { if !self.read { return Err(EBADF); } @@ -366,7 +366,7 @@ impl InodeFile { let nread = self.dentry.read(buffer, offset)?; nread } else { - let 
mut cursor = block_on(self.cursor.lock()); + let mut cursor = self.cursor.lock().await; let nread = self.dentry.read(buffer, *cursor)?; @@ -377,8 +377,8 @@ impl InodeFile { Ok(nread) } - fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = block_on(self.cursor.lock()); + async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let mut cursor = self.cursor.lock().await; let nread = self.dentry.readdir(*cursor, |filename, ino| { // Filename length + 1 for padding '\0' @@ -407,8 +407,8 @@ impl InodeFile { Ok(()) } - fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = block_on(self.cursor.lock()); + async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let mut cursor = self.cursor.lock().await; let nread = self.dentry.readdir(*cursor, |filename, ino| { // + 1 for filename length padding '\0', + 1 for d_type. @@ -464,22 +464,24 @@ impl TerminalFile { self.terminal.poll_in().await.map(|_| PollEvent::Readable) } - fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { - block_on(self.terminal.ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), - _ => return Err(EINVAL), - })) + async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { + self.terminal + .ioctl(match request as u32 { + TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), + TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), + TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), + TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), + 
TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), + _ => return Err(EINVAL), + }) + .await } } impl FileType { pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { match self { - FileType::Inode(inode) => inode.read(buffer, offset), + FileType::Inode(inode) => inode.read(buffer, offset).await, FileType::PipeRead(pipe) => pipe.pipe.read(buffer).await, FileType::TTY(tty) => tty.read(buffer).await, FileType::CharDev(device) => device.read(buffer), @@ -504,7 +506,7 @@ impl FileType { pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { match self { - FileType::Inode(inode) => inode.write(stream, offset), + FileType::Inode(inode) => inode.write(stream, offset).await, FileType::PipeWrite(pipe) => pipe.pipe.write(stream).await, FileType::TTY(tty) => tty.write(stream), FileType::CharDev(device) => device.write(stream), @@ -512,23 +514,23 @@ impl FileType { } } - pub fn seek(&self, option: SeekOption) -> KResult { + pub async fn seek(&self, option: SeekOption) -> KResult { match self { - FileType::Inode(inode) => inode.seek(option), + FileType::Inode(inode) => inode.seek(option).await, _ => Err(ESPIPE), } } - pub fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { + pub async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { match self { - FileType::Inode(inode) => inode.getdents(buffer), + FileType::Inode(inode) => inode.getdents(buffer).await, _ => Err(ENOTDIR), } } - pub fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { + pub async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { match self { - FileType::Inode(inode) => inode.getdents64(buffer), + FileType::Inode(inode) => inode.getdents64(buffer).await, _ => Err(ENOTDIR), } } @@ -568,9 +570,9 @@ impl FileType { Ok(nsent) } - pub fn ioctl(&self, request: usize, arg3: usize) -> KResult { + pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult { match self { - FileType::TTY(tty) => 
tty.ioctl(request, arg3).map(|_| 0), + FileType::TTY(tty) => tty.ioctl(request, arg3).await.map(|_| 0), _ => Err(ENOTTY), } } From db931a80384bc3f379105024719c204b866a3cc7 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Fri, 15 Aug 2025 02:05:07 +0800 Subject: [PATCH 26/54] partial work: file array rework and asynchronize Signed-off-by: greatbridf --- Cargo.lock | 14 +- Cargo.toml | 2 +- crates/posix_types/src/getdent.rs | 28 ++ crates/posix_types/src/lib.rs | 1 + src/fs/ext4.rs | 37 +- src/fs/fat32.rs | 7 +- src/fs/procfs.rs | 12 +- src/fs/tmpfs.rs | 56 +-- src/kernel/chardev.rs | 7 +- src/kernel/syscall.rs | 10 +- src/kernel/syscall/file_rw.rs | 35 +- src/kernel/syscall/mm.rs | 13 +- src/kernel/syscall/procops.rs | 9 +- src/kernel/task/process_list.rs | 2 +- src/kernel/vfs/dentry.rs | 24 +- src/kernel/vfs/dentry/dcache.rs | 8 +- src/kernel/vfs/file.rs | 637 --------------------------- src/kernel/vfs/file/inode_file.rs | 223 ++++++++++ src/kernel/vfs/file/mod.rs | 232 ++++++++++ src/kernel/vfs/file/pipe.rs | 211 +++++++++ src/kernel/vfs/file/terminal_file.rs | 55 +++ src/kernel/vfs/filearray.rs | 387 ++++++++++------ src/kernel/vfs/inode.rs | 156 ++++++- src/kernel/vfs/mod.rs | 27 +- src/lib.rs | 3 +- 25 files changed, 1280 insertions(+), 916 deletions(-) create mode 100644 crates/posix_types/src/getdent.rs delete mode 100644 src/kernel/vfs/file.rs create mode 100644 src/kernel/vfs/file/inode_file.rs create mode 100644 src/kernel/vfs/file/mod.rs create mode 100644 src/kernel/vfs/file/pipe.rs create mode 100644 src/kernel/vfs/file/terminal_file.rs diff --git a/Cargo.lock b/Cargo.lock index 484f2796..f4ed3bd8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,7 +146,7 @@ dependencies = [ "eonix_runtime", "eonix_sync", "ext4_rs", - "intrusive-collections", + "intrusive-collections 0.9.8", "intrusive_list", "itertools", "pointers", @@ -213,7 +213,7 @@ dependencies = [ "eonix_percpu", "eonix_preempt", "eonix_sync", - "intrusive-collections", + "intrusive-collections 
0.9.7", "pointers", ] @@ -246,7 +246,7 @@ dependencies = [ "eonix_preempt", "eonix_spin", "eonix_sync_base", - "intrusive-collections", + "intrusive-collections 0.9.7", ] [[package]] @@ -274,6 +274,14 @@ dependencies = [ "memoffset", ] +[[package]] +name = "intrusive-collections" +version = "0.9.8" +source = "git+https://github.com/greatbridf/intrusive-rs#0e2d88bffc9df606566fba2d61d1217182b06975" +dependencies = [ + "memoffset", +] + [[package]] name = "intrusive_list" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 25768c83..4bc8bbe8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ posix_types = { path = "./crates/posix_types" } slab_allocator = { path = "./crates/slab_allocator" } bitflags = "2.6.0" -intrusive-collections = "0.9.7" +intrusive-collections = { version = "0.9.8", git = "https://github.com/greatbridf/intrusive-rs" } itertools = { version = "0.13.0", default-features = false } acpi = "5.2.0" align_ext = "0.1.0" diff --git a/crates/posix_types/src/getdent.rs b/crates/posix_types/src/getdent.rs new file mode 100644 index 00000000..922121f6 --- /dev/null +++ b/crates/posix_types/src/getdent.rs @@ -0,0 +1,28 @@ +#[derive(Copy, Clone, Debug)] +#[repr(C, packed)] +pub struct UserDirent64 { + /// Inode number + pub d_ino: u64, + /// Implementation defined. We ignore it + pub d_off: u64, + /// Length of this record + pub d_reclen: u16, + /// File type. Set to 0 + pub d_type: u8, + /// Filename with a padding '\0' + pub d_name: [u8; 0], +} + +/// File type is at offset `d_reclen - 1`. Set it to 0 +#[derive(Copy, Clone, Debug)] +#[repr(C, packed)] +pub struct UserDirent { + /// Inode number + pub d_ino: u32, + /// Implementation defined. 
We ignore it + pub d_off: u32, + /// Length of this record + pub d_reclen: u16, + /// Filename with a padding '\0' + pub d_name: [u8; 0], +} diff --git a/crates/posix_types/src/lib.rs b/crates/posix_types/src/lib.rs index dfe8d089..49d2ac5f 100644 --- a/crates/posix_types/src/lib.rs +++ b/crates/posix_types/src/lib.rs @@ -2,6 +2,7 @@ pub mod constants; pub mod ctypes; +pub mod getdent; pub mod namei; pub mod open; pub mod poll; diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index b4953491..7225d99b 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -2,6 +2,7 @@ use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use crate::kernel::mem::{PageCache, PageCacheBackend}; use crate::kernel::task::block_on; +use crate::kernel::vfs::inode::{AtomicMode, Mode}; use crate::{ io::{Buffer, ByteBuffer}, kernel::{ @@ -12,7 +13,6 @@ use crate::{ dentry::Dentry, inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData}, mount::{register_filesystem, Mount, MountCreator}, - s_isdir, s_isreg, vfs::Vfs, DevId, FsContext, }, @@ -86,30 +86,27 @@ impl Ext4Fs { fn get_or_insert( &self, icache: &mut BTreeMap, - mut idata: InodeData, + idata: InodeData, ) -> Arc { match icache.entry(idata.ino) { Entry::Occupied(occupied) => occupied.get().clone().into_inner(), - Entry::Vacant(vacant) => { - let mode = *idata.mode.get_mut(); - if s_isreg(mode) { - vacant - .insert(Ext4Inode::File(FileInode::new(idata))) - .clone() - .into_inner() - } else if s_isdir(mode) { - vacant - .insert(Ext4Inode::Dir(Arc::new(DirInode { idata }))) - .clone() - .into_inner() - } else { - println_warn!("ext4: Unsupported inode type: {mode:#o}"); + Entry::Vacant(vacant) => match idata.mode.load().format() { + Mode::REG => vacant + .insert(Ext4Inode::File(FileInode::new(idata))) + .clone() + .into_inner(), + Mode::DIR => vacant + .insert(Ext4Inode::Dir(Arc::new(DirInode { idata }))) + .clone() + .into_inner(), + mode => { + println_warn!("ext4: Unsupported inode type: {:#o}", mode.format_bits()); vacant 
.insert(Ext4Inode::File(FileInode::new(idata))) .clone() .into_inner() } - } + }, } } } @@ -137,7 +134,7 @@ impl Ext4Fs { nlink: AtomicNlink::new(root_inode.inode.links_count() as _), uid: AtomicU32::new(root_inode.inode.uid() as _), gid: AtomicU32::new(root_inode.inode.gid() as _), - mode: AtomicU32::new(root_inode.inode.mode() as _), + mode: AtomicMode::new(root_inode.inode.mode() as _), atime: Spin::new(Instant::new( root_inode.inode.atime() as _, root_inode.inode.i_atime_extra() as _, @@ -201,7 +198,7 @@ impl PageCacheBackend for FileInode { self.read_direct(page, offset) } - fn write_page(&self, page: &crate::kernel::mem::CachePage, offset: usize) -> KResult { + fn write_page(&self, _page: &crate::kernel::mem::CachePage, _offset: usize) -> KResult { todo!() } @@ -269,7 +266,7 @@ impl Inode for DirInode { nlink: AtomicNlink::new(attr.nlink as _), uid: AtomicU32::new(attr.uid), gid: AtomicU32::new(attr.gid), - mode: AtomicU32::new(attr.kind.bits() as u32 | real_perm), + mode: AtomicMode::new(attr.kind.bits() as u32 | real_perm), atime: Spin::new(Instant::new(attr.atime as _, 0)), ctime: Spin::new(Instant::new(attr.ctime as _, 0)), mtime: Spin::new(Instant::new(attr.mtime as _, 0)), diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 9f0adac5..781d539b 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -5,12 +5,11 @@ use crate::io::Stream; use crate::kernel::constants::EIO; use crate::kernel::mem::AsMemoryBlock; use crate::kernel::task::block_on; -use crate::kernel::vfs::inode::WriteOffset; +use crate::kernel::vfs::inode::{Mode, WriteOffset}; use crate::{ io::{Buffer, ByteBuffer, UninitBuffer}, kernel::{ block::{make_device, BlockDevice, BlockDeviceRequest}, - constants::{S_IFDIR, S_IFREG}, mem::{ paging::Page, {CachePage, PageCache, PageCacheBackend}, @@ -253,7 +252,7 @@ impl FileInode { // Safety: We are initializing the inode inode.nlink.store(1, Ordering::Relaxed); - inode.mode.store(S_IFREG | 0o777, Ordering::Relaxed); + 
inode.mode.store(Mode::REG.perm(0o777)); inode.size.store(size as u64, Ordering::Relaxed); inode @@ -343,7 +342,7 @@ impl DirInode { // Safety: We are initializing the inode inode.nlink.store(2, Ordering::Relaxed); - inode.mode.store(S_IFDIR | 0o777, Ordering::Relaxed); + inode.mode.store(Mode::DIR.perm(0o777)); inode.size.store(size as u64, Ordering::Relaxed); inode diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 82f597b8..2ed24613 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,10 +1,10 @@ use crate::kernel::constants::{EACCES, ENOTDIR}; use crate::kernel::task::block_on; use crate::kernel::timer::Instant; +use crate::kernel::vfs::inode::{AtomicMode, Mode}; use crate::{ io::Buffer, kernel::{ - constants::{S_IFDIR, S_IFREG}, mem::paging::PageBuffer, vfs::{ dentry::Dentry, @@ -69,12 +69,12 @@ define_struct_inode! { impl FileInode { pub fn new(ino: Ino, vfs: Weak, file: Box) -> Arc { - let mut mode = S_IFREG; + let mut mode = Mode::REG; if file.can_read() { - mode |= 0o444; + mode.set_perm(0o444); } if file.can_write() { - mode |= 0o200; + mode.set_perm(0o222); } let mut inode = Self { @@ -82,7 +82,7 @@ impl FileInode { file, }; - inode.idata.mode.store(mode, Ordering::Relaxed); + inode.idata.mode.store(mode); inode.idata.nlink.store(1, Ordering::Relaxed); *inode.ctime.get_mut() = Instant::now(); *inode.mtime.get_mut() = Instant::now(); @@ -123,7 +123,7 @@ impl DirInode { pub fn new(ino: Ino, vfs: Weak) -> Arc { Self::new_locked(ino, vfs, |inode, rwsem| unsafe { addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem)); - addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | 0o755).into()); + addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::DIR.perm(0o755))); addr_of_mut_field!(&mut *inode, nlink).write(1.into()); addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs index 
840f97b1..5bac591f 100644 --- a/src/fs/tmpfs.rs +++ b/src/fs/tmpfs.rs @@ -3,16 +3,14 @@ use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOT use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend}; use crate::kernel::task::block_on; use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::InodeData; use crate::kernel::vfs::inode::RenameData; +use crate::kernel::vfs::inode::{AtomicMode, InodeData}; use crate::{ io::Buffer, - kernel::constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFREG}, kernel::vfs::{ dentry::{dcache, Dentry}, inode::{define_struct_inode, AtomicIno, Ino, Inode, Mode, WriteOffset}, mount::{register_filesystem, Mount, MountCreator, MS_RDONLY}, - s_isblk, s_ischr, vfs::Vfs, DevId, }, @@ -46,7 +44,7 @@ impl NodeInode { Self::new_locked(ino, vfs, |inode, _| unsafe { addr_of_mut_field!(inode, devid).write(devid); - addr_of_mut_field!(&mut *inode, mode).write(mode.into()); + addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(mode)); addr_of_mut_field!(&mut *inode, nlink).write(1.into()); addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -74,7 +72,8 @@ impl DirectoryInode { .write(Locked::new(vec![(Arc::from(b".".as_slice()), ino)], rwsem)); addr_of_mut_field!(&mut *inode, size).write(1.into()); - addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | (mode & 0o777)).into()); + addr_of_mut_field!(&mut *inode, mode) + .write(AtomicMode::from(Mode::DIR.perm(mode.non_format_bits()))); addr_of_mut_field!(&mut *inode, nlink).write(1.into()); // link from `.` to itself addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -108,7 +107,7 @@ impl DirectoryInode { _file_lock: ProofMut<()>, ) -> KResult<()> { // SAFETY: `file_lock` has done the synchronization - if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { + 
if file.mode.load().is_dir() { return Err(EISDIR); } @@ -163,7 +162,7 @@ impl Inode for DirectoryInode { } fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> { - if !s_ischr(mode) && !s_isblk(mode) { + if !mode.is_chr() && !mode.is_blk() { return Err(EINVAL); } @@ -173,12 +172,7 @@ impl Inode for DirectoryInode { let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); - let file = NodeInode::new( - ino, - self.vfs.clone(), - mode & (0o777 | S_IFBLK | S_IFCHR), - dev, - ); + let file = NodeInode::new(ino, self.vfs.clone(), mode, dev); self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); at.save_reg(file) @@ -243,9 +237,8 @@ impl Inode for DirectoryInode { let _lock = block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(Ordering::Relaxed); - self.mode - .store((old & !0o777) | (mode & 0o777), Ordering::Relaxed); + let old = self.mode.load(); + self.mode.store(old.perm(mode.non_format_bits())); *self.ctime.lock() = Instant::now(); Ok(()) @@ -331,12 +324,10 @@ impl Inode for DirectoryInode { let _new_file_lock = block_on(new_file.rwsem.write()); // SAFETY: `new_file_lock` has done the synchronization - if new_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { - return Err(EISDIR); - } else { - if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { - return Err(ENOTDIR); - } + match (new_file.mode.load(), old_file.mode.load()) { + (Mode::DIR, _) => return Err(EISDIR), + (_, Mode::DIR) => return Err(ENOTDIR), + _ => {} } entries.remove(new_idx); @@ -393,10 +384,10 @@ impl Inode for DirectoryInode { let new_file = new_file.unwrap(); let new_file_lock = block_on(new_file.rwsem.write()); - if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 - && new_file.mode.load(Ordering::Relaxed) & S_IFDIR == 0 - { - return Err(ENOTDIR); + match (old_file.mode.load(), new_file.mode.load()) { + (Mode::DIR, Mode::DIR) => {} + (Mode::DIR, _) => return Err(ENOTDIR), + (_, _) => {} } // Unlink the 
old file that was replaced @@ -442,7 +433,7 @@ impl SymlinkInode { let len = target.len(); addr_of_mut_field!(inode, target).write(target); - addr_of_mut_field!(&mut *inode, mode).write((S_IFLNK | 0o777).into()); + addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::LNK.perm(0o777))); addr_of_mut_field!(&mut *inode, size).write((len as u64).into()); addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -482,9 +473,7 @@ impl FileInode { pages: PageCache::new(weak_self.clone()), }); - inode - .mode - .store(S_IFREG | (mode & 0o777), Ordering::Relaxed); + inode.mode.store(Mode::REG.perm(mode.non_format_bits())); inode.nlink.store(1, Ordering::Relaxed); inode.size.store(size as u64, Ordering::Relaxed); inode @@ -557,9 +546,8 @@ impl Inode for FileInode { let _lock = block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(Ordering::Relaxed); - self.mode - .store((old & !0o777) | (mode & 0o777), Ordering::Relaxed); + let old = self.mode.load(); + self.mode.store(old.perm(mode.non_format_bits())); *self.ctime.lock() = Instant::now(); Ok(()) @@ -600,7 +588,7 @@ impl TmpFs { }); let weak = Arc::downgrade(&tmpfs); - let root_dir = DirectoryInode::new(0, weak, 0o755); + let root_dir = DirectoryInode::new(0, weak, Mode::new(0o755)); Ok((tmpfs, root_dir)) } diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs index 4e0d9d0b..aff3271e 100644 --- a/src/kernel/chardev.rs +++ b/src/kernel/chardev.rs @@ -4,10 +4,7 @@ use super::{ constants::{EEXIST, EIO}, task::{block_on, ProcessList, Thread}, terminal::Terminal, - vfs::{ - file::{File, FileType, TerminalFile}, - DevId, - }, + vfs::{DevId, File, FileType, TerminalFile}, }; use crate::{ io::{Buffer, Stream, StreamRead}, @@ -71,7 +68,7 @@ impl CharDevice { } } - pub fn open(self: &Arc, flags: OpenFlags) -> KResult> { + pub fn open(self: &Arc, flags: OpenFlags) -> KResult { 
Ok(match &self.device { CharDeviceType::Terminal(terminal) => { let procs = block_on(ProcessList::get().read()); diff --git a/src/kernel/syscall.rs b/src/kernel/syscall.rs index 4131f3c4..78ddcd1c 100644 --- a/src/kernel/syscall.rs +++ b/src/kernel/syscall.rs @@ -263,13 +263,19 @@ impl Deref for UserMut { impl core::fmt::Debug for User { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "User({:#x?})", self.0.addr()) + match self.0 { + VAddr::NULL => write!(f, "User(NULL)"), + _ => write!(f, "User({:#018x?})", self.0.addr()), + } } } impl core::fmt::Debug for UserMut { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "UserMut({:#x?})", self.0.addr()) + match self.0 { + VAddr::NULL => write!(f, "UserMut(NULL)"), + _ => write!(f, "UserMut({:#018x?})", self.0.addr()), + } } } diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 3c23b6e9..1a48b255 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,20 +1,19 @@ use super::{FromSyscallArg, User}; use crate::io::IntoStream; use crate::kernel::constants::{ - EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, S_IFBLK, S_IFCHR, + EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, }; use crate::kernel::syscall::UserMut; use crate::kernel::task::Thread; use crate::kernel::timer::sleep; use crate::kernel::vfs::filearray::FD; +use crate::kernel::vfs::inode::Mode; +use crate::kernel::vfs::{PollEvent, SeekOption}; use crate::{ io::{Buffer, BufferFill}, kernel::{ user::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}, - vfs::{ - dentry::Dentry, - file::{PollEvent, SeekOption}, - }, + vfs::dentry::Dentry, }, path::Path, prelude::*, @@ -120,8 +119,12 @@ async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KRes } #[eonix_macros::define_syscall(SYS_OPENAT)] -async fn openat(dirfd: FD, pathname: User, flags: 
OpenFlags, mode: u32) -> KResult { +async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mut mode: Mode) -> KResult { let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink())?; + + let umask = *thread.fs_context.umask.lock(); + mode.mask_perm(!umask.non_format_bits()); + thread.files.open(&dentry, flags, mode) } @@ -133,7 +136,7 @@ async fn open(path: User, flags: OpenFlags, mode: u32) -> KResult { #[eonix_macros::define_syscall(SYS_CLOSE)] async fn close(fd: FD) -> KResult<()> { - thread.files.close(fd) + thread.files.close(fd).await } #[eonix_macros::define_syscall(SYS_DUP)] @@ -149,7 +152,7 @@ async fn dup2(old_fd: FD, new_fd: FD) -> KResult { #[eonix_macros::define_syscall(SYS_DUP3)] async fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { - thread.files.dup_to(old_fd, new_fd, flags) + thread.files.dup_to(old_fd, new_fd, flags).await } #[eonix_macros::define_syscall(SYS_PIPE2)] @@ -254,9 +257,9 @@ async fn statx( } #[eonix_macros::define_syscall(SYS_MKDIRAT)] -async fn mkdirat(dirfd: FD, pathname: User, mode: u32) -> KResult<()> { +async fn mkdirat(dirfd: FD, pathname: User, mut mode: Mode) -> KResult<()> { let umask = *thread.fs_context.umask.lock(); - let mode = mode & !umask & 0o777; + mode.mask_perm(!umask.non_format_bits()); let dentry = dentry_from(thread, dirfd, pathname, true)?; dentry.mkdir(mode) @@ -311,11 +314,15 @@ async fn symlink(target: User, linkpath: User) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_MKNODAT)] -async fn mknodat(dirfd: FD, pathname: User, mode: u32, dev: u32) -> KResult<()> { +async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: u32) -> KResult<()> { + if !mode.is_blk() && !mode.is_chr() { + return Err(EINVAL); + } + let dentry = dentry_from(thread, dirfd, pathname, true)?; let umask = *thread.fs_context.umask.lock(); - let mode = mode & ((!umask & 0o777) | (S_IFBLK | S_IFCHR)); + mode.mask_perm(!umask.non_format_bits()); dentry.mknod(mode, dev) } @@ -616,7 +623,7 @@ 
async fn fchownat( } #[eonix_macros::define_syscall(SYS_FCHMODAT)] -async fn fchmodat(dirfd: FD, pathname: User, mode: u32, flags: AtFlags) -> KResult<()> { +async fn fchmodat(dirfd: FD, pathname: User, mode: Mode, flags: AtFlags) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -632,7 +639,7 @@ async fn fchmodat(dirfd: FD, pathname: User, mode: u32, flags: AtFlags) -> K } #[eonix_macros::define_syscall(SYS_FCHMOD)] -async fn chmod(pathname: User, mode: u32) -> KResult<()> { +async fn chmod(pathname: User, mode: Mode) -> KResult<()> { sys_fchmodat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()).await } diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index 547635d4..c6300ac7 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -4,6 +4,7 @@ use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; use crate::kernel::mem::FileMapping; use crate::kernel::task::Thread; use crate::kernel::vfs::filearray::FD; +use crate::kernel::vfs::inode::Mode; use crate::{ kernel::{ constants::{UserMmapFlags, UserMmapProtocol}, @@ -66,11 +67,11 @@ async fn do_mmap2( Mapping::Anonymous } else { // The mode is unimportant here, since we are checking prot in mm_area. 
- let shared_area = - SHM_MANAGER - .lock() - .await - .create_shared_area(len, thread.process.pid, 0x777); + let shared_area = SHM_MANAGER.lock().await.create_shared_area( + len, + thread.process.pid, + Mode::REG.perm(0o777), + ); Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) } } else { @@ -185,7 +186,7 @@ async fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { let mut shm_manager = SHM_MANAGER.lock().await; let shmid = gen_shm_id(key)?; - let mode = shmflg & 0o777; + let mode = Mode::REG.perm(shmflg); let shmflg = ShmFlags::from_bits_truncate(shmflg); if key == IPC_PRIVATE { diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 1dee462d..7dd573cc 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -16,6 +16,7 @@ use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; use crate::kernel::user::UserString; use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::vfs::inode::Mode; use crate::kernel::vfs::{self, dentry::Dentry}; use crate::path::Path; use crate::{kernel::user::UserBuffer, prelude::*}; @@ -99,12 +100,10 @@ async fn clock_nanosleep( } #[eonix_macros::define_syscall(SYS_UMASK)] -async fn umask(mask: u32) -> KResult { +async fn umask(mask: Mode) -> KResult { let mut umask = thread.fs_context.umask.lock(); - let old = *umask; - *umask = mask & 0o777; - Ok(old) + Ok(core::mem::replace(&mut *umask, mask.non_format())) } #[eonix_macros::define_syscall(SYS_GETCWD)] @@ -221,7 +220,7 @@ async fn execve(exec: User, argv: User, envp: User) -> KResult Err(EISDIR), - mode if s_isreg(mode) => inode.read(buffer, offset), - mode if s_isblk(mode) => { + match inode.mode.load().format() { + Mode::DIR => Err(EISDIR), + Mode::REG => inode.read(buffer, offset), + Mode::BLK => { let device = BlockDevice::get(inode.devid()?)?; Ok(device.read_some(offset, buffer)?.allow_partial()) } - mode if s_ischr(mode) => { + Mode::CHR => { let device = 
CharDevice::get(inode.devid()?).ok_or(EPERM)?; device.read(buffer) } @@ -427,11 +427,11 @@ impl Dentry { pub fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load(Ordering::Relaxed) { - mode if s_isdir(mode) => Err(EISDIR), - mode if s_isreg(mode) => inode.write(stream, offset), - mode if s_isblk(mode) => Err(EINVAL), // TODO - mode if s_ischr(mode) => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), + match inode.mode.load().format() { + Mode::DIR => Err(EISDIR), + Mode::REG => inode.write(stream, offset), + Mode::BLK => Err(EINVAL), // TODO + Mode::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), _ => Err(EINVAL), } } diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs index 06a4e14d..188a1cfc 100644 --- a/src/kernel/vfs/dentry/dcache.rs +++ b/src/kernel/vfs/dentry/dcache.rs @@ -1,9 +1,9 @@ use super::{Dentry, Inode}; use crate::kernel::constants::ENOENT; use crate::kernel::task::block_on; +use crate::kernel::vfs::inode::Mode; use crate::rcu::RCUPointer; use crate::{ - kernel::vfs::{s_isdir, s_islnk}, prelude::*, rcu::{RCUIterator, RCUList}, }; @@ -57,9 +57,9 @@ pub fn d_try_revalidate(dentry: &Arc) { /// /// Dentry flags will be determined by the inode's mode. 
pub fn d_save(dentry: &Arc, inode: Arc) -> KResult<()> { - match inode.mode.load(Ordering::Acquire) { - mode if s_isdir(mode) => dentry.save_dir(inode), - mode if s_islnk(mode) => dentry.save_symlink(inode), + match inode.mode.load().format() { + Mode::DIR => dentry.save_dir(inode), + Mode::LNK => dentry.save_symlink(inode), _ => dentry.save_reg(inode), } } diff --git a/src/kernel/vfs/file.rs b/src/kernel/vfs/file.rs deleted file mode 100644 index 6616cbcc..00000000 --- a/src/kernel/vfs/file.rs +++ /dev/null @@ -1,637 +0,0 @@ -use super::{ - dentry::Dentry, - inode::{Mode, WriteOffset}, - s_isblk, s_isreg, -}; -use crate::{ - io::{Buffer, BufferFill, ByteBuffer, Chunks, IntoStream}, - kernel::{ - constants::{TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, - mem::{paging::Page, AsMemoryBlock as _}, - task::Thread, - terminal::{Terminal, TerminalIORequest}, - user::{UserPointer, UserPointerMut}, - vfs::inode::Inode, - CharDevice, - }, - prelude::*, - sync::CondVar, -}; -use crate::{ - io::{Stream, StreamRead}, - kernel::constants::{ - EBADF, EFAULT, EINTR, EINVAL, ENOTDIR, ENOTTY, EOVERFLOW, EPIPE, ESPIPE, S_IFMT, - }, -}; -use alloc::{collections::vec_deque::VecDeque, sync::Arc}; -use bitflags::bitflags; -use core::{ - ops::{ControlFlow, Deref}, - sync::atomic::{AtomicU32, Ordering}, -}; -use eonix_sync::Mutex; -use posix_types::{open::OpenFlags, signal::Signal, stat::StatX}; - -pub struct InodeFile { - read: bool, - write: bool, - append: bool, - /// Only a few modes those won't possibly change are cached here to speed up file operations. - /// Specifically, `S_IFMT` masked bits. 
- mode: Mode, - cursor: Mutex, - dentry: Arc, -} - -pub struct PipeInner { - buffer: VecDeque, - read_closed: bool, - write_closed: bool, -} - -pub struct Pipe { - inner: Mutex, - cv_read: CondVar, - cv_write: CondVar, -} - -pub struct PipeReadEnd { - pipe: Arc, -} - -pub struct PipeWriteEnd { - pipe: Arc, -} - -pub struct TerminalFile { - terminal: Arc, -} - -// TODO: We should use `File` as the base type, instead of `Arc` -// If we need shared states, like for `InodeFile`, the files themselves should -// have their own shared semantics. All `File` variants will just keep the -// `Clone` semantics. -// -// e.g. The `CharDevice` itself is stateless. -pub enum FileType { - Inode(InodeFile), - PipeRead(PipeReadEnd), - PipeWrite(PipeWriteEnd), - TTY(TerminalFile), - CharDev(Arc), -} - -pub struct File { - flags: AtomicU32, - file_type: FileType, -} - -impl File { - pub fn get_inode(&self) -> KResult>> { - match &self.file_type { - FileType::Inode(inode_file) => Ok(Some(inode_file.dentry.get_inode()?)), - _ => Ok(None), - } - } -} - -pub enum SeekOption { - Set(usize), - Current(isize), - End(isize), -} - -bitflags! 
{ - pub struct PollEvent: u16 { - const Readable = 0x0001; - const Writable = 0x0002; - } -} - -impl Drop for PipeReadEnd { - fn drop(&mut self) { - self.pipe.close_read(); - } -} - -impl Drop for PipeWriteEnd { - fn drop(&mut self) { - self.pipe.close_write(); - } -} - -fn send_sigpipe_to_current() { - let current = Thread::current(); - current.raise(Signal::SIGPIPE); -} - -impl Pipe { - const PIPE_SIZE: usize = 4096; - - /// # Return - /// `(read_end, write_end)` - pub fn new(flags: OpenFlags) -> (Arc, Arc) { - let pipe = Arc::new(Self { - inner: Mutex::new(PipeInner { - buffer: VecDeque::with_capacity(Self::PIPE_SIZE), - read_closed: false, - write_closed: false, - }), - cv_read: CondVar::new(), - cv_write: CondVar::new(), - }); - - let read_flags = flags.difference(OpenFlags::O_WRONLY | OpenFlags::O_RDWR); - let mut write_flags = read_flags; - write_flags.insert(OpenFlags::O_WRONLY); - - ( - Arc::new(File { - flags: AtomicU32::new(read_flags.bits()), - file_type: FileType::PipeRead(PipeReadEnd { pipe: pipe.clone() }), - }), - Arc::new(File { - flags: AtomicU32::new(write_flags.bits()), - file_type: FileType::PipeWrite(PipeWriteEnd { pipe }), - }), - ) - } - - async fn close_read(&self) { - let mut inner = self.inner.lock().await; - if inner.read_closed { - return; - } - - inner.read_closed = true; - self.cv_write.notify_all(); - } - - async fn close_write(&self) { - let mut inner = self.inner.lock().await; - if inner.write_closed { - return; - } - - inner.write_closed = true; - self.cv_read.notify_all(); - } - - async fn poll(&self, event: PollEvent) -> KResult { - if !event.contains(PollEvent::Readable) { - unimplemented!("Poll event not supported."); - } - - let mut inner = self.inner.lock().await; - while inner.buffer.is_empty() && !inner.write_closed { - inner = self.cv_read.wait(inner).await; - } - - if Thread::current().signal_list.has_pending_signal() { - return Err(EINTR); - } - - let mut retval = PollEvent::empty(); - if inner.write_closed { - retval 
|= PollEvent::Writable; - } - - if !inner.buffer.is_empty() { - retval |= PollEvent::Readable; - } - - Ok(retval) - } - - async fn read(&self, buffer: &mut dyn Buffer) -> KResult { - let mut inner = self.inner.lock().await; - - while !inner.write_closed && inner.buffer.is_empty() { - inner = self.cv_read.wait(inner).await; - if Thread::current().signal_list.has_pending_signal() { - return Err(EINTR); - } - } - - let (data1, data2) = inner.buffer.as_slices(); - let nread = buffer.fill(data1)?.allow_partial() + buffer.fill(data2)?.allow_partial(); - inner.buffer.drain(..nread); - - self.cv_write.notify_all(); - Ok(nread) - } - - async fn write_atomic(&self, data: &[u8]) -> KResult { - let mut inner = self.inner.lock().await; - - if inner.read_closed { - send_sigpipe_to_current(); - return Err(EPIPE); - } - - while inner.buffer.len() + data.len() > Self::PIPE_SIZE { - inner = self.cv_write.wait(inner).await; - if Thread::current().signal_list.has_pending_signal() { - return Err(EINTR); - } - - if inner.read_closed { - send_sigpipe_to_current(); - return Err(EPIPE); - } - } - - inner.buffer.extend(data); - - self.cv_read.notify_all(); - return Ok(data.len()); - } - - async fn write(&self, stream: &mut dyn Stream) -> KResult { - let mut buffer = [0; Self::PIPE_SIZE]; - let mut total = 0; - while let Some(data) = stream.poll_data(&mut buffer)? { - let nwrote = self.write_atomic(data).await?; - total += nwrote; - if nwrote != data.len() { - break; - } - } - Ok(total) - } -} - -#[derive(Copy, Clone, Debug)] -#[repr(C, packed)] -struct UserDirent64 { - /// Inode number - d_ino: u64, - /// Implementation defined. We ignore it - d_off: u64, - /// Length of this record - d_reclen: u16, - /// File type. Set to 0 - d_type: u8, - /// Filename with a padding '\0' - d_name: [u8; 0], -} - -/// File type is at offset `d_reclen - 1`. Set it to 0 -#[derive(Copy, Clone, Debug)] -#[repr(C, packed)] -struct UserDirent { - /// Inode number - d_ino: u32, - /// Implementation defined. 
We ignore it - d_off: u32, - /// Length of this record - d_reclen: u16, - /// Filename with a padding '\0' - d_name: [u8; 0], -} - -impl InodeFile { - pub fn new(dentry: Arc, flags: OpenFlags) -> Arc { - // SAFETY: `dentry` used to create `InodeFile` is valid. - // SAFETY: `mode` should never change with respect to the `S_IFMT` fields. - let cached_mode = dentry - .get_inode() - .expect("`dentry` is invalid") - .mode - .load(Ordering::Relaxed) - & S_IFMT; - - let (read, write, append) = flags.as_rwa(); - - Arc::new(File { - flags: AtomicU32::new(flags.bits()), - file_type: FileType::Inode(InodeFile { - dentry, - read, - write, - append, - mode: cached_mode, - cursor: Mutex::new(0), - }), - }) - } - - async fn seek(&self, option: SeekOption) -> KResult { - let mut cursor = self.cursor.lock().await; - - let new_cursor = match option { - SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, - SeekOption::Set(n) => n, - SeekOption::End(off) => { - let inode = self.dentry.get_inode()?; - let size = inode.size.load(Ordering::Relaxed) as usize; - size.checked_add_signed(off).ok_or(EOVERFLOW)? - } - }; - - *cursor = new_cursor; - Ok(new_cursor) - } - - async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { - if !self.write { - return Err(EBADF); - } - - let mut cursor = self.cursor.lock().await; - - if self.append { - let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; - - Ok(nwrote) - } else { - let nwrote = if let Some(offset) = offset { - self.dentry.write(stream, WriteOffset::Position(offset))? 
- } else { - let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?; - *cursor += nwrote; - nwrote - }; - - Ok(nwrote) - } - } - - async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { - if !self.read { - return Err(EBADF); - } - - let nread = if let Some(offset) = offset { - let nread = self.dentry.read(buffer, offset)?; - nread - } else { - let mut cursor = self.cursor.lock().await; - - let nread = self.dentry.read(buffer, *cursor)?; - - *cursor += nread; - nread - }; - - Ok(nread) - } - - async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = self.cursor.lock().await; - - let nread = self.dentry.readdir(*cursor, |filename, ino| { - // Filename length + 1 for padding '\0' - let real_record_len = core::mem::size_of::() + filename.len() + 1; - - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } - - let record = UserDirent64 { - d_ino: ino, - d_off: 0, - d_reclen: real_record_len as u16, - d_type: 0, - d_name: [0; 0], - }; - - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0])?.ok_or(EFAULT)?; - - Ok(ControlFlow::Continue(())) - })?; - - *cursor += nread; - Ok(()) - } - - async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = self.cursor.lock().await; - - let nread = self.dentry.readdir(*cursor, |filename, ino| { - // + 1 for filename length padding '\0', + 1 for d_type. 
- let real_record_len = core::mem::size_of::() + filename.len() + 2; - - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } - - let record = UserDirent { - d_ino: ino as u32, - d_off: 0, - d_reclen: real_record_len as u16, - d_name: [0; 0], - }; - - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0, 0])?.ok_or(EFAULT)?; - - Ok(ControlFlow::Continue(())) - })?; - - *cursor += nread; - Ok(()) - } -} - -impl TerminalFile { - pub fn new(tty: Arc, flags: OpenFlags) -> Arc { - Arc::new(File { - flags: AtomicU32::new(flags.bits()), - file_type: FileType::TTY(TerminalFile { terminal: tty }), - }) - } - - async fn read(&self, buffer: &mut dyn Buffer) -> KResult { - self.terminal.read(buffer).await - } - - fn write(&self, stream: &mut dyn Stream) -> KResult { - stream.read_till_end(&mut [0; 128], |data| { - self.terminal.write(data); - Ok(()) - }) - } - - async fn poll(&self, event: PollEvent) -> KResult { - if !event.contains(PollEvent::Readable) { - unimplemented!("Poll event not supported.") - } - - self.terminal.poll_in().await.map(|_| PollEvent::Readable) - } - - async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { - self.terminal - .ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), - _ => return Err(EINVAL), - }) - .await - } -} - -impl FileType { - pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { - match self { - FileType::Inode(inode) => inode.read(buffer, offset).await, - FileType::PipeRead(pipe) => pipe.pipe.read(buffer).await, - FileType::TTY(tty) => 
tty.read(buffer).await, - FileType::CharDev(device) => device.read(buffer), - _ => Err(EBADF), - } - } - - // TODO - // /// Read from the file into the given buffers. - // /// - // /// Reads are atomic, not intermingled with other reads or writes. - // pub fn readv<'r, 'i, I: Iterator>( - // &'r self, - // buffers: I, - // ) -> KResult { - // match self { - // File::Inode(inode) => inode.readv(buffers), - // File::PipeRead(pipe) => pipe.pipe.readv(buffers), - // _ => Err(EBADF), - // } - // } - - pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { - match self { - FileType::Inode(inode) => inode.write(stream, offset).await, - FileType::PipeWrite(pipe) => pipe.pipe.write(stream).await, - FileType::TTY(tty) => tty.write(stream), - FileType::CharDev(device) => device.write(stream), - _ => Err(EBADF), - } - } - - pub async fn seek(&self, option: SeekOption) -> KResult { - match self { - FileType::Inode(inode) => inode.seek(option).await, - _ => Err(ESPIPE), - } - } - - pub async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - match self { - FileType::Inode(inode) => inode.getdents(buffer).await, - _ => Err(ENOTDIR), - } - } - - pub async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - match self { - FileType::Inode(inode) => inode.getdents64(buffer).await, - _ => Err(ENOTDIR), - } - } - - pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { - let buffer_page = Page::alloc(); - // SAFETY: We are the only owner of the page. 
- let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() }; - - match self { - FileType::Inode(file) if s_isblk(file.mode) || s_isreg(file.mode) => (), - _ => return Err(EINVAL), - } - - let mut nsent = 0; - for (cur, len) in Chunks::new(0, count, buffer.len()) { - if Thread::current().signal_list.has_pending_signal() { - return if cur == 0 { Err(EINTR) } else { Ok(cur) }; - } - let nread = self - .read(&mut ByteBuffer::new(&mut buffer[..len]), None) - .await?; - if nread == 0 { - break; - } - - let nwrote = dest_file - .write(&mut buffer[..nread].into_stream(), None) - .await?; - nsent += nwrote; - - if nwrote != len { - break; - } - } - - Ok(nsent) - } - - pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult { - match self { - FileType::TTY(tty) => tty.ioctl(request, arg3).await.map(|_| 0), - _ => Err(ENOTTY), - } - } - - pub async fn poll(&self, event: PollEvent) -> KResult { - match self { - FileType::Inode(_) => Ok(event), - FileType::TTY(tty) => tty.poll(event).await, - FileType::PipeRead(PipeReadEnd { pipe }) - | FileType::PipeWrite(PipeWriteEnd { pipe }) => pipe.poll(event).await, - _ => unimplemented!("Poll event not supported."), - } - } - - pub fn statx(&self, buffer: &mut StatX, mask: u32) -> KResult<()> { - match self { - FileType::Inode(inode) => inode.dentry.statx(buffer, mask), - _ => Err(EBADF), - } - } - - pub fn as_path(&self) -> Option<&Arc> { - match self { - FileType::Inode(inode_file) => Some(&inode_file.dentry), - _ => None, - } - } -} - -impl File { - pub fn new(flags: OpenFlags, file_type: FileType) -> Arc { - Arc::new(Self { - flags: AtomicU32::new(flags.bits()), - file_type, - }) - } - - pub fn get_flags(&self) -> OpenFlags { - OpenFlags::from_bits_retain(self.flags.load(Ordering::Relaxed)) - } - - pub fn set_flags(&self, flags: OpenFlags) { - let flags = flags.difference( - OpenFlags::O_WRONLY - | OpenFlags::O_RDWR - | OpenFlags::O_CREAT - | OpenFlags::O_TRUNC - | OpenFlags::O_EXCL, - // | OpenFlags::O_NOCTTY, - ); - 
- self.flags.store(flags.bits(), Ordering::Relaxed); - } -} - -impl Deref for File { - type Target = FileType; - - fn deref(&self) -> &Self::Target { - &self.file_type - } -} diff --git a/src/kernel/vfs/file/inode_file.rs b/src/kernel/vfs/file/inode_file.rs new file mode 100644 index 00000000..6386ba92 --- /dev/null +++ b/src/kernel/vfs/file/inode_file.rs @@ -0,0 +1,223 @@ +use super::{File, FileType, SeekOption}; +use crate::{ + io::{Buffer, BufferFill, Stream}, + kernel::{ + constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE}, + vfs::{ + dentry::Dentry, + inode::{Inode, Mode, WriteOffset}, + }, + }, + prelude::KResult, +}; +use alloc::sync::Arc; +use core::{ops::ControlFlow, sync::atomic::Ordering}; +use eonix_sync::Mutex; +use posix_types::{ + getdent::{UserDirent, UserDirent64}, + open::OpenFlags, + stat::StatX, +}; + +pub struct InodeFile { + pub r: bool, + pub w: bool, + pub a: bool, + /// Only a few modes those won't possibly change are cached here to speed up file operations. + /// Specifically, `S_IFMT` masked bits. + pub mode: Mode, + cursor: Mutex, + dentry: Arc, +} + +impl InodeFile { + pub fn new(dentry: Arc, flags: OpenFlags) -> File { + // SAFETY: `dentry` used to create `InodeFile` is valid. + // SAFETY: `mode` should never change with respect to the `S_IFMT` fields. 
+ let cached_mode = dentry + .get_inode() + .expect("`dentry` is invalid") + .mode + .load() + .format(); + + let (r, w, a) = flags.as_rwa(); + + File::new( + flags, + FileType::Inode(InodeFile { + dentry, + r, + w, + a, + mode: cached_mode, + cursor: Mutex::new(0), + }), + ) + } + + pub fn sendfile_check(&self) -> KResult<()> { + match self.mode { + Mode::REG | Mode::BLK => Ok(()), + _ => Err(EBADF), + } + } + + pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { + if !self.w { + return Err(EBADF); + } + + let mut cursor = self.cursor.lock().await; + + if self.a { + let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; + + Ok(nwrote) + } else { + let nwrote = if let Some(offset) = offset { + self.dentry.write(stream, WriteOffset::Position(offset))? + } else { + let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?; + *cursor += nwrote; + nwrote + }; + + Ok(nwrote) + } + } + + pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { + if !self.r { + return Err(EBADF); + } + + let nread = if let Some(offset) = offset { + let nread = self.dentry.read(buffer, offset)?; + nread + } else { + let mut cursor = self.cursor.lock().await; + + let nread = self.dentry.read(buffer, *cursor)?; + + *cursor += nread; + nread + }; + + Ok(nread) + } +} + +impl File { + pub fn get_inode(&self) -> KResult>> { + if let FileType::Inode(inode_file) = &**self { + Ok(Some(inode_file.dentry.get_inode()?)) + } else { + Ok(None) + } + } + + pub async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let FileType::Inode(inode_file) = &**self else { + return Err(ENOTDIR); + }; + + let mut cursor = inode_file.cursor.lock().await; + + let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { + // + 1 for filename length padding '\0', + 1 for d_type. 
+ let real_record_len = core::mem::size_of::() + filename.len() + 2; + + if buffer.available() < real_record_len { + return Ok(ControlFlow::Break(())); + } + + let record = UserDirent { + d_ino: ino as u32, + d_off: 0, + d_reclen: real_record_len as u16, + d_name: [0; 0], + }; + + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0, 0])?.ok_or(EFAULT)?; + + Ok(ControlFlow::Continue(())) + })?; + + *cursor += nread; + Ok(()) + } + + pub async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let FileType::Inode(inode_file) = &**self else { + return Err(ENOTDIR); + }; + + let mut cursor = inode_file.cursor.lock().await; + + let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { + // Filename length + 1 for padding '\0' + let real_record_len = core::mem::size_of::() + filename.len() + 1; + + if buffer.available() < real_record_len { + return Ok(ControlFlow::Break(())); + } + + let record = UserDirent64 { + d_ino: ino, + d_off: 0, + d_reclen: real_record_len as u16, + d_type: 0, + d_name: [0; 0], + }; + + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0])?.ok_or(EFAULT)?; + + Ok(ControlFlow::Continue(())) + })?; + + *cursor += nread; + Ok(()) + } + + pub async fn seek(&self, option: SeekOption) -> KResult { + let FileType::Inode(inode_file) = &**self else { + return Err(ESPIPE); + }; + + let mut cursor = inode_file.cursor.lock().await; + + let new_cursor = match option { + SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, + SeekOption::Set(n) => n, + SeekOption::End(off) => { + let inode = inode_file.dentry.get_inode()?; + let size = inode.size.load(Ordering::Relaxed) as usize; + size.checked_add_signed(off).ok_or(EOVERFLOW)? 
+ } + }; + + *cursor = new_cursor; + Ok(new_cursor) + } + + pub fn statx(&self, buffer: &mut StatX, mask: u32) -> KResult<()> { + if let FileType::Inode(inode) = &**self { + inode.dentry.statx(buffer, mask) + } else { + Err(EBADF) + } + } + + pub fn as_path(&self) -> Option<&Arc> { + if let FileType::Inode(inode_file) = &**self { + Some(&inode_file.dentry) + } else { + None + } + } +} diff --git a/src/kernel/vfs/file/mod.rs b/src/kernel/vfs/file/mod.rs new file mode 100644 index 00000000..bb1c66ec --- /dev/null +++ b/src/kernel/vfs/file/mod.rs @@ -0,0 +1,232 @@ +mod inode_file; +mod pipe; +mod terminal_file; + +use crate::{ + io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}, + kernel::{ + constants::{EBADF, EINTR, EINVAL, ENOTTY}, + mem::{AsMemoryBlock, Page}, + task::Thread, + CharDevice, + }, + prelude::KResult, +}; +use alloc::sync::Arc; +use bitflags::bitflags; +use core::{ + ops::Deref, + sync::atomic::{AtomicI32, AtomicU32, Ordering}, +}; +use pipe::{PipeReadEnd, PipeWriteEnd}; +use posix_types::open::OpenFlags; + +pub use inode_file::InodeFile; +pub use pipe::Pipe; +pub use terminal_file::TerminalFile; + +pub enum FileType { + Inode(InodeFile), + PipeRead(PipeReadEnd), + PipeWrite(PipeWriteEnd), + Terminal(TerminalFile), + CharDev(Arc), +} + +struct FileData { + flags: AtomicU32, + open_count: AtomicI32, + file_type: FileType, +} + +#[derive(Clone)] +pub struct File(Arc); + +pub enum SeekOption { + Set(usize), + Current(isize), + End(isize), +} + +bitflags! 
{ + pub struct PollEvent: u16 { + const Readable = 0x0001; + const Writable = 0x0002; + } +} + +impl FileType { + pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { + match self { + FileType::Inode(inode) => inode.read(buffer, offset).await, + FileType::PipeRead(pipe) => pipe.read(buffer).await, + FileType::Terminal(tty) => tty.read(buffer).await, + FileType::CharDev(device) => device.read(buffer), + _ => Err(EBADF), + } + } + + // TODO + // /// Read from the file into the given buffers. + // /// + // /// Reads are atomic, not intermingled with other reads or writes. + // pub fn readv<'r, 'i, I: Iterator>( + // &'r self, + // buffers: I, + // ) -> KResult { + // match self { + // File::Inode(inode) => inode.readv(buffers), + // File::PipeRead(pipe) => pipe.pipe.readv(buffers), + // _ => Err(EBADF), + // } + // } + + pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { + match self { + FileType::Inode(inode) => inode.write(stream, offset).await, + FileType::PipeWrite(pipe) => pipe.write(stream).await, + FileType::Terminal(tty) => tty.write(stream), + FileType::CharDev(device) => device.write(stream), + _ => Err(EBADF), + } + } + + fn sendfile_check(&self) -> KResult<()> { + match self { + FileType::Inode(file) => file.sendfile_check(), + _ => Err(EINVAL), + } + } + + pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { + let buffer_page = Page::alloc(); + // SAFETY: We are the only owner of the page. 
+ let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() }; + + self.sendfile_check()?; + + let mut nsent = 0; + for (cur, len) in Chunks::new(0, count, buffer.len()) { + if Thread::current().signal_list.has_pending_signal() { + return if cur == 0 { Err(EINTR) } else { Ok(cur) }; + } + let nread = self + .read(&mut ByteBuffer::new(&mut buffer[..len]), None) + .await?; + if nread == 0 { + break; + } + + let nwrote = dest_file + .write(&mut buffer[..nread].into_stream(), None) + .await?; + nsent += nwrote; + + if nwrote != len { + break; + } + } + + Ok(nsent) + } + + pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult { + match self { + FileType::Terminal(tty) => tty.ioctl(request, arg3).await.map(|_| 0), + _ => Err(ENOTTY), + } + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + match self { + FileType::Inode(_) => Ok(event), + FileType::Terminal(tty) => tty.poll(event).await, + FileType::PipeRead(pipe) => pipe.poll(event).await, + FileType::PipeWrite(pipe) => pipe.poll(event).await, + _ => unimplemented!("Poll event not supported."), + } + } +} + +impl File { + pub fn new(flags: OpenFlags, file_type: FileType) -> Self { + Self(Arc::new(FileData { + flags: AtomicU32::new(flags.bits()), + open_count: AtomicI32::new(1), + file_type, + })) + } + + pub fn get_flags(&self) -> OpenFlags { + OpenFlags::from_bits_retain(self.0.flags.load(Ordering::Relaxed)) + } + + pub fn set_flags(&self, flags: OpenFlags) { + let flags = flags.difference( + OpenFlags::O_WRONLY + | OpenFlags::O_RDWR + | OpenFlags::O_CREAT + | OpenFlags::O_TRUNC + | OpenFlags::O_EXCL, + // | OpenFlags::O_NOCTTY, + ); + + self.0.flags.store(flags.bits(), Ordering::Relaxed); + } + + /// Duplicate the file descriptor in order to store it in some [FileArray]. + /// + /// The [`File`]s stored in [FileArray]s hold an "open count", which is used + /// to track how many references to the file are currently open. 
+ /// + /// # Panics + /// The [`File`]s stored in [FileArray]s MUST be retrieved by calling this + /// method. Otherwise, when the last reference to the file is dropped, + /// something bad will happen. ;) + /// + /// [FileArray]: crate::kernel::vfs::filearray::FileArray + pub fn dup(&self) -> Self { + self.0.open_count.fetch_add(1, Ordering::Relaxed); + Self(self.0.clone()) + } + + /// Close the file descriptor, decrementing the open count. + pub async fn close(self) { + // Due to rust async drop limits, we have to do this manually... + // + // Users of files can clone and drop it freely, but references held by + // file arrays must be dropped by calling this function (in order to + // await for the async close operation of the inner FileType). + match self.0.open_count.fetch_sub(1, Ordering::Relaxed) { + ..1 => panic!("File open count underflow."), + 1 => {} + _ => return, + } + + match &self.0.file_type { + FileType::PipeRead(pipe) => pipe.close().await, + FileType::PipeWrite(pipe) => pipe.close().await, + _ => {} + } + } +} + +impl Drop for FileData { + fn drop(&mut self) { + // If you're "lucky" enough to see this, it means that you've violated + // the file reference counting rules. Check File::close() for details. ;) + assert_eq!( + self.open_count.load(Ordering::Relaxed), + 0, + "File dropped with open count 0, check the comments for details." 
+ ); + } +} + +impl Deref for File { + type Target = FileType; + + fn deref(&self) -> &Self::Target { + &self.0.file_type + } +} diff --git a/src/kernel/vfs/file/pipe.rs b/src/kernel/vfs/file/pipe.rs new file mode 100644 index 00000000..910f04fa --- /dev/null +++ b/src/kernel/vfs/file/pipe.rs @@ -0,0 +1,211 @@ +use super::{File, FileType, PollEvent}; +use crate::{ + io::{Buffer, Stream}, + kernel::{ + constants::{EINTR, EPIPE}, + task::Thread, + }, + prelude::KResult, + sync::CondVar, +}; +use alloc::{collections::vec_deque::VecDeque, sync::Arc}; +use eonix_sync::Mutex; +use posix_types::{open::OpenFlags, signal::Signal}; + +struct PipeInner { + buffer: VecDeque, + read_closed: bool, + write_closed: bool, +} + +pub struct Pipe { + inner: Mutex, + cv_read: CondVar, + cv_write: CondVar, +} + +pub struct PipeReadEnd { + pipe: Arc, +} + +pub struct PipeWriteEnd { + pipe: Arc, +} + +fn send_sigpipe_to_current() { + let current = Thread::current(); + current.raise(Signal::SIGPIPE); +} + +impl Pipe { + const PIPE_SIZE: usize = 4096; + + /// # Return + /// `(read_end, write_end)` + pub fn new(flags: OpenFlags) -> (File, File) { + let pipe = Arc::new(Self { + inner: Mutex::new(PipeInner { + buffer: VecDeque::with_capacity(Self::PIPE_SIZE), + read_closed: false, + write_closed: false, + }), + cv_read: CondVar::new(), + cv_write: CondVar::new(), + }); + + let read_flags = flags.difference(OpenFlags::O_WRONLY | OpenFlags::O_RDWR); + let mut write_flags = read_flags; + write_flags.insert(OpenFlags::O_WRONLY); + + let read_pipe = pipe.clone(); + let write_pipe = pipe; + + ( + File::new( + read_flags, + FileType::PipeRead(PipeReadEnd { pipe: read_pipe }), + ), + File::new( + write_flags, + FileType::PipeWrite(PipeWriteEnd { pipe: write_pipe }), + ), + ) + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + if !event.contains(PollEvent::Readable) { + unimplemented!("Poll event not supported."); + } + + let mut inner = self.inner.lock().await; + while 
inner.buffer.is_empty() && !inner.write_closed { + inner = self.cv_read.wait(inner).await; + } + + if Thread::current().signal_list.has_pending_signal() { + return Err(EINTR); + } + + let mut retval = PollEvent::empty(); + if inner.write_closed { + retval |= PollEvent::Writable; + } + + if !inner.buffer.is_empty() { + retval |= PollEvent::Readable; + } + + Ok(retval) + } + + pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { + let mut inner = self.inner.lock().await; + + while !inner.write_closed && inner.buffer.is_empty() { + inner = self.cv_read.wait(inner).await; + if Thread::current().signal_list.has_pending_signal() { + return Err(EINTR); + } + } + + let (data1, data2) = inner.buffer.as_slices(); + let nread = buffer.fill(data1)?.allow_partial() + buffer.fill(data2)?.allow_partial(); + inner.buffer.drain(..nread); + + self.cv_write.notify_all(); + Ok(nread) + } + + async fn write_atomic(&self, data: &[u8]) -> KResult { + let mut inner = self.inner.lock().await; + + if inner.read_closed { + send_sigpipe_to_current(); + return Err(EPIPE); + } + + while inner.buffer.len() + data.len() > Self::PIPE_SIZE { + inner = self.cv_write.wait(inner).await; + if Thread::current().signal_list.has_pending_signal() { + return Err(EINTR); + } + + if inner.read_closed { + send_sigpipe_to_current(); + return Err(EPIPE); + } + } + + inner.buffer.extend(data); + + self.cv_read.notify_all(); + return Ok(data.len()); + } + + pub async fn write(&self, stream: &mut dyn Stream) -> KResult { + let mut buffer = [0; Self::PIPE_SIZE]; + let mut total = 0; + while let Some(data) = stream.poll_data(&mut buffer)? 
{ + let nwrote = self.write_atomic(data).await?; + total += nwrote; + if nwrote != data.len() { + break; + } + } + Ok(total) + } +} + +impl PipeReadEnd { + pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { + self.pipe.read(buffer).await + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + self.pipe.poll(event).await + } + + pub async fn close(&self) { + let mut inner = self.pipe.inner.lock().await; + if inner.read_closed { + return; + } + + inner.read_closed = true; + self.pipe.cv_write.notify_all(); + } +} + +impl PipeWriteEnd { + pub async fn write(&self, stream: &mut dyn Stream) -> KResult { + self.pipe.write(stream).await + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + self.pipe.poll(event).await + } + + pub async fn close(&self) { + let mut inner = self.pipe.inner.lock().await; + if inner.write_closed { + return; + } + + inner.write_closed = true; + self.pipe.cv_read.notify_all(); + } +} + +impl Drop for Pipe { + fn drop(&mut self) { + debug_assert!( + self.inner.get_mut().read_closed, + "Pipe read end should be closed before dropping (check File::close())." + ); + + debug_assert!( + self.inner.get_mut().write_closed, + "Pipe write end should be closed before dropping (check File::close())." 
+ ); + } +} diff --git a/src/kernel/vfs/file/terminal_file.rs b/src/kernel/vfs/file/terminal_file.rs new file mode 100644 index 00000000..f318c5b2 --- /dev/null +++ b/src/kernel/vfs/file/terminal_file.rs @@ -0,0 +1,55 @@ +use super::{File, FileType, PollEvent}; +use crate::{ + io::{Buffer, Stream, StreamRead}, + kernel::{ + constants::{EINVAL, TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, + terminal::TerminalIORequest, + user::{UserPointer, UserPointerMut}, + Terminal, + }, + prelude::KResult, +}; +use alloc::sync::Arc; +use posix_types::open::OpenFlags; + +pub struct TerminalFile { + terminal: Arc, +} + +impl TerminalFile { + pub fn new(tty: Arc, flags: OpenFlags) -> File { + File::new(flags, FileType::Terminal(TerminalFile { terminal: tty })) + } + + pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { + self.terminal.read(buffer).await + } + + pub fn write(&self, stream: &mut dyn Stream) -> KResult { + stream.read_till_end(&mut [0; 128], |data| { + self.terminal.write(data); + Ok(()) + }) + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + if !event.contains(PollEvent::Readable) { + unimplemented!("Poll event not supported.") + } + + self.terminal.poll_in().await.map(|_| PollEvent::Readable) + } + + pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { + self.terminal + .ioctl(match request as u32 { + TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), + TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), + TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), + TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), + TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), + _ => return Err(EINVAL), + }) + .await + } +} diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index 0fb9205d..b457a425 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -1,7 
+1,7 @@ use super::{ - file::{File, InodeFile, TerminalFile}, + file::{File, InodeFile, Pipe}, inode::Mode, - s_ischr, Spin, + Spin, TerminalFile, }; use crate::kernel::{ constants::{ @@ -10,19 +10,13 @@ use crate::kernel::{ syscall::{FromSyscallArg, SyscallRetVal}, }; use crate::{ - kernel::{ - console::get_console, - constants::ENXIO, - vfs::{dentry::Dentry, file::Pipe, s_isdir, s_isreg}, - CharDevice, - }, + kernel::{console::get_console, constants::ENXIO, vfs::dentry::Dentry, CharDevice}, prelude::*, }; -use alloc::{ - collections::btree_map::{BTreeMap, Entry}, - sync::Arc, +use alloc::sync::Arc; +use intrusive_collections::{ + intrusive_adapter, rbtree::Entry, Bound, KeyAdapter, RBTree, RBTreeAtomicLink, }; -use core::sync::atomic::Ordering; use itertools::{ FoldWhile::{Continue, Done}, Itertools, @@ -34,14 +28,33 @@ pub struct FD(u32); #[derive(Clone)] struct OpenFile { + fd: FD, flags: FDFlags, - file: Arc, + file: File, + + link: RBTreeAtomicLink, +} + +intrusive_adapter!( + OpenFileAdapter = Box: OpenFile { link: RBTreeAtomicLink } +); + +impl<'a> KeyAdapter<'a> for OpenFileAdapter { + type Key = FD; + + fn get_key(&self, value: &'a OpenFile) -> Self::Key { + value.fd + } } #[derive(Clone)] +struct FDAllocator { + min_avail: FD, +} + struct FileArrayInner { - files: BTreeMap, - fd_min_avail: FD, + files: RBTree, + fd_alloc: FDAllocator, } pub struct FileArray { @@ -49,109 +62,202 @@ pub struct FileArray { } impl OpenFile { + fn new(fd: FD, flags: FDFlags, file: File) -> Box { + Box::new(Self { + fd, + flags, + file, + link: RBTreeAtomicLink::new(), + }) + } + pub fn close_on_exec(&self) -> bool { self.flags.contains(FDFlags::FD_CLOEXEC) } } +impl FDAllocator { + const fn new() -> Self { + Self { min_avail: FD(0) } + } + + fn reinit(&mut self) { + self.min_avail = FD(0); + } + + fn find_available(&mut self, from: FD, files: &RBTree) -> FD { + files + .range(Bound::Included(&from), Bound::Unbounded) + .fold_while(from, |current, OpenFile { fd, .. 
}| { + if current == *fd { + Continue(FD(current.0 + 1)) + } else { + Done(current) + } + }) + .into_inner() + } + + /// Allocate a new file descriptor starting from `from`. + /// + /// Returned file descriptor should be used immediately. + /// + fn allocate_fd(&mut self, from: FD, files: &RBTree) -> FD { + let from = FD::max(from, self.min_avail); + + if from == self.min_avail { + let next_min_avail = self.find_available(FD(from.0 + 1), files); + let allocated = self.min_avail; + self.min_avail = next_min_avail; + allocated + } else { + self.find_available(from, files) + } + } + + fn release_fd(&mut self, fd: FD) { + if fd < self.min_avail { + self.min_avail = fd; + } + } + + fn next_fd(&mut self, files: &RBTree) -> FD { + self.allocate_fd(self.min_avail, files) + } +} + impl FileArray { pub fn new() -> Arc { Arc::new(FileArray { inner: Spin::new(FileArrayInner { - files: BTreeMap::new(), - fd_min_avail: FD(0), + files: RBTree::new(OpenFileAdapter::new()), + fd_alloc: FDAllocator::new(), }), }) } - #[allow(dead_code)] pub fn new_shared(other: &Arc) -> Arc { other.clone() } pub fn new_cloned(other: &Self) -> Arc { Arc::new(Self { - inner: Spin::new(other.inner.lock().clone()), + inner: Spin::new({ + let (new_files, new_fd_alloc) = { + let mut new_files = RBTree::new(OpenFileAdapter::new()); + let other_inner = other.inner.lock(); + + for file in other_inner.files.iter() { + let new_file = OpenFile::new(file.fd, file.flags, file.file.dup()); + new_files.insert(new_file); + } + (new_files, other_inner.fd_alloc.clone()) + }; + + FileArrayInner { + files: new_files, + fd_alloc: new_fd_alloc, + } + }), }) } /// Acquires the file array lock. 
- pub fn get(&self, fd: FD) -> Option> { + pub fn get(&self, fd: FD) -> Option { self.inner.lock().get(fd) } - pub fn close_all(&self) { - let _old_files = { + pub async fn close_all(&self) { + let old_files = { let mut inner = self.inner.lock(); - inner.fd_min_avail = FD(0); - core::mem::take(&mut inner.files) + inner.fd_alloc.reinit(); + inner.files.take() }; + + for file in old_files.into_iter() { + file.file.close().await; + } } - pub fn close(&self, fd: FD) -> KResult<()> { - let _file = { + pub async fn close(&self, fd: FD) -> KResult<()> { + let file = { let mut inner = self.inner.lock(); - let file = inner.files.remove(&fd).ok_or(EBADF)?; - inner.release_fd(fd); - file + let file = inner.files.find_mut(&fd).remove().ok_or(EBADF)?; + inner.fd_alloc.release_fd(file.fd); + file.file }; + + file.close().await; Ok(()) } - pub fn on_exec(&self) -> () { - let mut inner = self.inner.lock(); + pub async fn on_exec(&self) { + let files_to_close = { + let mut inner = self.inner.lock(); + let (files, fd_alloc) = inner.split_borrow(); - // TODO: This is not efficient. We should avoid cloning. 
- let fds_to_close = inner - .files - .iter() - .filter(|(_, ofile)| ofile.close_on_exec()) - .map(|(&fd, _)| fd) - .collect::>(); + files.pick(|ofile| { + if ofile.close_on_exec() { + fd_alloc.release_fd(ofile.fd); + true + } else { + false + } + }) + }; - inner.files.retain(|_, ofile| !ofile.close_on_exec()); - fds_to_close.into_iter().for_each(|fd| inner.release_fd(fd)); + for open_file in files_to_close.into_iter() { + open_file.file.close().await; + } } -} -impl FileArray { pub fn dup(&self, old_fd: FD) -> KResult { let mut inner = self.inner.lock(); - let old_file = inner.files.get(&old_fd).ok_or(EBADF)?; + let (files, fd_alloc) = inner.split_borrow(); + + let old_file = files.get_fd(old_fd).ok_or(EBADF)?; - let new_file_data = old_file.file.clone(); + let new_file_data = old_file.file.dup(); let new_file_flags = old_file.flags; - let new_fd = inner.next_fd(); + let new_fd = fd_alloc.next_fd(files); inner.do_insert(new_fd, new_file_flags, new_file_data); Ok(new_fd) } - pub fn dup_to(&self, old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { - let fdflags = flags.as_fd_flags(); - + /// Duplicates the file to a new file descriptor, returning the old file + /// description to be dropped. 
+ fn dup_to_no_close(&self, old_fd: FD, new_fd: FD, fd_flags: FDFlags) -> KResult> { let mut inner = self.inner.lock(); - let old_file = inner.files.get(&old_fd).ok_or(EBADF)?; + let (files, fd_alloc) = inner.split_borrow(); - let new_file_data = old_file.file.clone(); + let old_file = files.get_fd(old_fd).ok_or(EBADF)?; + let new_file_data = old_file.file.dup(); - match inner.files.entry(new_fd) { - Entry::Vacant(_) => {} - Entry::Occupied(entry) => { - let new_file = entry.into_mut(); - let mut file_swap = new_file_data; + match files.entry(&new_fd) { + Entry::Vacant(_) => { + assert_eq!(new_fd, fd_alloc.allocate_fd(new_fd, files)); + inner.do_insert(new_fd, fd_flags, new_file_data); - new_file.flags = fdflags; - core::mem::swap(&mut file_swap, &mut new_file.file); + Ok(None) + } + Entry::Occupied(mut entry) => { + let mut file = entry.remove().unwrap(); + file.flags = fd_flags; + let old_file = core::mem::replace(&mut file.file, new_file_data); - drop(inner); - return Ok(new_fd); + entry.insert(file); + + Ok(Some(old_file)) } } + } - assert_eq!(new_fd, inner.allocate_fd(new_fd)); - inner.do_insert(new_fd, fdflags, new_file_data); + pub async fn dup_to(&self, old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { + if let Some(old_file) = self.dup_to_no_close(old_fd, new_fd, flags.as_fd_flags())? 
{ + old_file.close().await; + } Ok(new_fd) } @@ -160,9 +266,10 @@ impl FileArray { /// `(read_fd, write_fd)` pub fn pipe(&self, flags: OpenFlags) -> KResult<(FD, FD)> { let mut inner = self.inner.lock(); + let (files, fd_alloc) = inner.split_borrow(); - let read_fd = inner.next_fd(); - let write_fd = inner.next_fd(); + let read_fd = fd_alloc.next_fd(files); + let write_fd = fd_alloc.next_fd(files); let fdflag = flags.as_fd_flags(); @@ -179,23 +286,20 @@ impl FileArray { let fdflag = flags.as_fd_flags(); let inode = dentry.get_inode()?; - let filemode = inode.mode.load(Ordering::Relaxed); + let file_format = inode.mode.load().format(); - if flags.directory() { - if !s_isdir(filemode) { - return Err(ENOTDIR); - } - } else { - if s_isdir(filemode) && flags.write() { - return Err(EISDIR); - } + match (flags.directory(), file_format, flags.write()) { + (true, Mode::DIR, _) => {} + (true, _, _) => return Err(ENOTDIR), + (false, Mode::DIR, true) => return Err(EISDIR), + _ => {} } - if flags.truncate() && flags.write() && s_isreg(filemode) { + if flags.truncate() && flags.write() && file_format.is_reg() { inode.truncate(0)?; } - let file = if s_ischr(filemode) { + let file = if file_format.is_chr() { let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?; device.open(flags)? 
} else { @@ -203,7 +307,8 @@ impl FileArray { }; let mut inner = self.inner.lock(); - let fd = inner.next_fd(); + let (files, fd_alloc) = inner.split_borrow(); + let fd = fd_alloc.next_fd(files); inner.do_insert(fd, fdflag, file); Ok(fd) @@ -211,43 +316,59 @@ impl FileArray { pub fn fcntl(&self, fd: FD, cmd: u32, arg: usize) -> KResult { let mut inner = self.inner.lock(); - let ofile = inner.files.get_mut(&fd).ok_or(EBADF)?; + let (files, fd_alloc) = inner.split_borrow(); + + let mut cursor = files.find_mut(&fd); - match cmd { + let ret = match cmd { F_DUPFD | F_DUPFD_CLOEXEC => { + let ofile = cursor.get().ok_or(EBADF)?; + let cloexec = cmd == F_DUPFD_CLOEXEC || ofile.flags.close_on_exec(); let flags = cloexec .then_some(FDFlags::FD_CLOEXEC) .unwrap_or(FDFlags::empty()); - let new_file_data = ofile.file.clone(); - let new_fd = inner.allocate_fd(FD(arg as u32)); + let new_file_data = ofile.file.dup(); + let new_fd = fd_alloc.allocate_fd(FD(arg as u32), files); inner.do_insert(new_fd, flags, new_file_data); - Ok(new_fd.0 as usize) + new_fd.0 as usize } - F_GETFD => Ok(ofile.flags.bits() as usize), + F_GETFD => cursor.get().ok_or(EBADF)?.flags.bits() as usize, F_SETFD => { + let mut ofile = cursor.remove().ok_or(EBADF)?; ofile.flags = FDFlags::from_bits_truncate(arg as u32); - Ok(0) + cursor.insert(ofile); + 0 } - F_GETFL => Ok(ofile.file.get_flags().bits() as usize), + F_GETFL => cursor.get().ok_or(EBADF)?.file.get_flags().bits() as usize, F_SETFL => { - ofile + cursor + .get() + .ok_or(EBADF)? .file .set_flags(OpenFlags::from_bits_retain(arg as u32)); - Ok(0) + 0 } _ => unimplemented!("fcntl: cmd={}", cmd), - } + }; + + Ok(ret) } /// Only used for init process. 
pub fn open_console(&self) { let mut inner = self.inner.lock(); - let (stdin, stdout, stderr) = (inner.next_fd(), inner.next_fd(), inner.next_fd()); + let (files, fd_alloc) = inner.split_borrow(); + + let (stdin, stdout, stderr) = ( + fd_alloc.next_fd(files), + fd_alloc.next_fd(files), + fd_alloc.next_fd(files), + ); let console_terminal = get_console().expect("No console terminal"); inner.do_insert( @@ -269,53 +390,25 @@ impl FileArray { } impl FileArrayInner { - fn get(&mut self, fd: FD) -> Option> { - self.files.get(&fd).map(|f| f.file.clone()) - } - - fn find_available(&mut self, from: FD) -> FD { - self.files - .range(&from..) - .fold_while(from, |current, (&key, _)| { - if current == key { - Continue(FD(current.0 + 1)) - } else { - Done(current) - } - }) - .into_inner() - } - - /// Allocate a new file descriptor starting from `from`. - /// - /// Returned file descriptor should be used immediately. - /// - fn allocate_fd(&mut self, from: FD) -> FD { - let from = FD::max(from, self.fd_min_avail); - - if from == self.fd_min_avail { - let next_min_avail = self.find_available(FD(from.0 + 1)); - let allocated = self.fd_min_avail; - self.fd_min_avail = next_min_avail; - allocated - } else { - self.find_available(from) - } + fn get(&mut self, fd: FD) -> Option { + self.files.get_fd(fd).map(|open| open.file.clone()) } - fn release_fd(&mut self, fd: FD) { - if fd < self.fd_min_avail { - self.fd_min_avail = fd; + /// Insert a file description to the file array. + fn do_insert(&mut self, fd: FD, flags: FDFlags, file: File) { + match self.files.entry(&fd) { + Entry::Occupied(_) => { + panic!("File descriptor {fd:?} already exists in the file array."); + } + Entry::Vacant(insert_cursor) => { + insert_cursor.insert(OpenFile::new(fd, flags, file)); + } } } - fn next_fd(&mut self) -> FD { - self.allocate_fd(self.fd_min_avail) - } - - /// Insert a file description to the file array. 
- fn do_insert(&mut self, fd: FD, flags: FDFlags, file: Arc) { - assert!(self.files.insert(fd, OpenFile { flags, file }).is_none()); + fn split_borrow(&mut self) -> (&mut RBTree, &mut FDAllocator) { + let Self { files, fd_alloc } = self; + (files, fd_alloc) } } @@ -343,3 +436,39 @@ impl SyscallRetVal for FD { Some(self.0 as usize) } } + +trait FilesExt { + fn get_fd(&self, fd: FD) -> Option<&OpenFile>; + + fn pick

(&mut self, pred: P) -> Self + where + P: FnMut(&OpenFile) -> bool; +} + +impl FilesExt for RBTree { + fn get_fd(&self, fd: FD) -> Option<&OpenFile> { + self.find(&fd).get() + } + + fn pick

(&mut self, mut pred: P) -> Self + where + P: FnMut(&OpenFile) -> bool, + { + let mut picked = RBTree::new(OpenFileAdapter::new()); + + // TODO: might be better if we start picking from somewhere else + // or using a different approach. + let mut cursor = self.front_mut(); + while let Some(open_file) = cursor.get() { + if !pred(open_file) { + cursor.move_next(); + continue; + } + + picked.insert(cursor.remove().unwrap()); + cursor.move_next(); + } + + picked + } +} diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs index 3eb6c8dc..e47df25d 100644 --- a/src/kernel/vfs/inode.rs +++ b/src/kernel/vfs/inode.rs @@ -1,10 +1,12 @@ -use super::{dentry::Dentry, s_isblk, s_ischr, vfs::Vfs, DevId}; +use super::{dentry::Dentry, vfs::Vfs, DevId}; use crate::io::Stream; use crate::kernel::constants::{ EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, - STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR, S_IFMT, + STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFBLK, S_IFCHR, + S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, }; use crate::kernel::mem::PageCache; +use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; use crate::kernel::task::block_on; use crate::kernel::timer::Instant; use crate::{io::Buffer, prelude::*}; @@ -32,8 +34,11 @@ pub type AtomicUid = AtomicU32; #[allow(dead_code)] pub type Gid = u32; pub type AtomicGid = AtomicU32; -pub type Mode = u32; -pub type AtomicMode = AtomicU32; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Mode(u32); + +pub struct AtomicMode(AtomicU32); #[derive(Debug)] pub struct InodeData { @@ -97,7 +102,7 @@ pub struct RenameData<'a, 'b> { #[allow(unused_variables)] pub trait Inode: Send + Sync + InodeInner + Any { fn is_dir(&self) -> bool { - self.mode.load(Ordering::SeqCst) & S_IFDIR != 0 + self.mode.load().is_dir() } fn lookup(&self, dentry: &Arc) -> KResult>> { @@ -181,7 +186,7 @@ pub trait Inode: Send + Sync + InodeInner + 
Any { let vfs = self.vfs.upgrade().expect("Vfs is dropped"); let size = self.size.load(Ordering::Relaxed); - let mode = self.mode.load(Ordering::Relaxed); + let mode = self.mode.load(); if mask & STATX_NLINK != 0 { stat.stx_nlink = self.nlink.load(Ordering::Acquire) as _; @@ -213,13 +218,13 @@ pub trait Inode: Send + Sync + InodeInner + Any { stat.stx_mode = 0; if mask & STATX_MODE != 0 { - stat.stx_mode |= (mode & !S_IFMT) as u16; + stat.stx_mode |= mode.non_format_bits() as u16; stat.stx_mask |= STATX_MODE; } if mask & STATX_TYPE != 0 { - stat.stx_mode |= (mode & S_IFMT) as u16; - if s_isblk(mode) || s_ischr(mode) { + stat.stx_mode |= mode.format_bits() as u16; + if mode.is_blk() || mode.is_chr() { let devid = self.devid(); stat.stx_rdev_major = (devid? >> 8) & 0xff; stat.stx_rdev_minor = devid? & 0xff; @@ -354,3 +359,136 @@ macro_rules! define_struct_inode { } pub(crate) use define_struct_inode; + +impl Mode { + pub const REG: Self = Self(S_IFREG); + pub const DIR: Self = Self(S_IFDIR); + pub const LNK: Self = Self(S_IFLNK); + pub const BLK: Self = Self(S_IFBLK); + pub const CHR: Self = Self(S_IFCHR); + + pub const fn new(bits: u32) -> Self { + Self(bits) + } + + pub const fn is_blk(&self) -> bool { + (self.0 & S_IFMT) == S_IFBLK + } + + pub const fn is_chr(&self) -> bool { + (self.0 & S_IFMT) == S_IFCHR + } + + pub const fn is_reg(&self) -> bool { + (self.0 & S_IFMT) == S_IFREG + } + + pub const fn is_dir(&self) -> bool { + (self.0 & S_IFMT) == S_IFDIR + } + + pub const fn is_lnk(&self) -> bool { + (self.0 & S_IFMT) == S_IFLNK + } + + pub const fn bits(&self) -> u32 { + self.0 + } + + pub const fn format_bits(&self) -> u32 { + self.0 & S_IFMT + } + + pub const fn format(&self) -> Self { + Self::new(self.format_bits()) + } + + pub const fn non_format_bits(&self) -> u32 { + self.0 & !S_IFMT + } + + pub const fn non_format(&self) -> Self { + Self::new(self.non_format_bits()) + } + + pub const fn perm(self, perm: u32) -> Self { + Self::new((self.0 & !0o777) | (perm 
& 0o777)) + } + + pub const fn set_perm(&mut self, perm: u32) { + *self = self.perm(perm); + } + + pub const fn mask_perm(&mut self, perm_mask: u32) { + let perm_mask = perm_mask & 0o777; + let self_perm = self.non_format_bits() & 0o777; + + *self = self.perm(self_perm & perm_mask); + } +} + +impl AtomicMode { + pub const fn new(bits: u32) -> Self { + Self(AtomicU32::new(bits)) + } + + pub const fn from(mode: Mode) -> Self { + Self::new(mode.0) + } + + pub fn load(&self) -> Mode { + Mode(self.0.load(Ordering::Relaxed)) + } + + pub fn store(&self, mode: Mode) { + self.0.store(mode.0, Ordering::Relaxed); + } +} + +impl core::fmt::Debug for AtomicMode { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("AtomicMode") + .field("bits", &self.load().0) + .finish() + } +} + +impl core::fmt::Debug for Mode { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let format_name = match self.format() { + Mode::REG => "REG", + Mode::DIR => "DIR", + Mode::LNK => "LNK", + Mode::BLK => "BLK", + Mode::CHR => "CHR", + _ => "UNK", + }; + + match self.non_format_bits() & !0o777 { + 0 => write!( + f, + "Mode({format_name}, {perm:#o})", + perm = self.non_format_bits() + )?, + rem => write!( + f, + "Mode({format_name}, {perm:#o}, rem={rem:#x})", + perm = self.non_format_bits() & 0o777 + )?, + } + + Ok(()) + } +} + +impl FromSyscallArg for Mode { + fn from_arg(value: usize) -> Self { + Mode::new(value as u32) + } +} + +impl SyscallRetVal for Mode { + fn into_retval(self) -> Option { + Some(self.bits() as usize) + } +} diff --git a/src/kernel/vfs/mod.rs b/src/kernel/vfs/mod.rs index efd68aa7..f62cb9b9 100644 --- a/src/kernel/vfs/mod.rs +++ b/src/kernel/vfs/mod.rs @@ -1,4 +1,3 @@ -use crate::kernel::constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG}; use crate::prelude::*; use alloc::sync::Arc; use dentry::Dentry; @@ -6,33 +5,15 @@ use eonix_sync::LazyLock; use inode::Mode; pub mod dentry; -pub mod file; +mod file; 
pub mod filearray; pub mod inode; pub mod mount; pub mod vfs; -pub type DevId = u32; - -pub fn s_isreg(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFREG -} +pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile}; -pub fn s_isdir(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFDIR -} - -pub fn s_ischr(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFCHR -} - -pub fn s_isblk(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFBLK -} - -pub fn s_islnk(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFLNK -} +pub type DevId = u32; pub struct FsContext { pub fsroot: Arc, @@ -44,7 +25,7 @@ static GLOBAL_FS_CONTEXT: LazyLock> = LazyLock::new(|| { Arc::new(FsContext { fsroot: Dentry::root().clone(), cwd: Spin::new(Dentry::root().clone()), - umask: Spin::new(0o022), + umask: Spin::new(Mode::new(0o022)), }) }); diff --git a/src/lib.rs b/src/lib.rs index fe4796de..e75f8653 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,6 +41,7 @@ use kernel::{ task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, + inode::Mode, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, FsContext, }, @@ -214,7 +215,7 @@ async fn init_process(early_kstack: PRange) { let fs_context = FsContext::global(); let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap(); - mnt_dir.mkdir(0o755).unwrap(); + mnt_dir.mkdir(Mode::new(0o755)).unwrap(); do_mount( &mnt_dir, From 34a625296871b821968eeba64914cc134d9f6e5e Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 17 Aug 2025 00:22:24 +0800 Subject: [PATCH 27/54] feat: unwinding and printing stack backtrace Use unwinding crate to unwind the stack and print stack trace. Sightly adjust the linker script and move eh_frame into rodata section. Due to limited kernel image size, there might be some problems on x86_64 platforms. Further fixes needed but won't be fixed for now. 
Signed-off-by: greatbridf (cherry picked from commit 6bb54d9eae13b76768f011c44222b25b785b83e0) Signed-off-by: greatbridf --- .cargo/config.toml | 1 + Cargo.lock | 16 ++++++++++ Cargo.toml | 6 ++-- crates/eonix_hal/src/arch/loongarch64/link.x | 4 +-- .../eonix_hal/src/arch/loongarch64/memory.x | 2 -- crates/eonix_hal/src/arch/riscv64/link.x | 5 ++-- crates/eonix_hal/src/arch/riscv64/memory.x | 2 -- crates/eonix_hal/src/link.x.in | 23 +++++++++------ src/lib.rs | 8 +++++ src/panic.rs | 29 +++++++++++++++++++ 10 files changed, 76 insertions(+), 20 deletions(-) create mode 100644 src/panic.rs diff --git a/.cargo/config.toml b/.cargo/config.toml index 76b69dd5..9c7ba798 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,6 +1,7 @@ [build] target = "riscv64gc-unknown-none-elf" target-dir = 'build' +rustflags = ["-C", "force-unwind-tables"] [unstable] build-std-features = ['compiler-builtins-mem'] diff --git a/Cargo.lock b/Cargo.lock index f4ed3bd8..1eb868dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -153,6 +153,7 @@ dependencies = [ "posix_types", "slab_allocator", "stalloc", + "unwinding", "virtio-drivers", "xmas-elf", ] @@ -265,6 +266,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784a4df722dc6267a04af36895398f59d21d07dce47232adf31ec0ff2fa45e67" +[[package]] +name = "gimli" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93563d740bc9ef04104f9ed6f86f1e3275c2cdafb95664e26584b9ca807a8ffe" + [[package]] name = "intrusive-collections" version = "0.9.7" @@ -453,6 +460,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unwinding" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60612c845ef41699f39dc8c5391f252942c0a88b7d15da672eff0d14101bbd6d" +dependencies = [ + "gimli", +] + 
[[package]] name = "virtio-drivers" version = "0.11.0" diff --git a/Cargo.toml b/Cargo.toml index 4bc8bbe8..ab042dc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,9 @@ stalloc = { version = "0.6.1", default-features = false, features = [ [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } +[target.'cfg(target_arch = "riscv64")'.dependencies] +unwinding = { version = "0.2.8", default-features = false, features = ["unwinder", "fde-static", "personality", "panic"] } + [features] default = [] trace_pci = [] @@ -50,9 +53,6 @@ smp = [] [profile.release] debug = true -[profile.dev] -panic = "abort" - [profile.dev.package.eonix_preempt] opt-level = "s" diff --git a/crates/eonix_hal/src/arch/loongarch64/link.x b/crates/eonix_hal/src/arch/loongarch64/link.x index 11ef5192..f673ad5a 100644 --- a/crates/eonix_hal/src/arch/loongarch64/link.x +++ b/crates/eonix_hal/src/arch/loongarch64/link.x @@ -91,6 +91,6 @@ SECTIONS { } > VDSO AT> RAM VDSO_PADDR = LOADADDR(.vdso); - __kernel_end = ABSOLUTE(LOADADDR(.vdso) + SIZEOF(.vdso)); + __kernel_end = __edata; } -INSERT BEFORE .bss; +INSERT BEFORE .data.after; diff --git a/crates/eonix_hal/src/arch/loongarch64/memory.x b/crates/eonix_hal/src/arch/loongarch64/memory.x index f210b9b5..2a70f81b 100644 --- a/crates/eonix_hal/src/arch/loongarch64/memory.x +++ b/crates/eonix_hal/src/arch/loongarch64/memory.x @@ -12,12 +12,10 @@ REGION_ALIAS("REGION_TEXT", KIMAGE); REGION_ALIAS("REGION_RODATA", KIMAGE); REGION_ALIAS("REGION_DATA", KIMAGE); REGION_ALIAS("REGION_BSS", KBSS); -REGION_ALIAS("REGION_EHFRAME", KIMAGE); REGION_ALIAS("LINK_REGION_TEXT", RAM); REGION_ALIAS("LINK_REGION_RODATA", RAM); REGION_ALIAS("LINK_REGION_DATA", RAM); REGION_ALIAS("LINK_REGION_BSS", RAM); -REGION_ALIAS("LINK_REGION_EHFRAME", RAM); _stext = ORIGIN(REGION_TEXT) + LOADADDR(.text) - ORIGIN(RAM); diff --git a/crates/eonix_hal/src/arch/riscv64/link.x 
b/crates/eonix_hal/src/arch/riscv64/link.x index 3465a0ae..e348e1be 100644 --- a/crates/eonix_hal/src/arch/riscv64/link.x +++ b/crates/eonix_hal/src/arch/riscv64/link.x @@ -43,7 +43,6 @@ SECTIONS { KIMAGE_PAGES = (__edata - _stext + 0x1000 - 1) / 0x1000; KIMAGE_32K_COUNT = (KIMAGE_PAGES + 8 - 1) / 8; - __kernel_end = .; BSS_LENGTH = ABSOLUTE(__ebss - __sbss); } @@ -89,4 +88,6 @@ SECTIONS { VDSO_PADDR = LOADADDR(.vdso); } -INSERT AFTER .data; +INSERT BEFORE .data.after; + +__kernel_end = __edata; diff --git a/crates/eonix_hal/src/arch/riscv64/memory.x b/crates/eonix_hal/src/arch/riscv64/memory.x index 9c5ca2ee..0dc7c4ff 100644 --- a/crates/eonix_hal/src/arch/riscv64/memory.x +++ b/crates/eonix_hal/src/arch/riscv64/memory.x @@ -12,12 +12,10 @@ REGION_ALIAS("REGION_TEXT", KIMAGE); REGION_ALIAS("REGION_RODATA", KIMAGE); REGION_ALIAS("REGION_DATA", KIMAGE); REGION_ALIAS("REGION_BSS", KBSS); -REGION_ALIAS("REGION_EHFRAME", KIMAGE); REGION_ALIAS("LINK_REGION_TEXT", RAM); REGION_ALIAS("LINK_REGION_RODATA", RAM); REGION_ALIAS("LINK_REGION_DATA", RAM); REGION_ALIAS("LINK_REGION_BSS", RAM); -REGION_ALIAS("LINK_REGION_EHFRAME", RAM); _stext = ORIGIN(REGION_TEXT) + LOADADDR(.text) - ORIGIN(RAM); diff --git a/crates/eonix_hal/src/link.x.in b/crates/eonix_hal/src/link.x.in index b4ea6e0a..81c269c2 100644 --- a/crates/eonix_hal/src/link.x.in +++ b/crates/eonix_hal/src/link.x.in @@ -18,6 +18,15 @@ SECTIONS { __srodata = .; *(.rodata .rodata.*); + + . 
= ALIGN(8); + + PROVIDE(__eh_frame = .); + PROVIDE(__executable_start = __stext); + + KEEP(*(.eh_frame_hdr)); + KEEP(*(.eh_frame)); + KEEP(*(.eh_frame.*)); } > REGION_RODATA AT> LINK_REGION_RODATA @@ -32,6 +41,11 @@ SECTIONS { } > REGION_DATA AT> LINK_REGION_DATA + .data.after : + { + __data_after = .; + } > REGION_DATA AT> LINK_REGION_DATA + __edata = .; .bss (NOLOAD) : ALIGN(16) @@ -45,16 +59,7 @@ SECTIONS { __ebss = .; - .eh_frame : ALIGN(16) - { - __seh_frame = .; - - KEEP(*(.eh_frame .eh_frame*)); - - } > REGION_EHFRAME AT> LINK_REGION_EHFRAME - . = ALIGN(0x1000); - __eeh_frame = .; } SECTIONS { diff --git a/src/lib.rs b/src/lib.rs index e75f8653..80d24c28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,9 @@ extern crate alloc; +#[cfg(any(target_arch = "riscv64", target_arch = "x86_64"))] +extern crate unwinding; + mod driver; mod fs; mod hash; @@ -16,6 +19,8 @@ mod io; mod kernel; mod kernel_init; mod net; +#[cfg(any(target_arch = "riscv64", target_arch = "x86_64"))] +mod panic; mod path; mod prelude; mod rcu; @@ -53,6 +58,9 @@ use prelude::*; #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] fn do_panic() -> ! 
{ + #[cfg(target_arch = "riscv64")] + panic::stack_trace(); + shutdown(); } diff --git a/src/panic.rs b/src/panic.rs new file mode 100644 index 00000000..3c9c5f34 --- /dev/null +++ b/src/panic.rs @@ -0,0 +1,29 @@ +use core::ffi::c_void; + +use eonix_log::println_fatal; +use unwinding::abi::{ + UnwindContext, UnwindReasonCode, _Unwind_Backtrace, _Unwind_GetIP, _Unwind_GetRegionStart, +}; + +pub fn stack_trace() { + struct CallbackData { + counter: usize, + } + + extern "C" fn callback(unwind_ctx: &UnwindContext<'_>, arg: *mut c_void) -> UnwindReasonCode { + let data = unsafe { &mut *(arg as *mut CallbackData) }; + data.counter += 1; + + println_fatal!( + "{:4}: {:#018x} - at function {:#018x}", + data.counter, + _Unwind_GetIP(unwind_ctx), + _Unwind_GetRegionStart(unwind_ctx), + ); + + UnwindReasonCode::NO_REASON + } + + let mut data = CallbackData { counter: 0 }; + _Unwind_Backtrace(callback, &raw mut data as *mut c_void); +} From 3fb4966118201eeb064743b7fca94224397fdb9f Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 17 Aug 2025 00:43:13 +0800 Subject: [PATCH 28/54] task: fix infinite sleep in stackful tasks The stackful tasks might be woken up before actually being put into sleep by returning a Poll::Pending. Thus, infinite sleep will occur since we are no longer on both the wait list and the ready queue. The solution is to remember that we are woken up in stackful wakers and check before putting us to sleep by wait_for_wakeups(). Also, implement Drop for RCUPointer by using call_rcu to drop the underlying data. We must mark T: Send + Sync + 'static in order to send the arc to the runtime... 
Signed-off-by: greatbridf --- src/kernel/task.rs | 42 +++++++++++++++++++++++++++++++++++++++--- src/rcu.rs | 27 ++++++++++++++++++--------- 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 13e2ec93..2ef58069 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -10,7 +10,6 @@ mod signal; mod thread; pub use clone::{do_clone, CloneArgs, CloneFlags}; -use eonix_runtime::task::Task; pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; pub use kernel_stack::KernelStack; pub use loader::ProgramLoader; @@ -84,10 +83,14 @@ where interrupt::{default_fault_handler, default_irq_handler}, timer::{should_reschedule, timer_interrupt}, }; + use alloc::sync::Arc; + use alloc::task::Wake; use core::cell::UnsafeCell; use core::future::Future; use core::pin::Pin; use core::ptr::NonNull; + use core::sync::atomic::AtomicBool; + use core::sync::atomic::Ordering; use core::task::Context; use core::task::Poll; use core::task::Waker; @@ -97,6 +100,7 @@ where use eonix_hal::trap::TrapContext; use eonix_preempt::assert_preempt_enabled; use eonix_runtime::executor::Stack; + use eonix_runtime::task::Task; use thread::wait_for_wakeups; let stack = KernelStack::new(); @@ -105,18 +109,46 @@ where where F: Future, { - let waker = Waker::from(Task::current().clone()); + struct WakeSaver { + task: Arc, + woken: AtomicBool, + } + + impl Wake for WakeSaver { + fn wake_by_ref(self: &Arc) { + // SAFETY: If we read true below in the loop, we must have been + // woken up and acquired our waker's work by the runtime. 
+ self.woken.store(true, Ordering::Relaxed); + self.task.wake_by_ref(); + } + + fn wake(self: Arc) { + self.wake_by_ref(); + } + } + + let wake_saver = Arc::new(WakeSaver { + task: Task::current().clone(), + woken: AtomicBool::new(false), + }); + let waker = Waker::from(wake_saver.clone()); let mut cx = Context::from_waker(&waker); let output = loop { match future.as_mut().poll(&mut cx) { Poll::Ready(output) => break output, Poll::Pending => { + assert_preempt_enabled!("Blocking in stackful futures is not allowed."); + if Task::current().is_ready() { continue; } - assert_preempt_enabled!("Blocking in stackful futures is not allowed."); + // SAFETY: The runtime must have ensured that we can see the + // work done by the waker. + if wake_saver.woken.swap(false, Ordering::Relaxed) { + continue; + } unsafe { #[cfg(target_arch = "riscv64")] @@ -129,6 +161,10 @@ where } }; + drop(cx); + drop(waker); + drop(wake_saver); + unsafe { output_ptr.write(Some(output)); } diff --git a/src/rcu.rs b/src/rcu.rs index 32ff7657..c1645d33 100644 --- a/src/rcu.rs +++ b/src/rcu.rs @@ -194,9 +194,15 @@ impl<'lt, T: RCUNode> Iterator for RCUIterator<'lt, T> { } } -pub struct RCUPointer(AtomicPtr); - -impl core::fmt::Debug for RCUPointer { +pub struct RCUPointer(AtomicPtr) +where + T: Send + Sync + 'static; + +impl core::fmt::Debug for RCUPointer +where + T: core::fmt::Debug, + T: Send + Sync + 'static, +{ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match NonNull::new(self.0.load(Ordering::Acquire)) { Some(pointer) => { @@ -209,7 +215,10 @@ impl core::fmt::Debug for RCUPointer { } } -impl RCUPointer { +impl RCUPointer +where + T: Send + Sync + 'static, +{ pub const fn empty() -> Self { Self(AtomicPtr::new(core::ptr::null_mut())) } @@ -266,16 +275,16 @@ impl RCUPointer { } } -impl Drop for RCUPointer { +impl Drop for RCUPointer +where + T: Send + Sync + 'static, +{ fn drop(&mut self) { // SAFETY: We call `rcu_sync()` to ensure that all readers are done. 
if let Some(arc) = unsafe { self.swap(None) } { // We only wait if there are other references. if Arc::strong_count(&arc) == 1 { - call_rcu(move || { - let _ = arc; - todo!(); - }); + call_rcu(move || drop(arc)); } } } From 8c656b5898113f9a5bf0dcc678e4119d0f2f9a6c Mon Sep 17 00:00:00 2001 From: greatbridf Date: Mon, 25 Aug 2025 22:38:24 +0800 Subject: [PATCH 29/54] configure: check and use ARCH given in env The current implementation ignores the given argument and uses the default arch. Change the wrong behavior... Signed-off-by: greatbridf --- configure | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configure b/configure index 87033ea6..1b8efeee 100755 --- a/configure +++ b/configure @@ -11,7 +11,7 @@ event() { printf "$1... " } -ARCH=${ARCH:-x86_64} +ARCH=${ARCH:-"$DEFAULT_ARCH"} # Define toolchain and QEMU/GDB settings for per architecture event "target architecture" @@ -40,7 +40,7 @@ esac if [ "$QEMU" = "" ]; then event "checking default qemu" - QEMU="qemu-system-$DEFAULT_ARCH" + QEMU="qemu-system-$ARCH" if $QEMU --version > /dev/null 2>&1; then QEMU="qemu-system-\$(ARCH)" break @@ -65,7 +65,7 @@ check_gdb_arch() { local item="$1" if $item --init-eval-command 'set arch' \ --init-eval-command 'q' 2>&1 \ - | grep "$DEFAULT_ARCH" >/dev/null 2>&1; then + | grep "$ARCH" >/dev/null 2>&1; then return 0 else return 1 @@ -74,7 +74,7 @@ check_gdb_arch() { if [ "$GDB" = "" ]; then event "checking default gdb" - if check_gdb_arch "$DEFAULT_ARCH-elf-gdb"; then + if check_gdb_arch "$ARCH-elf-gdb"; then GDB="\$(ARCH)-elf-gdb" break fi @@ -126,7 +126,7 @@ else fi cp Makefile.src "$OUT" -sed -i '' -e "s|##DEFAULT_ARCH##|$DEFAULT_ARCH|" "$OUT" > /dev/null 2>&1 +sed -i '' -e "s|##DEFAULT_ARCH##|$ARCH|" "$OUT" > /dev/null 2>&1 sed -i '' -e "s|##GDB##|$GDB|" "$OUT" > /dev/null 2>&1 sed -i '' -e "s|##QEMU##|$QEMU|" "$OUT" > /dev/null 2>&1 sed -i '' -e "s|##FDISK##|$FDISK|" "$OUT" > /dev/null 2>&1 From cefcd6f783093e071dc7d2db5235b4744ee6abe4 Mon Sep 17 
00:00:00 2001 From: greatbridf Date: Tue, 2 Sep 2025 22:45:55 +0800 Subject: [PATCH 30/54] vfs: rework the vfs subsystem with async Inode and superblock rework: Remove the old Inode trait as it used to undertake too much responsibility. The new design uses three new traits: InodeOps is used to acquire generic inode attributes. InodeFileOps and InodeDirOps handle file and directory requests respectively. All three have async fn trait methods and don't need to be ?Sized. Then, we implement Inode, InodeFile and InodeDir for the implementors of the three "Ops" traits, erasing their actual types and providing a generic dyn interface by wrapping the futures in boxes. We should provide an io worker? or some io context with an allocator for futures to reduce the overhead of io requests, or come up with some better ideas? For inode usage, we introduce InodeRef and InodeUse. InodeRef is a simple wrapper for Weak and InodeUse for Arc. This helps us use them better as we can't define impls for Arc's as they are foreign types. We also provide some more helper methods for them. After the change, we don't impose ANY structural restriction except for the spinlock-wrapped InodeInfo. The InodeInfo struct design might need rethinking, but the current implementation seems to be fine aside from unnecessary locks when we try to access some of its fields; this shouldn't be a VERY big problem, nor is it very urgent... Similar changes are also made to superblock traits and types. But for the superblock objects, we use a SuperBlockComplex struct to store common fields such as whether the superblock is read-only, their device id and so on. Also the structs have a superblock rwsem inside. But we haven't decided how to use that (such as whether we should acquire the lock and pass it to the inode methods) and even whether they should exist and be there. This will need further thinking so we put this off for now... 
Filesystem rework: Rework tmpfs, fatfs and procfs with the new technology mentioned above, leaving the old ext4 unchanged. The current implementation of ext4 uses some "random" library from the "camp". Its code hasn't been fully reviewed for time reasons but seems to be rather "problematic"... We might rewrite the whole module later and the page cache subsystem requires fully reworking as well. So we put this work off as well. Block device and other parts rework: Wraps PageCacheBackend, MountCreator and BlockRequestQueue with async_trait to provide dyn compatible async functions. Dentry walking functions are also put to the heap since they are recursive functions... This has similar problems to the inode traits, ugly solution. Further optimization is required. Signed-off-by: greatbridf --- .vscode/tasks.json | 2 +- Cargo.lock | 98 +++++ Cargo.toml | 2 + src/driver/ahci/mod.rs | 63 +-- src/driver/ahci/port.rs | 48 ++- src/driver/ahci/slot.rs | 19 +- src/driver/e1000e.rs | 8 +- src/driver/serial.rs | 6 +- src/driver/virtio/riscv64.rs | 7 +- src/driver/virtio/virtio_blk.rs | 5 +- src/fs/ext4.rs | 35 +- src/fs/fat32.rs | 567 ++++++++++++++++----------- src/fs/fat32/dir.rs | 214 +++++------ src/fs/fat32/file.rs | 46 +-- src/fs/mod.rs | 3 +- src/fs/procfs.rs | 437 +++++++++------------ src/fs/shm.rs | 146 ------- src/fs/tmpfs.rs | 613 ------------------------------ src/fs/tmpfs/dir.rs | 415 ++++++++++++++++++++ src/fs/tmpfs/file.rs | 298 +++++++++++++++ src/fs/tmpfs/mod.rs | 73 ++++ src/kernel/block.rs | 38 +- src/kernel/block/mbr.rs | 2 +- src/kernel/chardev.rs | 15 +- src/kernel/mem.rs | 2 +- src/kernel/mem/mm_list/mapping.rs | 15 +- src/kernel/mem/page_cache.rs | 55 ++- src/kernel/pcie/driver.rs | 9 +- src/kernel/syscall/file_rw.rs | 94 +++-- src/kernel/syscall/mm.rs | 122 +----- src/kernel/syscall/procops.rs | 20 +- src/kernel/task/loader/elf.rs | 34 +- src/kernel/task/loader/mod.rs | 13 +- src/kernel/timer.rs | 2 + src/kernel/vfs/dentry.rs | 295 +++++++------- 
src/kernel/vfs/dentry/dcache.rs | 29 +- src/kernel/vfs/file/inode_file.rs | 139 ++++--- src/kernel/vfs/filearray.rs | 23 +- src/kernel/vfs/inode.rs | 494 ------------------------ src/kernel/vfs/inode/ino.rs | 31 ++ src/kernel/vfs/inode/inode.rs | 389 +++++++++++++++++++ src/kernel/vfs/inode/mod.rs | 10 + src/kernel/vfs/inode/ops.rs | 18 + src/kernel/vfs/inode/statx.rs | 97 +++++ src/kernel/vfs/mod.rs | 22 +- src/kernel/vfs/mount.rs | 32 +- src/kernel/vfs/superblock.rs | 127 +++++++ src/kernel/vfs/types/device_id.rs | 36 ++ src/kernel/vfs/types/mod.rs | 5 + src/kernel/vfs/types/mode.rs | 169 ++++++++ src/kernel/vfs/vfs.rs | 10 - src/lib.rs | 33 +- src/prelude.rs | 30 +- 53 files changed, 2986 insertions(+), 2529 deletions(-) delete mode 100644 src/fs/shm.rs delete mode 100644 src/fs/tmpfs.rs create mode 100644 src/fs/tmpfs/dir.rs create mode 100644 src/fs/tmpfs/file.rs create mode 100644 src/fs/tmpfs/mod.rs delete mode 100644 src/kernel/vfs/inode.rs create mode 100644 src/kernel/vfs/inode/ino.rs create mode 100644 src/kernel/vfs/inode/inode.rs create mode 100644 src/kernel/vfs/inode/mod.rs create mode 100644 src/kernel/vfs/inode/ops.rs create mode 100644 src/kernel/vfs/inode/statx.rs create mode 100644 src/kernel/vfs/superblock.rs create mode 100644 src/kernel/vfs/types/device_id.rs create mode 100644 src/kernel/vfs/types/mod.rs create mode 100644 src/kernel/vfs/types/mode.rs delete mode 100644 src/kernel/vfs/vfs.rs diff --git a/.vscode/tasks.json b/.vscode/tasks.json index e7a54791..a85ea0cf 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -6,7 +6,7 @@ { "label": "debug run riscv64", "type": "shell", - "command": "make srun ARCH=riscv64", + "command": "make srun ARCH=riscv64 IMG=/Volumes/oscomp/sdcard-rv.img", "isBackground": true, "problemMatcher": [ { diff --git a/Cargo.lock b/Cargo.lock index 59242bbc..32868677 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,17 @@ dependencies = [ "log", ] +[[package]] +name = "async-trait" +version = "0.1.89" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic_unique_refcell" version = "0.1.0" @@ -144,6 +155,7 @@ dependencies = [ "acpi", "align_ext", "another_ext4", + "async-trait", "atomic_unique_refcell", "bitflags", "buddy_allocator", @@ -155,6 +167,7 @@ dependencies = [ "eonix_preempt", "eonix_runtime", "eonix_sync", + "futures", "intrusive-collections 0.9.8", "intrusive_list", "itertools", @@ -265,6 +278,79 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784a4df722dc6267a04af36895398f59d21d07dce47232adf31ec0ff2fa45e67" +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + 
"syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-macro", + "futures-sink", + "futures-task", + "pin-project-lite", + "pin-utils", +] + [[package]] name = "gimli" version = "0.32.0" @@ -332,6 +418,18 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pointers" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index e70d8c65..bc7e7b0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,8 @@ another_ext4 = { git = "https://github.com/SMS-Derfflinger/another_ext4", branch stalloc = { version = "0.6.1", default-features = false, features = [ "allocator-api", ] } +async-trait = "0.1.89" +futures = { version = "0.3.31", features = ["alloc", "async-await"], default-features = false } [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git 
a/src/driver/ahci/mod.rs b/src/driver/ahci/mod.rs index c3b1cfa0..ab405829 100644 --- a/src/driver/ahci/mod.rs +++ b/src/driver/ahci/mod.rs @@ -2,15 +2,16 @@ use crate::{ fs::procfs, io::Buffer as _, kernel::{ - block::{make_device, BlockDevice}, + block::BlockDevice, constants::{EINVAL, EIO}, interrupt::register_irq_handler, pcie::{self, Header, PCIDevice, PCIDriver, PciError}, - task::block_on, + vfs::types::DeviceId, }, prelude::*, }; use alloc::{format, sync::Arc}; +use async_trait::async_trait; use control::AdapterControl; use defs::*; use eonix_mm::address::{AddrOps as _, PAddr}; @@ -108,7 +109,30 @@ impl Device<'_> { } impl Device<'static> { - fn probe_ports(&self) -> KResult<()> { + async fn probe_port(&self, port: Arc>) -> KResult<()> { + port.init().await?; + + { + let port = port.clone(); + let name = format!("ahci-p{}-stats", port.nport); + procfs::populate_root(name.into_bytes().into(), move |buffer| { + port.print_stats(&mut buffer.get_writer()) + }) + .await; + } + + let port = BlockDevice::register_disk( + DeviceId::new(8, port.nport as u16 * 16), + 2147483647, // TODO: get size from device + port, + )?; + + port.partprobe().await?; + + Ok(()) + } + + async fn probe_ports(&self) -> KResult<()> { for nport in self.control.implemented_ports() { let port = Arc::new(AdapterPort::new(self.control_base, nport)); if !port.status_ok() { @@ -116,27 +140,7 @@ impl Device<'static> { } self.ports.lock_irq()[nport as usize] = Some(port.clone()); - if let Err(e) = (|| -> KResult<()> { - port.init()?; - - { - let port = port.clone(); - let name = format!("ahci-p{}-stats", port.nport); - procfs::populate_root(name.into_bytes().into(), move |buffer| { - port.print_stats(&mut buffer.get_writer()) - })?; - } - - let port = BlockDevice::register_disk( - make_device(8, nport * 16), - 2147483647, // TODO: get size from device - port, - )?; - - block_on(port.partprobe())?; - - Ok(()) - })() { + if let Err(e) = self.probe_port(port).await { self.ports.lock_irq()[nport as 
usize] = None; println_warn!("probe port {nport} failed with {e}"); } @@ -154,6 +158,7 @@ impl AHCIDriver { } } +#[async_trait] impl PCIDriver for AHCIDriver { fn vendor_id(&self) -> u16 { VENDOR_INTEL @@ -163,7 +168,7 @@ impl PCIDriver for AHCIDriver { DEVICE_AHCI } - fn handle_device(&self, pcidev: Arc>) -> Result<(), PciError> { + async fn handle_device(&self, pcidev: Arc>) -> Result<(), PciError> { let Header::Endpoint(header) = pcidev.header else { Err(EINVAL)? }; @@ -200,7 +205,7 @@ impl PCIDriver for AHCIDriver { let device_irq = device.clone(); register_irq_handler(irqno as i32, move || device_irq.handle_interrupt())?; - device.probe_ports()?; + device.probe_ports().await?; self.devices.lock().push(device); @@ -208,6 +213,8 @@ impl PCIDriver for AHCIDriver { } } -pub fn register_ahci_driver() { - pcie::register_driver(AHCIDriver::new()).expect("Register ahci driver failed"); +pub async fn register_ahci_driver() { + pcie::register_driver(AHCIDriver::new()) + .await + .expect("Register ahci driver failed"); } diff --git a/src/driver/ahci/port.rs b/src/driver/ahci/port.rs index f558f6e1..77286ec5 100644 --- a/src/driver/ahci/port.rs +++ b/src/driver/ahci/port.rs @@ -9,9 +9,9 @@ use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; use crate::kernel::constants::{EINVAL, EIO}; use crate::kernel::mem::paging::Page; use crate::kernel::mem::AsMemoryBlock as _; -use crate::kernel::task::block_on; use crate::prelude::*; use alloc::collections::vec_deque::VecDeque; +use async_trait::async_trait; use core::pin::pin; use eonix_mm::address::{Addr as _, PAddr}; use eonix_sync::{SpinIrq as _, WaitList}; @@ -145,18 +145,25 @@ impl AdapterPort<'_> { self.sata_status().read_once() & 0xf == 0x3 } - fn get_free_slot(&self) -> u32 { + async fn get_free_slot(&self) -> u32 { loop { - let mut free_list = self.free_list.lock_irq(); - let free_slot = free_list.free.pop_front(); - if let Some(slot) = free_slot { - return slot; - } let mut wait = 
pin!(self.free_list_wait.prepare_to_wait()); - wait.as_mut().add_to_wait_list(); - drop(free_list); - block_on(wait); + { + let mut free_list = self.free_list.lock_irq(); + + if let Some(slot) = free_list.free.pop_front() { + return slot; + } + + wait.as_mut().add_to_wait_list(); + + if let Some(slot) = free_list.free.pop_front() { + return slot; + } + } + + wait.await; } } @@ -204,11 +211,11 @@ impl AdapterPort<'_> { Ok(()) } - fn send_command(&self, cmd: &impl Command) -> KResult<()> { + async fn send_command(&self, cmd: &impl Command) -> KResult<()> { let mut cmdtable = CommandTable::new(); cmdtable.setup(cmd); - let slot_index = self.get_free_slot(); + let slot_index = self.get_free_slot().await; let slot = &self.slots[slot_index as usize]; slot.prepare_command(&cmdtable, cmd.write()); @@ -222,7 +229,7 @@ impl AdapterPort<'_> { self.stats.inc_cmd_sent(); - if let Err(_) = block_on(slot.wait_finish()) { + if let Err(_) = slot.wait_finish().await { self.stats.inc_cmd_error(); return Err(EIO); }; @@ -231,16 +238,16 @@ impl AdapterPort<'_> { Ok(()) } - fn identify(&self) -> KResult<()> { + async fn identify(&self) -> KResult<()> { let cmd = IdentifyCommand::new(); // TODO: check returned data - self.send_command(&cmd)?; + self.send_command(&cmd).await?; Ok(()) } - pub fn init(&self) -> KResult<()> { + pub async fn init(&self) -> KResult<()> { self.stop_command()?; self.command_list_base() @@ -251,7 +258,7 @@ impl AdapterPort<'_> { self.start_command()?; - match self.identify() { + match self.identify().await { Err(err) => { self.stop_command()?; Err(err) @@ -269,12 +276,13 @@ impl AdapterPort<'_> { } } +#[async_trait] impl BlockRequestQueue for AdapterPort<'_> { fn max_request_pages(&self) -> u64 { 1024 } - fn submit(&self, req: BlockDeviceRequest) -> KResult<()> { + async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()> { match req { BlockDeviceRequest::Read { sector, @@ -287,7 +295,7 @@ impl BlockRequestQueue for AdapterPort<'_> { let command = 
ReadLBACommand::new(buffer, sector, count as u16)?; - self.send_command(&command) + self.send_command(&command).await } BlockDeviceRequest::Write { sector, @@ -300,7 +308,7 @@ impl BlockRequestQueue for AdapterPort<'_> { let command = WriteLBACommand::new(buffer, sector, count as u16)?; - self.send_command(&command) + self.send_command(&command).await } } } diff --git a/src/driver/ahci/slot.rs b/src/driver/ahci/slot.rs index 2198c457..60a66de3 100644 --- a/src/driver/ahci/slot.rs +++ b/src/driver/ahci/slot.rs @@ -71,19 +71,20 @@ impl<'a> CommandSlot<'a> { pub async fn wait_finish(&self) -> KResult<()> { let mut inner = loop { - let inner = self.inner.lock_irq(); - if inner.state != SlotState::Working { - break inner; - } - let mut wait = pin!(self.wait_list.prepare_to_wait()); - wait.as_mut().add_to_wait_list(); - if inner.state != SlotState::Working { - break inner; + { + let inner = self.inner.lock_irq(); + if inner.state != SlotState::Working { + break inner; + } + wait.as_mut().add_to_wait_list(); + + if inner.state != SlotState::Working { + break inner; + } } - drop(inner); wait.await; }; diff --git a/src/driver/e1000e.rs b/src/driver/e1000e.rs index ba31b8b1..f362f477 100644 --- a/src/driver/e1000e.rs +++ b/src/driver/e1000e.rs @@ -8,6 +8,7 @@ use crate::prelude::*; use alloc::boxed::Box; use alloc::sync::Arc; use alloc::vec::Vec; +use async_trait::async_trait; use core::ptr::NonNull; use eonix_hal::fence::memory_barrier; use eonix_mm::address::{Addr, PAddr}; @@ -437,6 +438,7 @@ struct Driver { dev_id: u16, } +#[async_trait] impl PCIDriver for Driver { fn vendor_id(&self) -> u16 { 0x8086 @@ -446,7 +448,7 @@ impl PCIDriver for Driver { self.dev_id } - fn handle_device(&self, device: Arc>) -> Result<(), PciError> { + async fn handle_device(&self, device: Arc>) -> Result<(), PciError> { let Header::Endpoint(header) = device.header else { Err(EINVAL)? 
}; @@ -473,10 +475,10 @@ impl PCIDriver for Driver { } } -pub fn register_e1000e_driver() { +pub async fn register_e1000e_driver() { let dev_ids = [0x100e, 0x10d3, 0x10ea, 0x153a]; for id in dev_ids.into_iter() { - pcie::register_driver(Driver { dev_id: id }).unwrap(); + pcie::register_driver(Driver { dev_id: id }).await.unwrap(); } } diff --git a/src/driver/serial.rs b/src/driver/serial.rs index d69965f4..b634c232 100644 --- a/src/driver/serial.rs +++ b/src/driver/serial.rs @@ -2,8 +2,8 @@ mod io; use crate::{ kernel::{ - block::make_device, console::set_console, constants::EIO, interrupt::register_irq_handler, - CharDevice, CharDeviceType, Terminal, TerminalDevice, + console::set_console, constants::EIO, interrupt::register_irq_handler, + vfs::types::DeviceId, CharDevice, CharDeviceType, Terminal, TerminalDevice, }, prelude::*, }; @@ -167,7 +167,7 @@ impl Serial { eonix_log::set_console(terminal.clone()); CharDevice::register( - make_device(4, 64 + port.id), + DeviceId::new(4, 64 + port.id as u16), port.name.clone(), CharDeviceType::Terminal(terminal), )?; diff --git a/src/driver/virtio/riscv64.rs b/src/driver/virtio/riscv64.rs index 9bdbf6ce..b33e16ac 100644 --- a/src/driver/virtio/riscv64.rs +++ b/src/driver/virtio/riscv64.rs @@ -1,8 +1,5 @@ use super::virtio_blk::HAL; -use crate::kernel::{ - block::{make_device, BlockDevice}, - task::block_on, -}; +use crate::kernel::{block::BlockDevice, task::block_on, vfs::types::DeviceId}; use alloc::{sync::Arc, vec::Vec}; use eonix_hal::arch_exported::fdt::FDT; use eonix_hal::mm::ArchPhysAccess; @@ -43,7 +40,7 @@ pub fn init() { .expect("Failed to initialize VirtIO Block device"); let block_device = BlockDevice::register_disk( - make_device(8, 16 * disk_id), + DeviceId::new(8, 16 * disk_id), 2147483647, Arc::new(Spin::new(block_device)), ) diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs index 37e4fe77..86b500b6 100644 --- a/src/driver/virtio/virtio_blk.rs +++ b/src/driver/virtio/virtio_blk.rs 
@@ -7,6 +7,8 @@ use crate::{ }, prelude::KResult, }; +use alloc::boxed::Box; +use async_trait::async_trait; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::{ address::{Addr, PAddr, PhysAccess}, @@ -74,6 +76,7 @@ unsafe impl Hal for HAL { } } +#[async_trait] impl BlockRequestQueue for Spin> where T: Transport + Send, @@ -82,7 +85,7 @@ where 1024 } - fn submit(&self, req: BlockDeviceRequest) -> KResult<()> { + async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()> { match req { BlockDeviceRequest::Write { sector, diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 76ca4a34..121339d3 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,5 +1,3 @@ -use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; - use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; use crate::kernel::task::block_on; use crate::kernel::timer::Ticks; @@ -31,6 +29,8 @@ use alloc::{ use another_ext4::{ Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, }; +use async_trait::async_trait; +use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use eonix_sync::RwLock; pub struct Ext4BlockDevice { @@ -194,7 +194,7 @@ impl Ext4Fs { root_inode.inode.mtime_extra() as _, )), rwsem: RwLock::new(()), - vfs: Arc::downgrade(&ext4fs) as _, + sb: Arc::downgrade(&ext4fs) as _, }, ) }; @@ -290,7 +290,7 @@ impl Inode for FileInode { } fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let mut temp_buf = vec![0u8; buffer.total()]; @@ -334,7 +334,7 @@ impl Inode for FileInode { fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { //let _lock = Task::block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let mut temp_buf = vec![0u8; 
4096]; @@ -363,7 +363,7 @@ impl Inode for FileInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let old_mode = self.mode.load(); let new_mode = old_mode.perm(mode.bits()); @@ -428,7 +428,7 @@ impl DirInode { impl Inode for DirInode { fn lookup(&self, dentry: &Arc) -> KResult>> { - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let name = dentry.get_name(); @@ -477,7 +477,7 @@ impl Inode for DirInode { ctime: Spin::new(Instant::new(attr.ctime as _, 0)), mtime: Spin::new(Instant::new(attr.mtime as _, 0)), rwsem: RwLock::new(()), - vfs: self.vfs.clone(), + sb: self.sb.clone(), }, ); @@ -489,7 +489,7 @@ impl Inode for DirInode { offset: usize, callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, ) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let entries = ext4fs @@ -519,7 +519,7 @@ impl Inode for DirInode { fn creat(&self, at: &Arc, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let name = at.get_name(); @@ -534,7 +534,7 @@ impl Inode for DirInode { ) .unwrap(); - let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode); + let file = FileInode::new(new_ino as u64, self.sb.clone(), mode); let now = Instant::now(); self.update_child_time(file.as_ref(), now); self.link_file(); @@ -547,7 +547,7 @@ impl Inode for DirInode { fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = 
vfs.as_any().downcast_ref::().unwrap(); let name = at.get_name(); @@ -562,7 +562,7 @@ impl Inode for DirInode { ) .unwrap(); - let new_dir = DirInode::new(new_ino as u64, self.vfs.clone(), mode); + let new_dir = DirInode::new(new_ino as u64, self.sb.clone(), mode); let now = Instant::now(); self.update_child_time(new_dir.as_ref(), now); self.link_dir(); @@ -575,7 +575,7 @@ impl Inode for DirInode { fn unlink(&self, at: &Arc) -> KResult<()> { let _dir_lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let file = at.get_inode()?; @@ -602,7 +602,7 @@ impl Inode for DirInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let old_mode = self.mode.load(); let new_mode = old_mode.perm(mode.bits()); @@ -638,7 +638,7 @@ impl Inode for DirInode { // TODO: may need another lock let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let old_file = old_dentry.get_inode()?; @@ -698,6 +698,7 @@ impl From for Mode { struct Ext4MountCreator; +#[async_trait] impl MountCreator for Ext4MountCreator { fn check_signature(&self, mut first_block: &[u8]) -> KResult { match first_block.split_off(1080..) 
{ @@ -707,7 +708,7 @@ impl MountCreator for Ext4MountCreator { } } - fn create_mount(&self, source: &str, _flags: u64, mp: &Arc) -> KResult { + async fn create_mount(&self, source: &str, _flags: u64, mp: &Arc) -> KResult { let source = source.as_bytes(); let path = Path::new(source)?; diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 12eabdd5..1104337d 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -1,41 +1,72 @@ mod dir; mod file; -use crate::io::Stream; -use crate::kernel::constants::EIO; +use core::future::Future; +use core::ops::Deref; + +use alloc::sync::{Arc, Weak}; +use async_trait::async_trait; +use dir::{as_raw_dirents, ParseDirent}; +use eonix_sync::RwLock; +use itertools::Itertools; + +use crate::kernel::constants::{EINVAL, EIO}; use crate::kernel::mem::{AsMemoryBlock, CachePageStream}; -use crate::kernel::task::block_on; -use crate::kernel::vfs::inode::{Mode, WriteOffset}; +use crate::kernel::timer::Instant; +use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::types::{DeviceId, Format, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; +use crate::prelude::*; use crate::{ io::{Buffer, ByteBuffer, UninitBuffer}, kernel::{ - block::{make_device, BlockDevice, BlockDeviceRequest}, + block::{BlockDevice, BlockDeviceRequest}, mem::{ paging::Page, - {CachePage, PageCache, PageCacheBackend}, + {CachePage, PageCache, PageCacheBackendOps}, }, vfs::{ dentry::Dentry, - inode::{define_struct_inode, Ino, Inode, InodeData}, + inode::{Ino, Inode}, mount::{register_filesystem, Mount, MountCreator}, - vfs::Vfs, - DevId, }, }, - prelude::*, KResult, }; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, - vec::Vec, -}; -use core::{ops::ControlFlow, sync::atomic::Ordering}; -use dir::Dirs as _; -use eonix_sync::RwLock; -use file::ClusterRead; -type ClusterNo = u32; +#[repr(transparent)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +struct 
Cluster(u32); + +#[repr(transparent)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +struct RawCluster(pub u32); + +impl RawCluster { + const START: u32 = 2; + const EOC: u32 = 0x0FFF_FFF8; + const INVL: u32 = 0xF000_0000; + + fn parse(self) -> Option { + match self.0 { + ..Self::START | Self::EOC..Self::INVL => None, + Self::INVL.. => { + unreachable!("invalid cluster number: RawCluster({:#08x})", self.0) + } + no => Some(Cluster(no)), + } + } +} + +impl Cluster { + pub fn as_ino(self) -> Ino { + Ino::new(self.0 as _) + } + + fn normalized(self) -> Self { + Self(self.0 - 2) + } +} const SECTOR_SIZE: usize = 512; @@ -59,7 +90,7 @@ struct Bootsector { sectors_per_fat: u32, flags: u16, fat_version: u16, - root_cluster: ClusterNo, + root_cluster: RawCluster, fsinfo_sector: u16, backup_bootsector: u16, _reserved: [u8; 12], @@ -73,222 +104,203 @@ struct Bootsector { mbr_signature: u16, } -impl_any!(FatFs); /// # Lock order /// 2. FatTable /// 3. Inodes /// struct FatFs { sectors_per_cluster: u8, - rootdir_cluster: ClusterNo, - data_start: u64, - volume_label: [u8; 11], + data_start_sector: u64, + _rootdir_cluster: Cluster, + _volume_label: Box, device: Arc, - fat: RwLock>, - weak: Weak, - icache: BTreeMap, + fat: RwLock>, } -impl Vfs for FatFs { - fn io_blksize(&self) -> usize { - 4096 - } - - fn fs_devid(&self) -> DevId { - self.device.devid() - } - - fn is_read_only(&self) -> bool { - true - } -} +impl SuperBlock for FatFs {} impl FatFs { - fn read_cluster(&self, cluster: ClusterNo, buf: &Page) -> KResult<()> { - let cluster = cluster - 2; + async fn read_cluster(&self, mut cluster: Cluster, buf: &Page) -> KResult<()> { + cluster = cluster.normalized(); let rq = BlockDeviceRequest::Read { - sector: self.data_start as u64 + cluster as u64 * self.sectors_per_cluster as u64, + sector: self.data_start_sector as u64 + + cluster.0 as u64 * self.sectors_per_cluster as u64, count: self.sectors_per_cluster as u64, buffer: core::slice::from_ref(buf), }; - 
self.device.commit_request(rq)?; + self.device.commit_request(rq).await?; Ok(()) } - - fn get_or_alloc_inode(&self, ino: Ino, is_directory: bool, size: u32) -> Arc { - self.icache - .get(&ino) - .cloned() - .map(FatInode::unwrap) - .unwrap_or_else(|| { - if is_directory { - DirInode::new(ino, self.weak.clone(), size) - } else { - FileInode::new(ino, self.weak.clone(), size) - } - }) - } } impl FatFs { - pub fn create(device: DevId) -> KResult<(Arc, Arc)> { + pub async fn create(device: DeviceId) -> KResult<(SbUse, InodeUse)> { let device = BlockDevice::get(device)?; - let mut fatfs_arc = Arc::new_cyclic(|weak: &Weak| Self { - device, - sectors_per_cluster: 0, - rootdir_cluster: 0, - data_start: 0, - fat: RwLock::new(Vec::new()), - weak: weak.clone(), - icache: BTreeMap::new(), - volume_label: [0; 11], - }); - let fatfs = unsafe { Arc::get_mut_unchecked(&mut fatfs_arc) }; - - let mut info: UninitBuffer = UninitBuffer::new(); - fatfs.device.read_some(0, &mut info)?.ok_or(EIO)?; + let mut info = UninitBuffer::::new(); + device.read_some(0, &mut info).await?.ok_or(EIO)?; let info = info.assume_filled_ref()?; - fatfs.sectors_per_cluster = info.sectors_per_cluster; - fatfs.rootdir_cluster = info.root_cluster; - fatfs.data_start = - info.reserved_sectors as u64 + info.fat_copies as u64 * info.sectors_per_fat as u64; - - let fat = fatfs.fat.get_mut(); - - fat.resize( - 512 * info.sectors_per_fat as usize / core::mem::size_of::(), - 0, + let mut fat = Box::new_uninit_slice( + 512 * info.sectors_per_fat as usize / core::mem::size_of::(), ); - let mut buffer = ByteBuffer::from(fat.as_mut_slice()); - - fatfs - .device - .read_some(info.reserved_sectors as usize * 512, &mut buffer)? + device + .read_some( + info.reserved_sectors as usize * 512, + &mut ByteBuffer::from(fat.as_mut()), + ) + .await? 
.ok_or(EIO)?; - info.volume_label - .iter() - .take_while(|&&c| c != ' ' as u8) - .take(11) - .enumerate() - .for_each(|(idx, c)| fatfs.volume_label[idx] = *c); + let sectors_per_cluster = info.sectors_per_cluster; + let rootdir_cluster = info.root_cluster.parse().ok_or(EINVAL)?; - let root_dir_cluster_count = ClusterIterator::new(fat, fatfs.rootdir_cluster).count(); - let root_dir_size = root_dir_cluster_count as u32 * info.sectors_per_cluster as u32 * 512; + let data_start_sector = + info.reserved_sectors as u64 + info.fat_copies as u64 * info.sectors_per_fat as u64; + + let volume_label = { + let end = info + .volume_label + .iter() + .position(|&c| c == b' ') + .unwrap_or(info.volume_label.len()); + + String::from_utf8_lossy(&info.volume_label[..end]) + .into_owned() + .into_boxed_str() + }; - let root_inode = DirInode::new( - (info.root_cluster & !0xF000_0000) as Ino, - fatfs.weak.clone(), - root_dir_size, + let fat = unsafe { fat.assume_init() }; + + let rootdir_cluster_count = ClusterIterator::new(fat.as_ref(), rootdir_cluster).count(); + let rootdir_size = rootdir_cluster_count as u32 * sectors_per_cluster as u32 * 512; + + let fatfs = SbUse::new( + SuperBlockInfo { + io_blksize: 4096, + device_id: device.devid(), + read_only: true, + }, + Self { + device, + sectors_per_cluster, + _rootdir_cluster: rootdir_cluster, + data_start_sector, + fat: RwLock::new(fat), + _volume_label: volume_label, + }, ); - Ok((fatfs_arc, root_inode)) + let sbref = SbRef::from(&fatfs); + Ok((fatfs, DirInode::new(rootdir_cluster, sbref, rootdir_size))) } } -struct ClusterIterator<'fat> { - fat: &'fat [ClusterNo], - cur: ClusterNo, +struct ClusterIterator<'a> { + fat: &'a [RawCluster], + cur: Option, } -impl<'fat> ClusterIterator<'fat> { - fn new(fat: &'fat [ClusterNo], start: ClusterNo) -> Self { - Self { fat, cur: start } +impl<'a> ClusterIterator<'a> { + fn new(fat: &'a [RawCluster], start: Cluster) -> Self { + Self { + fat, + cur: Some(start), + } } } impl<'fat> Iterator for 
ClusterIterator<'fat> { - type Item = ClusterNo; + type Item = Cluster; fn next(&mut self) -> Option { - const EOC: ClusterNo = 0x0FFF_FFF8; - const INVL: ClusterNo = 0xF000_0000; - - match self.cur { - ..2 | EOC..INVL => None, - INVL.. => unreachable!("Invalid cluster number: {}", self.cur), - next => { - self.cur = self.fat[next as usize] & !INVL; - Some(next) - } - } + self.cur.inspect(|&Cluster(no)| { + self.cur = self.fat[no as usize].parse(); + }) } } -#[allow(dead_code)] -#[derive(Clone)] -enum FatInode { - File(Arc), - Dir(Arc), +struct FileInode { + cluster: Cluster, + info: Spin, + sb: SbRef, + page_cache: PageCache, } -impl FatInode { - fn unwrap(self) -> Arc { - match self { - FatInode::File(inode) => inode, - FatInode::Dir(inode) => inode, - } +impl FileInode { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new_cyclic(|weak: &Weak| Self { + cluster, + info: Spin::new(InodeInfo { + size: size as u64, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o777), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }), + sb, + page_cache: PageCache::new(weak.clone()), + }) } } -define_struct_inode! 
{ - struct FileInode { - page_cache: PageCache, +impl InodeOps for FileInode { + type SuperBlock = FatFs; + + fn ino(&self) -> Ino { + self.cluster.as_ino() } -} -impl FileInode { - fn new(ino: Ino, weak: Weak, size: u32) -> Arc { - let inode = Arc::new_cyclic(|weak_self: &Weak| Self { - idata: InodeData::new(ino, weak), - page_cache: PageCache::new(weak_self.clone()), - }); + fn format(&self) -> Format { + Format::REG + } - // Safety: We are initializing the inode - inode.nlink.store(1, Ordering::Relaxed); - inode.mode.store(Mode::REG.perm(0o777)); - inode.size.store(size as u64, Ordering::Relaxed); + fn info(&self) -> &Spin { + &self.info + } - inode + fn super_block(&self) -> &SbRef { + &self.sb } -} -impl Inode for FileInode { fn page_cache(&self) -> Option<&PageCache> { Some(&self.page_cache) } +} - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - block_on(self.page_cache.read(buffer, offset)) +impl InodeDirOps for FileInode {} +impl InodeFileOps for FileInode { + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + self.page_cache.read(buffer, offset).await } - fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = block_on(vfs.fat.read()); + async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + let sb = self.sb.get()?; + let fs = &sb.backend; + let fat = sb.backend.fat.read().await; - if self.size.load(Ordering::Relaxed) as usize == 0 { + if offset >= self.info.lock().size as usize { return Ok(0); } - let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE; + let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; assert!(cluster_size <= 0x1000, "Cluster size is too large"); let skip_clusters = offset / cluster_size; let inner_offset = offset % cluster_size; - let cluster_iter = - ClusterIterator::new(fat.as_ref(), self.ino as 
ClusterNo).skip(skip_clusters); + let cluster_iter = ClusterIterator::new(fat.as_ref(), self.cluster).skip(skip_clusters); let buffer_page = Page::alloc(); for cluster in cluster_iter { - vfs.read_cluster(cluster, &buffer_page)?; + fs.read_cluster(cluster, &buffer_page).await?; let data = unsafe { // SAFETY: We are the only one holding this page. @@ -296,7 +308,7 @@ impl Inode for FileInode { }; let end = offset + data.len(); - let real_end = core::cmp::min(end, self.size.load(Ordering::Relaxed) as usize); + let real_end = end.min(self.info.lock().size as usize); let real_size = real_end - offset; if buffer.fill(&data[..real_size])?.should_stop() { @@ -306,108 +318,203 @@ impl Inode for FileInode { Ok(buffer.wrote()) } +} - fn write(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { - todo!() +impl PageCacheBackendOps for FileInode { + async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { + self.read_direct(page, offset).await } - fn write_direct(&self, _stream: &mut dyn Stream, _offset: usize) -> KResult { + async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { todo!() } + + fn size(&self) -> usize { + self.info.lock().size as usize + } } -impl PageCacheBackend for FileInode { - fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { - self.read_direct(page, offset) +struct DirInode { + cluster: Cluster, + info: Spin, + sb: SbRef, + + // TODO: Use the new PageCache... + dir_pages: RwLock>, +} + +impl DirInode { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new(Self { + cluster, + info: Spin::new(InodeInfo { + size: size as u64, + nlink: 2, // '.' and '..' 
+ uid: 0, + gid: 0, + perm: Permission::new(0o777), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }), + sb, + dir_pages: RwLock::new(Vec::new()), + }) } - fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { - todo!() + async fn read_dir_pages(&self) -> KResult<()> { + let mut dir_pages = self.dir_pages.write().await; + if !dir_pages.is_empty() { + return Ok(()); + } + + let sb = self.sb.get()?; + let fs = &sb.backend; + let fat = fs.fat.read().await; + + let clusters = ClusterIterator::new(fat.as_ref(), self.cluster); + + for cluster in clusters { + let page = Page::alloc(); + fs.read_cluster(cluster, &page).await?; + + dir_pages.push(page); + } + + Ok(()) } - fn size(&self) -> usize { - self.size.load(Ordering::Relaxed) as usize + async fn get_dir_pages(&self) -> KResult> + use<'_>> { + { + let dir_pages = self.dir_pages.read().await; + if !dir_pages.is_empty() { + return Ok(dir_pages); + } + } + + self.read_dir_pages().await?; + + if let Some(dir_pages) = self.dir_pages.try_read() { + return Ok(dir_pages); + } + + Ok(self.dir_pages.read().await) } } -define_struct_inode! 
{ - struct DirInode; -} +impl InodeOps for DirInode { + type SuperBlock = FatFs; -impl DirInode { - fn new(ino: Ino, weak: Weak, size: u32) -> Arc { - let inode = Arc::new(Self { - idata: InodeData::new(ino, weak), - }); + fn ino(&self) -> Ino { + self.cluster.as_ino() + } + + fn format(&self) -> Format { + Format::DIR + } - // Safety: We are initializing the inode - inode.nlink.store(2, Ordering::Relaxed); - inode.mode.store(Mode::DIR.perm(0o777)); - inode.size.store(size as u64, Ordering::Relaxed); + fn info(&self) -> &Spin { + &self.info + } - inode + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + None } } -impl Inode for DirInode { - fn lookup(&self, dentry: &Arc) -> KResult>> { - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = block_on(vfs.fat.read()); - - let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) - .read(vfs, 0) - .dirs(); - - let entry = entries.find(|entry| { - entry - .as_ref() - .map(|entry| &entry.filename == &***dentry.name()) - .unwrap_or(true) +impl InodeFileOps for DirInode {} +impl InodeDirOps for DirInode { + async fn lookup(&self, dentry: &Arc) -> KResult>> { + let sb = self.sb.get()?; + let dir_pages = self.get_dir_pages().await?; + + let dir_data = dir_pages.iter().map(|page| { + unsafe { + // SAFETY: No one could be writing to it. 
+ page.as_memblk().as_bytes() + } }); - match entry { - None => Ok(None), - Some(Err(err)) => Err(err), - Some(Ok(entry)) => Ok(Some(vfs.get_or_alloc_inode( - entry.cluster as Ino, - entry.is_directory, - entry.size, - ))), + let raw_dirents = dir_data + .map(as_raw_dirents) + .take_while_inclusive(Result::is_ok) + .flatten_ok(); + + let mut dirents = futures::stream::iter(raw_dirents); + + while let Some(result) = dirents.next_dirent().await { + let entry = result?; + + if *entry.filename != ****dentry.name() { + continue; + } + + let sbref = SbRef::from(&sb); + + if entry.is_directory { + return Ok(Some(DirInode::new(entry.cluster, sbref, entry.size) as _)); + } else { + return Ok(Some(FileInode::new(entry.cluster, sbref, entry.size) as _)); + } } + + Ok(None) } - fn do_readdir( - &self, + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = block_on(vfs.fat.read()); - - let cluster_iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) - .read(vfs, offset) - .dirs(); - - let mut nread = 0usize; - for entry in cluster_iter { - let entry = entry?; - - vfs.get_or_alloc_inode(entry.cluster as Ino, entry.is_directory, entry.size); - if callback(&entry.filename, entry.cluster as Ino)?.is_break() { - break; + callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> impl Future>> + Send + 'r { + async move { + let sb = self.sb.get()?; + let fs = &sb.backend; + let dir_pages = self.get_dir_pages().await?; + + let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; + + let cluster_offset = offset / cluster_size; + let inner_offset = offset % cluster_size; + let inner_raw_dirent_offset = inner_offset / core::mem::size_of::(); + + let dir_data = dir_pages.iter().skip(cluster_offset).map(|page| { + unsafe { + // SAFETY: No one could be writing to it. 
+ page.as_memblk().as_bytes() + } + }); + + let raw_dirents = dir_data + .map(as_raw_dirents) + .take_while_inclusive(Result::is_ok) + .flatten_ok() + .skip(inner_raw_dirent_offset); + + let mut dirents = futures::stream::iter(raw_dirents); + + let mut nread = 0; + while let Some(result) = dirents.next_dirent().await { + let entry = result?; + + match callback(&entry.filename, entry.cluster.as_ino()) { + Err(err) => return Ok(Err(err)), + Ok(true) => nread += entry.entry_offset as usize, + Ok(false) => break, + } } - nread += entry.entry_offset as usize; + Ok(Ok(nread)) } - - Ok(nread) } } struct FatMountCreator; +#[async_trait] impl MountCreator for FatMountCreator { fn check_signature(&self, mut first_block: &[u8]) -> KResult { match first_block.split_off(82..) { @@ -417,8 +524,8 @@ impl MountCreator for FatMountCreator { } } - fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { - let (fatfs, root_inode) = FatFs::create(make_device(8, 1))?; + async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { + let (fatfs, root_inode) = FatFs::create(DeviceId::new(8, 1)).await?; Mount::new(mp, fatfs, root_inode) } diff --git a/src/fs/fat32/dir.rs b/src/fs/fat32/dir.rs index c4ac6c0d..8a5b6f40 100644 --- a/src/fs/fat32/dir.rs +++ b/src/fs/fat32/dir.rs @@ -1,11 +1,16 @@ -use super::file::ClusterReadIterator; +use core::pin::Pin; + +use alloc::{boxed::Box, string::String}; +use futures::{Stream, StreamExt}; +use posix_types::result::PosixError; + use crate::kernel::constants::EINVAL; use crate::prelude::*; -use alloc::{string::String, sync::Arc}; -use itertools::Itertools; + +use super::{Cluster, RawCluster}; #[repr(C, packed)] -pub(super) struct RawDirEntry { +pub struct RawDirEntry { name: [u8; 8], extension: [u8; 3], attr: u8, @@ -21,9 +26,9 @@ pub(super) struct RawDirEntry { size: u32, } -pub(super) struct FatDirectoryEntry { - pub filename: Arc<[u8]>, - pub cluster: u32, +pub struct FatDirectoryEntry { + pub filename: 
Box<[u8]>, + pub cluster: Cluster, pub size: u32, pub entry_offset: u32, pub is_directory: bool, @@ -79,7 +84,7 @@ impl RawDirEntry { self.attr & Self::ATTR_DIRECTORY != 0 } - fn long_filename(&self) -> Option<[u16; 13]> { + fn as_raw_long_filename(&self) -> Option<[u16; 13]> { if !self.is_long_filename() { return None; } @@ -103,137 +108,114 @@ impl RawDirEntry { } } -impl<'data, I> RawDirs<'data> for I where I: ClusterReadIterator<'data> {} -trait RawDirs<'data>: ClusterReadIterator<'data> { - fn raw_dirs(self) -> impl Iterator> + 'data - where - Self: Sized, - { - const ENTRY_SIZE: usize = size_of::(); - - self.map(|result| { - let data = result?; - if data.len() % ENTRY_SIZE != 0 { - return Err(EINVAL); - } - - Ok(unsafe { - core::slice::from_raw_parts( - data.as_ptr() as *const RawDirEntry, - data.len() / ENTRY_SIZE, - ) - }) - }) - .flatten_ok() +pub fn as_raw_dirents(data: &[u8]) -> KResult<&[RawDirEntry]> { + let len = data.len(); + if len % size_of::() != 0 { + return Err(EINVAL); } -} - -pub(super) trait Dirs<'data>: ClusterReadIterator<'data> { - fn dirs(self) -> impl Iterator> + 'data - where - Self: Sized; -} -impl<'data, I> Dirs<'data> for I -where - I: ClusterReadIterator<'data>, -{ - fn dirs(self) -> impl Iterator> + 'data - where - Self: Sized, - { - self.raw_dirs().real_dirs() + unsafe { + Ok(core::slice::from_raw_parts( + data.as_ptr() as *const RawDirEntry, + len / size_of::(), + )) } } -trait RealDirs<'data>: Iterator> + 'data { - fn real_dirs(self) -> DirsIter<'data, Self> - where - Self: Sized; +pub trait ParseDirent { + async fn next_dirent(&mut self) -> Option>; } -impl<'data, I> RealDirs<'data> for I +impl<'a, T> ParseDirent for T where - I: Iterator> + 'data, + T: Stream>, { - fn real_dirs(self) -> DirsIter<'data, Self> - where - Self: Sized, - { - DirsIter { iter: self } - } -} + async fn next_dirent(&mut self) -> Option> { + let mut me = unsafe { Pin::new_unchecked(self) }; + + // The long filename entries are stored in reverse order. 
+ // So we reverse all filename segments and then reverse the whole string at the end. + let mut filename_rev = String::new(); + + let mut is_lfn = false; + let mut nr_entry_scanned = 0; + let mut cur_entry; + + loop { + match me.as_mut().next().await { + Some(Err(err)) => return Some(Err(err)), + Some(Ok(ent)) => { + cur_entry = ent; + nr_entry_scanned += 1; + } + None => { + if is_lfn { + // Unterminated long filename entries are invalid. + return Some(Err(PosixError::EINVAL.into())); + } else { + return None; + } + } + }; -pub(super) struct DirsIter<'data, I> -where - I: Iterator> + 'data, -{ - iter: I, -} + if !cur_entry.is_invalid() { + break; + } -impl<'data, I> Iterator for DirsIter<'data, I> -where - I: Iterator> + 'data, -{ - type Item = KResult; - - fn next(&mut self) -> Option { - let mut filename = String::new(); - let mut entry_offset = 0; - let entry = loop { - let entry = match self.iter.next()? { - Ok(entry) => entry, - Err(err) => return Some(Err(err)), - }; - entry_offset += 1; - - let long_filename = entry.long_filename(); - if entry.is_invalid() { - if let Some(long_filename) = long_filename { - let long_filename = long_filename - .iter() - .position(|&ch| ch == 0) - .map(|pos| &long_filename[..pos]) - .unwrap_or(&long_filename); - - filename.extend( - long_filename - .into_iter() - .map(|&ch| char::from_u32(ch as u32).unwrap_or('?')) - .rev(), - ); - } + let Some(raw_long_filename) = cur_entry.as_raw_long_filename() else { continue; - } - break entry; + }; + + // We are processing a long filename entry. + is_lfn = true; + + let real_len = raw_long_filename + .iter() + .position(|&ch| ch == 0) + .unwrap_or(raw_long_filename.len()); + + let name_codes_rev = raw_long_filename.into_iter().take(real_len).rev(); + let name_chars_rev = char::decode_utf16(name_codes_rev).map(|r| r.unwrap_or('?')); + + filename_rev.extend(name_chars_rev); + } + + // From now on, `entry` represents a valid directory entry. 
+ + let raw_cluster = + RawCluster(cur_entry.cluster_low as u32 | ((cur_entry.cluster_high as u32) << 16)); + + let Some(cluster) = raw_cluster.parse() else { + return Some(Err(PosixError::EINVAL.into())); }; - let filename: Arc<[u8]> = if filename.is_empty() { - let mut filename = entry.filename().to_vec(); - let extension = entry.extension(); + let filename; + + if filename_rev.is_empty() { + let mut name = cur_entry.filename().to_vec(); + let extension = cur_entry.extension(); if !extension.is_empty() { - filename.push(b'.'); - filename.extend_from_slice(extension); + name.push(b'.'); + name.extend_from_slice(extension); } - if entry.is_filename_lowercase() { - filename.make_ascii_lowercase(); + if cur_entry.is_filename_lowercase() { + name.make_ascii_lowercase(); } - filename.into() + filename = name.into_boxed_slice(); } else { - let mut bytes = filename.into_bytes(); - bytes.reverse(); - - bytes.into() - }; + let mut name = filename_rev.into_bytes(); + name.reverse(); + filename = name.into_boxed_slice(); + } Some(Ok(FatDirectoryEntry { - size: entry.size, - entry_offset, + size: cur_entry.size, + entry_offset: nr_entry_scanned * size_of::() as u32, filename, - cluster: entry.cluster_low as u32 | (((entry.cluster_high & !0xF000) as u32) << 16), - is_directory: entry.is_directory(), + cluster, + is_directory: cur_entry.is_directory(), })) } } diff --git a/src/fs/fat32/file.rs b/src/fs/fat32/file.rs index db16df50..2df69728 100644 --- a/src/fs/fat32/file.rs +++ b/src/fs/fat32/file.rs @@ -1,40 +1,24 @@ -use super::{ClusterIterator, FatFs}; -use crate::{ - kernel::mem::{AsMemoryBlock as _, Page}, - KResult, -}; - -pub trait ClusterReadIterator<'data>: Iterator> + 'data {} -impl<'a, I> ClusterReadIterator<'a> for I where I: Iterator> + 'a {} +use futures::Stream; -pub(super) trait ClusterRead<'data> { - fn read<'vfs>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data> - where - Self: Sized, - 'vfs: 'data; -} +use crate::{kernel::mem::Page, 
prelude::KResult}; -impl<'data, 'fat: 'data> ClusterRead<'data> for ClusterIterator<'fat> { - fn read<'vfs: 'data>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data> { - const SECTOR_SIZE: usize = 512; +use super::{ClusterIterator, FatFs}; - let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE; - assert!(cluster_size <= 0x1000, "Cluster size is too large"); +pub trait ReadClusters { + fn read_clusters(self, fs: &FatFs) -> impl Stream> + Send; +} - let skip_clusters = offset / cluster_size; - let mut inner_offset = offset % cluster_size; +impl ReadClusters for ClusterIterator<'_> { + fn read_clusters(self, fs: &FatFs) -> impl Stream> + Send { + futures::stream::unfold(self, move |mut me| async { + let cluster = me.next()?; + let page = Page::alloc(); - // TODO: Use block cache. - let buffer_page = Page::alloc(); + if let Err(err) = fs.read_cluster(cluster, &page).await { + return Some((Err(err), me)); + } - self.skip(skip_clusters).map(move |cluster| { - vfs.read_cluster(cluster, &buffer_page)?; - let data = unsafe { - // SAFETY: No one could be writing to it. - &buffer_page.as_memblk().as_bytes()[inner_offset..] 
- }; - inner_offset = 0; - Ok(data) + Some((Ok(page), me)) }) } } diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 5d9285ec..c59ee801 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,5 +1,4 @@ +// pub mod ext4; pub mod fat32; pub mod procfs; -pub mod shm; pub mod tmpfs; -pub mod ext4; diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 2ed24613..57b881df 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,325 +1,264 @@ -use crate::kernel::constants::{EACCES, ENOTDIR}; -use crate::kernel::task::block_on; +use crate::kernel::constants::{EACCES, EISDIR, ENOTDIR}; use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::{AtomicMode, Mode}; +use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::types::{DeviceId, Format, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; use crate::{ io::Buffer, kernel::{ mem::paging::PageBuffer, vfs::{ dentry::Dentry, - inode::{define_struct_inode, AtomicIno, Ino, Inode, InodeData}, + inode::{Ino, Inode}, mount::{dump_mounts, register_filesystem, Mount, MountCreator}, - vfs::Vfs, - DevId, }, }, prelude::*, }; -use alloc::sync::{Arc, Weak}; -use core::{ops::ControlFlow, sync::atomic::Ordering}; -use eonix_sync::{AsProof as _, AsProofMut as _, LazyLock, Locked}; -use itertools::Itertools; - -#[allow(dead_code)] -pub trait ProcFsFile: Send + Sync { - fn can_read(&self) -> bool { - false - } - - fn can_write(&self) -> bool { - false - } +use alloc::sync::Arc; +use async_trait::async_trait; +use core::future::Future; +use core::sync::atomic::{AtomicU64, Ordering}; +use eonix_sync::{LazyLock, RwLock}; + +struct Node { + ino: Ino, + sb: SbRef, + info: Spin, + kind: NodeKind, +} - fn read(&self, _buffer: &mut PageBuffer) -> KResult { - Err(EACCES) - } +enum NodeKind { + File(FileInode), + Dir(DirInode), +} - fn write(&self, _buffer: &[u8]) -> KResult { - Err(EACCES) - } +struct FileInode { + read: Option KResult<()> + Send + 
Sync>>, + write: Option<()>, } -pub enum ProcFsNode { - File(Arc), - Dir(Arc), +struct DirInode { + entries: RwLock, InodeUse)>>, } -impl ProcFsNode { - fn unwrap(&self) -> Arc { - match self { - ProcFsNode::File(inode) => inode.clone(), - ProcFsNode::Dir(inode) => inode.clone(), - } - } +impl InodeOps for Node { + type SuperBlock = ProcFs; fn ino(&self) -> Ino { - match self { - ProcFsNode::File(inode) => inode.ino, - ProcFsNode::Dir(inode) => inode.ino, - } + self.ino } -} -define_struct_inode! { - pub struct FileInode { - file: Box, - } -} - -impl FileInode { - pub fn new(ino: Ino, vfs: Weak, file: Box) -> Arc { - let mut mode = Mode::REG; - if file.can_read() { - mode.set_perm(0o444); - } - if file.can_write() { - mode.set_perm(0o222); + fn format(&self) -> Format { + match &self.kind { + NodeKind::File(_) => Format::REG, + NodeKind::Dir(_) => Format::DIR, } + } - let mut inode = Self { - idata: InodeData::new(ino, vfs), - file, - }; + fn info(&self) -> &Spin { + &self.info + } - inode.idata.mode.store(mode); - inode.idata.nlink.store(1, Ordering::Relaxed); - *inode.ctime.get_mut() = Instant::now(); - *inode.mtime.get_mut() = Instant::now(); - *inode.atime.get_mut() = Instant::now(); + fn super_block(&self) -> &SbRef { + &self.sb + } - Arc::new(inode) + fn page_cache(&self) -> Option<&crate::kernel::mem::PageCache> { + None } } -impl Inode for FileInode { - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - if !self.file.can_read() { +impl InodeFileOps for Node { + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + let NodeKind::File(file_inode) = &self.kind else { + return Err(EISDIR); + }; + + let Some(read_fn) = &file_inode.read else { return Err(EACCES); - } + }; let mut page_buffer = PageBuffer::new(); - self.file.read(&mut page_buffer)?; + read_fn(&mut page_buffer)?; - let data = page_buffer - .data() - .split_at_checked(offset) - .map(|(_, data)| data); + let Some((_, data)) = 
page_buffer.data().split_at_checked(offset) else { + return Ok(0); + }; - match data { - None => Ok(0), - Some(data) => Ok(buffer.fill(data)?.allow_partial()), - } + Ok(buffer.fill(data)?.allow_partial()) } } -define_struct_inode! { - pub struct DirInode { - entries: Locked, ProcFsNode)>, ()>, - } -} +impl InodeDirOps for Node { + async fn lookup(&self, dentry: &Arc) -> KResult>> { + let NodeKind::Dir(dir) = &self.kind else { + return Err(ENOTDIR); + }; -impl DirInode { - pub fn new(ino: Ino, vfs: Weak) -> Arc { - Self::new_locked(ino, vfs, |inode, rwsem| unsafe { - addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem)); - addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::DIR.perm(0o755))); - addr_of_mut_field!(&mut *inode, nlink).write(1.into()); - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } -} + let entries = dir.entries.read().await; + + let dent_name = dentry.name(); + for (name, node) in entries.iter() { + if *name == ***dent_name { + return Ok(Some(node.clone() as _)); + } + } -impl Inode for DirInode { - fn lookup(&self, dentry: &Arc) -> KResult>> { - let lock = block_on(self.rwsem.read()); - Ok(self - .entries - .access(lock.prove()) - .iter() - .find_map(|(name, node)| (name == &***dentry.name()).then(|| node.unwrap()))) + Ok(None) } - fn do_readdir( - &self, + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - let lock = block_on(self.rwsem.read()); - self.entries - .access(lock.prove()) - .iter() - .skip(offset) - .map(|(name, node)| callback(name.as_ref(), node.ino())) - .take_while(|result| result.map_or(true, |flow| flow.is_continue())) - .take_while_inclusive(|result| result.is_ok()) - .fold_ok(0, |acc, _| acc + 1) + callback: &'b mut (dyn FnMut(&[u8], Ino) 
-> KResult + Send), + ) -> impl Future>> + Send + 'r { + Box::pin(async move { + let NodeKind::Dir(dir) = &self.kind else { + return Err(ENOTDIR); + }; + + let entries = dir.entries.read().await; + + let mut count = 0; + for (name, node) in entries.iter().skip(offset) { + match callback(name.as_ref(), node.ino) { + Err(err) => return Ok(Err(err)), + Ok(true) => count += 1, + Ok(false) => break, + } + } + + Ok(Ok(count)) + }) } } -impl_any!(ProcFs); -pub struct ProcFs { - root_node: Arc, - next_ino: AtomicIno, -} - -impl Vfs for ProcFs { - fn io_blksize(&self) -> usize { - 4096 - } - - fn fs_devid(&self) -> DevId { - 10 +impl Node { + pub fn new_file( + ino: Ino, + sb: SbRef, + read: impl Fn(&mut PageBuffer) -> KResult<()> + Send + Sync + 'static, + ) -> InodeUse { + InodeUse::new(Self { + ino, + sb, + info: Spin::new(InodeInfo { + size: 0, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o444), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }), + kind: NodeKind::File(FileInode::new(Box::new(read))), + }) } - fn is_read_only(&self) -> bool { - false + fn new_dir(ino: Ino, sb: SbRef) -> InodeUse { + InodeUse::new(Self { + ino, + sb, + info: Spin::new(InodeInfo { + size: 0, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o755), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }), + kind: NodeKind::Dir(DirInode::new()), + }) } } -static GLOBAL_PROCFS: LazyLock> = LazyLock::new(|| { - Arc::new_cyclic(|weak: &Weak| ProcFs { - root_node: DirInode::new(0, weak.clone()), - next_ino: AtomicIno::new(1), - }) -}); - -struct ProcFsMountCreator; - -#[allow(dead_code)] -impl ProcFsMountCreator { - pub fn get() -> Arc { - GLOBAL_PROCFS.clone() - } - - pub fn get_weak() -> Weak { - Arc::downgrade(&GLOBAL_PROCFS) +impl FileInode { + fn new(read: Box KResult<()> + Send + Sync>) -> Self { + Self { + read: Some(read), + write: None, + } } } -impl MountCreator for ProcFsMountCreator 
{ - fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { - let vfs = ProcFsMountCreator::get(); - let root_inode = vfs.root_node.clone(); - Mount::new(mp, vfs, root_inode) - } - - fn check_signature(&self, _: &[u8]) -> KResult { - Ok(true) +impl DirInode { + pub fn new() -> Self { + Self { + entries: RwLock::new(vec![]), + } } } -pub fn root() -> ProcFsNode { - let vfs = ProcFsMountCreator::get(); - let root = vfs.root_node.clone(); - - ProcFsNode::Dir(root) +pub struct ProcFs { + root: InodeUse, + next_ino: AtomicU64, } -pub fn creat( - parent: &ProcFsNode, - name: Arc<[u8]>, - file: Box, -) -> KResult { - let parent = match parent { - ProcFsNode::File(_) => return Err(ENOTDIR), - ProcFsNode::Dir(parent) => parent, - }; - - let fs = ProcFsMountCreator::get(); - let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed); - - let inode = FileInode::new(ino, Arc::downgrade(&fs), file); - - { - let lock = block_on(parent.idata.rwsem.write()); - parent - .entries - .access_mut(lock.prove_mut()) - .push((name, ProcFsNode::File(inode.clone()))); +impl SuperBlock for ProcFs {} +impl ProcFs { + fn assign_ino(&self) -> Ino { + Ino::new(self.next_ino.fetch_add(1, Ordering::Relaxed)) } - - Ok(ProcFsNode::File(inode)) } -#[allow(dead_code)] -pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult { - let parent = match parent { - ProcFsNode::File(_) => return Err(ENOTDIR), - ProcFsNode::Dir(parent) => parent, - }; - - let fs = ProcFsMountCreator::get(); - let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed); - - let inode = DirInode::new(ino, Arc::downgrade(&fs)); +static GLOBAL_PROCFS: LazyLock> = LazyLock::new(|| { + SbUse::new_cyclic( + SuperBlockInfo { + io_blksize: 4096, + device_id: DeviceId::new(0, 10), + read_only: false, + }, + |sbref| ProcFs { + root: Node::new_dir(Ino::new(0), sbref), + next_ino: AtomicU64::new(1), + }, + ) +}); - parent - .entries - .access_mut(block_on(inode.rwsem.write()).prove_mut()) - .push((Arc::from(name), 
ProcFsNode::Dir(inode.clone()))); +struct ProcFsMountCreator; - Ok(ProcFsNode::Dir(inode)) -} +#[async_trait] +impl MountCreator for ProcFsMountCreator { + async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { + let fs = GLOBAL_PROCFS.clone(); + let root_inode = fs.backend.root.clone(); -struct DumpMountsFile; -impl ProcFsFile for DumpMountsFile { - fn can_read(&self) -> bool { - true + Mount::new(mp, fs, root_inode) } - fn read(&self, buffer: &mut PageBuffer) -> KResult { - dump_mounts(&mut buffer.get_writer()); - - Ok(buffer.data().len()) + fn check_signature(&self, _: &[u8]) -> KResult { + Ok(true) } } -pub fn init() { - register_filesystem("procfs", Arc::new(ProcFsMountCreator)).unwrap(); - - creat( - &root(), - Arc::from(b"mounts".as_slice()), - Box::new(DumpMountsFile), - ) - .unwrap(); -} - -pub struct GenericProcFsFile +pub async fn populate_root(name: Arc<[u8]>, read_fn: F) where - ReadFn: Send + Sync + Fn(&mut PageBuffer) -> KResult<()>, + F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static, { - read_fn: Option, -} + let procfs = &GLOBAL_PROCFS.backend; + let root = &procfs.root; -impl ProcFsFile for GenericProcFsFile -where - ReadFn: Send + Sync + Fn(&mut PageBuffer) -> KResult<()>, -{ - fn can_read(&self) -> bool { - self.read_fn.is_some() - } + let NodeKind::Dir(root) = &root.kind else { + unreachable!(); + }; - fn read(&self, buffer: &mut PageBuffer) -> KResult { - self.read_fn.as_ref().ok_or(EACCES)?(buffer).map(|_| buffer.data().len()) - } + let mut entries = root.entries.write().await; + entries.push(( + name.clone(), + Node::new_file(procfs.assign_ino(), SbRef::from(&GLOBAL_PROCFS), read_fn), + )); } -pub fn populate_root(name: Arc<[u8]>, read_fn: F) -> KResult<()> -where - F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static, -{ - let root = root(); - - creat( - &root, - name, - Box::new(GenericProcFsFile { - read_fn: Some(read_fn), - }), - ) - .map(|_| ()) +pub async fn init() { + 
register_filesystem("procfs", Arc::new(ProcFsMountCreator)).unwrap(); + + populate_root(Arc::from(b"mounts".as_slice()), |buffer| { + dump_mounts(&mut buffer.get_writer()); + Ok(()) + }) + .await; } diff --git a/src/fs/shm.rs b/src/fs/shm.rs deleted file mode 100644 index 09d36141..00000000 --- a/src/fs/shm.rs +++ /dev/null @@ -1,146 +0,0 @@ -use core::sync::atomic::{AtomicU32, Ordering}; - -use alloc::{collections::btree_map::BTreeMap, sync::Arc}; -use bitflags::bitflags; -use eonix_sync::{LazyLock, Mutex}; - -use crate::{ - fs::tmpfs::{DirectoryInode, FileInode, TmpFs}, - kernel::{constants::ENOSPC, vfs::inode::Mode}, - prelude::KResult, -}; - -bitflags! { - #[derive(Debug, Clone, Copy)] - pub struct ShmFlags: u32 { - /// Create a new segment. If this flag is not used, then shmget() will - /// find the segment associated with key and check to see if the user - /// has permission to access the segment. - const IPC_CREAT = 0o1000; - /// This flag is used with IPC_CREAT to ensure that this call creates - /// the segment. If the segment already exists, the call fails. - const IPC_EXCL = 0o2000; - - /// Attach the segment for read-only access.If this flag is not specified, - /// the segment is attached for read and write access, and the process - /// must have read and write permission for the segment. - const SHM_RDONLY = 0o10000; - /// round attach address to SHMLBA boundary - const SHM_RND = 0o20000; - /// Allow the contents of the segment to be executed. - const SHM_EXEC = 0o100000; - } -} - -pub const IPC_PRIVATE: usize = 0; - -pub struct ShmManager { - tmpfs: Arc, - root: Arc, - areas: BTreeMap, -} - -#[repr(C)] -#[derive(Default, Clone, Copy, Debug)] -pub struct IpcPerm { - key: i32, - uid: u32, - gid: u32, - cuid: u32, - cgid: u32, - mode: u16, - seq: u16, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct ShmIdDs { - // Ownership and permissions - pub shm_perm: IpcPerm, - // Size of segment (bytes). 
In our system, this must be aligned - pub shm_segsz: usize, - // Last attach time - pub shm_atime: usize, - // Last detach time - pub shm_dtime: usize, - // Creation time/time of last modification via shmctl() - pub shm_ctime: usize, - // PID of creator - pub shm_cpid: usize, - // PID of last shmat(2)/shmdt(2) - pub shm_lpid: usize, - // No. of current attaches - pub shm_nattch: usize, -} - -impl ShmIdDs { - fn new(size: usize, pid: u32) -> Self { - Self { - shm_perm: IpcPerm::default(), - shm_segsz: size, - shm_atime: 0, - shm_dtime: 0, - shm_ctime: 0, // Should set instant now - shm_cpid: pid as usize, - shm_lpid: 0, - shm_nattch: 0, - } - } -} - -#[derive(Debug)] -pub struct ShmArea { - pub area: Arc, - pub shmid_ds: ShmIdDs, -} - -// A big lock here to protect the shared memory area. -// Can be improved with finer-grained locking? -pub static SHM_MANAGER: LazyLock> = - LazyLock::new(|| Mutex::new(ShmManager::new())); - -impl ShmManager { - fn new() -> Self { - let (tmpfs, root) = TmpFs::create(false).expect("should create shm_area successfully"); - Self { - tmpfs, - root, - areas: BTreeMap::new(), - } - } - - pub fn create_shared_area(&self, size: usize, pid: u32, mode: Mode) -> ShmArea { - let ino = self.tmpfs.assign_ino(); - let vfs = Arc::downgrade(&self.tmpfs); - ShmArea { - area: FileInode::new(ino, vfs, size, mode), - shmid_ds: ShmIdDs::new(size, pid), - } - } - - pub fn get(&self, shmid: u32) -> Option<&ShmArea> { - self.areas.get(&shmid) - } - - pub fn insert(&mut self, shmid: u32, area: ShmArea) { - self.areas.insert(shmid, area); - } -} - -pub fn gen_shm_id(key: usize) -> KResult { - const SHM_MAGIC: u32 = 114514000; - - static NEXT_SHMID: AtomicU32 = AtomicU32::new(0); - - if key == IPC_PRIVATE { - let shmid = NEXT_SHMID.fetch_add(1, Ordering::Relaxed); - - if shmid >= SHM_MAGIC { - return Err(ENOSPC); - } else { - return Ok(shmid); - } - } - - (key as u32).checked_add(SHM_MAGIC).ok_or(ENOSPC) -} diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs deleted 
file mode 100644 index 7a5bd52b..00000000 --- a/src/fs/tmpfs.rs +++ /dev/null @@ -1,613 +0,0 @@ -use crate::io::Stream; -use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR}; -use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; -use crate::kernel::task::block_on; -use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::RenameData; -use crate::kernel::vfs::inode::{AtomicMode, InodeData}; -use crate::{ - io::Buffer, - kernel::vfs::{ - dentry::{dcache, Dentry}, - inode::{define_struct_inode, AtomicIno, Ino, Inode, Mode, WriteOffset}, - mount::{register_filesystem, Mount, MountCreator, MS_RDONLY}, - vfs::Vfs, - DevId, - }, - prelude::*, -}; -use alloc::sync::{Arc, Weak}; -use core::fmt::Debug; -use core::{ops::ControlFlow, sync::atomic::Ordering}; -use eonix_mm::paging::PAGE_SIZE; -use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, ProofMut}; -use itertools::Itertools; - -fn acquire(vfs: &Weak) -> KResult> { - vfs.upgrade().ok_or(EIO) -} - -fn astmp(vfs: &Arc) -> &TmpFs { - vfs.as_any() - .downcast_ref::() - .expect("corrupted tmpfs data structure") -} - -define_struct_inode! { - struct NodeInode { - devid: DevId, - } -} - -impl NodeInode { - fn new(ino: Ino, vfs: Weak, mode: Mode, devid: DevId) -> Arc { - Self::new_locked(ino, vfs, |inode, _| unsafe { - addr_of_mut_field!(inode, devid).write(devid); - - addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(mode)); - addr_of_mut_field!(&mut *inode, nlink).write(1.into()); - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } -} - -impl Inode for NodeInode { - fn devid(&self) -> KResult { - Ok(self.devid) - } -} - -define_struct_inode! 
{ - pub(super) struct DirectoryInode { - entries: Locked, Ino)>, ()>, - } -} - -impl DirectoryInode { - fn new(ino: Ino, vfs: Weak, mode: Mode) -> Arc { - Self::new_locked(ino, vfs, |inode, rwsem| unsafe { - addr_of_mut_field!(inode, entries) - .write(Locked::new(vec![(Arc::from(b".".as_slice()), ino)], rwsem)); - - addr_of_mut_field!(&mut *inode, size).write(1.into()); - addr_of_mut_field!(&mut *inode, mode) - .write(AtomicMode::from(Mode::DIR.perm(mode.non_format_bits()))); - addr_of_mut_field!(&mut *inode, nlink).write(1.into()); // link from `.` to itself - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } - - fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: ProofMut<'_, ()>) { - let now = Instant::now(); - - // SAFETY: Only `unlink` will do something based on `nlink` count - // No need to synchronize here - file.nlink.fetch_add(1, Ordering::Relaxed); - *self.ctime.lock() = now; - - // SAFETY: `rwsem` has done the synchronization - self.size.fetch_add(1, Ordering::Relaxed); - *self.mtime.lock() = now; - - self.entries.access_mut(dlock).push((name, file.ino)); - } - - fn do_unlink( - &self, - file: &Arc, - filename: &[u8], - entries: &mut Vec<(Arc<[u8]>, Ino)>, - now: Instant, - decrease_size: bool, - _dir_lock: ProofMut<()>, - _file_lock: ProofMut<()>, - ) -> KResult<()> { - // SAFETY: `file_lock` has done the synchronization - if file.mode.load().is_dir() { - return Err(EISDIR); - } - - entries.retain(|(name, ino)| *ino != file.ino || name.as_ref() != filename); - - if decrease_size { - // SAFETY: `dir_lock` has done the synchronization - self.size.fetch_sub(1, Ordering::Relaxed); - } - - *self.mtime.lock() = now; - - // The last reference to the inode is held by some dentry - // and will be released when the dentry is released - - // SAFETY: `file_lock` has done the 
synchronization - file.nlink.fetch_sub(1, Ordering::Relaxed); - *file.ctime.lock() = now; - - Ok(()) - } -} - -impl Inode for DirectoryInode { - fn do_readdir( - &self, - offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - let lock = block_on(self.rwsem.read()); - self.entries - .access(lock.prove()) - .iter() - .skip(offset) - .map(|(name, ino)| callback(&name, *ino)) - .take_while(|result| result.map_or(true, |flow| flow.is_continue())) - .take_while_inclusive(|result| result.is_ok()) - .fold_ok(0, |acc, _| acc + 1) - } - - fn creat(&self, at: &Arc, mode: Mode) -> KResult<()> { - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let file = FileInode::new(ino, self.vfs.clone(), 0, mode); - - self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); - at.save_reg(file) - } - - fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> { - if !mode.is_chr() && !mode.is_blk() { - return Err(EINVAL); - } - - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let file = NodeInode::new(ino, self.vfs.clone(), mode, dev); - - self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); - at.save_reg(file) - } - - fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let file = SymlinkInode::new(ino, self.vfs.clone(), target.into()); - - self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); - at.save_symlink(file) - } - - fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode); - - self.link(at.get_name(), 
newdir.as_ref(), rwsem.prove_mut()); - at.save_dir(newdir) - } - - fn unlink(&self, at: &Arc) -> KResult<()> { - let _vfs = acquire(&self.vfs)?; - - let dir_lock = block_on(self.rwsem.write()); - - let file = at.get_inode()?; - let filename = at.get_name(); - let file_lock = block_on(file.rwsem.write()); - - let entries = self.entries.access_mut(dir_lock.prove_mut()); - - self.do_unlink( - &file, - &filename, - entries, - Instant::now(), - true, - dir_lock.prove_mut(), - file_lock.prove_mut(), - )?; - - // Remove the dentry from the dentry cache immediately - // so later lookup will fail with ENOENT - dcache::d_remove(at); - - Ok(()) - } - - fn chmod(&self, mode: Mode) -> KResult<()> { - let _vfs = acquire(&self.vfs)?; - let _lock = block_on(self.rwsem.write()); - - // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(); - self.mode.store(old.perm(mode.non_format_bits())); - *self.ctime.lock() = Instant::now(); - - Ok(()) - } - - fn rename(&self, rename_data: RenameData) -> KResult<()> { - let RenameData { - old_dentry, - new_dentry, - new_parent, - is_exchange, - no_replace, - vfs, - } = rename_data; - - if is_exchange { - println_warn!("TmpFs does not support exchange rename for now"); - return Err(ENOSYS); - } - - let vfs = vfs - .as_any() - .downcast_ref::() - .expect("vfs must be a TmpFs"); - - let _rename_lock = block_on(vfs.rename_lock.lock()); - - let old_file = old_dentry.get_inode()?; - let new_file = new_dentry.get_inode(); - - if no_replace && new_file.is_ok() { - return Err(EEXIST); - } - - let same_parent = Arc::as_ptr(&new_parent) == &raw const *self; - if same_parent { - // Same directory rename - // Remove from old location and add to new location - let parent_lock = block_on(self.rwsem.write()); - let entries = self.entries.access_mut(parent_lock.prove_mut()); - - fn rename_old( - old_entry: &mut (Arc<[u8]>, Ino), - old_file: &Arc, - new_dentry: &Arc, - now: Instant, - ) { - let (name, _) = old_entry; - *name = 
new_dentry.get_name(); - *old_file.ctime.lock() = now; - } - - let old_ino = old_file.ino; - let new_ino = new_file.as_ref().ok().map(|f| f.ino); - let old_name = old_dentry.get_name(); - let new_name = new_dentry.get_name(); - - // Find the old and new entries in the directory after we've locked the directory. - let indices = - entries - .iter() - .enumerate() - .fold([None, None], |[old, new], (idx, (name, ino))| { - if Some(*ino) == new_ino && *name == new_name { - [old, Some(idx)] - } else if *ino == old_ino && *name == old_name { - [Some(idx), new] - } else { - [old, new] - } - }); - - let (old_entry_idx, new_entry_idx) = match indices { - [None, ..] => return Err(ENOENT), - [Some(old_idx), new_idx] => (old_idx, new_idx), - }; - - let now = Instant::now(); - - if let Some(new_idx) = new_entry_idx { - // Replace existing file (i.e. rename the old and unlink the new) - let new_file = new_file.unwrap(); - let _new_file_lock = block_on(new_file.rwsem.write()); - - // SAFETY: `new_file_lock` has done the synchronization - match (new_file.mode.load(), old_file.mode.load()) { - (Mode::DIR, _) => return Err(EISDIR), - (_, Mode::DIR) => return Err(ENOTDIR), - _ => {} - } - - entries.remove(new_idx); - - // SAFETY: `parent_lock` has done the synchronization - self.size.fetch_sub(1, Ordering::Relaxed); - - // The last reference to the inode is held by some dentry - // and will be released when the dentry is released - - // SAFETY: `new_file_lock` has done the synchronization - new_file.nlink.fetch_sub(1, Ordering::Relaxed); - *new_file.ctime.lock() = now; - } - - rename_old(&mut entries[old_entry_idx], &old_file, new_dentry, now); - *self.mtime.lock() = now; - } else { - // Cross-directory rename - handle similar to same directory case - - // Get new parent directory - let new_parent_inode = new_dentry.parent().get_inode()?; - assert!(new_parent_inode.is_dir()); - let new_parent = (new_parent_inode.as_ref() as &dyn Any) - .downcast_ref::() - .expect("new parent must be a 
DirectoryInode"); - - let old_parent_lock = block_on(self.rwsem.write()); - let new_parent_lock = block_on(new_parent_inode.rwsem.write()); - - let old_ino = old_file.ino; - let new_ino = new_file.as_ref().ok().map(|f| f.ino); - let old_name = old_dentry.get_name(); - let new_name = new_dentry.get_name(); - - // Find the old entry in the old directory - let old_entries = self.entries.access_mut(old_parent_lock.prove_mut()); - let old_pos = old_entries - .iter() - .position(|(name, ino)| *ino == old_ino && *name == old_name) - .ok_or(ENOENT)?; - - // Find the new entry in the new directory (if it exists) - let new_entries = new_parent.entries.access_mut(new_parent_lock.prove_mut()); - let has_new = new_entries - .iter() - .position(|(name, ino)| Some(*ino) == new_ino && *name == new_name) - .is_some(); - - let now = Instant::now(); - - if has_new { - // Replace existing file (i.e. move the old and unlink the new) - let new_file = new_file.unwrap(); - let new_file_lock = block_on(new_file.rwsem.write()); - - match (old_file.mode.load(), new_file.mode.load()) { - (Mode::DIR, Mode::DIR) => {} - (Mode::DIR, _) => return Err(ENOTDIR), - (_, _) => {} - } - - // Unlink the old file that was replaced - new_parent.do_unlink( - &new_file, - &new_name, - new_entries, - now, - false, - new_parent_lock.prove_mut(), - new_file_lock.prove_mut(), - )?; - } else { - new_parent.size.fetch_add(1, Ordering::Relaxed); - } - - // Remove from old directory - old_entries.remove(old_pos); - - // Add new entry - new_entries.push((new_name, old_ino)); - - self.size.fetch_sub(1, Ordering::Relaxed); - *self.mtime.lock() = now; - *old_file.ctime.lock() = now; - } - - block_on(dcache::d_exchange(old_dentry, new_dentry)); - - Ok(()) - } -} - -define_struct_inode! 
{ - struct SymlinkInode { - target: Arc<[u8]>, - } -} - -impl SymlinkInode { - fn new(ino: Ino, vfs: Weak, target: Arc<[u8]>) -> Arc { - Self::new_locked(ino, vfs, |inode, _| unsafe { - let len = target.len(); - addr_of_mut_field!(inode, target).write(target); - - addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::LNK.perm(0o777))); - addr_of_mut_field!(&mut *inode, size).write((len as u64).into()); - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } -} - -impl Inode for SymlinkInode { - fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { - buffer - .fill(self.target.as_ref()) - .map(|result| result.allow_partial()) - } - - fn chmod(&self, _: Mode) -> KResult<()> { - Ok(()) - } -} - -define_struct_inode! { - pub struct FileInode { - pages: PageCache, - } -} - -impl Debug for FileInode { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "FileInode({:?})", self.idata) - } -} - -impl FileInode { - pub fn new(ino: Ino, vfs: Weak, size: usize, mode: Mode) -> Arc { - let inode = Arc::new_cyclic(|weak_self: &Weak| FileInode { - idata: InodeData::new(ino, vfs), - pages: PageCache::new(weak_self.clone()), - }); - - inode.mode.store(Mode::REG.perm(mode.non_format_bits())); - inode.nlink.store(1, Ordering::Relaxed); - inode.size.store(size as u64, Ordering::Relaxed); - inode - } -} - -impl PageCacheBackend for FileInode { - fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult { - Ok(PAGE_SIZE) - } - - fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { - Ok(PAGE_SIZE) - } - - fn size(&self) -> usize { - self.size.load(Ordering::Relaxed) as usize - } -} - -impl Inode for FileInode { - fn page_cache(&self) -> Option<&PageCache> { - Some(&self.pages) - } - - fn read(&self, buffer: &mut dyn 
Buffer, offset: usize) -> KResult { - let _lock = block_on(self.rwsem.write()); - block_on(self.pages.read(buffer, offset)) - } - - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { - // TODO: We don't need that strong guarantee, find some way to avoid locks - let _lock = block_on(self.rwsem.write()); - - let mut store_new_end = None; - let offset = match offset { - WriteOffset::Position(offset) => offset, - WriteOffset::End(end) => { - store_new_end = Some(end); - - // SAFETY: `lock` has done the synchronization - self.size.load(Ordering::Relaxed) as usize - } - }; - - let wrote = block_on(self.pages.write(stream, offset))?; - let cursor_end = offset + wrote; - - if let Some(store_end) = store_new_end { - *store_end = cursor_end; - } - - // SAFETY: `lock` has done the synchronization - *self.mtime.lock() = Instant::now(); - self.size.store(cursor_end as u64, Ordering::Relaxed); - - Ok(wrote) - } - - fn truncate(&self, length: usize) -> KResult<()> { - let _lock = block_on(self.rwsem.write()); - block_on(self.pages.resize(length))?; - self.size.store(length as u64, Ordering::Relaxed); - *self.mtime.lock() = Instant::now(); - Ok(()) - } - - fn chmod(&self, mode: Mode) -> KResult<()> { - let _vfs = acquire(&self.vfs)?; - let _lock = block_on(self.rwsem.write()); - - // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(); - self.mode.store(old.perm(mode.non_format_bits())); - *self.ctime.lock() = Instant::now(); - - Ok(()) - } -} - -impl_any!(TmpFs); -pub(super) struct TmpFs { - next_ino: AtomicIno, - readonly: bool, - rename_lock: Mutex<()>, -} - -impl Vfs for TmpFs { - fn io_blksize(&self) -> usize { - 4096 - } - - fn fs_devid(&self) -> DevId { - 2 - } - - fn is_read_only(&self) -> bool { - self.readonly - } -} - -impl TmpFs { - pub(super) fn assign_ino(&self) -> Ino { - self.next_ino.fetch_add(1, Ordering::AcqRel) - } - - pub fn create(readonly: bool) -> KResult<(Arc, Arc)> { - let tmpfs = Arc::new(Self { - next_ino: 
AtomicIno::new(1), - readonly, - rename_lock: Mutex::new(()), - }); - - let weak = Arc::downgrade(&tmpfs); - let root_dir = DirectoryInode::new(0, weak, Mode::new(0o755)); - - Ok((tmpfs, root_dir)) - } -} - -struct TmpFsMountCreator; - -impl MountCreator for TmpFsMountCreator { - fn create_mount(&self, _source: &str, flags: u64, mp: &Arc) -> KResult { - let (fs, root_inode) = TmpFs::create(flags & MS_RDONLY != 0)?; - - Mount::new(mp, fs, root_inode) - } - - fn check_signature(&self, _: &[u8]) -> KResult { - Ok(true) - } -} - -pub fn init() { - register_filesystem("tmpfs", Arc::new(TmpFsMountCreator)).unwrap(); -} diff --git a/src/fs/tmpfs/dir.rs b/src/fs/tmpfs/dir.rs new file mode 100644 index 00000000..e2be1d12 --- /dev/null +++ b/src/fs/tmpfs/dir.rs @@ -0,0 +1,415 @@ +use core::{any::Any, future::Future}; + +use alloc::{boxed::Box, sync::Arc, vec, vec::Vec}; +use eonix_log::println_warn; +use eonix_sync::{LazyLock, RwLock, Spin}; + +use crate::{ + kernel::{ + constants::{EEXIST, EINVAL, EISDIR, ENOENT, ENOSYS, ENOTDIR}, + mem::PageCache, + timer::Instant, + vfs::{ + dentry::{dcache, Dentry}, + inode::{ + Ino, Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, RenameData, + }, + types::{DeviceId, Format, Mode, Permission}, + SbRef, + }, + }, + prelude::KResult, +}; + +use super::{ + file::{DeviceInode, FileInode, SymlinkInode}, + TmpFs, +}; + +pub struct DirectoryInode { + sb: SbRef, + ino: Ino, + info: Spin, + entries: RwLock, Ino)>>, +} + +impl InodeOps for DirectoryInode { + type SuperBlock = TmpFs; + + fn ino(&self) -> Ino { + self.ino + } + + fn format(&self) -> Format { + Format::DIR + } + + fn info(&self) -> &Spin { + &self.info + } + + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + None + } +} + +impl DirectoryInode { + pub fn new(ino: Ino, sb: SbRef, perm: Permission) -> InodeUse { + static DOT: LazyLock> = LazyLock::new(|| Arc::from(b".".as_slice())); + + let now = Instant::now(); + + 
InodeUse::new(Self { + sb, + ino, + info: Spin::new(InodeInfo { + size: 1, + nlink: 1, // link from `.` to itself + perm, + ctime: now, + mtime: now, + atime: now, + uid: 0, + gid: 0, + }), + entries: RwLock::new(vec![(DOT.clone(), ino)]), + }) + } + + fn link( + &self, + entries: &mut Vec<(Arc<[u8]>, Ino)>, + name: Arc<[u8]>, + file: &InodeUse, + ) { + let mut self_info = self.info.lock(); + let mut file_info = file.info().lock(); + + let now = Instant::now(); + + file_info.nlink += 1; + file_info.ctime = now; + + self_info.size += 1; + self_info.mtime = now; + self_info.ctime = now; + + entries.push((name, file.ino())); + } + + fn do_unlink( + &self, + file: &InodeUse, + filename: &[u8], + entries: &mut Vec<(Arc<[u8]>, Ino)>, + now: Instant, + decrease_size: bool, + self_info: &mut InodeInfo, + file_info: &mut InodeInfo, + ) -> KResult<()> { + // SAFETY: `file_lock` has done the synchronization + if file.format() == Format::DIR { + return Err(EISDIR); + } + + let file_ino = file.ino(); + entries.retain(|(name, ino)| *ino != file_ino || name.as_ref() != filename); + + if decrease_size { + self_info.size -= 1; + } + + self_info.mtime = now; + self_info.ctime = now; + + // The last reference to the inode is held by some dentry + // and will be released when the dentry is released + + file_info.nlink -= 1; + file_info.ctime = now; + + // TODO!!!: Remove the file if nlink == 1 + + Ok(()) + } +} + +impl InodeDirOps for DirectoryInode { + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, + offset: usize, + for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> impl Future>> + Send + 'r { + Box::pin(async move { + let _sb = self.sb.get()?; + let entries = self.entries.read().await; + + let mut count = 0; + for entry in entries.iter().skip(offset) { + match for_each_entry(&entry.0, entry.1) { + Err(err) => return Ok(Err(err)), + Ok(false) => break, + Ok(true) => count += 1, + } + } + + Ok(Ok(count)) + }) + } + + async fn create(&self, at: &Arc, perm: 
Permission) -> KResult<()> { + let sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let file: InodeUse = FileInode::new(ino, self.sb.clone(), 0, perm); + + self.link(&mut entries, at.get_name(), &file); + at.fill(file); + + Ok(()) + } + + async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> { + if !mode.is_chr() && !mode.is_blk() { + return Err(EINVAL); + } + + let sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let file: InodeUse = DeviceInode::new(ino, self.sb.clone(), mode, dev); + + self.link(&mut entries, at.get_name(), &file); + at.fill(file); + + Ok(()) + } + + async fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { + let sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let file: InodeUse = SymlinkInode::new(ino, self.sb.clone(), target.into()); + + self.link(&mut entries, at.get_name(), &file); + at.fill(file); + + Ok(()) + } + + async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()> { + let sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let new_dir: InodeUse = DirectoryInode::new(ino, self.sb.clone(), perm); + + self.link(&mut entries, at.get_name(), &new_dir); + at.fill(new_dir); + + Ok(()) + } + + async fn unlink(&self, at: &Arc) -> KResult<()> { + let _sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let file = at.get_inode()?; + let filename = at.get_name(); + + self.do_unlink( + &file, + &filename, + &mut entries, + Instant::now(), + true, + &mut self.info.lock(), + &mut file.info().lock(), + )?; + + // Remove the dentry from the dentry cache immediately + // so later lookup will fail with ENOENT + dcache::d_remove(at); + + Ok(()) + } + + async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> { + let sb = self.sb.get()?; + 
let _rename_lock = sb.backend.rename_lock.lock().await; + let mut self_entries = self.entries.write().await; + + let RenameData { + old_dentry, + new_dentry, + new_parent, + is_exchange, + no_replace, + } = rename_data; + + if is_exchange { + println_warn!("TmpFs does not support exchange rename for now"); + return Err(ENOSYS); + } + + let old_file = old_dentry.get_inode()?; + let new_file = new_dentry.inode(); + + if no_replace && new_file.is_some() { + return Err(EEXIST); + } + + if new_parent.as_raw() == &raw const *self { + // Same directory rename + // Remove from old location and add to new location + let old_ino = old_file.ino(); + let new_ino = new_file.as_ref().map(|f| f.ino()); + let old_name = old_dentry.get_name(); + let new_name = new_dentry.get_name(); + + // Find the old and new entries in the directory after we've locked the directory. + let (mut old_ent_idx, mut new_ent_idx) = (None, None); + for (idx, (name, ino)) in self_entries.iter().enumerate() { + if *ino == old_ino && *name == old_name { + old_ent_idx = Some(idx); + } + + if Some(*ino) == new_ino && *name == new_name { + new_ent_idx = Some(idx); + } + } + + let Some(old_ent_idx) = old_ent_idx else { + return Err(ENOENT); + }; + + if Some(old_ent_idx) == new_ent_idx { + return Ok(()); + } + + let now = Instant::now(); + if let Some(new_idx) = new_ent_idx { + // Replace existing file (i.e. 
rename the old and unlink the new) + let new_file = new_file.unwrap(); + + match (new_file.format(), old_file.format()) { + (Format::DIR, _) => return Err(EISDIR), + (_, Format::DIR) => return Err(ENOTDIR), + _ => {} + } + + self_entries.remove(new_idx); + + self.info.lock().size -= 1; + + // The last reference to the inode is held by some dentry + // and will be released when the dentry is released + + let mut new_info = new_file.info().lock(); + + new_info.nlink -= 1; + new_info.mtime = now; + new_info.ctime = now; + } + + let (name, _) = &mut self_entries[old_ent_idx]; + *name = new_dentry.get_name(); + + let mut self_info = self.info.lock(); + self_info.mtime = now; + self_info.ctime = now; + } else { + // Cross-directory rename - handle similar to same directory case + + // Get new parent directory + let new_parent_inode = new_dentry.parent().get_inode()?; + assert_eq!(new_parent_inode.format(), Format::DIR); + + let new_parent = (&new_parent_inode as &dyn Any) + .downcast_ref::() + .expect("new parent must be a DirectoryInode"); + + let mut new_entries = new_parent.entries.write().await; + + let old_ino = old_file.ino(); + let new_ino = new_file.as_ref().map(|f| f.ino()); + let old_name = old_dentry.get_name(); + let new_name = new_dentry.get_name(); + + // Find the old entry in the old directory + let old_pos = self_entries + .iter() + .position(|(name, ino)| *ino == old_ino && *name == old_name) + .ok_or(ENOENT)?; + + // Find the new entry in the new directory (if it exists) + let has_new = new_entries + .iter() + .position(|(name, ino)| Some(*ino) == new_ino && *name == new_name) + .is_some(); + + let now = Instant::now(); + + if has_new { + // Replace existing file (i.e. 
move the old and unlink the new) + let new_file = new_file.unwrap(); + + match (old_file.format(), new_file.format()) { + (Format::DIR, Format::DIR) => {} + (Format::DIR, _) => return Err(ENOTDIR), + (_, _) => {} + } + + // Unlink the old file that was replaced + new_parent.do_unlink( + &new_file, + &new_name, + &mut new_entries, + now, + false, + &mut new_parent.info.lock(), + &mut new_file.info().lock(), + )?; + } else { + new_parent.info.lock().size += 1; + new_parent.info.lock().mtime = now; + new_parent.info.lock().ctime = now; + } + + // Remove from old directory + self_entries.remove(old_pos); + + // Add new entry + new_entries.push((new_name, old_ino)); + + let mut self_info = self.info.lock(); + self_info.size -= 1; + self_info.mtime = now; + self_info.ctime = now; + } + + dcache::d_exchange(old_dentry, new_dentry).await; + Ok(()) + } +} + +impl InodeFileOps for DirectoryInode { + async fn chmod(&self, perm: Permission) -> KResult<()> { + let _sb = self.sb.get()?; + + { + let mut info = self.info.lock(); + info.perm = perm; + info.ctime = Instant::now(); + } + + Ok(()) + } +} diff --git a/src/fs/tmpfs/file.rs b/src/fs/tmpfs/file.rs new file mode 100644 index 00000000..624112e0 --- /dev/null +++ b/src/fs/tmpfs/file.rs @@ -0,0 +1,298 @@ +use alloc::sync::Arc; +use eonix_mm::paging::PAGE_SIZE; +use eonix_sync::{RwLock, Spin}; + +use crate::{ + io::{Buffer, Stream}, + kernel::{ + mem::{CachePage, CachePageStream, PageCache, PageCacheBackendOps}, + timer::Instant, + vfs::{ + inode::{Ino, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, WriteOffset}, + types::{DeviceId, Format, Mode, Permission}, + SbRef, + }, + }, + prelude::KResult, +}; + +use super::TmpFs; + +pub struct FileInode { + sb: SbRef, + ino: Ino, + info: Spin, + rwsem: RwLock<()>, + pages: PageCache, +} + +impl FileInode { + pub fn new(ino: Ino, sb: SbRef, size: usize, perm: Permission) -> InodeUse { + let now = Instant::now(); + + InodeUse::new_cyclic(|weak| Self { + sb, + ino, + info: 
Spin::new(InodeInfo { + size: size as _, + nlink: 1, + uid: 0, + gid: 0, + perm, + atime: now, + ctime: now, + mtime: now, + }), + rwsem: RwLock::new(()), + pages: PageCache::new(weak.clone() as _), + }) + } +} + +impl PageCacheBackendOps for FileInode { + async fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult { + Ok(PAGE_SIZE) + } + + async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { + Ok(PAGE_SIZE) + } + + fn size(&self) -> usize { + self.info.lock().size as usize + } +} + +impl InodeOps for FileInode { + type SuperBlock = TmpFs; + + fn ino(&self) -> Ino { + self.ino + } + + fn format(&self) -> Format { + Format::REG + } + + fn info(&self) -> &Spin { + &self.info + } + + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + Some(&self.pages) + } +} + +impl InodeDirOps for FileInode {} +impl InodeFileOps for FileInode { + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + let _lock = self.rwsem.read().await; + self.pages.read(buffer, offset).await + } + + async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult { + let _lock = self.rwsem.write().await; + + let mut store_new_end = None; + let offset = match offset { + WriteOffset::Position(offset) => offset, + WriteOffset::End(end) => { + store_new_end = Some(end); + + // `info.size` won't change since we are holding the write lock. 
+ self.info.lock().size as usize + } + }; + + let wrote = self.pages.write(stream, offset).await?; + let cursor_end = offset + wrote; + + if let Some(store_end) = store_new_end { + *store_end = cursor_end; + } + + { + let now = Instant::now(); + let mut info = self.info.lock(); + info.mtime = now; + info.ctime = now; + info.size = info.size.max(cursor_end as u64); + } + + Ok(wrote) + } + + async fn truncate(&self, length: usize) -> KResult<()> { + let _lock = self.rwsem.write().await; + + self.pages.resize(length).await?; + + { + let now = Instant::now(); + let mut info = self.info.lock(); + info.mtime = now; + info.ctime = now; + info.size = length as u64; + } + + Ok(()) + } + + async fn chmod(&self, perm: Permission) -> KResult<()> { + let _sb = self.sb.get()?; + + { + let mut info = self.info.lock(); + + info.perm = perm; + info.ctime = Instant::now(); + } + + Ok(()) + } +} + +pub struct DeviceInode { + sb: SbRef, + ino: Ino, + info: Spin, + is_block: bool, + devid: DeviceId, +} + +impl DeviceInode { + pub fn new(ino: Ino, sb: SbRef, mode: Mode, devid: DeviceId) -> InodeUse { + let now = Instant::now(); + + InodeUse::new(Self { + sb, + ino, + info: Spin::new(InodeInfo { + size: 0, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(mode.non_format_bits()), + atime: now, + ctime: now, + mtime: now, + }), + is_block: mode.format() == Format::BLK, + devid, + }) + } +} + +impl InodeOps for DeviceInode { + type SuperBlock = TmpFs; + + fn ino(&self) -> Ino { + self.ino + } + + fn format(&self) -> Format { + if self.is_block { + Format::BLK + } else { + Format::CHR + } + } + + fn info(&self) -> &Spin { + &self.info + } + + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + None + } +} + +impl InodeDirOps for DeviceInode {} +impl InodeFileOps for DeviceInode { + async fn chmod(&self, perm: Permission) -> KResult<()> { + let _sb = self.sb.get()?; + + { + let mut info = self.info.lock(); + + info.perm = perm; + 
info.ctime = Instant::now(); + } + + Ok(()) + } + + fn devid(&self) -> KResult { + Ok(self.devid) + } +} + +pub struct SymlinkInode { + sb: SbRef, + ino: Ino, + info: Spin, + target: Arc<[u8]>, +} + +impl SymlinkInode { + pub fn new(ino: Ino, sb: SbRef, target: Arc<[u8]>) -> InodeUse { + let now = Instant::now(); + + InodeUse::new(Self { + sb, + ino, + info: Spin::new(InodeInfo { + size: target.len() as _, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o777), + atime: now, + ctime: now, + mtime: now, + }), + target, + }) + } +} + +impl InodeDirOps for SymlinkInode {} +impl InodeOps for SymlinkInode { + type SuperBlock = TmpFs; + + fn ino(&self) -> Ino { + self.ino + } + + fn format(&self) -> Format { + Format::LNK + } + + fn info(&self) -> &Spin { + &self.info + } + + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + None + } +} + +impl InodeFileOps for SymlinkInode { + async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { + buffer + .fill(self.target.as_ref()) + .map(|result| result.allow_partial()) + } +} diff --git a/src/fs/tmpfs/mod.rs b/src/fs/tmpfs/mod.rs new file mode 100644 index 00000000..2bef67b6 --- /dev/null +++ b/src/fs/tmpfs/mod.rs @@ -0,0 +1,73 @@ +mod dir; +mod file; + +use crate::kernel::vfs::inode::{Ino, InodeUse}; +use crate::kernel::vfs::types::{DeviceId, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; +use crate::{ + kernel::vfs::{ + dentry::Dentry, + mount::{register_filesystem, Mount, MountCreator}, + }, + prelude::*, +}; +use alloc::sync::Arc; +use async_trait::async_trait; +use core::sync::atomic::AtomicU64; +use core::sync::atomic::Ordering; +use dir::DirectoryInode; +use eonix_sync::Mutex; + +pub struct TmpFs { + next_ino: AtomicU64, + rename_lock: Mutex<()>, +} + +impl SuperBlock for TmpFs {} + +impl TmpFs { + fn assign_ino(&self) -> Ino { + Ino::new(self.next_ino.fetch_add(1, Ordering::Relaxed)) + } + + fn create() -> KResult<(SbUse, 
InodeUse)> { + let tmpfs = SbUse::new( + SuperBlockInfo { + io_blksize: 4096, + device_id: DeviceId::new(0, 2), + read_only: false, + }, + Self { + next_ino: AtomicU64::new(1), + rename_lock: Mutex::new(()), + }, + ); + + let root_dir = DirectoryInode::new( + tmpfs.backend.assign_ino(), + SbRef::from(&tmpfs), + Permission::new(0o755), + ); + + Ok((tmpfs, root_dir)) + } +} + +struct TmpFsMountCreator; + +#[async_trait] +impl MountCreator for TmpFsMountCreator { + async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { + let (fs, root_inode) = TmpFs::create()?; + + Mount::new(mp, fs, root_inode) + } + + fn check_signature(&self, _: &[u8]) -> KResult { + Ok(true) + } +} + +pub fn init() { + register_filesystem("tmpfs", Arc::new(TmpFsMountCreator)).unwrap(); +} diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 349e3656..3e4b65d1 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -3,7 +3,7 @@ mod mbr; use super::{ constants::ENOENT, mem::{paging::Page, AsMemoryBlock as _}, - vfs::DevId, + vfs::types::DeviceId, }; use crate::kernel::constants::{EEXIST, EINVAL}; use crate::{ @@ -14,13 +14,10 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; +use async_trait::async_trait; use core::cmp::Ordering; use mbr::MBRPartTable; -pub fn make_device(major: u32, minor: u32) -> DevId { - (major << 8) & 0xff00u32 | minor & 0xffu32 -} - pub struct Partition { pub lba_offset: u64, pub sector_count: u64, @@ -30,11 +27,12 @@ pub trait PartTable { fn partitions(&self) -> impl Iterator + use<'_, Self>; } +#[async_trait] pub trait BlockRequestQueue: Send + Sync { /// Maximum number of sectors that can be read in one request fn max_request_pages(&self) -> u64; - fn submit(&self, req: BlockDeviceRequest) -> KResult<()>; + async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()>; } enum BlockDeviceType { @@ -42,7 +40,7 @@ enum BlockDeviceType { queue: Arc, }, Partition { - disk_dev: DevId, + disk_dev: DeviceId, 
lba_offset: u64, queue: Arc, }, @@ -50,7 +48,7 @@ enum BlockDeviceType { pub struct BlockDevice { /// Unique device identifier, major and minor numbers - devid: DevId, + devid: DeviceId, /// Total size of the device in sectors (512 bytes each) sector_count: u64, @@ -77,11 +75,11 @@ impl Ord for BlockDevice { } } -static BLOCK_DEVICE_LIST: Spin>> = Spin::new(BTreeMap::new()); +static BLOCK_DEVICE_LIST: Spin>> = Spin::new(BTreeMap::new()); impl BlockDevice { pub fn register_disk( - devid: DevId, + devid: DeviceId, size: u64, queue: Arc, ) -> KResult> { @@ -97,13 +95,13 @@ impl BlockDevice { } } - pub fn get(devid: DevId) -> KResult> { + pub fn get(devid: DeviceId) -> KResult> { BLOCK_DEVICE_LIST.lock().get(&devid).cloned().ok_or(ENOENT) } } impl BlockDevice { - pub fn devid(&self) -> DevId { + pub fn devid(&self) -> DeviceId { self.devid } @@ -121,7 +119,7 @@ impl BlockDevice { }; let device = Arc::new(BlockDevice { - devid: make_device(self.devid >> 8, (self.devid & 0xff) + idx as u32 + 1), + devid: DeviceId::new(self.devid.major, self.devid.minor + idx as u16 + 1), sector_count: size, dev_type: BlockDeviceType::Partition { disk_dev: self.devid, @@ -159,7 +157,7 @@ impl BlockDevice { /// - `req.sector` must be within the disk size /// - `req.buffer` must be enough to hold the data /// - pub fn commit_request(&self, mut req: BlockDeviceRequest) -> KResult<()> { + pub async fn commit_request(&self, mut req: BlockDeviceRequest<'_>) -> KResult<()> { // Verify the request parameters. match &mut req { BlockDeviceRequest::Read { sector, count, .. 
} => { @@ -184,7 +182,7 @@ impl BlockDevice { } } - self.queue().submit(req) + self.queue().submit(req).await } /// Read some from the block device, may involve some copy and fragmentation @@ -194,7 +192,7 @@ impl BlockDevice { /// # Arguments /// `offset` - offset in bytes /// - pub fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult { + pub async fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult { let mut sector_start = offset as u64 / 512; let mut first_sector_offset = offset as u64 % 512; let mut sector_count = (first_sector_offset + buffer.total() as u64 + 511) / 512; @@ -241,7 +239,7 @@ impl BlockDevice { buffer: &pages, }; - self.commit_request(req)?; + self.commit_request(req).await?; for page in pages.iter() { // SAFETY: We are the only owner of the page so no one could be mutating it. @@ -277,7 +275,7 @@ impl BlockDevice { /// `offset` - offset in bytes /// `data` - data to write /// - pub fn write_some(&self, offset: usize, data: &[u8]) -> KResult { + pub async fn write_some(&self, offset: usize, data: &[u8]) -> KResult { let mut sector_start = offset as u64 / 512; let mut first_sector_offset = offset as u64 % 512; let mut remaining_data = data; @@ -320,7 +318,7 @@ impl BlockDevice { count: sector_count, buffer: pages, }; - self.commit_request(read_req)?; + self.commit_request(read_req).await?; } let mut data_offset = 0; @@ -356,7 +354,7 @@ impl BlockDevice { count: sector_count, buffer: pages, }; - self.commit_request(write_req)?; + self.commit_request(write_req).await?; let bytes_written = data_offset; nwritten += bytes_written; diff --git a/src/kernel/block/mbr.rs b/src/kernel/block/mbr.rs index 74cdc36e..c5820679 100644 --- a/src/kernel/block/mbr.rs +++ b/src/kernel/block/mbr.rs @@ -31,7 +31,7 @@ pub struct MBRPartTable { impl MBRPartTable { pub async fn from_disk(disk: &BlockDevice) -> KResult { let mut mbr: UninitBuffer = UninitBuffer::new(); - disk.read_some(0, &mut mbr)?.ok_or(EIO)?; + disk.read_some(0, 
&mut mbr).await?.ok_or(EIO)?; let mbr = mbr.assume_init()?; if mbr.magic != [0x55, 0xaa] { diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs index aff3271e..4e01d83a 100644 --- a/src/kernel/chardev.rs +++ b/src/kernel/chardev.rs @@ -1,10 +1,9 @@ use super::{ - block::make_device, console::get_console, constants::{EEXIST, EIO}, task::{block_on, ProcessList, Thread}, terminal::Terminal, - vfs::{DevId, File, FileType, TerminalFile}, + vfs::{types::DeviceId, File, FileType, TerminalFile}, }; use crate::{ io::{Buffer, Stream, StreamRead}, @@ -34,7 +33,7 @@ pub struct CharDevice { device: CharDeviceType, } -static CHAR_DEVICES: Spin>> = Spin::new(BTreeMap::new()); +static CHAR_DEVICES: Spin>> = Spin::new(BTreeMap::new()); impl CharDevice { pub fn read(&self, buffer: &mut dyn Buffer) -> KResult { @@ -54,11 +53,11 @@ impl CharDevice { } } - pub fn get(devid: DevId) -> Option> { + pub fn get(devid: DeviceId) -> Option> { CHAR_DEVICES.lock().get(&devid).cloned() } - pub fn register(devid: DevId, name: Arc, device: CharDeviceType) -> KResult<()> { + pub fn register(devid: DeviceId, name: Arc, device: CharDeviceType) -> KResult<()> { match CHAR_DEVICES.lock().entry(devid) { Entry::Vacant(entry) => { entry.insert(Arc::new(CharDevice { name, device })); @@ -134,19 +133,19 @@ impl VirtualCharDevice for ConsoleDevice { impl CharDevice { pub fn init() -> KResult<()> { Self::register( - make_device(1, 3), + DeviceId::new(1, 3), Arc::from("null"), CharDeviceType::Virtual(Box::new(NullDevice)), )?; Self::register( - make_device(1, 5), + DeviceId::new(1, 5), Arc::from("zero"), CharDeviceType::Virtual(Box::new(ZeroDevice)), )?; Self::register( - make_device(5, 1), + DeviceId::new(5, 1), Arc::from("console"), CharDeviceType::Virtual(Box::new(ConsoleDevice)), )?; diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs index efd06824..c147306e 100644 --- a/src/kernel/mem.rs +++ b/src/kernel/mem.rs @@ -12,5 +12,5 @@ pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess}; pub(self) use 
mm_area::MMArea; pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission}; pub use page_alloc::{GlobalPageAlloc, RawPage}; -pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackend}; +pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackendOps}; pub use paging::{Page, PageBuffer}; diff --git a/src/kernel/mem/mm_list/mapping.rs b/src/kernel/mem/mm_list/mapping.rs index 662000ba..5446ae42 100644 --- a/src/kernel/mem/mm_list/mapping.rs +++ b/src/kernel/mem/mm_list/mapping.rs @@ -1,24 +1,15 @@ -use core::fmt::Debug; - -use crate::kernel::vfs::inode::Inode; -use alloc::sync::Arc; +use crate::kernel::vfs::inode::{Inode, InodeUse}; use eonix_mm::paging::PAGE_SIZE; #[derive(Debug, Clone)] pub struct FileMapping { - pub file: Arc, + pub file: InodeUse, /// Offset in the file, aligned to 4KB boundary. pub offset: usize, /// Length of the mapping. Exceeding part will be zeroed. pub length: usize, } -impl Debug for dyn Inode { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "Inode()") - } -} - #[derive(Debug, Clone)] pub enum Mapping { // private anonymous memory @@ -28,7 +19,7 @@ pub enum Mapping { } impl FileMapping { - pub fn new(file: Arc, offset: usize, length: usize) -> Self { + pub fn new(file: InodeUse, offset: usize, length: usize) -> Self { assert_eq!(offset & (PAGE_SIZE - 1), 0); Self { file, diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 3ccf3255..9deb50cf 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -6,8 +6,10 @@ use crate::{ GlobalPageAlloc, }; use align_ext::AlignExt; +use alloc::boxed::Box; use alloc::{collections::btree_map::BTreeMap, sync::Weak}; -use core::mem::ManuallyDrop; +use async_trait::async_trait; +use core::{future::Future, mem::ManuallyDrop}; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::{ address::{PAddr, PhysAccess}, @@ -159,7 +161,8 @@ impl PageCache { self.backend 
.upgrade() .unwrap() - .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?; + .read_page(&mut new_page, offset.align_down(PAGE_SIZE)) + .await?; pages.insert(page_id, new_page); } } @@ -205,7 +208,8 @@ impl PageCache { self.backend .upgrade() .unwrap() - .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?; + .read_page(&mut new_page, offset.align_down(PAGE_SIZE)) + .await?; new_page }; @@ -224,7 +228,8 @@ impl PageCache { self.backend .upgrade() .unwrap() - .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)?; + .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS) + .await?; page.clear_dirty(); } } @@ -286,7 +291,8 @@ impl PageCache { self.backend .upgrade() .unwrap() - .read_page(&mut new_page, offset_aligin)?; + .read_page(&mut new_page, offset_aligin) + .await?; pages.insert(page_id, new_page); new_page.0 } @@ -349,14 +355,47 @@ impl Stream for CachePageStream { // for fs, offset is file offset (floor algin to PAGE_SIZE) // for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE) // Oh no, this would make unnecessary cache -pub trait PageCacheBackend { - fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult; +pub trait PageCacheBackendOps: Sized { + fn read_page( + &self, + page: &mut CachePage, + offset: usize, + ) -> impl Future> + Send; + + fn write_page( + &self, + page: &mut CachePageStream, + offset: usize, + ) -> impl Future> + Send; - fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult; + fn size(&self) -> usize; +} +#[async_trait] +pub trait PageCacheBackend: Send + Sync { + async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult; + async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult; fn size(&self) -> usize; } +#[async_trait] +impl PageCacheBackend for T +where + T: PageCacheBackendOps + Send + Sync + 'static, +{ + async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { + 
self.read_page(page, offset).await + } + + async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult { + self.write_page(page, offset).await + } + + fn size(&self) -> usize { + self.size() + } +} + pub trait PageCacheRawPage: RawPage { fn valid_size(&self) -> &mut usize; diff --git a/src/kernel/pcie/driver.rs b/src/kernel/pcie/driver.rs index be88b7df..eebaa896 100644 --- a/src/kernel/pcie/driver.rs +++ b/src/kernel/pcie/driver.rs @@ -4,21 +4,24 @@ use super::{ }; use crate::{kernel::constants::EEXIST, KResult}; use alloc::{ + boxed::Box, collections::btree_map::{self, BTreeMap}, sync::Arc, }; +use async_trait::async_trait; use eonix_sync::Spin; static PCIE_DRIVERS: Spin>> = Spin::new(BTreeMap::new()); +#[async_trait] pub trait PCIDriver: Send + Sync { fn vendor_id(&self) -> u16; fn device_id(&self) -> u16; - fn handle_device(&self, device: Arc>) -> Result<(), PciError>; + async fn handle_device(&self, device: Arc>) -> Result<(), PciError>; } -pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> { +pub async fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> { let index = (driver.vendor_id() as u32) << 16 | driver.device_id() as u32; let driver = Arc::new(driver); @@ -31,7 +34,7 @@ pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> { let devices = PCIE_DEVICES.lock().get(&index).cloned(); if let Some(devices) = devices { for device in devices { - driver.handle_device(device)?; + driver.handle_device(device).await?; } }; diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 1a48b255..db32b0e5 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -7,7 +7,7 @@ use crate::kernel::syscall::UserMut; use crate::kernel::task::Thread; use crate::kernel::timer::sleep; use crate::kernel::vfs::filearray::FD; -use crate::kernel::vfs::inode::Mode; +use crate::kernel::vfs::types::{DeviceId, Mode}; use crate::kernel::vfs::{PollEvent, SeekOption}; 
use crate::{ io::{Buffer, BufferFill}, @@ -41,7 +41,7 @@ impl FromSyscallArg for AtFlags { } } -fn dentry_from( +async fn dentry_from( thread: &Thread, dirfd: FD, pathname: User, @@ -52,7 +52,7 @@ fn dentry_from( match (path.as_cstr().to_bytes_with_nul()[0], dirfd) { (b'/', _) | (_, FD::AT_FDCWD) => { let path = Path::new(path.as_cstr().to_bytes())?; - Dentry::open(&thread.fs_context, path, follow_symlink) + Dentry::open(&thread.fs_context, path, follow_symlink).await } (0, dirfd) => { let dir_file = thread.files.get(dirfd).ok_or(EBADF)?; @@ -63,7 +63,7 @@ fn dentry_from( let dir_file = thread.files.get(dirfd).ok_or(EBADF)?; let dir_dentry = dir_file.as_path().ok_or(ENOTDIR)?; - Dentry::open_at(&thread.fs_context, dir_dentry, path, follow_symlink) + Dentry::open_at(&thread.fs_context, dir_dentry, path, follow_symlink).await } } } @@ -119,13 +119,11 @@ async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KRes } #[eonix_macros::define_syscall(SYS_OPENAT)] -async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mut mode: Mode) -> KResult { - let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink())?; +async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mode: Mode) -> KResult { + let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink()).await?; + let perm = mode.perm().mask_with(*thread.fs_context.umask.lock()); - let umask = *thread.fs_context.umask.lock(); - mode.mask_perm(!umask.non_format_bits()); - - thread.files.open(&dentry, flags, mode) + thread.files.open(&dentry, flags, perm).await } #[cfg(target_arch = "x86_64")] @@ -206,7 +204,7 @@ async fn newfstatat( let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? 
}; let statbuf = UserPointerMut::new(statbuf)?; @@ -247,7 +245,7 @@ async fn statx( let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; dentry.statx(&mut statx, mask)?; @@ -257,12 +255,11 @@ async fn statx( } #[eonix_macros::define_syscall(SYS_MKDIRAT)] -async fn mkdirat(dirfd: FD, pathname: User, mut mode: Mode) -> KResult<()> { - let umask = *thread.fs_context.umask.lock(); - mode.mask_perm(!umask.non_format_bits()); +async fn mkdirat(dirfd: FD, pathname: User, mode: Mode) -> KResult<()> { + let dentry = dentry_from(thread, dirfd, pathname, true).await?; + let perm = mode.perm().mask_with(*thread.fs_context.umask.lock()); - let dentry = dentry_from(thread, dirfd, pathname, true)?; - dentry.mkdir(mode) + dentry.mkdir(perm).await } #[cfg(target_arch = "x86_64")] @@ -274,7 +271,7 @@ async fn mkdir(pathname: User, mode: u32) -> KResult<()> { #[eonix_macros::define_syscall(SYS_FTRUNCATE64)] async fn truncate64(fd: FD, length: usize) -> KResult<()> { let file = thread.files.get(fd).ok_or(EBADF)?; - file.as_path().ok_or(EBADF)?.truncate(length) + file.as_path().ok_or(EBADF)?.truncate(length).await } #[cfg(target_arch = "x86_64")] @@ -290,7 +287,10 @@ async fn truncate(pathname: User, length: usize) -> KResult<()> { #[eonix_macros::define_syscall(SYS_UNLINKAT)] async fn unlinkat(dirfd: FD, pathname: User) -> KResult<()> { - dentry_from(thread, dirfd, pathname, false)?.unlink() + dentry_from(thread, dirfd, pathname, false) + .await? 
+ .unlink() + .await } #[cfg(target_arch = "x86_64")] @@ -302,9 +302,9 @@ async fn unlink(pathname: User) -> KResult<()> { #[eonix_macros::define_syscall(SYS_SYMLINKAT)] async fn symlinkat(target: User, dirfd: FD, linkpath: User) -> KResult<()> { let target = UserString::new(target)?; - let dentry = dentry_from(thread, dirfd, linkpath, false)?; + let dentry = dentry_from(thread, dirfd, linkpath, false).await?; - dentry.symlink(target.as_cstr().to_bytes()) + dentry.symlink(target.as_cstr().to_bytes()).await } #[cfg(target_arch = "x86_64")] @@ -313,18 +313,36 @@ async fn symlink(target: User, linkpath: User) -> KResult<()> { sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath) } +#[derive(Clone, Copy, Debug)] +#[repr(transparent)] +struct UserDeviceId(u32); + +impl FromSyscallArg for UserDeviceId { + fn from_arg(value: usize) -> Self { + Self(value as u32) + } +} + +impl UserDeviceId { + pub fn into_devid(self) -> DeviceId { + let major = (self.0 >> 8) & 0xfff; + let minor = (self.0 & 0xff) | ((self.0 >> 12) & 0xfff00); + + // TODO: We strip off the high 4 bits of the minor ID for now... 
+ DeviceId::new(major as u16, minor as u16) + } +} + #[eonix_macros::define_syscall(SYS_MKNODAT)] -async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: u32) -> KResult<()> { +async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: UserDeviceId) -> KResult<()> { if !mode.is_blk() && !mode.is_chr() { return Err(EINVAL); } - let dentry = dentry_from(thread, dirfd, pathname, true)?; - - let umask = *thread.fs_context.umask.lock(); - mode.mask_perm(!umask.non_format_bits()); + let dentry = dentry_from(thread, dirfd, pathname, true).await?; + mode.set_perm(mode.perm().mask_with(*thread.fs_context.umask.lock())); - dentry.mknod(mode, dev) + dentry.mknod(mode, dev.into_devid()).await } #[cfg(target_arch = "x86_64")] @@ -340,10 +358,10 @@ async fn readlinkat( buffer: UserMut, bufsize: usize, ) -> KResult { - let dentry = dentry_from(thread, dirfd, pathname, false)?; + let dentry = dentry_from(thread, dirfd, pathname, false).await?; let mut buffer = UserBuffer::new(buffer, bufsize)?; - dentry.readlink(&mut buffer) + dentry.readlink(&mut buffer).await } #[cfg(target_arch = "x86_64")] @@ -471,7 +489,7 @@ async fn faccessat(dirfd: FD, pathname: User, _mode: u32, flags: AtFlags) -> let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? 
}; if !dentry.is_valid() { @@ -614,12 +632,12 @@ async fn fchownat( gid: u32, flags: AtFlags, ) -> KResult<()> { - let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow())?; + let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?; if !dentry.is_valid() { return Err(ENOENT); } - dentry.chown(uid, gid) + dentry.chown(uid, gid).await } #[eonix_macros::define_syscall(SYS_FCHMODAT)] @@ -628,14 +646,14 @@ async fn fchmodat(dirfd: FD, pathname: User, mode: Mode, flags: AtFlags) -> let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; if !dentry.is_valid() { return Err(ENOENT); } - dentry.chmod(mode) + dentry.chmod(mode).await } #[eonix_macros::define_syscall(SYS_FCHMOD)] @@ -654,7 +672,7 @@ async fn utimensat( let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? 
}; if !dentry.is_valid() { @@ -688,10 +706,10 @@ async fn renameat2( Err(EINVAL)?; } - let old_dentry = dentry_from(thread, old_dirfd, old_pathname, false)?; - let new_dentry = dentry_from(thread, new_dirfd, new_pathname, false)?; + let old_dentry = dentry_from(thread, old_dirfd, old_pathname, false).await?; + let new_dentry = dentry_from(thread, new_dirfd, new_pathname, false).await?; - old_dentry.rename(&new_dentry, flags) + old_dentry.rename(&new_dentry, flags).await } #[cfg(target_arch = "x86_64")] diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index c6300ac7..4cb7908c 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -1,10 +1,8 @@ use super::FromSyscallArg; -use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; -use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; +use crate::kernel::constants::{EBADF, EINVAL}; use crate::kernel::mem::FileMapping; use crate::kernel::task::Thread; use crate::kernel::vfs::filearray::FD; -use crate::kernel::vfs::inode::Mode; use crate::{ kernel::{ constants::{UserMmapFlags, UserMmapProtocol}, @@ -66,13 +64,7 @@ async fn do_mmap2( if !is_shared { Mapping::Anonymous } else { - // The mode is unimportant here, since we are checking prot in mm_area. 
- let shared_area = SHM_MANAGER.lock().await.create_shared_area( - len, - thread.process.pid, - Mode::REG.perm(0o777), - ); - Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) + unimplemented!("mmap MAP_ANONYMOUS | MAP_SHARED"); } } else { let file = thread @@ -179,114 +171,4 @@ async fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<() .await } -#[eonix_macros::define_syscall(SYS_SHMGET)] -async fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { - let size = size.align_up(PAGE_SIZE); - - let mut shm_manager = SHM_MANAGER.lock().await; - let shmid = gen_shm_id(key)?; - - let mode = Mode::REG.perm(shmflg); - let shmflg = ShmFlags::from_bits_truncate(shmflg); - - if key == IPC_PRIVATE { - let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode); - shm_manager.insert(shmid, new_shm); - return Ok(shmid); - } - - if let Some(_) = shm_manager.get(shmid) { - if shmflg.contains(ShmFlags::IPC_CREAT | ShmFlags::IPC_EXCL) { - return Err(EEXIST); - } - - return Ok(shmid); - } - - if shmflg.contains(ShmFlags::IPC_CREAT) { - let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode); - shm_manager.insert(shmid, new_shm); - return Ok(shmid); - } - - Err(ENOENT) -} - -#[eonix_macros::define_syscall(SYS_SHMAT)] -async fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { - let mm_list = &thread.process.mm_list; - let shm_manager = SHM_MANAGER.lock().await; - let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?; - - // Why is this not used? 
- let _mode = shmflg & 0o777; - let shmflg = ShmFlags::from_bits_truncate(shmflg); - - let mut permission = Permission { - read: true, - write: true, - execute: false, - }; - - if shmflg.contains(ShmFlags::SHM_EXEC) { - permission.execute = true; - } - if shmflg.contains(ShmFlags::SHM_RDONLY) { - permission.write = false; - } - - let size = shm_area.shmid_ds.shm_segsz; - - let mapping = Mapping::File(FileMapping { - file: shm_area.area.clone(), - offset: 0, - length: size, - }); - - let addr = if addr != 0 { - if addr % PAGE_SIZE != 0 && !shmflg.contains(ShmFlags::SHM_RND) { - return Err(EINVAL); - } - let addr = VAddr::from(addr.align_down(PAGE_SIZE)); - mm_list - .mmap_fixed(addr, size, mapping, permission, true) - .await - } else { - mm_list - .mmap_hint(VAddr::NULL, size, mapping, permission, true) - .await - }?; - - thread.process.shm_areas.lock().insert(addr, size); - - Ok(addr.addr()) -} - -#[eonix_macros::define_syscall(SYS_SHMDT)] -async fn shmdt(addr: usize) -> KResult<()> { - let addr = VAddr::from(addr); - - let size = { - let mut shm_areas = thread.process.shm_areas.lock(); - let size = *shm_areas.get(&addr).ok_or(EINVAL)?; - shm_areas.remove(&addr); - - size - }; - - thread.process.mm_list.unmap(addr, size).await -} - -#[eonix_macros::define_syscall(SYS_SHMCTL)] -async fn shmctl(_shmid: u32, _op: i32, _shmid_ds: usize) -> KResult { - // TODO - Ok(0) -} - -#[eonix_macros::define_syscall(SYS_MEMBARRIER)] -async fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> { - // TODO - Ok(()) -} - pub fn keep_alive() {} diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 7dd573cc..b4d3e449 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -16,7 +16,7 @@ use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; use crate::kernel::user::UserString; use crate::kernel::user::{UserPointer, UserPointerMut}; -use crate::kernel::vfs::inode::Mode; +use 
crate::kernel::vfs::types::Permission; use crate::kernel::vfs::{self, dentry::Dentry}; use crate::path::Path; use crate::{kernel::user::UserBuffer, prelude::*}; @@ -100,10 +100,11 @@ async fn clock_nanosleep( } #[eonix_macros::define_syscall(SYS_UMASK)] -async fn umask(mask: Mode) -> KResult { - let mut umask = thread.fs_context.umask.lock(); +async fn umask(raw_new_mask: u32) -> KResult { + let new_mask = Permission::new(!raw_new_mask); + let old_mask = core::mem::replace(&mut *thread.fs_context.umask.lock(), new_mask); - Ok(core::mem::replace(&mut *umask, mask.non_format())) + Ok(!old_mask.bits()) } #[eonix_macros::define_syscall(SYS_GETCWD)] @@ -124,7 +125,7 @@ async fn chdir(path: User) -> KResult<()> { let path = UserString::new(path)?; let path = Path::new(path.as_cstr().to_bytes())?; - let dentry = Dentry::open(&thread.fs_context, path, true)?; + let dentry = Dentry::open(&thread.fs_context, path, true).await?; if !dentry.is_valid() { return Err(ENOENT); } @@ -159,7 +160,8 @@ async fn mount(source: User, target: User, fstype: User, flags: usiz &thread.fs_context, Path::new(target.as_cstr().to_bytes())?, true, - )?; + ) + .await?; if !mountpoint.is_valid() { return Err(ENOENT); @@ -172,6 +174,7 @@ async fn mount(source: User, target: User, fstype: User, flags: usiz fstype.as_cstr().to_str().map_err(|_| EINVAL)?, flags as u64, ) + .await } fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult> { @@ -199,14 +202,15 @@ async fn execve(exec: User, argv: User, envp: User) -> KResult Elf { Err(ENOEXEC) } - fn parse(elf_file: Arc) -> KResult { + async fn parse(elf_file: Arc) -> KResult { let mut elf_header = UninitBuffer::>::new(); - elf_file.read(&mut elf_header, 0)?; + elf_file.read(&mut elf_header, 0).await?; let elf_header = elf_header.assume_init().map_err(|_| ENOEXEC)?; @@ -203,10 +203,12 @@ impl Elf { let ph_count = elf_header.pt2.ph_count; let mut program_headers = vec![E::Ph::default(); ph_count as usize]; - elf_file.read( - &mut 
ByteBuffer::from(program_headers.as_mut_slice()), - ph_offset.into_usize(), - )?; + elf_file + .read( + &mut ByteBuffer::from(program_headers.as_mut_slice()), + ph_offset.into_usize(), + ) + .await?; Ok(Self { file: elf_file, @@ -390,12 +392,13 @@ impl Elf { } async fn load_ldso(&self, mm_list: &MMList) -> KResult> { - let ldso_path = self.ldso_path()?; + let ldso_path = self.ldso_path().await?; if let Some(ldso_path) = ldso_path { let fs_context = FsContext::global(); - let ldso_file = Dentry::open(fs_context, Path::new(ldso_path.as_bytes())?, true)?; - let ldso_elf = Elf::::parse(ldso_file)?; + let ldso_file = + Dentry::open(fs_context, Path::new(ldso_path.as_bytes())?, true).await?; + let ldso_elf = Elf::::parse(ldso_file).await?; let base = VAddr::from(E::LDSO_BASE_ADDR); @@ -420,7 +423,7 @@ impl Elf { mm_list.map_vdso().await } - fn ldso_path(&self) -> KResult> { + async fn ldso_path(&self) -> KResult> { for program_header in &self.program_headers { let type_ = program_header.type_().map_err(|_| ENOEXEC)?; @@ -430,7 +433,8 @@ impl Elf { let mut ldso_vec = vec![0u8; file_size - 1]; // -1 due to '\0' self.file - .read(&mut ByteBuffer::from(ldso_vec.as_mut_slice()), file_offset)?; + .read(&mut ByteBuffer::from(ldso_vec.as_mut_slice()), file_offset) + .await?; let ldso_path = String::from_utf8(ldso_vec).map_err(|_| ENOEXEC)?; return Ok(Some(ldso_path)); } @@ -445,16 +449,16 @@ pub enum ELF { } impl ELF { - pub fn parse(elf_file: Arc) -> KResult { + pub async fn parse(elf_file: Arc) -> KResult { let mut header_pt1 = UninitBuffer::::new(); - elf_file.read(&mut header_pt1, 0)?; + elf_file.read(&mut header_pt1, 0).await?; let header_pt1 = header_pt1.assume_init().map_err(|_| ENOEXEC)?; assert_eq!(header_pt1.magic, ELF_MAGIC); match header_pt1.class() { - Class::ThirtyTwo => Ok(ELF::Elf32(Elf::parse(elf_file)?)), - Class::SixtyFour => Ok(ELF::Elf64(Elf::parse(elf_file)?)), + Class::ThirtyTwo => Ok(ELF::Elf32(Elf::parse(elf_file).await?)), + Class::SixtyFour => 
Ok(ELF::Elf64(Elf::parse(elf_file).await?)), _ => Err(ENOEXEC), } } diff --git a/src/kernel/task/loader/mod.rs b/src/kernel/task/loader/mod.rs index 4e3f4db1..7679aaf4 100644 --- a/src/kernel/task/loader/mod.rs +++ b/src/kernel/task/loader/mod.rs @@ -33,7 +33,7 @@ pub struct ProgramLoader { } impl ProgramLoader { - pub fn parse( + pub async fn parse( fs_context: &FsContext, mut exec_path: CString, mut file: Arc, @@ -49,12 +49,15 @@ impl ProgramLoader { } let mut magic = [0; 4]; - file.read(&mut ByteBuffer::new(magic.as_mut_slice()), 0)?; + file.read(&mut ByteBuffer::new(magic.as_mut_slice()), 0) + .await?; match magic { [b'#', b'!', ..] => { let mut interpreter_line = [0; 256]; - let nread = file.read(&mut ByteBuffer::new(&mut interpreter_line), 0)?; + let nread = file + .read(&mut ByteBuffer::new(&mut interpreter_line), 0) + .await?; // There is a tiny time gap between reading the magic number and // reading the interpreter line, so we need to check if the line @@ -77,7 +80,7 @@ impl ProgramLoader { } let path = Path::new(interpreter_name.as_bytes())?; - file = Dentry::open(fs_context, path, true)?; + file = Dentry::open(fs_context, path, true).await?; args.insert(0, interpreter_name.clone()); if let Some(arg) = interpreter_arg { @@ -92,7 +95,7 @@ impl ProgramLoader { exec_path = interpreter_name; } - ELF_MAGIC => break ELF::parse(file)?, + ELF_MAGIC => break ELF::parse(file).await?, _ => return Err(ENOEXEC), } diff --git a/src/kernel/timer.rs b/src/kernel/timer.rs index 9b6a3ff2..1dbb1382 100644 --- a/src/kernel/timer.rs +++ b/src/kernel/timer.rs @@ -76,6 +76,8 @@ impl Ticks { } impl Instant { + pub const UNIX_EPOCH: Self = Self::default(); + pub const fn default() -> Self { Instant { secs_since_epoch: 0, diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs index 8bcd9f8a..5ac4e407 100644 --- a/src/kernel/vfs/dentry.rs +++ b/src/kernel/vfs/dentry.rs @@ -1,8 +1,9 @@ pub mod dcache; use super::{ - inode::{Ino, Inode, Mode, RenameData, WriteOffset}, - 
DevId, FsContext, + inode::{Ino, Inode, InodeUse, RenameData, WriteOffset}, + types::{DeviceId, Format, Mode, Permission}, + FsContext, }; use crate::{ hash::KernelHasher, @@ -14,22 +15,31 @@ use crate::{ }; use crate::{ io::Stream, - kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE}, + kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE}, }; -use alloc::sync::{Arc, Weak}; +use alloc::sync::Arc; use core::{ fmt, + future::Future, hash::{BuildHasher, BuildHasherDefault, Hasher}, - ops::ControlFlow, + pin::Pin, sync::atomic::{AtomicPtr, AtomicU64, Ordering}, }; use eonix_sync::LazyLock; use pointers::BorrowedArc; use posix_types::{namei::RenameFlags, open::OpenFlags, result::PosixError, stat::StatX}; +#[derive(PartialEq, Eq)] +enum DentryKind { + Regular, + Directory, + Symlink, + Mountpoint, +} + struct DentryData { - inode: Arc, - flags: u64, + inode: InodeUse, + kind: DentryKind, } /// # Safety @@ -79,12 +89,6 @@ impl fmt::Debug for Dentry { } } -const D_DIRECTORY: u64 = 1; -#[allow(dead_code)] -const D_MOUNTPOINT: u64 = 2; -const D_SYMLINK: u64 = 4; -const D_REGULAR: u64 = 8; - impl RCUNode for Dentry { fn rcu_prev(&self) -> &AtomicPtr { &self.prev @@ -116,11 +120,11 @@ impl Dentry { self.hash.store(hash, Ordering::Relaxed); } - fn find(self: &Arc, name: &[u8]) -> KResult> { + async fn find(self: &Arc, name: &[u8]) -> KResult> { let data = self.data.load(); let data = data.as_ref().ok_or(ENOENT)?; - if data.flags & D_DIRECTORY == 0 { + if data.kind != DentryKind::Directory { return Err(ENOTDIR); } @@ -141,7 +145,7 @@ impl Dentry { return Ok(found); } - dcache::d_try_revalidate(&dentry); + let _ = dcache::d_try_revalidate(&dentry).await; dcache::d_add(dentry.clone()); Ok(dentry) @@ -192,8 +196,8 @@ impl Dentry { .map_or(core::ptr::null(), |parent| Arc::as_ptr(&parent)) } - fn save_data(&self, inode: Arc, flags: u64) -> KResult<()> { - let new = DentryData { inode, flags }; + fn save(&self, inode: 
InodeUse, kind: DentryKind) { + let new = DentryData { inode, kind }; // TODO!!!: We don't actually need to use `RCUPointer` here // Safety: this function may only be called from `create`-like functions which requires the @@ -201,41 +205,35 @@ impl Dentry { // can't get a reference to the old data. let old = unsafe { self.data.swap(Some(Arc::new(new))) }; assert!(old.is_none()); - - Ok(()) } - pub fn save_reg(&self, file: Arc) -> KResult<()> { - self.save_data(file, D_REGULAR) - } - - pub fn save_symlink(&self, link: Arc) -> KResult<()> { - self.save_data(link, D_SYMLINK) + pub fn fill(&self, file: InodeUse) { + match file.format() { + Format::REG | Format::BLK | Format::CHR => self.save(file, DentryKind::Regular), + Format::DIR => self.save(file, DentryKind::Directory), + Format::LNK => self.save(file, DentryKind::Symlink), + } } - pub fn save_dir(&self, dir: Arc) -> KResult<()> { - self.save_data(dir, D_DIRECTORY) + pub fn inode(&self) -> Option> { + self.data.load().as_ref().map(|data| data.inode.clone()) } - pub fn get_inode(&self) -> KResult> { - self.data - .load() - .as_ref() - .ok_or(ENOENT) - .map(|data| data.inode.clone()) + pub fn get_inode(&self) -> KResult> { + self.inode().ok_or(ENOENT) } pub fn is_directory(&self) -> bool { let data = self.data.load(); data.as_ref() - .map_or(false, |data| data.flags & D_DIRECTORY != 0) + .map_or(false, |data| data.kind == DentryKind::Directory) } pub fn is_valid(&self) -> bool { self.data.load().is_some() } - pub fn open_check(self: &Arc, flags: OpenFlags, mode: Mode) -> KResult<()> { + pub async fn open_check(self: &Arc, flags: OpenFlags, perm: Permission) -> KResult<()> { let data = self.data.load(); if data.is_some() { @@ -250,7 +248,7 @@ impl Dentry { } let parent = self.parent().get_inode()?; - parent.creat(self, mode) + parent.create(self, perm).await } } } @@ -260,110 +258,120 @@ impl Dentry { context: &FsContext, dentry: Arc, nrecur: u32, - ) -> KResult> { - if nrecur >= 16 { - return Err(ELOOP); - } + ) -> 
Pin>> + use<'_>>> { + Box::pin(async move { + if nrecur >= 16 { + return Err(ELOOP); + } - let data = dentry.data.load(); - let data = data.as_ref().ok_or(ENOENT)?; + let data = dentry.data.load(); + let data = data.as_ref().ok_or(ENOENT)?; - match data.flags { - flags if flags & D_REGULAR != 0 => Err(ENOTDIR), - flags if flags & D_DIRECTORY != 0 => Ok(dentry), - flags if flags & D_SYMLINK != 0 => { - let mut buffer = [0u8; 256]; - let mut buffer = ByteBuffer::new(&mut buffer); + match data.kind { + DentryKind::Regular => Err(ENOTDIR), + DentryKind::Directory => Ok(dentry), + DentryKind::Symlink => { + let mut buffer = [0u8; 256]; + let mut buffer = ByteBuffer::new(&mut buffer); - data.inode.readlink(&mut buffer)?; - let path = Path::new(buffer.data())?; + data.inode.readlink(&mut buffer).await?; + let path = Path::new(buffer.data())?; - let dentry = - Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1)?; + let dentry = + Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1) + .await?; - Self::resolve_directory(context, dentry, nrecur + 1) + Self::resolve_directory(context, dentry, nrecur + 1).await + } + _ => panic!("Invalid dentry flags"), } - _ => panic!("Invalid dentry flags"), - } + }) } - pub fn open_recursive( - context: &FsContext, - cwd: &Arc, - path: Path, + pub fn open_recursive<'r, 'a: 'r, 'b: 'r, 'c: 'r>( + context: &'a FsContext, + cwd: &'b Arc, + path: Path<'c>, follow: bool, nrecur: u32, - ) -> KResult> { - // too many recursive search layers will cause stack overflow - // so we use 16 for now - if nrecur >= 16 { - return Err(ELOOP); - } + ) -> Pin>> + 'r>> { + Box::pin(async move { + // too many recursive search layers will cause stack overflow + // so we use 16 for now + if nrecur >= 16 { + return Err(ELOOP); + } - let mut cwd = if path.is_absolute() { - context.fsroot.clone() - } else { - cwd.clone() - }; + let mut cwd = if path.is_absolute() { + context.fsroot.clone() + } else { + cwd.clone() + }; - for item 
in path.iter() { - if let PathComponent::TrailingEmpty = item { - if cwd.data.load().as_ref().is_none() { - return Ok(cwd); + for item in path.iter() { + if let PathComponent::TrailingEmpty = item { + if cwd.data.load().as_ref().is_none() { + return Ok(cwd); + } } - } - cwd = Self::resolve_directory(context, cwd, nrecur)?; + cwd = Self::resolve_directory(context, cwd, nrecur).await?; - match item { - PathComponent::TrailingEmpty | PathComponent::Current => {} // pass - PathComponent::Parent => { - if !cwd.hash_eq(&context.fsroot) { - let parent = cwd.parent().clone(); - cwd = Self::resolve_directory(context, parent, nrecur)?; + match item { + PathComponent::TrailingEmpty | PathComponent::Current => {} // pass + PathComponent::Parent => { + if !cwd.hash_eq(&context.fsroot) { + let parent = cwd.parent().clone(); + cwd = Self::resolve_directory(context, parent, nrecur).await?; + } + continue; + } + PathComponent::Name(name) => { + cwd = cwd.find(name).await?; } - continue; - } - PathComponent::Name(name) => { - cwd = cwd.find(name)?; } } - } - if follow { - let data = cwd.data.load(); + if follow { + let data = cwd.data.load(); - if let Some(data) = data.as_ref() { - if data.flags & D_SYMLINK != 0 { - let data = cwd.data.load(); - let data = data.as_ref().unwrap(); - let mut buffer = [0u8; 256]; - let mut buffer = ByteBuffer::new(&mut buffer); + if let Some(data) = data.as_ref() { + if data.kind == DentryKind::Symlink { + let data = cwd.data.load(); + let data = data.as_ref().unwrap(); + let mut buffer = [0u8; 256]; + let mut buffer = ByteBuffer::new(&mut buffer); - data.inode.readlink(&mut buffer)?; - let path = Path::new(buffer.data())?; + data.inode.readlink(&mut buffer).await?; + let path = Path::new(buffer.data())?; - let parent = cwd.parent().clone(); - cwd = Self::open_recursive(context, &parent, path, true, nrecur + 1)?; + let parent = cwd.parent().clone(); + cwd = + Self::open_recursive(context, &parent, path, true, nrecur + 1).await?; + } } } - } - Ok(cwd) + 
Ok(cwd) + }) } - pub fn open(context: &FsContext, path: Path, follow_symlinks: bool) -> KResult> { + pub async fn open( + context: &FsContext, + path: Path<'_>, + follow_symlinks: bool, + ) -> KResult> { let cwd = context.cwd.lock().clone(); - Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0) + Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0).await } - pub fn open_at( + pub async fn open_at( context: &FsContext, at: &Arc, - path: Path, + path: Path<'_>, follow_symlinks: bool, ) -> KResult> { - Dentry::open_recursive(context, at, path, follow_symlinks, 0) + Dentry::open_recursive(context, at, path, follow_symlinks, 0).await } pub fn get_path( @@ -405,18 +413,18 @@ impl Dentry { } impl Dentry { - pub fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + pub async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load().format() { - Mode::DIR => Err(EISDIR), - Mode::REG => inode.read(buffer, offset), - Mode::BLK => { + match inode.format() { + Format::DIR => Err(EISDIR), + Format::REG => inode.read(buffer, offset).await, + Format::BLK => { let device = BlockDevice::get(inode.devid()?)?; - Ok(device.read_some(offset, buffer)?.allow_partial()) + Ok(device.read_some(offset, buffer).await?.allow_partial()) } - Mode::CHR => { + Format::CHR => { let device = CharDevice::get(inode.devid()?).ok_or(EPERM)?; device.read(buffer) } @@ -424,32 +432,32 @@ impl Dentry { } } - pub fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + pub async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load().format() { - Mode::DIR => Err(EISDIR), - Mode::REG => inode.write(stream, offset), - Mode::BLK => Err(EINVAL), // TODO - Mode::CHR 
=> CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), + match inode.format() { + Format::DIR => Err(EISDIR), + Format::REG => inode.write(stream, offset).await, + Format::BLK => Err(EINVAL), // TODO + Format::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), _ => Err(EINVAL), } } - pub fn readdir(&self, offset: usize, mut callback: F) -> KResult + pub async fn readdir(&self, offset: usize, mut for_each_entry: F) -> KResult> where - F: FnMut(&[u8], Ino) -> KResult>, + F: FnMut(&[u8], Ino) -> KResult + Send, { let dir = self.get_inode()?; - dir.do_readdir(offset, &mut callback) + dir.readdir(offset, &mut for_each_entry).await } - pub fn mkdir(&self, mode: Mode) -> KResult<()> { + pub async fn mkdir(&self, perm: Permission) -> KResult<()> { if self.get_inode().is_ok() { Err(EEXIST) } else { let dir = self.parent().get_inode()?; - dir.mkdir(self, mode) + dir.mkdir(self, perm).await } } @@ -457,50 +465,50 @@ impl Dentry { self.get_inode()?.statx(stat, mask) } - pub fn truncate(&self, size: usize) -> KResult<()> { - self.get_inode()?.truncate(size) + pub async fn truncate(&self, size: usize) -> KResult<()> { + self.get_inode()?.truncate(size).await } - pub fn unlink(self: &Arc) -> KResult<()> { + pub async fn unlink(self: &Arc) -> KResult<()> { if self.get_inode().is_err() { Err(ENOENT) } else { let dir = self.parent().get_inode()?; - dir.unlink(self) + dir.unlink(self).await } } - pub fn symlink(self: &Arc, link: &[u8]) -> KResult<()> { + pub async fn symlink(self: &Arc, link: &[u8]) -> KResult<()> { if self.get_inode().is_ok() { Err(EEXIST) } else { let dir = self.parent().get_inode()?; - dir.symlink(self, link) + dir.symlink(self, link).await } } - pub fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { - self.get_inode()?.readlink(buffer) + pub async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { + self.get_inode()?.readlink(buffer).await } - pub fn mknod(&self, mode: Mode, devid: DevId) -> KResult<()> { + pub async fn 
mknod(&self, mode: Mode, devid: DeviceId) -> KResult<()> { if self.get_inode().is_ok() { Err(EEXIST) } else { let dir = self.parent().get_inode()?; - dir.mknod(self, mode, devid) + dir.mknod(self, mode, devid).await } } - pub fn chmod(&self, mode: Mode) -> KResult<()> { - self.get_inode()?.chmod(mode) + pub async fn chmod(&self, mode: Mode) -> KResult<()> { + self.get_inode()?.chmod(mode).await } - pub fn chown(&self, uid: u32, gid: u32) -> KResult<()> { - self.get_inode()?.chown(uid, gid) + pub async fn chown(&self, uid: u32, gid: u32) -> KResult<()> { + self.get_inode()?.chown(uid, gid).await } - pub fn rename(self: &Arc, new: &Arc, flags: RenameFlags) -> KResult<()> { + pub async fn rename(self: &Arc, new: &Arc, flags: RenameFlags) -> KResult<()> { if Arc::ptr_eq(self, new) { return Ok(()); } @@ -509,22 +517,19 @@ impl Dentry { let new_parent = new.parent().get_inode()?; // If the two dentries are not in the same filesystem, return EXDEV. - if !Weak::ptr_eq(&old_parent.vfs, &new_parent.vfs) { + if old_parent.sbref().eq(&new_parent.sbref()) { Err(PosixError::EXDEV)?; } - let vfs = old_parent.vfs.upgrade().ok_or(EIO)?; - let rename_data = RenameData { old_dentry: self, new_dentry: new, new_parent, - vfs, is_exchange: flags.contains(RenameFlags::RENAME_EXCHANGE), no_replace: flags.contains(RenameFlags::RENAME_NOREPLACE), }; // Delegate to the parent directory's rename implementation - old_parent.rename(rename_data) + old_parent.rename(rename_data).await } } diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs index 188a1cfc..e2491235 100644 --- a/src/kernel/vfs/dentry/dcache.rs +++ b/src/kernel/vfs/dentry/dcache.rs @@ -1,7 +1,5 @@ -use super::{Dentry, Inode}; +use super::Dentry; use crate::kernel::constants::ENOENT; -use crate::kernel::task::block_on; -use crate::kernel::vfs::inode::Mode; use crate::rcu::RCUPointer; use crate::{ prelude::*, @@ -41,27 +39,14 @@ pub fn d_find_fast(dentry: &Dentry) -> Option> { /// Call `lookup()` on the 
parent inode to try find if the dentry points to a valid inode /// /// Silently fail without any side effects -pub fn d_try_revalidate(dentry: &Arc) { - let _lock = block_on(D_EXCHANGE_LOCK.lock()); - - (|| -> KResult<()> { - let parent = dentry.parent().get_inode()?; - let inode = parent.lookup(dentry)?.ok_or(ENOENT)?; +pub async fn d_try_revalidate(dentry: &Arc) -> KResult<()> { + let _lock = D_EXCHANGE_LOCK.lock().await; - d_save(dentry, inode) - })() - .unwrap_or_default(); -} + let parent = dentry.parent().get_inode()?; + let inode = parent.lookup(dentry).await?.ok_or(ENOENT)?; -/// Save the inode to the dentry. -/// -/// Dentry flags will be determined by the inode's mode. -pub fn d_save(dentry: &Arc, inode: Arc) -> KResult<()> { - match inode.mode.load().format() { - Mode::DIR => dentry.save_dir(inode), - Mode::LNK => dentry.save_symlink(inode), - _ => dentry.save_reg(inode), - } + dentry.fill(inode); + Ok(()) } /// Replace the old dentry with the new one in the dcache diff --git a/src/kernel/vfs/file/inode_file.rs b/src/kernel/vfs/file/inode_file.rs index 6386ba92..96526ee9 100644 --- a/src/kernel/vfs/file/inode_file.rs +++ b/src/kernel/vfs/file/inode_file.rs @@ -5,13 +5,13 @@ use crate::{ constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE}, vfs::{ dentry::Dentry, - inode::{Inode, Mode, WriteOffset}, + inode::{Inode, InodeUse, WriteOffset}, + types::Format, }, }, prelude::KResult, }; use alloc::sync::Arc; -use core::{ops::ControlFlow, sync::atomic::Ordering}; use eonix_sync::Mutex; use posix_types::{ getdent::{UserDirent, UserDirent64}, @@ -25,7 +25,7 @@ pub struct InodeFile { pub a: bool, /// Only a few modes those won't possibly change are cached here to speed up file operations. /// Specifically, `S_IFMT` masked bits. - pub mode: Mode, + pub format: Format, cursor: Mutex, dentry: Arc, } @@ -34,12 +34,7 @@ impl InodeFile { pub fn new(dentry: Arc, flags: OpenFlags) -> File { // SAFETY: `dentry` used to create `InodeFile` is valid. 
// SAFETY: `mode` should never change with respect to the `S_IFMT` fields. - let cached_mode = dentry - .get_inode() - .expect("`dentry` is invalid") - .mode - .load() - .format(); + let format = dentry.inode().expect("dentry should be invalid").format(); let (r, w, a) = flags.as_rwa(); @@ -50,15 +45,15 @@ impl InodeFile { r, w, a, - mode: cached_mode, + format, cursor: Mutex::new(0), }), ) } pub fn sendfile_check(&self) -> KResult<()> { - match self.mode { - Mode::REG | Mode::BLK => Ok(()), + match self.format { + Format::REG | Format::BLK => Ok(()), _ => Err(EBADF), } } @@ -70,21 +65,19 @@ impl InodeFile { let mut cursor = self.cursor.lock().await; - if self.a { - let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; + let (offset, update_offset) = match (self.a, offset) { + (true, _) => (WriteOffset::End(&mut cursor), None), + (false, Some(offset)) => (WriteOffset::Position(offset), None), + (false, None) => (WriteOffset::Position(*cursor), Some(&mut *cursor)), + }; - Ok(nwrote) - } else { - let nwrote = if let Some(offset) = offset { - self.dentry.write(stream, WriteOffset::Position(offset))? 
- } else { - let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?; - *cursor += nwrote; - nwrote - }; - - Ok(nwrote) + let nr_write = self.dentry.write(stream, offset).await?; + + if let Some(update_offset) = update_offset { + *update_offset += nr_write; } + + Ok(nr_write) } pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { @@ -92,24 +85,20 @@ impl InodeFile { return Err(EBADF); } - let nread = if let Some(offset) = offset { - let nread = self.dentry.read(buffer, offset)?; - nread - } else { - let mut cursor = self.cursor.lock().await; - - let nread = self.dentry.read(buffer, *cursor)?; + if let Some(offset) = offset { + return Ok(self.dentry.read(buffer, offset).await?); + } - *cursor += nread; - nread - }; + let mut cursor = self.cursor.lock().await; + let nread = self.dentry.read(buffer, *cursor).await?; + *cursor += nread; Ok(nread) } } impl File { - pub fn get_inode(&self) -> KResult>> { + pub fn get_inode(&self) -> KResult>> { if let FileType::Inode(inode_file) = &**self { Ok(Some(inode_file.dentry.get_inode()?)) } else { @@ -124,27 +113,30 @@ impl File { let mut cursor = inode_file.cursor.lock().await; - let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { - // + 1 for filename length padding '\0', + 1 for d_type. - let real_record_len = core::mem::size_of::() + filename.len() + 2; + let nread = inode_file + .dentry + .readdir(*cursor, |filename, ino| { + // + 1 for filename length padding '\0', + 1 for d_type. 
+ let real_record_len = core::mem::size_of::() + filename.len() + 2; - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } + if buffer.available() < real_record_len { + return Ok(false); + } - let record = UserDirent { - d_ino: ino as u32, - d_off: 0, - d_reclen: real_record_len as u16, - d_name: [0; 0], - }; + let record = UserDirent { + d_ino: ino.as_raw() as u32, + d_off: 0, + d_reclen: real_record_len as u16, + d_name: [0; 0], + }; - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0, 0])?.ok_or(EFAULT)?; + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0, 0])?.ok_or(EFAULT)?; - Ok(ControlFlow::Continue(())) - })?; + Ok(true) + }) + .await??; *cursor += nread; Ok(()) @@ -157,28 +149,31 @@ impl File { let mut cursor = inode_file.cursor.lock().await; - let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { - // Filename length + 1 for padding '\0' - let real_record_len = core::mem::size_of::() + filename.len() + 1; + let nread = inode_file + .dentry + .readdir(*cursor, |filename, ino| { + // Filename length + 1 for padding '\0' + let real_record_len = core::mem::size_of::() + filename.len() + 1; - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } + if buffer.available() < real_record_len { + return Ok(false); + } - let record = UserDirent64 { - d_ino: ino, - d_off: 0, - d_reclen: real_record_len as u16, - d_type: 0, - d_name: [0; 0], - }; + let record = UserDirent64 { + d_ino: ino.as_raw(), + d_off: 0, + d_reclen: real_record_len as u16, + d_type: 0, + d_name: [0; 0], + }; - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0])?.ok_or(EFAULT)?; + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0])?.ok_or(EFAULT)?; - Ok(ControlFlow::Continue(())) - })?; + Ok(true) + }) + .await??; *cursor += nread; Ok(()) @@ 
-196,7 +191,7 @@ impl File { SeekOption::Set(n) => n, SeekOption::End(off) => { let inode = inode_file.dentry.get_inode()?; - let size = inode.size.load(Ordering::Relaxed) as usize; + let size = inode.info().lock().size as usize; size.checked_add_signed(off).ok_or(EOVERFLOW)? } }; diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index b457a425..1862a3e1 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -1,6 +1,6 @@ use super::{ file::{File, InodeFile, Pipe}, - inode::Mode, + types::{Format, Permission}, Spin, TerminalFile, }; use crate::kernel::{ @@ -280,26 +280,31 @@ impl FileArray { Ok((read_fd, write_fd)) } - pub fn open(&self, dentry: &Arc, flags: OpenFlags, mode: Mode) -> KResult { - dentry.open_check(flags, mode)?; + pub async fn open( + &self, + dentry: &Arc, + flags: OpenFlags, + perm: Permission, + ) -> KResult { + dentry.open_check(flags, perm).await?; let fdflag = flags.as_fd_flags(); let inode = dentry.get_inode()?; - let file_format = inode.mode.load().format(); + let file_format = inode.format(); match (flags.directory(), file_format, flags.write()) { - (true, Mode::DIR, _) => {} + (true, Format::DIR, _) => {} (true, _, _) => return Err(ENOTDIR), - (false, Mode::DIR, true) => return Err(EISDIR), + (false, Format::DIR, true) => return Err(EISDIR), _ => {} } - if flags.truncate() && flags.write() && file_format.is_reg() { - inode.truncate(0)?; + if flags.truncate() && flags.write() && file_format == Format::REG { + inode.truncate(0).await?; } - let file = if file_format.is_chr() { + let file = if file_format == Format::CHR { let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?; device.open(flags)? 
} else { diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs deleted file mode 100644 index 52529f84..00000000 --- a/src/kernel/vfs/inode.rs +++ /dev/null @@ -1,494 +0,0 @@ -use super::{dentry::Dentry, vfs::Vfs, DevId}; -use crate::io::Stream; -use crate::kernel::constants::{ - EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, - STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFBLK, S_IFCHR, - S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, -}; -use crate::kernel::mem::PageCache; -use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; -use crate::kernel::task::block_on; -use crate::kernel::timer::Instant; -use crate::{io::Buffer, prelude::*}; -use alloc::sync::{Arc, Weak}; -use core::{ - mem::MaybeUninit, - ops::ControlFlow, - ptr::addr_of_mut, - sync::atomic::{AtomicU32, AtomicU64, Ordering}, -}; -use eonix_sync::RwLock; -use posix_types::stat::StatX; - -pub type Ino = u64; -pub type AtomicIno = AtomicU64; -#[allow(dead_code)] -pub type ISize = u64; -pub type AtomicISize = AtomicU64; -#[allow(dead_code)] -pub type Nlink = u64; -pub type AtomicNlink = AtomicU64; -#[allow(dead_code)] -pub type Uid = u32; -pub type AtomicUid = AtomicU32; -#[allow(dead_code)] -pub type Gid = u32; -pub type AtomicGid = AtomicU32; - -#[derive(Clone, Copy, PartialEq, Eq)] -pub struct Mode(u32); - -pub struct AtomicMode(AtomicU32); - -#[derive(Debug)] -pub struct InodeData { - pub ino: Ino, - pub size: AtomicISize, - pub nlink: AtomicNlink, - - pub uid: AtomicUid, - pub gid: AtomicGid, - pub mode: AtomicMode, - - pub atime: Spin, - pub ctime: Spin, - pub mtime: Spin, - - pub rwsem: RwLock<()>, - - pub vfs: Weak, -} - -impl InodeData { - pub fn new(ino: Ino, vfs: Weak) -> Self { - Self { - ino, - vfs, - atime: Spin::new(Instant::now()), - ctime: Spin::new(Instant::now()), - mtime: Spin::new(Instant::now()), - rwsem: RwLock::new(()), - size: AtomicU64::new(0), - nlink: AtomicNlink::new(0), - uid: AtomicUid::new(0), - 
gid: AtomicGid::new(0), - mode: AtomicMode::new(0), - } - } -} - -#[allow(dead_code)] -pub trait InodeInner: - Send + Sync + core::ops::Deref + core::ops::DerefMut -{ - fn data(&self) -> &InodeData; - fn data_mut(&mut self) -> &mut InodeData; -} - -pub enum WriteOffset<'end> { - Position(usize), - End(&'end mut usize), -} - -pub struct RenameData<'a, 'b> { - pub old_dentry: &'a Arc, - pub new_dentry: &'b Arc, - pub new_parent: Arc, - pub vfs: Arc, - pub is_exchange: bool, - pub no_replace: bool, -} - -#[allow(unused_variables)] -pub trait Inode: Send + Sync + InodeInner + Any { - fn is_dir(&self) -> bool { - self.mode.load().is_dir() - } - - fn lookup(&self, dentry: &Arc) -> KResult>> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn creat(&self, at: &Arc, mode: Mode) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn unlink(&self, at: &Arc) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn devid(&self) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn readlink(&self, buffer: &mut dyn 
Buffer) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn truncate(&self, length: usize) -> KResult<()> { - Err(if self.is_dir() { EISDIR } else { EPERM }) - } - - fn rename(&self, rename_data: RenameData) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn do_readdir( - &self, - offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn chmod(&self, mode: Mode) -> KResult<()> { - Err(EPERM) - } - - fn chown(&self, uid: u32, gid: u32) -> KResult<()> { - Err(EPERM) - } - - fn page_cache(&self) -> Option<&PageCache> { - None - } - - fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> { - // Safety: ffi should have checked reference - let vfs = self.vfs.upgrade().expect("Vfs is dropped"); - - let size = self.size.load(Ordering::Relaxed); - let mode = self.mode.load(); - - if mask & STATX_NLINK != 0 { - stat.stx_nlink = self.nlink.load(Ordering::Acquire) as _; - stat.stx_mask |= STATX_NLINK; - } - - if mask & STATX_ATIME != 0 { - let atime = *self.atime.lock(); - stat.stx_atime = atime.into(); - stat.stx_mask |= STATX_ATIME; - } - - if mask & STATX_MTIME != 0 { - let mtime = *self.mtime.lock(); - stat.stx_mtime = mtime.into(); - stat.stx_mask |= STATX_MTIME; - } - - if mask & STATX_CTIME != 0 { - let ctime = *self.ctime.lock(); - stat.stx_ctime = ctime.into(); - stat.stx_mask |= STATX_CTIME; - } - - if mask & STATX_SIZE != 0 { - stat.stx_size = self.size.load(Ordering::Relaxed) as _; - stat.stx_mask |= STATX_SIZE; - } - - stat.stx_mode = 0; - if mask & STATX_MODE != 0 { - stat.stx_mode |= mode.non_format_bits() as u16; - stat.stx_mask |= STATX_MODE; - } - - if mask & STATX_TYPE != 0 { - stat.stx_mode |= mode.format_bits() as u16; - if mode.is_blk() || mode.is_chr() { - let devid = self.devid(); - stat.stx_rdev_major = (devid? >> 8) & 0xff; - stat.stx_rdev_minor = devid? 
& 0xff; - } - stat.stx_mask |= STATX_TYPE; - } - - if mask & STATX_INO != 0 { - stat.stx_ino = self.ino as _; - stat.stx_mask |= STATX_INO; - } - - if mask & STATX_BLOCKS != 0 { - stat.stx_blocks = (size + 512 - 1) / 512; - stat.stx_blksize = vfs.io_blksize() as _; - stat.stx_mask |= STATX_BLOCKS; - } - - if mask & STATX_UID != 0 { - stat.stx_uid = self.uid.load(Ordering::Relaxed) as _; - stat.stx_mask |= STATX_UID; - } - - if mask & STATX_GID != 0 { - stat.stx_gid = self.gid.load(Ordering::Relaxed) as _; - stat.stx_mask |= STATX_GID; - } - - let fsdev = vfs.fs_devid(); - stat.stx_dev_major = (fsdev >> 8) & 0xff; - stat.stx_dev_minor = fsdev & 0xff; - - // TODO: support more attributes - stat.stx_attributes_mask = 0; - - Ok(()) - } - - fn new_locked(ino: Ino, vfs: Weak, f: F) -> Arc - where - Self: Sized, - F: FnOnce(*mut Self, &()), - { - let mut uninit = Arc::::new_uninit(); - - let uninit_mut = Arc::get_mut(&mut uninit).unwrap(); - - // Safety: `idata` is owned by `uninit` - let idata = unsafe { - addr_of_mut!(*(*uninit_mut.as_mut_ptr()).data_mut()) - .cast::>() - .as_mut() - .unwrap() - }; - - idata.write(InodeData::new(ino, vfs)); - - f( - uninit_mut.as_mut_ptr(), - // SAFETY: `idata` is initialized and we will never move the lock. - &block_on(unsafe { idata.assume_init_ref() }.rwsem.read()), - ); - - // Safety: `uninit` is initialized - unsafe { uninit.assume_init() } - } -} - -// TODO: define multiple inode structs a time -macro_rules! 
define_struct_inode { - ($v:vis struct $inode_t:ident;) => { - $v struct $inode_t { - /// Do not use this directly - idata: $crate::kernel::vfs::inode::InodeData, - } - - impl core::ops::Deref for $inode_t { - type Target = $crate::kernel::vfs::inode::InodeData; - - fn deref(&self) -> &Self::Target { - &self.idata - } - } - - impl core::ops::DerefMut for $inode_t { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.idata - } - } - - impl $crate::kernel::vfs::inode::InodeInner for $inode_t { - fn data(&self) -> &$crate::kernel::vfs::inode::InodeData { - &self.idata - } - - fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData { - &mut self.idata - } - } - }; - ($v:vis struct $inode_t:ident { $($vis:vis $name:ident: $type:ty,)* }) => { - $v struct $inode_t { - /// Do not use this directly - idata: $crate::kernel::vfs::inode::InodeData, - $($vis $name: $type,)* - } - - impl core::ops::Deref for $inode_t { - type Target = $crate::kernel::vfs::inode::InodeData; - - fn deref(&self) -> &Self::Target { - &self.idata - } - } - - impl core::ops::DerefMut for $inode_t { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.idata - } - } - - impl $crate::kernel::vfs::inode::InodeInner for $inode_t { - fn data(&self) -> &$crate::kernel::vfs::inode::InodeData { - &self.idata - } - - fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData { - &mut self.idata - } - } - }; -} - -pub(crate) use define_struct_inode; - -impl Mode { - pub const REG: Self = Self(S_IFREG); - pub const DIR: Self = Self(S_IFDIR); - pub const LNK: Self = Self(S_IFLNK); - pub const BLK: Self = Self(S_IFBLK); - pub const CHR: Self = Self(S_IFCHR); - - pub const fn new(bits: u32) -> Self { - Self(bits) - } - - pub const fn is_blk(&self) -> bool { - (self.0 & S_IFMT) == S_IFBLK - } - - pub const fn is_chr(&self) -> bool { - (self.0 & S_IFMT) == S_IFCHR - } - - pub const fn is_reg(&self) -> bool { - (self.0 & S_IFMT) == S_IFREG - } - - pub const fn is_dir(&self) -> 
bool { - (self.0 & S_IFMT) == S_IFDIR - } - - pub const fn is_lnk(&self) -> bool { - (self.0 & S_IFMT) == S_IFLNK - } - - pub const fn bits(&self) -> u32 { - self.0 - } - - pub const fn format_bits(&self) -> u32 { - self.0 & S_IFMT - } - - pub const fn format(&self) -> Self { - Self::new(self.format_bits()) - } - - pub const fn non_format_bits(&self) -> u32 { - self.0 & !S_IFMT - } - - pub const fn non_format(&self) -> Self { - Self::new(self.non_format_bits()) - } - - pub const fn perm(self, perm: u32) -> Self { - Self::new((self.0 & !0o777) | (perm & 0o777)) - } - - pub const fn set_perm(&mut self, perm: u32) { - *self = self.perm(perm); - } - - pub const fn mask_perm(&mut self, perm_mask: u32) { - let perm_mask = perm_mask & 0o777; - let self_perm = self.non_format_bits() & 0o777; - - *self = self.perm(self_perm & perm_mask); - } -} - -impl AtomicMode { - pub const fn new(bits: u32) -> Self { - Self(AtomicU32::new(bits)) - } - - pub const fn from(mode: Mode) -> Self { - Self::new(mode.0) - } - - pub fn load(&self) -> Mode { - Mode(self.0.load(Ordering::Relaxed)) - } - - pub fn store(&self, mode: Mode) { - self.0.store(mode.0, Ordering::Relaxed); - } -} - -impl core::fmt::Debug for AtomicMode { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.debug_struct("AtomicMode") - .field("bits", &self.load().0) - .finish() - } -} - -impl core::fmt::Debug for Mode { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let format_name = match self.format() { - Mode::REG => "REG", - Mode::DIR => "DIR", - Mode::LNK => "LNK", - Mode::BLK => "BLK", - Mode::CHR => "CHR", - _ => "UNK", - }; - - match self.non_format_bits() & !0o777 { - 0 => write!( - f, - "Mode({format_name}, {perm:#o})", - perm = self.non_format_bits() - )?, - rem => write!( - f, - "Mode({format_name}, {perm:#o}, rem={rem:#x})", - perm = self.non_format_bits() & 0o777 - )?, - } - - Ok(()) - } -} - -impl FromSyscallArg for Mode { - fn from_arg(value: usize) -> Self 
{ - Mode::new(value as u32) - } -} - -impl SyscallRetVal for Mode { - fn into_retval(self) -> Option { - Some(self.bits() as usize) - } -} diff --git a/src/kernel/vfs/inode/ino.rs b/src/kernel/vfs/inode/ino.rs new file mode 100644 index 00000000..b5ee7ac0 --- /dev/null +++ b/src/kernel/vfs/inode/ino.rs @@ -0,0 +1,31 @@ +use core::{ + fmt::{Debug, Display, Formatter}, + sync::atomic::AtomicU64, +}; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Ino(u64); + +pub struct AtomicIno(AtomicU64); + +impl Ino { + pub const fn new(ino: u64) -> Self { + Self(ino) + } + + pub const fn as_raw(self) -> u64 { + self.0 + } +} + +impl Debug for Ino { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "Ino({})", self.0) + } +} + +impl Display for Ino { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "{:?}", self) + } +} diff --git a/src/kernel/vfs/inode/inode.rs b/src/kernel/vfs/inode/inode.rs new file mode 100644 index 00000000..786a31fe --- /dev/null +++ b/src/kernel/vfs/inode/inode.rs @@ -0,0 +1,389 @@ +use alloc::boxed::Box; +use core::{ + any::Any, + future::Future, + marker::Unsize, + ops::{CoerceUnsized, Deref}, + pin::Pin, +}; +use eonix_sync::Spin; + +use alloc::sync::{Arc, Weak}; +use async_trait::async_trait; + +use crate::{ + io::{Buffer, Stream}, + kernel::{ + constants::{EINVAL, EPERM}, + mem::PageCache, + timer::Instant, + vfs::{ + dentry::Dentry, + types::{DeviceId, Format, Mode, Permission}, + SbRef, SbUse, SuperBlock, + }, + }, + prelude::KResult, +}; + +use super::{Ino, RenameData, WriteOffset}; + +pub trait InodeOps: Sized + Send + Sync + 'static { + type SuperBlock: SuperBlock + Sized; + + fn ino(&self) -> Ino; + fn format(&self) -> Format; + fn info(&self) -> &Spin; + + fn super_block(&self) -> &SbRef; + + fn page_cache(&self) -> Option<&PageCache>; +} + +#[allow(unused_variables)] +pub trait InodeDirOps: InodeOps { + fn lookup( + &self, + dentry: &Arc, + ) -> impl Future>>> + Send { 
+ async { Err(EPERM) } + } + + /// Read directory entries and call the given closure for each entry. + /// + /// # Returns + /// - Ok(count): The number of entries read. + /// - Ok(Err(err)): Some error occurred while calling the given closure. + /// - Err(err): An error occurred while reading the directory. + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, + offset: usize, + for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> impl Future>> + Send + 'r { + async { Err(EPERM) } + } + + fn create( + &self, + at: &Arc, + mode: Permission, + ) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn mkdir(&self, at: &Dentry, mode: Permission) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn mknod( + &self, + at: &Dentry, + mode: Mode, + dev: DeviceId, + ) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn unlink(&self, at: &Arc) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn symlink(&self, at: &Arc, target: &[u8]) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn rename(&self, rename_data: RenameData<'_, '_>) -> impl Future> + Send { + async { Err(EPERM) } + } +} + +#[allow(unused_variables)] +pub trait InodeFileOps: InodeOps { + fn read( + &self, + buffer: &mut dyn Buffer, + offset: usize, + ) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn read_direct( + &self, + buffer: &mut dyn Buffer, + offset: usize, + ) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn write( + &self, + stream: &mut dyn Stream, + offset: WriteOffset<'_>, + ) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn write_direct( + &self, + stream: &mut dyn Stream, + offset: usize, + ) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn devid(&self) -> KResult { + Err(EINVAL) + } + + fn readlink(&self, buffer: &mut dyn Buffer) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn truncate(&self, length: usize) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn chmod(&self, perm: Permission) -> 
impl Future> + Send { + async { Err(EPERM) } + } + + fn chown(&self, uid: u32, gid: u32) -> impl Future> + Send { + async { Err(EPERM) } + } +} + +#[async_trait] +pub trait InodeDir { + async fn lookup(&self, dentry: &Arc) -> KResult>>; + async fn create(&self, at: &Arc, perm: Permission) -> KResult<()>; + async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()>; + async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()>; + async fn unlink(&self, at: &Arc) -> KResult<()>; + async fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()>; + async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()>; + + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, + offset: usize, + callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> Pin>> + Send + 'r>>; +} + +#[async_trait] +pub trait InodeFile { + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult; + async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult; + async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult; + async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult; + fn devid(&self) -> KResult; + async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult; + async fn truncate(&self, length: usize) -> KResult<()>; + async fn chmod(&self, mode: Mode) -> KResult<()>; + async fn chown(&self, uid: u32, gid: u32) -> KResult<()>; +} + +pub trait Inode: InodeFile + InodeDir + Any + Send + Sync + 'static { + fn ino(&self) -> Ino; + fn format(&self) -> Format; + fn info(&self) -> &Spin; + + // TODO: This might should be removed... Temporary workaround for now. 
+ fn page_cache(&self) -> Option<&PageCache>; + + fn sbref(&self) -> SbRef; + fn sbget(&self) -> KResult>; +} + +#[async_trait] +impl InodeFile for T +where + T: InodeFileOps, +{ + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + self.read(buffer, offset).await + } + + async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + self.read_direct(buffer, offset).await + } + + async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult { + self.write(stream, offset).await + } + + async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { + self.write_direct(stream, offset).await + } + + fn devid(&self) -> KResult { + self.devid() + } + + async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { + self.readlink(buffer).await + } + + async fn truncate(&self, length: usize) -> KResult<()> { + self.truncate(length).await + } + + async fn chmod(&self, mode: Mode) -> KResult<()> { + self.chmod(Permission::new(mode.non_format_bits())).await + } + + async fn chown(&self, uid: u32, gid: u32) -> KResult<()> { + self.chown(uid, gid).await + } +} + +#[async_trait] +impl InodeDir for T +where + T: InodeDirOps, +{ + async fn lookup(&self, dentry: &Arc) -> KResult>> { + self.lookup(dentry).await + } + + async fn create(&self, at: &Arc, perm: Permission) -> KResult<()> { + self.create(at, perm).await + } + + async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()> { + self.mkdir(at, perm).await + } + + async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> { + self.mknod(at, mode, dev).await + } + + async fn unlink(&self, at: &Arc) -> KResult<()> { + self.unlink(at).await + } + + async fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { + self.symlink(at, target).await + } + + async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> { + self.rename(rename_data).await + } + + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, + offset: 
usize, + callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> Pin>> + Send + 'r>> { + Box::pin(self.readdir(offset, callback)) + } +} + +impl Inode for T +where + T: InodeOps + InodeFile + InodeDir, +{ + fn ino(&self) -> Ino { + self.ino() + } + + fn format(&self) -> Format { + self.format() + } + + fn info(&self) -> &Spin { + self.info() + } + + fn page_cache(&self) -> Option<&PageCache> { + self.page_cache() + } + + fn sbref(&self) -> SbRef { + self.super_block().clone() + } + + fn sbget(&self) -> KResult> { + self.super_block().get().map(|sb| sb as _) + } +} + +#[derive(Debug, Clone)] +pub struct InodeInfo { + pub size: u64, + pub nlink: u64, + + pub uid: u32, + pub gid: u32, + pub perm: Permission, + + pub atime: Instant, + pub ctime: Instant, + pub mtime: Instant, +} + +#[derive(Clone)] +pub struct InodeRef(Weak) +where + I: Inode + ?Sized; + +pub struct InodeUse(Arc) +where + I: Inode + ?Sized; + +impl InodeUse +where + I: Inode, +{ + pub fn new(inode: I) -> Self { + Self(Arc::new(inode)) + } + + pub fn new_cyclic(inode_func: impl FnOnce(&Weak) -> I) -> Self { + Self(Arc::new_cyclic(inode_func)) + } +} + +impl InodeUse +where + I: Inode + ?Sized, +{ + pub fn as_raw(&self) -> *const I { + Arc::as_ptr(&self.0) + } +} + +impl CoerceUnsized> for InodeUse +where + T: Inode + Unsize + ?Sized, + U: Inode + ?Sized, +{ +} + +impl Clone for InodeUse +where + I: Inode + ?Sized, +{ + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl core::fmt::Debug for InodeUse +where + I: Inode + ?Sized, +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "InodeUse(ino={})", self.ino()) + } +} + +impl Deref for InodeUse +where + I: Inode + ?Sized, +{ + type Target = I; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} diff --git a/src/kernel/vfs/inode/mod.rs b/src/kernel/vfs/inode/mod.rs new file mode 100644 index 00000000..786d30fa --- /dev/null +++ b/src/kernel/vfs/inode/mod.rs @@ -0,0 +1,10 @@ +mod ino; 
+mod inode; +mod ops; +mod statx; + +pub use ino::Ino; +pub use inode::{ + Inode, InodeDir, InodeDirOps, InodeFile, InodeFileOps, InodeInfo, InodeOps, InodeRef, InodeUse, +}; +pub use ops::{RenameData, WriteOffset}; diff --git a/src/kernel/vfs/inode/ops.rs b/src/kernel/vfs/inode/ops.rs new file mode 100644 index 00000000..baab1a80 --- /dev/null +++ b/src/kernel/vfs/inode/ops.rs @@ -0,0 +1,18 @@ +use alloc::sync::Arc; + +use crate::kernel::vfs::dentry::Dentry; + +use super::{inode::InodeUse, Inode}; + +pub enum WriteOffset<'end> { + Position(usize), + End(&'end mut usize), +} + +pub struct RenameData<'a, 'b> { + pub old_dentry: &'a Arc, + pub new_dentry: &'b Arc, + pub new_parent: InodeUse, + pub is_exchange: bool, + pub no_replace: bool, +} diff --git a/src/kernel/vfs/inode/statx.rs b/src/kernel/vfs/inode/statx.rs new file mode 100644 index 00000000..a85ef3af --- /dev/null +++ b/src/kernel/vfs/inode/statx.rs @@ -0,0 +1,97 @@ +use posix_types::stat::StatX; + +use crate::{ + kernel::{ + constants::{ + STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, STATX_MODE, STATX_MTIME, + STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, + }, + vfs::types::Format, + }, + prelude::KResult, +}; + +use super::{inode::InodeUse, Inode}; + +impl InodeUse +where + I: Inode + ?Sized, +{ + pub fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> { + let sb = self.sbget()?; + let info = self.info().lock(); + + if mask & STATX_NLINK != 0 { + stat.stx_nlink = info.nlink as _; + stat.stx_mask |= STATX_NLINK; + } + + if mask & STATX_ATIME != 0 { + stat.stx_atime = info.atime.into(); + stat.stx_mask |= STATX_ATIME; + } + + if mask & STATX_MTIME != 0 { + stat.stx_mtime = info.mtime.into(); + stat.stx_mask |= STATX_MTIME; + } + + if mask & STATX_CTIME != 0 { + stat.stx_ctime = info.ctime.into(); + stat.stx_mask |= STATX_CTIME; + } + + if mask & STATX_SIZE != 0 { + stat.stx_size = info.size as _; + stat.stx_mask |= STATX_SIZE; + } + + stat.stx_mode = 0; + if mask & STATX_MODE != 
0 { + stat.stx_mode |= info.perm.bits() as u16; + stat.stx_mask |= STATX_MODE; + } + + if mask & STATX_TYPE != 0 { + let format = self.format(); + + stat.stx_mode |= format.as_raw() as u16; + if let Format::BLK | Format::CHR = format { + let devid = self.devid()?; + stat.stx_rdev_major = devid.major as _; + stat.stx_rdev_minor = devid.minor as _; + } + stat.stx_mask |= STATX_TYPE; + } + + if mask & STATX_INO != 0 { + stat.stx_ino = self.ino().as_raw(); + stat.stx_mask |= STATX_INO; + } + + if mask & STATX_BLOCKS != 0 { + stat.stx_blocks = (info.size + 512 - 1) / 512; + stat.stx_blksize = sb.info.io_blksize as _; + stat.stx_mask |= STATX_BLOCKS; + } + + if mask & STATX_UID != 0 { + stat.stx_uid = info.uid; + stat.stx_mask |= STATX_UID; + } + + if mask & STATX_GID != 0 { + stat.stx_gid = info.gid; + stat.stx_mask |= STATX_GID; + } + + let fsdev = sb.info.device_id; + stat.stx_dev_major = fsdev.major as _; + stat.stx_dev_minor = fsdev.minor as _; + + // TODO: support more attributes + stat.stx_attributes_mask = 0; + + Ok(()) + } +} diff --git a/src/kernel/vfs/mod.rs b/src/kernel/vfs/mod.rs index f62cb9b9..5b8eca5a 100644 --- a/src/kernel/vfs/mod.rs +++ b/src/kernel/vfs/mod.rs @@ -1,31 +1,31 @@ -use crate::prelude::*; -use alloc::sync::Arc; -use dentry::Dentry; -use eonix_sync::LazyLock; -use inode::Mode; - pub mod dentry; mod file; pub mod filearray; pub mod inode; pub mod mount; -pub mod vfs; +mod superblock; +pub mod types; -pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile}; +use crate::prelude::*; +use alloc::sync::Arc; +use dentry::Dentry; +use eonix_sync::LazyLock; +use types::Permission; -pub type DevId = u32; +pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile}; +pub use superblock::{SbRef, SbUse, SuperBlock, SuperBlockInfo, SuperBlockLock}; pub struct FsContext { pub fsroot: Arc, pub cwd: Spin>, - pub umask: Spin, + pub umask: Spin, } static GLOBAL_FS_CONTEXT: LazyLock> = LazyLock::new(|| { Arc::new(FsContext { fsroot: 
Dentry::root().clone(), cwd: Spin::new(Dentry::root().clone()), - umask: Spin::new(Mode::new(0o022)), + umask: Spin::new(Permission::new(0o755)), }) }); diff --git a/src/kernel/vfs/mount.rs b/src/kernel/vfs/mount.rs index 0b38e0c0..213acae9 100644 --- a/src/kernel/vfs/mount.rs +++ b/src/kernel/vfs/mount.rs @@ -1,11 +1,15 @@ use super::{ dentry::{dcache, Dentry, DROOT}, - inode::Inode, - vfs::Vfs, + inode::{Inode, InodeUse}, + SbUse, SuperBlock, +}; +use crate::kernel::{ + constants::{EEXIST, ENODEV, ENOTDIR}, + task::block_on, }; -use crate::kernel::constants::{EEXIST, ENODEV, ENOTDIR}; use crate::prelude::*; use alloc::{collections::btree_map::BTreeMap, string::ToString as _, sync::Arc}; +use async_trait::async_trait; use eonix_sync::LazyLock; pub const MS_RDONLY: u64 = 1 << 0; @@ -30,17 +34,21 @@ static MOUNT_CREATORS: Spin>> = Spin::new static MOUNTS: Spin, MountPointData)>> = Spin::new(vec![]); pub struct Mount { - _vfs: Arc, + sb: SbUse, root: Arc, } impl Mount { - pub fn new(mp: &Dentry, vfs: Arc, root_inode: Arc) -> KResult { + pub fn new( + mp: &Dentry, + sb: SbUse, + root_inode: InodeUse, + ) -> KResult { let root_dentry = Dentry::create(mp.parent().clone(), &mp.get_name()); - root_dentry.save_dir(root_inode)?; + root_dentry.fill(root_inode); Ok(Self { - _vfs: vfs, + sb, root: root_dentry, }) } @@ -53,9 +61,10 @@ impl Mount { unsafe impl Send for Mount {} unsafe impl Sync for Mount {} +#[async_trait] pub trait MountCreator: Send + Sync { fn check_signature(&self, first_block: &[u8]) -> KResult; - fn create_mount(&self, source: &str, flags: u64, mp: &Arc) -> KResult; + async fn create_mount(&self, source: &str, flags: u64, mp: &Arc) -> KResult; } pub fn register_filesystem(fstype: &str, creator: Arc) -> KResult<()> { @@ -77,7 +86,7 @@ struct MountPointData { flags: u64, } -pub fn do_mount( +pub async fn do_mount( mountpoint: &Arc, source: &str, mountpoint_str: &str, @@ -101,7 +110,7 @@ pub fn do_mount( let creators = { MOUNT_CREATORS.lock() }; 
creators.get(fstype).ok_or(ENODEV)?.clone() }; - let mount = creator.create_mount(source, flags, mountpoint)?; + let mount = creator.create_mount(source, flags, mountpoint).await?; let root_dentry = mount.root().clone(); @@ -165,8 +174,7 @@ impl Dentry { .cloned() .expect("tmpfs not registered."); - let mount = creator - .create_mount(&source, mount_flags, &DROOT) + let mount = block_on(creator.create_mount(&source, mount_flags, &DROOT)) .expect("Failed to create root mount."); let root_dentry = mount.root().clone(); diff --git a/src/kernel/vfs/superblock.rs b/src/kernel/vfs/superblock.rs new file mode 100644 index 00000000..85b28c01 --- /dev/null +++ b/src/kernel/vfs/superblock.rs @@ -0,0 +1,127 @@ +use core::{ + marker::Unsize, + ops::{CoerceUnsized, Deref}, +}; + +use alloc::sync::{Arc, Weak}; +use eonix_sync::RwLock; + +use crate::{kernel::constants::EIO, prelude::KResult}; + +use super::types::DeviceId; + +pub trait SuperBlock: Send + Sync + 'static {} + +#[derive(Debug, Clone)] +pub struct SuperBlockInfo { + pub io_blksize: u32, + pub device_id: DeviceId, + pub read_only: bool, +} + +pub struct SuperBlockLock(()); + +pub struct SuperBlockComplex +where + Backend: SuperBlock + ?Sized, +{ + pub info: SuperBlockInfo, + pub rwsem: RwLock, + pub backend: Backend, +} + +pub struct SbRef(Weak>) +where + S: SuperBlock + ?Sized; + +pub struct SbUse(Arc>) +where + S: SuperBlock + ?Sized; + +impl SbRef +where + S: SuperBlock + ?Sized, +{ + pub fn try_get(&self) -> Option> { + self.0.upgrade().map(|arc| SbUse(arc)) + } + + pub fn get(&self) -> KResult> { + self.try_get().ok_or(EIO) + } + + pub fn from(sb: &SbUse) -> Self { + SbRef(Arc::downgrade(&sb.0)) + } + + pub fn eq(&self, other: &SbRef) -> bool + where + U: SuperBlock + ?Sized, + { + core::ptr::addr_eq(self.0.as_ptr(), other.0.as_ptr()) + } +} + +impl SbUse +where + S: SuperBlock, +{ + pub fn new(info: SuperBlockInfo, backend: S) -> Self { + Self(Arc::new(SuperBlockComplex { + info, + rwsem: 
RwLock::new(SuperBlockLock(())), + backend, + })) + } + + pub fn new_cyclic(info: SuperBlockInfo, backend_func: impl FnOnce(SbRef) -> S) -> Self { + Self(Arc::new_cyclic(|weak| SuperBlockComplex { + info, + rwsem: RwLock::new(SuperBlockLock(())), + backend: backend_func(SbRef(weak.clone())), + })) + } +} + +impl Clone for SbRef +where + S: SuperBlock + ?Sized, +{ + fn clone(&self) -> Self { + SbRef(self.0.clone()) + } +} + +impl Clone for SbUse +where + S: SuperBlock + ?Sized, +{ + fn clone(&self) -> Self { + SbUse(self.0.clone()) + } +} + +impl CoerceUnsized> for SbRef +where + T: SuperBlock + Unsize + ?Sized, + U: SuperBlock + ?Sized, +{ +} + +impl CoerceUnsized> for SbUse +where + T: SuperBlock + Unsize + ?Sized, + U: SuperBlock + ?Sized, +{ +} + +impl Deref for SbUse +where + S: SuperBlock + ?Sized, +{ + type Target = SuperBlockComplex; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} diff --git a/src/kernel/vfs/types/device_id.rs b/src/kernel/vfs/types/device_id.rs new file mode 100644 index 00000000..cf3ea886 --- /dev/null +++ b/src/kernel/vfs/types/device_id.rs @@ -0,0 +1,36 @@ +use core::fmt::{Debug, Display, Formatter}; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct DeviceId { + pub major: u16, + pub minor: u16, +} + +impl DeviceId { + pub const fn new(major: u16, minor: u16) -> Self { + Self { major, minor } + } + + pub const fn from_raw(raw: u32) -> Self { + Self { + major: (raw >> 16) as u16, + minor: (raw & 0xFFFF) as u16, + } + } + + pub const fn to_raw(self) -> u32 { + ((self.major as u32) << 16) | (self.minor as u32) + } +} + +impl Debug for DeviceId { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "DeviceId({:04x}:{:04x})", self.major, self.minor) + } +} + +impl Display for DeviceId { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "{:04x}:{:04x}", self.major, self.minor) + } +} diff --git a/src/kernel/vfs/types/mod.rs b/src/kernel/vfs/types/mod.rs new 
file mode 100644 index 00000000..4a7505f7 --- /dev/null +++ b/src/kernel/vfs/types/mod.rs @@ -0,0 +1,5 @@ +mod device_id; +mod mode; + +pub use device_id::DeviceId; +pub use mode::{Format, Mode, Permission}; diff --git a/src/kernel/vfs/types/mode.rs b/src/kernel/vfs/types/mode.rs new file mode 100644 index 00000000..dc1b88ec --- /dev/null +++ b/src/kernel/vfs/types/mode.rs @@ -0,0 +1,169 @@ +use crate::kernel::{ + constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG}, + syscall::{FromSyscallArg, SyscallRetVal}, +}; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Mode(u32); + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Format { + REG, + DIR, + LNK, + BLK, + CHR, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Permission(u32); + +impl Mode { + pub const fn new(bits: u32) -> Self { + Self(bits) + } + + pub const fn is_blk(&self) -> bool { + (self.0 & S_IFMT) == S_IFBLK + } + + pub const fn is_chr(&self) -> bool { + (self.0 & S_IFMT) == S_IFCHR + } + + pub const fn bits(&self) -> u32 { + self.0 + } + + pub const fn format_bits(&self) -> u32 { + self.0 & S_IFMT + } + + pub const fn non_format_bits(&self) -> u32 { + self.0 & !S_IFMT + } + + pub fn format(&self) -> Format { + match self.format_bits() { + S_IFREG => Format::REG, + S_IFDIR => Format::DIR, + S_IFLNK => Format::LNK, + S_IFBLK => Format::BLK, + S_IFCHR => Format::CHR, + _ => panic!("unknown format bits: {:#o}", self.format_bits()), + } + } + + pub fn perm(&self) -> Permission { + Permission::new(self.non_format_bits()) + } + + pub const fn non_format(&self) -> Self { + Self::new(self.non_format_bits()) + } + + pub const fn set_perm(&mut self, perm: Permission) { + self.0 = self.format_bits() | perm.bits(); + } +} + +impl Format { + pub const fn as_raw(&self) -> u32 { + match self { + Self::REG => S_IFREG, + Self::DIR => S_IFDIR, + Self::LNK => S_IFLNK, + Self::BLK => S_IFBLK, + Self::CHR => S_IFCHR, + } + } +} + +impl Permission { + const RWX: [&str; 8] = ["---", "--x", "-w-", 
"-wx", "r--", "r-x", "rw-", "rwx"]; + + pub const fn new(perm_bits: u32) -> Self { + Self(perm_bits & 0o7777) + } + + pub const fn bits(&self) -> u32 { + self.0 + } + + pub const fn mask_with(&self, mask: Self) -> Self { + Self(self.0 & mask.0) + } +} + +impl core::fmt::Debug for Mode { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.non_format_bits() & !0o777 { + 0 => write!( + f, + "Mode({format:?}, {perm:#o})", + format = self.format(), + perm = self.non_format_bits() + )?, + rem => write!( + f, + "Mode({format:?}, {perm:#o}, rem={rem:#x})", + format = self.format(), + perm = self.non_format_bits() & 0o777 + )?, + } + + Ok(()) + } +} + +impl core::fmt::Debug for Format { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::REG => write!(f, "REG"), + Self::DIR => write!(f, "DIR"), + Self::LNK => write!(f, "LNK"), + Self::BLK => write!(f, "BLK"), + Self::CHR => write!(f, "CHR"), + } + } +} + +impl core::fmt::Debug for Permission { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let owner = self.0 >> 6 & 0o7; + let group = self.0 >> 3 & 0o7; + let other = self.0 & 0o7; + + write!( + f, + "{}{}{}", + Self::RWX[owner as usize], + Self::RWX[group as usize], + Self::RWX[other as usize] + ) + } +} + +impl FromSyscallArg for Mode { + fn from_arg(value: usize) -> Self { + Mode::new(value as u32) + } +} + +impl SyscallRetVal for Mode { + fn into_retval(self) -> Option { + Some(self.bits() as usize) + } +} + +impl FromSyscallArg for Permission { + fn from_arg(value: usize) -> Self { + Permission::new(value as u32) + } +} + +impl SyscallRetVal for Permission { + fn into_retval(self) -> Option { + Some(self.bits() as usize) + } +} diff --git a/src/kernel/vfs/vfs.rs b/src/kernel/vfs/vfs.rs deleted file mode 100644 index ee66f0b6..00000000 --- a/src/kernel/vfs/vfs.rs +++ /dev/null @@ -1,10 +0,0 @@ -use crate::prelude::*; - -use super::DevId; - -#[allow(dead_code)] -pub 
trait Vfs: Send + Sync + AsAny { - fn io_blksize(&self) -> usize; - fn fs_devid(&self) -> DevId; - fn is_read_only(&self) -> bool; -} diff --git a/src/lib.rs b/src/lib.rs index 80d24c28..98e196f8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,10 +2,12 @@ #![no_main] #![feature(allocator_api)] #![feature(c_size_t)] +#![feature(coerce_unsized)] #![feature(concat_idents)] #![feature(arbitrary_self_types)] #![feature(get_mut_unchecked)] #![feature(macro_metavar_expr)] +#![feature(unsize)] extern crate alloc; @@ -46,8 +48,8 @@ use kernel::{ task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, - inode::Mode, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, + types::Permission, FsContext, }, CharDevice, @@ -192,16 +194,16 @@ async fn init_process(early_kstack: PRange) { { // We might want the serial initialized as soon as possible. driver::serial::init().unwrap(); - driver::e1000e::register_e1000e_driver(); - driver::ahci::register_ahci_driver(); + driver::e1000e::register_e1000e_driver().await; + driver::ahci::register_ahci_driver().await; } #[cfg(target_arch = "riscv64")] { driver::serial::init().unwrap(); driver::virtio::init_virtio_devices(); - driver::e1000e::register_e1000e_driver(); - driver::ahci::register_ahci_driver(); + driver::e1000e::register_e1000e_driver().await; + driver::ahci::register_ahci_driver().await; driver::goldfish_rtc::probe(); } @@ -209,21 +211,26 @@ async fn init_process(early_kstack: PRange) { { driver::serial::init().unwrap(); driver::virtio::init_virtio_devices(); - driver::e1000e::register_e1000e_driver(); - driver::ahci::register_ahci_driver(); + driver::e1000e::register_e1000e_driver().await; + driver::ahci::register_ahci_driver().await; } fs::tmpfs::init(); - fs::procfs::init(); + fs::procfs::init().await; fs::fat32::init(); - fs::ext4::init(); + // fs::ext4::init(); let load_info = { // mount fat32 /mnt directory let fs_context = FsContext::global(); - let mnt_dir = 
Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap(); + let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true) + .await + .unwrap(); - mnt_dir.mkdir(Mode::new(0o755)).unwrap(); + mnt_dir + .mkdir(Permission::new(0o755)) + .await + .expect("Failed to create /mnt directory"); do_mount( &mnt_dir, @@ -232,6 +239,7 @@ async fn init_process(early_kstack: PRange) { "fat32", MS_RDONLY | MS_NOATIME | MS_NODEV | MS_NOSUID, ) + .await .unwrap(); let init_names = [&b"/init"[..], &b"/sbin/init"[..], &b"/mnt/initsh"[..]]; @@ -239,7 +247,7 @@ async fn init_process(early_kstack: PRange) { let mut init_name = None; let mut init = None; for name in init_names { - if let Ok(dentry) = Dentry::open(fs_context, Path::new(name).unwrap(), true) { + if let Ok(dentry) = Dentry::open(fs_context, Path::new(name).unwrap(), true).await { if dentry.is_valid() { init_name = Some(CString::new(name).unwrap()); init = Some(dentry); @@ -261,6 +269,7 @@ async fn init_process(early_kstack: PRange) { ]; ProgramLoader::parse(fs_context, init_name, init.clone(), argv, envp) + .await .expect("Failed to parse init program") .load() .await diff --git a/src/prelude.rs b/src/prelude.rs index b3dbe2ce..880489da 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -18,34 +18,6 @@ pub(crate) use crate::kernel::console::{ pub(crate) use alloc::{boxed::Box, string::String, vec, vec::Vec}; -pub(crate) use core::{any::Any, fmt::Write, marker::PhantomData, str}; +pub(crate) use core::{fmt::Write, marker::PhantomData, str}; pub use crate::sync::Spin; - -#[allow(dead_code)] -pub trait AsAny: Send + Sync { - fn as_any(&self) -> &dyn Any; - fn as_any_mut(&mut self) -> &mut dyn Any; -} - -macro_rules! impl_any { - ($t:ty) => { - impl AsAny for $t { - fn as_any(&self) -> &dyn Any { - self - } - - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - } - }; -} - -macro_rules! 
addr_of_mut_field { - ($pointer:expr, $field:ident) => { - core::ptr::addr_of_mut!((*$pointer).$field) - }; - } - - pub(crate) use {addr_of_mut_field, impl_any}; From 3392e46ca312b4490170309cab94ccca37b9f7d2 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 14 Sep 2025 21:46:19 +0800 Subject: [PATCH 31/54] vfs, rcu: rework path walking with new rcu syntax The old path walking algorithm requires recursion, which is not supported in async rust. So we boxed them all as a temporary solution in previous commits. This would introduce massive overhead even for fast path walks just because we might sleep in `readlink()` and `lookup()` calls. The new proposed method is to break the walk into several phases similar to that in Linux: RCU walk and REF walk. The RCU walk will hold the RCU lock and never blocks so the function itself can be non-async. If we hit non-present dentries, we will fall back to REF walk. In REF walks, we clone the Arcs and consult the VFS layer for an accurate answer. Note that in both of the methods mentioned above, symlinks are not handled and will be returned directly with all path components left untouched. We have a dedicated async function to follow the symlinks by recursively calling the walk function. This can be slow and won't be called frequently. So we wrapped the function with `Box::pin()` to break the recursion chain. After the symlink resolution is done, we return to the original position and continue the walk. We found that the association of an inode to a dentry is one-way. So the `data` RCUPointer field is actually unnecessary and we can use the atomic dentry type to sync readers with the writer. This way we can eliminate `DentryData` allocations and improve performance. We also introduced a new RCU read lock syntax. In the RCU walk mentioned above, we need to store dentry references protected by some RCU read lock. With the old syntax, we can't express the lifetime associated with the common RCU read lock.
The new syntax provides a `rcu_read_lock()` method to acquire the RCU read lock. The lock returned has an associated lifetime so we can use it in the RCU session. Signed-off-by: greatbridf --- Cargo.lock | 5 + Cargo.toml | 8 +- crates/arcref/Cargo.lock | 7 + crates/arcref/Cargo.toml | 11 + crates/arcref/src/arcref.rs | 216 +++++++++++++++++ crates/arcref/src/lib.rs | 8 + crates/posix_types/src/result.rs | 6 + src/kernel/vfs/dentry.rs | 394 +++++++++++++------------------ src/kernel/vfs/dentry/dcache.rs | 76 +++++- src/kernel/vfs/dentry/walk.rs | 370 +++++++++++++++++++++++++++++ src/path.rs | 40 ++-- src/rcu.rs | 99 ++++---- 12 files changed, 942 insertions(+), 298 deletions(-) create mode 100644 crates/arcref/Cargo.lock create mode 100644 crates/arcref/Cargo.toml create mode 100644 crates/arcref/src/arcref.rs create mode 100644 crates/arcref/src/lib.rs create mode 100644 src/kernel/vfs/dentry/walk.rs diff --git a/Cargo.lock b/Cargo.lock index 32868677..f85a9d2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,10 @@ dependencies = [ "log", ] +[[package]] +name = "arcref" +version = "0.1.0" + [[package]] name = "async-trait" version = "0.1.89" @@ -155,6 +159,7 @@ dependencies = [ "acpi", "align_ext", "another_ext4", + "arcref", "async-trait", "atomic_unique_refcell", "bitflags", diff --git a/Cargo.toml b/Cargo.toml index bc7e7b0c..5158025e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,9 @@ edition = "2021" crate-type = ["bin"] [dependencies] +arcref = { path = "./crates/arcref", default-features = false, features = [ + "alloc", +] } atomic_unique_refcell = { path = "./crates/atomic_unique_refcell", features = [ "no_std", ] } @@ -35,7 +38,10 @@ stalloc = { version = "0.6.1", default-features = false, features = [ "allocator-api", ] } async-trait = "0.1.89" -futures = { version = "0.3.31", features = ["alloc", "async-await"], default-features = false } +futures = { version = "0.3.31", features = [ + "alloc", + "async-await", +], default-features = false }
[target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/crates/arcref/Cargo.lock b/crates/arcref/Cargo.lock new file mode 100644 index 00000000..3c4e1567 --- /dev/null +++ b/crates/arcref/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "arcref" +version = "0.1.0" diff --git a/crates/arcref/Cargo.toml b/crates/arcref/Cargo.toml new file mode 100644 index 00000000..a0af89f8 --- /dev/null +++ b/crates/arcref/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "arcref" +version = "0.1.0" +edition = "2024" + +[dependencies] + +[features] +alloc = [] +std = ["alloc"] +default = ["std"] diff --git a/crates/arcref/src/arcref.rs b/crates/arcref/src/arcref.rs new file mode 100644 index 00000000..3d01852d --- /dev/null +++ b/crates/arcref/src/arcref.rs @@ -0,0 +1,216 @@ +#[cfg(not(feature = "std"))] +use core::{ + borrow::Borrow, + marker::{PhantomData, Unsize}, + mem::ManuallyDrop, + ops::{Deref, DispatchFromDyn}, +}; + +#[cfg(all(not(feature = "std"), feature = "alloc"))] +extern crate alloc; + +#[cfg(all(not(feature = "std"), feature = "alloc"))] +use alloc::sync::Arc; + +#[cfg(feature = "std")] +use std::{ + borrow::Borrow, + marker::{PhantomData, Unsize}, + mem::ManuallyDrop, + ops::{Deref, DispatchFromDyn}, + sync::Arc, +}; + +pub trait AsArcRef +where + T: ?Sized, +{ + /// Borrow the [`Arc`] and convert the reference into [`ArcRef`]. + fn aref(&self) -> ArcRef<'_, T>; +} + +pub struct ArcRef<'a, T: ?Sized> { + ptr: *const T, + _phantom: PhantomData<&'a ()>, +} + +unsafe impl Send for ArcRef<'_, T> {} +unsafe impl Sync for ArcRef<'_, T> {} + +#[cfg(any(feature = "std", feature = "alloc"))] +impl<'a, T: ?Sized> ArcRef<'a, T> { + pub fn new(arc: &'a Arc) -> Self { + Self { + ptr: Arc::as_ptr(arc), + _phantom: PhantomData, + } + } + + /// Create a new `ArcRef` from a raw pointer. 
+ /// + /// # Safety + /// The given pointer MUST be created by `Arc::as_ptr` or `Arc::into_raw`. + /// The caller is responsible to ensure that the pointer is valid for the + /// lifetime of the `ArcRef`. + pub unsafe fn new_unchecked(arc_ptr: *const T) -> Self { + Self { + ptr: arc_ptr, + _phantom: PhantomData, + } + } + + pub fn with_arc(self, func: Func) -> Out + where + Func: FnOnce(&Arc) -> Out, + { + func(&ManuallyDrop::new(unsafe { Arc::from_raw(self.ptr) })) + } + + pub fn clone_arc(self) -> Arc { + self.with_arc(|arc| arc.clone()) + } + + pub fn ptr_eq_arc(self, other: &Arc) -> bool { + self.with_arc(|arc| Arc::ptr_eq(arc, other)) + } +} + +#[cfg(all(not(feature = "std"), feature = "alloc"))] +impl AsArcRef for Arc +where + T: ?Sized, +{ + fn aref(&self) -> ArcRef<'_, T> { + ArcRef::new(self) + } +} + +impl AsRef for ArcRef<'_, T> +where + T: ?Sized, +{ + fn as_ref(&self) -> &T { + self.deref() + } +} + +impl Borrow for ArcRef<'_, T> +where + T: ?Sized, +{ + fn borrow(&self) -> &T { + self.deref() + } +} + +impl<'a, T> Clone for ArcRef<'a, T> +where + T: ?Sized, +{ + fn clone(&self) -> Self { + Self { + ptr: self.ptr, + _phantom: PhantomData, + } + } +} + +impl Copy for ArcRef<'_, T> where T: ?Sized {} + +impl Deref for ArcRef<'_, T> { + type Target = T; + + fn deref(&self) -> &T { + unsafe { + // SAFETY: `self.ptr` points to a valid `T` instance because it was + // created from a valid `Arc`. 
+ self.ptr.as_ref().unwrap_unchecked() + } + } +} + +impl<'a, T, U> DispatchFromDyn> for ArcRef<'a, T> +where + T: ?Sized + Unsize, + U: ?Sized, +{ +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn create_from_arc() { + let data = Arc::new(42); + let _arc_ref = ArcRef::new(&data); + } + + #[test] + fn deref() { + let data = Arc::new(42); + let arc_ref = ArcRef::new(&data); + + assert_eq!(*arc_ref, 42); + } + + #[test] + fn clone_into_arc() { + let data = Arc::new(42); + let arc_ref = ArcRef::new(&data); + + let cloned = arc_ref.clone_arc(); + + assert_eq!(Arc::strong_count(&data), 2); + assert_eq!(*cloned, 42); + } + + #[test] + fn dyn_compatible_receiver() { + struct Data(u32); + + trait Trait { + fn foo(self: ArcRef) -> u32; + } + + impl Trait for Data { + fn foo(self: ArcRef) -> u32 { + self.0 + } + } + + let data = Arc::new(Data(42)); + let arc_ref = ArcRef::new(&data); + + assert_eq!(arc_ref.foo(), 42); + } + + #[test] + fn clone_from_train_methods() { + struct Data(u32); + + trait Trait { + fn foo(&self) -> u32; + + fn clone_self(self: ArcRef) -> Arc; + } + + impl Trait for Data { + fn foo(&self) -> u32 { + self.0 + } + + fn clone_self(self: ArcRef) -> Arc { + self.clone_arc() as _ + } + } + + let data = Arc::new(Data(42)); + let arc_ref = ArcRef::new(&data); + + let cloned = arc_ref.clone_self(); + + assert_eq!(arc_ref.foo(), 42); + assert_eq!(cloned.foo(), 42); + } +} diff --git a/crates/arcref/src/lib.rs b/crates/arcref/src/lib.rs new file mode 100644 index 00000000..83a61985 --- /dev/null +++ b/crates/arcref/src/lib.rs @@ -0,0 +1,8 @@ +#![cfg_attr(not(feature = "std"), no_std)] +#![feature(arbitrary_self_types)] +#![feature(dispatch_from_dyn)] +#![feature(unsize)] + +mod arcref; + +pub use arcref::{ArcRef, AsArcRef}; diff --git a/crates/posix_types/src/result.rs b/crates/posix_types/src/result.rs index a10ff0ad..1535c444 100644 --- a/crates/posix_types/src/result.rs +++ b/crates/posix_types/src/result.rs @@ -1,14 +1,18 @@ pub enum 
PosixError { + ENOENT = 2, EFAULT = 14, EXDEV = 18, + ENOTDIR = 20, EINVAL = 22, } impl From for u32 { fn from(error: PosixError) -> Self { match error { + PosixError::ENOENT => 2, PosixError::EFAULT => 14, PosixError::EXDEV => 18, + PosixError::ENOTDIR => 20, PosixError::EINVAL => 22, } } @@ -17,8 +21,10 @@ impl From for u32 { impl core::fmt::Debug for PosixError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { + Self::ENOENT => write!(f, "ENOENT"), Self::EFAULT => write!(f, "EFAULT"), Self::EXDEV => write!(f, "EXDEV"), + Self::ENOTDIR => write!(f, "ENOTDIR"), Self::EINVAL => write!(f, "EINVAL"), } } diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs index 5ac4e407..c1eb8cb8 100644 --- a/src/kernel/vfs/dentry.rs +++ b/src/kernel/vfs/dentry.rs @@ -1,45 +1,62 @@ pub mod dcache; +mod walk; -use super::{ - inode::{Ino, Inode, InodeUse, RenameData, WriteOffset}, - types::{DeviceId, Format, Mode, Permission}, - FsContext, -}; -use crate::{ - hash::KernelHasher, - io::{Buffer, ByteBuffer}, - kernel::{block::BlockDevice, CharDevice}, - path::{Path, PathComponent}, - prelude::*, - rcu::{RCUNode, RCUPointer, RCUReadGuard}, -}; -use crate::{ - io::Stream, - kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE}, -}; -use alloc::sync::Arc; use core::{ + cell::UnsafeCell, fmt, - future::Future, hash::{BuildHasher, BuildHasherDefault, Hasher}, - pin::Pin, - sync::atomic::{AtomicPtr, AtomicU64, Ordering}, + sync::atomic::{AtomicPtr, AtomicU64, AtomicU8, Ordering}, }; + +use alloc::sync::Arc; +use arcref::AsArcRef; use eonix_sync::LazyLock; use pointers::BorrowedArc; use posix_types::{namei::RenameFlags, open::OpenFlags, result::PosixError, stat::StatX}; -#[derive(PartialEq, Eq)] +use crate::{ + hash::KernelHasher, + io::Buffer, + io::Stream, + kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, EPERM, ERANGE}, + kernel::{block::BlockDevice, CharDevice}, + path::Path, + prelude::*, + 
rcu::{rcu_read_lock, RCUNode, RCUPointer, RCUReadGuard}, +}; + +use super::{ + inode::{Ino, Inode, InodeUse, RenameData, WriteOffset}, + types::{DeviceId, Format, Mode, Permission}, + FsContext, +}; + +const D_INVALID: u8 = 0; +const D_REGULAR: u8 = 1; +const D_DIRECTORY: u8 = 2; +const D_SYMLINK: u8 = 3; + +#[derive(Debug, PartialEq, Eq)] enum DentryKind { - Regular, - Directory, - Symlink, - Mountpoint, + Regular = D_REGULAR as isize, + Directory = D_DIRECTORY as isize, + Symlink = D_SYMLINK as isize, } -struct DentryData { - inode: InodeUse, - kind: DentryKind, +/// The [`Inode`] associated with a [`Dentry`]. +/// +/// We could assign an inode to a negative dentry exactly once when the dentry +/// is invalid and we create a file or directory on it, or the dentry is brought +/// to the dcache by [lookup()]. +/// +/// This guarantees that as long as we acquire a non-invalid from [`Self::kind`], +/// we are synced with the writer and can safely read the [`Self::inode`] field +/// without reading torn data. +/// +/// [lookup()]: crate::kernel::vfs::inode::InodeDirOps::lookup +struct AssociatedInode { + kind: UnsafeCell>, + inode: UnsafeCell>>, } /// # Safety @@ -58,8 +75,7 @@ pub struct Dentry { prev: AtomicPtr, next: AtomicPtr, - // RCU Mutable - data: RCUPointer, + inode: AssociatedInode, } pub(super) static DROOT: LazyLock> = LazyLock::new(|| { @@ -69,7 +85,7 @@ pub(super) static DROOT: LazyLock> = LazyLock::new(|| { hash: AtomicU64::new(0), prev: AtomicPtr::default(), next: AtomicPtr::default(), - data: RCUPointer::empty(), + inode: AssociatedInode::new(), }); unsafe { @@ -119,50 +135,19 @@ impl Dentry { self.hash.store(hash, Ordering::Relaxed); } - - async fn find(self: &Arc, name: &[u8]) -> KResult> { - let data = self.data.load(); - let data = data.as_ref().ok_or(ENOENT)?; - - if data.kind != DentryKind::Directory { - return Err(ENOTDIR); - } - - match name { - b"." => Ok(self.clone()), - b".." 
=> Ok(self.parent().clone()), - _ => { - let dentry = Dentry::create(self.clone(), name); - - if let Some(found) = dcache::d_find_fast(&dentry) { - unsafe { - // SAFETY: This is safe because the dentry is never shared with - // others so we can drop them safely. - let _ = dentry.name.swap(None); - let _ = dentry.parent.swap(None); - } - - return Ok(found); - } - - let _ = dcache::d_try_revalidate(&dentry).await; - dcache::d_add(dentry.clone()); - - Ok(dentry) - } - } - } } impl Dentry { pub fn create(parent: Arc, name: &[u8]) -> Arc { + // TODO!!!: don't acquire our parent's refcount here... + let val = Arc::new(Self { parent: RCUPointer::new(parent), name: RCUPointer::new(Arc::new(Arc::from(name))), hash: AtomicU64::new(0), prev: AtomicPtr::default(), next: AtomicPtr::default(), - data: RCUPointer::empty(), + inode: AssociatedInode::new(), }); val.rehash(); @@ -196,27 +181,12 @@ impl Dentry { .map_or(core::ptr::null(), |parent| Arc::as_ptr(&parent)) } - fn save(&self, inode: InodeUse, kind: DentryKind) { - let new = DentryData { inode, kind }; - - // TODO!!!: We don't actually need to use `RCUPointer` here - // Safety: this function may only be called from `create`-like functions which requires the - // superblock's write locks to be held, so only one creation can happen at a time and we - // can't get a reference to the old data. 
- let old = unsafe { self.data.swap(Some(Arc::new(new))) }; - assert!(old.is_none()); - } - pub fn fill(&self, file: InodeUse) { - match file.format() { - Format::REG | Format::BLK | Format::CHR => self.save(file, DentryKind::Regular), - Format::DIR => self.save(file, DentryKind::Directory), - Format::LNK => self.save(file, DentryKind::Symlink), - } + self.inode.store(file); } pub fn inode(&self) -> Option> { - self.data.load().as_ref().map(|data| data.inode.clone()) + self.inode.load().map(|(_, inode)| inode.clone()) } pub fn get_inode(&self) -> KResult> { @@ -224,181 +194,85 @@ impl Dentry { } pub fn is_directory(&self) -> bool { - let data = self.data.load(); - data.as_ref() - .map_or(false, |data| data.kind == DentryKind::Directory) + self.inode + .load() + .map_or(false, |(kind, _)| kind == DentryKind::Directory) } pub fn is_valid(&self) -> bool { - self.data.load().is_some() + self.inode.load().is_some() } pub async fn open_check(self: &Arc, flags: OpenFlags, perm: Permission) -> KResult<()> { - let data = self.data.load(); - - if data.is_some() { - if flags.contains(OpenFlags::O_CREAT | OpenFlags::O_EXCL) { - Err(EEXIST) - } else { - Ok(()) - } - } else { - if !flags.contains(OpenFlags::O_CREAT) { - return Err(ENOENT); - } - - let parent = self.parent().get_inode()?; - parent.create(self, perm).await - } - } -} - -impl Dentry { - fn resolve_directory( - context: &FsContext, - dentry: Arc, - nrecur: u32, - ) -> Pin>> + use<'_>>> { - Box::pin(async move { - if nrecur >= 16 { - return Err(ELOOP); - } - - let data = dentry.data.load(); - let data = data.as_ref().ok_or(ENOENT)?; - - match data.kind { - DentryKind::Regular => Err(ENOTDIR), - DentryKind::Directory => Ok(dentry), - DentryKind::Symlink => { - let mut buffer = [0u8; 256]; - let mut buffer = ByteBuffer::new(&mut buffer); - - data.inode.readlink(&mut buffer).await?; - let path = Path::new(buffer.data())?; - - let dentry = - Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1) - 
.await?; - - Self::resolve_directory(context, dentry, nrecur + 1).await + match self.inode.load() { + Some(_) => { + if flags.contains(OpenFlags::O_CREAT | OpenFlags::O_EXCL) { + Err(EEXIST) + } else { + Ok(()) } - _ => panic!("Invalid dentry flags"), - } - }) - } - - pub fn open_recursive<'r, 'a: 'r, 'b: 'r, 'c: 'r>( - context: &'a FsContext, - cwd: &'b Arc, - path: Path<'c>, - follow: bool, - nrecur: u32, - ) -> Pin>> + 'r>> { - Box::pin(async move { - // too many recursive search layers will cause stack overflow - // so we use 16 for now - if nrecur >= 16 { - return Err(ELOOP); } - - let mut cwd = if path.is_absolute() { - context.fsroot.clone() - } else { - cwd.clone() - }; - - for item in path.iter() { - if let PathComponent::TrailingEmpty = item { - if cwd.data.load().as_ref().is_none() { - return Ok(cwd); - } + None => { + if !flags.contains(OpenFlags::O_CREAT) { + return Err(ENOENT); } - cwd = Self::resolve_directory(context, cwd, nrecur).await?; - - match item { - PathComponent::TrailingEmpty | PathComponent::Current => {} // pass - PathComponent::Parent => { - if !cwd.hash_eq(&context.fsroot) { - let parent = cwd.parent().clone(); - cwd = Self::resolve_directory(context, parent, nrecur).await?; - } - continue; - } - PathComponent::Name(name) => { - cwd = cwd.find(name).await?; - } - } + let parent = self.parent().get_inode()?; + parent.create(self, perm).await } - - if follow { - let data = cwd.data.load(); - - if let Some(data) = data.as_ref() { - if data.kind == DentryKind::Symlink { - let data = cwd.data.load(); - let data = data.as_ref().unwrap(); - let mut buffer = [0u8; 256]; - let mut buffer = ByteBuffer::new(&mut buffer); - - data.inode.readlink(&mut buffer).await?; - let path = Path::new(buffer.data())?; - - let parent = cwd.parent().clone(); - cwd = - Self::open_recursive(context, &parent, path, true, nrecur + 1).await?; - } - } - } - - Ok(cwd) - }) + } } +} +impl Dentry { pub async fn open( context: &FsContext, - path: Path<'_>, + path: &Path, 
follow_symlinks: bool, ) -> KResult> { let cwd = context.cwd.lock().clone(); - Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0).await + Self::open_at(context, &cwd, path, follow_symlinks).await } pub async fn open_at( context: &FsContext, at: &Arc, - path: Path<'_>, + path: &Path, follow_symlinks: bool, ) -> KResult> { - Dentry::open_recursive(context, at, path, follow_symlinks, 0).await - } + let mut found = context.start_recursive_walk(at, path).await?; - pub fn get_path( - self: &Arc, - context: &FsContext, - buffer: &mut dyn Buffer, - ) -> KResult<()> { - let locked_parent = self.parent(); + if !follow_symlinks { + return Ok(found); + } - let path = { - let mut path = vec![]; + loop { + match found.inode.load() { + Some((DentryKind::Symlink, inode)) => { + found = context.follow_symlink(found.aref(), inode, 0).await?; + } + _ => return Ok(found), + } + } + } - let mut parent = locked_parent.borrow(); - let mut dentry = BorrowedArc::new(self); + pub fn get_path(self: &Arc, context: &FsContext, buffer: &mut dyn Buffer) -> KResult<()> { + let rcu_read = rcu_read_lock(); - while Arc::as_ptr(&dentry) != Arc::as_ptr(&context.fsroot) { - if path.len() > 32 { - return Err(ELOOP); - } + let mut path = vec![]; + + let mut current = self.aref(); + let mut parent = self.parent.dereference(&rcu_read).unwrap(); - path.push(dentry.name().clone()); - dentry = parent; - parent = dentry.parent.load_protected(&locked_parent).unwrap(); + while !current.ptr_eq_arc(&context.fsroot) { + if path.len() > 32 { + return Err(ELOOP); } - path - }; + path.push(current.name.dereference(&rcu_read).unwrap()); + current = parent; + parent = current.parent.dereference(&rcu_read).unwrap(); + } buffer.fill(b"/")?.ok_or(ERANGE)?; for item in path.iter().rev().map(|name| name.as_ref()) { @@ -533,3 +407,71 @@ impl Dentry { old_parent.rename(rename_data).await } } + +impl DentryKind { + fn into_raw(self) -> u8 { + unsafe { core::mem::transmute(self) } + } + + fn from_raw(raw: u8) -> 
Option { + unsafe { core::mem::transmute(raw) } + } + + fn as_atomic(me: &UnsafeCell>) -> &AtomicU8 { + unsafe { AtomicU8::from_ptr(me.get().cast()) } + } + + fn atomic_acq(me: &UnsafeCell>) -> Option { + Self::from_raw(Self::as_atomic(me).load(Ordering::Acquire)) + } + + fn atomic_swap_acqrel(me: &UnsafeCell>, kind: Option) -> Option { + Self::from_raw(Self::as_atomic(me).swap(kind.map_or(0, Self::into_raw), Ordering::AcqRel)) + } +} + +impl AssociatedInode { + fn new() -> Self { + Self { + inode: UnsafeCell::new(None), + kind: UnsafeCell::new(None), + } + } + + fn store(&self, inode: InodeUse) { + let kind = match inode.format() { + Format::REG | Format::BLK | Format::CHR => DentryKind::Regular, + Format::DIR => DentryKind::Directory, + Format::LNK => DentryKind::Symlink, + }; + + unsafe { + // SAFETY: We should be the first and only one to store the inode as + // is checked below. All other readers reading non-invalid + // kind will see the fully written inode. + self.inode.get().write(Some(inode)); + } + + assert_eq!( + DentryKind::atomic_swap_acqrel(&self.kind, Some(kind)), + None, + "Dentry can only be stored once." 
+ ); + } + + fn kind(&self) -> Option { + DentryKind::atomic_acq(&self.kind) + } + + fn load(&self) -> Option<(DentryKind, &InodeUse)> { + self.kind().map(|kind| unsafe { + let inode = (&*self.inode.get()) + .as_ref() + .expect("Dentry with non-invalid kind has no inode"); + (kind, inode) + }) + } +} + +unsafe impl Send for AssociatedInode {} +unsafe impl Sync for AssociatedInode {} diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs index e2491235..ee7503dc 100644 --- a/src/kernel/vfs/dentry/dcache.rs +++ b/src/kernel/vfs/dentry/dcache.rs @@ -1,11 +1,13 @@ use super::Dentry; use crate::kernel::constants::ENOENT; -use crate::rcu::RCUPointer; +use crate::rcu::{RCUPointer, RCUReadLock}; use crate::{ prelude::*, rcu::{RCUIterator, RCUList}, }; use alloc::sync::Arc; +use arcref::ArcRef; +use core::ops::Deref; use core::sync::atomic::Ordering; use eonix_sync::Mutex; @@ -16,26 +18,47 @@ static DCACHE: [RCUList; 1 << DCACHE_HASH_BITS] = static D_EXCHANGE_LOCK: Mutex<()> = Mutex::new(()); -pub fn d_hinted(dentry: &Dentry) -> &'static RCUList { - let hash = dentry.hash.load(Ordering::Relaxed) as usize & ((1 << DCACHE_HASH_BITS) - 1); +pub trait DCacheItem { + fn d_hash(&self) -> usize; + fn d_parent(&self) -> *const Dentry; + fn d_name<'r, 'a: 'r, 'b: 'a>( + &'a self, + rcu_read: &'b RCUReadLock, + ) -> impl Deref + 'r; +} + +fn d_eq(lhs: &impl DCacheItem, rhs: &impl DCacheItem, rcu_read: &RCUReadLock) -> bool { + lhs.d_hash() == rhs.d_hash() + && lhs.d_parent() == rhs.d_parent() + && *lhs.d_name(rcu_read) == *rhs.d_name(rcu_read) +} + +fn d_hinted(item: &impl DCacheItem) -> &'static RCUList { + let hash = item.d_hash() & ((1 << DCACHE_HASH_BITS) - 1); &DCACHE[hash] } -pub fn d_iter_for(dentry: &Dentry) -> RCUIterator<'static, Dentry> { - d_hinted(dentry).iter() +fn d_iter_for<'rcu>( + item: &impl DCacheItem, + rcu_read: &'rcu RCUReadLock, +) -> RCUIterator<'static, 'rcu, Dentry> { + d_hinted(item).iter(rcu_read) +} + +pub fn d_find_rcu<'rcu>( + 
item: &impl DCacheItem, + rcu_read: &'rcu RCUReadLock, +) -> Option> { + d_iter_for(item, rcu_read).find(|cur_ref| cur_ref.with_arc(|cur| d_eq(cur, item, rcu_read))) } /// Add the dentry to the dcache pub fn d_add(dentry: Arc) { + // TODO: Add `children` field to parent and lock parent dentry to avoid + // concurrent insertion causing duplication. d_hinted(&dentry).insert(dentry); } -pub fn d_find_fast(dentry: &Dentry) -> Option> { - d_iter_for(dentry) - .find(|cur| cur.hash_eq(dentry)) - .map(|dentry| dentry.clone()) -} - /// Call `lookup()` on the parent inode to try find if the dentry points to a valid inode /// /// Silently fail without any side effects @@ -80,3 +103,34 @@ pub async fn d_exchange(old: &Arc, new: &Arc) { d_add(old.clone()); d_add(new.clone()); } + +impl DCacheItem for Arc { + fn d_hash(&self) -> usize { + self.hash.load(Ordering::Relaxed) as usize + } + + fn d_parent(&self) -> *const Dentry { + self.parent_addr() + } + + fn d_name<'r, 'a: 'r, 'b: 'a>( + &'a self, + rcu_read: &'b RCUReadLock, + ) -> impl Deref + 'r { + struct Name<'a>(ArcRef<'a, Arc<[u8]>>); + + impl Deref for Name<'_> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + Name( + self.name + .dereference(rcu_read) + .expect("Dentry should have a non-null name"), + ) + } +} diff --git a/src/kernel/vfs/dentry/walk.rs b/src/kernel/vfs/dentry/walk.rs new file mode 100644 index 00000000..3e401b4b --- /dev/null +++ b/src/kernel/vfs/dentry/walk.rs @@ -0,0 +1,370 @@ +use core::{ + future::Future, + hash::{BuildHasher, BuildHasherDefault, Hasher}, + ops::Deref, + pin::Pin, +}; + +use alloc::{boxed::Box, sync::Arc}; +use arcref::{ArcRef, AsArcRef}; +use posix_types::result::PosixError; + +use crate::{ + hash::KernelHasher, + io::ByteBuffer, + kernel::{ + constants::ELOOP, + vfs::{ + inode::{Inode, InodeUse}, + FsContext, + }, + }, + path::{Path, PathComponent, PathIterator}, + prelude::KResult, + rcu::{rcu_read_lock, RCUReadLock}, +}; + +use super::{ + 
dcache::{self, DCacheItem}, + Dentry, DentryKind, +}; + +struct DentryFind<'a, 'b> { + parent: &'a Dentry, + name: &'b [u8], + hash: usize, +} + +pub enum WalkResultRcu<'rcu, 'path> { + Err(PosixError), + Ok(ArcRef<'rcu, Dentry>), + Symlink { + symlink: ArcRef<'rcu, Dentry>, + inode: InodeUse, + }, + Miss { + parent: ArcRef<'rcu, Dentry>, + name: &'path [u8], + }, +} + +pub enum WalkResult { + Err(PosixError), + Ok(Arc), + Symlink { + symlink: Arc, + inode: InodeUse, + }, +} + +impl Dentry { + /// Quick path of the dentry find operation. + /// + /// Check invalid and non-directory dentries, return immediately on dot and + /// dotdot component, and do a quick rcu dcache lookup. + /// + /// Note that while `Some(dentry)` guarantees present and valid dentry, + /// returning `None` is acceptable if the actual file exists but is not in + /// the dentry cache. If so, we should check again with `lookup`. + fn find_rcu<'r, 's: 'r>( + self: ArcRef<'s, Self>, + name: &[u8], + rcu_read: &'r RCUReadLock, + ) -> Result>, PosixError> { + match self.inode.load() { + Some((DentryKind::Directory, _)) => {} + Some(_) => return Err(PosixError::ENOTDIR), + None => return Err(PosixError::ENOENT), + } + + match name { + b"." => Ok(Some(self)), + b".." => Ok(Some( + self.parent + .dereference(rcu_read) + .expect("The field `parent` should be non-null"), + )), + _ => { + let dentry_find = DentryFind::new(&self, name); + Ok(dcache::d_find_rcu(&dentry_find, rcu_read)) + } + } + } + + async fn find_slow(self: &Arc, name: &[u8]) -> Result, PosixError> { + let dentry = Dentry::create(self.clone(), name); + + let _ = dcache::d_try_revalidate(&dentry).await; + dcache::d_add(dentry.clone()); + + Ok(dentry) + } + + pub async fn find_full(self: &Arc, name: &[u8]) -> Result, PosixError> { + if let Some(dentry) = self.aref().find_rcu(name, &rcu_read_lock())? 
{ + return Ok(dentry.clone_arc()); + } + + self.find_slow(name).await + } +} + +impl FsContext { + /// Walk the pathname and try to find the corresponding dentry FAST without + /// consulting the VFS for invalid dentries encountered. + fn walk_rcu<'rcu, 'path>( + &self, + mut current: ArcRef<'rcu, Dentry>, + iter: &mut PathIterator<'path>, + rcu_read: &'rcu RCUReadLock, + ) -> WalkResultRcu<'rcu, 'path> { + use PathComponent::*; + + loop { + let inode = current.inode.load(); + + if iter.is_empty() { + break; + } + + // Skip symlink resolution in rcu walk without consuming the iter. + if let Some((DentryKind::Symlink, inode)) = inode { + return WalkResultRcu::Symlink { + symlink: current, + inode: inode.clone(), + }; + } + + let Some(component) = iter.next() else { + break; + }; + + match (inode, component) { + // Skip trailing empty and dot for normal directories. + (Some((DentryKind::Directory, _)), TrailingEmpty | Current) => {} + // Walk to parent directory unless we are at the filesystem root. + (Some((DentryKind::Directory, _)), Parent) => { + if current.ptr_eq_arc(&self.fsroot) { + continue; + } + + current = current + .parent + .dereference(&rcu_read) + .expect("parent should exist"); + } + // Normal directory traversal + (Some((DentryKind::Directory, _)), Name(name)) => { + match current.find_rcu(name, &rcu_read) { + Err(err) => return WalkResultRcu::Err(err), + Ok(Some(found)) => { + current = found; + } + Ok(None) => { + return WalkResultRcu::Miss { + name, + parent: current, + }; + } + } + } + // Not a directory, fail and exit. + (Some(_), _) => return WalkResultRcu::Err(PosixError::ENOTDIR), + // Return invalid trailing entries directly. + (None, TrailingEmpty) => return WalkResultRcu::Ok(current), + // Invalid intermediate entries are not acceptable. + (None, _) => return WalkResultRcu::Err(PosixError::ENOENT), + } + } + + WalkResultRcu::Ok(current) + } + + /// Walk the pathname slowly with refcounts held and VFS lookups. 
+ async fn walk_slow(&self, mut current: Arc, iter: &mut PathIterator<'_>) -> WalkResult { + use PathComponent::*; + + loop { + // `current` should be the parent directory and `component` is the + // next path component we are stepping into. + + if iter.is_empty() { + break; + } + + if let Some((DentryKind::Symlink, inode)) = current.inode.load() { + return WalkResult::Symlink { + inode: inode.clone(), + symlink: current, + }; + } + + let Some(component) = iter.next() else { + break; + }; + + match (current.inode.load(), &component) { + // Normal directory traversal + (Some((DentryKind::Directory, _)), _) => {} + // Not a directory, fail and exit. + (Some(_), _) => return WalkResult::Err(PosixError::ENOTDIR), + // Return invalid trailing entries directly. + (None, TrailingEmpty) => return WalkResult::Ok(current), + // Invalid intermediate entries are not acceptable. + (None, _) => return WalkResult::Err(PosixError::ENOENT), + } + + match component { + PathComponent::TrailingEmpty => {} + PathComponent::Current => {} + PathComponent::Parent => { + if current.hash_eq(&self.fsroot) { + continue; + } + + let parent = current.parent().clone(); + current = parent; + } + PathComponent::Name(name) => { + match current.find_full(name).await { + Ok(found) => current = found, + Err(err) => return WalkResult::Err(err), + }; + } + } + } + + WalkResult::Ok(current) + } + + /// Walk the pathname and get an accurate answer. Stop at symlinks. 
+ async fn walk_full( + &self, + current: ArcRef<'_, Dentry>, + iter: &mut PathIterator<'_>, + ) -> WalkResult { + let (parent_slow, name_slow); + + match self.walk_rcu(current, iter, &rcu_read_lock()) { + WalkResultRcu::Err(error) => return WalkResult::Err(error.into()), + WalkResultRcu::Ok(dentry) => return WalkResult::Ok(dentry.clone_arc()), + WalkResultRcu::Symlink { symlink, inode } => { + return WalkResult::Symlink { + symlink: symlink.clone_arc(), + inode, + }; + } + WalkResultRcu::Miss { parent, name } => { + // Fallback to regular refcounted lookup + parent_slow = parent.clone_arc(); + name_slow = name; + } + } + + match parent_slow.find_slow(name_slow).await { + Ok(found) => self.walk_slow(found, iter).await, + Err(err) => return WalkResult::Err(err), + } + } + + pub async fn follow_symlink( + &self, + symlink: ArcRef<'_, Dentry>, + inode: &InodeUse, + nr_follows: u32, + ) -> KResult> { + let mut target = [0; 256]; + let mut target = ByteBuffer::new(&mut target); + inode.readlink(&mut target).await?; + + self.walk_recursive( + &symlink.parent().clone(), + Path::new(target.data()).unwrap(), + nr_follows + 1, + ) + .await + } + + fn follow_symlink_boxed<'r, 'a: 'r, 'b: 'r, 'c: 'r>( + &'a self, + symlink: ArcRef<'b, Dentry>, + inode: &'c InodeUse, + nr_follows: u32, + ) -> Pin>> + Send + 'r>> { + Box::pin(self.follow_symlink(symlink, inode, nr_follows)) + } + + async fn walk_recursive( + &self, + cwd: &Arc, + path: &Path, + nr_follows: u32, + ) -> KResult> { + const MAX_NR_FOLLOWS: u32 = 16; + + let mut current_owned; + let mut current; + if path.is_absolute() { + current = self.fsroot.aref(); + } else { + current = cwd.aref(); + } + + let mut path_iter = path.iter(); + + loop { + match self.walk_full(current, &mut path_iter).await { + WalkResult::Err(posix_error) => return Err(posix_error.into()), + WalkResult::Ok(dentry) => return Ok(dentry), + WalkResult::Symlink { symlink, inode } => { + if nr_follows >= MAX_NR_FOLLOWS { + return Err(ELOOP); + } + + 
current_owned = self + .follow_symlink_boxed(symlink.aref(), &inode, nr_follows) + .await?; + current = current_owned.aref(); + } + } + } + } + + pub async fn start_recursive_walk( + &self, + cwd: &Arc, + path: &Path, + ) -> KResult> { + self.walk_recursive(cwd, path, 0).await + } +} + +impl<'a, 'b> DentryFind<'a, 'b> { + fn new(parent: &'a Dentry, name: &'b [u8]) -> Self { + let builder: BuildHasherDefault = Default::default(); + let mut hasher = builder.build_hasher(); + + hasher.write_usize(parent as *const _ as usize); + hasher.write(name); + let hash = hasher.finish() as usize; + + Self { parent, name, hash } + } +} + +impl DCacheItem for DentryFind<'_, '_> { + fn d_hash(&self) -> usize { + self.hash + } + + fn d_parent(&self) -> *const Dentry { + self.parent as *const _ + } + + fn d_name<'r, 'a: 'r, 'b: 'a>( + &'a self, + _rcu_read: &'b RCUReadLock, + ) -> impl Deref + 'r { + self.name + } +} diff --git a/src/path.rs b/src/path.rs index 8b740095..b342ef5f 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,34 +1,30 @@ use crate::{kernel::constants::ENOENT, prelude::*}; use core::fmt::{self, Debug, Formatter}; -pub struct Path<'lt> { - all: &'lt [u8], +#[repr(transparent)] +pub struct Path { + all: [u8], } pub struct PathIterator<'lt> { rem: &'lt [u8], } -#[allow(dead_code)] -impl<'lt> Path<'lt> { - pub fn new(all: &'lt [u8]) -> KResult { +impl Path { + pub fn new(all: &[u8]) -> KResult<&Self> { if all.is_empty() { Err(ENOENT) } else { - Ok(Self { all }) + Ok(unsafe { &*(all as *const [u8] as *const Path) }) } } - pub fn from_str(all: &'lt str) -> KResult { - Self::new(all.as_bytes()) - } - pub fn is_absolute(&self) -> bool { self.all.starts_with(&['/' as u8]) } - pub fn iter(&self) -> PathIterator<'lt> { - PathIterator::new(self.all) + pub fn iter(&self) -> PathIterator { + PathIterator::new(&self.all) } } @@ -46,11 +42,17 @@ pub enum PathComponent<'lt> { Parent, } +impl PathIterator<'_> { + pub fn is_empty(&self) -> bool { + self.rem.is_empty() + } +} + 
impl<'lt> Iterator for PathIterator<'lt> { type Item = PathComponent<'lt>; fn next(&mut self) -> Option { - if self.rem.is_empty() { + if self.is_empty() { return None; } @@ -71,16 +73,16 @@ impl<'lt> Iterator for PathIterator<'lt> { self.rem = rem; match cur { - cur if cur.is_empty() => Some(PathComponent::TrailingEmpty), - cur if cur == b"." => Some(PathComponent::Current), - cur if cur == b".." => Some(PathComponent::Parent), - cur => Some(PathComponent::Name(cur)), + b"" => Some(PathComponent::TrailingEmpty), + b"." => Some(PathComponent::Current), + b".." => Some(PathComponent::Parent), + name => Some(PathComponent::Name(name)), } } } -impl Debug for Path<'_> { +impl Debug for Path { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "Path({:?})", self.all) + write!(f, "Path({:?})", &self.all) } } diff --git a/src/rcu.rs b/src/rcu.rs index c1645d33..b06db9e2 100644 --- a/src/rcu.rs +++ b/src/rcu.rs @@ -1,21 +1,35 @@ use crate::{kernel::task::block_on, prelude::*}; use alloc::sync::Arc; +use arcref::ArcRef; use core::{ ops::Deref, ptr::NonNull, sync::atomic::{AtomicPtr, Ordering}, }; +use eonix_preempt::PreemptGuard; use eonix_runtime::scheduler::RUNTIME; -use eonix_sync::{Mutex, RwLock, RwLockReadGuard}; +use eonix_sync::{RwLock, RwLockReadGuard}; use pointers::BorrowedArc; +/// The RCU Read Lock. Holding a reference to an instance of the struct assures +/// you that any RCU protected data would not be dropped. +/// +/// The struct cannot be created directly. Instead, use [`rcu_read_lock()`]. 
+#[derive(Debug)] +pub struct RCUReadLock(); + +pub struct RCUReadGuardNew { + guard: RwLockReadGuard<'static, RCUReadLock>, + _disable_preempt: PreemptGuard<()>, +} + pub struct RCUReadGuard<'data, T: 'data> { value: T, - _guard: RwLockReadGuard<'data, ()>, + _guard: RwLockReadGuard<'static, RCUReadLock>, _phantom: PhantomData<&'data T>, } -static GLOBAL_RCU_SEM: RwLock<()> = RwLock::new(()); +static GLOBAL_RCU_SEM: RwLock = RwLock::new(RCUReadLock()); impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> { fn lock(value: BorrowedArc<'data, T>) -> Self { @@ -25,14 +39,6 @@ impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> { _phantom: PhantomData, } } - - pub fn borrow(&self) -> BorrowedArc<'data, T> { - unsafe { - BorrowedArc::from_raw(NonNull::new_unchecked( - &raw const *self.value.borrow() as *mut T - )) - } - } } impl<'data, T: 'data> Deref for RCUReadGuard<'data, T> { @@ -63,17 +69,14 @@ pub trait RCUNode { pub struct RCUList> { head: AtomicPtr, - - reader_lock: RwLock<()>, - update_lock: Mutex<()>, + update_lock: Spin<()>, } impl> RCUList { pub const fn new() -> Self { Self { head: AtomicPtr::new(core::ptr::null_mut()), - reader_lock: RwLock::new(()), - update_lock: Mutex::new(()), + update_lock: Spin::new(()), } } @@ -117,7 +120,6 @@ impl> RCUList { unsafe { Arc::from_raw(me) }; } - let _lck = self.reader_lock.write(); node.rcu_prev() .store(core::ptr::null_mut(), Ordering::Release); node.rcu_next() @@ -152,7 +154,6 @@ impl> RCUList { unsafe { Arc::from_raw(old) }; } - let _lck = self.reader_lock.write(); old_node .rcu_prev() .store(core::ptr::null_mut(), Ordering::Release); @@ -161,36 +162,36 @@ impl> RCUList { .store(core::ptr::null_mut(), Ordering::Release); } - pub fn iter(&self) -> RCUIterator { - let _lck = block_on(self.reader_lock.read()); - + pub fn iter<'a, 'r>(&'a self, _lock: &'r RCUReadLock) -> RCUIterator<'a, 'r, T> { RCUIterator { - // SAFETY: We have a read lock, so the node is still alive. 
- cur: NonNull::new(self.head.load(Ordering::SeqCst)), - _lock: _lck, + cur: NonNull::new(self.head.load(Ordering::Acquire)), + _phantom: PhantomData, } } } -pub struct RCUIterator<'lt, T: RCUNode> { +pub struct RCUIterator<'list, 'rcu, T: RCUNode> { cur: Option>, - _lock: RwLockReadGuard<'lt, ()>, + _phantom: PhantomData<(&'list (), &'rcu ())>, } -impl<'lt, T: RCUNode> Iterator for RCUIterator<'lt, T> { - type Item = BorrowedArc<'lt, T>; +impl<'rcu, T: RCUNode> Iterator for RCUIterator<'_, 'rcu, T> { + type Item = ArcRef<'rcu, T>; fn next(&mut self) -> Option { - match self.cur { - None => None, - Some(pointer) => { - // SAFETY: We have a read lock, so the node is still alive. - let reference = unsafe { pointer.as_ref() }; + self.cur.map(|pointer| { + let reference = unsafe { + // SAFETY: We have the read lock so the node is still alive. + pointer.as_ref() + }; + + self.cur = NonNull::new(reference.rcu_next().load(Ordering::Acquire)); - self.cur = NonNull::new(reference.rcu_next().load(Ordering::SeqCst)); - Some(unsafe { BorrowedArc::from_raw(pointer) }) + unsafe { + // SAFETY: We have the read lock so the node is still alive. + ArcRef::new_unchecked(pointer.as_ptr()) } - } + }) } } @@ -228,15 +229,16 @@ where } pub fn load<'lt>(&self) -> Option>> { + // BUG: We should acquire the lock before loading the pointer NonNull::new(self.0.load(Ordering::Acquire)) .map(|p| RCUReadGuard::lock(unsafe { BorrowedArc::from_raw(p) })) } - pub fn load_protected<'a, U: 'a>( - &self, - _guard: &RCUReadGuard<'a, U>, - ) -> Option> { - NonNull::new(self.0.load(Ordering::Acquire)).map(|p| unsafe { BorrowedArc::from_raw(p) }) + pub fn dereference<'r, 'a: 'r>(&self, _lock: &'a RCUReadLock) -> Option> { + NonNull::new(self.0.load(Ordering::Acquire)).map(|p| unsafe { + // SAFETY: We have a read lock, so the node is still alive. 
+ ArcRef::new_unchecked(p.as_ptr()) + }) } /// # Safety @@ -289,3 +291,18 @@ where } } } + +impl Deref for RCUReadGuardNew { + type Target = RCUReadLock; + + fn deref(&self) -> &Self::Target { + &self.guard + } +} + +pub fn rcu_read_lock() -> RCUReadGuardNew { + RCUReadGuardNew { + guard: block_on(GLOBAL_RCU_SEM.read()), + _disable_preempt: PreemptGuard::new(()), + } +} From 689d8169af36a8c7e79813a7ad8b5c28f32c10fd Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 14 Sep 2025 22:22:51 +0800 Subject: [PATCH 32/54] style, vfs: remove unused imports and InodeRef Signed-off-by: greatbridf --- src/kernel/vfs/inode/inode.rs | 5 ----- src/kernel/vfs/inode/mod.rs | 4 +--- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/kernel/vfs/inode/inode.rs b/src/kernel/vfs/inode/inode.rs index 786a31fe..870a045d 100644 --- a/src/kernel/vfs/inode/inode.rs +++ b/src/kernel/vfs/inode/inode.rs @@ -321,11 +321,6 @@ pub struct InodeInfo { pub mtime: Instant, } -#[derive(Clone)] -pub struct InodeRef(Weak) -where - I: Inode + ?Sized; - pub struct InodeUse(Arc) where I: Inode + ?Sized; diff --git a/src/kernel/vfs/inode/mod.rs b/src/kernel/vfs/inode/mod.rs index 786d30fa..08471ef3 100644 --- a/src/kernel/vfs/inode/mod.rs +++ b/src/kernel/vfs/inode/mod.rs @@ -4,7 +4,5 @@ mod ops; mod statx; pub use ino::Ino; -pub use inode::{ - Inode, InodeDir, InodeDirOps, InodeFile, InodeFileOps, InodeInfo, InodeOps, InodeRef, InodeUse, -}; +pub use inode::{Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; pub use ops::{RenameData, WriteOffset}; From 6348a378b33d018b14fb2c34c25d34499b04c606 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 14 Sep 2025 22:23:09 +0800 Subject: [PATCH 33/54] vfs: fix debug print of `Mode` structs If we have invalid format, we should print a None instead of panicking. 
Signed-off-by: greatbridf --- src/kernel/vfs/types/mode.rs | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/kernel/vfs/types/mode.rs b/src/kernel/vfs/types/mode.rs index dc1b88ec..a58c8215 100644 --- a/src/kernel/vfs/types/mode.rs +++ b/src/kernel/vfs/types/mode.rs @@ -44,13 +44,20 @@ impl Mode { } pub fn format(&self) -> Format { + match self.try_format() { + None => panic!("unknown format bits: {:#o}", self.format_bits()), + Some(format) => format, + } + } + + pub fn try_format(&self) -> Option { match self.format_bits() { - S_IFREG => Format::REG, - S_IFDIR => Format::DIR, - S_IFLNK => Format::LNK, - S_IFBLK => Format::BLK, - S_IFCHR => Format::CHR, - _ => panic!("unknown format bits: {:#o}", self.format_bits()), + S_IFREG => Some(Format::REG), + S_IFDIR => Some(Format::DIR), + S_IFLNK => Some(Format::LNK), + S_IFBLK => Some(Format::BLK), + S_IFCHR => Some(Format::CHR), + _ => None, } } @@ -58,10 +65,6 @@ impl Mode { Permission::new(self.non_format_bits()) } - pub const fn non_format(&self) -> Self { - Self::new(self.non_format_bits()) - } - pub const fn set_perm(&mut self, perm: Permission) { self.0 = self.format_bits() | perm.bits(); } @@ -100,15 +103,15 @@ impl core::fmt::Debug for Mode { match self.non_format_bits() & !0o777 { 0 => write!( f, - "Mode({format:?}, {perm:#o})", - format = self.format(), - perm = self.non_format_bits() + "Mode({format:?}, {perm:?})", + format = self.try_format(), + perm = Permission::new(self.non_format_bits()), )?, rem => write!( f, - "Mode({format:?}, {perm:#o}, rem={rem:#x})", - format = self.format(), - perm = self.non_format_bits() & 0o777 + "Mode({format:?}, {perm:?}, rem={rem:#x})", + format = self.try_format(), + perm = Permission::new(self.non_format_bits()) )?, } From 39d15727b667dba8ffb4dcdb740ac5e9b5c0410e Mon Sep 17 00:00:00 2001 From: greatbridf Date: Wed, 8 Oct 2025 11:10:13 +0800 Subject: [PATCH 34/54] mem, paging: introduce page locks and exclusive pages 
Introduce the new page locking mechanism to ensure exclusiveness of pages when we access them. The underlying locks are not implemented yet for now because we will change the paging structs in the following few patches. Introduce a new `PageExcl` struct representing a page that conforms with Rust's ownership rules. The page owned exclusively can be accessed without taking page locks. Remove the `MemoryBlock` structs as they are not easy to use and represent barely no semantic meanings. Signed-off-by: greatbridf --- .rustfmt.toml | 4 +- Cargo.lock | 7 + Cargo.toml | 1 + src/driver/ahci/command_table.rs | 61 ++++--- src/driver/ahci/mod.rs | 42 +++-- src/driver/ahci/port.rs | 104 +++++------- src/driver/ahci/slot.rs | 179 +++++++++++++++------ src/driver/e1000e.rs | 135 ++++++---------- src/driver/virtio/virtio_blk.rs | 55 +++---- src/fs/fat32.rs | 64 +++----- src/io.rs | 30 ++-- src/kernel/block.rs | 222 +++++++------------------- src/kernel/mem.rs | 4 +- src/kernel/mem/access.rs | 118 +------------- src/kernel/mem/allocator.rs | 8 +- src/kernel/mem/mm_area.rs | 52 +++--- src/kernel/mem/mm_list.rs | 39 ++--- src/kernel/mem/page_alloc/raw_page.rs | 9 +- src/kernel/mem/page_cache.rs | 37 ++--- src/kernel/mem/paging.rs | 129 ++++++++++++--- src/kernel/vfs/file/mod.rs | 34 ++-- src/lib.rs | 50 +++--- 22 files changed, 633 insertions(+), 751 deletions(-) diff --git a/.rustfmt.toml b/.rustfmt.toml index d69872c6..17b2bbc5 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -29,8 +29,8 @@ fn_single_line = false where_single_line = false imports_indent = "Block" imports_layout = "Mixed" -imports_granularity = "Preserve" -group_imports = "Preserve" +imports_granularity = "Module" +group_imports = "StdExternalCrate" reorder_imports = true reorder_modules = true reorder_impl_items = false diff --git a/Cargo.lock b/Cargo.lock index f85a9d2f..c70190a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -180,6 +180,7 @@ dependencies = [ "posix_types", "slab_allocator", "stalloc", + 
"static_assertions", "unwinding", "virtio-drivers", "xmas-elf", @@ -525,6 +526,12 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a37f0ead4094eeb54c6893316aa139e48b252f1c07511e5124fa1f9414df5b6c" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "syn" version = "2.0.104" diff --git a/Cargo.toml b/Cargo.toml index 5158025e..dca5d34d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ futures = { version = "0.3.31", features = [ "alloc", "async-await", ], default-features = false } +static_assertions = "1.1.0" [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/src/driver/ahci/command_table.rs b/src/driver/ahci/command_table.rs index c77b4abd..7b78d26f 100644 --- a/src/driver/ahci/command_table.rs +++ b/src/driver/ahci/command_table.rs @@ -1,45 +1,54 @@ -use super::{command::Command, PRDTEntry, FISH2D}; -use crate::kernel::mem::{AsMemoryBlock as _, Page}; +use core::ptr::NonNull; + use eonix_mm::address::PAddr; -pub struct CommandTable<'a> { - page: Page, - command_fis: &'a mut FISH2D, +use super::command::Command; +use super::{PRDTEntry, FISH2D}; +use crate::kernel::mem::{Page, PageExt}; - prdt: &'a mut [PRDTEntry; 248], - prdt_entries: Option, +pub struct CommandTable { + page: Page, + cmd_fis: NonNull, + prdt: NonNull<[PRDTEntry; 248]>, + prdt_entries: usize, } -impl CommandTable<'_> { +unsafe impl Send for CommandTable {} +unsafe impl Sync for CommandTable {} + +impl CommandTable { pub fn new() -> Self { let page = Page::alloc(); - let memory = page.as_memblk(); - - let (lhs, prdt) = memory.split_at(0x80); - - let (command_fis, _) = lhs.split_at(size_of::()); - let command_fis = unsafe { command_fis.as_ptr().as_mut() }; - let prdt = unsafe { 
prdt.as_ptr().as_mut() }; - - Self { - page, - command_fis, - prdt, - prdt_entries: None, + let base = page.get_ptr(); + + unsafe { + Self { + page, + cmd_fis: base.cast(), + prdt: base.byte_add(0x80).cast(), + prdt_entries: 0, + } } } pub fn setup(&mut self, cmd: &impl Command) { - self.command_fis.setup(cmd.cmd(), cmd.lba(), cmd.count()); - self.prdt_entries = Some(cmd.pages().len() as u16); + unsafe { + self.cmd_fis + .as_mut() + .setup(cmd.cmd(), cmd.lba(), cmd.count()); + } + + self.prdt_entries = cmd.pages().len(); for (idx, page) in cmd.pages().iter().enumerate() { - self.prdt[idx].setup(page); + unsafe { + self.prdt.as_mut()[idx].setup(page); + } } } - pub fn prdt_len(&self) -> u16 { - self.prdt_entries.unwrap() + pub fn prdt_len(&self) -> usize { + self.prdt_entries } pub fn base(&self) -> PAddr { diff --git a/src/driver/ahci/mod.rs b/src/driver/ahci/mod.rs index ab405829..3ea44ed3 100644 --- a/src/driver/ahci/mod.rs +++ b/src/driver/ahci/mod.rs @@ -1,25 +1,23 @@ -use crate::{ - fs::procfs, - io::Buffer as _, - kernel::{ - block::BlockDevice, - constants::{EINVAL, EIO}, - interrupt::register_irq_handler, - pcie::{self, Header, PCIDevice, PCIDriver, PciError}, - vfs::types::DeviceId, - }, - prelude::*, -}; -use alloc::{format, sync::Arc}; +use alloc::format; +use alloc::sync::Arc; + use async_trait::async_trait; use control::AdapterControl; use defs::*; use eonix_mm::address::{AddrOps as _, PAddr}; use eonix_sync::SpinIrq as _; use port::AdapterPort; - pub(self) use register::Register; +use crate::fs::procfs; +use crate::io::Buffer as _; +use crate::kernel::block::BlockDevice; +use crate::kernel::constants::{EINVAL, EIO}; +use crate::kernel::interrupt::register_irq_handler; +use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; +use crate::kernel::vfs::types::DeviceId; +use crate::prelude::*; + mod command; mod command_table; mod control; @@ -30,7 +28,7 @@ pub(self) mod slot; mod stats; pub struct AHCIDriver { - devices: Spin>>>, + 
devices: Spin>>, } pub struct BitsIterator { @@ -64,22 +62,22 @@ impl Iterator for BitsIterator { } } -struct Device<'a> { +struct Device { control_base: PAddr, control: AdapterControl, _pcidev: Arc>, /// # Lock /// Might be accessed from irq handler, use with `lock_irq()` - ports: Spin<[Option>>; 32]>, + ports: Spin<[Option>; 32]>, } /// # Safety /// `pcidev` is never accessed from Rust code /// TODO!!!: place *mut pci_device in a safe wrapper -unsafe impl Send for Device<'_> {} -unsafe impl Sync for Device<'_> {} +unsafe impl Send for Device {} +unsafe impl Sync for Device {} -impl Device<'_> { +impl Device { fn handle_interrupt(&self) { // Safety // `self.ports` is accessed inside irq handler @@ -108,8 +106,8 @@ impl Device<'_> { } } -impl Device<'static> { - async fn probe_port(&self, port: Arc>) -> KResult<()> { +impl Device { + async fn probe_port(&self, port: Arc) -> KResult<()> { port.init().await?; { diff --git a/src/driver/ahci/port.rs b/src/driver/ahci/port.rs index 77286ec5..a54bbbba 100644 --- a/src/driver/ahci/port.rs +++ b/src/driver/ahci/port.rs @@ -1,20 +1,18 @@ +use alloc::collections::vec_deque::VecDeque; +use core::task::{Poll, Waker}; + +use async_trait::async_trait; +use eonix_mm::address::{Addr as _, PAddr}; +use eonix_sync::SpinIrq as _; + use super::command::{Command, IdentifyCommand, ReadLBACommand, WriteLBACommand}; -use super::slot::CommandSlot; +use super::slot::CommandList; use super::stats::AdapterPortStats; -use super::{ - CommandHeader, Register, PORT_CMD_CR, PORT_CMD_FR, PORT_CMD_FRE, PORT_CMD_ST, PORT_IE_DEFAULT, -}; +use super::{Register, PORT_CMD_CR, PORT_CMD_FR, PORT_CMD_FRE, PORT_CMD_ST, PORT_IE_DEFAULT}; use crate::driver::ahci::command_table::CommandTable; use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; use crate::kernel::constants::{EINVAL, EIO}; -use crate::kernel::mem::paging::Page; -use crate::kernel::mem::AsMemoryBlock as _; use crate::prelude::*; -use alloc::collections::vec_deque::VecDeque; -use 
async_trait::async_trait; -use core::pin::pin; -use eonix_mm::address::{Addr as _, PAddr}; -use eonix_sync::{SpinIrq as _, WaitList}; /// An `AdapterPort` is an HBA device in AHCI mode. /// @@ -55,6 +53,8 @@ pub struct AdapterPortData { struct FreeList { free: VecDeque, working: VecDeque, + + wakers: VecDeque, } impl FreeList { @@ -62,57 +62,32 @@ impl FreeList { Self { free: (0..32).collect(), working: VecDeque::new(), + wakers: VecDeque::new(), } } } -pub struct AdapterPort<'a> { +pub struct AdapterPort { pub nport: u32, regs_base: PAddr, - slots: [CommandSlot<'a>; 32], + cmdlist: CommandList, free_list: Spin, - free_list_wait: WaitList, - - /// Holds the command list. - /// **DO NOT USE IT DIRECTLY** - _page: Page, - - cmdlist_base: PAddr, - fis_base: PAddr, stats: AdapterPortStats, } -impl<'a> AdapterPort<'a> { +impl AdapterPort { pub fn new(base: PAddr, nport: u32) -> Self { - let page = Page::alloc(); - let cmdlist_base = page.start(); - let cmdlist_size = 32 * size_of::(); - let fis_base = cmdlist_base + cmdlist_size; - - let (mut cmdheaders, _) = page.as_memblk().split_at(cmdlist_size); - let slots = core::array::from_fn(move |_| { - let (cmdheader, next) = cmdheaders.split_at(size_of::()); - cmdheaders = next; - CommandSlot::new(unsafe { cmdheader.as_ptr().as_mut() }) - }); - Self { nport, regs_base: base + 0x100 + 0x80 * nport as usize, - slots, + cmdlist: CommandList::new(), free_list: Spin::new(FreeList::new()), - free_list_wait: WaitList::new(), - _page: page, stats: AdapterPortStats::new(), - cmdlist_base, - fis_base, } } -} -impl AdapterPort<'_> { fn command_list_base(&self) -> Register { Register::new(self.regs_base + 0x00) } @@ -146,25 +121,16 @@ impl AdapterPort<'_> { } async fn get_free_slot(&self) -> u32 { - loop { - let mut wait = pin!(self.free_list_wait.prepare_to_wait()); - - { - let mut free_list = self.free_list.lock_irq(); - - if let Some(slot) = free_list.free.pop_front() { - return slot; - } - - wait.as_mut().add_to_wait_list(); - - if 
let Some(slot) = free_list.free.pop_front() { - return slot; - } + core::future::poll_fn(|ctx| { + let mut free_list = self.free_list.lock_irq(); + if let Some(slot) = free_list.free.pop_front() { + return Poll::Ready(slot); } - wait.await; - } + free_list.wakers.push_back(ctx.waker().clone()); + Poll::Pending + }) + .await } fn save_working(&self, slot: u32) { @@ -172,8 +138,10 @@ impl AdapterPort<'_> { } fn release_free_slot(&self, slot: u32) { - self.free_list.lock_irq().free.push_back(slot); - self.free_list_wait.notify_one(); + let mut free_list = self.free_list.lock_irq(); + + free_list.free.push_back(slot); + free_list.wakers.drain(..).for_each(|waker| waker.wake()); } pub fn handle_interrupt(&self) { @@ -187,7 +155,7 @@ impl AdapterPort<'_> { return true; } - self.slots[n as usize].handle_irq(); + self.cmdlist.get(n as usize).handle_irq(); self.stats.inc_int_fired(); false @@ -216,7 +184,7 @@ impl AdapterPort<'_> { cmdtable.setup(cmd); let slot_index = self.get_free_slot().await; - let slot = &self.slots[slot_index as usize]; + let slot = self.cmdlist.get(slot_index as usize); slot.prepare_command(&cmdtable, cmd.write()); self.save_working(slot_index); @@ -229,10 +197,9 @@ impl AdapterPort<'_> { self.stats.inc_cmd_sent(); - if let Err(_) = slot.wait_finish().await { + slot.wait_finish().await.inspect_err(|_| { self.stats.inc_cmd_error(); - return Err(EIO); - }; + })?; self.release_free_slot(slot_index); Ok(()) @@ -251,8 +218,9 @@ impl AdapterPort<'_> { self.stop_command()?; self.command_list_base() - .write(self.cmdlist_base.addr() as u64); - self.fis_base().write(self.fis_base.addr() as u64); + .write(self.cmdlist.cmdlist_base().addr() as u64); + self.fis_base() + .write(self.cmdlist.recv_fis_base().addr() as u64); self.interrupt_enable().write_once(PORT_IE_DEFAULT); @@ -277,7 +245,7 @@ impl AdapterPort<'_> { } #[async_trait] -impl BlockRequestQueue for AdapterPort<'_> { +impl BlockRequestQueue for AdapterPort { fn max_request_pages(&self) -> u64 { 1024 } 
diff --git a/src/driver/ahci/slot.rs b/src/driver/ahci/slot.rs index 60a66de3..06c6f2ec 100644 --- a/src/driver/ahci/slot.rs +++ b/src/driver/ahci/slot.rs @@ -1,20 +1,37 @@ -use super::{command_table::CommandTable, CommandHeader}; +use core::cell::UnsafeCell; +use core::ptr::NonNull; +use core::task::{Poll, Waker}; + +use eonix_mm::address::{Addr as _, PAddr}; +use eonix_sync::{Spin, SpinIrq as _}; + +use super::command_table::CommandTable; +use super::CommandHeader; +use crate::kernel::constants::EIO; +use crate::kernel::mem::paging::AllocZeroed; +use crate::kernel::mem::{Page, PageExt}; use crate::KResult; -use core::pin::pin; -use eonix_mm::address::Addr as _; -use eonix_sync::{Spin, SpinIrq as _, WaitList}; + +pub struct CommandList { + base: NonNull, + _page: Page, +} + +unsafe impl Send for CommandList {} +unsafe impl Sync for CommandList {} pub struct CommandSlot<'a> { - /// # Usage - /// `inner.cmdheader` might be used in irq handler. So in order to wait for - /// commands to finish, we should use `lock_irq` on `inner` - inner: Spin>, - wait_list: WaitList, + cmdheader: &'a UnsafeCell, + /// [`Self::control`] might be used in irq handlers. + control: &'a Spin, } -struct CommandSlotInner<'a> { +unsafe impl Send for CommandSlot<'_> {} +unsafe impl Sync for CommandSlot<'_> {} + +struct SlotControl { state: SlotState, - cmdheader: &'a mut CommandHeader, + waker: Option, } #[derive(Debug, PartialEq, Eq, Clone, Copy)] @@ -25,32 +42,103 @@ enum SlotState { Error, } -impl<'a> CommandSlot<'a> { - pub fn new(cmdheader: &'a mut CommandHeader) -> Self { +impl CommandList { + fn cmdheaders(&self) -> &[UnsafeCell; 32] { + unsafe { self.base.cast().as_ref() } + } + + fn controls_ptr(base: NonNull) -> NonNull> { + // 24 bytes for SlotControl and extra 8 bytes for Spin. 
+ const_assert_eq!(size_of::>(), 32); + + unsafe { base.add(size_of::>() * 32).cast() } + } + + fn controls(&self) -> &[Spin; 32] { + unsafe { Self::controls_ptr(self.base).cast().as_ref() } + } + + pub fn cmdlist_base(&self) -> PAddr { + self._page.start() + } + + pub fn recv_fis_base(&self) -> PAddr { + self._page.start() + + (size_of::>() + size_of::>()) * 32 + } + + pub fn get(&self, index: usize) -> CommandSlot { + CommandSlot { + cmdheader: &self.cmdheaders()[index], + control: &self.controls()[index], + } + } + + pub fn new() -> Self { + let page = Page::zeroed(); + let base = page.get_ptr(); + + let controls_ptr = Self::controls_ptr(base); + + for i in 0..32 { + unsafe { + controls_ptr.add(i).write(Spin::new(SlotControl { + state: SlotState::Idle, + waker: None, + })); + } + } + Self { - inner: Spin::new(CommandSlotInner { - state: SlotState::Idle, - cmdheader, - }), - wait_list: WaitList::new(), + base: page.get_ptr(), + _page: page, } } +} +impl Drop for CommandList { + fn drop(&mut self) { + let controls_ptr = Self::controls_ptr(self.base); + + for i in 0..32 { + unsafe { + controls_ptr.add(i).drop_in_place(); + } + } + } +} + +impl CommandSlot<'_> { pub fn handle_irq(&self) { - let mut inner = self.inner.lock(); - debug_assert_eq!(inner.state, SlotState::Working); + // We are already in the IRQ handler. + let mut control = self.control.lock(); + assert_eq!(control.state, SlotState::Working); + + let cmdheader = unsafe { + // SAFETY: The IRQ handler is only called after the command + // is finished. + &mut *self.cmdheader.get() + }; // TODO: Check errors. 
- inner.state = SlotState::Finished; - inner.cmdheader.bytes_transferred = 0; - inner.cmdheader.prdt_length = 0; + cmdheader.bytes_transferred = 0; + cmdheader.prdt_length = 0; - self.wait_list.notify_all(); + control.state = SlotState::Finished; + + if let Some(waker) = control.waker.take() { + waker.wake(); + } } pub fn prepare_command(&self, cmdtable: &CommandTable, write: bool) { - let mut inner = self.inner.lock_irq(); - let cmdheader = &mut inner.cmdheader; + let mut control = self.control.lock_irq(); + assert_eq!(control.state, SlotState::Idle); + + let cmdheader = unsafe { + // SAFETY: We are in the idle state. + &mut *self.cmdheader.get() + }; cmdheader.first = 0x05; // FIS type @@ -60,36 +148,37 @@ impl<'a> CommandSlot<'a> { cmdheader.second = 0x00; - cmdheader.prdt_length = cmdtable.prdt_len(); + cmdheader.prdt_length = cmdtable.prdt_len() as u16; cmdheader.bytes_transferred = 0; cmdheader.command_table_base = cmdtable.base().addr() as u64; cmdheader._reserved = [0; 4]; - inner.state = SlotState::Working; + control.state = SlotState::Working; } pub async fn wait_finish(&self) -> KResult<()> { - let mut inner = loop { - let mut wait = pin!(self.wait_list.prepare_to_wait()); - - { - let inner = self.inner.lock_irq(); - if inner.state != SlotState::Working { - break inner; + core::future::poll_fn(|ctx| { + let mut control = self.control.lock_irq(); + + match control.state { + SlotState::Idle => unreachable!("Poll called in idle state"), + SlotState::Working => { + control.waker = Some(ctx.waker().clone()); + Poll::Pending } - wait.as_mut().add_to_wait_list(); + SlotState::Finished => { + control.state = SlotState::Idle; + Poll::Ready(Ok(())) + } + SlotState::Error => { + control.state = SlotState::Idle; - if inner.state != SlotState::Working { - break inner; + // TODO: Report errors. 
+ Poll::Ready(Err(EIO)) } } - - wait.await; - }; - - inner.state = SlotState::Idle; - - Ok(()) + }) + .await } } diff --git a/src/driver/e1000e.rs b/src/driver/e1000e.rs index f362f477..73143c2c 100644 --- a/src/driver/e1000e.rs +++ b/src/driver/e1000e.rs @@ -1,19 +1,18 @@ -use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO}; -use crate::kernel::interrupt::register_irq_handler; -use crate::kernel::mem::paging::{self, AllocZeroed}; -use crate::kernel::mem::{AsMemoryBlock, PhysAccess}; -use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; -use crate::net::netdev; -use crate::prelude::*; use alloc::boxed::Box; use alloc::sync::Arc; -use alloc::vec::Vec; -use async_trait::async_trait; use core::ptr::NonNull; + +use async_trait::async_trait; use eonix_hal::fence::memory_barrier; use eonix_mm::address::{Addr, PAddr}; use eonix_sync::SpinIrq; -use paging::Page; + +use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO}; +use crate::kernel::interrupt::register_irq_handler; +use crate::kernel::mem::{PageExcl, PageExt, PhysAccess}; +use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; +use crate::net::netdev; +use crate::prelude::*; mod defs; @@ -55,13 +54,13 @@ struct E1000eDev { id: u32, regs: Registers, - rt_desc_page: Page, + rt_desc_page: PageExcl, rx_head: Option, rx_tail: Option, tx_tail: Option, - rx_buffers: Option>>, - tx_buffers: Option>>, + rx_buffers: Box<[PageExcl; RX_DESC_SIZE]>, + tx_buffers: Box<[Option; TX_DESC_SIZE]>, } fn test(val: u32, bit: u32) -> bool { @@ -196,7 +195,7 @@ impl netdev::Netdev for E1000eDev { break; } - let ref mut desc = self.rx_desc_table()[next_tail as usize]; + let desc = unsafe { &mut self.rx_desc_table()[next_tail as usize] }; if !test(desc.status as u32, defs::RXD_STAT_DD as u32) { Err(EIO)?; } @@ -204,11 +203,8 @@ impl netdev::Netdev for E1000eDev { desc.status = 0; let len = desc.length as usize; - let buffers = self.rx_buffers.as_mut().ok_or(EIO)?; - let data = unsafe { - // 
SAFETY: No one could be writing to the buffer at this point. - &buffers[next_tail as usize].as_memblk().as_bytes()[..len] - }; + let buffer = &self.rx_buffers[next_tail as usize]; + let data = &buffer.as_bytes()[..len]; println_debug!("e1000e: received {len} bytes, {:?}", PrintableBytes(data)); self.rx_tail = Some(next_tail); @@ -226,20 +222,17 @@ impl netdev::Netdev for E1000eDev { return Err(EAGAIN); } - let ref mut desc = self.tx_desc_table()[tail as usize]; + let desc = unsafe { &mut self.tx_desc_table()[tail as usize] }; if !test(desc.status as u32, defs::TXD_STAT_DD as u32) { return Err(EIO); } - let buffer_page = Page::alloc(); + let mut buffer_page = PageExcl::alloc(); if buf.len() > buffer_page.len() { return Err(EFAULT); } - unsafe { - // SAFETY: We are the only one writing to this memory block. - buffer_page.as_memblk().as_bytes_mut()[..buf.len()].copy_from_slice(buf); - } + buffer_page.as_bytes_mut()[..buf.len()].copy_from_slice(buf); desc.buffer = PAddr::from(buffer_page.pfn()).addr() as u64; desc.length = buf.len() as u16; @@ -249,9 +242,8 @@ impl netdev::Netdev for E1000eDev { self.tx_tail = Some(next_tail); self.regs.write(defs::REG_TDT, next_tail); - // TODO: check if the packets are sent and update self.tx_head state - - Ok(()) + unimplemented!("Check if the packets are sent and update self.tx_head state"); + // Ok(()) } } @@ -324,26 +316,26 @@ impl E1000eDev { Ok(()) } - fn reset(&self) -> Result<(), u32> { + fn reset(regs: &Registers) -> Result<(), u32> { // disable interrupts so we won't mess things up - self.regs.write(defs::REG_IMC, 0xffffffff); + regs.write(defs::REG_IMC, 0xffffffff); - let ctrl = self.regs.read(defs::REG_CTRL); - self.regs.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD); + let ctrl = regs.read(defs::REG_CTRL); + regs.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD); - while self.regs.read(defs::REG_STAT) & defs::STAT_GIOE != 0 { + while regs.read(defs::REG_STAT) & defs::STAT_GIOE != 0 { // wait for link up } - let ctrl = 
self.regs.read(defs::REG_CTRL); - self.regs.write(defs::REG_CTRL, ctrl | defs::CTRL_RST); + let ctrl = regs.read(defs::REG_CTRL); + regs.write(defs::REG_CTRL, ctrl | defs::CTRL_RST); - while self.regs.read(defs::REG_CTRL) & defs::CTRL_RST != 0 { + while regs.read(defs::REG_CTRL) & defs::CTRL_RST != 0 { // wait for reset } // disable interrupts again - self.regs.write(defs::REG_IMC, 0xffffffff); + regs.write(defs::REG_IMC, 0xffffffff); Ok(()) } @@ -360,64 +352,45 @@ impl E1000eDev { Ok(()) } - pub fn new(base: PAddr, irq_no: usize) -> Result { - let page = Page::zeroed(); + pub fn new(base: PAddr, irq_no: usize) -> KResult { + let regs = Registers::new(base); + Self::reset(®s)?; - let mut dev = Self { + let dev = Self { irq_no, - mac: [0; 6], + mac: regs.read_as(0x5400), status: netdev::LinkStatus::Down, speed: netdev::LinkSpeed::SpeedUnknown, id: netdev::alloc_id(), - regs: Registers::new(base), - rt_desc_page: page, + regs, + rt_desc_page: PageExcl::zeroed(), rx_head: None, rx_tail: None, tx_tail: None, - rx_buffers: None, - tx_buffers: None, + rx_buffers: Box::new(core::array::from_fn(|_| PageExcl::alloc_order(2))), + tx_buffers: Box::new([const { None }; 32]), }; - dev.reset()?; - - dev.mac = dev.regs.read_as(0x5400); - dev.tx_buffers = Some(Box::new(Vec::with_capacity(TX_DESC_SIZE))); - - let mut rx_buffers = Box::new(Vec::with_capacity(RX_DESC_SIZE)); - - for index in 0..RX_DESC_SIZE { - let page = Page::alloc_order(2); - - let ref mut desc = dev.rx_desc_table()[index]; - desc.buffer = PAddr::from(page.pfn()).addr() as u64; - desc.status = 0; - - rx_buffers.push(page); - } + unsafe { + for (desc, page) in dev.rx_desc_table().into_iter().zip(dev.rx_buffers.iter()) { + desc.buffer = page.start().addr() as u64; + desc.status = 0; + } - for index in 0..TX_DESC_SIZE { - let ref mut desc = dev.tx_desc_table()[index]; - desc.status = defs::TXD_STAT_DD; + for desc in dev.tx_desc_table() { + desc.status = defs::TXD_STAT_DD; + } } - dev.rx_buffers = Some(rx_buffers); - 
Ok(dev) } - fn rx_desc_table(&self) -> &mut [RxDescriptor; RX_DESC_SIZE] { - unsafe { - // SAFETY: TODO - self.rt_desc_page.as_memblk().as_ptr().as_mut() - } + unsafe fn rx_desc_table(&self) -> &mut [RxDescriptor; RX_DESC_SIZE] { + self.rt_desc_page.get_ptr().cast().as_mut() } - fn tx_desc_table(&self) -> &mut [TxDescriptor; TX_DESC_SIZE] { - let (_, right) = self.rt_desc_page.as_memblk().split_at(0x200); - unsafe { - // SAFETY: TODO - right.as_ptr().as_mut() - } + unsafe fn tx_desc_table(&self) -> &mut [TxDescriptor; TX_DESC_SIZE] { + self.rt_desc_page.get_ptr().add(0x200).cast().as_mut() } } @@ -425,12 +398,8 @@ impl Drop for E1000eDev { fn drop(&mut self) { assert_eq!(self.status, netdev::LinkStatus::Down); - if let Some(_) = self.rx_buffers.take() {} - - // TODO: we should wait until all packets are sent - if let Some(_) = self.tx_buffers.take() {} - - let _ = self.rt_desc_page; + // TODO: we should wait until all packets are sent before dropping + // tx buffers. } } diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs index 86b500b6..c5a3c3d2 100644 --- a/src/driver/virtio/virtio_blk.rs +++ b/src/driver/virtio/virtio_blk.rs @@ -1,21 +1,19 @@ -use crate::{ - io::Chunks, - kernel::{ - block::{BlockDeviceRequest, BlockRequestQueue}, - constants::EIO, - mem::{AsMemoryBlock, Page}, - }, - prelude::KResult, -}; use alloc::boxed::Box; + use async_trait::async_trait; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::{ - address::{Addr, PAddr, PhysAccess}, - paging::PFN, -}; +use eonix_mm::address::{Addr, PAddr, PhysAccess}; +use eonix_mm::paging::PFN; use eonix_sync::Spin; -use virtio_drivers::{device::blk::VirtIOBlk, transport::Transport, Hal}; +use virtio_drivers::device::blk::VirtIOBlk; +use virtio_drivers::transport::Transport; +use virtio_drivers::Hal; + +use crate::io::Chunks; +use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; +use crate::kernel::constants::EIO; +use crate::kernel::mem::{Page, PageExt}; +use 
crate::prelude::KResult; pub struct HAL; @@ -26,11 +24,10 @@ unsafe impl Hal for HAL { ) -> (virtio_drivers::PhysAddr, core::ptr::NonNull) { let page = Page::alloc_at_least(pages); - let paddr = page.start().addr(); - let ptr = page.as_memblk().as_byte_ptr(); - page.into_raw(); + let ptr = page.get_ptr(); + let pfn = page.into_raw(); - (paddr, ptr) + (PAddr::from(pfn).addr(), ptr) } unsafe fn dma_dealloc( @@ -93,15 +90,14 @@ where buffer, } => { let mut dev = self.lock(); - for ((start, len), buffer_page) in + for ((start, sectors), buffer_page) in Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter()) { - let buffer = unsafe { - // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us. - &buffer_page.as_memblk().as_bytes()[..len as usize * 512] - }; + let len = sectors * 512; + let pg = buffer_page.lock(); - dev.write_blocks(start, buffer).map_err(|_| EIO)?; + dev.write_blocks(start, &pg.as_bytes()[..len]) + .map_err(|_| EIO)?; } } BlockDeviceRequest::Read { @@ -110,15 +106,14 @@ where buffer, } => { let mut dev = self.lock(); - for ((start, len), buffer_page) in + for ((start, sectors), buffer_page) in Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter()) { - let buffer = unsafe { - // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us. 
- &mut buffer_page.as_memblk().as_bytes_mut()[..len as usize * 512] - }; + let len = sectors * 512; + let mut pg = buffer_page.lock(); - dev.read_blocks(start, buffer).map_err(|_| EIO)?; + dev.read_blocks(start, &mut pg.as_bytes_mut()[..len]) + .map_err(|_| EIO)?; } } } diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 1104337d..9a4e03ec 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -1,45 +1,38 @@ mod dir; mod file; +use alloc::sync::{Arc, Weak}; use core::future::Future; use core::ops::Deref; -use alloc::sync::{Arc, Weak}; use async_trait::async_trait; use dir::{as_raw_dirents, ParseDirent}; use eonix_sync::RwLock; use itertools::Itertools; +use crate::io::{Buffer, ByteBuffer, UninitBuffer}; +use crate::kernel::block::{BlockDevice, BlockDeviceRequest}; use crate::kernel::constants::{EINVAL, EIO}; -use crate::kernel::mem::{AsMemoryBlock, CachePageStream}; +use crate::kernel::mem::{ + CachePage, CachePageStream, Page, PageCache, PageCacheBackendOps, PageExcl, PageExt, +}; use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::inode::{ + Ino, Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, +}; +use crate::kernel::vfs::mount::{register_filesystem, Mount, MountCreator}; use crate::kernel::vfs::types::{DeviceId, Format, Permission}; use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; use crate::prelude::*; -use crate::{ - io::{Buffer, ByteBuffer, UninitBuffer}, - kernel::{ - block::{BlockDevice, BlockDeviceRequest}, - mem::{ - paging::Page, - {CachePage, PageCache, PageCacheBackendOps}, - }, - vfs::{ - dentry::Dentry, - inode::{Ino, Inode}, - mount::{register_filesystem, Mount, MountCreator}, - }, - }, - KResult, -}; +use crate::KResult; #[repr(transparent)] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] struct Cluster(u32); #[repr(transparent)] -#[derive(Clone, Copy, PartialEq, Eq, 
PartialOrd, Ord)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] struct RawCluster(pub u32); impl RawCluster { @@ -70,7 +63,7 @@ impl Cluster { const SECTOR_SIZE: usize = 512; -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] #[repr(C, packed)] struct Bootsector { jmp: [u8; 3], @@ -302,10 +295,8 @@ impl InodeFileOps for FileInode { for cluster in cluster_iter { fs.read_cluster(cluster, &buffer_page).await?; - let data = unsafe { - // SAFETY: We are the only one holding this page. - &buffer_page.as_memblk().as_bytes()[inner_offset..] - }; + let pg = buffer_page.lock(); + let data = &pg.as_bytes()[inner_offset..]; let end = offset + data.len(); let real_end = end.min(self.info.lock().size as usize); @@ -340,7 +331,7 @@ struct DirInode { sb: SbRef, // TODO: Use the new PageCache... - dir_pages: RwLock>, + dir_pages: RwLock>, } impl DirInode { @@ -375,7 +366,7 @@ impl DirInode { let clusters = ClusterIterator::new(fat.as_ref(), self.cluster); for cluster in clusters { - let page = Page::alloc(); + let page = PageExcl::alloc(); fs.read_cluster(cluster, &page).await?; dir_pages.push(page); @@ -384,7 +375,7 @@ impl DirInode { Ok(()) } - async fn get_dir_pages(&self) -> KResult> + use<'_>> { + async fn get_dir_pages(&self) -> KResult> + use<'_>> { { let dir_pages = self.dir_pages.read().await; if !dir_pages.is_empty() { @@ -432,12 +423,7 @@ impl InodeDirOps for DirInode { let sb = self.sb.get()?; let dir_pages = self.get_dir_pages().await?; - let dir_data = dir_pages.iter().map(|page| { - unsafe { - // SAFETY: No one could be writing to it. 
- page.as_memblk().as_bytes() - } - }); + let dir_data = dir_pages.iter().map(|pg| pg.as_bytes()); let raw_dirents = dir_data .map(as_raw_dirents) @@ -481,12 +467,10 @@ impl InodeDirOps for DirInode { let inner_offset = offset % cluster_size; let inner_raw_dirent_offset = inner_offset / core::mem::size_of::(); - let dir_data = dir_pages.iter().skip(cluster_offset).map(|page| { - unsafe { - // SAFETY: No one could be writing to it. - page.as_memblk().as_bytes() - } - }); + let dir_data = dir_pages + .iter() + .skip(cluster_offset) + .map(|pg| pg.as_bytes()); let raw_dirents = dir_data .map(as_raw_dirents) diff --git a/src/io.rs b/src/io.rs index 85675dea..d7094f6d 100644 --- a/src/io.rs +++ b/src/io.rs @@ -1,6 +1,8 @@ +use core::mem::MaybeUninit; +use core::ops::{Add, AddAssign, Sub}; + use crate::kernel::constants::EFAULT; use crate::prelude::*; -use core::{cmp, mem::MaybeUninit}; #[must_use] #[derive(Debug)] @@ -236,18 +238,26 @@ impl Buffer for ByteBuffer<'_> { } } +pub trait Integer: + Add + Sub + AddAssign + Copy + PartialOrd + Ord +{ +} + +impl Integer for u64 {} +impl Integer for usize {} + /// Iterator that generates chunks of a given length from a start index /// until the end of the total length. /// /// The iterator returns a tuple of (start, len) for each chunk. 
-pub struct Chunks { - end: usize, - cur: usize, - chunk_len: usize, +pub struct Chunks { + end: T, + cur: T, + chunk_len: T, } -impl Chunks { - pub const fn new(start: usize, total_len: usize, chunk_len: usize) -> Self { +impl Chunks { + pub fn new(start: T, total_len: T, chunk_len: T) -> Self { Self { end: start + total_len, cur: start, @@ -256,8 +266,8 @@ impl Chunks { } } -impl Iterator for Chunks { - type Item = (usize, usize); +impl Iterator for Chunks { + type Item = (T, T); fn next(&mut self) -> Option { if self.cur >= self.end { @@ -265,7 +275,7 @@ impl Iterator for Chunks { } let start = self.cur; - let len = cmp::min(self.chunk_len, self.end - start); + let len = self.chunk_len.min(self.end - start); self.cur += self.chunk_len; Some((start, len)) diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 3e4b65d1..8e017336 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -1,23 +1,20 @@ mod mbr; -use super::{ - constants::ENOENT, - mem::{paging::Page, AsMemoryBlock as _}, - vfs::types::DeviceId, -}; -use crate::kernel::constants::{EEXIST, EINVAL}; -use crate::{ - io::{Buffer, FillResult}, - prelude::*, -}; -use alloc::{ - collections::btree_map::{BTreeMap, Entry}, - sync::Arc, -}; -use async_trait::async_trait; +use alloc::collections::btree_map::{BTreeMap, Entry}; +use alloc::sync::Arc; use core::cmp::Ordering; + +use async_trait::async_trait; use mbr::MBRPartTable; +use super::constants::ENOENT; +use super::mem::paging::Page; +use super::mem::PageExt; +use super::vfs::types::DeviceId; +use crate::io::{Buffer, Chunks, FillResult}; +use crate::kernel::constants::{EEXIST, EINVAL}; +use crate::prelude::*; + pub struct Partition { pub lba_offset: u64, pub sector_count: u64, @@ -193,177 +190,72 @@ impl BlockDevice { /// `offset` - offset in bytes /// pub async fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult { - let mut sector_start = offset as u64 / 512; - let mut first_sector_offset = offset as u64 % 512; - let mut 
sector_count = (first_sector_offset + buffer.total() as u64 + 511) / 512; - - let mut nfilled = 0; - 'outer: while sector_count != 0 { - let pages: &[Page]; - let page: Option; - let page_vec: Option>; - - let nread; - - match sector_count { - count if count <= 8 => { - nread = count; - - let _page = Page::alloc(); - page = Some(_page); - pages = core::slice::from_ref(page.as_ref().unwrap()); + let sector_start = offset as u64 / 512; + let mut first_sector_offset = offset % 512; + let nr_sectors = (first_sector_offset + buffer.total() + 511) / 512; + + let nr_sectors_per_batch = self.queue().max_request_pages() / 2 * 2 * 8; + + let mut nr_filled = 0; + for (start, nr_batch) in Chunks::new(sector_start, nr_sectors as u64, nr_sectors_per_batch) + { + let (page_slice, page, mut page_vec); + match nr_batch { + ..=8 => { + page = Page::alloc(); + page_slice = core::slice::from_ref(&page); } - count if count <= 16 => { - nread = count; - - let _pages = Page::alloc_order(1); - page = Some(_pages); - pages = core::slice::from_ref(page.as_ref().unwrap()); + ..=16 => { + page = Page::alloc_order(1); + page_slice = core::slice::from_ref(&page); + } + ..=32 => { + page = Page::alloc_order(2); + page_slice = core::slice::from_ref(&page); } count => { - nread = count.min(self.queue().max_request_pages()); + let nr_huge_pages = count as usize / 32; + let nr_small_pages = ((count as usize % 32) + 7) / 8; - let npages = (nread + 15) / 16; - let mut _page_vec = Vec::with_capacity(npages as usize); - for _ in 0..npages { - _page_vec.push(Page::alloc_order(1)); - } - page_vec = Some(_page_vec); - pages = page_vec.as_ref().unwrap().as_slice(); + let nr_pages = nr_huge_pages + nr_small_pages; + page_vec = Vec::with_capacity(nr_pages); + + page_vec.resize_with(nr_huge_pages, || Page::alloc_order(2)); + page_vec.resize_with(nr_pages, || Page::alloc()); + page_slice = &page_vec; } } let req = BlockDeviceRequest::Read { - sector: sector_start, - count: nread, - buffer: &pages, + sector: 
start, + count: nr_batch, + buffer: page_slice, }; self.commit_request(req).await?; - for page in pages.iter() { - // SAFETY: We are the only owner of the page so no one could be mutating it. - let data = unsafe { &page.as_memblk().as_bytes()[first_sector_offset as usize..] }; + for page in page_slice { + let pg = page.lock(); + let data = &pg.as_bytes()[first_sector_offset..]; first_sector_offset = 0; - match buffer.fill(data)? { - FillResult::Done(n) => nfilled += n, - FillResult::Partial(n) => { - nfilled += n; - break 'outer; - } - FillResult::Full => { - break 'outer; - } - } - } - - sector_start += nread; - sector_count -= nread; - } - - if nfilled == buffer.total() { - Ok(FillResult::Done(nfilled)) - } else { - Ok(FillResult::Partial(nfilled)) - } - } - - /// Write some data to the block device, may involve some copy and fragmentation - /// - /// # Arguments - /// `offset` - offset in bytes - /// `data` - data to write - /// - pub async fn write_some(&self, offset: usize, data: &[u8]) -> KResult { - let mut sector_start = offset as u64 / 512; - let mut first_sector_offset = offset as u64 % 512; - let mut remaining_data = data; - let mut nwritten = 0; - - while !remaining_data.is_empty() { - let pages: &[Page]; - let page: Option; - let page_vec: Option>; - - // Calculate sectors needed for this write - let write_end = first_sector_offset + remaining_data.len() as u64; - let sector_count = ((write_end + 511) / 512).min(self.queue().max_request_pages()); - - match sector_count { - count if count <= 8 => { - let _page = Page::alloc(); - page = Some(_page); - pages = core::slice::from_ref(page.as_ref().unwrap()); - } - count if count <= 16 => { - let _pages = Page::alloc_order(1); - page = Some(_pages); - pages = core::slice::from_ref(page.as_ref().unwrap()); - } - count => { - let npages = (count + 15) / 16; - let mut _page_vec = Vec::with_capacity(npages as usize); - for _ in 0..npages { - _page_vec.push(Page::alloc_order(1)); - } - page_vec = Some(_page_vec); 
- pages = page_vec.as_ref().unwrap().as_slice(); - } - } - - if first_sector_offset != 0 || remaining_data.len() < (sector_count * 512) as usize { - let read_req = BlockDeviceRequest::Read { - sector: sector_start, - count: sector_count, - buffer: pages, - }; - self.commit_request(read_req).await?; - } - - let mut data_offset = 0; - let mut page_offset = first_sector_offset as usize; + nr_filled += buffer.fill(data)?.allow_partial(); - for page in pages.iter() { - // SAFETY: We own the page and can modify it - let page_data = unsafe { - let memblk = page.as_memblk(); - core::slice::from_raw_parts_mut(memblk.addr().get() as *mut u8, memblk.len()) - }; - - let copy_len = - (remaining_data.len() - data_offset).min(page_data.len() - page_offset); - - if copy_len == 0 { - break; - } - - page_data[page_offset..page_offset + copy_len] - .copy_from_slice(&remaining_data[data_offset..data_offset + copy_len]); - - data_offset += copy_len; - page_offset = 0; // Only first page has offset - - if data_offset >= remaining_data.len() { + if buffer.available() == 0 { break; } } - let write_req = BlockDeviceRequest::Write { - sector: sector_start, - count: sector_count, - buffer: pages, - }; - self.commit_request(write_req).await?; - - let bytes_written = data_offset; - nwritten += bytes_written; - remaining_data = &remaining_data[bytes_written..]; - sector_start += sector_count; - first_sector_offset = 0; + if buffer.available() == 0 { + break; + } } - Ok(nwritten) + if buffer.available() == 0 { + Ok(FillResult::Done(nr_filled)) + } else { + Ok(FillResult::Partial(nr_filled)) + } } } diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs index c147306e..bfc826bf 100644 --- a/src/kernel/mem.rs +++ b/src/kernel/mem.rs @@ -8,9 +8,9 @@ mod mm_list; mod page_alloc; mod page_cache; -pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess}; +pub use access::PhysAccess; pub(self) use mm_area::MMArea; pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission}; pub 
use page_alloc::{GlobalPageAlloc, RawPage}; pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackendOps}; -pub use paging::{Page, PageBuffer}; +pub use paging::{Page, PageBuffer, PageExcl, PageExt}; diff --git a/src/kernel/mem/access.rs b/src/kernel/mem/access.rs index ce525a0a..328dcfbd 100644 --- a/src/kernel/mem/access.rs +++ b/src/kernel/mem/access.rs @@ -1,22 +1,7 @@ -use core::{num::NonZero, ptr::NonNull}; +use core::ptr::NonNull; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::{PAddr, PhysAccess as _PhysAccess}; -/// A block of memory starting at a non-zero address and having a specific length. -/// -/// This struct is used to represent a memory block that can be accessed -/// in the kernel space. -pub struct MemoryBlock { - addr: NonZero, - len: usize, -} - -pub trait AsMemoryBlock { - /// Translate the physical page the page object pointing to into kernel - /// accessible pointer. Use it with care. - fn as_memblk(&self) -> MemoryBlock; -} - pub trait PhysAccess { /// Translate the data that this address is pointing to into kernel /// accessible pointer. Use it with care. @@ -30,107 +15,6 @@ pub trait PhysAccess { unsafe fn as_ptr(&self) -> NonNull; } -impl MemoryBlock { - /// Create a new `MemoryBlock` with the given address and length. - /// - /// # Safety - /// The caller must ensure that the address is valid. - /// Otherwise, it may lead to undefined behavior. - pub unsafe fn new(addr: NonZero, len: usize) -> Self { - Self { addr, len } - } - - /// Get the start address of the memory block. - #[allow(dead_code)] - pub fn addr(&self) -> NonZero { - self.addr - } - - /// Get the length of the memory block. - #[allow(dead_code)] - pub fn len(&self) -> usize { - self.len - } - - /// Split the memory block into two parts at the given offset. 
- pub fn split_at(&self, at: usize) -> (Self, Self) { - if at > self.len { - panic!("Out of bounds"); - } - - let rhs_start = self.addr.checked_add(at).expect("Overflow"); - - let lhs = unsafe { Self::new(self.addr, at) }; - let rhs = unsafe { Self::new(rhs_start, self.len - at) }; - - (lhs, rhs) - } - - /// Provide a pointer to the data. - /// - /// # Safety - /// Using the returned pointer is undefined behavior if the address is not - /// properly aligned or the size is not equal to the size of `T`. - pub unsafe fn as_ptr_unchecked(&self) -> NonNull { - // SAFETY: `self.addr` is a non-zero value. - NonNull::new_unchecked(self.addr.get() as *mut T) - } - - /// Provide a pointer to the data. - /// - /// # Panic - /// Panic if the address is not properly aligned. - pub fn as_ptr(&self) -> NonNull { - let alignment = align_of::(); - - if self.addr.get() % alignment != 0 { - panic!("Alignment error"); - } - - unsafe { - // SAFETY: We've checked that `self.addr` is properly aligned. - self.as_ptr_unchecked() - } - } - - /// Provide a pointer to the bytes. - pub fn as_byte_ptr(&self) -> NonNull { - unsafe { - // SAFETY: No alignment check is needed for bytes. - self.as_ptr_unchecked() - } - } - - /// Provide immutable access to the data it pointed to. - /// - /// # Safety - /// This function is unsafe because it returns an immutable reference with - /// a created lifetime. - /// - /// The caller must ensure that the data has no other mutable aliases while - /// the reference is in use. Otherwise, it may lead to undefined behavior. - pub unsafe fn as_bytes<'a>(&self) -> &'a [u8] { - core::slice::from_raw_parts(self.as_ptr_unchecked().as_ptr(), self.len) - } - - /// Provide mutable access to the data it pointed to. - /// - /// # Panic - /// Panic if the address is not properly aligned or the size is not - /// equal to the size of `T`. - /// - /// # Safety - /// This function is unsafe because it returns a mutable reference with a - /// created lifetime. 
- /// - /// The caller must ensure that the data has no other immutable or mutable - /// aliases while the reference is in use. - /// Otherwise, it may lead to undefined behavior. - pub unsafe fn as_bytes_mut<'a>(&mut self) -> &'a mut [u8] { - core::slice::from_raw_parts_mut(self.as_ptr_unchecked().as_ptr(), self.len) - } -} - impl PhysAccess for PAddr { unsafe fn as_ptr(&self) -> NonNull { ArchPhysAccess::as_ptr(*self) diff --git a/src/kernel/mem/allocator.rs b/src/kernel/mem/allocator.rs index 36b19612..9e5df69b 100644 --- a/src/kernel/mem/allocator.rs +++ b/src/kernel/mem/allocator.rs @@ -1,13 +1,15 @@ -use super::page_alloc::RawPagePtr; -use super::{AsMemoryBlock, GlobalPageAlloc, Page}; use core::alloc::{GlobalAlloc, Layout}; use core::ptr::NonNull; + use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::PhysAccess; use eonix_mm::paging::{PAGE_SIZE_BITS, PFN}; use eonix_sync::LazyLock; use slab_allocator::SlabAllocator; +use super::page_alloc::RawPagePtr; +use super::{GlobalPageAlloc, Page, PageExt}; + static SLAB_ALLOCATOR: LazyLock> = LazyLock::new(|| SlabAllocator::new_in(GlobalPageAlloc)); @@ -23,7 +25,7 @@ unsafe impl GlobalAlloc for Allocator { let page_count = size >> PAGE_SIZE_BITS; let page = Page::alloc_at_least(page_count); - let ptr = page.as_memblk().as_ptr(); + let ptr = page.get_ptr(); page.into_raw(); ptr.as_ptr() diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index 731c5303..dcbeeb63 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -1,15 +1,17 @@ -use super::mm_list::EMPTY_PAGE; -use super::paging::AllocZeroed as _; -use super::{AsMemoryBlock, Mapping, Page, Permission}; -use crate::kernel::constants::EINVAL; -use crate::prelude::KResult; use core::borrow::Borrow; use core::cell::UnsafeCell; use core::cmp; + use eonix_mm::address::{AddrOps as _, VAddr, VRange}; use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; use eonix_mm::paging::{PAGE_SIZE, PFN}; +use super::mm_list::EMPTY_PAGE; 
+use super::{Mapping, Page, Permission}; +use crate::kernel::constants::EINVAL; +use crate::kernel::mem::{PageExcl, PageExt}; +use crate::prelude::KResult; + #[derive(Debug)] pub struct MMArea { range: UnsafeCell, @@ -105,25 +107,23 @@ impl MMArea { return; } - let new_page; + let mut new_page; if *pfn == EMPTY_PAGE.pfn() { - new_page = Page::zeroed(); + new_page = PageExcl::zeroed(); } else { - new_page = Page::alloc(); + new_page = PageExcl::alloc(); unsafe { // SAFETY: `page` is CoW, which means that others won't write to it. - let old_page_data = page.as_memblk().as_bytes(); - - // SAFETY: `new_page` is exclusive owned by us. - let new_page_data = new_page.as_memblk().as_bytes_mut(); + let old_page_data = page.get_bytes_ptr().as_ref(); + let new_page_data = new_page.as_bytes_mut(); new_page_data.copy_from_slice(old_page_data); }; } attr.remove(PageAttribute::ACCESSED); - *pfn = new_page.into_raw(); + *pfn = new_page.into_page().into_raw(); } /// # Arguments @@ -156,13 +156,12 @@ impl MMArea { // Bss is embarrassing in pagecache! 
// We have to assume cnt_to_read < PAGE_SIZE all bss if cnt_to_read < PAGE_SIZE { - let new_page = Page::zeroed(); - unsafe { - let page_data = new_page.as_memblk().as_bytes_mut(); - page_data[..cnt_to_read] - .copy_from_slice(&page.as_memblk().as_bytes()[..cnt_to_read]); - } - *pfn = new_page.into_raw(); + let mut new_page = PageExcl::zeroed(); + + new_page.as_bytes_mut()[..cnt_to_read] + .copy_from_slice(&page.lock().as_bytes()[..cnt_to_read]); + + *pfn = new_page.into_page().into_raw(); } else { *pfn = page.clone().into_raw(); } @@ -182,13 +181,12 @@ impl MMArea { cache_page.set_dirty(); *pfn = page.clone().into_raw(); } else { - let new_page = Page::zeroed(); - unsafe { - let page_data = new_page.as_memblk().as_bytes_mut(); - page_data[..cnt_to_read] - .copy_from_slice(&page.as_memblk().as_bytes()[..cnt_to_read]); - } - *pfn = new_page.into_raw(); + let mut new_page = PageExcl::zeroed(); + + new_page.as_bytes_mut()[..cnt_to_read] + .copy_from_slice(&page.lock().as_bytes()[..cnt_to_read]); + + *pfn = new_page.into_page().into_raw(); } attr.insert(PageAttribute::WRITE); diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index ad1e45c2..17dc1b05 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -1,33 +1,30 @@ mod mapping; mod page_fault; -use super::address::{VAddrExt as _, VRangeExt as _}; -use super::page_alloc::GlobalPageAlloc; -use super::paging::AllocZeroed as _; -use super::{AsMemoryBlock, MMArea, Page}; -use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; -use crate::kernel::mem::page_alloc::RawPagePtr; -use crate::{prelude::*, sync::ArcSwap}; use alloc::collections::btree_set::BTreeSet; use core::fmt; use core::sync::atomic::{AtomicUsize, Ordering}; + use eonix_hal::mm::{ flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchPagingMode, ArchPhysAccess, GLOBAL_PAGE_TABLE, }; -use eonix_mm::address::{Addr as _, PAddr}; -use eonix_mm::page_table::PageAttribute; -use eonix_mm::paging::PFN; -use 
eonix_mm::{ - address::{AddrOps as _, VAddr, VRange}, - page_table::{PageTable, RawAttribute, PTE}, - paging::PAGE_SIZE, -}; +use eonix_mm::address::{Addr as _, AddrOps as _, PAddr, VAddr, VRange}; +use eonix_mm::page_table::{PageAttribute, PageTable, RawAttribute, PTE}; +use eonix_mm::paging::{PAGE_SIZE, PFN}; use eonix_sync::{LazyLock, Mutex}; - pub use mapping::{FileMapping, Mapping}; pub use page_fault::handle_kernel_page_fault; +use super::address::{VAddrExt as _, VRangeExt as _}; +use super::page_alloc::GlobalPageAlloc; +use super::paging::AllocZeroed as _; +use super::{MMArea, Page, PageExt}; +use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; +use crate::kernel::mem::page_alloc::RawPagePtr; +use crate::prelude::*; +use crate::sync::ArcSwap; + pub static EMPTY_PAGE: LazyLock = LazyLock::new(|| Page::zeroed()); #[derive(Debug, Clone, Copy)] @@ -697,12 +694,10 @@ impl MMList { unsafe { // SAFETY: We are sure that the page is valid and we have the right to access it. Page::with_raw(pte.get_pfn(), |page| { - // SAFETY: The caller guarantees that no one else is using the page. 
- let page_data = page.as_memblk().as_bytes_mut(); - func( - offset + idx * 0x1000, - &mut page_data[start_offset..end_offset], - ); + let mut pg = page.lock(); + let page_data = &mut pg.as_bytes_mut()[start_offset..end_offset]; + + func(offset + idx * 0x1000, page_data); }); } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 54d4d590..08536693 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -81,7 +81,7 @@ pub struct RawPagePtr(NonNull); impl PageFlags { pub const PRESENT: u32 = 1 << 0; - // pub const LOCKED: u32 = 1 << 1; + pub const LOCKED: u32 = 1 << 1; pub const BUDDY: u32 = 1 << 2; pub const SLAB: u32 = 1 << 3; pub const DIRTY: u32 = 1 << 4; @@ -99,6 +99,13 @@ impl PageFlags { pub fn clear(&self, flag: u32) { self.0.fetch_and(!flag, Ordering::Relaxed); } + + /// Set the flag and return whether it was already set. + /// + /// If multiple flags are given, returns true if any of them were already set. 
+ pub fn test_and_set(&self, flag: u32) -> bool { + (self.0.fetch_or(flag, Ordering::Relaxed) & flag) != 0 + } } impl RawPagePtr { diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 9deb50cf..6a1c04ca 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -1,22 +1,23 @@ -use super::{paging::AllocZeroed, Page}; -use crate::{ - io::{Buffer, FillResult, Stream}, - kernel::mem::page_alloc::RawPagePtr, - prelude::KResult, - GlobalPageAlloc, -}; -use align_ext::AlignExt; use alloc::boxed::Box; -use alloc::{collections::btree_map::BTreeMap, sync::Weak}; +use alloc::collections::btree_map::BTreeMap; +use alloc::sync::Weak; +use core::future::Future; +use core::mem::ManuallyDrop; + +use align_ext::AlignExt; use async_trait::async_trait; -use core::{future::Future, mem::ManuallyDrop}; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::{ - address::{PAddr, PhysAccess}, - paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN}, -}; +use eonix_mm::address::{PAddr, PhysAccess}; +use eonix_mm::paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN}; use eonix_sync::Mutex; +use super::paging::AllocZeroed; +use super::Page; +use crate::io::{Buffer, FillResult, Stream}; +use crate::kernel::mem::page_alloc::RawPagePtr; +use crate::prelude::KResult; +use crate::GlobalPageAlloc; + pub struct PageCache { pages: Mutex>, backend: Weak, @@ -315,14 +316,6 @@ impl CachePageStream { pub fn new(page: CachePage) -> Self { Self { page, cur: 0 } } - - pub fn remaining(&self) -> usize { - self.page.valid_size().saturating_sub(self.cur) - } - - pub fn is_drained(&self) -> bool { - self.cur >= self.page.valid_size() - } } impl Stream for CachePageStream { diff --git a/src/kernel/mem/paging.rs b/src/kernel/mem/paging.rs index 8c5f41f2..bca573fb 100644 --- a/src/kernel/mem/paging.rs +++ b/src/kernel/mem/paging.rs @@ -1,41 +1,54 @@ -use super::{access::AsMemoryBlock, page_alloc::GlobalPageAlloc, MemoryBlock, PhysAccess}; +use 
core::ops::Deref; +use core::ptr::NonNull; + +use eonix_mm::paging::Page as GenericPage; + +use super::page_alloc::GlobalPageAlloc; +use super::PhysAccess; use crate::io::{Buffer, FillResult}; -use eonix_mm::paging::{Page as GenericPage, PageAlloc}; pub type Page = GenericPage; /// A buffer that wraps a page and provides a `Buffer` interface. pub struct PageBuffer { - page: Page, + page: PageExcl, offset: usize, } +pub struct PageLocked<'a> { + page: &'a Page, +} + +/// A page that is exclusively owned. +#[repr(transparent)] +pub struct PageExcl(Page); + pub trait AllocZeroed { fn zeroed() -> Self; } -impl AsMemoryBlock for GenericPage { - fn as_memblk(&self) -> MemoryBlock { - unsafe { - // SAFETY: `self.start()` points to valid memory of length `self.len()`. - MemoryBlock::new(self.start().as_ptr::<()>().addr(), self.len()) - } +pub trait PageExt { + fn lock(&self) -> PageLocked; + + /// Get a vmem pointer to the page data as a byte slice. + fn get_bytes_ptr(&self) -> NonNull<[u8]>; + + /// Get a vmem pointer to the start of the page. + fn get_ptr(&self) -> NonNull { + self.get_bytes_ptr().cast() } } impl PageBuffer { pub fn new() -> Self { Self { - page: Page::alloc(), + page: PageExcl::alloc(), offset: 0, } } pub fn all(&self) -> &[u8] { - unsafe { - // SAFETY: The page is exclusivly owned by us. - self.page.as_memblk().as_bytes() - } + self.page.as_bytes() } pub fn data(&self) -> &[u8] { @@ -43,10 +56,7 @@ impl PageBuffer { } pub fn available_mut(&mut self) -> &mut [u8] { - unsafe { - // SAFETY: The page is exclusivly owned by us. - &mut self.page.as_memblk().as_bytes_mut()[self.offset..] - } + &mut self.page.as_bytes_mut()[self.offset..] } } @@ -80,10 +90,87 @@ impl Buffer for PageBuffer { impl AllocZeroed for Page { fn zeroed() -> Self { let page = Self::alloc(); + + page.lock().as_bytes_mut().fill(0); + + page + } +} + +impl PageExt for Page { + fn lock(&self) -> PageLocked { + // TODO: Actually perform the lock. 
+ PageLocked { page: self } + } + + fn get_bytes_ptr(&self) -> NonNull<[u8]> { unsafe { - // SAFETY: The page is exclusivly owned by us. - page.as_memblk().as_bytes_mut().fill(0); + // SAFETY: `self.start()` can't be null. + NonNull::slice_from_raw_parts(self.start().as_ptr(), self.len()) } - page + } +} + +impl PageLocked<'_> { + pub fn as_bytes(&self) -> &[u8] { + unsafe { + // SAFETY: `self.start()` points to valid memory of length `self.len()`. + core::slice::from_raw_parts(self.start().as_ptr().as_ptr(), self.len()) + } + } + + pub fn as_bytes_mut(&mut self) -> &mut [u8] { + unsafe { + // SAFETY: `self.start()` points to valid memory of length `self.len()`. + core::slice::from_raw_parts_mut(self.start().as_ptr().as_ptr(), self.len()) + } + } +} + +impl Deref for PageLocked<'_> { + type Target = Page; + + fn deref(&self) -> &Self::Target { + self.page + } +} + +impl PageExcl { + pub fn alloc() -> Self { + Self(Page::alloc()) + } + + pub fn alloc_order(order: u32) -> Self { + Self(Page::alloc_order(order)) + } + + pub fn zeroed() -> Self { + Self(Page::zeroed()) + } + + pub fn as_bytes(&self) -> &[u8] { + unsafe { + // SAFETY: The page is exclusively owned by us. + self.get_bytes_ptr().as_ref() + } + } + + pub fn as_bytes_mut(&mut self) -> &mut [u8] { + unsafe { + // SAFETY: The page is exclusively owned by us. 
+ self.get_bytes_ptr().as_mut() + } + } + + pub fn into_page(self) -> Page { + self.0 + } +} + +impl Deref for PageExcl { + type Target = Page; + + fn deref(&self) -> &Self::Target { + &self.0 } } diff --git a/src/kernel/vfs/file/mod.rs b/src/kernel/vfs/file/mod.rs index bb1c66ec..eb00cc4c 100644 --- a/src/kernel/vfs/file/mod.rs +++ b/src/kernel/vfs/file/mod.rs @@ -2,29 +2,24 @@ mod inode_file; mod pipe; mod terminal_file; -use crate::{ - io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}, - kernel::{ - constants::{EBADF, EINTR, EINVAL, ENOTTY}, - mem::{AsMemoryBlock, Page}, - task::Thread, - CharDevice, - }, - prelude::KResult, -}; use alloc::sync::Arc; -use bitflags::bitflags; -use core::{ - ops::Deref, - sync::atomic::{AtomicI32, AtomicU32, Ordering}, -}; -use pipe::{PipeReadEnd, PipeWriteEnd}; -use posix_types::open::OpenFlags; +use core::ops::Deref; +use core::sync::atomic::{AtomicI32, AtomicU32, Ordering}; +use bitflags::bitflags; pub use inode_file::InodeFile; pub use pipe::Pipe; +use pipe::{PipeReadEnd, PipeWriteEnd}; +use posix_types::open::OpenFlags; pub use terminal_file::TerminalFile; +use crate::io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}; +use crate::kernel::constants::{EBADF, EINTR, EINVAL, ENOTTY}; +use crate::kernel::mem::PageExcl; +use crate::kernel::task::Thread; +use crate::kernel::CharDevice; +use crate::prelude::KResult; + pub enum FileType { Inode(InodeFile), PipeRead(PipeReadEnd), @@ -99,9 +94,8 @@ impl FileType { } pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { - let buffer_page = Page::alloc(); - // SAFETY: We are the only owner of the page. 
- let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() }; + let mut buffer_page = PageExcl::alloc(); + let buffer = buffer_page.as_bytes_mut(); self.sendfile_check()?; diff --git a/src/lib.rs b/src/lib.rs index 98e196f8..959cb29f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,9 @@ extern crate alloc; +#[macro_use] +extern crate static_assertions; + #[cfg(any(target_arch = "riscv64", target_arch = "x86_64"))] extern crate unwinding; @@ -28,36 +31,33 @@ mod prelude; mod rcu; mod sync; -use crate::kernel::task::alloc_pid; -use alloc::{ffi::CString, sync::Arc}; -use core::{ - hint::spin_loop, - sync::atomic::{AtomicBool, AtomicUsize, Ordering}, -}; -use eonix_hal::{ - arch_exported::bootstrap::shutdown, - context::TaskContext, - processor::{halt, CPU, CPU_COUNT}, - traits::{context::RawTaskContext, trap::IrqState}, - trap::disable_irqs_save, -}; +use alloc::ffi::CString; +use alloc::sync::Arc; +use core::hint::spin_loop; +use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + +use eonix_hal::arch_exported::bootstrap::shutdown; +use eonix_hal::context::TaskContext; +use eonix_hal::processor::{halt, CPU, CPU_COUNT}; +use eonix_hal::traits::context::RawTaskContext; +use eonix_hal::traits::trap::IrqState; +use eonix_hal::trap::disable_irqs_save; use eonix_mm::address::PRange; -use eonix_runtime::{executor::Stack, scheduler::RUNTIME}; -use kernel::{ - mem::GlobalPageAlloc, - task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, - vfs::{ - dentry::Dentry, - mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, - types::Permission, - FsContext, - }, - CharDevice, -}; +use eonix_runtime::executor::Stack; +use eonix_runtime::scheduler::RUNTIME; +use kernel::mem::GlobalPageAlloc; +use kernel::task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}; +use kernel::vfs::dentry::Dentry; +use kernel::vfs::mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}; +use kernel::vfs::types::Permission; 
+use kernel::vfs::FsContext; +use kernel::CharDevice; use kernel_init::setup_memory; use path::Path; use prelude::*; +use crate::kernel::task::alloc_pid; + #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] fn do_panic() -> ! { #[cfg(target_arch = "riscv64")] From 9a0eeb00ae79a656a25e619d74eecbcc095e654b Mon Sep 17 00:00:00 2001 From: greatbridf Date: Tue, 23 Dec 2025 00:12:42 +0800 Subject: [PATCH 35/54] tls: rework of arch's UserTLS design Separate old UserTLS into UserTLS and UserTLSDescriptor. UserTLS is for threads to hold infomation about its storage. Descriptors are used in clone syscalls. Signed-off-by: greatbridf --- Cargo.lock | 5 +- Cargo.toml | 1 + crates/eonix_hal/src/arch/riscv64/cpu.rs | 17 --- crates/eonix_hal/src/lib.rs | 2 +- .../posix_types/src/syscall_no/loongarch64.rs | 3 +- crates/posix_types/src/syscall_no/riscv64.rs | 2 +- src/kernel/interrupt.rs | 12 +- src/kernel/pcie/init.rs | 18 +-- src/kernel/syscall/file_rw.rs | 67 +++++---- src/kernel/syscall/procops.rs | 134 +++++++----------- src/kernel/task.rs | 21 ++- src/kernel/task/clone.rs | 42 +++--- src/kernel/task/thread.rs | 76 ++++------ src/kernel/task/user_tls/mod.rs | 34 +++++ src/kernel/task/user_tls/x86_64.rs | 83 +++++++++++ src/kernel/user/dataflow.rs | 21 ++- 16 files changed, 304 insertions(+), 234 deletions(-) create mode 100644 src/kernel/task/user_tls/mod.rs create mode 100644 src/kernel/task/user_tls/x86_64.rs diff --git a/Cargo.lock b/Cargo.lock index c70190a0..5487f284 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,9 +75,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "critical-section" @@ -164,6 +164,7 @@ dependencies = [ "atomic_unique_refcell", "bitflags", "buddy_allocator", + 
"cfg-if", "eonix_hal", "eonix_log", "eonix_macros", diff --git a/Cargo.toml b/Cargo.toml index dca5d34d..4fcb6f70 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ futures = { version = "0.3.31", features = [ "async-await", ], default-features = false } static_assertions = "1.1.0" +cfg-if = "1.0.4" [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/crates/eonix_hal/src/arch/riscv64/cpu.rs b/crates/eonix_hal/src/arch/riscv64/cpu.rs index 9c843eaf..3c58580e 100644 --- a/crates/eonix_hal/src/arch/riscv64/cpu.rs +++ b/crates/eonix_hal/src/arch/riscv64/cpu.rs @@ -27,22 +27,11 @@ static DEFAULT_TRAP_CONTEXT: MaybeUninit = MaybeUninit::uninit(); #[eonix_percpu::define_percpu] static LOCAL_CPU: LazyLock = LazyLock::new(|| CPU::new(CPUID.get())); -#[derive(Debug, Clone)] -pub enum UserTLS { - Base(u64), -} - /// RISC-V Hart pub struct CPU { pub(crate) interrupt: InterruptControl, } -impl UserTLS { - pub fn new(base: u64) -> Self { - Self::Base(base) - } -} - impl CPU { fn new(cpuid: usize) -> Self { Self { @@ -66,12 +55,6 @@ impl CPU { sscratch::write(DEFAULT_TRAP_CONTEXT.as_ptr() as usize); } - pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) {} - - pub fn set_tls32(self: Pin<&mut Self>, _user_tls: &UserTLS) { - // nothing - } - pub fn local() -> PreemptGuard> { unsafe { // SAFETY: We pass the reference into a `PreemptGuard`, which ensures diff --git a/crates/eonix_hal/src/lib.rs b/crates/eonix_hal/src/lib.rs index e789ecbb..b9c7d053 100644 --- a/crates/eonix_hal/src/lib.rs +++ b/crates/eonix_hal/src/lib.rs @@ -19,7 +19,7 @@ pub mod fpu { } pub mod processor { - pub use crate::arch::cpu::{halt, UserTLS, CPU, CPU_COUNT}; + pub use crate::arch::cpu::{halt, CPU, CPU_COUNT}; } /// Re-export the arch module for use in other crates diff --git a/crates/posix_types/src/syscall_no/loongarch64.rs b/crates/posix_types/src/syscall_no/loongarch64.rs index 
19776a65..b0d54689 100644 --- a/crates/posix_types/src/syscall_no/loongarch64.rs +++ b/crates/posix_types/src/syscall_no/loongarch64.rs @@ -136,7 +136,7 @@ pub const SYS_RT_SIGSUSPEND: usize = 133; pub const SYS_RT_SIGACTION: usize = 134; pub const SYS_RT_SIGPROCMASK: usize = 135; pub const SYS_RT_SIGPENDING: usize = 136; -pub const SYS_RT_SIGTIMEDWAIT_TIME32: usize = 137; +pub const SYS_RT_SIGTIMEDWAIT: usize = 137; pub const SYS_RT_SIGQUEUEINFO: usize = 138; pub const SYS_RT_SIGRETURN: usize = 139; pub const SYS_SETPRIORITY: usize = 140; @@ -295,7 +295,6 @@ pub const SYS_RECVMMSG: usize = 417; pub const SYS_MQ_TIMEDSEND: usize = 418; pub const SYS_MQ_TIMEDRECEIVE: usize = 419; pub const SYS_SEMTIMEDOP: usize = 420; -pub const SYS_RT_SIGTIMEDWAIT: usize = 421; pub const SYS_FUTEX: usize = 422; pub const SYS_SCHED_RR_GET_INTERVAL: usize = 423; pub const SYS_PIDFD_SEND_SIGNAL: usize = 424; diff --git a/crates/posix_types/src/syscall_no/riscv64.rs b/crates/posix_types/src/syscall_no/riscv64.rs index 4457c20e..076942e9 100644 --- a/crates/posix_types/src/syscall_no/riscv64.rs +++ b/crates/posix_types/src/syscall_no/riscv64.rs @@ -136,7 +136,7 @@ pub const SYS_RT_SIGSUSPEND: usize = 133; pub const SYS_RT_SIGACTION: usize = 134; pub const SYS_RT_SIGPROCMASK: usize = 135; pub const SYS_RT_SIGPENDING: usize = 136; -pub const SYS_RT_SIGTIMEDWAIT_TIME32: usize = 137; +pub const SYS_RT_SIGTIMEDWAIT: usize = 137; pub const SYS_RT_SIGQUEUEINFO: usize = 138; pub const SYS_RT_SIGRETURN: usize = 139; pub const SYS_SETPRIORITY: usize = 140; diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs index 742727cb..2092bfcb 100644 --- a/src/kernel/interrupt.rs +++ b/src/kernel/interrupt.rs @@ -1,15 +1,17 @@ -use super::mem::handle_kernel_page_fault; -use super::task::block_on; -use super::timer::timer_interrupt; -use crate::kernel::constants::EINVAL; -use crate::prelude::*; use alloc::sync::Arc; + use eonix_hal::traits::fault::Fault; use 
eonix_hal::traits::trap::{RawTrapContext, TrapType}; use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::SpinIrq as _; +use super::mem::handle_kernel_page_fault; +use super::task::block_on; +use super::timer::timer_interrupt; +use crate::kernel::constants::EINVAL; +use crate::prelude::*; + static IRQ_HANDLERS: Spin<[Vec>; 16]> = Spin::new([const { Vec::new() }; 16]); diff --git a/src/kernel/pcie/init.rs b/src/kernel/pcie/init.rs index c0253f4e..4c183bc5 100644 --- a/src/kernel/pcie/init.rs +++ b/src/kernel/pcie/init.rs @@ -1,13 +1,14 @@ -use super::{ - device::{PCIDevice, SegmentGroup, PCIE_DEVICES}, - error::PciError, -}; -use crate::kernel::{mem::PhysAccess as _, pcie::device::PciMemoryAllocator}; -use acpi::{AcpiHandler, PhysicalMapping}; use alloc::collections::btree_map::Entry; use alloc::vec; + +use acpi::{AcpiHandler, PhysicalMapping}; use eonix_log::println_trace; -use eonix_mm::address::PAddr; +use eonix_mm::address::{PAddr, PRange}; + +use super::device::{PCIDevice, SegmentGroup, PCIE_DEVICES}; +use super::error::PciError; +use crate::kernel::mem::PhysAccess as _; +use crate::kernel::pcie::device::PciMemoryAllocator; #[derive(Clone)] struct AcpiHandlerImpl; @@ -67,10 +68,11 @@ pub fn init_pcie() -> Result<(), PciError> { #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { - use crate::kernel::constants::{EINVAL, EIO, ENOENT}; use eonix_hal::arch_exported::fdt::FDT; use eonix_mm::address::PRange; + use crate::kernel::constants::{EINVAL, EIO, ENOENT}; + let pcie_node = FDT .find_compatible(&["pci-host-ecam-generic"]) .ok_or(ENOENT)?; diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index db32b0e5..93a543d7 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,33 +1,31 @@ +use alloc::sync::Arc; +use core::time::Duration; + +use posix_types::ctypes::{Long, PtrT}; +use posix_types::namei::RenameFlags; +use posix_types::open::{AtFlags, OpenFlags}; 
+use posix_types::poll::FDSet; +use posix_types::signal::{SigSet, Signal}; +use posix_types::stat::{Stat, StatX, TimeSpec}; +use posix_types::syscall_no::*; + use super::{FromSyscallArg, User}; -use crate::io::IntoStream; +use crate::io::{Buffer, BufferFill, IntoStream}; use crate::kernel::constants::{ EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, }; use crate::kernel::syscall::UserMut; use crate::kernel::task::Thread; use crate::kernel::timer::sleep; +use crate::kernel::user::{ + CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString, +}; +use crate::kernel::vfs::dentry::Dentry; use crate::kernel::vfs::filearray::FD; use crate::kernel::vfs::types::{DeviceId, Mode}; use crate::kernel::vfs::{PollEvent, SeekOption}; -use crate::{ - io::{Buffer, BufferFill}, - kernel::{ - user::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}, - vfs::dentry::Dentry, - }, - path::Path, - prelude::*, -}; -use alloc::sync::Arc; -use core::time::Duration; -use posix_types::ctypes::{Long, PtrT}; -use posix_types::namei::RenameFlags; -use posix_types::open::{AtFlags, OpenFlags}; -use posix_types::poll::FDSet; -use posix_types::signal::{SigSet, Signal}; -use posix_types::stat::Stat; -use posix_types::stat::{StatX, TimeSpec}; -use posix_types::syscall_no::*; +use crate::path::Path; +use crate::prelude::*; impl FromSyscallArg for OpenFlags { fn from_arg(value: usize) -> Self { @@ -128,7 +126,7 @@ async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mode: Mode) -> #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_OPEN)] -async fn open(path: User, flags: OpenFlags, mode: u32) -> KResult { +async fn open(path: User, flags: OpenFlags, mode: Mode) -> KResult { sys_openat(thread, FD::AT_FDCWD, path, flags, mode).await } @@ -145,7 +143,10 @@ async fn dup(fd: FD) -> KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_DUP2)] async fn dup2(old_fd: FD, new_fd: FD) -> KResult { - 
thread.files.dup_to(old_fd, new_fd, OpenFlags::empty()) + thread + .files + .dup_to(old_fd, new_fd, OpenFlags::empty()) + .await } #[eonix_macros::define_syscall(SYS_DUP3)] @@ -172,7 +173,13 @@ async fn pipe(pipe_fd: UserMut<[FD; 2]>) -> KResult<()> { async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - thread.files.get(fd).ok_or(EBADF)?.getdents(&mut buffer)?; + thread + .files + .get(fd) + .ok_or(EBADF)? + .getdents(&mut buffer) + .await?; + Ok(buffer.wrote()) } @@ -264,7 +271,7 @@ async fn mkdirat(dirfd: FD, pathname: User, mode: Mode) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKDIR)] -async fn mkdir(pathname: User, mode: u32) -> KResult<()> { +async fn mkdir(pathname: User, mode: Mode) -> KResult<()> { sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode).await } @@ -280,9 +287,9 @@ async fn truncate(pathname: User, length: usize) -> KResult<()> { let path = UserString::new(pathname)?; let path = Path::new(path.as_cstr().to_bytes())?; - let dentry = Dentry::open(&thread.fs_context, path, true)?; + let dentry = Dentry::open(&thread.fs_context, path, true).await?; - dentry.truncate(length) + dentry.truncate(length).await } #[eonix_macros::define_syscall(SYS_UNLINKAT)] @@ -296,7 +303,7 @@ async fn unlinkat(dirfd: FD, pathname: User) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_UNLINK)] async fn unlink(pathname: User) -> KResult<()> { - sys_unlinkat(thread, FD::AT_FDCWD, pathname) + sys_unlinkat(thread, FD::AT_FDCWD, pathname).await } #[eonix_macros::define_syscall(SYS_SYMLINKAT)] @@ -310,7 +317,7 @@ async fn symlinkat(target: User, dirfd: FD, linkpath: User) -> KResult<( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SYMLINK)] async fn symlink(target: User, linkpath: User) -> KResult<()> { - sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath) + sys_symlinkat(thread, target, FD::AT_FDCWD, 
linkpath).await } #[derive(Clone, Copy, Debug)] @@ -347,7 +354,7 @@ async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: UserDeviceI #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKNOD)] -async fn mknod(pathname: User, mode: u32, dev: u32) -> KResult<()> { +async fn mknod(pathname: User, mode: Mode, dev: UserDeviceId) -> KResult<()> { sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev).await } @@ -389,7 +396,7 @@ async fn lseek(fd: FD, offset: u64, whence: u32) -> KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_LLSEEK)] -fn llseek( +async fn llseek( fd: FD, offset_high: u32, offset_low: u32, diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index b4d3e449..1359d0ab 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -1,38 +1,37 @@ +use alloc::borrow::ToOwned; +use alloc::ffi::CString; +use core::time::Duration; + +use bitflags::bitflags; +use eonix_hal::traits::trap::RawTrapContext; +use eonix_hal::trap::TrapContext; +use eonix_mm::address::Addr as _; +use eonix_sync::AsProof as _; +use posix_types::ctypes::PtrT; +use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; +use posix_types::stat::{TimeSpec, TimeVal}; +use posix_types::syscall_no::*; +use posix_types::SIGNAL_NOW; + use super::SyscallNoReturn; use crate::io::Buffer; use crate::kernel::constants::{ - CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINVAL, ENOENT, ENOTDIR, ERANGE, ESRCH, -}; -use crate::kernel::constants::{ - ENOSYS, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, + CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINVAL, ENOENT, ENOSYS, ENOTDIR, + ERANGE, ESRCH, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, }; use crate::kernel::mem::PageBuffer; use crate::kernel::syscall::{User, UserMut}; use crate::kernel::task::{ - do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, 
ProcessList, ProgramLoader, - RobustListHead, SignalAction, Thread, WaitId, WaitType, + do_clone, futex_wait, futex_wake, parse_futexop, yield_now, CloneArgs, FutexFlags, FutexOp, + ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, }; -use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; -use crate::kernel::user::UserString; -use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::user::{UserBuffer, UserPointer, UserPointerMut, UserString}; +use crate::kernel::vfs::dentry::Dentry; use crate::kernel::vfs::types::Permission; -use crate::kernel::vfs::{self, dentry::Dentry}; +use crate::kernel::vfs::{self}; use crate::path::Path; -use crate::{kernel::user::UserBuffer, prelude::*}; -use alloc::borrow::ToOwned; -use alloc::ffi::CString; -use bitflags::bitflags; -use core::time::Duration; -use eonix_hal::processor::UserTLS; -use eonix_hal::traits::trap::RawTrapContext; -use eonix_hal::trap::TrapContext; -use eonix_mm::address::Addr as _; -use eonix_sync::AsProof as _; -use posix_types::ctypes::PtrT; -use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; -use posix_types::stat::TimeVal; -use posix_types::{syscall_no::*, SIGNAL_NOW}; +use crate::prelude::*; #[repr(C)] #[derive(Debug, Clone, Copy)] @@ -366,7 +365,7 @@ async fn wait4( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_WAITPID)] async fn waitpid(waitpid: i32, arg1: UserMut, options: u32) -> KResult { - sys_wait4(thread, waitpid, arg1, options, core::ptr::null_mut()).await + sys_wait4(thread, waitpid, arg1, options, UserMut::null()).await } #[eonix_macros::define_syscall(SYS_SETSID)] @@ -493,51 +492,15 @@ async fn gettid() -> KResult { Ok(thread.tid) } -pub fn parse_user_tls(arch_tls: usize) -> KResult { - #[cfg(target_arch = "x86_64")] - { - let desc = arch_tls as *mut posix_types::x86_64::UserDescriptor; - let desc_pointer = UserPointerMut::new(desc)?; - let mut desc = desc_pointer.read()?; - - // 
Clear the TLS area if it is not present. - if desc.flags.is_read_exec_only() && !desc.flags.is_present() { - if desc.limit != 0 && desc.base != 0 { - let len = if desc.flags.is_limit_in_pages() { - (desc.limit as usize) << 12 - } else { - desc.limit as usize - }; - - CheckedUserPointer::new(desc.base as _, len)?.zero()?; - } - } - - let (new_tls, entry) = - UserTLS::new32(desc.base, desc.limit, desc.flags.is_limit_in_pages()); - desc.entry = entry; - desc_pointer.write(desc)?; - - Ok(new_tls) - } - - #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] - { - Ok(UserTLS::new(arch_tls as u64)) - } -} - #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SET_THREAD_AREA)] -async fn set_thread_area(arch_tls: usize) -> KResult<()> { - thread.set_user_tls(parse_user_tls(arch_tls)?)?; +async fn set_thread_area(tls: PtrT) -> KResult<()> { + use crate::kernel::task::UserTLSDescriptor; - // SAFETY: Preemption is disabled on calling `load_thread_area32()`. - unsafe { - eonix_preempt::disable(); - thread.load_thread_area32(); - eonix_preempt::enable(); - } + let tls = UserTLSDescriptor::new(tls)?.read()?; + + thread.set_user_tls(tls)?; + thread.activate_tls(); Ok(()) } @@ -651,18 +614,14 @@ async fn rt_sigprocmask( Ok(()) } -#[repr(C)] -#[derive(Clone, Copy)] -struct TimeSpec32 { - tv_sec: i32, - tv_nsec: i32, -} - -#[eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT_TIME32)] -async fn rt_sigtimedwait_time32( +#[cfg_attr( + any(target_arch = "riscv64", target_arch = "loongarch64"), + eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT) +)] +async fn rt_sigtimedwait( _uthese: User, _uinfo: UserMut, - _uts: User, + _uts: User, ) -> KResult { // TODO Ok(0) @@ -820,7 +779,7 @@ async fn clone( clone_flags: usize, new_sp: usize, parent_tidptr: UserMut, - tls: usize, + tls: PtrT, child_tidptr: UserMut, ) -> KResult { let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?; @@ -925,8 +884,23 @@ async fn sigreturn() -> 
KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_ARCH_PRCTL)] -async fn arch_prctl(option: u32, addr: u32) -> KResult { - sys_arch_prctl(thread, option, addr).await +async fn arch_prctl(option: u32, addr: PtrT) -> KResult { + match option { + PR_SET_NAME => { + let name = UserPointer::<[u8; 16]>::new(User::with_addr(addr.addr()))?.read()?; + let len = name.iter().position(|&c| c == 0).unwrap_or(15); + thread.set_name(name[..len].into()); + Ok(0) + } + PR_GET_NAME => { + let name = thread.get_name(); + let len = name.len().min(15); + let name: [u8; 16] = core::array::from_fn(|i| if i < len { name[i] } else { 0 }); + UserPointerMut::<[u8; 16]>::new(UserMut::with_addr(addr.addr()))?.write(name)?; + Ok(0) + } + _ => Err(EINVAL), + } } pub fn keep_alive() {} diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 2ef58069..3fe6fe97 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -8,6 +8,7 @@ mod process_list; mod session; mod signal; mod thread; +mod user_tls; pub use clone::{do_clone, CloneArgs, CloneFlags}; pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; @@ -19,6 +20,7 @@ pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; pub use thread::{yield_now, Thread, ThreadAlloc, ThreadBuilder}; +pub use user_tls::{UserTLS, UserTLSDescriptor}; fn do_block_on(mut future: core::pin::Pin<&mut F>) -> F::Output where @@ -79,30 +81,25 @@ pub async fn stackful(mut future: F) -> F::Output where F: core::future::Future, { - use crate::kernel::{ - interrupt::{default_fault_handler, default_irq_handler}, - timer::{should_reschedule, timer_interrupt}, - }; use alloc::sync::Arc; use alloc::task::Wake; use core::cell::UnsafeCell; use core::future::Future; use core::pin::Pin; use core::ptr::NonNull; - use core::sync::atomic::AtomicBool; - use core::sync::atomic::Ordering; - use core::task::Context; - use core::task::Poll; - use core::task::Waker; - use 
eonix_hal::traits::trap::RawTrapContext; - use eonix_hal::traits::trap::TrapReturn; - use eonix_hal::traits::trap::TrapType; + use core::sync::atomic::{AtomicBool, Ordering}; + use core::task::{Context, Poll, Waker}; + + use eonix_hal::traits::trap::{RawTrapContext, TrapReturn, TrapType}; use eonix_hal::trap::TrapContext; use eonix_preempt::assert_preempt_enabled; use eonix_runtime::executor::Stack; use eonix_runtime::task::Task; use thread::wait_for_wakeups; + use crate::kernel::interrupt::{default_fault_handler, default_irq_handler}; + use crate::kernel::timer::{should_reschedule, timer_interrupt}; + let stack = KernelStack::new(); fn execute(mut future: Pin<&mut F>, output_ptr: NonNull>) -> ! diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index e0d578c1..dd6f538d 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,18 +1,17 @@ -use crate::{ - kernel::{ - syscall::{procops::parse_user_tls, UserMut}, - task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}, - user::UserPointerMut, - }, - KResult, -}; -use bitflags::bitflags; use core::num::NonZero; -use eonix_hal::processor::UserTLS; + +use bitflags::bitflags; use eonix_runtime::scheduler::RUNTIME; use eonix_sync::AsProof; +use posix_types::ctypes::PtrT; use posix_types::signal::Signal; +use super::{UserTLS, UserTLSDescriptor}; +use crate::kernel::syscall::UserMut; +use crate::kernel::task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}; +use crate::kernel::user::UserPointerMut; +use crate::KResult; + bitflags! { #[derive(Debug, Default)] pub struct CloneFlags: usize { @@ -46,12 +45,18 @@ bitflags! { #[derive(Debug)] pub struct CloneArgs { pub flags: CloneFlags, - pub sp: Option>, // Stack pointer for the new thread. - pub exit_signal: Option, // Signal to send to the parent on exit. - pub set_tid_ptr: Option>, // Pointer to set child TID in user space. - pub clear_tid_ptr: Option>, // Pointer to clear child TID in user space. 
- pub parent_tid_ptr: Option>, // Pointer to parent TID in user space. - pub tls: Option, // Pointer to TLS information. + /// Stack pointer for the new thread. + pub sp: Option>, + /// Signal to send to the parent on exit. + pub exit_signal: Option, + /// Pointer to set child TID in user space. + pub set_tid_ptr: Option>, + /// Pointer to clear child TID in user space. + pub clear_tid_ptr: Option>, + /// Pointer to parent TID in user space. + pub parent_tid_ptr: Option>, + /// Pointer to TLS information. + pub tls: Option, } impl CloneArgs { @@ -62,7 +67,7 @@ impl CloneArgs { sp: usize, child_tid_ptr: UserMut, parent_tid_ptr: UserMut, - tls: usize, + tls: PtrT, ) -> KResult { let clone_flags = CloneFlags::from_bits_truncate(flags & !Self::MASK); let exit_signal = flags & Self::MASK; @@ -87,7 +92,8 @@ impl CloneArgs { .then_some(parent_tid_ptr); let tls = if clone_flags.contains(CloneFlags::CLONE_SETTLS) { - Some(parse_user_tls(tls)?) + let tls_desc = UserTLSDescriptor::new(tls)?; + Some(tls_desc.read()?) 
} else { None }; diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 11348e51..77e8e618 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -1,43 +1,37 @@ -use super::{ - signal::{RaiseResult, SignalList}, - stackful, Process, ProcessList, WaitType, -}; -use crate::{ - kernel::{ - interrupt::default_irq_handler, - syscall::{syscall_handlers, SyscallHandler, User, UserMut}, - task::{clone::CloneArgs, futex::RobustListHead, CloneFlags}, - timer::{should_reschedule, timer_interrupt}, - user::{UserPointer, UserPointerMut}, - vfs::{filearray::FileArray, FsContext}, - }, - prelude::*, -}; -use alloc::{alloc::Allocator, sync::Arc}; +use alloc::alloc::Allocator; +use alloc::sync::Arc; +use core::future::{poll_fn, Future}; +use core::pin::Pin; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicBool, Ordering}; +use core::task::{Context, Poll}; + use atomic_unique_refcell::AtomicUniqueRefCell; -use core::{ - future::{poll_fn, Future}, - pin::Pin, - ptr::NonNull, - sync::atomic::{AtomicBool, Ordering}, - task::{Context, Poll}, -}; -use eonix_hal::{ - fpu::FpuState, - processor::{UserTLS, CPU}, - traits::{ - fault::Fault, - fpu::RawFpuState as _, - trap::{RawTrapContext, TrapReturn, TrapType}, - }, - trap::TrapContext, -}; +use eonix_hal::fpu::FpuState; +use eonix_hal::traits::fault::Fault; +use eonix_hal::traits::fpu::RawFpuState as _; +use eonix_hal::traits::trap::{RawTrapContext, TrapReturn, TrapType}; +use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::AsProofMut as _; use pointers::BorrowedArc; use posix_types::signal::Signal; use stalloc::UnsafeStalloc; +use super::signal::{RaiseResult, SignalList}; +use super::user_tls::UserTLS; +use super::{stackful, Process, ProcessList, WaitType}; +use crate::kernel::interrupt::default_irq_handler; +use crate::kernel::syscall::{syscall_handlers, SyscallHandler, User, UserMut}; +use crate::kernel::task::clone::CloneArgs; +use 
crate::kernel::task::futex::RobustListHead; +use crate::kernel::task::CloneFlags; +use crate::kernel::timer::{should_reschedule, timer_interrupt}; +use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::vfs::filearray::FileArray; +use crate::kernel::vfs::FsContext; +use crate::prelude::*; + #[eonix_percpu::define_percpu] static CURRENT_THREAD: Option> = None; @@ -275,12 +269,9 @@ impl Thread { self.signal_list.raise(signal) } - /// # Safety - /// This function is unsafe because it accesses the `current_cpu()`, which needs - /// to be called in a preemption disabled context. - pub unsafe fn load_thread_area32(&self) { + pub fn activate_tls(&self) { if let Some(tls) = self.inner.lock().tls.as_ref() { - CPU::local().as_mut().set_tls32(tls); + tls.activate(); } } @@ -442,14 +433,7 @@ impl Thread { CURRENT_THREAD.set(NonNull::new(&raw const *self as *mut _)); - unsafe { - eonix_preempt::disable(); - - // SAFETY: Preemption is disabled. - self.load_thread_area32(); - - eonix_preempt::enable(); - } + self.activate_tls(); let result = future.as_mut().poll(cx); diff --git a/src/kernel/task/user_tls/mod.rs b/src/kernel/task/user_tls/mod.rs new file mode 100644 index 00000000..2583b580 --- /dev/null +++ b/src/kernel/task/user_tls/mod.rs @@ -0,0 +1,34 @@ +cfg_if::cfg_if! 
{ + if #[cfg(target_arch = "x86_64")] { + mod x86_64; + pub use x86_64::*; + } else { + use eonix_mm::address::VAddr; + use posix_types::ctypes::PtrT; + + use crate::prelude::KResult; + + + #[derive(Debug, Clone)] + pub struct UserTLS(VAddr); + + #[derive(Debug, Clone)] + pub struct UserTLSDescriptor(VAddr); + + impl UserTLS { + pub fn activate(&self) { + self.0; + } + } + + impl UserTLSDescriptor { + pub fn new(tp: PtrT) -> KResult { + Ok(Self(VAddr::from(tp.addr()))) + } + + pub fn read(&self) -> KResult { + Ok(UserTLS(self.0)) + } + } + } +} diff --git a/src/kernel/task/user_tls/x86_64.rs b/src/kernel/task/user_tls/x86_64.rs new file mode 100644 index 00000000..5bb33b97 --- /dev/null +++ b/src/kernel/task/user_tls/x86_64.rs @@ -0,0 +1,83 @@ +use core::fmt; + +use eonix_hal::arch_exported::gdt::{GDTEntry, GDT}; +use eonix_hal::processor::CPU; +use eonix_mm::address::VAddr; +use posix_types::ctypes::PtrT; +use posix_types::x86_64::UserDescriptor; + +use crate::kernel::syscall::{User, UserMut}; +use crate::kernel::user::{CheckedUserPointer, UserPointerMut}; +use crate::prelude::KResult; + +#[derive(Debug, Clone)] +pub struct UserTLS { + desc: GDTEntry, + base: u64, +} + +pub struct UserTLSDescriptor<'a> { + ptr: UserPointerMut<'a, UserDescriptor>, +} + +impl UserTLS { + fn new(base: u32, limit: u32) -> Self { + Self { + desc: GDTEntry::new_tls(base, limit), + base: base as u64, + } + } + + fn new_page_limit(base: u32, limit_in_pages: u32) -> Self { + Self { + desc: GDTEntry::new_tls_page_limit(base, limit_in_pages), + base: base as u64, + } + } + + pub fn activate(&self) { + CPU::local().as_mut().set_tls32(self.desc, self.base); + } +} + +impl UserTLSDescriptor<'_> { + pub fn new(raw_tls: PtrT) -> KResult { + Ok(Self { + ptr: UserPointerMut::new(UserMut::::with_addr(raw_tls.addr()))?, + }) + } + + pub fn read(&self) -> KResult { + let mut desc = self.ptr.read()?; + + let base = VAddr::from(desc.base as usize); + + // Clear the TLS area if it is not present. 
+ if desc.flags.is_read_exec_only() && !desc.flags.is_present() { + if desc.limit != 0 && base != VAddr::NULL { + let len = if desc.flags.is_limit_in_pages() { + (desc.limit as usize) << 12 + } else { + desc.limit as usize + }; + + CheckedUserPointer::new(User::new(base), len)?.zero()?; + } + } + + desc.entry = GDT::TLS32_INDEX as u32; + self.ptr.write(desc)?; + + Ok(if desc.flags.is_limit_in_pages() { + UserTLS::new_page_limit(desc.base, desc.limit) + } else { + UserTLS::new(desc.base, desc.limit) + }) + } +} + +impl fmt::Debug for UserTLSDescriptor<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("UserTLSDescriptor").finish_non_exhaustive() + } +} diff --git a/src/kernel/user/dataflow.rs b/src/kernel/user/dataflow.rs index 02e7d791..5d8ac167 100644 --- a/src/kernel/user/dataflow.rs +++ b/src/kernel/user/dataflow.rs @@ -1,18 +1,15 @@ -use crate::{ - io::{Buffer, FillResult}, - prelude::*, -}; -use crate::{ - io::{IntoStream, Stream}, - kernel::{ - constants::{EFAULT, EINVAL}, - syscall::{User, UserMut}, - }, -}; -use core::{arch::asm, ffi::CStr, marker::PhantomData}; +use core::arch::asm; +use core::ffi::CStr; +use core::marker::PhantomData; + use eonix_mm::address::Addr; use eonix_preempt::assert_preempt_enabled; +use crate::io::{Buffer, FillResult, IntoStream, Stream}; +use crate::kernel::constants::{EFAULT, EINVAL}; +use crate::kernel::syscall::{User, UserMut}; +use crate::prelude::*; + pub struct CheckedUserPointer<'a> { ptr: User, len: usize, From 036568b99f9f8a854e19d48e5bd9e80b1e79ac9a Mon Sep 17 00:00:00 2001 From: greatbridf Date: Tue, 6 Jan 2026 22:02:22 +0800 Subject: [PATCH 36/54] mem, slab: rework the slab system Signed-off-by: greatbridf --- .vscode/settings.json | 1 + Cargo.lock | 1 - crates/intrusive_list/src/lib.rs | 2 + crates/slab_allocator/Cargo.toml | 2 - crates/slab_allocator/src/lib.rs | 291 +++++++++++++++++++++--- crates/slab_allocator/src/slab_cache.rs | 164 ------------- src/kernel/mem/allocator.rs | 21 
+- src/kernel/mem/page_alloc.rs | 10 +- src/kernel/mem/page_alloc/raw_page.rs | 199 ++++++++++------ 9 files changed, 401 insertions(+), 290 deletions(-) delete mode 100644 crates/slab_allocator/src/slab_cache.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index 634d16af..10b4a8b4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,4 @@ { "makefile.configureOnOpen": false, + "editor.formatOnSave": true, } diff --git a/Cargo.lock b/Cargo.lock index 5487f284..3e8a36bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -518,7 +518,6 @@ version = "0.1.0" dependencies = [ "eonix_mm", "eonix_sync", - "intrusive_list", ] [[package]] diff --git a/crates/intrusive_list/src/lib.rs b/crates/intrusive_list/src/lib.rs index af8c4f1a..440944d0 100644 --- a/crates/intrusive_list/src/lib.rs +++ b/crates/intrusive_list/src/lib.rs @@ -25,6 +25,8 @@ impl List { } pub fn insert(&mut self, node: &mut Link) { + // TODO: `node` above should be of 'static. + self.head.insert(node); self.count += 1; } diff --git a/crates/slab_allocator/Cargo.toml b/crates/slab_allocator/Cargo.toml index 067b6f53..926ac688 100644 --- a/crates/slab_allocator/Cargo.toml +++ b/crates/slab_allocator/Cargo.toml @@ -6,5 +6,3 @@ edition = "2024" [dependencies] eonix_mm = { path = "../eonix_mm" } eonix_sync = { path = "../eonix_sync" } -intrusive_list = { path = "../intrusive_list" } - diff --git a/crates/slab_allocator/src/lib.rs b/crates/slab_allocator/src/lib.rs index ce163183..8a684edd 100644 --- a/crates/slab_allocator/src/lib.rs +++ b/crates/slab_allocator/src/lib.rs @@ -1,69 +1,288 @@ #![no_std] -mod slab_cache; +use core::ptr::NonNull; -use core::{cmp::max, ptr::NonNull}; - -use eonix_mm::paging::{PageAlloc, RawPage}; use eonix_sync::Spin; -use intrusive_list::Link; -use slab_cache::SlabCache; -pub trait SlabRawPage: RawPage { - /// Get the container raw page struct of the list link. 
+#[repr(C)] +pub union SlabSlot { + slab_slot: Option>, + data: u8, +} + +pub trait SlabPageList: Sized { + type Page: SlabPage; + + fn new() -> Self; + fn is_empty(&self) -> bool; + + fn peek_head(&mut self) -> Option<&mut Self::Page>; + + fn pop_head(&mut self) -> Option<&'static mut Self::Page>; + fn push_tail(&mut self, page: &'static mut Self::Page); + fn remove(&mut self, page: &mut Self::Page); +} + +pub trait SlabPage: Sized + 'static { + fn get_data_ptr(&self) -> NonNull<[u8]>; + + fn get_free_slot(&self) -> Option>; + fn set_free_slot(&mut self, next: Option>); + + fn get_alloc_count(&self) -> usize; + + /// Increase the allocation count by 1 and return the increased value. + fn inc_alloc_count(&mut self) -> usize; + + /// Decrease the allocation count by 1 and return the decreased value. + fn dec_alloc_count(&mut self) -> usize; + + /// Get the [`SlabPage`] that `ptr` is allocated from. /// /// # Safety - /// The caller MUST ensure that the link points to a `RawPage`. - unsafe fn from_link(link: &mut Link) -> Self; + /// The caller MUST ensure that no others could be calling this function and + /// getting the [`SlabPage`] at the same time. + unsafe fn from_allocated(ptr: NonNull) -> &'static mut Self; +} + +pub(crate) trait SlabPageExt { + fn alloc_slot(&mut self) -> Option>; - /// Get the list link of the raw page. - /// /// # Safety - /// The caller MUST ensure that at any time, only one mutable reference - /// to the link exists. - unsafe fn get_link(&self) -> &mut Link; + /// The caller MUST ensure that `slot_data_ptr` points to some position + /// previously allocated by [`SlabPageExt::alloc_slot`]. 
+ unsafe fn free_slot(&mut self, slot_data_ptr: NonNull); + + fn is_empty(&self) -> bool; + fn is_full(&self) -> bool; +} + +impl SlabPageExt for T +where + T: SlabPage, +{ + fn alloc_slot(&mut self) -> Option> { + let mut free_slot = self.get_free_slot()?; + + unsafe { + let free_slot = free_slot.as_mut(); + + let next_slot = free_slot.slab_slot; + // ===== `free_slot` is now safe to be overwritten - fn slab_init(&self, first_free: Option>); + self.set_free_slot(next_slot); + self.inc_alloc_count(); - // which slab page the ptr belong - fn in_which(ptr: *mut u8) -> Self; + Some(NonNull::new_unchecked(&mut free_slot.data)) + } + } + + unsafe fn free_slot(&mut self, slot_data_ptr: NonNull) { + unsafe { + let mut free_slot: NonNull = slot_data_ptr.cast(); + free_slot.as_mut().slab_slot = self.get_free_slot(); - fn real_page_ptr(&self) -> *mut u8; + self.set_free_slot(Some(free_slot)); + self.dec_alloc_count(); + } + } - fn allocated_count(&self) -> &mut u32; + fn is_empty(&self) -> bool { + self.get_alloc_count() == 0 + } - fn next_free(&self) -> &mut Option>; + fn is_full(&self) -> bool { + self.get_free_slot().is_none() + } } -pub struct SlabAllocator { - slabs: [Spin>; SLAB_CACHE_COUNT], - alloc: A, +pub trait SlabPageAlloc { + type Page: SlabPage; + type PageList: SlabPageList; + + /// Allocate a page suitable for slab system use. The page MUST come with + /// its allocation count 0 and next free slot None. + /// + /// # Safety + /// The page returned MUST be properly initialized before its usage. 
+ unsafe fn alloc_uninit(&self) -> &'static mut Self::Page; } -unsafe impl Send for SlabAllocator {} -unsafe impl Sync for SlabAllocator {} +pub(crate) struct SlabList +where + T: SlabPageList, +{ + empty_list: T, + partial_list: T, + full_list: T, + object_size: usize, +} + +pub struct SlabAlloc +where + P: SlabPageAlloc, +{ + slabs: [Spin>; COUNT], + alloc: P, +} + +unsafe impl Send for SlabAlloc where P: SlabPageAlloc {} +unsafe impl Sync for SlabAlloc where P: SlabPageAlloc {} -impl SlabAllocator +impl SlabAlloc where - Raw: SlabRawPage, - Allocator: PageAlloc, + L: SlabPageAlloc, { - pub fn new_in(alloc: Allocator) -> Self { + pub fn new_in(alloc: L) -> Self { Self { - slabs: core::array::from_fn(|i| Spin::new(SlabCache::new_in(1 << (i + 3)))), + slabs: core::array::from_fn(|i| Spin::new(SlabList::new(1 << (i + 3)))), alloc, } } - pub fn alloc(&self, mut size: usize) -> *mut u8 { - size = max(8, size); + pub fn alloc(&self, mut size: usize) -> NonNull { + size = size.max(8); let idx = size.next_power_of_two().trailing_zeros() - 3; self.slabs[idx as usize].lock().alloc(&self.alloc) } - pub fn dealloc(&self, ptr: *mut u8, mut size: usize) { - size = max(8, size); + pub unsafe fn dealloc(&self, ptr: NonNull, mut size: usize) { + size = size.max(8); let idx = size.next_power_of_two().trailing_zeros() - 3; - self.slabs[idx as usize].lock().dealloc(ptr, &self.alloc); + + unsafe { + // SAFETY: + self.slabs[idx as usize].lock().dealloc(ptr, &self.alloc); + } } } + +impl SlabList +where + T: SlabPageList, +{ + fn new(object_size: usize) -> Self { + Self { + empty_list: T::new(), + partial_list: T::new(), + full_list: T::new(), + object_size, + } + } + + fn alloc_from_partial(&mut self) -> NonNull { + let head = self.partial_list.peek_head().unwrap(); + let slot = head.alloc_slot().unwrap(); + + if head.is_full() { + let head = self.partial_list.pop_head().unwrap(); + self.full_list.push_tail(head); + } + + slot + } + + fn alloc_from_empty(&mut self) -> NonNull { + let 
head = self.empty_list.pop_head().unwrap(); + let slot = head.alloc_slot().unwrap(); + + if head.is_full() { + self.full_list.push_tail(head); + } else { + self.partial_list.push_tail(head); + } + + slot + } + + fn charge(&mut self, alloc: &impl SlabPageAlloc) { + unsafe { + let slab = alloc.alloc_uninit(); + let free_slot = make_slab_page(slab.get_data_ptr(), self.object_size); + + slab.set_free_slot(Some(free_slot)); + + self.empty_list.push_tail(slab); + } + } + + fn alloc(&mut self, alloc: &impl SlabPageAlloc) -> NonNull { + if !self.partial_list.is_empty() { + return self.alloc_from_partial(); + } + + if self.empty_list.is_empty() { + self.charge(alloc); + } + + self.alloc_from_empty() + } + + unsafe fn dealloc(&mut self, ptr: NonNull, _alloc: &impl SlabPageAlloc) { + let slab_page = unsafe { + // SAFETY: + ::from_allocated(ptr) + }; + + let (was_full, is_empty); + + was_full = slab_page.is_full(); + + unsafe { + // SAFETY: + slab_page.free_slot(ptr); + } + + is_empty = slab_page.is_empty(); + + match (was_full, is_empty) { + (false, false) => {} + (false, true) => { + self.partial_list.remove(slab_page); + self.empty_list.push_tail(slab_page); + } + (true, false) => { + self.full_list.remove(slab_page); + self.partial_list.push_tail(slab_page); + } + (true, true) => { + self.full_list.remove(slab_page); + self.empty_list.push_tail(slab_page); + } + } + + // TODO: Check whether we should place some pages back with `alloc` if + // the global free page count is below the watermark. 
+ } +} + +pub fn make_slab_page(page_ptr: NonNull<[u8]>, slot_size: usize) -> NonNull { + assert!( + slot_size >= core::mem::size_of::(), + "The minimum slot size is of a pointer's width" + ); + + let page_size = page_ptr.len(); + let slot_count = page_size / slot_size; + let page_start: NonNull = page_ptr.cast(); + + // Quick checks + assert!( + page_size % slot_size == 0, + "The page's size should be a multiple of the slot size" + ); + + let mut prev_free_slot = None; + for i in (0..slot_count).rev() { + let offset = i * slot_size; + + unsafe { + let mut slot_ptr: NonNull = page_start.add(offset).cast(); + + slot_ptr.as_mut().slab_slot = prev_free_slot; + prev_free_slot = Some(slot_ptr); + } + } + + prev_free_slot.expect("There should be at least one slot.") +} diff --git a/crates/slab_allocator/src/slab_cache.rs b/crates/slab_allocator/src/slab_cache.rs deleted file mode 100644 index 98e27fc8..00000000 --- a/crates/slab_allocator/src/slab_cache.rs +++ /dev/null @@ -1,164 +0,0 @@ -use super::SlabRawPage; -use core::{marker::PhantomData, ptr::NonNull}; -use eonix_mm::paging::{PageAlloc, PAGE_SIZE}; -use intrusive_list::List; - -pub(crate) struct SlabCache { - empty_list: List, - partial_list: List, - full_list: List, - object_size: u32, - _phantom: PhantomData<(T, A)>, -} - -trait SlabRawPageExt { - fn alloc_slot(&self) -> Option>; - fn dealloc_slot(&self, slot_ptr: *mut u8); - fn is_full(&self) -> bool; - fn is_empty(&self) -> bool; - fn slab_page_init(&self, object_size: u32) -> Option>; -} - -impl SlabRawPageExt for T -where - T: SlabRawPage, -{ - fn alloc_slot(&self) -> Option> { - let ptr = self.next_free().clone(); - - let next_free = match ptr { - Some(ptr) => unsafe { ptr.read() as *mut usize }, - None => unreachable!(), - }; - *self.allocated_count() += 1; - *self.next_free() = NonNull::new(next_free); - return ptr; - } - - fn dealloc_slot(&self, slot_ptr: *mut u8) { - let slot_ptr = slot_ptr as *mut usize; - - if let Some(last_free) = 
self.next_free().clone() { - unsafe { *slot_ptr = last_free.as_ptr() as usize } - } else { - unsafe { *slot_ptr = 0 } - } - - *self.allocated_count() -= 1; - *self.next_free() = NonNull::new(slot_ptr); - } - - fn slab_page_init(&self, object_size: u32) -> Option> { - assert!(object_size >= core::mem::size_of::() as u32); - - let first_free = self.real_page_ptr() as *mut usize; - - let mut slot_ptr = first_free; - let mut slot_count = PAGE_SIZE / object_size as usize; - - // SAFETY: carefully ptr operate - unsafe { - loop { - if slot_count == 1 { - *slot_ptr = 0; - break; - } - - let next_ptr = slot_ptr.byte_add(object_size as usize); - *slot_ptr = next_ptr as usize; - slot_ptr = next_ptr; - slot_count -= 1; - } - } - - NonNull::new(first_free) - } - - fn is_empty(&self) -> bool { - self.allocated_count().clone() == 0 - } - - fn is_full(&self) -> bool { - self.next_free().is_none() - } -} - -impl SlabCache -where - Raw: SlabRawPage, - Allocator: PageAlloc, -{ - pub(crate) const fn new_in(object_size: u32) -> Self { - // avoid unnecessary branch in alloc and dealloc - assert!(object_size <= PAGE_SIZE as u32 / 2); - - Self { - empty_list: List::new(), - partial_list: List::new(), - full_list: List::new(), - object_size: object_size, - _phantom: PhantomData, - } - } - - pub(crate) fn alloc(&mut self, alloc: &Allocator) -> *mut u8 { - if !self.partial_list.is_empty() { - let page_ptr = unsafe { - Raw::from_link( - self.partial_list - .head() - .expect("partial pages should not be empty"), - ) - }; - - let ptr = page_ptr.alloc_slot().expect("should get slot"); - - if page_ptr.is_full() { - self.partial_list.remove(unsafe { page_ptr.get_link() }); - self.full_list.insert(unsafe { page_ptr.get_link() }); - } - return ptr.as_ptr() as *mut u8; - } - - if !self.empty_list.is_empty() { - let page_ptr = unsafe { - Raw::from_link( - self.empty_list - .head() - .expect("empty pages should not be empty"), - ) - }; - - let ptr = page_ptr.alloc_slot().expect("should get slot"); - 
self.empty_list.remove(unsafe { page_ptr.get_link() }); - self.partial_list.insert(unsafe { page_ptr.get_link() }); - return ptr.as_ptr() as *mut u8; - } - - let new_page_ptr = alloc.alloc().expect("slab_cache get page fail!"); - let first_free = new_page_ptr.slab_page_init(self.object_size); - new_page_ptr.slab_init(first_free); - let ptr = new_page_ptr.alloc_slot().expect("should get slot"); - self.partial_list.insert(unsafe { new_page_ptr.get_link() }); - ptr.as_ptr() as *mut u8 - } - - pub(crate) fn dealloc(&mut self, ptr: *mut u8, _alloc: &Allocator) { - let page_ptr = Raw::in_which(ptr); - - if page_ptr.is_full() { - self.full_list.remove(unsafe { page_ptr.get_link() }); - self.partial_list.insert(unsafe { page_ptr.get_link() }); - } - - page_ptr.dealloc_slot(ptr); - - if page_ptr.is_empty() { - self.partial_list.remove(unsafe { page_ptr.get_link() }); - self.empty_list.insert(unsafe { page_ptr.get_link() }); - } - - // TODO: Check whether we should place some pages back with `alloc` if the global - // free page count is below the watermark. 
- } -} diff --git a/src/kernel/mem/allocator.rs b/src/kernel/mem/allocator.rs index 9e5df69b..a3676ce0 100644 --- a/src/kernel/mem/allocator.rs +++ b/src/kernel/mem/allocator.rs @@ -5,13 +5,12 @@ use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::PhysAccess; use eonix_mm::paging::{PAGE_SIZE_BITS, PFN}; use eonix_sync::LazyLock; -use slab_allocator::SlabAllocator; +use slab_allocator::SlabAlloc; -use super::page_alloc::RawPagePtr; use super::{GlobalPageAlloc, Page, PageExt}; -static SLAB_ALLOCATOR: LazyLock> = - LazyLock::new(|| SlabAllocator::new_in(GlobalPageAlloc)); +static SLAB_ALLOCATOR: LazyLock> = + LazyLock::new(|| SlabAlloc::new_in(GlobalPageAlloc)); struct Allocator; @@ -28,23 +27,23 @@ unsafe impl GlobalAlloc for Allocator { let ptr = page.get_ptr(); page.into_raw(); - ptr.as_ptr() + ptr }; - if result.is_null() { - core::ptr::null_mut() - } else { - result as *mut u8 - } + result.as_ptr() } unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { let size = layout.size().next_power_of_two(); + let ptr = unsafe { + // SAFETY: The memory we've allocated MUST be non-null. 
+ NonNull::new_unchecked(ptr) + }; if size <= 2048 { SLAB_ALLOCATOR.dealloc(ptr, size) } else { - let paddr = ArchPhysAccess::from_ptr(NonNull::new_unchecked(ptr)); + let paddr = ArchPhysAccess::from_ptr(ptr); let pfn = PFN::from(paddr); Page::from_raw(pfn); }; diff --git a/src/kernel/mem/page_alloc.rs b/src/kernel/mem/page_alloc.rs index fcbe9bb3..1c018f37 100644 --- a/src/kernel/mem/page_alloc.rs +++ b/src/kernel/mem/page_alloc.rs @@ -1,15 +1,13 @@ mod raw_page; -use buddy_allocator::{BuddyAllocator, BuddyRawPage as _}; use core::sync::atomic::Ordering; -use eonix_mm::{ - address::{AddrOps as _, PRange}, - paging::{GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PFN}, -}; + +use buddy_allocator::{BuddyAllocator, BuddyRawPage as _}; +use eonix_mm::address::{AddrOps as _, PRange}; +use eonix_mm::paging::{GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PFN}; use eonix_sync::{NoContext, Spin}; use intrusive_list::List; use raw_page::PageFlags; - pub use raw_page::{RawPage, RawPagePtr}; const COSTLY_ORDER: u32 = 3; diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 08536693..d793ccd7 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -1,65 +1,46 @@ -use crate::kernel::mem::page_cache::PageCacheRawPage; -use crate::kernel::mem::PhysAccess; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering}; + use buddy_allocator::BuddyRawPage; -use core::{ - ptr::NonNull, - sync::atomic::{AtomicU32, AtomicUsize, Ordering}, -}; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::{ - address::{PAddr, PhysAccess as _}, - paging::{RawPage as RawPageTrait, PFN}, -}; -use intrusive_list::{container_of, Link}; -use slab_allocator::SlabRawPage; +use eonix_mm::address::{PAddr, PhysAccess as _}; +use eonix_mm::paging::{PageAlloc, RawPage as RawPageTrait, PFN}; +use intrusive_list::{container_of, Link, List}; +use slab_allocator::{SlabPage, SlabPageAlloc, 
SlabPageList, SlabSlot}; + +use super::GlobalPageAlloc; +use crate::kernel::mem::page_cache::PageCacheRawPage; +use crate::kernel::mem::PhysAccess; const PAGE_ARRAY: NonNull = unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) }; pub struct PageFlags(AtomicU32); -struct SlabPageInner { - allocated_count: u32, - free_next: Option>, +#[derive(Clone, Copy)] +struct SlabPageData { + allocated_count: usize, + free_next: Option>, } -impl SlabPageInner { - fn new(free_next: Option>) -> Self { +impl SlabPageData { + const fn new() -> Self { Self { allocated_count: 0, - free_next, + free_next: None, } } } -struct PageCacheInner { +#[derive(Clone, Copy)] +struct PageCacheData { valid_size: usize, } -pub struct BuddyPageInner {} - -enum PageType { - Buddy(BuddyPageInner), - Slab(SlabPageInner), - PageCache(PageCacheInner), -} - -impl PageType { - fn slab_data(&mut self) -> &mut SlabPageInner { - if let PageType::Slab(slab_data) = self { - return slab_data; - } else { - unreachable!() - } - } - - fn page_cache_data(&mut self) -> &mut PageCacheInner { - if let PageType::PageCache(cache_data) = self { - return cache_data; - } else { - unreachable!() - } - } +#[repr(C)] +union PageData { + slab: SlabPageData, + page_cache: PageCacheData, } pub struct RawPage { @@ -73,7 +54,7 @@ pub struct RawPage { flags: PageFlags, refcount: AtomicUsize, - shared_data: PageType, + shared_data: PageData, } #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] @@ -109,6 +90,13 @@ impl PageFlags { } impl RawPagePtr { + pub const fn from_ref(raw_page_ref: &RawPage) -> Self { + Self::new(unsafe { + // SAFETY: Rust references always points to non-null addresses. 
+ NonNull::new_unchecked(&raw const *raw_page_ref as *mut _) + }) + } + pub const fn new(ptr: NonNull) -> Self { Self(ptr) } @@ -215,50 +203,68 @@ impl BuddyRawPage for RawPagePtr { } } -impl SlabRawPage for RawPagePtr { - unsafe fn from_link(link: &mut Link) -> Self { - let raw_page_ptr = container_of!(link, RawPage, link); - Self(raw_page_ptr) - } +impl SlabPage for RawPage { + fn get_data_ptr(&self) -> NonNull<[u8]> { + let raw_page_ptr = RawPagePtr::from_ref(self); + let paddr_start = PAddr::from(PFN::from(raw_page_ptr)); + let page_data_ptr = unsafe { paddr_start.as_ptr() }; - unsafe fn get_link(&self) -> &mut Link { - &mut self.as_mut().link + NonNull::slice_from_raw_parts(page_data_ptr, 1 << (self.order + 12)) } - fn in_which(ptr: *mut u8) -> RawPagePtr { + fn get_free_slot(&self) -> Option> { unsafe { - // SAFETY: The pointer is allocated from the slab allocator, - // which can't be null. - let ptr = NonNull::new_unchecked(ptr); + // SAFETY: TODO + self.shared_data.slab.free_next + } + } - // SAFETY: The pointer is valid. 
- let paddr = ArchPhysAccess::from_ptr(ptr); - let pfn = PFN::from(paddr); + fn set_free_slot(&mut self, next: Option>) { + self.shared_data.slab.free_next = next; + } - RawPagePtr::from(pfn) + fn get_alloc_count(&self) -> usize { + unsafe { + // SAFETY: TODO + self.shared_data.slab.allocated_count } } - fn allocated_count(&self) -> &mut u32 { - &mut self.as_mut().shared_data.slab_data().allocated_count - } + fn inc_alloc_count(&mut self) -> usize { + unsafe { + // SAFETY: TODO + self.shared_data.slab.allocated_count += 1; - fn next_free(&self) -> &mut Option> { - &mut self.as_mut().shared_data.slab_data().free_next + self.shared_data.slab.allocated_count + } } - fn real_page_ptr(&self) -> *mut u8 { - self.real_ptr().as_ptr() + fn dec_alloc_count(&mut self) -> usize { + unsafe { + // SAFETY: TODO + self.shared_data.slab.allocated_count -= 1; + + self.shared_data.slab.allocated_count + } } - fn slab_init(&self, first_free: Option>) { - self.as_mut().shared_data = PageType::Slab(SlabPageInner::new(first_free)); + unsafe fn from_allocated(ptr: NonNull) -> &'static mut Self { + unsafe { + // SAFETY: The caller ensures that `ptr` is valid. + let paddr = ArchPhysAccess::from_ptr(ptr); + let pfn = PFN::from(paddr); + + RawPagePtr::from(pfn).as_mut() + } } } impl PageCacheRawPage for RawPagePtr { fn valid_size(&self) -> &mut usize { - &mut self.as_mut().shared_data.page_cache_data().valid_size + unsafe { + // SAFETY: The caller ensures that the page is in some page cache. 
+ &mut self.as_mut().shared_data.page_cache.valid_size + } } fn is_dirty(&self) -> bool { @@ -274,6 +280,59 @@ impl PageCacheRawPage for RawPagePtr { } fn cache_init(&self) { - self.as_mut().shared_data = PageType::PageCache(PageCacheInner { valid_size: 0 }); + self.as_mut().shared_data.page_cache = PageCacheData { valid_size: 0 }; + } +} + +pub struct RawSlabPageList(List); + +impl SlabPageList for RawSlabPageList { + type Page = RawPage; + + fn new() -> Self { + Self(List::new()) + } + + fn is_empty(&self) -> bool { + self.0.is_empty() + } + + fn peek_head(&mut self) -> Option<&mut Self::Page> { + unsafe { + let link = self.0.head()?; + let mut raw_page_ptr = container_of!(link, RawPage, link); + + Some(raw_page_ptr.as_mut()) + } + } + + fn pop_head(&mut self) -> Option<&'static mut Self::Page> { + unsafe { + let link = self.0.pop()?; + let mut raw_page_ptr = container_of!(link, RawPage, link); + + Some(raw_page_ptr.as_mut()) + } + } + + fn push_tail(&mut self, page: &'static mut Self::Page) { + self.0.insert(&mut page.link); + } + + fn remove(&mut self, page: &mut Self::Page) { + self.0.remove(&mut page.link) + } +} + +impl SlabPageAlloc for GlobalPageAlloc { + type Page = RawPage; + type PageList = RawSlabPageList; + + unsafe fn alloc_uninit(&self) -> &'static mut RawPage { + let raw_page = self.alloc().expect("Out of memory").as_mut(); + raw_page.flags.set(PageFlags::SLAB); + raw_page.shared_data.slab = SlabPageData::new(); + + raw_page } } From 5cddfc2684545d9174caef3a83f213a02309d9d4 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Wed, 7 Jan 2026 02:16:47 +0800 Subject: [PATCH 37/54] mem, buddy: rework the buddy system Introduce `Zone`s: a Zone is a region of physical memory that all share the same NUMA node. The Zone will hold all RawPage structs. Buddy allocator will now store a reference to the zone that hold all its pages. Thus, we make the buddy allocator independent of underlying physical page frame management framework. 
Remove unnecessary page flags and structs. Signed-off-by: greatbridf --- Cargo.lock | 1 - crates/buddy_allocator/Cargo.toml | 1 - crates/buddy_allocator/src/free_area.rs | 59 ------ crates/buddy_allocator/src/lib.rs | 265 +++++++++++++++++++----- crates/buddy_allocator/src/zone.rs | 146 ------------- crates/eonix_mm/src/paging.rs | 4 + crates/eonix_mm/src/paging/list.rs | 19 ++ crates/eonix_mm/src/paging/raw_page.rs | 9 +- crates/eonix_mm/src/paging/zone.rs | 20 ++ crates/slab_allocator/src/lib.rs | 35 ++-- src/kernel/mem/page_alloc.rs | 116 +++++------ src/kernel/mem/page_alloc/raw_page.rs | 87 ++++---- src/kernel/mem/page_alloc/zones.rs | 25 +++ src/kernel_init.rs | 24 +-- 14 files changed, 407 insertions(+), 404 deletions(-) delete mode 100644 crates/buddy_allocator/src/free_area.rs delete mode 100644 crates/buddy_allocator/src/zone.rs create mode 100644 crates/eonix_mm/src/paging/list.rs create mode 100644 crates/eonix_mm/src/paging/zone.rs create mode 100644 src/kernel/mem/page_alloc/zones.rs diff --git a/Cargo.lock b/Cargo.lock index 3e8a36bd..896ec493 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -70,7 +70,6 @@ name = "buddy_allocator" version = "0.1.0" dependencies = [ "eonix_mm", - "intrusive_list", ] [[package]] diff --git a/crates/buddy_allocator/Cargo.toml b/crates/buddy_allocator/Cargo.toml index 51f02295..bdb0a28c 100644 --- a/crates/buddy_allocator/Cargo.toml +++ b/crates/buddy_allocator/Cargo.toml @@ -5,4 +5,3 @@ edition = "2024" [dependencies] eonix_mm = { path = "../eonix_mm" } -intrusive_list = { path = "../intrusive_list" } diff --git a/crates/buddy_allocator/src/free_area.rs b/crates/buddy_allocator/src/free_area.rs deleted file mode 100644 index 837f733f..00000000 --- a/crates/buddy_allocator/src/free_area.rs +++ /dev/null @@ -1,59 +0,0 @@ -use crate::BuddyRawPage; -use core::marker::{PhantomData, Send, Sync}; -use intrusive_list::Link; - -pub struct FreeArea { - free_list: Link, - count: usize, - _phantom: PhantomData, -} - -unsafe impl Send 
for FreeArea {} -unsafe impl Sync for FreeArea {} - -impl FreeArea -where - Raw: BuddyRawPage, -{ - pub const fn new() -> Self { - Self { - free_list: Link::new(), - count: 0, - _phantom: PhantomData, - } - } - - pub fn get_free_pages(&mut self) -> Option { - self.free_list.next_mut().map(|pages_link| { - assert_ne!(self.count, 0); - - let pages_ptr = unsafe { - // SAFETY: Items in `self.free_list` are guaranteed to be of type `Raw`. - Raw::from_link(pages_link) - }; - - self.count -= 1; - pages_link.remove(); - - pages_ptr - }) - } - - pub fn add_pages(&mut self, pages_ptr: Raw) { - self.count += 1; - pages_ptr.set_free(); - - unsafe { - self.free_list.insert(pages_ptr.get_link()); - } - } - - pub fn del_pages(&mut self, pages_ptr: Raw) { - assert!(self.count >= 1 && pages_ptr.is_free()); - self.count -= 1; - pages_ptr.clear_free(); - unsafe { - pages_ptr.get_link().remove(); - } - } -} diff --git a/crates/buddy_allocator/src/lib.rs b/crates/buddy_allocator/src/lib.rs index f8c8eeda..abe1ef7b 100644 --- a/crates/buddy_allocator/src/lib.rs +++ b/crates/buddy_allocator/src/lib.rs @@ -1,87 +1,250 @@ #![no_std] -mod free_area; -mod zone; +use core::hint::unreachable_unchecked; -use core::sync::atomic::Ordering; -use eonix_mm::{ - address::PAddr, - paging::{RawPage, PFN}, -}; -use intrusive_list::Link; -use zone::Zone; +use eonix_mm::address::{AddrOps as _, PAddr, PRange}; +use eonix_mm::paging::{PageList, PageListSized, Zone, PFN}; const MAX_ORDER: u32 = 10; -const ZONE_AREAS: usize = const { MAX_ORDER as usize + 1 }; +const AREAS: usize = const { MAX_ORDER as usize + 1 }; -pub trait BuddyRawPage: RawPage { - /// Get the container raw page struct of the list link. - /// - /// # Safety - /// The caller MUST ensure that the link points to a `RawPage`. - unsafe fn from_link(link: &mut Link) -> Self; - - /// Get the list link of the raw page. - /// - /// # Safety - /// The caller MUST ensure that at any time, only one mutable reference - /// to the link exists. 
- unsafe fn get_link(&self) -> &mut Link; - - fn set_order(&self, order: u32); +pub trait BuddyPage: Sized + 'static { + fn pfn(&self) -> PFN; + fn get_order(&self) -> u32; fn is_buddy(&self) -> bool; - fn is_free(&self) -> bool; - fn set_buddy(&self); - fn set_free(&self); + fn set_order(&mut self, order: u32); + fn set_buddy(&mut self, value: bool); +} - fn clear_buddy(&self); - fn clear_free(&self); +struct FreeArea +where + L: PageList, +{ + free_list: L, + count: usize, } -pub struct BuddyAllocator +unsafe impl Send for FreeArea where L: PageList {} +unsafe impl Sync for FreeArea where L: PageList {} + +pub struct BuddyAllocator where - T: BuddyRawPage, + Z: Zone + 'static, + L: PageList, { - zone: Zone, + zone: &'static Z, + free_areas: [FreeArea; AREAS], } -impl BuddyAllocator +impl BuddyAllocator where - T: BuddyRawPage, + Z: Zone + 'static, + Z::Page: BuddyPage, + L: PageListSized, { - pub const fn new() -> Self { - Self { zone: Zone::new() } + pub const fn new(zone: &'static Z) -> Self { + Self { + zone, + free_areas: [const { FreeArea::new() }; AREAS], + } } +} +impl BuddyAllocator +where + Z: Zone, + L: PageList, + P: BuddyPage + 'static, +{ pub fn create_pages(&mut self, start: PAddr, end: PAddr) { - self.zone.create_pages(start, end); + assert!( + self.zone + .contains_prange(PRange::new(start.ceil(), end.floor())), + "The given address range is not within the zone." + ); + + let mut pfn = PFN::from(start.ceil()); + let end_pfn = PFN::from(end.floor()); + + while pfn < end_pfn { + let mut order = usize::from(pfn).trailing_zeros().min(MAX_ORDER); + let new_end_pfn = loop { + let new_end = pfn + (1 << order); + + if new_end <= end_pfn { + break new_end; + } + + order -= 1; + }; + + unsafe { + // SAFETY: We've checked that the range is within the zone above. 
+ self.add_page_unchecked(pfn, order) + }; + + pfn = new_end_pfn; + } + } + + fn add_page(&mut self, pfn: PFN, order: u32) { + let prange = PRange::from(PAddr::from(pfn)).grow(1 << (order + 12)); + assert!( + self.zone.contains_prange(prange), + "The given page is not within the zone." + ); + + unsafe { + // SAFETY: Checks above. + self.add_page_unchecked(pfn, order); + } + } + + unsafe fn add_page_unchecked(&mut self, pfn: PFN, order: u32) { + let Some(page) = self.zone.get_page(pfn) else { + unsafe { unreachable_unchecked() } + }; + + unsafe { + // SAFETY: The caller ensures that the page is unused. + let page_mut = &mut *page.get(); + self.free_areas[order as usize].add_page(page_mut, order); + } + } + + fn break_page(&mut self, page: &mut P, order: u32, target_order: u32) { + let pfn = page.pfn(); + + for order in (target_order..order).rev() { + let buddy_pfn = pfn + (1 << order); + + unsafe { + // SAFETY: We got the page from `self.free_areas`. Checks are + // done when we've put the page into the buddy system. 
+ self.add_page_unchecked(buddy_pfn, order); + } + } + + page.set_order(target_order); + } + + pub fn alloc_order(&mut self, order: u32) -> Option<&'static mut Z::Page> { + for current_order in order..AREAS as u32 { + let Some(page) = self.free_areas[current_order as usize].get_free_page() else { + continue; + }; + + if current_order > order { + self.break_page(page, current_order, order); + } + + return Some(page); + } + + None + } + + pub unsafe fn dealloc(&mut self, page: &'static mut Z::Page) { + let mut pfn = page.pfn(); + let mut order = page.get_order(); + + assert!( + !page.is_buddy(), + "Trying to free a page that is already in the buddy system: {pfn:?}", + ); + + while order < MAX_ORDER { + let buddy_pfn = pfn.buddy_pfn(order); + let Some(buddy_page) = self.try_get_buddy(buddy_pfn, order) else { + break; + }; + + self.free_areas[order as usize].remove_page(buddy_page); + pfn = pfn.combined_pfn(buddy_pfn); + order += 1; + } + + self.add_page(pfn, order); } - pub fn alloc_order(&mut self, order: u32) -> Option { - let pages_ptr = self.zone.get_free_pages(order); + /// This function checks whether the given page is within our [`Zone`] and + /// is a free buddy page with the specified order. + /// + /// We can assure exclusive access to a buddy page of [`order`] if + /// - the buddy is within the same [`Zone`] as us. + /// - the buddy is a free buddy (in some [`FreeArea`]) + /// - the buddy has order [`order`] + fn try_get_buddy<'a>(&mut self, buddy_pfn: PFN, order: u32) -> Option<&'a mut P> { + let buddy_page = self.zone.get_page(buddy_pfn)?; + + unsafe { + // SAFETY: We just test whether the page is a buddy. + let buddy_page_ref = &*buddy_page.get(); - if let Some(pages_ptr) = pages_ptr { - // SAFETY: Memory order here can be Relaxed is for the same reason as that - // in the copy constructor of `std::shared_ptr`. 
- pages_ptr.refcount().fetch_add(1, Ordering::Relaxed); - pages_ptr.clear_free(); + if !buddy_page_ref.is_buddy() { + return None; + } + + // Sad... + if buddy_page_ref.get_order() != order { + return None; + } + + // SAFETY: We have the mutable reference to the buddy allocator. + // So all the pages within are exclusively accessible to us. + Some(&mut *buddy_page.get()) + } + } +} + +impl FreeArea +where + L: PageListSized, +{ + const fn new() -> Self { + Self { + free_list: L::NEW, + count: 0, } + } +} + +impl FreeArea +where + L: PageList, + L::Page: BuddyPage + 'static, +{ + pub fn get_free_page(&mut self) -> Option<&'static mut L::Page> { + self.free_list.pop_head().map(|page| { + assert_ne!(self.count, 0, "Oops"); + + page.set_buddy(false); + self.count -= 1; - pages_ptr + page + }) } - pub unsafe fn dealloc(&mut self, page_ptr: T) { - self.zone.free_pages(page_ptr); + pub fn add_page(&mut self, page: &'static mut L::Page, order: u32) { + page.set_order(order); + page.set_buddy(true); + + self.count += 1; + self.free_list.push_tail(page); } - pub fn has_management_over(page_ptr: T) -> bool { - !page_ptr.is_free() && page_ptr.is_buddy() + pub fn remove_page(&mut self, page: &mut L::Page) { + assert_ne!(self.count, 0, "Oops"); + page.set_buddy(false); + + self.count -= 1; + self.free_list.remove(page); } } -pub(self) trait BuddyPFNOps { +trait BuddyPFNOps { fn buddy_pfn(self, order: u32) -> PFN; fn combined_pfn(self, buddy_pfn: PFN) -> PFN; } diff --git a/crates/buddy_allocator/src/zone.rs b/crates/buddy_allocator/src/zone.rs deleted file mode 100644 index 2c850ef9..00000000 --- a/crates/buddy_allocator/src/zone.rs +++ /dev/null @@ -1,146 +0,0 @@ -use super::free_area::FreeArea; -use crate::{BuddyPFNOps as _, BuddyRawPage}; -use core::sync::atomic::Ordering; -use eonix_mm::{ - address::{AddrOps as _, PAddr}, - paging::PFN, -}; - -pub(super) struct Zone { - free_areas: [FreeArea; AREAS], -} - -impl Zone -where - Raw: BuddyRawPage, -{ - pub const fn new() -> Self 
{ - Self { - free_areas: [const { FreeArea::new() }; AREAS], - } - } - - pub fn get_free_pages(&mut self, order: u32) -> Option { - for current_order in order..AREAS as u32 { - let pages_ptr = self.free_areas[current_order as usize].get_free_pages(); - let Some(pages_ptr) = pages_ptr else { continue }; - - pages_ptr.set_order(order); - - if current_order > order { - self.expand(pages_ptr, current_order, order); - } - - assert!( - pages_ptr.is_present(), - "Page {:?} is not present", - pages_ptr.into(), - ); - - assert!( - pages_ptr.is_free(), - "Page {:?} is not free", - pages_ptr.into(), - ); - - return Some(pages_ptr); - } - None - } - - fn expand(&mut self, pages_ptr: Raw, order: u32, target_order: u32) { - let mut offset = 1 << order; - let pages_pfn = Into::::into(pages_ptr); - - for order in (target_order..order).rev() { - offset >>= 1; - - let split_pages_ptr = Raw::from(pages_pfn + offset); - split_pages_ptr.set_order(order); - split_pages_ptr.set_buddy(); - self.free_areas[order as usize].add_pages(split_pages_ptr); - } - } - - pub fn free_pages(&mut self, mut pages_ptr: Raw) { - assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0); - - let mut pfn = Into::::into(pages_ptr); - let mut current_order = pages_ptr.order(); - - assert!( - pages_ptr.is_present(), - "Freeing a page that is not present: {:?}", - pages_ptr.into(), - ); - - assert!( - !pages_ptr.is_free(), - "Freeing a page that is free: {:?}", - pages_ptr.into(), - ); - - while current_order < (AREAS - 1) as u32 { - let buddy_pfn = pfn.buddy_pfn(current_order); - let buddy_pages_ptr = Raw::from(buddy_pfn); - - if !self.buddy_check(buddy_pages_ptr, current_order) { - break; - } - - pages_ptr.clear_buddy(); - buddy_pages_ptr.clear_buddy(); - self.free_areas[current_order as usize].del_pages(buddy_pages_ptr); - - pages_ptr = Raw::from(pfn.combined_pfn(buddy_pfn)); - pfn = pfn.combined_pfn(buddy_pfn); - - pages_ptr.set_buddy(); - current_order += 1; - } - - pages_ptr.set_order(current_order); - 
self.free_areas[current_order as usize].add_pages(pages_ptr); - } - - /// This function checks whether a page is free && is a buddy - /// we can coalesce a page and its buddy if - /// - the buddy is valid(present) && - /// - the buddy is right now in free_areas && - /// - a page and its buddy have the same order && - /// - a page and its buddy are in the same zone (on smp systems). - fn buddy_check(&self, pages_ptr: Raw, order: u32) -> bool { - if !pages_ptr.is_present() { - return false; - } - if !pages_ptr.is_free() { - return false; - } - if pages_ptr.order() != order { - return false; - } - - assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0); - true - } - - /// Only used on buddy initialization - pub fn create_pages(&mut self, start: PAddr, end: PAddr) { - let mut start_pfn = PFN::from(start.ceil()); - let end_pfn = PFN::from(end.floor()); - - while start_pfn < end_pfn { - let mut order = usize::from(start_pfn) - .trailing_zeros() - .min((AREAS - 1) as u32); - - while start_pfn + (1 << order) as usize > end_pfn { - order -= 1; - } - let page_ptr = Raw::from(start_pfn); - page_ptr.set_buddy(); - self.free_areas[order as usize].add_pages(page_ptr); - start_pfn = start_pfn + (1 << order) as usize; - } - } -} diff --git a/crates/eonix_mm/src/paging.rs b/crates/eonix_mm/src/paging.rs index 88da902e..0c4811f2 100644 --- a/crates/eonix_mm/src/paging.rs +++ b/crates/eonix_mm/src/paging.rs @@ -1,9 +1,13 @@ +mod list; mod page; mod page_alloc; mod pfn; mod raw_page; +mod zone; +pub use list::{PageList, PageListSized}; pub use page::{Page, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS}; pub use page_alloc::{GlobalPageAlloc, NoAlloc, PageAlloc}; pub use pfn::PFN; pub use raw_page::{RawPage, UnmanagedRawPage}; +pub use zone::Zone; diff --git a/crates/eonix_mm/src/paging/list.rs b/crates/eonix_mm/src/paging/list.rs new file mode 100644 index 00000000..a52cf947 --- /dev/null +++ b/crates/eonix_mm/src/paging/list.rs @@ -0,0 +1,19 @@ +pub trait PageList { + type 
Page; + + fn is_empty(&self) -> bool; + + fn peek_head(&mut self) -> Option<&mut Self::Page>; + + fn pop_head(&mut self) -> Option<&'static mut Self::Page>; + fn push_tail(&mut self, page: &'static mut Self::Page); + fn remove(&mut self, page: &mut Self::Page); +} + +pub trait PageListSized: PageList + Sized { + const NEW: Self; + + fn new() -> Self { + Self::NEW + } +} diff --git a/crates/eonix_mm/src/paging/raw_page.rs b/crates/eonix_mm/src/paging/raw_page.rs index 7951729d..789e863b 100644 --- a/crates/eonix_mm/src/paging/raw_page.rs +++ b/crates/eonix_mm/src/paging/raw_page.rs @@ -1,14 +1,13 @@ -use super::PFN; use core::sync::atomic::AtomicUsize; +use super::PFN; + /// A `RawPage` represents a page of memory in the kernel. It is a low-level /// representation of a page that is used by the kernel to manage memory. #[doc(notable_trait)] pub trait RawPage: Clone + Copy + From + Into { fn order(&self) -> u32; fn refcount(&self) -> &AtomicUsize; - - fn is_present(&self) -> bool; } #[derive(Clone, Copy)] @@ -45,8 +44,4 @@ impl RawPage for UnmanagedRawPage { fn refcount(&self) -> &AtomicUsize { &UNMANAGED_RAW_PAGE_CLONE_COUNT } - - fn is_present(&self) -> bool { - true - } } diff --git a/crates/eonix_mm/src/paging/zone.rs b/crates/eonix_mm/src/paging/zone.rs new file mode 100644 index 00000000..ec3ed15e --- /dev/null +++ b/crates/eonix_mm/src/paging/zone.rs @@ -0,0 +1,20 @@ +use core::cell::UnsafeCell; + +#[allow(unused_imports)] +use super::{Page, PageAlloc, RawPage, PFN}; +use crate::address::PRange; + +/// A [`Zone`] holds a lot of [`Page`]s that share the same NUMA node or +/// "physical location". +pub trait Zone: Send + Sync { + type Page; + + /// Whether the [`range`] is within this [`Zone`]. + fn contains_prange(&self, range: PRange) -> bool; + + /// Get the [`RawPage`] that [`pfn`] points to. + /// + /// # Return + /// [`None`] if [`pfn`] is not in this [`Zone`]. 
+ fn get_page(&self, pfn: PFN) -> Option<&UnsafeCell>; +} diff --git a/crates/slab_allocator/src/lib.rs b/crates/slab_allocator/src/lib.rs index 8a684edd..8597331d 100644 --- a/crates/slab_allocator/src/lib.rs +++ b/crates/slab_allocator/src/lib.rs @@ -2,6 +2,7 @@ use core::ptr::NonNull; +use eonix_mm::paging::{PageList, PageListSized}; use eonix_sync::Spin; #[repr(C)] @@ -10,19 +11,6 @@ pub union SlabSlot { data: u8, } -pub trait SlabPageList: Sized { - type Page: SlabPage; - - fn new() -> Self; - fn is_empty(&self) -> bool; - - fn peek_head(&mut self) -> Option<&mut Self::Page>; - - fn pop_head(&mut self) -> Option<&'static mut Self::Page>; - fn push_tail(&mut self, page: &'static mut Self::Page); - fn remove(&mut self, page: &mut Self::Page); -} - pub trait SlabPage: Sized + 'static { fn get_data_ptr(&self) -> NonNull<[u8]>; @@ -98,7 +86,7 @@ where pub trait SlabPageAlloc { type Page: SlabPage; - type PageList: SlabPageList; + type PageList: PageList; /// Allocate a page suitable for slab system use. The page MUST come with /// its allocation count 0 and next free slot None. 
@@ -110,7 +98,7 @@ pub trait SlabPageAlloc { pub(crate) struct SlabList where - T: SlabPageList, + T: PageList, { empty_list: T, partial_list: T, @@ -132,6 +120,7 @@ unsafe impl Sync for SlabAlloc where P: SlabPag impl SlabAlloc where L: SlabPageAlloc, + L::PageList: PageListSized, { pub fn new_in(alloc: L) -> Self { Self { @@ -159,17 +148,23 @@ where impl SlabList where - T: SlabPageList, + T: PageListSized, { - fn new(object_size: usize) -> Self { + const fn new(object_size: usize) -> Self { Self { - empty_list: T::new(), - partial_list: T::new(), - full_list: T::new(), + empty_list: T::NEW, + partial_list: T::NEW, + full_list: T::NEW, object_size, } } +} +impl SlabList +where + T: PageList, + T::Page: SlabPage, +{ fn alloc_from_partial(&mut self) -> NonNull { let head = self.partial_list.peek_head().unwrap(); let slot = head.alloc_slot().unwrap(); diff --git a/src/kernel/mem/page_alloc.rs b/src/kernel/mem/page_alloc.rs index 1c018f37..9dce4567 100644 --- a/src/kernel/mem/page_alloc.rs +++ b/src/kernel/mem/page_alloc.rs @@ -1,19 +1,25 @@ mod raw_page; +mod zones; use core::sync::atomic::Ordering; -use buddy_allocator::{BuddyAllocator, BuddyRawPage as _}; +use buddy_allocator::BuddyAllocator; use eonix_mm::address::{AddrOps as _, PRange}; -use eonix_mm::paging::{GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PFN}; +use eonix_mm::paging::{ + GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PageList, PageListSized as _, PFN, +}; +use eonix_preempt::PreemptGuard; use eonix_sync::{NoContext, Spin}; -use intrusive_list::List; -use raw_page::PageFlags; +use raw_page::{PageFlags, RawPageList}; pub use raw_page::{RawPage, RawPagePtr}; +pub use zones::GlobalZone; const COSTLY_ORDER: u32 = 3; +const AREAS: usize = COSTLY_ORDER as usize + 1; const BATCH_SIZE: u32 = 64; -static BUDDY_ALLOC: Spin> = Spin::new(BuddyAllocator::new()); +static BUDDY_ALLOC: Spin> = + Spin::new(BuddyAllocator::new(&GlobalZone())); #[eonix_percpu::define_percpu] static PERCPU_PAGE_ALLOC: 
PerCpuPageAlloc = PerCpuPageAlloc::new(); @@ -26,58 +32,42 @@ pub struct BuddyPageAlloc(); struct PerCpuPageAlloc { batch: u32, - // TODO: might be used in the future. - // high: u32, - free_areas: [List; COSTLY_ORDER as usize + 1], + free_areas: [RawPageList; AREAS], +} + +pub trait PerCpuPage { + fn set_local(&mut self, val: bool); } impl PerCpuPageAlloc { const fn new() -> Self { Self { batch: BATCH_SIZE, - // high: 0, - free_areas: [const { List::new() }; COSTLY_ORDER as usize + 1], + free_areas: [RawPageList::NEW; AREAS], } } - fn insert_free_pages(&mut self, pages_ptr: RawPagePtr, order: u32) { - let free_area = &mut self.free_areas[order as usize]; - free_area.insert(unsafe { pages_ptr.get_link() }); - } - - fn get_free_pages(&mut self, order: u32) -> Option { - let free_area = &mut self.free_areas[order as usize]; - free_area.pop().map(|node| unsafe { - // SAFETY: `node` is a valid pointer to a `Link` that is not used by anyone. - RawPagePtr::from_link(node) - }) - } - - fn alloc_order(&mut self, order: u32) -> Option { + fn alloc_order(&mut self, order: u32) -> Option<&'static mut RawPage> { assert!(order <= COSTLY_ORDER); - if let Some(pages) = self.get_free_pages(order) { + if let Some(pages) = self.free_areas[order as usize].pop_head() { return Some(pages); } let batch = self.batch >> order; for _ in 0..batch { - if let Some(pages_ptr) = BUDDY_ALLOC.lock().alloc_order(order) { - pages_ptr.flags().set(PageFlags::LOCAL); - self.insert_free_pages(pages_ptr, order); - } else { + let Some(page) = BUDDY_ALLOC.lock().alloc_order(order) else { break; }; + + page.set_local(true); + self.free_areas[order as usize].push_tail(page); } - self.get_free_pages(order) + self.free_areas[order as usize].pop_head() } - fn free_pages(&mut self, pages_ptr: RawPagePtr, order: u32) { - assert_eq!(pages_ptr.order(), order); - assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0); - - pages_ptr.refcount().store(1, Ordering::Relaxed); - self.insert_free_pages(pages_ptr, 
order); + fn free_pages(&mut self, page: &'static mut RawPage, order: u32) { + self.free_areas[order as usize].push_tail(page); } } @@ -87,16 +77,6 @@ impl GlobalPageAlloc { BuddyPageAlloc() } - pub fn mark_present(range: PRange) { - let mut pfn = PFN::from(range.start().ceil()); - let end_pfn = PFN::from(range.end().floor()); - - while pfn < end_pfn { - RawPagePtr::from(pfn).flags().set(PageFlags::PRESENT); - pfn = pfn + 1; - } - } - /// Add the pages in the PAddr range `range` to the global allocator. /// /// This function is only to be called on system initialization when `eonix_preempt` @@ -116,34 +96,47 @@ impl PageAlloc for GlobalPageAlloc { type RawPage = RawPagePtr; fn alloc_order(&self, order: u32) -> Option { - if order > COSTLY_ORDER { + let raw_page = if order > COSTLY_ORDER { BUDDY_ALLOC.lock().alloc_order(order) } else { unsafe { eonix_preempt::disable(); - let page_ptr = PERCPU_PAGE_ALLOC.as_mut().alloc_order(order); + let page = PERCPU_PAGE_ALLOC.as_mut().alloc_order(order); eonix_preempt::enable(); - page_ptr + + page } - } + }; + + raw_page.map(|raw_page| { + // SAFETY: Memory order here can be Relaxed is for the same reason + // as that in the copy constructor of `std::shared_ptr`. 
+ raw_page.refcount.fetch_add(1, Ordering::Relaxed); + + RawPagePtr::from_ref(raw_page) + }) } unsafe fn dealloc(&self, page_ptr: RawPagePtr) { + assert_eq!( + page_ptr.refcount().load(Ordering::Relaxed), + 0, + "Trying to free a page with refcount > 0" + ); + if page_ptr.order() > COSTLY_ORDER { - BUDDY_ALLOC.lock().dealloc(page_ptr); + BUDDY_ALLOC.lock().dealloc(page_ptr.as_mut()); } else { let order = page_ptr.order(); + unsafe { - eonix_preempt::disable(); - PERCPU_PAGE_ALLOC.as_mut().free_pages(page_ptr, order); - eonix_preempt::enable(); + PreemptGuard::new(PERCPU_PAGE_ALLOC.as_mut()).free_pages(page_ptr.as_mut(), order); } } } fn has_management_over(&self, page_ptr: RawPagePtr) -> bool { - BuddyAllocator::has_management_over(page_ptr) - && (page_ptr.order() > COSTLY_ORDER || page_ptr.flags().has(PageFlags::LOCAL)) + page_ptr.order() > COSTLY_ORDER || page_ptr.flags().has(PageFlags::LOCAL) } } @@ -157,14 +150,17 @@ impl PageAlloc for BuddyPageAlloc { type RawPage = RawPagePtr; fn alloc_order(&self, order: u32) -> Option { - BUDDY_ALLOC.lock().alloc_order(order) + BUDDY_ALLOC + .lock() + .alloc_order(order) + .map(|raw_page| RawPagePtr::from_ref(raw_page)) } unsafe fn dealloc(&self, page_ptr: RawPagePtr) { - BUDDY_ALLOC.lock().dealloc(page_ptr); + BUDDY_ALLOC.lock().dealloc(page_ptr.as_mut()); } - fn has_management_over(&self, page_ptr: RawPagePtr) -> bool { - BuddyAllocator::has_management_over(page_ptr) + fn has_management_over(&self, _: RawPagePtr) -> bool { + true } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index d793ccd7..0baa7b9a 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -1,18 +1,18 @@ use core::ptr::NonNull; use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering}; -use buddy_allocator::BuddyRawPage; +use buddy_allocator::BuddyPage; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::{PAddr, PhysAccess as _}; -use 
eonix_mm::paging::{PageAlloc, RawPage as RawPageTrait, PFN}; +use eonix_mm::paging::{PageAlloc, PageList, PageListSized, RawPage as RawPageTrait, PFN}; use intrusive_list::{container_of, Link, List}; -use slab_allocator::{SlabPage, SlabPageAlloc, SlabPageList, SlabSlot}; +use slab_allocator::{SlabPage, SlabPageAlloc, SlabSlot}; -use super::GlobalPageAlloc; +use super::{GlobalPageAlloc, PerCpuPage}; use crate::kernel::mem::page_cache::PageCacheRawPage; use crate::kernel::mem::PhysAccess; -const PAGE_ARRAY: NonNull = +pub const PAGE_ARRAY: NonNull = unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) }; pub struct PageFlags(AtomicU32); @@ -52,21 +52,23 @@ pub struct RawPage { /// This field is only used in buddy system and is protected by the global lock. order: u32, flags: PageFlags, - refcount: AtomicUsize, + pub refcount: AtomicUsize, shared_data: PageData, } +// XXX: introduce Folio and remove this. +unsafe impl Send for RawPage {} +unsafe impl Sync for RawPage {} + #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct RawPagePtr(NonNull); impl PageFlags { - pub const PRESENT: u32 = 1 << 0; pub const LOCKED: u32 = 1 << 1; pub const BUDDY: u32 = 1 << 2; pub const SLAB: u32 = 1 << 3; pub const DIRTY: u32 = 1 << 4; - pub const FREE: u32 = 1 << 5; pub const LOCAL: u32 = 1 << 6; pub fn has(&self, flag: u32) -> bool { @@ -158,48 +160,31 @@ impl RawPageTrait for RawPagePtr { fn refcount(&self) -> &AtomicUsize { self.refcount() } - - fn is_present(&self) -> bool { - self.flags().has(PageFlags::PRESENT) - } } -impl BuddyRawPage for RawPagePtr { - unsafe fn from_link(link: &mut Link) -> Self { - let raw_page_ptr = container_of!(link, RawPage, link); - Self(raw_page_ptr) - } - - fn set_order(&self, order: u32) { - self.as_mut().order = order; +impl BuddyPage for RawPage { + fn pfn(&self) -> PFN { + PFN::from(RawPagePtr::from_ref(self)) } - unsafe fn get_link(&self) -> &mut Link { - &mut self.as_mut().link + fn get_order(&self) -> u32 { + self.order } 
fn is_buddy(&self) -> bool { - self.flags().has(PageFlags::BUDDY) - } - - fn is_free(&self) -> bool { - self.flags().has(PageFlags::FREE) - } - - fn set_buddy(&self) { - self.flags().set(PageFlags::BUDDY); - } - - fn set_free(&self) { - self.flags().set(PageFlags::FREE); + self.flags.has(PageFlags::BUDDY) } - fn clear_buddy(&self) { - self.flags().clear(PageFlags::BUDDY); + fn set_order(&mut self, order: u32) { + self.order = order; } - fn clear_free(&self) { - self.flags().clear(PageFlags::FREE); + fn set_buddy(&mut self, val: bool) { + if val { + self.flags.set(PageFlags::BUDDY); + } else { + self.flags.clear(PageFlags::BUDDY) + } } } @@ -284,15 +269,21 @@ impl PageCacheRawPage for RawPagePtr { } } -pub struct RawSlabPageList(List); +impl PerCpuPage for RawPage { + fn set_local(&mut self, val: bool) { + if val { + self.flags.set(PageFlags::LOCAL) + } else { + self.flags.clear(PageFlags::LOCAL) + } + } +} + +pub struct RawPageList(List); -impl SlabPageList for RawSlabPageList { +impl PageList for RawPageList { type Page = RawPage; - fn new() -> Self { - Self(List::new()) - } - fn is_empty(&self) -> bool { self.0.is_empty() } @@ -324,9 +315,13 @@ impl SlabPageList for RawSlabPageList { } } +impl PageListSized for RawPageList { + const NEW: Self = RawPageList(List::new()); +} + impl SlabPageAlloc for GlobalPageAlloc { type Page = RawPage; - type PageList = RawSlabPageList; + type PageList = RawPageList; unsafe fn alloc_uninit(&self) -> &'static mut RawPage { let raw_page = self.alloc().expect("Out of memory").as_mut(); diff --git a/src/kernel/mem/page_alloc/zones.rs b/src/kernel/mem/page_alloc/zones.rs new file mode 100644 index 00000000..7a2e4e33 --- /dev/null +++ b/src/kernel/mem/page_alloc/zones.rs @@ -0,0 +1,25 @@ +use core::cell::UnsafeCell; + +use eonix_mm::address::PRange; +use eonix_mm::paging::{Zone, PFN}; + +use super::RawPage; +use crate::kernel::mem::page_alloc::RawPagePtr; + +pub struct GlobalZone(); + +impl Zone for GlobalZone { + type Page = RawPage; 
+ + fn contains_prange(&self, _: PRange) -> bool { + true + } + + fn get_page(&self, pfn: PFN) -> Option<&UnsafeCell> { + unsafe { + // SAFETY: The pointer returned by [`RawPagePtr::as_ptr()`] is valid. + // And so is it wrapped with [`UnsafeCell`] + Some(&*(RawPagePtr::from(pfn).as_ptr() as *const UnsafeCell)) + } + } +} diff --git a/src/kernel_init.rs b/src/kernel_init.rs index 3d8be90f..93b6da20 100644 --- a/src/kernel_init.rs +++ b/src/kernel_init.rs @@ -1,14 +1,11 @@ +use eonix_hal::bootstrap::BootStrapData; +use eonix_hal::mm::{ArchMemory, ArchPagingMode, GLOBAL_PAGE_TABLE}; +use eonix_hal::traits::mm::Memory; +use eonix_mm::address::{Addr as _, AddrOps as _, VAddr, VRange}; +use eonix_mm::page_table::{PageAttribute, PagingMode as _, PTE}; +use eonix_mm::paging::{Page as GenericPage, PAGE_SIZE, PFN}; + use crate::kernel::mem::{GlobalPageAlloc, RawPage}; -use eonix_hal::{ - bootstrap::BootStrapData, - mm::{ArchMemory, ArchPagingMode, GLOBAL_PAGE_TABLE}, - traits::mm::Memory, -}; -use eonix_mm::{ - address::{Addr as _, AddrOps as _, VAddr, VRange}, - page_table::{PageAttribute, PagingMode as _, PTE}, - paging::{Page as GenericPage, PAGE_SIZE, PFN}, -}; pub fn setup_memory(data: &mut BootStrapData) { let addr_max = ArchMemory::present_ram() @@ -50,9 +47,10 @@ pub fn setup_memory(data: &mut BootStrapData) { ); } - for range in ArchMemory::present_ram() { - GlobalPageAlloc::mark_present(range); - } + // TODO!!!: Construct the global zone with all present ram. + // for range in ArchMemory::present_ram() { + // GlobalPageAlloc::mark_present(range); + // } if let Some(early_alloc) = data.take_alloc() { for range in early_alloc.into_iter() { From 60149eb952e90cb5d124a9a6242b13f73c372c79 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Wed, 14 Jan 2026 01:09:49 +0800 Subject: [PATCH 38/54] vfs: rework of inode and page cache system - Bump rust compiler version to nightly-2026-01-09. - Inode rework: add a generic Inode struct. - Add a macro to help function tweaks. 
- PageCache rework: reduce complexity and try to decouple. - Adapt fat32, tmpfs to the new page cache system. - Change the way we process mapped pages and load ELF executables. - Refine handling flags in `MMArea::handle_mmap`. Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 62 +- rust-toolchain | 2 +- src/fs/fat32.rs | 276 ++++----- src/fs/procfs.rs | 155 +++-- src/fs/tmpfs/dir.rs | 287 +++++----- src/fs/tmpfs/file.rs | 331 +++++------ src/fs/tmpfs/mod.rs | 23 +- src/kernel/mem.rs | 2 +- src/kernel/mem/mm_area.rs | 97 ++-- src/kernel/mem/mm_list/mapping.rs | 15 +- src/kernel/mem/mm_list/page_fault.rs | 5 +- src/kernel/mem/page_alloc.rs | 4 +- src/kernel/mem/page_alloc/raw_page.rs | 29 +- src/kernel/mem/page_cache.rs | 416 +++++--------- src/kernel/syscall/mm.rs | 21 +- src/kernel/task/loader/elf.rs | 70 +-- src/kernel/vfs/dentry.rs | 66 +-- src/kernel/vfs/dentry/walk.rs | 52 +- src/kernel/vfs/file/inode_file.rs | 36 +- src/kernel/vfs/filearray.rs | 46 +- src/kernel/vfs/inode/inode.rs | 531 +++++++++--------- src/kernel/vfs/inode/mod.rs | 2 +- src/kernel/vfs/inode/ops.rs | 5 +- src/kernel/vfs/inode/statx.rs | 32 +- src/kernel/vfs/mount.rs | 28 +- src/kernel/vfs/superblock.rs | 45 +- src/lib.rs | 1 - 27 files changed, 1152 insertions(+), 1487 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 0f1dff63..7b3dc043 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -1,39 +1,31 @@ -use super::{ - config::{self, mm::*}, - console::write_str, - cpu::{CPUID, CPU_COUNT}, - time::set_next_timer, -}; -use crate::{ - arch::{ - cpu::CPU, - fdt::{init_dtb_and_fdt, FdtExt, FDT}, - mm::{ArchPhysAccess, FreeRam, PageAttribute64, GLOBAL_PAGE_TABLE}, - }, - bootstrap::BootStrapData, - mm::{ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}, -}; -use core::{ - alloc::Allocator, - arch::asm, 
- cell::RefCell, - sync::atomic::{AtomicBool, AtomicUsize}, -}; -use core::{ - arch::{global_asm, naked_asm}, - hint::spin_loop, - sync::atomic::{AtomicPtr, Ordering}, -}; +use core::alloc::Allocator; +use core::arch::{asm, global_asm, naked_asm}; +use core::cell::RefCell; +use core::hint::spin_loop; +use core::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; + use eonix_hal_traits::mm::Memory; -use eonix_mm::{ - address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}, - page_table::{PageAttribute, PagingMode, PTE as _}, - paging::{Page, PageAccess, PageAlloc, PAGE_SIZE, PFN}, -}; +use eonix_mm::address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}; +use eonix_mm::page_table::{PageAttribute, PagingMode, PTE as _}; +use eonix_mm::paging::{Page, PageAccess, PageAlloc, PAGE_SIZE, PFN}; use eonix_percpu::PercpuArea; use fdt::Fdt; -use riscv::{asm::sfence_vma_all, register::satp}; -use sbi::{hsm::hart_start, legacy::console_putchar, PhysicalAddress}; +use riscv::asm::sfence_vma_all; +use riscv::register::satp; +use sbi::hsm::hart_start; +use sbi::legacy::console_putchar; +use sbi::PhysicalAddress; + +use super::config::mm::*; +use super::config::{self}; +use super::console::write_str; +use super::cpu::{CPUID, CPU_COUNT}; +use super::time::set_next_timer; +use crate::arch::cpu::CPU; +use crate::arch::fdt::{init_dtb_and_fdt, FdtExt, FDT}; +use crate::arch::mm::{ArchPhysAccess, FreeRam, PageAttribute64, GLOBAL_PAGE_TABLE}; +use crate::bootstrap::BootStrapData; +use crate::mm::{ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}; #[unsafe(link_section = ".bootstrap.stack")] static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16]; @@ -78,7 +70,7 @@ static AP_SEM: AtomicBool = AtomicBool::new(false); #[unsafe(naked)] #[unsafe(no_mangle)] #[unsafe(link_section = ".bootstrap.entry")] -unsafe extern "C" fn _start(hart_id: usize, dtb_addr: usize) -> ! 
{ +unsafe extern "C" fn _start(hart_id: usize, dtb_addr: usize) { naked_asm!( " ld sp, 2f @@ -289,7 +281,7 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) { #[unsafe(naked)] #[unsafe(no_mangle)] #[unsafe(link_section = ".bootstrap.apentry")] -unsafe extern "C" fn _ap_start(hart_id: usize) -> ! { +unsafe extern "C" fn _ap_start(hart_id: usize) { naked_asm!( " la sp, 1f // set temp stack diff --git a/rust-toolchain b/rust-toolchain index 8adb8e58..11ad5efd 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2025-05-16 +nightly-2026-01-09 diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 9a4e03ec..b19c9908 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -1,26 +1,22 @@ mod dir; mod file; -use alloc::sync::{Arc, Weak}; -use core::future::Future; +use alloc::sync::Arc; use core::ops::Deref; use async_trait::async_trait; use dir::{as_raw_dirents, ParseDirent}; +use eonix_mm::paging::PAGE_SIZE; use eonix_sync::RwLock; use itertools::Itertools; use crate::io::{Buffer, ByteBuffer, UninitBuffer}; use crate::kernel::block::{BlockDevice, BlockDeviceRequest}; use crate::kernel::constants::{EINVAL, EIO}; -use crate::kernel::mem::{ - CachePage, CachePageStream, Page, PageCache, PageCacheBackendOps, PageExcl, PageExt, -}; +use crate::kernel::mem::{CachePage, Page, PageExcl, PageExt, PageOffset}; use crate::kernel::timer::Instant; use crate::kernel::vfs::dentry::Dentry; -use crate::kernel::vfs::inode::{ - Ino, Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, -}; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse}; use crate::kernel::vfs::mount::{register_filesystem, Mount, MountCreator}; use crate::kernel::vfs::types::{DeviceId, Format, Permission}; use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; @@ -56,6 +52,10 @@ impl Cluster { Ino::new(self.0 as _) } + pub fn from_ino(ino: Ino) -> Self { + Self(ino.as_raw() as u32) + } + fn normalized(self) -> Self { Self(self.0 - 2) } @@ -130,7 
+130,7 @@ impl FatFs { } impl FatFs { - pub async fn create(device: DeviceId) -> KResult<(SbUse, InodeUse)> { + pub async fn create(device: DeviceId) -> KResult<(SbUse, InodeUse)> { let device = BlockDevice::get(device)?; let mut info = UninitBuffer::::new(); @@ -217,18 +217,15 @@ impl<'fat> Iterator for ClusterIterator<'fat> { } } -struct FileInode { - cluster: Cluster, - info: Spin, - sb: SbRef, - page_cache: PageCache, -} +struct FileInode; impl FileInode { - fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { - InodeUse::new_cyclic(|weak: &Weak| Self { - cluster, - info: Spin::new(InodeInfo { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new( + sb, + cluster.as_ino(), + Format::REG, + InodeInfo { size: size as u64, nlink: 1, uid: 0, @@ -237,108 +234,75 @@ impl FileInode { atime: Instant::UNIX_EPOCH, ctime: Instant::UNIX_EPOCH, mtime: Instant::UNIX_EPOCH, - }), - sb, - page_cache: PageCache::new(weak.clone()), - }) + }, + Self, + ) } } impl InodeOps for FileInode { type SuperBlock = FatFs; - fn ino(&self) -> Ino { - self.cluster.as_ino() - } - - fn format(&self) -> Format { - Format::REG - } - - fn info(&self) -> &Spin { - &self.info - } - - fn super_block(&self) -> &SbRef { - &self.sb - } - - fn page_cache(&self) -> Option<&PageCache> { - Some(&self.page_cache) - } -} - -impl InodeDirOps for FileInode {} -impl InodeFileOps for FileInode { - async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - self.page_cache.read(buffer, offset).await + async fn read( + &self, + _: SbUse, + inode: &InodeUse, + buffer: &mut dyn Buffer, + offset: usize, + ) -> KResult { + inode.get_page_cache().read(buffer, offset).await } - async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let sb = self.sb.get()?; + async fn read_page( + &self, + sb: SbUse, + inode: &InodeUse, + page: &mut CachePage, + offset: PageOffset, + ) -> KResult<()> { let fs = &sb.backend; let fat = sb.backend.fat.read().await; 
- if offset >= self.info.lock().size as usize { - return Ok(0); + if offset >= PageOffset::from_byte_ceil(inode.info.lock().size as usize) { + unreachable!("read_page called with offset beyond file size"); } let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; - assert!(cluster_size <= 0x1000, "Cluster size is too large"); - - let skip_clusters = offset / cluster_size; - let inner_offset = offset % cluster_size; - - let cluster_iter = ClusterIterator::new(fat.as_ref(), self.cluster).skip(skip_clusters); - - let buffer_page = Page::alloc(); - for cluster in cluster_iter { - fs.read_cluster(cluster, &buffer_page).await?; - - let pg = buffer_page.lock(); - let data = &pg.as_bytes()[inner_offset..]; - - let end = offset + data.len(); - let real_end = end.min(self.info.lock().size as usize); - let real_size = real_end - offset; - - if buffer.fill(&data[..real_size])?.should_stop() { - break; - } + if cluster_size != PAGE_SIZE { + unimplemented!("cluster size != PAGE_SIZE"); } - Ok(buffer.wrote()) - } -} + // XXX: Ugly and inefficient O(n^2) algorithm for sequential file read. + let cluster = ClusterIterator::new(fat.as_ref(), Cluster::from_ino(inode.ino)) + .skip(offset.page_count()) + .next() + .ok_or(EIO)?; -impl PageCacheBackendOps for FileInode { - async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { - self.read_direct(page, offset).await - } + let page = page.get_page(); + fs.read_cluster(cluster, &page).await?; - async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { - todo!() - } + let real_len = (inode.info.lock().size as usize) - offset.byte_count(); + if real_len < PAGE_SIZE { + let mut page = page.lock(); + page.as_bytes_mut()[real_len..].fill(0); + } - fn size(&self) -> usize { - self.info.lock().size as usize + Ok(()) } } struct DirInode { - cluster: Cluster, - info: Spin, - sb: SbRef, - // TODO: Use the new PageCache... 
dir_pages: RwLock>, } impl DirInode { - fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { - InodeUse::new(Self { - cluster, - info: Spin::new(InodeInfo { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new( + sb, + cluster.as_ino(), + Format::DIR, + InodeInfo { size: size as u64, nlink: 2, // '.' and '..' uid: 0, @@ -347,23 +311,23 @@ impl DirInode { atime: Instant::UNIX_EPOCH, ctime: Instant::UNIX_EPOCH, mtime: Instant::UNIX_EPOCH, - }), - sb, - dir_pages: RwLock::new(Vec::new()), - }) + }, + Self { + dir_pages: RwLock::new(Vec::new()), + }, + ) } - async fn read_dir_pages(&self) -> KResult<()> { + async fn read_dir_pages(&self, sb: &SbUse, inode: &InodeUse) -> KResult<()> { let mut dir_pages = self.dir_pages.write().await; if !dir_pages.is_empty() { return Ok(()); } - let sb = self.sb.get()?; let fs = &sb.backend; let fat = fs.fat.read().await; - let clusters = ClusterIterator::new(fat.as_ref(), self.cluster); + let clusters = ClusterIterator::new(fat.as_ref(), Cluster::from_ino(inode.ino)); for cluster in clusters { let page = PageExcl::alloc(); @@ -375,7 +339,11 @@ impl DirInode { Ok(()) } - async fn get_dir_pages(&self) -> KResult> + use<'_>> { + async fn get_dir_pages( + &self, + sb: &SbUse, + inode: &InodeUse, + ) -> KResult> + use<'_>> { { let dir_pages = self.dir_pages.read().await; if !dir_pages.is_empty() { @@ -383,7 +351,7 @@ impl DirInode { } } - self.read_dir_pages().await?; + self.read_dir_pages(sb, inode).await?; if let Some(dir_pages) = self.dir_pages.try_read() { return Ok(dir_pages); @@ -396,32 +364,13 @@ impl DirInode { impl InodeOps for DirInode { type SuperBlock = FatFs; - fn ino(&self) -> Ino { - self.cluster.as_ino() - } - - fn format(&self) -> Format { - Format::DIR - } - - fn info(&self) -> &Spin { - &self.info - } - - fn super_block(&self) -> &SbRef { - &self.sb - } - - fn page_cache(&self) -> Option<&PageCache> { - None - } -} - -impl InodeFileOps for DirInode {} -impl InodeDirOps for DirInode { - 
async fn lookup(&self, dentry: &Arc) -> KResult>> { - let sb = self.sb.get()?; - let dir_pages = self.get_dir_pages().await?; + async fn lookup( + &self, + sb: SbUse, + inode: &InodeUse, + dentry: &Arc, + ) -> KResult> { + let dir_pages = self.get_dir_pages(&sb, inode).await?; let dir_data = dir_pages.iter().map(|pg| pg.as_bytes()); @@ -451,48 +400,47 @@ impl InodeDirOps for DirInode { Ok(None) } - fn readdir<'r, 'a: 'r, 'b: 'r>( - &'a self, + async fn readdir( + &self, + sb: SbUse, + inode: &InodeUse, offset: usize, - callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), - ) -> impl Future>> + Send + 'r { - async move { - let sb = self.sb.get()?; - let fs = &sb.backend; - let dir_pages = self.get_dir_pages().await?; + callback: &mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> KResult> { + let fs = &sb.backend; + let dir_pages = self.get_dir_pages(&sb, inode).await?; - let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; + let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; - let cluster_offset = offset / cluster_size; - let inner_offset = offset % cluster_size; - let inner_raw_dirent_offset = inner_offset / core::mem::size_of::(); + let cluster_offset = offset / cluster_size; + let inner_offset = offset % cluster_size; + let inner_raw_dirent_offset = inner_offset / core::mem::size_of::(); - let dir_data = dir_pages - .iter() - .skip(cluster_offset) - .map(|pg| pg.as_bytes()); - - let raw_dirents = dir_data - .map(as_raw_dirents) - .take_while_inclusive(Result::is_ok) - .flatten_ok() - .skip(inner_raw_dirent_offset); - - let mut dirents = futures::stream::iter(raw_dirents); - - let mut nread = 0; - while let Some(result) = dirents.next_dirent().await { - let entry = result?; - - match callback(&entry.filename, entry.cluster.as_ino()) { - Err(err) => return Ok(Err(err)), - Ok(true) => nread += entry.entry_offset as usize, - Ok(false) => break, - } - } + let dir_data = dir_pages + .iter() + .skip(cluster_offset) + .map(|pg| 
pg.as_bytes()); - Ok(Ok(nread)) + let raw_dirents = dir_data + .map(as_raw_dirents) + .take_while_inclusive(Result::is_ok) + .flatten_ok() + .skip(inner_raw_dirent_offset); + + let mut dirents = futures::stream::iter(raw_dirents); + + let mut nread = 0; + while let Some(result) = dirents.next_dirent().await { + let entry = result?; + + match callback(&entry.filename, entry.cluster.as_ino()) { + Err(err) => return Ok(Err(err)), + Ok(true) => nread += entry.entry_offset as usize, + Ok(false) => break, + } } + + Ok(Ok(nread)) } } diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 57b881df..32ede420 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,30 +1,21 @@ +use alloc::sync::Arc; +use core::sync::atomic::{AtomicU64, Ordering}; + +use async_trait::async_trait; +use eonix_sync::{LazyLock, RwLock}; + +use crate::io::Buffer; use crate::kernel::constants::{EACCES, EISDIR, ENOTDIR}; +use crate::kernel::mem::paging::PageBuffer; use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::mount::{dump_mounts, register_filesystem, Mount, MountCreator}; use crate::kernel::vfs::types::{DeviceId, Format, Permission}; use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; -use crate::{ - io::Buffer, - kernel::{ - mem::paging::PageBuffer, - vfs::{ - dentry::Dentry, - inode::{Ino, Inode}, - mount::{dump_mounts, register_filesystem, Mount, MountCreator}, - }, - }, - prelude::*, -}; -use alloc::sync::Arc; -use async_trait::async_trait; -use core::future::Future; -use core::sync::atomic::{AtomicU64, Ordering}; -use eonix_sync::{LazyLock, RwLock}; +use crate::prelude::*; struct Node { - ino: Ino, - sb: SbRef, - info: Spin, kind: NodeKind, } @@ -39,38 +30,19 @@ struct FileInode { } struct DirInode { - entries: RwLock, InodeUse)>>, + entries: RwLock, InodeUse)>>, 
} impl InodeOps for Node { type SuperBlock = ProcFs; - fn ino(&self) -> Ino { - self.ino - } - - fn format(&self) -> Format { - match &self.kind { - NodeKind::File(_) => Format::REG, - NodeKind::Dir(_) => Format::DIR, - } - } - - fn info(&self) -> &Spin { - &self.info - } - - fn super_block(&self) -> &SbRef { - &self.sb - } - - fn page_cache(&self) -> Option<&crate::kernel::mem::PageCache> { - None - } -} - -impl InodeFileOps for Node { - async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + async fn read( + &self, + _: SbUse, + _: &InodeUse, + buffer: &mut dyn Buffer, + offset: usize, + ) -> KResult { let NodeKind::File(file_inode) = &self.kind else { return Err(EISDIR); }; @@ -88,10 +60,13 @@ impl InodeFileOps for Node { Ok(buffer.fill(data)?.allow_partial()) } -} -impl InodeDirOps for Node { - async fn lookup(&self, dentry: &Arc) -> KResult>> { + async fn lookup( + &self, + _: SbUse, + _: &InodeUse, + dentry: &Arc, + ) -> KResult> { let NodeKind::Dir(dir) = &self.kind else { return Err(ENOTDIR); }; @@ -108,29 +83,29 @@ impl InodeDirOps for Node { Ok(None) } - fn readdir<'r, 'a: 'r, 'b: 'r>( - &'a self, + async fn readdir( + &self, + _: SbUse, + _: &InodeUse, offset: usize, - callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), - ) -> impl Future>> + Send + 'r { - Box::pin(async move { - let NodeKind::Dir(dir) = &self.kind else { - return Err(ENOTDIR); - }; - - let entries = dir.entries.read().await; - - let mut count = 0; - for (name, node) in entries.iter().skip(offset) { - match callback(name.as_ref(), node.ino) { - Err(err) => return Ok(Err(err)), - Ok(true) => count += 1, - Ok(false) => break, - } + callback: &mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> KResult> { + let NodeKind::Dir(dir) = &self.kind else { + return Err(ENOTDIR); + }; + + let entries = dir.entries.read().await; + + let mut count = 0; + for (name, node) in entries.iter().skip(offset) { + match callback(name.as_ref(), node.ino) { + Err(err) => return 
Ok(Err(err)), + Ok(true) => count += 1, + Ok(false) => break, } + } - Ok(Ok(count)) - }) + Ok(Ok(count)) } } @@ -139,11 +114,12 @@ impl Node { ino: Ino, sb: SbRef, read: impl Fn(&mut PageBuffer) -> KResult<()> + Send + Sync + 'static, - ) -> InodeUse { - InodeUse::new(Self { - ino, + ) -> InodeUse { + InodeUse::new( sb, - info: Spin::new(InodeInfo { + ino, + Format::REG, + InodeInfo { size: 0, nlink: 1, uid: 0, @@ -152,16 +128,19 @@ impl Node { atime: Instant::UNIX_EPOCH, ctime: Instant::UNIX_EPOCH, mtime: Instant::UNIX_EPOCH, - }), - kind: NodeKind::File(FileInode::new(Box::new(read))), - }) + }, + Self { + kind: NodeKind::File(FileInode::new(Box::new(read))), + }, + ) } - fn new_dir(ino: Ino, sb: SbRef) -> InodeUse { - InodeUse::new(Self { - ino, + fn new_dir(ino: Ino, sb: SbRef) -> InodeUse { + InodeUse::new( sb, - info: Spin::new(InodeInfo { + ino, + Format::DIR, + InodeInfo { size: 0, nlink: 1, uid: 0, @@ -170,9 +149,11 @@ impl Node { atime: Instant::UNIX_EPOCH, ctime: Instant::UNIX_EPOCH, mtime: Instant::UNIX_EPOCH, - }), - kind: NodeKind::Dir(DirInode::new()), - }) + }, + Self { + kind: NodeKind::Dir(DirInode::new()), + }, + ) } } @@ -194,7 +175,7 @@ impl DirInode { } pub struct ProcFs { - root: InodeUse, + root: InodeUse, next_ino: AtomicU64, } @@ -240,7 +221,7 @@ where F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static, { let procfs = &GLOBAL_PROCFS.backend; - let root = &procfs.root; + let root = &procfs.root.get_priv::(); let NodeKind::Dir(root) = &root.kind else { unreachable!(); diff --git a/src/fs/tmpfs/dir.rs b/src/fs/tmpfs/dir.rs index e2be1d12..4dd64d52 100644 --- a/src/fs/tmpfs/dir.rs +++ b/src/fs/tmpfs/dir.rs @@ -1,72 +1,51 @@ -use core::{any::Any, future::Future}; +use alloc::sync::Arc; +use alloc::vec; +use alloc::vec::Vec; -use alloc::{boxed::Box, sync::Arc, vec, vec::Vec}; use eonix_log::println_warn; -use eonix_sync::{LazyLock, RwLock, Spin}; - -use crate::{ - kernel::{ - constants::{EEXIST, EINVAL, EISDIR, ENOENT, ENOSYS, 
ENOTDIR}, - mem::PageCache, - timer::Instant, - vfs::{ - dentry::{dcache, Dentry}, - inode::{ - Ino, Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, RenameData, - }, - types::{DeviceId, Format, Mode, Permission}, - SbRef, - }, - }, - prelude::KResult, -}; - -use super::{ - file::{DeviceInode, FileInode, SymlinkInode}, - TmpFs, -}; +use eonix_sync::{LazyLock, RwLock}; + +use super::file::{DeviceInode, FileInode, SymlinkInode}; +use super::TmpFs; +use crate::kernel::constants::{EEXIST, EINVAL, EISDIR, ENOENT, ENOSYS, ENOTDIR}; +use crate::kernel::timer::Instant; +use crate::kernel::vfs::dentry::{dcache, Dentry}; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse, RenameData}; +use crate::kernel::vfs::types::{DeviceId, Format, Mode, Permission}; +use crate::kernel::vfs::{SbRef, SbUse}; +use crate::prelude::KResult; pub struct DirectoryInode { - sb: SbRef, - ino: Ino, - info: Spin, entries: RwLock, Ino)>>, } -impl InodeOps for DirectoryInode { - type SuperBlock = TmpFs; - - fn ino(&self) -> Ino { - self.ino - } +fn link(dir: &InodeUse, entries: &mut Vec<(Arc<[u8]>, Ino)>, name: Arc<[u8]>, file: &InodeUse) { + let mut dir_info = dir.info.lock(); + let mut file_info = file.info.lock(); - fn format(&self) -> Format { - Format::DIR - } + let now = Instant::now(); - fn info(&self) -> &Spin { - &self.info - } + file_info.nlink += 1; + file_info.ctime = now; - fn super_block(&self) -> &SbRef { - &self.sb - } + dir_info.size += 1; + dir_info.mtime = now; + dir_info.ctime = now; - fn page_cache(&self) -> Option<&PageCache> { - None - } + entries.push((name, file.ino)); } impl DirectoryInode { - pub fn new(ino: Ino, sb: SbRef, perm: Permission) -> InodeUse { + pub fn new(ino: Ino, sb: SbRef, perm: Permission) -> InodeUse { static DOT: LazyLock> = LazyLock::new(|| Arc::from(b".".as_slice())); let now = Instant::now(); - InodeUse::new(Self { + InodeUse::new( sb, ino, - info: Spin::new(InodeInfo { + Format::DIR, + InodeInfo { size: 1, nlink: 1, // 
link from `.` to itself perm, @@ -75,35 +54,16 @@ impl DirectoryInode { atime: now, uid: 0, gid: 0, - }), - entries: RwLock::new(vec![(DOT.clone(), ino)]), - }) - } - - fn link( - &self, - entries: &mut Vec<(Arc<[u8]>, Ino)>, - name: Arc<[u8]>, - file: &InodeUse, - ) { - let mut self_info = self.info.lock(); - let mut file_info = file.info().lock(); - - let now = Instant::now(); - - file_info.nlink += 1; - file_info.ctime = now; - - self_info.size += 1; - self_info.mtime = now; - self_info.ctime = now; - - entries.push((name, file.ino())); + }, + Self { + entries: RwLock::new(vec![(DOT.clone(), ino)]), + }, + ) } fn do_unlink( &self, - file: &InodeUse, + file: &InodeUse, filename: &[u8], entries: &mut Vec<(Arc<[u8]>, Ino)>, now: Instant, @@ -112,11 +72,11 @@ impl DirectoryInode { file_info: &mut InodeInfo, ) -> KResult<()> { // SAFETY: `file_lock` has done the synchronization - if file.format() == Format::DIR { + if file.format == Format::DIR { return Err(EISDIR); } - let file_ino = file.ino(); + let file_ino = file.ino; entries.retain(|(name, ino)| *ino != file_ino || name.as_ref() != filename); if decrease_size { @@ -138,87 +98,114 @@ impl DirectoryInode { } } -impl InodeDirOps for DirectoryInode { - fn readdir<'r, 'a: 'r, 'b: 'r>( - &'a self, +impl InodeOps for DirectoryInode { + type SuperBlock = TmpFs; + + async fn readdir( + &self, + sb: SbUse, + _: &InodeUse, offset: usize, - for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), - ) -> impl Future>> + Send + 'r { - Box::pin(async move { - let _sb = self.sb.get()?; - let entries = self.entries.read().await; - - let mut count = 0; - for entry in entries.iter().skip(offset) { - match for_each_entry(&entry.0, entry.1) { - Err(err) => return Ok(Err(err)), - Ok(false) => break, - Ok(true) => count += 1, - } + for_each_entry: &mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> KResult> { + let _sb = sb; + let entries = self.entries.read().await; + + let mut count = 0; + for entry in 
entries.iter().skip(offset) { + match for_each_entry(&entry.0, entry.1) { + Err(err) => return Ok(Err(err)), + Ok(false) => break, + Ok(true) => count += 1, } + } - Ok(Ok(count)) - }) + Ok(Ok(count)) } - async fn create(&self, at: &Arc, perm: Permission) -> KResult<()> { - let sb = self.sb.get()?; + async fn create( + &self, + sb: SbUse, + inode: &InodeUse, + at: &Arc, + perm: Permission, + ) -> KResult<()> { let mut entries = self.entries.write().await; let ino = sb.backend.assign_ino(); - let file: InodeUse = FileInode::new(ino, self.sb.clone(), 0, perm); + let file = FileInode::new(ino, sb.get_ref(), 0, perm); - self.link(&mut entries, at.get_name(), &file); + link(inode, &mut entries, at.get_name(), &file); at.fill(file); Ok(()) } - async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> { + async fn mknod( + &self, + sb: SbUse, + inode: &InodeUse, + at: &Dentry, + mode: Mode, + dev: DeviceId, + ) -> KResult<()> { if !mode.is_chr() && !mode.is_blk() { return Err(EINVAL); } - let sb = self.sb.get()?; let mut entries = self.entries.write().await; let ino = sb.backend.assign_ino(); - let file: InodeUse = DeviceInode::new(ino, self.sb.clone(), mode, dev); + let file = DeviceInode::new(ino, sb.get_ref(), mode, dev); - self.link(&mut entries, at.get_name(), &file); + link(inode, &mut entries, at.get_name(), &file); at.fill(file); Ok(()) } - async fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { - let sb = self.sb.get()?; + async fn symlink( + &self, + sb: SbUse, + inode: &InodeUse, + at: &Arc, + target: &[u8], + ) -> KResult<()> { let mut entries = self.entries.write().await; let ino = sb.backend.assign_ino(); - let file: InodeUse = SymlinkInode::new(ino, self.sb.clone(), target.into()); + let file = SymlinkInode::new(ino, sb.get_ref(), target.into()); - self.link(&mut entries, at.get_name(), &file); + link(inode, &mut entries, at.get_name(), &file); at.fill(file); Ok(()) } - async fn mkdir(&self, at: &Dentry, perm: Permission) -> 
KResult<()> { - let sb = self.sb.get()?; + async fn mkdir( + &self, + sb: SbUse, + inode: &InodeUse, + at: &Dentry, + perm: Permission, + ) -> KResult<()> { let mut entries = self.entries.write().await; let ino = sb.backend.assign_ino(); - let new_dir: InodeUse = DirectoryInode::new(ino, self.sb.clone(), perm); + let new_dir = DirectoryInode::new(ino, sb.get_ref(), perm); - self.link(&mut entries, at.get_name(), &new_dir); + link(inode, &mut entries, at.get_name(), &new_dir); at.fill(new_dir); Ok(()) } - async fn unlink(&self, at: &Arc) -> KResult<()> { - let _sb = self.sb.get()?; + async fn unlink( + &self, + _sb: SbUse, + inode: &InodeUse, + at: &Arc, + ) -> KResult<()> { let mut entries = self.entries.write().await; let file = at.get_inode()?; @@ -230,8 +217,8 @@ impl InodeDirOps for DirectoryInode { &mut entries, Instant::now(), true, - &mut self.info.lock(), - &mut file.info().lock(), + &mut inode.info.lock(), + &mut file.info.lock(), )?; // Remove the dentry from the dentry cache immediately @@ -241,8 +228,12 @@ impl InodeDirOps for DirectoryInode { Ok(()) } - async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> { - let sb = self.sb.get()?; + async fn rename( + &self, + sb: SbUse, + inode: &InodeUse, + rename_data: RenameData<'_, '_>, + ) -> KResult<()> { let _rename_lock = sb.backend.rename_lock.lock().await; let mut self_entries = self.entries.write().await; @@ -266,11 +257,11 @@ impl InodeDirOps for DirectoryInode { return Err(EEXIST); } - if new_parent.as_raw() == &raw const *self { + if inode == &new_parent { // Same directory rename // Remove from old location and add to new location - let old_ino = old_file.ino(); - let new_ino = new_file.as_ref().map(|f| f.ino()); + let old_ino = old_file.ino; + let new_ino = new_file.as_ref().map(|f| f.ino); let old_name = old_dentry.get_name(); let new_name = new_dentry.get_name(); @@ -299,7 +290,7 @@ impl InodeDirOps for DirectoryInode { // Replace existing file (i.e. 
rename the old and unlink the new) let new_file = new_file.unwrap(); - match (new_file.format(), old_file.format()) { + match (new_file.format, old_file.format) { (Format::DIR, _) => return Err(EISDIR), (_, Format::DIR) => return Err(ENOTDIR), _ => {} @@ -307,12 +298,12 @@ impl InodeDirOps for DirectoryInode { self_entries.remove(new_idx); - self.info.lock().size -= 1; + inode.info.lock().size -= 1; // The last reference to the inode is held by some dentry // and will be released when the dentry is released - let mut new_info = new_file.info().lock(); + let mut new_info = new_file.info.lock(); new_info.nlink -= 1; new_info.mtime = now; @@ -322,24 +313,21 @@ impl InodeDirOps for DirectoryInode { let (name, _) = &mut self_entries[old_ent_idx]; *name = new_dentry.get_name(); - let mut self_info = self.info.lock(); + let mut self_info = inode.info.lock(); self_info.mtime = now; self_info.ctime = now; } else { // Cross-directory rename - handle similar to same directory case // Get new parent directory - let new_parent_inode = new_dentry.parent().get_inode()?; - assert_eq!(new_parent_inode.format(), Format::DIR); - - let new_parent = (&new_parent_inode as &dyn Any) - .downcast_ref::() - .expect("new parent must be a DirectoryInode"); + let new_parent = new_dentry.parent().get_inode()?; + assert_eq!(new_parent.format, Format::DIR); - let mut new_entries = new_parent.entries.write().await; + let new_parent_priv = new_parent.get_priv::(); + let mut new_entries = new_parent_priv.entries.write().await; - let old_ino = old_file.ino(); - let new_ino = new_file.as_ref().map(|f| f.ino()); + let old_ino = old_file.ino; + let new_ino = new_file.as_ref().map(|f| f.ino); let old_name = old_dentry.get_name(); let new_name = new_dentry.get_name(); @@ -361,26 +349,28 @@ impl InodeDirOps for DirectoryInode { // Replace existing file (i.e. 
move the old and unlink the new) let new_file = new_file.unwrap(); - match (old_file.format(), new_file.format()) { + match (old_file.format, new_file.format) { (Format::DIR, Format::DIR) => {} (Format::DIR, _) => return Err(ENOTDIR), (_, _) => {} } // Unlink the old file that was replaced - new_parent.do_unlink( + new_parent_priv.do_unlink( &new_file, &new_name, &mut new_entries, now, false, &mut new_parent.info.lock(), - &mut new_file.info().lock(), + &mut new_file.info.lock(), )?; } else { - new_parent.info.lock().size += 1; - new_parent.info.lock().mtime = now; - new_parent.info.lock().ctime = now; + let mut info = new_parent.info.lock(); + + info.size += 1; + info.mtime = now; + info.ctime = now; } // Remove from old directory @@ -389,7 +379,7 @@ impl InodeDirOps for DirectoryInode { // Add new entry new_entries.push((new_name, old_ino)); - let mut self_info = self.info.lock(); + let mut self_info = inode.info.lock(); self_info.size -= 1; self_info.mtime = now; self_info.ctime = now; @@ -398,17 +388,16 @@ impl InodeDirOps for DirectoryInode { dcache::d_exchange(old_dentry, new_dentry).await; Ok(()) } -} - -impl InodeFileOps for DirectoryInode { - async fn chmod(&self, perm: Permission) -> KResult<()> { - let _sb = self.sb.get()?; - { - let mut info = self.info.lock(); - info.perm = perm; - info.ctime = Instant::now(); - } + async fn chmod( + &self, + _sb: SbUse, + inode: &InodeUse, + perm: Permission, + ) -> KResult<()> { + let mut info = inode.info.lock(); + info.perm = perm; + info.ctime = Instant::now(); Ok(()) } diff --git a/src/fs/tmpfs/file.rs b/src/fs/tmpfs/file.rs index 624112e0..a1755908 100644 --- a/src/fs/tmpfs/file.rs +++ b/src/fs/tmpfs/file.rs @@ -1,39 +1,26 @@ +use alloc::collections::btree_map::BTreeMap; use alloc::sync::Arc; -use eonix_mm::paging::PAGE_SIZE; -use eonix_sync::{RwLock, Spin}; - -use crate::{ - io::{Buffer, Stream}, - kernel::{ - mem::{CachePage, CachePageStream, PageCache, PageCacheBackendOps}, - timer::Instant, - vfs::{ - 
inode::{Ino, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, WriteOffset}, - types::{DeviceId, Format, Mode, Permission}, - SbRef, - }, - }, - prelude::KResult, -}; use super::TmpFs; +use crate::io::{Buffer, Stream}; +use crate::kernel::mem::{CachePage, PageCache, PageOffset}; +use crate::kernel::timer::Instant; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse, WriteOffset}; +use crate::kernel::vfs::types::{DeviceId, Format, Mode, Permission}; +use crate::kernel::vfs::{SbRef, SbUse}; +use crate::prelude::KResult; -pub struct FileInode { - sb: SbRef, - ino: Ino, - info: Spin, - rwsem: RwLock<()>, - pages: PageCache, -} +pub struct FileInode; impl FileInode { - pub fn new(ino: Ino, sb: SbRef, size: usize, perm: Permission) -> InodeUse { + pub fn new(ino: Ino, sb: SbRef, size: usize, perm: Permission) -> InodeUse { let now = Instant::now(); - InodeUse::new_cyclic(|weak| Self { + InodeUse::new( sb, ino, - info: Spin::new(InodeInfo { + Format::REG, + InodeInfo { size: size as _, nlink: 1, uid: 0, @@ -42,60 +29,34 @@ impl FileInode { atime: now, ctime: now, mtime: now, - }), - rwsem: RwLock::new(()), - pages: PageCache::new(weak.clone() as _), - }) - } -} - -impl PageCacheBackendOps for FileInode { - async fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult { - Ok(PAGE_SIZE) - } - - async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { - Ok(PAGE_SIZE) - } - - fn size(&self) -> usize { - self.info.lock().size as usize + }, + Self, + ) } } impl InodeOps for FileInode { type SuperBlock = TmpFs; - fn ino(&self) -> Ino { - self.ino - } - - fn format(&self) -> Format { - Format::REG - } - - fn info(&self) -> &Spin { - &self.info - } - - fn super_block(&self) -> &SbRef { - &self.sb - } - - fn page_cache(&self) -> Option<&PageCache> { - Some(&self.pages) - } -} - -impl InodeDirOps for FileInode {} -impl InodeFileOps for FileInode { - async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> 
KResult { - let _lock = self.rwsem.read().await; - self.pages.read(buffer, offset).await + async fn read( + &self, + _: SbUse, + inode: &InodeUse, + buffer: &mut dyn Buffer, + offset: usize, + ) -> KResult { + let _lock = inode.rwsem.read().await; + inode.get_page_cache().read(buffer, offset).await } - async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult { - let _lock = self.rwsem.write().await; + async fn write( + &self, + _: SbUse, + inode: &InodeUse, + stream: &mut dyn Stream, + offset: WriteOffset<'_>, + ) -> KResult { + let _lock = inode.rwsem.write().await; let mut store_new_end = None; let offset = match offset { @@ -104,74 +65,131 @@ impl InodeFileOps for FileInode { store_new_end = Some(end); // `info.size` won't change since we are holding the write lock. - self.info.lock().size as usize + inode.info.lock().size as usize } }; - let wrote = self.pages.write(stream, offset).await?; + let page_cache = inode.get_page_cache(); + + if Arc::strong_count(&page_cache) == 1 { + // XXX: A temporary workaround here. Change this ASAP... + // Prevent the page cache from being dropped during the write. 
+ let _ = Arc::into_raw(page_cache.clone()); + } + + let wrote = page_cache.write(stream, offset).await?; let cursor_end = offset + wrote; if let Some(store_end) = store_new_end { *store_end = cursor_end; } - { - let now = Instant::now(); - let mut info = self.info.lock(); - info.mtime = now; - info.ctime = now; - info.size = info.size.max(cursor_end as u64); - } - Ok(wrote) } - async fn truncate(&self, length: usize) -> KResult<()> { - let _lock = self.rwsem.write().await; + async fn truncate( + &self, + _: SbUse, + inode: &InodeUse, + length: usize, + ) -> KResult<()> { + let _lock = inode.rwsem.write().await; - self.pages.resize(length).await?; + let now = Instant::now(); + let mut info = inode.info.lock(); + info.mtime = now; + info.ctime = now; + info.size = length as u64; - { - let now = Instant::now(); - let mut info = self.info.lock(); - info.mtime = now; - info.ctime = now; - info.size = length as u64; - } + Ok(()) + } + + async fn chmod( + &self, + _sb: SbUse, + inode: &InodeUse, + perm: Permission, + ) -> KResult<()> { + let mut info = inode.info.lock(); + + info.perm = perm; + info.ctime = Instant::now(); + + Ok(()) + } + async fn read_page( + &self, + _: SbUse, + _: &InodeUse, + page: &mut CachePage, + _: PageOffset, + ) -> KResult<()> { + page.as_bytes_mut().fill(0); Ok(()) } - async fn chmod(&self, perm: Permission) -> KResult<()> { - let _sb = self.sb.get()?; + async fn write_page( + &self, + _: SbUse, + _: &InodeUse, + _: &mut CachePage, + _: PageOffset, + ) -> KResult<()> { + // XXX: actually we should refuse to do the writeback. + // think of a way to inform that of the page cache. + Ok(()) + } - { - let mut info = self.info.lock(); + async fn write_begin<'a>( + &self, + _: SbUse, + _: &InodeUse, + page_cache: &PageCache, + pages: &'a mut BTreeMap, + offset: usize, + _: usize, + ) -> KResult<&'a mut CachePage> { + // TODO: Remove dependency on `page_cache`. 
+ page_cache + .get_page_locked(pages, PageOffset::from_byte_floor(offset)) + .await + } - info.perm = perm; - info.ctime = Instant::now(); - } + async fn write_end( + &self, + _: SbUse, + inode: &InodeUse, + _: &PageCache, + _: &mut BTreeMap, + offset: usize, + _: usize, + copied: usize, + ) -> KResult<()> { + let now = Instant::now(); + let mut info = inode.info.lock(); + info.mtime = now; + info.ctime = now; + info.size = info.size.max((offset + copied) as u64); Ok(()) } } pub struct DeviceInode { - sb: SbRef, - ino: Ino, - info: Spin, is_block: bool, devid: DeviceId, } impl DeviceInode { - pub fn new(ino: Ino, sb: SbRef, mode: Mode, devid: DeviceId) -> InodeUse { + pub fn new(ino: Ino, sb: SbRef, mode: Mode, devid: DeviceId) -> InodeUse { let now = Instant::now(); - InodeUse::new(Self { + InodeUse::new( sb, ino, - info: Spin::new(InodeInfo { + mode.format(), + InodeInfo { size: 0, nlink: 1, uid: 0, @@ -180,76 +198,49 @@ impl DeviceInode { atime: now, ctime: now, mtime: now, - }), - is_block: mode.format() == Format::BLK, - devid, - }) + }, + Self { + is_block: mode.format() == Format::BLK, + devid, + }, + ) } } impl InodeOps for DeviceInode { type SuperBlock = TmpFs; - fn ino(&self) -> Ino { - self.ino - } - - fn format(&self) -> Format { - if self.is_block { - Format::BLK - } else { - Format::CHR - } - } - - fn info(&self) -> &Spin { - &self.info - } - - fn super_block(&self) -> &SbRef { - &self.sb - } - - fn page_cache(&self) -> Option<&PageCache> { - None - } -} - -impl InodeDirOps for DeviceInode {} -impl InodeFileOps for DeviceInode { - async fn chmod(&self, perm: Permission) -> KResult<()> { - let _sb = self.sb.get()?; - - { - let mut info = self.info.lock(); - - info.perm = perm; - info.ctime = Instant::now(); - } + async fn chmod( + &self, + _sb: SbUse, + inode: &InodeUse, + perm: Permission, + ) -> KResult<()> { + let mut info = inode.info.lock(); + info.perm = perm; + info.ctime = Instant::now(); Ok(()) } - fn devid(&self) -> KResult { + fn 
devid(&self, _: SbUse, _: &InodeUse) -> KResult { Ok(self.devid) } } pub struct SymlinkInode { - sb: SbRef, - ino: Ino, - info: Spin, target: Arc<[u8]>, } impl SymlinkInode { - pub fn new(ino: Ino, sb: SbRef, target: Arc<[u8]>) -> InodeUse { + pub fn new(ino: Ino, sb: SbRef, target: Arc<[u8]>) -> InodeUse { let now = Instant::now(); - InodeUse::new(Self { + InodeUse::new( sb, ino, - info: Spin::new(InodeInfo { + Format::LNK, + InodeInfo { size: target.len() as _, nlink: 1, uid: 0, @@ -258,39 +249,21 @@ impl SymlinkInode { atime: now, ctime: now, mtime: now, - }), - target, - }) + }, + Self { target }, + ) } } -impl InodeDirOps for SymlinkInode {} impl InodeOps for SymlinkInode { type SuperBlock = TmpFs; - fn ino(&self) -> Ino { - self.ino - } - - fn format(&self) -> Format { - Format::LNK - } - - fn info(&self) -> &Spin { - &self.info - } - - fn super_block(&self) -> &SbRef { - &self.sb - } - - fn page_cache(&self) -> Option<&PageCache> { - None - } -} - -impl InodeFileOps for SymlinkInode { - async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { + async fn readlink( + &self, + _sb: SbUse, + _inode: &InodeUse, + buffer: &mut dyn Buffer, + ) -> KResult { buffer .fill(self.target.as_ref()) .map(|result| result.allow_partial()) diff --git a/src/fs/tmpfs/mod.rs b/src/fs/tmpfs/mod.rs index 2bef67b6..62a0dfc2 100644 --- a/src/fs/tmpfs/mod.rs +++ b/src/fs/tmpfs/mod.rs @@ -1,23 +1,20 @@ mod dir; mod file; -use crate::kernel::vfs::inode::{Ino, InodeUse}; -use crate::kernel::vfs::types::{DeviceId, Permission}; -use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; -use crate::{ - kernel::vfs::{ - dentry::Dentry, - mount::{register_filesystem, Mount, MountCreator}, - }, - prelude::*, -}; use alloc::sync::Arc; +use core::sync::atomic::{AtomicU64, Ordering}; + use async_trait::async_trait; -use core::sync::atomic::AtomicU64; -use core::sync::atomic::Ordering; use dir::DirectoryInode; use eonix_sync::Mutex; +use crate::kernel::vfs::dentry::Dentry; +use 
crate::kernel::vfs::inode::{Ino, InodeUse}; +use crate::kernel::vfs::mount::{register_filesystem, Mount, MountCreator}; +use crate::kernel::vfs::types::{DeviceId, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; +use crate::prelude::*; + pub struct TmpFs { next_ino: AtomicU64, rename_lock: Mutex<()>, @@ -30,7 +27,7 @@ impl TmpFs { Ino::new(self.next_ino.fetch_add(1, Ordering::Relaxed)) } - fn create() -> KResult<(SbUse, InodeUse)> { + fn create() -> KResult<(SbUse, InodeUse)> { let tmpfs = SbUse::new( SuperBlockInfo { io_blksize: 4096, diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs index bfc826bf..f8b5dc0b 100644 --- a/src/kernel/mem.rs +++ b/src/kernel/mem.rs @@ -12,5 +12,5 @@ pub use access::PhysAccess; pub(self) use mm_area::MMArea; pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission}; pub use page_alloc::{GlobalPageAlloc, RawPage}; -pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackendOps}; +pub use page_cache::{CachePage, PageCache, PageOffset}; pub use paging::{Page, PageBuffer, PageExcl, PageExt}; diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index dcbeeb63..2891dad8 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -4,12 +4,12 @@ use core::cmp; use eonix_mm::address::{AddrOps as _, VAddr, VRange}; use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; -use eonix_mm::paging::{PAGE_SIZE, PFN}; +use eonix_mm::paging::PFN; use super::mm_list::EMPTY_PAGE; use super::{Mapping, Page, Permission}; -use crate::kernel::constants::EINVAL; -use crate::kernel::mem::{PageExcl, PageExt}; +use crate::kernel::mem::page_cache::PageOffset; +use crate::kernel::mem::{CachePage, PageExcl, PageExt}; use crate::prelude::KResult; #[derive(Debug)] @@ -141,59 +141,48 @@ impl MMArea { assert!(offset < file_mapping.length, "Offset out of range"); - let Some(page_cache) = file_mapping.file.page_cache() else { - panic!("Mapping file should have 
pagecache"); + let file_offset = file_mapping.offset + offset; + + let map_page = |page: &Page, cache_page: &CachePage| { + if !self.permission.write { + assert!(!write, "Write fault on read-only mapping"); + + *pfn = page.clone().into_raw(); + return; + } + + if self.is_shared { + // We don't process dirty flags in write faults. + // Simply assume that page will eventually be dirtied. + // So here we can set the dirty flag now. + cache_page.set_dirty(true); + attr.insert(PageAttribute::WRITE); + *pfn = page.clone().into_raw(); + return; + } + + if !write { + // Delay the copy-on-write until write fault happens. + attr.insert(PageAttribute::COPY_ON_WRITE); + *pfn = page.clone().into_raw(); + return; + } + + // XXX: Change this. Let's handle mapped pages before CoW pages. + // Nah, we are writing to a mapped private mapping... + let mut new_page = PageExcl::zeroed(); + new_page + .as_bytes_mut() + .copy_from_slice(page.lock().as_bytes()); + + attr.insert(PageAttribute::WRITE); + *pfn = new_page.into_page().into_raw(); }; - let file_offset = file_mapping.offset + offset; - let cnt_to_read = (file_mapping.length - offset).min(0x1000); - - page_cache - .with_page(file_offset, |page, cache_page| { - // Non-write faults: we find page in pagecache and do mapping - // Write fault: we need to care about shared or private mapping. - if !write { - // Bss is embarrassing in pagecache! 
- // We have to assume cnt_to_read < PAGE_SIZE all bss - if cnt_to_read < PAGE_SIZE { - let mut new_page = PageExcl::zeroed(); - - new_page.as_bytes_mut()[..cnt_to_read] - .copy_from_slice(&page.lock().as_bytes()[..cnt_to_read]); - - *pfn = new_page.into_page().into_raw(); - } else { - *pfn = page.clone().into_raw(); - } - - if self.permission.write { - if self.is_shared { - // The page may will not be written, - // But we simply assume page will be dirty - cache_page.set_dirty(); - attr.insert(PageAttribute::WRITE); - } else { - attr.insert(PageAttribute::COPY_ON_WRITE); - } - } - } else { - if self.is_shared { - cache_page.set_dirty(); - *pfn = page.clone().into_raw(); - } else { - let mut new_page = PageExcl::zeroed(); - - new_page.as_bytes_mut()[..cnt_to_read] - .copy_from_slice(&page.lock().as_bytes()[..cnt_to_read]); - - *pfn = new_page.into_page().into_raw(); - } - - attr.insert(PageAttribute::WRITE); - } - }) - .await? - .ok_or(EINVAL)?; + file_mapping + .page_cache + .with_page(PageOffset::from_byte_floor(file_offset), map_page) + .await?; attr.insert(PageAttribute::PRESENT); attr.remove(PageAttribute::MAPPED); diff --git a/src/kernel/mem/mm_list/mapping.rs b/src/kernel/mem/mm_list/mapping.rs index 5446ae42..2b837ae7 100644 --- a/src/kernel/mem/mm_list/mapping.rs +++ b/src/kernel/mem/mm_list/mapping.rs @@ -1,9 +1,12 @@ -use crate::kernel::vfs::inode::{Inode, InodeUse}; +use alloc::sync::Arc; + use eonix_mm::paging::PAGE_SIZE; +use crate::kernel::mem::PageCache; + #[derive(Debug, Clone)] pub struct FileMapping { - pub file: InodeUse, + pub page_cache: Arc, /// Offset in the file, aligned to 4KB boundary. pub offset: usize, /// Length of the mapping. Exceeding part will be zeroed. 
@@ -19,10 +22,10 @@ pub enum Mapping { } impl FileMapping { - pub fn new(file: InodeUse, offset: usize, length: usize) -> Self { + pub fn new(page_cache: Arc, offset: usize, length: usize) -> Self { assert_eq!(offset & (PAGE_SIZE - 1), 0); Self { - file, + page_cache, offset, length, } @@ -30,10 +33,10 @@ impl FileMapping { pub fn offset(&self, offset: usize) -> Self { if self.length <= offset { - Self::new(self.file.clone(), self.offset + self.length, 0) + Self::new(self.page_cache.clone(), self.offset + self.length, 0) } else { Self::new( - self.file.clone(), + self.page_cache.clone(), self.offset + offset, self.length - offset, ) diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs index 6f14583d..7aac141d 100644 --- a/src/kernel/mem/mm_list/page_fault.rs +++ b/src/kernel/mem/mm_list/page_fault.rs @@ -1,11 +1,12 @@ -use super::{MMList, VAddr}; -use crate::kernel::task::Thread; use eonix_hal::mm::flush_tlb; use eonix_hal::traits::fault::PageFaultErrorCode; use eonix_mm::address::{Addr as _, AddrOps as _, VRange}; use eonix_mm::paging::PAGE_SIZE; use posix_types::signal::Signal; +use super::{MMList, VAddr}; +use crate::kernel::task::Thread; + #[repr(C)] struct FixEntry { start: u64, diff --git a/src/kernel/mem/page_alloc.rs b/src/kernel/mem/page_alloc.rs index 9dce4567..37344fc2 100644 --- a/src/kernel/mem/page_alloc.rs +++ b/src/kernel/mem/page_alloc.rs @@ -4,9 +4,9 @@ mod zones; use core::sync::atomic::Ordering; use buddy_allocator::BuddyAllocator; -use eonix_mm::address::{AddrOps as _, PRange}; +use eonix_mm::address::PRange; use eonix_mm::paging::{ - GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PageList, PageListSized as _, PFN, + GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PageList, PageListSized as _, }; use eonix_preempt::PreemptGuard; use eonix_sync::{NoContext, Spin}; diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 0baa7b9a..074f82c7 100644 --- 
a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -32,15 +32,9 @@ impl SlabPageData { } } -#[derive(Clone, Copy)] -struct PageCacheData { - valid_size: usize, -} - #[repr(C)] union PageData { slab: SlabPageData, - page_cache: PageCacheData, } pub struct RawPage { @@ -245,27 +239,16 @@ impl SlabPage for RawPage { } impl PageCacheRawPage for RawPagePtr { - fn valid_size(&self) -> &mut usize { - unsafe { - // SAFETY: The caller ensures that the page is in some page cache. - &mut self.as_mut().shared_data.page_cache.valid_size - } - } - fn is_dirty(&self) -> bool { self.flags().has(PageFlags::DIRTY) } - fn clear_dirty(&self) { - self.flags().clear(PageFlags::DIRTY); - } - - fn set_dirty(&self) { - self.flags().set(PageFlags::DIRTY); - } - - fn cache_init(&self) { - self.as_mut().shared_data.page_cache = PageCacheData { valid_size: 0 }; + fn set_dirty(&self, dirty: bool) { + if dirty { + self.flags().set(PageFlags::DIRTY); + } else { + self.flags().clear(PageFlags::DIRTY); + } } } diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 6a1c04ca..214c65a5 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -1,26 +1,27 @@ -use alloc::boxed::Box; -use alloc::collections::btree_map::BTreeMap; -use alloc::sync::Weak; +use alloc::collections::btree_map::{BTreeMap, Entry}; use core::future::Future; use core::mem::ManuallyDrop; -use align_ext::AlignExt; -use async_trait::async_trait; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::{PAddr, PhysAccess}; use eonix_mm::paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN}; use eonix_sync::Mutex; -use super::paging::AllocZeroed; use super::Page; -use crate::io::{Buffer, FillResult, Stream}; +use crate::io::{Buffer, Stream}; +use crate::kernel::constants::EINVAL; use crate::kernel::mem::page_alloc::RawPagePtr; +use crate::kernel::vfs::inode::InodeUse; use crate::prelude::KResult; use crate::GlobalPageAlloc; +#[repr(transparent)] 
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct PageOffset(usize); + pub struct PageCache { - pages: Mutex>, - backend: Weak, + pages: Mutex>, + inode: InodeUse, } unsafe impl Send for PageCache {} @@ -30,70 +31,46 @@ unsafe impl Sync for PageCache {} pub struct CachePage(RawPagePtr); unsafe impl Send for CachePage {} +unsafe impl Sync for CachePage {} -impl Buffer for CachePage { - fn total(&self) -> usize { - PAGE_SIZE +impl PageOffset { + pub const fn from_byte_floor(offset: usize) -> Self { + Self(offset >> PAGE_SIZE_BITS) } - fn wrote(&self) -> usize { - self.valid_size() + pub const fn from_byte_ceil(offset: usize) -> Self { + Self((offset + PAGE_SIZE - 1) >> PAGE_SIZE_BITS) } - fn fill(&mut self, data: &[u8]) -> KResult { - let valid_size = self.valid_size(); - let available = &mut self.all_mut()[valid_size..]; - if available.len() == 0 { - return Ok(FillResult::Full); - } - - let len = core::cmp::min(data.len(), available.len()); - available[..len].copy_from_slice(&data[..len]); + pub fn iter_till(self, end: PageOffset) -> impl Iterator { + (self.0..end.0).map(PageOffset) + } - *self.0.valid_size() += len; + pub fn page_count(self) -> usize { + self.0 + } - if len < data.len() { - Ok(FillResult::Partial(len)) - } else { - Ok(FillResult::Done(len)) - } + pub fn byte_count(self) -> usize { + self.page_count() * PAGE_SIZE } } impl CachePage { pub fn new() -> Self { - let page = GlobalPageAlloc.alloc().unwrap(); - page.cache_init(); - Self(page) - } - - pub fn new_zeroed() -> Self { - let page = Page::zeroed(); - let raw_page_ptr = RawPagePtr::from(page.into_raw()); - - raw_page_ptr.cache_init(); - Self(raw_page_ptr) - } - - pub fn valid_size(&self) -> usize { - *self.0.valid_size() + Self(GlobalPageAlloc.alloc().unwrap()) } - pub fn set_valid_size(&mut self, valid_size: usize) { - *self.0.valid_size() = valid_size; - } - - pub fn all(&self) -> &[u8] { + pub fn as_bytes(&self) -> &[u8] { unsafe { core::slice::from_raw_parts( - // SAFETY: The 
page is exclusively owned by us, so we can safely access its data. + // SAFETY: The page is owned by us, so we can safely access its data. ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(), PAGE_SIZE, ) } } - pub fn all_mut(&mut self) -> &mut [u8] { + pub fn as_bytes_mut(&mut self) -> &mut [u8] { unsafe { core::slice::from_raw_parts_mut( // SAFETY: The page is exclusively owned by us, so we can safely access its data. @@ -103,306 +80,171 @@ impl CachePage { } } - pub fn valid_data(&self) -> &[u8] { - &self.all()[..self.valid_size()] - } - pub fn is_dirty(&self) -> bool { self.0.is_dirty() } - pub fn set_dirty(&self) { - self.0.set_dirty(); + pub fn set_dirty(&self, dirty: bool) { + self.0.set_dirty(dirty); } - pub fn clear_dirty(&self) { - self.0.clear_dirty(); + pub fn get_page(&self) -> Page { + unsafe { Page::with_raw(PFN::from(self.0), |page| page.clone()) } } } impl PageCache { - pub fn new(backend: Weak) -> Self { + pub fn new(inode: InodeUse) -> Self { Self { pages: Mutex::new(BTreeMap::new()), - backend: backend, + inode, } } - pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult { - let mut pages = self.pages.lock().await; - let size = self.backend.upgrade().unwrap().size(); - - loop { - if offset >= size { - break; - } - let page_id = offset >> PAGE_SIZE_BITS; - let page = pages.get(&page_id); - - match page { - Some(page) => { - let inner_offset = offset % PAGE_SIZE; - let available_in_file = size.saturating_sub(offset); - - // TODO: still cause unnecessary IO if valid_size < PAGESIZE - // and fill result is Done - let page_data = &page.valid_data()[inner_offset..]; - let read_size = page_data.len().min(available_in_file); - - if read_size == 0 - || buffer.fill(&page_data[..read_size])?.should_stop() - || buffer.available() == 0 - { - break; - } - offset += read_size; - } - None => { + pub fn get_page_locked<'a>( + &self, + pages: &'a mut BTreeMap, + pgoff: PageOffset, + ) -> impl Future> + Send + use<'_, 'a> { + 
async move { + match pages.entry(pgoff) { + Entry::Occupied(ent) => Ok(ent.into_mut()), + Entry::Vacant(vacant_entry) => { let mut new_page = CachePage::new(); - self.backend - .upgrade() - .unwrap() - .read_page(&mut new_page, offset.align_down(PAGE_SIZE)) - .await?; - pages.insert(page_id, new_page); + self.inode.read_page(&mut new_page, pgoff).await?; + + Ok(vacant_entry.insert(new_page)) } } } + } - Ok(buffer.wrote()) + fn len(&self) -> usize { + self.inode.info.lock().size as usize } - pub async fn write(&self, stream: &mut dyn Stream, mut offset: usize) -> KResult { + // TODO: Remove this. + pub async fn with_page( + &self, + pgoff: PageOffset, + func: impl FnOnce(&Page, &CachePage), + ) -> KResult<()> { let mut pages = self.pages.lock().await; - let old_size = self.backend.upgrade().unwrap().size(); - let mut wrote = 0; - - loop { - let page_id = offset >> PAGE_SIZE_BITS; - let page = pages.get_mut(&page_id); - - match page { - Some(page) => { - let inner_offset = offset % PAGE_SIZE; - let cursor_end = match stream.poll_data(&mut page.all_mut()[inner_offset..])? 
{ - Some(buf) => { - wrote += buf.len(); - inner_offset + buf.len() - } - None => { - break; - } - }; - - if page.valid_size() < cursor_end { - page.set_valid_size(cursor_end); - } - page.set_dirty(); - offset += PAGE_SIZE - inner_offset; - } - None => { - let new_page = if (offset >> PAGE_SIZE_BITS) > (old_size >> PAGE_SIZE_BITS) { - let new_page = CachePage::new_zeroed(); - new_page - } else { - let mut new_page = CachePage::new(); - self.backend - .upgrade() - .unwrap() - .read_page(&mut new_page, offset.align_down(PAGE_SIZE)) - .await?; - new_page - }; - - pages.insert(page_id, new_page); - } - } + if pgoff > PageOffset::from_byte_ceil(self.len()) { + return Err(EINVAL); } - Ok(wrote) - } + let cache_page = self.get_page_locked(&mut pages, pgoff).await?; - pub async fn fsync(&self) -> KResult<()> { - let pages = self.pages.lock().await; - for (page_id, page) in pages.iter() { - if page.is_dirty() { - self.backend - .upgrade() - .unwrap() - .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS) - .await?; - page.clear_dirty(); - } + unsafe { + let page = ManuallyDrop::new(Page::from_raw_unchecked(PFN::from(cache_page.0))); + + func(&page, cache_page); } + Ok(()) } - // This function is used for extend write or truncate - pub async fn resize(&self, new_size: usize) -> KResult<()> { + pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult { let mut pages = self.pages.lock().await; - let old_size = self.backend.upgrade().unwrap().size(); + let total_len = self.len(); - if new_size < old_size { - let begin = new_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS; - let end = old_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS; + if offset >= total_len { + return Ok(0); + } - for page_id in begin..end { - pages.remove(&page_id); - } - } else if new_size > old_size { - let begin = old_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS; - let end = new_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS; + let pgoff_start = 
PageOffset::from_byte_floor(offset); + let pgoff_end = PageOffset::from_byte_ceil(total_len); - pages.remove(&begin); + for pgoff in pgoff_start.iter_till(pgoff_end) { + let page = self.get_page_locked(&mut pages, pgoff).await?; - for page_id in begin..end { - let mut new_page = CachePage::new_zeroed(); + let end_offset = (offset + PAGE_SIZE) / PAGE_SIZE * PAGE_SIZE; + let real_end = end_offset.min(total_len); - if page_id != end - 1 { - new_page.set_valid_size(PAGE_SIZE); - } else { - new_page.set_valid_size(new_size % PAGE_SIZE); - } - new_page.set_dirty(); - pages.insert(page_id, new_page); + let inner_offset = offset % PAGE_SIZE; + let data_len = real_end - offset; + + if buffer + .fill(&page.as_bytes()[inner_offset..inner_offset + data_len])? + .should_stop() + || buffer.available() == 0 + { + break; } + + offset = real_end; } - Ok(()) + Ok(buffer.wrote()) } - pub async fn with_page(&self, offset: usize, func: F) -> KResult> - where - F: FnOnce(&Page, &CachePage) -> O, - { - let offset_aligin = offset.align_down(PAGE_SIZE); - let page_id = offset_aligin >> PAGE_SIZE_BITS; - let size = self.backend.upgrade().unwrap().size(); - - if offset_aligin > size { - return Ok(None); - } - + pub async fn write(&self, stream: &mut dyn Stream, mut offset: usize) -> KResult { let mut pages = self.pages.lock().await; + let mut total_written = 0; - let raw_page_ptr = match pages.get(&page_id) { - Some(CachePage(raw_page_ptr)) => *raw_page_ptr, - None => { - let mut new_page = CachePage::new(); - self.backend - .upgrade() - .unwrap() - .read_page(&mut new_page, offset_aligin) - .await?; - pages.insert(page_id, new_page); - new_page.0 + loop { + let end_offset = (offset + PAGE_SIZE) / PAGE_SIZE * PAGE_SIZE; + let len = end_offset - offset; + + // TODO: Rewrite to return a write state object. 
+ let page = self + .inode + .write_begin(self, &mut pages, offset, len) + .await?; + + let inner_offset = offset % PAGE_SIZE; + let written = stream + .poll_data(&mut page.as_bytes_mut()[inner_offset..])? + .map(|b| b.len()) + .unwrap_or(0); + + page.set_dirty(true); + self.inode + .write_end(self, &mut pages, offset, len, written) + .await?; + + if written == 0 { + break; } - }; - - unsafe { - let page = ManuallyDrop::new(Page::from_raw_unchecked(PFN::from(raw_page_ptr))); - Ok(Some(func(&page, &CachePage(raw_page_ptr)))) + total_written += written; + offset += written; } - } -} -pub struct CachePageStream { - page: CachePage, - cur: usize, -} - -impl CachePageStream { - pub fn new(page: CachePage) -> Self { - Self { page, cur: 0 } + Ok(total_written) } -} - -impl Stream for CachePageStream { - fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult> { - if self.cur >= self.page.valid_size() { - return Ok(None); - } - - let page_data = &self.page.all()[self.cur..self.page.valid_size()]; - let to_read = buf.len().min(page_data.len()); - buf[..to_read].copy_from_slice(&page_data[..to_read]); - self.cur += to_read; + pub async fn fsync(&self) -> KResult<()> { + let mut pages = self.pages.lock().await; - Ok(Some(&mut buf[..to_read])) - } + for (&pgoff, page) in pages.iter_mut() { + if !page.is_dirty() { + continue; + } - fn ignore(&mut self, len: usize) -> KResult> { - if self.cur >= self.page.valid_size() { - return Ok(None); + self.inode.write_page(page, pgoff).await?; + page.set_dirty(false); } - let to_ignore = len.min(self.page.valid_size() - self.cur); - self.cur += to_ignore; - Ok(Some(to_ignore)) + Ok(()) } } -// with this trait, "page cache" and "block cache" are unified, -// for fs, offset is file offset (floor algin to PAGE_SIZE) -// for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE) -// Oh no, this would make unnecessary cache -pub trait PageCacheBackendOps: Sized { - fn read_page( - &self, - page: &mut CachePage, - offset: usize, - ) 
-> impl Future> + Send; - - fn write_page( - &self, - page: &mut CachePageStream, - offset: usize, - ) -> impl Future> + Send; - - fn size(&self) -> usize; -} - -#[async_trait] -pub trait PageCacheBackend: Send + Sync { - async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult; - async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult; - fn size(&self) -> usize; -} - -#[async_trait] -impl PageCacheBackend for T -where - T: PageCacheBackendOps + Send + Sync + 'static, -{ - async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { - self.read_page(page, offset).await - } - - async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult { - self.write_page(page, offset).await - } - - fn size(&self) -> usize { - self.size() +impl core::fmt::Debug for PageCache { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("PageCache").finish() } } pub trait PageCacheRawPage: RawPage { - fn valid_size(&self) -> &mut usize; - fn is_dirty(&self) -> bool; - - fn set_dirty(&self); - - fn clear_dirty(&self); - - fn cache_init(&self); + fn set_dirty(&self, dirty: bool); } impl Drop for PageCache { fn drop(&mut self) { - let _ = self.fsync(); + // TODO: Write back dirty pages... 
+ // let _ = self.fsync(); } } diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index 4cb7908c..825440ef 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -1,20 +1,15 @@ -use super::FromSyscallArg; -use crate::kernel::constants::{EBADF, EINVAL}; -use crate::kernel::mem::FileMapping; -use crate::kernel::task::Thread; -use crate::kernel::vfs::filearray::FD; -use crate::{ - kernel::{ - constants::{UserMmapFlags, UserMmapProtocol}, - mem::{Mapping, Permission}, - }, - prelude::*, -}; use align_ext::AlignExt; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr}; use eonix_mm::paging::PAGE_SIZE; use posix_types::syscall_no::*; +use super::FromSyscallArg; +use crate::kernel::constants::{UserMmapFlags, UserMmapProtocol, EBADF, EINVAL}; +use crate::kernel::mem::{FileMapping, Mapping, Permission}; +use crate::kernel::task::Thread; +use crate::kernel::vfs::filearray::FD; +use crate::prelude::*; + impl FromSyscallArg for UserMmapProtocol { fn from_arg(value: usize) -> UserMmapProtocol { UserMmapProtocol::from_bits_truncate(value as u32) @@ -74,7 +69,7 @@ async fn do_mmap2( .get_inode()? 
.ok_or(EBADF)?; - Mapping::File(FileMapping::new(file, pgoffset, len)) + Mapping::File(FileMapping::new(file.get_page_cache(), pgoffset, len)) }; let permission = Permission { diff --git a/src/kernel/task/loader/elf.rs b/src/kernel/task/loader/elf.rs index 36d139a8..9f8aa166 100644 --- a/src/kernel/task/loader/elf.rs +++ b/src/kernel/task/loader/elf.rs @@ -1,27 +1,22 @@ +use alloc::ffi::CString; +use alloc::sync::Arc; +use alloc::vec::Vec; + +use align_ext::AlignExt; +use eonix_mm::address::{Addr, AddrOps as _, VAddr}; +use eonix_mm::paging::PAGE_SIZE; +use xmas_elf::header::{self, Class, HeaderPt1, Machine_}; +use xmas_elf::program::{self, ProgramHeader32, ProgramHeader64}; + use super::{LoadInfo, ELF_MAGIC}; -use crate::io::UninitBuffer; +use crate::io::{ByteBuffer, UninitBuffer}; +use crate::kernel::constants::ENOEXEC; +use crate::kernel::mem::{FileMapping, MMList, Mapping, Permission}; use crate::kernel::task::loader::aux_vec::{AuxKey, AuxVec}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::FsContext; use crate::path::Path; -use crate::{ - io::ByteBuffer, - kernel::{ - constants::ENOEXEC, - mem::{FileMapping, MMList, Mapping, Permission}, - vfs::{dentry::Dentry, FsContext}, - }, - prelude::*, -}; -use align_ext::AlignExt; -use alloc::vec::Vec; -use alloc::{ffi::CString, sync::Arc}; -use eonix_mm::{ - address::{Addr, AddrOps as _, VAddr}, - paging::PAGE_SIZE, -}; -use xmas_elf::{ - header::{self, Class, HeaderPt1, Machine_}, - program::{self, ProgramHeader32, ProgramHeader64}, -}; +use crate::prelude::*; const INIT_STACK_SIZE: usize = 0x80_0000; @@ -366,7 +361,7 @@ impl Elf { vmap_start, file_len, Mapping::File(FileMapping::new( - self.file.get_inode()?, + self.file.get_inode()?.get_page_cache(), file_offset, real_file_length, )), @@ -376,16 +371,27 @@ impl Elf { .await?; } - if vmem_len > file_len { - mm_list - .mmap_fixed( - vmap_start + file_len, - vmem_len - file_len, - Mapping::Anonymous, - permission, - false, - ) - .await?; + if 
vmem_vaddr_end > load_vaddr_end { + if load_vaddr_end.page_offset() != 0 { + let mut zero_len = PAGE_SIZE - load_vaddr_end.page_offset(); + zero_len = zero_len.min(vmem_vaddr_end - load_vaddr_end); + + mm_list + .access_mut(load_vaddr_end, zero_len, |_, data| data.fill(0)) + .await?; + } + + if vmem_len - file_len > 0 { + mm_list + .mmap_fixed( + vmap_start + file_len, + vmem_len - file_len, + Mapping::Anonymous, + permission, + false, + ) + .await?; + } } Ok(vmap_start + vmem_len) diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs index c1eb8cb8..22760de9 100644 --- a/src/kernel/vfs/dentry.rs +++ b/src/kernel/vfs/dentry.rs @@ -1,35 +1,31 @@ pub mod dcache; mod walk; -use core::{ - cell::UnsafeCell, - fmt, - hash::{BuildHasher, BuildHasherDefault, Hasher}, - sync::atomic::{AtomicPtr, AtomicU64, AtomicU8, Ordering}, -}; - use alloc::sync::Arc; +use core::cell::UnsafeCell; +use core::fmt; +use core::hash::{BuildHasher, BuildHasherDefault, Hasher}; +use core::sync::atomic::{AtomicPtr, AtomicU64, AtomicU8, Ordering}; + use arcref::AsArcRef; use eonix_sync::LazyLock; use pointers::BorrowedArc; -use posix_types::{namei::RenameFlags, open::OpenFlags, result::PosixError, stat::StatX}; - -use crate::{ - hash::KernelHasher, - io::Buffer, - io::Stream, - kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, EPERM, ERANGE}, - kernel::{block::BlockDevice, CharDevice}, - path::Path, - prelude::*, - rcu::{rcu_read_lock, RCUNode, RCUPointer, RCUReadGuard}, -}; - -use super::{ - inode::{Ino, Inode, InodeUse, RenameData, WriteOffset}, - types::{DeviceId, Format, Mode, Permission}, - FsContext, -}; +use posix_types::namei::RenameFlags; +use posix_types::open::OpenFlags; +use posix_types::result::PosixError; +use posix_types::stat::StatX; + +use super::inode::{Ino, InodeUse, RenameData, WriteOffset}; +use super::types::{DeviceId, Format, Mode, Permission}; +use super::FsContext; +use crate::hash::KernelHasher; +use crate::io::{Buffer, Stream}; +use 
crate::kernel::block::BlockDevice; +use crate::kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, EPERM, ERANGE}; +use crate::kernel::CharDevice; +use crate::path::Path; +use crate::prelude::*; +use crate::rcu::{rcu_read_lock, RCUNode, RCUPointer, RCUReadGuard}; const D_INVALID: u8 = 0; const D_REGULAR: u8 = 1; @@ -56,7 +52,7 @@ enum DentryKind { /// [lookup()]: crate::kernel::vfs::inode::InodeDirOps::lookup struct AssociatedInode { kind: UnsafeCell>, - inode: UnsafeCell>>, + inode: UnsafeCell>, } /// # Safety @@ -181,15 +177,15 @@ impl Dentry { .map_or(core::ptr::null(), |parent| Arc::as_ptr(&parent)) } - pub fn fill(&self, file: InodeUse) { + pub fn fill(&self, file: InodeUse) { self.inode.store(file); } - pub fn inode(&self) -> Option> { + pub fn inode(&self) -> Option { self.inode.load().map(|(_, inode)| inode.clone()) } - pub fn get_inode(&self) -> KResult> { + pub fn get_inode(&self) -> KResult { self.inode().ok_or(ENOENT) } @@ -291,7 +287,7 @@ impl Dentry { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.format() { + match inode.format { Format::DIR => Err(EISDIR), Format::REG => inode.read(buffer, offset).await, Format::BLK => { @@ -309,7 +305,7 @@ impl Dentry { pub async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.format() { + match inode.format { Format::DIR => Err(EISDIR), Format::REG => inode.write(stream, offset).await, Format::BLK => Err(EINVAL), // TODO @@ -375,7 +371,7 @@ impl Dentry { } pub async fn chmod(&self, mode: Mode) -> KResult<()> { - self.get_inode()?.chmod(mode).await + self.get_inode()?.chmod(mode.perm()).await } pub async fn chown(&self, uid: u32, gid: u32) -> KResult<()> { @@ -438,8 +434,8 @@ impl AssociatedInode { } } - fn store(&self, inode: InodeUse) { - let kind = match inode.format() { + fn 
store(&self, inode: InodeUse) { + let kind = match inode.format { Format::REG | Format::BLK | Format::CHR => DentryKind::Regular, Format::DIR => DentryKind::Directory, Format::LNK => DentryKind::Symlink, @@ -463,7 +459,7 @@ impl AssociatedInode { DentryKind::atomic_acq(&self.kind) } - fn load(&self) -> Option<(DentryKind, &InodeUse)> { + fn load(&self) -> Option<(DentryKind, &InodeUse)> { self.kind().map(|kind| unsafe { let inode = (&*self.inode.get()) .as_ref() diff --git a/src/kernel/vfs/dentry/walk.rs b/src/kernel/vfs/dentry/walk.rs index 3e401b4b..7b1060ac 100644 --- a/src/kernel/vfs/dentry/walk.rs +++ b/src/kernel/vfs/dentry/walk.rs @@ -1,33 +1,23 @@ -use core::{ - future::Future, - hash::{BuildHasher, BuildHasherDefault, Hasher}, - ops::Deref, - pin::Pin, -}; - -use alloc::{boxed::Box, sync::Arc}; +use alloc::boxed::Box; +use alloc::sync::Arc; +use core::future::Future; +use core::hash::{BuildHasher, BuildHasherDefault, Hasher}; +use core::ops::Deref; +use core::pin::Pin; + use arcref::{ArcRef, AsArcRef}; use posix_types::result::PosixError; -use crate::{ - hash::KernelHasher, - io::ByteBuffer, - kernel::{ - constants::ELOOP, - vfs::{ - inode::{Inode, InodeUse}, - FsContext, - }, - }, - path::{Path, PathComponent, PathIterator}, - prelude::KResult, - rcu::{rcu_read_lock, RCUReadLock}, -}; - -use super::{ - dcache::{self, DCacheItem}, - Dentry, DentryKind, -}; +use super::dcache::{self, DCacheItem}; +use super::{Dentry, DentryKind}; +use crate::hash::KernelHasher; +use crate::io::ByteBuffer; +use crate::kernel::constants::ELOOP; +use crate::kernel::vfs::inode::InodeUse; +use crate::kernel::vfs::FsContext; +use crate::path::{Path, PathComponent, PathIterator}; +use crate::prelude::KResult; +use crate::rcu::{rcu_read_lock, RCUReadLock}; struct DentryFind<'a, 'b> { parent: &'a Dentry, @@ -40,7 +30,7 @@ pub enum WalkResultRcu<'rcu, 'path> { Ok(ArcRef<'rcu, Dentry>), Symlink { symlink: ArcRef<'rcu, Dentry>, - inode: InodeUse, + inode: InodeUse, }, Miss { parent: 
ArcRef<'rcu, Dentry>, @@ -53,7 +43,7 @@ pub enum WalkResult { Ok(Arc), Symlink { symlink: Arc, - inode: InodeUse, + inode: InodeUse, }, } @@ -270,7 +260,7 @@ impl FsContext { pub async fn follow_symlink( &self, symlink: ArcRef<'_, Dentry>, - inode: &InodeUse, + inode: &InodeUse, nr_follows: u32, ) -> KResult> { let mut target = [0; 256]; @@ -288,7 +278,7 @@ impl FsContext { fn follow_symlink_boxed<'r, 'a: 'r, 'b: 'r, 'c: 'r>( &'a self, symlink: ArcRef<'b, Dentry>, - inode: &'c InodeUse, + inode: &'c InodeUse, nr_follows: u32, ) -> Pin>> + Send + 'r>> { Box::pin(self.follow_symlink(symlink, inode, nr_follows)) diff --git a/src/kernel/vfs/file/inode_file.rs b/src/kernel/vfs/file/inode_file.rs index 96526ee9..d302079c 100644 --- a/src/kernel/vfs/file/inode_file.rs +++ b/src/kernel/vfs/file/inode_file.rs @@ -1,23 +1,17 @@ -use super::{File, FileType, SeekOption}; -use crate::{ - io::{Buffer, BufferFill, Stream}, - kernel::{ - constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE}, - vfs::{ - dentry::Dentry, - inode::{Inode, InodeUse, WriteOffset}, - types::Format, - }, - }, - prelude::KResult, -}; use alloc::sync::Arc; + use eonix_sync::Mutex; -use posix_types::{ - getdent::{UserDirent, UserDirent64}, - open::OpenFlags, - stat::StatX, -}; +use posix_types::getdent::{UserDirent, UserDirent64}; +use posix_types::open::OpenFlags; +use posix_types::stat::StatX; + +use super::{File, FileType, SeekOption}; +use crate::io::{Buffer, BufferFill, Stream}; +use crate::kernel::constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::inode::{InodeUse, WriteOffset}; +use crate::kernel::vfs::types::Format; +use crate::prelude::KResult; pub struct InodeFile { pub r: bool, @@ -34,7 +28,7 @@ impl InodeFile { pub fn new(dentry: Arc, flags: OpenFlags) -> File { // SAFETY: `dentry` used to create `InodeFile` is valid. // SAFETY: `mode` should never change with respect to the `S_IFMT` fields. 
- let format = dentry.inode().expect("dentry should be invalid").format(); + let format = dentry.inode().expect("dentry should be invalid").format; let (r, w, a) = flags.as_rwa(); @@ -98,7 +92,7 @@ impl InodeFile { } impl File { - pub fn get_inode(&self) -> KResult>> { + pub fn get_inode(&self) -> KResult> { if let FileType::Inode(inode_file) = &**self { Ok(Some(inode_file.dentry.get_inode()?)) } else { @@ -191,7 +185,7 @@ impl File { SeekOption::Set(n) => n, SeekOption::End(off) => { let inode = inode_file.dentry.get_inode()?; - let size = inode.info().lock().size as usize; + let size = inode.info.lock().size as usize; size.checked_add_signed(off).ok_or(EOVERFLOW)? } }; diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index 1862a3e1..609d969c 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -1,28 +1,23 @@ -use super::{ - file::{File, InodeFile, Pipe}, - types::{Format, Permission}, - Spin, TerminalFile, -}; -use crate::kernel::{ - constants::{ - EBADF, EISDIR, ENOTDIR, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, F_SETFD, F_SETFL, - }, - syscall::{FromSyscallArg, SyscallRetVal}, -}; -use crate::{ - kernel::{console::get_console, constants::ENXIO, vfs::dentry::Dentry, CharDevice}, - prelude::*, -}; use alloc::sync::Arc; -use intrusive_collections::{ - intrusive_adapter, rbtree::Entry, Bound, KeyAdapter, RBTree, RBTreeAtomicLink, -}; -use itertools::{ - FoldWhile::{Continue, Done}, - Itertools, -}; + +use intrusive_collections::rbtree::Entry; +use intrusive_collections::{intrusive_adapter, Bound, KeyAdapter, RBTree, RBTreeAtomicLink}; +use itertools::FoldWhile::{Continue, Done}; +use itertools::Itertools; use posix_types::open::{FDFlags, OpenFlags}; +use super::file::{File, InodeFile, Pipe}; +use super::types::{Format, Permission}; +use super::{Spin, TerminalFile}; +use crate::kernel::console::get_console; +use crate::kernel::constants::{ + EBADF, EISDIR, ENOTDIR, ENXIO, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, 
F_SETFD, F_SETFL, +}; +use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::CharDevice; +use crate::prelude::*; + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct FD(u32); @@ -291,20 +286,19 @@ impl FileArray { let fdflag = flags.as_fd_flags(); let inode = dentry.get_inode()?; - let file_format = inode.format(); - match (flags.directory(), file_format, flags.write()) { + match (flags.directory(), inode.format, flags.write()) { (true, Format::DIR, _) => {} (true, _, _) => return Err(ENOTDIR), (false, Format::DIR, true) => return Err(EISDIR), _ => {} } - if flags.truncate() && flags.write() && file_format == Format::REG { + if flags.truncate() && flags.write() && inode.format == Format::REG { inode.truncate(0).await?; } - let file = if file_format == Format::CHR { + let file = if inode.format == Format::CHR { let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?; device.open(flags)? } else { diff --git a/src/kernel/vfs/inode/inode.rs b/src/kernel/vfs/inode/inode.rs index 870a045d..5f0b98c2 100644 --- a/src/kernel/vfs/inode/inode.rs +++ b/src/kernel/vfs/inode/inode.rs @@ -1,52 +1,149 @@ use alloc::boxed::Box; -use core::{ - any::Any, - future::Future, - marker::Unsize, - ops::{CoerceUnsized, Deref}, - pin::Pin, -}; -use eonix_sync::Spin; - +use alloc::collections::btree_map::BTreeMap; use alloc::sync::{Arc, Weak}; -use async_trait::async_trait; +use core::any::Any; +use core::future::Future; +use core::ops::Deref; -use crate::{ - io::{Buffer, Stream}, - kernel::{ - constants::{EINVAL, EPERM}, - mem::PageCache, - timer::Instant, - vfs::{ - dentry::Dentry, - types::{DeviceId, Format, Mode, Permission}, - SbRef, SbUse, SuperBlock, - }, - }, - prelude::KResult, -}; +use async_trait::async_trait; +use eonix_sync::{RwLock, Spin}; use super::{Ino, RenameData, WriteOffset}; +use crate::io::{Buffer, Stream}; +use crate::kernel::constants::{EINVAL, EPERM}; +use 
crate::kernel::mem::{CachePage, PageCache, PageOffset}; +use crate::kernel::timer::Instant; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::types::{DeviceId, Format, Mode, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock}; +use crate::prelude::KResult; + +pub struct Inode { + pub ino: Ino, + pub format: Format, + pub info: Spin, + pub rwsem: RwLock<()>, + page_cache: Spin>, + sb: SbRef, + ops: Box, +} -pub trait InodeOps: Sized + Send + Sync + 'static { - type SuperBlock: SuperBlock + Sized; - - fn ino(&self) -> Ino; - fn format(&self) -> Format; - fn info(&self) -> &Spin; - - fn super_block(&self) -> &SbRef; +macro_rules! return_type { + ($type:ty) => { + $type + }; + () => { + () + }; +} - fn page_cache(&self) -> Option<&PageCache>; +macro_rules! define_inode_ops { + { + $( + $(#[$attr:meta])* + async fn $method:ident $(<$($lt:lifetime),+>)? (&self $(,)? $($name:ident : $type:ty $(,)?)*) $(-> $ret:ty)? + $body:block + )* + + --- + + $( + $(#[$attr1:meta])* + fn $method1:ident $(<$($lt1:lifetime),+>)? (&self $(,)? $($name1:ident : $type1:ty $(,)?)*) $(-> $ret1:ty)? + $body1:block + )* + } => { + #[allow(unused_variables)] + pub trait InodeOps: Sized + Send + Sync + 'static { + type SuperBlock: SuperBlock + Sized; + + $( + $(#[$attr])* + fn $method $(<$($lt),+>)? ( + &self, + sb: SbUse, + inode: &InodeUse, + $($name : $type),* + ) -> impl Future + Send { + async { $body } + })* + + $( + $(#[$attr1])* + fn $method1 $(<$($lt1),+>)? ( + &self, + sb: SbUse, + inode: &InodeUse, + $($name1 : $type1),* + ) -> return_type!($($ret1)?) { + $body1 + })* + } + + #[async_trait] + trait InodeOpsErased: Any + Send + Sync + 'static { + $(async fn $method $(<$($lt),+>)? ( + &self, + sb: SbUse, + inode: &InodeUse, + $($name : $type),* + ) -> return_type!($($ret)?);)* + + $(fn $method1 $(<$($lt1),+>)? 
( + &self, + sb: SbUse, + inode: &InodeUse, + $($name1 : $type1),* + ) -> return_type!($($ret1)?);)* + } + + #[async_trait] + impl InodeOpsErased for T + where + T: InodeOps, + { + $(async fn $method $(<$($lt),+>)? ( + &self, + sb: SbUse, + inode: &InodeUse, + $($name : $type),* + ) -> return_type!($($ret)?) { + self.$method(sb.downcast(), inode, $($name),*).await + })* + + $(fn $method1 $(<$($lt1),+>)? ( + &self, + sb: SbUse, + inode: &InodeUse, + $($name1 : $type1),* + ) -> return_type!($($ret1)?) { + self.$method1(sb.downcast(), inode, $($name1),*) + })* + } + + impl InodeUse { + $(pub async fn $method $(<$($lt),+>)? ( + &self, + $($name : $type),* + ) -> return_type!($($ret)?) { + self.ops.$method(self.sbget()?, self, $($name),*).await + })* + + $(pub fn $method1 $(<$($lt1),+>)? ( + &self, + $($name1 : $type1),* + ) -> return_type!($($ret1)?) { + self.ops.$method1(self.sbget()?, self, $($name1),*) + })* + } + }; } -#[allow(unused_variables)] -pub trait InodeDirOps: InodeOps { - fn lookup( - &self, - dentry: &Arc, - ) -> impl Future>>> + Send { - async { Err(EPERM) } +define_inode_ops! { + // DIRECTORY OPERATIONS + + async fn lookup(&self, dentry: &Arc) -> KResult> { + Err(EPERM) } /// Read directory entries and call the given closure for each entry. @@ -55,255 +152,114 @@ pub trait InodeDirOps: InodeOps { /// - Ok(count): The number of entries read. /// - Ok(Err(err)): Some error occurred while calling the given closure. /// - Err(err): An error occurred while reading the directory. 
- fn readdir<'r, 'a: 'r, 'b: 'r>( - &'a self, + async fn readdir( + &self, offset: usize, - for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), - ) -> impl Future>> + Send + 'r { - async { Err(EPERM) } + for_each_entry: &mut (dyn (for<'a> FnMut(&'a [u8], Ino) -> KResult) + Send), + ) -> KResult> { + Err(EPERM) } - fn create( - &self, - at: &Arc, - mode: Permission, - ) -> impl Future> + Send { - async { Err(EPERM) } + async fn create(&self, at: &Arc, mode: Permission) -> KResult<()> { + Err(EPERM) } - fn mkdir(&self, at: &Dentry, mode: Permission) -> impl Future> + Send { - async { Err(EPERM) } + async fn mkdir(&self, at: &Dentry, mode: Permission) -> KResult<()> { + Err(EPERM) } - fn mknod( - &self, - at: &Dentry, - mode: Mode, - dev: DeviceId, - ) -> impl Future> + Send { - async { Err(EPERM) } + async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> { + Err(EPERM) } - fn unlink(&self, at: &Arc) -> impl Future> + Send { - async { Err(EPERM) } + async fn unlink(&self, at: &Arc) -> KResult<()> { + Err(EPERM) } - fn symlink(&self, at: &Arc, target: &[u8]) -> impl Future> + Send { - async { Err(EPERM) } + async fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { + Err(EPERM) } - fn rename(&self, rename_data: RenameData<'_, '_>) -> impl Future> + Send { - async { Err(EPERM) } + async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> { + Err(EPERM) } -} -#[allow(unused_variables)] -pub trait InodeFileOps: InodeOps { - fn read( - &self, - buffer: &mut dyn Buffer, - offset: usize, - ) -> impl Future> + Send { - async { Err(EINVAL) } + // FILE OPERATIONS + + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + Err(EINVAL) } - fn read_direct( - &self, - buffer: &mut dyn Buffer, - offset: usize, - ) -> impl Future> + Send { - async { Err(EINVAL) } + async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + Err(EINVAL) } - fn write( + async fn write( &self, stream: 
&mut dyn Stream, - offset: WriteOffset<'_>, - ) -> impl Future> + Send { - async { Err(EINVAL) } + offset: WriteOffset<'_> + ) -> KResult { + Err(EINVAL) } - fn write_direct( + async fn write_direct( &self, stream: &mut dyn Stream, offset: usize, - ) -> impl Future> + Send { - async { Err(EINVAL) } - } - - fn devid(&self) -> KResult { + ) -> KResult { Err(EINVAL) } - fn readlink(&self, buffer: &mut dyn Buffer) -> impl Future> + Send { - async { Err(EINVAL) } - } - - fn truncate(&self, length: usize) -> impl Future> + Send { - async { Err(EPERM) } - } - - fn chmod(&self, perm: Permission) -> impl Future> + Send { - async { Err(EPERM) } - } - - fn chown(&self, uid: u32, gid: u32) -> impl Future> + Send { - async { Err(EPERM) } - } -} - -#[async_trait] -pub trait InodeDir { - async fn lookup(&self, dentry: &Arc) -> KResult>>; - async fn create(&self, at: &Arc, perm: Permission) -> KResult<()>; - async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()>; - async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()>; - async fn unlink(&self, at: &Arc) -> KResult<()>; - async fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()>; - async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()>; - - fn readdir<'r, 'a: 'r, 'b: 'r>( - &'a self, - offset: usize, - callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), - ) -> Pin>> + Send + 'r>>; -} - -#[async_trait] -pub trait InodeFile { - async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult; - async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult; - async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult; - async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult; - fn devid(&self) -> KResult; - async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult; - async fn truncate(&self, length: usize) -> KResult<()>; - async fn chmod(&self, mode: Mode) -> KResult<()>; - async fn 
chown(&self, uid: u32, gid: u32) -> KResult<()>; -} - -pub trait Inode: InodeFile + InodeDir + Any + Send + Sync + 'static { - fn ino(&self) -> Ino; - fn format(&self) -> Format; - fn info(&self) -> &Spin; - - // TODO: This might should be removed... Temporary workaround for now. - fn page_cache(&self) -> Option<&PageCache>; - - fn sbref(&self) -> SbRef; - fn sbget(&self) -> KResult>; -} - -#[async_trait] -impl InodeFile for T -where - T: InodeFileOps, -{ - async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - self.read(buffer, offset).await - } - - async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - self.read_direct(buffer, offset).await - } - - async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult { - self.write(stream, offset).await - } - - async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { - self.write_direct(stream, offset).await - } - - fn devid(&self) -> KResult { - self.devid() - } - async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { - self.readlink(buffer).await + Err(EINVAL) } async fn truncate(&self, length: usize) -> KResult<()> { - self.truncate(length).await + Err(EPERM) } - async fn chmod(&self, mode: Mode) -> KResult<()> { - self.chmod(Permission::new(mode.non_format_bits())).await + async fn chmod(&self, perm: Permission) -> KResult<()> { + Err(EPERM) } async fn chown(&self, uid: u32, gid: u32) -> KResult<()> { - self.chown(uid, gid).await - } -} - -#[async_trait] -impl InodeDir for T -where - T: InodeDirOps, -{ - async fn lookup(&self, dentry: &Arc) -> KResult>> { - self.lookup(dentry).await - } - - async fn create(&self, at: &Arc, perm: Permission) -> KResult<()> { - self.create(at, perm).await - } - - async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()> { - self.mkdir(at, perm).await + Err(EPERM) } - async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> { - self.mknod(at, mode, 
dev).await - } - - async fn unlink(&self, at: &Arc) -> KResult<()> { - self.unlink(at).await - } - - async fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { - self.symlink(at, target).await + // PAGE CACHE OPERATIONS + async fn read_page(&self, page: &mut CachePage, offset: PageOffset) -> KResult<()> { + Err(EINVAL) } - async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> { - self.rename(rename_data).await + async fn write_page(&self, page: &mut CachePage, offset: PageOffset) -> KResult<()> { + Err(EINVAL) } - fn readdir<'r, 'a: 'r, 'b: 'r>( - &'a self, + async fn write_begin<'a>( + &self, + page_cache: &PageCache, + pages: &'a mut BTreeMap, offset: usize, - callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), - ) -> Pin>> + Send + 'r>> { - Box::pin(self.readdir(offset, callback)) - } -} - -impl Inode for T -where - T: InodeOps + InodeFile + InodeDir, -{ - fn ino(&self) -> Ino { - self.ino() - } - - fn format(&self) -> Format { - self.format() - } - - fn info(&self) -> &Spin { - self.info() + len: usize, + ) -> KResult<&'a mut CachePage> { + Err(EINVAL) } - fn page_cache(&self) -> Option<&PageCache> { - self.page_cache() + async fn write_end( + &self, + page_cache: &PageCache, + pages: &mut BTreeMap, + offset: usize, + len: usize, + copied: usize + ) -> KResult<()> { + Err(EINVAL) } - fn sbref(&self) -> SbRef { - self.super_block().clone() - } + --- - fn sbget(&self) -> KResult> { - self.super_block().get().map(|sb| sb as _) + fn devid(&self) -> KResult { + Err(EINVAL) } } @@ -321,64 +277,87 @@ pub struct InodeInfo { pub mtime: Instant, } -pub struct InodeUse(Arc) -where - I: Inode + ?Sized; +#[repr(transparent)] +pub struct InodeUse(Arc); + +impl InodeUse { + pub fn new( + sb: SbRef, + ino: Ino, + format: Format, + info: InodeInfo, + ops: impl InodeOps, + ) -> Self { + let inode = Inode { + sb, + ino, + format, + info: Spin::new(info), + rwsem: RwLock::new(()), + page_cache: Spin::new(Weak::new()), + ops: Box::new(ops), + }; 
-impl InodeUse -where - I: Inode, -{ - pub fn new(inode: I) -> Self { Self(Arc::new(inode)) } - pub fn new_cyclic(inode_func: impl FnOnce(&Weak) -> I) -> Self { - Self(Arc::new_cyclic(inode_func)) + pub fn sbref(&self) -> SbRef { + self.sb.clone() } -} -impl InodeUse -where - I: Inode + ?Sized, -{ - pub fn as_raw(&self) -> *const I { - Arc::as_ptr(&self.0) + pub fn sbget(&self) -> KResult> { + self.sb.get().map(|sb| sb as _) + } + + pub fn get_priv(&self) -> &I + where + I: InodeOps, + { + let ops = (&*self.ops) as &dyn Any; + + ops.downcast_ref() + .expect("InodeUse::private: InodeOps type mismatch") } -} -impl CoerceUnsized> for InodeUse -where - T: Inode + Unsize + ?Sized, - U: Inode + ?Sized, -{ + pub fn get_page_cache(&self) -> Arc { + if let Some(cache) = self.page_cache.lock().upgrade() { + return cache; + } + + // Slow path... + let cache = Arc::new(PageCache::new(self.clone())); + let mut page_cache = self.page_cache.lock(); + if let Some(cache) = page_cache.upgrade() { + return cache; + } + + *page_cache = Arc::downgrade(&cache); + cache + } } -impl Clone for InodeUse -where - I: Inode + ?Sized, -{ +impl Clone for InodeUse { fn clone(&self) -> Self { Self(self.0.clone()) } } -impl core::fmt::Debug for InodeUse -where - I: Inode + ?Sized, -{ +impl core::fmt::Debug for InodeUse { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "InodeUse(ino={})", self.ino()) + write!(f, "InodeUse(ino={})", self.ino) } } -impl Deref for InodeUse -where - I: Inode + ?Sized, -{ - type Target = I; +impl Deref for InodeUse { + type Target = Inode; fn deref(&self) -> &Self::Target { self.0.deref() } } + +impl PartialEq for InodeUse { + fn eq(&self, other: &Self) -> bool { + Arc::ptr_eq(&self.0, &other.0) + } +} diff --git a/src/kernel/vfs/inode/mod.rs b/src/kernel/vfs/inode/mod.rs index 08471ef3..6f4f041a 100644 --- a/src/kernel/vfs/inode/mod.rs +++ b/src/kernel/vfs/inode/mod.rs @@ -4,5 +4,5 @@ mod ops; mod statx; pub use ino::Ino; -pub use 
inode::{Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +pub use inode::{Inode, InodeInfo, InodeOps, InodeUse}; pub use ops::{RenameData, WriteOffset}; diff --git a/src/kernel/vfs/inode/ops.rs b/src/kernel/vfs/inode/ops.rs index baab1a80..7bf00ce5 100644 --- a/src/kernel/vfs/inode/ops.rs +++ b/src/kernel/vfs/inode/ops.rs @@ -1,9 +1,8 @@ use alloc::sync::Arc; +use super::inode::InodeUse; use crate::kernel::vfs::dentry::Dentry; -use super::{inode::InodeUse, Inode}; - pub enum WriteOffset<'end> { Position(usize), End(&'end mut usize), @@ -12,7 +11,7 @@ pub enum WriteOffset<'end> { pub struct RenameData<'a, 'b> { pub old_dentry: &'a Arc, pub new_dentry: &'b Arc, - pub new_parent: InodeUse, + pub new_parent: InodeUse, pub is_exchange: bool, pub no_replace: bool, } diff --git a/src/kernel/vfs/inode/statx.rs b/src/kernel/vfs/inode/statx.rs index a85ef3af..feb2a1b5 100644 --- a/src/kernel/vfs/inode/statx.rs +++ b/src/kernel/vfs/inode/statx.rs @@ -1,25 +1,17 @@ use posix_types::stat::StatX; -use crate::{ - kernel::{ - constants::{ - STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, STATX_MODE, STATX_MTIME, - STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, - }, - vfs::types::Format, - }, - prelude::KResult, +use super::inode::InodeUse; +use crate::kernel::constants::{ + STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, STATX_MODE, STATX_MTIME, + STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, }; +use crate::kernel::vfs::types::Format; +use crate::prelude::KResult; -use super::{inode::InodeUse, Inode}; - -impl InodeUse -where - I: Inode + ?Sized, -{ +impl InodeUse { pub fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> { let sb = self.sbget()?; - let info = self.info().lock(); + let info = self.info.lock(); if mask & STATX_NLINK != 0 { stat.stx_nlink = info.nlink as _; @@ -53,10 +45,8 @@ where } if mask & STATX_TYPE != 0 { - let format = self.format(); - - stat.stx_mode |= format.as_raw() as u16; - if let Format::BLK | 
Format::CHR = format { + stat.stx_mode |= self.format.as_raw() as u16; + if let Format::BLK | Format::CHR = self.format { let devid = self.devid()?; stat.stx_rdev_major = devid.major as _; stat.stx_rdev_minor = devid.minor as _; @@ -65,7 +55,7 @@ where } if mask & STATX_INO != 0 { - stat.stx_ino = self.ino().as_raw(); + stat.stx_ino = self.ino.as_raw(); stat.stx_mask |= STATX_INO; } diff --git a/src/kernel/vfs/mount.rs b/src/kernel/vfs/mount.rs index 213acae9..6b171f81 100644 --- a/src/kernel/vfs/mount.rs +++ b/src/kernel/vfs/mount.rs @@ -1,17 +1,17 @@ -use super::{ - dentry::{dcache, Dentry, DROOT}, - inode::{Inode, InodeUse}, - SbUse, SuperBlock, -}; -use crate::kernel::{ - constants::{EEXIST, ENODEV, ENOTDIR}, - task::block_on, -}; -use crate::prelude::*; -use alloc::{collections::btree_map::BTreeMap, string::ToString as _, sync::Arc}; +use alloc::collections::btree_map::BTreeMap; +use alloc::string::ToString as _; +use alloc::sync::Arc; + use async_trait::async_trait; use eonix_sync::LazyLock; +use super::dentry::{dcache, Dentry, DROOT}; +use super::inode::InodeUse; +use super::{SbUse, SuperBlock}; +use crate::kernel::constants::{EEXIST, ENODEV, ENOTDIR}; +use crate::kernel::task::block_on; +use crate::prelude::*; + pub const MS_RDONLY: u64 = 1 << 0; pub const MS_NOSUID: u64 = 1 << 1; pub const MS_NODEV: u64 = 1 << 2; @@ -39,11 +39,7 @@ pub struct Mount { } impl Mount { - pub fn new( - mp: &Dentry, - sb: SbUse, - root_inode: InodeUse, - ) -> KResult { + pub fn new(mp: &Dentry, sb: SbUse, root_inode: InodeUse) -> KResult { let root_dentry = Dentry::create(mp.parent().clone(), &mp.get_name()); root_dentry.fill(root_inode); diff --git a/src/kernel/vfs/superblock.rs b/src/kernel/vfs/superblock.rs index 85b28c01..e3be5cef 100644 --- a/src/kernel/vfs/superblock.rs +++ b/src/kernel/vfs/superblock.rs @@ -1,16 +1,15 @@ -use core::{ - marker::Unsize, - ops::{CoerceUnsized, Deref}, -}; - use alloc::sync::{Arc, Weak}; -use eonix_sync::RwLock; +use core::any::{Any, TypeId}; 
+use core::marker::Unsize; +use core::ops::{CoerceUnsized, Deref}; -use crate::{kernel::constants::EIO, prelude::KResult}; +use eonix_sync::RwLock; use super::types::DeviceId; +use crate::kernel::constants::EIO; +use crate::prelude::KResult; -pub trait SuperBlock: Send + Sync + 'static {} +pub trait SuperBlock: Any + Send + Sync + 'static {} #[derive(Debug, Clone)] pub struct SuperBlockInfo { @@ -83,6 +82,36 @@ where } } +impl SbUse +where + S: SuperBlock + ?Sized, +{ + pub fn get_ref(&self) -> SbRef { + SbRef(Arc::downgrade(&self.0)) + } +} + +impl SbUse { + /// Downcast the superblock to a specific type. + /// + /// # Panics + /// Panics if the downcast fails. + pub fn downcast(self) -> SbUse { + let Self(sb_complex) = self; + if (&sb_complex.backend as &dyn Any).type_id() != TypeId::of::() { + panic!("Downcast failed: type mismatch"); + } + + unsafe { + // SAFETY: We have checked the type above and unsized coercion says + // that Arc has the same layout as Arc if T: Unsize. + SbUse(Arc::from_raw( + Arc::into_raw(sb_complex) as *const SuperBlockComplex + )) + } + } +} + impl Clone for SbRef where S: SuperBlock + ?Sized, diff --git a/src/lib.rs b/src/lib.rs index 959cb29f..8457169c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,6 @@ #![feature(allocator_api)] #![feature(c_size_t)] #![feature(coerce_unsized)] -#![feature(concat_idents)] #![feature(arbitrary_self_types)] #![feature(get_mut_unchecked)] #![feature(macro_metavar_expr)] From f9b5b3a3dfe6e89d3758c31b0175ee10cb707712 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 00:56:10 +0800 Subject: [PATCH 39/54] mem: introduce new `Folio` abstraction - Remove struct `Page` and add `Folio`s to represent adjacent pages. - Introduce `Zone`s similar to that in Linux. Looking forward to removing all occurrence of `ArchPhysAccess` and so on. - Adapt existing code to new `Folio` interface in a dirty and rough way. 
Signed-off-by: greatbridf --- .vscode/tasks.json | 2 +- crates/buddy_allocator/src/lib.rs | 112 +++---- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 62 ++-- crates/eonix_hal/src/arch/riscv64/mm.rs | 51 +-- crates/eonix_hal/src/mm.rs | 38 ++- crates/eonix_mm/src/address/paddr.rs | 10 +- crates/eonix_mm/src/page_table.rs | 2 +- crates/eonix_mm/src/page_table/page_table.rs | 149 ++++----- crates/eonix_mm/src/page_table/pte.rs | 3 +- .../eonix_mm/src/page_table/pte_iterator.rs | 138 ++++---- crates/eonix_mm/src/page_table/walk.rs | 210 ++++++++++++ crates/eonix_mm/src/paging.rs | 8 +- crates/eonix_mm/src/paging/list.rs | 14 +- crates/eonix_mm/src/paging/page.rs | 313 +++--------------- crates/eonix_mm/src/paging/page_alloc.rs | 79 +---- crates/eonix_mm/src/paging/raw_page.rs | 47 --- crates/eonix_mm/src/paging/zone.rs | 7 +- crates/slab_allocator/src/lib.rs | 44 ++- src/driver/ahci/command.rs | 28 +- src/driver/ahci/command_table.rs | 7 +- src/driver/ahci/defs.rs | 6 +- src/driver/ahci/slot.rs | 10 +- src/driver/e1000e.rs | 19 +- src/driver/virtio/virtio_blk.rs | 8 +- src/fs/fat32.rs | 12 +- src/fs/fat32/file.rs | 24 -- src/fs/tmpfs/file.rs | 2 +- src/kernel/block.rs | 17 +- src/kernel/mem.rs | 4 +- src/kernel/mem/allocator.rs | 26 +- src/kernel/mem/folio.rs | 210 ++++++++++++ src/kernel/mem/mm_area.rs | 39 ++- src/kernel/mem/mm_list.rs | 64 ++-- src/kernel/mem/mm_list/page_table.rs | 40 +++ src/kernel/mem/page_alloc.rs | 91 ++--- src/kernel/mem/page_alloc/raw_page.rs | 136 ++------ src/kernel/mem/page_alloc/zones.rs | 22 +- src/kernel/mem/page_cache.rs | 97 +++--- src/kernel/mem/paging.rs | 122 +------ src/kernel/task/kernel_stack.rs | 25 +- src/kernel/vfs/file/mod.rs | 4 +- src/kernel_init.rs | 58 ++-- 42 files changed, 1116 insertions(+), 1244 deletions(-) create mode 100644 crates/eonix_mm/src/page_table/walk.rs delete mode 100644 crates/eonix_mm/src/paging/raw_page.rs delete mode 100644 src/fs/fat32/file.rs create mode 100644 src/kernel/mem/folio.rs create mode 
100644 src/kernel/mem/mm_list/page_table.rs diff --git a/.vscode/tasks.json b/.vscode/tasks.json index a85ea0cf..e7a54791 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -6,7 +6,7 @@ { "label": "debug run riscv64", "type": "shell", - "command": "make srun ARCH=riscv64 IMG=/Volumes/oscomp/sdcard-rv.img", + "command": "make srun ARCH=riscv64", "isBackground": true, "problemMatcher": [ { diff --git a/crates/buddy_allocator/src/lib.rs b/crates/buddy_allocator/src/lib.rs index abe1ef7b..82a7d6c5 100644 --- a/crates/buddy_allocator/src/lib.rs +++ b/crates/buddy_allocator/src/lib.rs @@ -3,12 +3,12 @@ use core::hint::unreachable_unchecked; use eonix_mm::address::{AddrOps as _, PAddr, PRange}; -use eonix_mm::paging::{PageList, PageListSized, Zone, PFN}; +use eonix_mm::paging::{FolioList, FolioListSized, Zone, PFN}; const MAX_ORDER: u32 = 10; const AREAS: usize = const { MAX_ORDER as usize + 1 }; -pub trait BuddyPage: Sized + 'static { +pub trait BuddyFolio: Sized + 'static { fn pfn(&self) -> PFN; fn get_order(&self) -> u32; @@ -20,19 +20,19 @@ pub trait BuddyPage: Sized + 'static { struct FreeArea where - L: PageList, + L: FolioList, { free_list: L, count: usize, } -unsafe impl Send for FreeArea where L: PageList {} -unsafe impl Sync for FreeArea where L: PageList {} +unsafe impl Send for FreeArea where L: FolioList {} +unsafe impl Sync for FreeArea where L: FolioList {} pub struct BuddyAllocator where Z: Zone + 'static, - L: PageList, + L: FolioList, { zone: &'static Z, free_areas: [FreeArea; AREAS], @@ -41,8 +41,8 @@ where impl BuddyAllocator where Z: Zone + 'static, - Z::Page: BuddyPage, - L: PageListSized, + Z::Page: BuddyFolio, + L: FolioListSized, { pub const fn new(zone: &'static Z) -> Self { Self { @@ -52,13 +52,13 @@ where } } -impl BuddyAllocator +impl BuddyAllocator where - Z: Zone, - L: PageList, - P: BuddyPage + 'static, + Z: Zone, + L: FolioList, + F: BuddyFolio + 'static, { - pub fn create_pages(&mut self, start: PAddr, end: PAddr) { + pub fn 
create_folios(&mut self, start: PAddr, end: PAddr) { assert!( self.zone .contains_prange(PRange::new(start.ceil(), end.floor())), @@ -82,40 +82,40 @@ where unsafe { // SAFETY: We've checked that the range is within the zone above. - self.add_page_unchecked(pfn, order) + self.add_folio_unchecked(pfn, order) }; pfn = new_end_pfn; } } - fn add_page(&mut self, pfn: PFN, order: u32) { + fn add_folio(&mut self, pfn: PFN, order: u32) { let prange = PRange::from(PAddr::from(pfn)).grow(1 << (order + 12)); assert!( self.zone.contains_prange(prange), - "The given page is not within the zone." + "The given folio is not within the zone." ); unsafe { // SAFETY: Checks above. - self.add_page_unchecked(pfn, order); + self.add_folio_unchecked(pfn, order); } } - unsafe fn add_page_unchecked(&mut self, pfn: PFN, order: u32) { - let Some(page) = self.zone.get_page(pfn) else { + unsafe fn add_folio_unchecked(&mut self, pfn: PFN, order: u32) { + let Some(mut folio) = self.zone.get_page(pfn) else { unsafe { unreachable_unchecked() } }; unsafe { // SAFETY: The caller ensures that the page is unused. - let page_mut = &mut *page.get(); - self.free_areas[order as usize].add_page(page_mut, order); + let folio_mut = folio.as_mut(); + self.free_areas[order as usize].add_folio(folio_mut, order); } } - fn break_page(&mut self, page: &mut P, order: u32, target_order: u32) { - let pfn = page.pfn(); + fn break_folio(&mut self, folio: &mut F, order: u32, target_order: u32) { + let pfn = folio.pfn(); for order in (target_order..order).rev() { let buddy_pfn = pfn + (1 << order); @@ -123,50 +123,50 @@ where unsafe { // SAFETY: We got the page from `self.free_areas`. Checks are // done when we've put the page into the buddy system. 
- self.add_page_unchecked(buddy_pfn, order); + self.add_folio_unchecked(buddy_pfn, order); } } - page.set_order(target_order); + folio.set_order(target_order); } pub fn alloc_order(&mut self, order: u32) -> Option<&'static mut Z::Page> { for current_order in order..AREAS as u32 { - let Some(page) = self.free_areas[current_order as usize].get_free_page() else { + let Some(folio) = self.free_areas[current_order as usize].get_free_folio() else { continue; }; if current_order > order { - self.break_page(page, current_order, order); + self.break_folio(folio, current_order, order); } - return Some(page); + return Some(folio); } None } - pub unsafe fn dealloc(&mut self, page: &'static mut Z::Page) { - let mut pfn = page.pfn(); - let mut order = page.get_order(); + pub unsafe fn dealloc(&mut self, folio: &'static mut Z::Page) { + let mut pfn = folio.pfn(); + let mut order = folio.get_order(); assert!( - !page.is_buddy(), - "Trying to free a page that is already in the buddy system: {pfn:?}", + !folio.is_buddy(), + "Trying to free a folio that is already in the buddy system: {pfn:?}", ); while order < MAX_ORDER { let buddy_pfn = pfn.buddy_pfn(order); - let Some(buddy_page) = self.try_get_buddy(buddy_pfn, order) else { + let Some(buddy) = self.try_get_buddy(buddy_pfn, order) else { break; }; - self.free_areas[order as usize].remove_page(buddy_page); + self.free_areas[order as usize].remove_folio(buddy); pfn = pfn.combined_pfn(buddy_pfn); order += 1; } - self.add_page(pfn, order); + self.add_folio(pfn, order); } /// This function checks whether the given page is within our [`Zone`] and @@ -176,32 +176,32 @@ where /// - the buddy is within the same [`Zone`] as us. 
/// - the buddy is a free buddy (in some [`FreeArea`]) /// - the buddy has order [`order`] - fn try_get_buddy<'a>(&mut self, buddy_pfn: PFN, order: u32) -> Option<&'a mut P> { - let buddy_page = self.zone.get_page(buddy_pfn)?; + fn try_get_buddy<'a>(&mut self, buddy_pfn: PFN, order: u32) -> Option<&'a mut F> { + let mut buddy = self.zone.get_page(buddy_pfn)?; unsafe { // SAFETY: We just test whether the page is a buddy. - let buddy_page_ref = &*buddy_page.get(); + let buddy_ref = buddy.as_ref(); - if !buddy_page_ref.is_buddy() { + if !buddy_ref.is_buddy() { return None; } // Sad... - if buddy_page_ref.get_order() != order { + if buddy_ref.get_order() != order { return None; } // SAFETY: We have the mutable reference to the buddy allocator. // So all the pages within are exclusively accessible to us. - Some(&mut *buddy_page.get()) + Some(buddy.as_mut()) } } } impl FreeArea where - L: PageListSized, + L: FolioListSized, { const fn new() -> Self { Self { @@ -213,34 +213,34 @@ where impl FreeArea where - L: PageList, - L::Page: BuddyPage + 'static, + L: FolioList, + L::Folio: BuddyFolio + 'static, { - pub fn get_free_page(&mut self) -> Option<&'static mut L::Page> { - self.free_list.pop_head().map(|page| { + pub fn get_free_folio(&mut self) -> Option<&'static mut L::Folio> { + self.free_list.pop_head().map(|folio| { assert_ne!(self.count, 0, "Oops"); - page.set_buddy(false); + folio.set_buddy(false); self.count -= 1; - page + folio }) } - pub fn add_page(&mut self, page: &'static mut L::Page, order: u32) { - page.set_order(order); - page.set_buddy(true); + pub fn add_folio(&mut self, folio: &'static mut L::Folio, order: u32) { + folio.set_order(order); + folio.set_buddy(true); self.count += 1; - self.free_list.push_tail(page); + self.free_list.push_tail(folio); } - pub fn remove_page(&mut self, page: &mut L::Page) { + pub fn remove_folio(&mut self, folio: &mut L::Folio) { assert_ne!(self.count, 0, "Oops"); - page.set_buddy(false); + folio.set_buddy(false); self.count 
-= 1; - self.free_list.remove(page); + self.free_list.remove(folio); } } diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 7b3dc043..b2305f99 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -2,12 +2,13 @@ use core::alloc::Allocator; use core::arch::{asm, global_asm, naked_asm}; use core::cell::RefCell; use core::hint::spin_loop; +use core::ptr::NonNull; use core::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; use eonix_hal_traits::mm::Memory; use eonix_mm::address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}; -use eonix_mm::page_table::{PageAttribute, PagingMode, PTE as _}; -use eonix_mm::paging::{Page, PageAccess, PageAlloc, PAGE_SIZE, PFN}; +use eonix_mm::page_table::{PageAttribute, PageTable, PagingMode, TableAttribute, PTE as _}; +use eonix_mm::paging::{Folio, FrameAlloc, PageAccess, PageBlock, PAGE_SIZE, PFN}; use eonix_percpu::PercpuArea; use fdt::Fdt; use riscv::asm::sfence_vma_all; @@ -23,9 +24,12 @@ use super::cpu::{CPUID, CPU_COUNT}; use super::time::set_next_timer; use crate::arch::cpu::CPU; use crate::arch::fdt::{init_dtb_and_fdt, FdtExt, FDT}; -use crate::arch::mm::{ArchPhysAccess, FreeRam, PageAttribute64, GLOBAL_PAGE_TABLE}; +use crate::arch::mm::{ + ArchPagingMode, ArchPhysAccess, FreeRam, PageAccessImpl, PageAttribute64, RawPageTableSv48, + GLOBAL_PAGE_TABLE, +}; use crate::bootstrap::BootStrapData; -use crate::mm::{ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}; +use crate::mm::{ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}; #[unsafe(link_section = ".bootstrap.stack")] static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16]; @@ -38,26 +42,26 @@ static TEMP_AP_STACK: [u8; 256] = [0; 256]; static TEMP_AP_STACK_START: &'static [u8; 256] = &TEMP_AP_STACK; #[repr(C, align(4096))] -struct PageTable([u64; PTES_PER_PAGE]); +struct BootPageTable([u64; 
PTES_PER_PAGE]); /// map 0x8000 0000 to itself and 0xffff ffff 8000 0000 #[unsafe(link_section = ".bootstrap.page_table.1")] -static BOOT_PAGE_TABLE: PageTable = { +static BOOT_PAGE_TABLE: BootPageTable = { let mut arr: [u64; PTES_PER_PAGE] = [0; PTES_PER_PAGE]; arr[0] = 0 | 0x2f; arr[510] = 0 | 0x2f; arr[511] = (0x80202 << 10) | 0x21; - PageTable(arr) + BootPageTable(arr) }; #[unsafe(link_section = ".bootstrap.page_table.2")] #[used] -static PT1: PageTable = { +static PT1: BootPageTable = { let mut arr: [u64; PTES_PER_PAGE] = [0; PTES_PER_PAGE]; arr[510] = (0x80000 << 10) | 0x2f; - PageTable(arr) + BootPageTable(arr) }; static BSP_PAGE_ALLOC: AtomicPtr> = AtomicPtr::new(core::ptr::null_mut()); @@ -111,7 +115,7 @@ pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! { real_allocator.borrow_mut().add_range(range); } - setup_kernel_page_table(&alloc); + setup_kernel_page_table(alloc.clone()); unsafe { init_dtb_and_fdt(dtb_addr); } @@ -148,8 +152,12 @@ unsafe extern "C" { /// TODO: /// 对kernel image添加更细的控制,或者不加也行 -fn setup_kernel_page_table(alloc: impl PageAlloc) { - let global_page_table = &GLOBAL_PAGE_TABLE; +fn setup_kernel_page_table(alloc: BasicPageAllocRef) { + let global_page_table = PageTable::::new( + GLOBAL_PAGE_TABLE.clone(), + alloc.clone(), + PageAccessImpl, + ); let attr = PageAttribute::WRITE | PageAttribute::READ @@ -160,18 +168,11 @@ fn setup_kernel_page_table(alloc: impl PageAlloc) { const KERNEL_BSS_START: VAddr = VAddr::from(0xffffffff40000000); // Map kernel BSS - for pte in global_page_table.iter_kernel_in( - VRange::from(KERNEL_BSS_START).grow(BSS_LENGTH as usize), - ArchPagingMode::LEVELS, - &alloc, - ) { - let page = Page::alloc_in(&alloc); - - let attr = { - let mut attr = attr.clone(); - attr.remove(PageAttribute::EXECUTE); - attr - }; + let bss_range = VRange::from(KERNEL_BSS_START).grow(BSS_LENGTH as usize); + for pte in global_page_table.iter_kernel(bss_range) { + let page = alloc.alloc().unwrap(); + let attr = 
attr.difference(PageAttribute::EXECUTE); + pte.set(page.into_raw(), attr.into()); } @@ -189,17 +190,22 @@ fn setup_kernel_page_table(alloc: impl PageAlloc) { ); } sfence_vma_all(); + + core::mem::forget(global_page_table); } /// set up tp register to percpu -fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) { +fn setup_cpu(alloc: impl FrameAlloc, hart_id: usize) { CPU_COUNT.fetch_add(1, Ordering::Relaxed); let mut percpu_area = PercpuArea::new(|layout| { let page_count = layout.size().div_ceil(PAGE_SIZE); - let page = Page::alloc_at_least_in(page_count, alloc); + let page = alloc.alloc_at_least(page_count).unwrap(); - let ptr = ArchPhysAccess::get_ptr_for_page(&page).cast(); + let ptr = unsafe { + // TODO: safety + ArchPhysAccess::as_ptr(page.start()) + }; page.into_raw(); ptr @@ -243,7 +249,7 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) { for hart_id in FDT.harts().filter(|&id| id != local_hart_id) { let stack_range = { let page_alloc = BasicPageAllocRef::new(&page_alloc); - let ap_stack = Page::alloc_order_in(4, page_alloc); + let ap_stack = page_alloc.alloc_order(4).unwrap(); let stack_range = ap_stack.range(); ap_stack.into_raw(); stack_range diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 46dd9437..f67646cf 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -1,31 +1,25 @@ -use super::{ - config::mm::{PHYS_MAP_VIRT, ROOT_PAGE_TABLE_PFN}, - fdt::{FdtExt, FDT}, -}; -use crate::{arch::riscv64::config::mm::KIMAGE_OFFSET, traits::mm::Memory}; -use core::{marker::PhantomData, ptr::NonNull}; -use eonix_mm::{ - address::{Addr as _, AddrOps, PAddr, PRange, PhysAccess, VAddr}, - page_table::{ - PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, RawPageTable, - TableAttribute, PTE, - }, - paging::{NoAlloc, Page, PageBlock, PFN}, +use core::marker::PhantomData; +use core::ptr::NonNull; + +use eonix_hal_traits::mm::Memory; +use 
eonix_mm::address::{Addr as _, AddrOps, PAddr, PRange, PhysAccess, VAddr}; +use eonix_mm::page_table::{ + PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, RawPageTable, + TableAttribute, PTE, }; +use eonix_mm::paging::{BasicFolio, Folio, PageAccess, PageBlock, PFN}; use eonix_sync_base::LazyLock; use fdt::Fdt; -use riscv::{ - asm::{sfence_vma, sfence_vma_all}, - register::satp, -}; +use riscv::asm::{sfence_vma, sfence_vma_all}; +use riscv::register::satp; -pub const PAGE_TABLE_BASE: PFN = PFN::from_val(ROOT_PAGE_TABLE_PFN); -pub static GLOBAL_PAGE_TABLE: LazyLock> = - LazyLock::new(|| unsafe { - Page::with_raw(PAGE_TABLE_BASE, |root_table_page| { - PageTable::with_root_table(root_table_page.clone()) - }) - }); +use super::config::mm::{PHYS_MAP_VIRT, ROOT_PAGE_TABLE_PFN}; +use super::fdt::{FdtExt, FDT}; +use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; +use crate::mm::BasicPageAlloc; + +const PAGE_TABLE_BASE: PFN = PFN::from_val(ROOT_PAGE_TABLE_PFN); +pub const GLOBAL_PAGE_TABLE: BasicFolio = BasicFolio::new(PAGE_TABLE_BASE, 0); pub const PA_V: u64 = 0b1 << 0; pub const PA_R: u64 = 0b1 << 1; @@ -61,6 +55,9 @@ pub struct ArchPhysAccess; pub struct ArchMemory; +#[derive(Clone)] +pub struct PageAccessImpl; + impl PTE for PTE64 { type Attr = PageAttribute64; @@ -261,6 +258,12 @@ impl PhysAccess for ArchPhysAccess { } } +impl PageAccess for PageAccessImpl { + unsafe fn get_ptr_for_pfn(&self, pfn: PFN) -> NonNull { + unsafe { ArchPhysAccess::as_ptr(PAddr::from(pfn)) } + } +} + impl Memory for ArchMemory { fn present_ram() -> impl Iterator { FDT.present_ram() diff --git a/crates/eonix_hal/src/mm.rs b/crates/eonix_hal/src/mm.rs index 0a5597ac..c4b9bb74 100644 --- a/crates/eonix_hal/src/mm.rs +++ b/crates/eonix_hal/src/mm.rs @@ -1,16 +1,14 @@ -use core::{ - alloc::{AllocError, Allocator, Layout}, - cell::RefCell, - ptr::NonNull, -}; -use eonix_mm::{ - address::{AddrOps as _, PRange}, - paging::{PageAlloc, UnmanagedRawPage, PAGE_SIZE, PFN}, -}; +use 
core::alloc::{AllocError, Allocator, Layout}; +use core::cell::RefCell; +use core::ptr::NonNull; + +use eonix_mm::address::{AddrOps as _, PRange}; +use eonix_mm::page_table::PageTableAlloc; +use eonix_mm::paging::{BasicFolio, FrameAlloc, PAGE_SIZE, PFN}; pub use crate::arch::mm::{ flush_tlb, flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchMemory, - ArchPagingMode, ArchPhysAccess, GLOBAL_PAGE_TABLE, + ArchPhysAccess, GLOBAL_PAGE_TABLE, }; pub struct BasicPageAlloc { @@ -118,19 +116,23 @@ impl<'a> BasicPageAllocRef<'a> { } } -impl PageAlloc for BasicPageAllocRef<'_> { - type RawPage = UnmanagedRawPage; +impl FrameAlloc for BasicPageAllocRef<'_> { + type Folio = BasicFolio; - fn alloc_order(&self, order: u32) -> Option { - Some(Self::RawPage::new(self.0.borrow_mut().alloc(order), order)) + fn alloc_order(&self, order: u32) -> Option { + Some(BasicFolio::new(self.0.borrow_mut().alloc(order), order)) } +} + +impl PageTableAlloc for BasicPageAllocRef<'_> { + type Folio = BasicFolio; - unsafe fn dealloc(&self, _: Self::RawPage) { - panic!("Dealloc is not supported in BasicPageAlloc"); + fn alloc(&self) -> Self::Folio { + FrameAlloc::alloc(self).unwrap() } - fn has_management_over(&self, _: Self::RawPage) -> bool { - true + unsafe fn from_raw(&self, pfn: PFN) -> Self::Folio { + BasicFolio::new(pfn, 0) } } diff --git a/crates/eonix_mm/src/address/paddr.rs b/crates/eonix_mm/src/address/paddr.rs index 6fadbd2a..bbfa299e 100644 --- a/crates/eonix_mm/src/address/paddr.rs +++ b/crates/eonix_mm/src/address/paddr.rs @@ -1,11 +1,11 @@ +use core::fmt; +use core::ops::{Add, Sub}; +use core::ptr::NonNull; + use super::addr::Addr; use crate::paging::{PAGE_SIZE_BITS, PFN}; -use core::{ - fmt, - ops::{Add, Sub}, - ptr::NonNull, -}; +/// Convert PAddr to VAddr. pub trait PhysAccess { /// Translate the data that this address is pointing to into kernel /// accessible pointer. Use it with care. 
diff --git a/crates/eonix_mm/src/page_table.rs b/crates/eonix_mm/src/page_table.rs index 55732f72..f3528060 100644 --- a/crates/eonix_mm/src/page_table.rs +++ b/crates/eonix_mm/src/page_table.rs @@ -3,7 +3,7 @@ mod paging_mode; mod pte; mod pte_iterator; -pub use page_table::{PageTable, RawPageTable}; +pub use page_table::{PageTable, PageTableAlloc, RawPageTable}; pub use paging_mode::{PageTableLevel, PagingMode}; pub use pte::{PageAttribute, RawAttribute, TableAttribute, PTE}; pub use pte_iterator::PageTableIterator; diff --git a/crates/eonix_mm/src/page_table/page_table.rs b/crates/eonix_mm/src/page_table/page_table.rs index 8318049f..80be63b9 100644 --- a/crates/eonix_mm/src/page_table/page_table.rs +++ b/crates/eonix_mm/src/page_table/page_table.rs @@ -1,15 +1,12 @@ -use super::{ - paging_mode::PageTableLevel, - pte::{RawAttribute, TableAttribute}, - pte_iterator::{KernelIterator, UserIterator}, - PagingMode, PTE, -}; -use crate::{ - address::{PAddr, VRange}, - page_table::PageTableIterator, - paging::{GlobalPageAlloc, Page, PageAccess, PageAlloc, PageBlock}, -}; -use core::{marker::PhantomData, ptr::NonNull}; +use core::marker::PhantomData; +use core::ptr::NonNull; + +use super::paging_mode::PageTableLevel; +use super::pte::{RawAttribute, TableAttribute}; +use super::{PagingMode, PTE}; +use crate::address::{PAddr, VRange}; +use crate::page_table::PageTableIterator; +use crate::paging::{Folio, PageAccess, PageBlock, PFN}; pub trait RawPageTable<'a>: Send + 'a { type Entry: PTE + 'a; @@ -24,45 +21,60 @@ pub trait RawPageTable<'a>: Send + 'a { unsafe fn from_ptr(ptr: NonNull) -> Self; } +pub trait PageTableAlloc: Clone { + type Folio: Folio; + + fn alloc(&self) -> Self::Folio; + unsafe fn from_raw(&self, pfn: PFN) -> Self::Folio; +} + +pub trait GlobalPageTableAlloc: PageTableAlloc { + const GLOBAL: Self; +} + pub struct PageTable<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { - root_table_page: Page, - 
phantom: PhantomData<&'a (M, X)>, + root_table_page: A::Folio, + alloc: A, + access: X, + phantom: PhantomData<&'a M>, } impl<'a, M, A, X> PageTable<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { - pub fn with_root_table(root_table_page: Page) -> Self { + pub fn new(root_table_page: A::Folio, alloc: A, access: X) -> Self { Self { root_table_page, + alloc, + access, phantom: PhantomData, } } pub fn clone_global<'b, B>(&self) -> PageTable<'b, M, B, X> where - B: GlobalPageAlloc, + B: GlobalPageTableAlloc, { - self.clone_in(B::global()) + self.clone_in(B::GLOBAL) } pub fn clone_in<'b, B>(&self, alloc: B) -> PageTable<'b, M, B, X> where - B: PageAlloc, + B: PageTableAlloc, { - let new_root_table_page = Page::alloc_in(alloc); - let new_table_data = X::get_ptr_for_page(&new_root_table_page); - let kernel_table_data = X::get_ptr_for_page(&self.root_table_page); + let new_root_table_page = alloc.alloc(); + let new_table_data = self.access.get_ptr_for_page(&new_root_table_page); + let kernel_table_data = self.access.get_ptr_for_page(&self.root_table_page); unsafe { // SAFETY: `new_table_data` and `kernel_table_data` are both valid pointers @@ -82,7 +94,7 @@ where root_page_table.index_mut(idx).take(); } - PageTable::with_root_table(new_root_table_page) + PageTable::new(new_root_table_page, alloc, self.access.clone()) } pub fn addr(&self) -> PAddr { @@ -90,100 +102,59 @@ where } pub fn iter_user(&self, range: VRange) -> impl Iterator { - let alloc = self.root_table_page.allocator(); - let page_table_ptr = X::get_ptr_for_page(&self.root_table_page); + let page_table_ptr = self.access.get_ptr_for_page(&self.root_table_page); let root_page_table = unsafe { // SAFETY: `page_table_ptr` is a valid pointer to a page table. 
M::RawTable::from_ptr(page_table_ptr) }; - PageTableIterator::::new(root_page_table, range, alloc.clone()) - } - - pub fn iter_kernel(&self, range: VRange) -> impl Iterator { - self.iter_kernel_levels(range, M::LEVELS) + PageTableIterator::::new( + root_page_table, + range, + TableAttribute::USER, + self.alloc.clone(), + self.access.clone(), + ) } - /// Iterates over the kernel space entries in the page table for the specified levels. - /// - /// # Parameters - /// - `range`: The virtual address range to iterate over. - /// - `levels`: A slice of `PageTableLevel` that specifies which levels of the page table - /// should be included in the iteration. Each level corresponds to a level in the page - /// table hierarchy, and the iterator will traverse entries at these levels. + /// Iterates over the kernel space entries in the page table. /// /// # Returns /// An iterator over mutable references to the page table entries (`M::Entry`) within the - /// specified range and levels. + /// specified range. /// /// # Example /// ``` /// let range = VRange::new(0x1234000, 0x1300000); - /// let levels = &M::LEVELS[..2]; - /// for pte in page_table.iter_kernel_levels(range, levels) { + /// for pte in page_table.iter_kernel(range) { /// // Process each entry /// } /// ``` - pub fn iter_kernel_levels( - &self, - range: VRange, - levels: &'static [PageTableLevel], - ) -> impl Iterator { - self.iter_kernel_in(range, levels, self.root_table_page.allocator()) - } - - /// Iterates over the kernel space entries in the page table for the specified levels - /// with a given page allocator. - /// - /// # Parameters - /// - `range`: The virtual address range to iterate over. - /// - `levels`: A slice of `PageTableLevel` that specifies which levels of the page table - /// should be included in the iteration. Each level corresponds to a level in the page - /// table hierarchy, and the iterator will traverse entries at these levels. 
- /// - `alloc`: A page allocator that provides memory for the page table entries. - /// - /// # Returns - /// An iterator over mutable references to the page table entries (`M::Entry`) within the - /// specified range and levels. - /// - /// # Example - /// ```no_run - /// let range = VRange::new(0x1234000, 0x1300000); - /// let levels = &M::LEVELS[..2]; - /// for pte in page_table.iter_kernel_in(range, levels, NoAlloc) { - /// // Process each entry - /// } - /// ``` - pub fn iter_kernel_in( - &self, - range: VRange, - levels: &'static [PageTableLevel], - alloc: A1, - ) -> impl Iterator { - let page_table_ptr = X::get_ptr_for_page(&self.root_table_page); + pub fn iter_kernel(&self, range: VRange) -> impl Iterator { + let page_table_ptr = self.access.get_ptr_for_page(&self.root_table_page); let root_page_table = unsafe { // SAFETY: `page_table_ptr` is a valid pointer to a page table. M::RawTable::from_ptr(page_table_ptr) }; - PageTableIterator::::with_levels( + PageTableIterator::::with_levels( root_page_table, range, - alloc, - levels, + TableAttribute::GLOBAL, + self.alloc.clone(), + self.access.clone(), + M::LEVELS, ) } - fn drop_page_table_recursive(page_table: &Page, levels: &[PageTableLevel]) { + fn drop_page_table_recursive(&self, page_table: &A::Folio, levels: &[PageTableLevel]) { let [level, remaining_levels @ ..] = levels else { return }; if remaining_levels.is_empty() { // We reached the last level, no need to go deeper. return; } - let alloc = page_table.allocator(); - - let page_table_ptr = X::get_ptr_for_page(page_table); + let page_table_ptr = self.access.get_ptr_for_page(page_table); let mut page_table = unsafe { // SAFETY: `page_table_ptr` is a valid pointer to a page table. M::RawTable::from_ptr(page_table_ptr) @@ -201,10 +172,10 @@ where let page_table = unsafe { // SAFETY: We got the pfn from a valid page table entry, so it should be valid. 
- Page::from_raw_in(pfn, alloc.clone()) + self.alloc.from_raw(pfn) }; - Self::drop_page_table_recursive(&page_table, remaining_levels); + self.drop_page_table_recursive(&page_table, remaining_levels); } } } @@ -213,10 +184,10 @@ impl<'a, M, A, X> Drop for PageTable<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { fn drop(&mut self) { - Self::drop_page_table_recursive(&self.root_table_page, M::LEVELS); + self.drop_page_table_recursive(&self.root_table_page, M::LEVELS); } } diff --git a/crates/eonix_mm/src/page_table/pte.rs b/crates/eonix_mm/src/page_table/pte.rs index e067d207..c14d5738 100644 --- a/crates/eonix_mm/src/page_table/pte.rs +++ b/crates/eonix_mm/src/page_table/pte.rs @@ -1,6 +1,7 @@ -use crate::paging::PFN; use bitflags::bitflags; +use crate::paging::PFN; + bitflags! { #[derive(Clone, Copy, PartialEq)] pub struct TableAttribute: usize { diff --git a/crates/eonix_mm/src/page_table/pte_iterator.rs b/crates/eonix_mm/src/page_table/pte_iterator.rs index 89b9fb9f..a9e4ff46 100644 --- a/crates/eonix_mm/src/page_table/pte_iterator.rs +++ b/crates/eonix_mm/src/page_table/pte_iterator.rs @@ -1,62 +1,14 @@ -use super::{ - pte::{RawAttribute, TableAttribute}, - PageTableLevel, PagingMode, RawPageTable as _, PTE, -}; -use crate::{ - address::{AddrOps as _, VRange}, - paging::{Page, PageAccess, PageAlloc}, -}; -use core::{marker::PhantomData}; - -pub struct KernelIterator; -pub struct UserIterator; - -pub trait IteratorType { - fn page_table_attributes() -> TableAttribute; - - fn get_page_table<'a, A, X>(pte: &mut M::Entry, alloc: &A) -> M::RawTable<'a> - where - A: PageAlloc, - X: PageAccess, - { - let attr = pte.get_attr().as_table_attr().expect("Not a page table"); - - if attr.contains(TableAttribute::PRESENT) { - let pfn = pte.get_pfn(); - unsafe { - // SAFETY: We are creating a pointer to a page referenced to in - // some page table, which should be valid. 
- let page_table_ptr = X::get_ptr_for_pfn(pfn); - // SAFETY: `page_table_ptr` is a valid pointer to a page table. - M::RawTable::from_ptr(page_table_ptr) - } - } else { - let page = Page::alloc_in(alloc.clone()); - let page_table_ptr = X::get_ptr_for_page(&page); - - unsafe { - // SAFETY: `page_table_ptr` is good for writing and properly aligned. - page_table_ptr.write_bytes(0, 1); - } - - pte.set( - page.into_raw(), - ::Attr::from(Self::page_table_attributes()), - ); - - unsafe { - // SAFETY: `page_table_ptr` is a valid pointer to a page table. - M::RawTable::from_ptr(page_table_ptr) - } - } - } -} +use super::page_table::PageTableAlloc; +use super::pte::{RawAttribute, TableAttribute}; +use super::{PageTableLevel, PagingMode, RawPageTable as _, PTE}; +use crate::address::{AddrOps as _, VRange}; +use crate::paging::{Folio, PageAccess}; -pub struct PageTableIterator<'a, M, A, X, K> +pub struct PageTableIterator<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { /// Specifies the hierarchy of page table levels to iterate over. 
@@ -69,19 +21,19 @@ where indicies: [u16; 8], tables: [Option>; 8], + fill_entry_attr: TableAttribute, + alloc: A, - _phantom: PhantomData<&'a (X, K)>, + access: X, } -impl<'a, M, A, X, K> PageTableIterator<'a, M, A, X, K> +impl<'a, M, A, X> PageTableIterator<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, - K: IteratorType, { - fn parse_tables_starting_from(&mut self, idx_level: usize) { for (idx, &pt_idx) in self .indicies @@ -98,18 +50,58 @@ where }; let parent_table = parent_table.as_mut().expect("Parent table is None"); let next_pte = parent_table.index_mut(pt_idx); - child_table.replace(K::get_page_table::(next_pte, &self.alloc)); + + child_table.replace({ + let attr = next_pte + .get_attr() + .as_table_attr() + .expect("Not a page table"); + + if attr.contains(TableAttribute::PRESENT) { + let pfn = next_pte.get_pfn(); + unsafe { + // SAFETY: We are creating a pointer to a page referenced to in + // some page table, which should be valid. + let page_table_ptr = self.access.get_ptr_for_pfn(pfn); + // SAFETY: `page_table_ptr` is a valid pointer to a page table. + M::RawTable::from_ptr(page_table_ptr) + } + } else { + let page = self.alloc.alloc(); + let page_table_ptr = self.access.get_ptr_for_page(&page); + + unsafe { + // SAFETY: `page_table_ptr` is good for writing and properly aligned. + page_table_ptr.write_bytes(0, 1); + } + + next_pte.set(page.into_raw(), self.fill_entry_attr.into()); + + unsafe { + // SAFETY: `page_table_ptr` is a valid pointer to a page table. 
+ M::RawTable::from_ptr(page_table_ptr) + } + } + }); } } - pub fn new(page_table: M::RawTable<'a>, range: VRange, alloc: A) -> Self { - Self::with_levels(page_table, range, alloc, M::LEVELS) + pub fn new( + page_table: M::RawTable<'a>, + range: VRange, + fill_entry_attr: TableAttribute, + alloc: A, + access: X, + ) -> Self { + Self::with_levels(page_table, range, fill_entry_attr, alloc, access, M::LEVELS) } pub fn with_levels( page_table: M::RawTable<'a>, range: VRange, + fill_entry_attr: TableAttribute, alloc: A, + access: X, levels: &'static [PageTableLevel], ) -> Self { let start = range.start().floor(); @@ -122,8 +114,9 @@ where remaining: (end - start) / last_level.page_size(), indicies: [0; 8], tables: [const { None }; 8], + fill_entry_attr: fill_entry_attr.union(TableAttribute::PRESENT), alloc, - _phantom: PhantomData, + access, }; for (i, level) in levels.iter().enumerate() { @@ -137,13 +130,12 @@ where } } -impl<'a, M, A, X, K> Iterator for PageTableIterator<'a, M, A, X, K> +impl<'a, M, A, X> Iterator for PageTableIterator<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, - K: IteratorType, { type Item = &'a mut M::Entry; @@ -178,15 +170,3 @@ where Some(retval) } } - -impl IteratorType for KernelIterator { - fn page_table_attributes() -> TableAttribute { - TableAttribute::PRESENT | TableAttribute::GLOBAL - } -} - -impl IteratorType for UserIterator { - fn page_table_attributes() -> TableAttribute { - TableAttribute::PRESENT | TableAttribute::USER - } -} diff --git a/crates/eonix_mm/src/page_table/walk.rs b/crates/eonix_mm/src/page_table/walk.rs new file mode 100644 index 00000000..aba80b09 --- /dev/null +++ b/crates/eonix_mm/src/page_table/walk.rs @@ -0,0 +1,210 @@ +use super::pte::{RawAttribute, TableAttribute}; +use super::{PageTableLevel, PTE}; +use crate::address::{AddrOps, VAddr, VRange}; +use crate::paging::PFN; + +pub enum WalkState { + Next, + Skip, + Break, +} + +pub trait PageTable: Sized { + 
type Entry: PTE; + const LEVELS: &'static [PageTableLevel]; + + fn index(&self, index: usize) -> &Self::Entry; + fn index_mut(&mut self, index: usize) -> &mut Self::Entry; + + fn from_pfn(pfn: PFN) -> Self; + unsafe fn take_pfn(pfn: PFN) -> Self; +} + +pub struct PageTableWalk<'a, T, D> +where + T: PageTable, +{ + levels: &'a [PageTableLevel], + fill_entry: &'a [fn(&mut D, &mut T::Entry) -> Option], + walk_entry: &'a [fn(&mut D, &mut T::Entry) -> WalkState], + data: D, +} + +fn try_get_table( + entry: &mut T::Entry, + data: &mut D, + fill_entry: fn(&mut D, &mut T::Entry) -> Option, +) -> Option +where + T: PageTable, +{ + let (mut pfn, attr) = entry.get(); + + // Always skip huge page entries + let attr = attr.as_table_attr()?; + + // For normal entries, check present flags + if !attr.contains(TableAttribute::PRESENT) { + // Skip entries filled with nothing + pfn = fill_entry(data, entry)?; + } + + Some(T::from_pfn(pfn)) +} + +fn _walk_page_table( + walk: &mut PageTableWalk, + cur_level: usize, + table: &mut T, + range: VRange, +) where + T: PageTable, +{ + let level = walk.levels[cur_level]; + + let page_size = level.page_size(); + let mut addr = range.start(); + + while addr < range.end() { + let idx = level.index_of(addr); + let entry = table.index_mut(idx); + + let mut next_table = None; + if cur_level < walk.levels.len() - 1 { + next_table = try_get_table(entry, &mut walk.data, walk.fill_entry[cur_level]); + } + + match ( + walk.walk_entry[cur_level](&mut walk.data, entry), + &mut next_table, + ) { + (WalkState::Break, _) => break, + (WalkState::Next, Some(next_table)) => _walk_page_table( + walk, + cur_level + 1, + next_table, + VRange::new(addr, range.end()), + ), + // `fill_entry` says that we shouldn't continue. 
+ (WalkState::Next, None) => {} + _ => {} + } + + addr = addr.floor_to(page_size) + page_size; + } +} + +pub fn walk_page_table(walk: &mut PageTableWalk, table: &mut T, range: VRange) +where + T: PageTable, +{ + _walk_page_table(walk, 0, table, range); +} + +pub fn drop_user_page_table(mut root_page_table: T) +where + T: PageTable, +{ + fn walk(_: &mut (), entry: &mut T::Entry) -> WalkState { + let (pfn, attr) = entry.get(); + let Some(attr) = attr.as_table_attr() else { + return WalkState::Skip; + }; + + if !attr.contains(TableAttribute::USER) { + return WalkState::Skip; + } + + unsafe { + // Check `_walk_page_table`: We will and only will touch the next level of table with + // `next_table` holding a refcount. We take the table away from the parent table now. + T::take_pfn(pfn); + } + + entry.set(PFN::from_val(0), TableAttribute::empty().into()); + + if LEVEL == 2 { + WalkState::Skip + } else { + WalkState::Next + } + } + + let mut walk = PageTableWalk { + levels: T::LEVELS, + fill_entry: &[no_fill::, no_fill::, no_fill::], + walk_entry: &[walk::, walk::, walk::, skip_walk::], + data: (), + }; + + walk_page_table( + &mut walk, + &mut root_page_table, + VRange::new(VAddr::from(0), VAddr::from(0x0000_8000_0000_0000)), + ); +} + +pub fn iter_pte( + page_table: &mut T, + range: VRange, + fill_func: impl FnMut(&mut T::Entry) -> Option, + for_each: impl FnMut(&mut T::Entry), +) { + let walker = (fill_func, for_each); + + fn fill_entry( + (fill, _): &mut ( + impl FnMut(&mut T::Entry) -> Option, + impl FnMut(&mut T::Entry), + ), + entry: &mut T::Entry, + ) -> Option { + fill(entry) + } + + fn walk_entry( + (_, for_each): &mut ( + impl FnMut(&mut T::Entry) -> Option, + impl FnMut(&mut T::Entry), + ), + entry: &mut T::Entry, + ) -> WalkState { + for_each(entry); + WalkState::Next + } + + let mut walk = PageTableWalk { + levels: T::LEVELS, + fill_entry: &[fill_entry::, fill_entry::, fill_entry::], + walk_entry: &[ + cont_walk::, + cont_walk::, + cont_walk::, + walk_entry::, 
+ ], + data: walker, + }; + + walk_page_table(&mut walk, page_table, range); +} + +pub fn no_fill(_: &mut D, _: &mut T::Entry) -> Option +where + T: PageTable, +{ + None +} + +pub fn skip_walk(_: &mut D, _: &mut T::Entry) -> WalkState +where + T: PageTable, +{ + WalkState::Skip +} + +pub fn cont_walk(_: &mut D, _: &mut T::Entry) -> WalkState +where + T: PageTable, +{ + WalkState::Next +} diff --git a/crates/eonix_mm/src/paging.rs b/crates/eonix_mm/src/paging.rs index 0c4811f2..f0166cf3 100644 --- a/crates/eonix_mm/src/paging.rs +++ b/crates/eonix_mm/src/paging.rs @@ -2,12 +2,10 @@ mod list; mod page; mod page_alloc; mod pfn; -mod raw_page; mod zone; -pub use list::{PageList, PageListSized}; -pub use page::{Page, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS}; -pub use page_alloc::{GlobalPageAlloc, NoAlloc, PageAlloc}; +pub use list::{FolioList, FolioListSized}; +pub use page::{BasicFolio, Folio, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS}; +pub use page_alloc::{FrameAlloc, GlobalFrameAlloc}; pub use pfn::PFN; -pub use raw_page::{RawPage, UnmanagedRawPage}; pub use zone::Zone; diff --git a/crates/eonix_mm/src/paging/list.rs b/crates/eonix_mm/src/paging/list.rs index a52cf947..2dd557c9 100644 --- a/crates/eonix_mm/src/paging/list.rs +++ b/crates/eonix_mm/src/paging/list.rs @@ -1,16 +1,16 @@ -pub trait PageList { - type Page; +pub trait FolioList { + type Folio; fn is_empty(&self) -> bool; - fn peek_head(&mut self) -> Option<&mut Self::Page>; + fn peek_head(&mut self) -> Option<&mut Self::Folio>; - fn pop_head(&mut self) -> Option<&'static mut Self::Page>; - fn push_tail(&mut self, page: &'static mut Self::Page); - fn remove(&mut self, page: &mut Self::Page); + fn pop_head(&mut self) -> Option<&'static mut Self::Folio>; + fn push_tail(&mut self, page: &'static mut Self::Folio); + fn remove(&mut self, page: &mut Self::Folio); } -pub trait PageListSized: PageList + Sized { +pub trait FolioListSized: FolioList + Sized { const NEW: Self; fn new() -> Self { diff 
--git a/crates/eonix_mm/src/paging/page.rs b/crates/eonix_mm/src/paging/page.rs index c5a14b5e..8b067e43 100644 --- a/crates/eonix_mm/src/paging/page.rs +++ b/crates/eonix_mm/src/paging/page.rs @@ -1,6 +1,8 @@ -use super::{GlobalPageAlloc, PageAlloc, RawPage as _, PFN}; -use crate::address::{AddrRange, PAddr, PhysAccess}; -use core::{fmt, mem::ManuallyDrop, ptr::NonNull, sync::atomic::Ordering}; +use core::mem::ManuallyDrop; +use core::ptr::NonNull; + +use super::PFN; +use crate::address::{PAddr, PRange}; pub const PAGE_SIZE: usize = 4096; pub const PAGE_SIZE_BITS: u32 = PAGE_SIZE.trailing_zeros(); @@ -15,306 +17,81 @@ pub struct PageBlock([u8; PAGE_SIZE]); /// A trait that provides the kernel access to the page. #[doc(notable_trait)] -pub trait PageAccess { +pub trait PageAccess: Clone { /// Returns a kernel-accessible pointer to the page referenced by the given /// physical frame number. /// /// # Safety /// This function is unsafe because calling this function on some non-existing /// pfn will cause undefined behavior. - unsafe fn get_ptr_for_pfn(pfn: PFN) -> NonNull; + unsafe fn get_ptr_for_pfn(&self, pfn: PFN) -> NonNull; /// Returns a kernel-accessible pointer to the given page. - fn get_ptr_for_page(page: &Page) -> NonNull { + fn get_ptr_for_page(&self, page: &F) -> NonNull { unsafe { // SAFETY: `page.pfn()` is guaranteed to be valid. - Self::get_ptr_for_pfn(page.pfn()) + self.get_ptr_for_pfn(page.pfn()) } } } -/// A Page allocated in allocator `A`. -#[derive(PartialEq, Eq, PartialOrd, Ord)] -pub struct Page { - raw_page: A::RawPage, - alloc: A, -} - -unsafe impl Send for Page {} -unsafe impl Sync for Page {} - -impl Page -where - A: GlobalPageAlloc, -{ - /// Allocate a page of the given *order*. - pub fn alloc_order(order: u32) -> Self { - Self::alloc_order_in(order, A::global()) - } - - /// Allocate exactly one page. - pub fn alloc() -> Self { - Self::alloc_in(A::global()) - } +/// A [`Folio`] represents one page or a bunch of adjacent pages. 
+pub trait Folio { + /// Returns the physical frame number of the folio, which is aligned with + /// the folio's size and valid. + fn pfn(&self) -> PFN; - /// Allocate a contiguous block of pages that can contain at least `count` pages. - pub fn alloc_at_least(count: usize) -> Self { - Self::alloc_at_least_in(count, A::global()) - } + /// Returns the folio's *order* (log2 of the number of pages contained in + /// the folio). + fn order(&self) -> u32; - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to a valid page allocated through `alloc_order()` and that the - /// page have not been freed or deallocated yet. - /// - /// No checks are done. Any violation of this assumption may lead to undefined behavior. - pub unsafe fn from_raw_unchecked(pfn: PFN) -> Self { - unsafe { Self::from_raw_unchecked_in(pfn, A::global()) } - } - - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// This function is a safe wrapper around `from_paddr_unchecked()` that does **some sort - /// of** checks to ensure that the page is valid and managed by the allocator. - /// - /// # Panic - /// This function will panic if the page is not valid or if the page is not managed by - /// the allocator. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to an existing page (A.K.A. inside the global page array) and the - /// page will not be freed or deallocated during the call. - pub unsafe fn from_raw(pfn: PFN) -> Self { - unsafe { Self::from_raw_in(pfn, A::global()) } - } - - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_in()`. - /// - /// # Safety - /// Check `from_raw()` for the safety requirements. 
- pub unsafe fn with_raw(pfn: PFN, func: F) -> O - where - F: FnOnce(&Self) -> O, - { - unsafe { Self::with_raw_in(pfn, A::global(), func) } - } - - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_unchecked_in()`. - /// - /// # Safety - /// Check `from_raw_unchecked()` for the safety requirements. - pub unsafe fn with_raw_unchecked(pfn: PFN, func: F, alloc: A) -> O - where - F: FnOnce(&Self) -> O, - { - unsafe { Self::with_raw_unchecked_in(pfn, func, alloc) } - } -} - -impl Page -where - A: PageAlloc, -{ - /// Allocate a page of the given *order*. - pub fn alloc_order_in(order: u32, alloc: A) -> Self { - Self { - raw_page: alloc.alloc_order(order).expect("Out of memory"), - alloc, - } - } - - /// Allocate exactly one page. - pub fn alloc_in(alloc: A) -> Self { - Self { - raw_page: alloc.alloc().expect("Out of memory"), - alloc, - } - } - - /// Allocate a contiguous block of pages that can contain at least `count` pages. - pub fn alloc_at_least_in(count: usize, alloc: A) -> Self { - Self { - raw_page: alloc.alloc_at_least(count).expect("Out of memory"), - alloc, - } - } - - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to a valid page managed by `alloc` and that the page have not - /// been freed or deallocated yet. - /// - /// No checks are done. Any violation of this assumption may lead to undefined behavior. - pub unsafe fn from_raw_unchecked_in(pfn: PFN, alloc: A) -> Self { - Self { - raw_page: A::RawPage::from(pfn), - alloc, - } + /// Returns the total size of the folio in bytes. + fn len(&self) -> usize { + 1 << (self.order() + PAGE_SIZE_BITS) } - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. 
- /// - /// This function is a safe wrapper around `from_paddr_unchecked()` that does **some sort - /// of** checks to ensure that the page is valid and managed by the allocator. - /// - /// # Panic - /// This function will panic if the page is not valid or if the page is not managed by - /// the allocator. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to an existing page (A.K.A. inside the global page array) and the - /// page will not be freed or deallocated during the call. - pub unsafe fn from_raw_in(pfn: PFN, alloc: A) -> Self { - unsafe { - // SAFETY: The caller guarantees that the page is inside the global page array. - assert!(alloc.has_management_over(A::RawPage::from(pfn))); - - // SAFETY: We've checked that the validity of the page. And the caller guarantees - // that the page will not be freed or deallocated during the call. - Self::from_raw_unchecked_in(pfn, alloc) - } + /// Returns the start physical address of the folio, which is guaranteed to + /// be aligned to the folio's size and valid. + fn start(&self) -> PAddr { + PAddr::from(self.pfn()) } - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_in()`. - /// - /// # Safety - /// Check `from_raw_in()` for the safety requirements. - pub unsafe fn with_raw_in(pfn: PFN, alloc: A, func: F) -> O - where - F: FnOnce(&Self) -> O, - { - unsafe { - let me = ManuallyDrop::new(Self::from_raw_in(pfn, alloc)); - func(&me) - } + /// Returns the physical address range of the ifolio, which is guaranteed to + /// be aligned to the folio's size and valid. + fn range(&self) -> PRange { + PRange::from(self.start()).grow(self.len()) } - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_unchecked_in()`. - /// - /// # Safety - /// Check `from_raw_unchecked_in()` for the safety requirements. 
- pub unsafe fn with_raw_unchecked_in(pfn: PFN, func: F, alloc: A) -> O + /// Consumes the folio and returns the PFN without dropping the reference + /// count the folio holds. + fn into_raw(self) -> PFN where - F: FnOnce(&Self) -> O, + Self: Sized, { - unsafe { - let me = ManuallyDrop::new(Self::from_raw_unchecked_in(pfn, alloc)); - func(&me) - } - } - - /// Whether we are the only owner of the page. - pub fn is_exclusive(&self) -> bool { - self.raw_page.refcount().load(Ordering::Acquire) == 1 - } - - /// Returns the *order* of the page, which is the log2 of the number of pages - /// contained in the page object. - pub fn order(&self) -> u32 { - self.raw_page.order() - } - - /// Returns the total size of the page in bytes. - pub fn len(&self) -> usize { - 1 << (self.order() + PAGE_SIZE_BITS) - } - - /// Consumes the `Page` and returns the physical frame number without dropping - /// the reference count the page holds. - pub fn into_raw(self) -> PFN { let me = ManuallyDrop::new(self); me.pfn() } - - /// Returns the physical frame number of the page, which is aligned with the - /// page size and valid. - pub fn pfn(&self) -> PFN { - Into::::into(self.raw_page) - } - - /// Returns the start physical address of the page, which is guaranteed to be - /// aligned to the page size and valid. - pub fn start(&self) -> PAddr { - PAddr::from(self.pfn()) - } - - /// Returns the physical address range of the page, which is guaranteed to be - /// aligned to the page size and valid. - pub fn range(&self) -> AddrRange { - AddrRange::from(self.start()).grow(self.len()) - } - - /// Get the allocator that manages this page. - pub fn allocator(&self) -> &A { - &self.alloc - } } -impl Clone for Page -where - A: PageAlloc, -{ - fn clone(&self) -> Self { - // SAFETY: Memory order here can be Relaxed is for the same reason as that - // in the copy constructor of `std::shared_ptr`. 
- self.raw_page.refcount().fetch_add(1, Ordering::Relaxed); - - Self { - raw_page: self.raw_page, - alloc: self.alloc.clone(), - } - } +/// A simple [`Folio`] with no reference counting or other ownership mechanism. +#[derive(Clone)] +pub struct BasicFolio { + pfn: PFN, + order: u32, } -impl Drop for Page -where - A: PageAlloc, -{ - fn drop(&mut self) { - match self.raw_page.refcount().fetch_sub(1, Ordering::AcqRel) { - 0 => panic!("Refcount for an in-use page is 0"), - 1 => unsafe { - // SAFETY: `self.raw_page` points to a valid page inside the global page array. - assert!(self.alloc.has_management_over(self.raw_page)); - - // SAFETY: `self.raw_page` is managed by the allocator and we're dropping the page. - self.alloc.dealloc(self.raw_page) - }, - _ => {} - } +impl BasicFolio { + pub const fn new(pfn: PFN, order: u32) -> Self { + Self { pfn, order } } } -impl fmt::Debug for Page { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "Page({:?}, order={})", - Into::::into(self.raw_page), - self.order() - ) +impl Folio for BasicFolio { + fn pfn(&self) -> PFN { + self.pfn } -} -impl PageAccess for T -where - T: PhysAccess, -{ - unsafe fn get_ptr_for_pfn(pfn: PFN) -> NonNull { - unsafe { - // SAFETY: The physical address of a existing page must be - // aligned to the page size. - T::as_ptr(PAddr::from(pfn)) - } + fn order(&self) -> u32 { + self.order } } diff --git a/crates/eonix_mm/src/paging/page_alloc.rs b/crates/eonix_mm/src/paging/page_alloc.rs index fe222605..267d3ccb 100644 --- a/crates/eonix_mm/src/paging/page_alloc.rs +++ b/crates/eonix_mm/src/paging/page_alloc.rs @@ -1,89 +1,44 @@ -use super::{raw_page::UnmanagedRawPage, RawPage}; +use super::Folio; -/// A trait for allocating and deallocating pages of memory. +/// A trait for allocating and deallocating folios. 
/// /// Note that the instances of this trait should provide pointer-like or reference-like /// behavior, meaning that the allocators are to be passed around by value and stored in /// managed data structures. This is because the allocator may be used to deallocate the /// pages it allocates. -#[doc(notable_trait)] -pub trait PageAlloc: Clone { - type RawPage: RawPage; +pub trait FrameAlloc: Clone { + type Folio: Folio; - /// Allocate a page of the given *order*. - fn alloc_order(&self, order: u32) -> Option; + /// Allocate a folio of the given *order*. + fn alloc_order(&self, order: u32) -> Option; - /// Allocate exactly one page. - fn alloc(&self) -> Option { + /// Allocate exactly one folio. + fn alloc(&self) -> Option { self.alloc_order(0) } - /// Allocate a contiguous block of pages that can contain at least `count` pages. - fn alloc_at_least(&self, count: usize) -> Option { + /// Allocate a folio that can contain at least [`count`] contiguous pages. + fn alloc_at_least(&self, count: usize) -> Option { let order = count.next_power_of_two().trailing_zeros(); self.alloc_order(order) } - - /// Deallocate a page. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller MUST ensure that - /// `raw_page` is allocated in this allocator and never used after this call. - unsafe fn dealloc(&self, raw_page: Self::RawPage); - - /// Check whether the page is allocated and managed by the allocator. - fn has_management_over(&self, page_ptr: Self::RawPage) -> bool; } /// A trait for global page allocators. /// /// Global means that we can get an instance of the allocator from anywhere in the kernel. -#[doc(notable_trait)] -pub trait GlobalPageAlloc: PageAlloc + 'static { - /// Get the global page allocator. - fn global() -> Self; +pub trait GlobalFrameAlloc: FrameAlloc + 'static { + /// The global page allocator. 
+ const GLOBAL: Self; } -#[derive(Clone)] -pub struct NoAlloc; - -impl<'a, A> PageAlloc for &'a A +impl<'a, A> FrameAlloc for &'a A where - A: PageAlloc, + A: FrameAlloc, { - type RawPage = A::RawPage; + type Folio = A::Folio; - fn alloc_order(&self, order: u32) -> Option { + fn alloc_order(&self, order: u32) -> Option { (*self).alloc_order(order) } - - unsafe fn dealloc(&self, raw_page: Self::RawPage) { - unsafe { (*self).dealloc(raw_page) } - } - - fn has_management_over(&self, raw_page: Self::RawPage) -> bool { - (*self).has_management_over(raw_page) - } -} - -impl PageAlloc for NoAlloc { - type RawPage = UnmanagedRawPage; - - fn alloc_order(&self, _: u32) -> Option { - panic!("`NoAlloc` cannot allocate pages"); - } - - unsafe fn dealloc(&self, _: Self::RawPage) { - panic!("`NoAlloc` cannot free pages"); - } - - fn has_management_over(&self, _: Self::RawPage) -> bool { - true - } -} - -impl GlobalPageAlloc for NoAlloc { - fn global() -> Self { - Self - } } diff --git a/crates/eonix_mm/src/paging/raw_page.rs b/crates/eonix_mm/src/paging/raw_page.rs deleted file mode 100644 index 789e863b..00000000 --- a/crates/eonix_mm/src/paging/raw_page.rs +++ /dev/null @@ -1,47 +0,0 @@ -use core::sync::atomic::AtomicUsize; - -use super::PFN; - -/// A `RawPage` represents a page of memory in the kernel. It is a low-level -/// representation of a page that is used by the kernel to manage memory. -#[doc(notable_trait)] -pub trait RawPage: Clone + Copy + From + Into { - fn order(&self) -> u32; - fn refcount(&self) -> &AtomicUsize; -} - -#[derive(Clone, Copy)] -pub struct UnmanagedRawPage(PFN, u32); - -/// Unmanaged raw pages should always have a non-zero refcount to -/// avoid `free()` from being called. 
-static UNMANAGED_RAW_PAGE_CLONE_COUNT: AtomicUsize = AtomicUsize::new(1); - -impl UnmanagedRawPage { - pub const fn new(pfn: PFN, order: u32) -> Self { - Self(pfn, order) - } -} - -impl From for UnmanagedRawPage { - fn from(value: PFN) -> Self { - Self::new(value, 0) - } -} - -impl Into for UnmanagedRawPage { - fn into(self) -> PFN { - let Self(pfn, _) = self; - pfn - } -} - -impl RawPage for UnmanagedRawPage { - fn order(&self) -> u32 { - self.1 - } - - fn refcount(&self) -> &AtomicUsize { - &UNMANAGED_RAW_PAGE_CLONE_COUNT - } -} diff --git a/crates/eonix_mm/src/paging/zone.rs b/crates/eonix_mm/src/paging/zone.rs index ec3ed15e..a2e85343 100644 --- a/crates/eonix_mm/src/paging/zone.rs +++ b/crates/eonix_mm/src/paging/zone.rs @@ -1,7 +1,6 @@ -use core::cell::UnsafeCell; +use core::ptr::NonNull; -#[allow(unused_imports)] -use super::{Page, PageAlloc, RawPage, PFN}; +use super::PFN; use crate::address::PRange; /// A [`Zone`] holds a lot of [`Page`]s that share the same NUMA node or @@ -16,5 +15,5 @@ pub trait Zone: Send + Sync { /// /// # Return /// [`None`] if [`pfn`] is not in this [`Zone`]. - fn get_page(&self, pfn: PFN) -> Option<&UnsafeCell>; + fn get_page(&self, pfn: PFN) -> Option>; } diff --git a/crates/slab_allocator/src/lib.rs b/crates/slab_allocator/src/lib.rs index 8597331d..c3e7f392 100644 --- a/crates/slab_allocator/src/lib.rs +++ b/crates/slab_allocator/src/lib.rs @@ -2,7 +2,7 @@ use core::ptr::NonNull; -use eonix_mm::paging::{PageList, PageListSized}; +use eonix_mm::paging::{FolioList, FolioListSized}; use eonix_sync::Spin; #[repr(C)] @@ -84,21 +84,21 @@ where } } -pub trait SlabPageAlloc { +/// Allocate a page suitable for slab system use. The page MUST come with +/// its allocation count 0 and next free slot None. +/// +/// # Safety +/// The page returned MUST have been properly initialized after allocation. 
+pub unsafe trait SlabPageAlloc { type Page: SlabPage; - type PageList: PageList; + type PageList: FolioList; - /// Allocate a page suitable for slab system use. The page MUST come with - /// its allocation count 0 and next free slot None. - /// - /// # Safety - /// The page returned MUST be properly initialized before its usage. - unsafe fn alloc_uninit(&self) -> &'static mut Self::Page; + fn alloc_slab_page(&self) -> &'static mut Self::Page; } pub(crate) struct SlabList where - T: PageList, + T: FolioList, { empty_list: T, partial_list: T, @@ -120,7 +120,7 @@ unsafe impl Sync for SlabAlloc where P: SlabPag impl SlabAlloc where L: SlabPageAlloc, - L::PageList: PageListSized, + L::PageList: FolioListSized, { pub fn new_in(alloc: L) -> Self { Self { @@ -148,7 +148,7 @@ where impl SlabList where - T: PageListSized, + T: FolioListSized, { const fn new(object_size: usize) -> Self { Self { @@ -162,8 +162,8 @@ where impl SlabList where - T: PageList, - T::Page: SlabPage, + T: FolioList, + T::Folio: SlabPage, { fn alloc_from_partial(&mut self) -> NonNull { let head = self.partial_list.peek_head().unwrap(); @@ -190,18 +190,16 @@ where slot } - fn charge(&mut self, alloc: &impl SlabPageAlloc) { - unsafe { - let slab = alloc.alloc_uninit(); - let free_slot = make_slab_page(slab.get_data_ptr(), self.object_size); + fn charge(&mut self, alloc: &impl SlabPageAlloc) { + let slab = alloc.alloc_slab_page(); + let free_slot = make_slab_page(slab.get_data_ptr(), self.object_size); - slab.set_free_slot(Some(free_slot)); + slab.set_free_slot(Some(free_slot)); - self.empty_list.push_tail(slab); - } + self.empty_list.push_tail(slab); } - fn alloc(&mut self, alloc: &impl SlabPageAlloc) -> NonNull { + fn alloc(&mut self, alloc: &impl SlabPageAlloc) -> NonNull { if !self.partial_list.is_empty() { return self.alloc_from_partial(); } @@ -216,7 +214,7 @@ where unsafe fn dealloc(&mut self, ptr: NonNull, _alloc: &impl SlabPageAlloc) { let slab_page = unsafe { // SAFETY: - ::from_allocated(ptr) 
+ ::from_allocated(ptr) }; let (was_full, is_empty); diff --git a/src/driver/ahci/command.rs b/src/driver/ahci/command.rs index c83339b7..4609d38d 100644 --- a/src/driver/ahci/command.rs +++ b/src/driver/ahci/command.rs @@ -1,9 +1,11 @@ +use eonix_mm::paging::Folio as _; + use crate::kernel::constants::EINVAL; -use crate::kernel::mem::paging::Page; +use crate::kernel::mem::Folio; use crate::prelude::*; pub trait Command { - fn pages(&self) -> &[Page]; + fn pages(&self) -> &[Folio]; fn lba(&self) -> u64; // in sectors @@ -14,19 +16,19 @@ pub trait Command { } pub struct IdentifyCommand { - page: Page, + page: Folio, } impl IdentifyCommand { pub fn new() -> Self { Self { - page: Page::alloc(), + page: Folio::alloc(), } } } impl Command for IdentifyCommand { - fn pages(&self) -> &[Page] { + fn pages(&self) -> &[Folio] { core::slice::from_ref(&self.page) } @@ -47,14 +49,14 @@ impl Command for IdentifyCommand { } } -pub struct ReadLBACommand<'lt> { - pages: &'lt [Page], +pub struct ReadLBACommand<'a> { + pages: &'a [Folio], lba: u64, count: u16, } -impl<'lt> ReadLBACommand<'lt> { - pub fn new(pages: &'lt [Page], lba: u64, count: u16) -> KResult { +impl<'a> ReadLBACommand<'a> { + pub fn new(pages: &'a [Folio], lba: u64, count: u16) -> KResult { if pages.len() > 248 { return Err(EINVAL); } @@ -69,7 +71,7 @@ impl<'lt> ReadLBACommand<'lt> { } impl Command for ReadLBACommand<'_> { - fn pages(&self) -> &[Page] { + fn pages(&self) -> &[Folio] { self.pages } @@ -91,13 +93,13 @@ impl Command for ReadLBACommand<'_> { } pub struct WriteLBACommand<'a> { - pages: &'a [Page], + pages: &'a [Folio], lba: u64, count: u16, } impl<'a> WriteLBACommand<'a> { - pub fn new(pages: &'a [Page], lba: u64, count: u16) -> KResult { + pub fn new(pages: &'a [Folio], lba: u64, count: u16) -> KResult { if pages.len() > 248 { return Err(EINVAL); } @@ -112,7 +114,7 @@ impl<'a> WriteLBACommand<'a> { } impl Command for WriteLBACommand<'_> { - fn pages(&self) -> &[Page] { + fn pages(&self) -> &[Folio] { 
self.pages } diff --git a/src/driver/ahci/command_table.rs b/src/driver/ahci/command_table.rs index 7b78d26f..00fc8a0b 100644 --- a/src/driver/ahci/command_table.rs +++ b/src/driver/ahci/command_table.rs @@ -1,13 +1,14 @@ use core::ptr::NonNull; use eonix_mm::address::PAddr; +use eonix_mm::paging::Folio as _; use super::command::Command; use super::{PRDTEntry, FISH2D}; -use crate::kernel::mem::{Page, PageExt}; +use crate::kernel::mem::FolioOwned; pub struct CommandTable { - page: Page, + page: FolioOwned, cmd_fis: NonNull, prdt: NonNull<[PRDTEntry; 248]>, prdt_entries: usize, @@ -18,7 +19,7 @@ unsafe impl Sync for CommandTable {} impl CommandTable { pub fn new() -> Self { - let page = Page::alloc(); + let page = FolioOwned::alloc(); let base = page.get_ptr(); unsafe { diff --git a/src/driver/ahci/defs.rs b/src/driver/ahci/defs.rs index c5440246..66841da8 100644 --- a/src/driver/ahci/defs.rs +++ b/src/driver/ahci/defs.rs @@ -1,7 +1,9 @@ #![allow(dead_code)] -use crate::kernel::mem::paging::Page; use eonix_mm::address::Addr as _; +use eonix_mm::paging::Folio as _; + +use crate::kernel::mem::Folio; pub const VENDOR_INTEL: u16 = 0x8086; pub const DEVICE_AHCI: u16 = 0x2922; @@ -239,7 +241,7 @@ pub struct PRDTEntry { } impl PRDTEntry { - pub fn setup(&mut self, page: &Page) { + pub fn setup(&mut self, page: &Folio) { self.base = page.start().addr() as u64; self._reserved1 = 0; diff --git a/src/driver/ahci/slot.rs b/src/driver/ahci/slot.rs index 06c6f2ec..dd096f57 100644 --- a/src/driver/ahci/slot.rs +++ b/src/driver/ahci/slot.rs @@ -3,18 +3,18 @@ use core::ptr::NonNull; use core::task::{Poll, Waker}; use eonix_mm::address::{Addr as _, PAddr}; +use eonix_mm::paging::Folio as _; use eonix_sync::{Spin, SpinIrq as _}; use super::command_table::CommandTable; use super::CommandHeader; use crate::kernel::constants::EIO; -use crate::kernel::mem::paging::AllocZeroed; -use crate::kernel::mem::{Page, PageExt}; +use crate::kernel::mem::FolioOwned; use crate::KResult; pub struct 
CommandList { base: NonNull, - _page: Page, + _page: FolioOwned, } unsafe impl Send for CommandList {} @@ -75,7 +75,9 @@ impl CommandList { } pub fn new() -> Self { - let page = Page::zeroed(); + let mut page = FolioOwned::alloc(); + page.as_bytes_mut().fill(0); + let base = page.get_ptr(); let controls_ptr = Self::controls_ptr(base); diff --git a/src/driver/e1000e.rs b/src/driver/e1000e.rs index 73143c2c..6d6ca353 100644 --- a/src/driver/e1000e.rs +++ b/src/driver/e1000e.rs @@ -5,11 +5,12 @@ use core::ptr::NonNull; use async_trait::async_trait; use eonix_hal::fence::memory_barrier; use eonix_mm::address::{Addr, PAddr}; +use eonix_mm::paging::Folio as _; use eonix_sync::SpinIrq; use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO}; use crate::kernel::interrupt::register_irq_handler; -use crate::kernel::mem::{PageExcl, PageExt, PhysAccess}; +use crate::kernel::mem::{FolioOwned, PhysAccess}; use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; use crate::net::netdev; use crate::prelude::*; @@ -54,13 +55,13 @@ struct E1000eDev { id: u32, regs: Registers, - rt_desc_page: PageExcl, + rt_desc_page: FolioOwned, rx_head: Option, rx_tail: Option, tx_tail: Option, - rx_buffers: Box<[PageExcl; RX_DESC_SIZE]>, - tx_buffers: Box<[Option; TX_DESC_SIZE]>, + rx_buffers: Box<[FolioOwned; RX_DESC_SIZE]>, + tx_buffers: Box<[Option; TX_DESC_SIZE]>, } fn test(val: u32, bit: u32) -> bool { @@ -227,7 +228,7 @@ impl netdev::Netdev for E1000eDev { return Err(EIO); } - let mut buffer_page = PageExcl::alloc(); + let mut buffer_page = FolioOwned::alloc(); if buf.len() > buffer_page.len() { return Err(EFAULT); } @@ -363,11 +364,15 @@ impl E1000eDev { speed: netdev::LinkSpeed::SpeedUnknown, id: netdev::alloc_id(), regs, - rt_desc_page: PageExcl::zeroed(), + rt_desc_page: { + let mut folio = FolioOwned::alloc(); + folio.as_bytes_mut().fill(0); + folio + }, rx_head: None, rx_tail: None, tx_tail: None, - rx_buffers: Box::new(core::array::from_fn(|_| 
PageExcl::alloc_order(2))), + rx_buffers: Box::new(core::array::from_fn(|_| FolioOwned::alloc_order(2))), tx_buffers: Box::new([const { None }; 32]), }; diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs index c5a3c3d2..5dfed88a 100644 --- a/src/driver/virtio/virtio_blk.rs +++ b/src/driver/virtio/virtio_blk.rs @@ -3,7 +3,7 @@ use alloc::boxed::Box; use async_trait::async_trait; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::{Addr, PAddr, PhysAccess}; -use eonix_mm::paging::PFN; +use eonix_mm::paging::{Folio as _, PFN}; use eonix_sync::Spin; use virtio_drivers::device::blk::VirtIOBlk; use virtio_drivers::transport::Transport; @@ -12,7 +12,7 @@ use virtio_drivers::Hal; use crate::io::Chunks; use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; use crate::kernel::constants::EIO; -use crate::kernel::mem::{Page, PageExt}; +use crate::kernel::mem::Folio; use crate::prelude::KResult; pub struct HAL; @@ -22,7 +22,7 @@ unsafe impl Hal for HAL { pages: usize, _direction: virtio_drivers::BufferDirection, ) -> (virtio_drivers::PhysAddr, core::ptr::NonNull) { - let page = Page::alloc_at_least(pages); + let page = Folio::alloc_at_least(pages); let ptr = page.get_ptr(); let pfn = page.into_raw(); @@ -40,7 +40,7 @@ unsafe impl Hal for HAL { unsafe { // SAFETY: The caller ensures that the pfn corresponds to a valid // page allocated by `dma_alloc`. 
- Page::from_raw(pfn); + Folio::from_raw(pfn); } 0 diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index b19c9908..c1feebdf 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -1,5 +1,4 @@ mod dir; -mod file; use alloc::sync::Arc; use core::ops::Deref; @@ -13,7 +12,7 @@ use itertools::Itertools; use crate::io::{Buffer, ByteBuffer, UninitBuffer}; use crate::kernel::block::{BlockDevice, BlockDeviceRequest}; use crate::kernel::constants::{EINVAL, EIO}; -use crate::kernel::mem::{CachePage, Page, PageExcl, PageExt, PageOffset}; +use crate::kernel::mem::{CachePage, Folio, FolioOwned, PageOffset}; use crate::kernel::timer::Instant; use crate::kernel::vfs::dentry::Dentry; use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse}; @@ -114,7 +113,7 @@ struct FatFs { impl SuperBlock for FatFs {} impl FatFs { - async fn read_cluster(&self, mut cluster: Cluster, buf: &Page) -> KResult<()> { + async fn read_cluster(&self, mut cluster: Cluster, buf: &Folio) -> KResult<()> { cluster = cluster.normalized(); let rq = BlockDeviceRequest::Read { @@ -278,7 +277,6 @@ impl InodeOps for FileInode { .next() .ok_or(EIO)?; - let page = page.get_page(); fs.read_cluster(cluster, &page).await?; let real_len = (inode.info.lock().size as usize) - offset.byte_count(); @@ -293,7 +291,7 @@ impl InodeOps for FileInode { struct DirInode { // TODO: Use the new PageCache... 
- dir_pages: RwLock>, + dir_pages: RwLock>, } impl DirInode { @@ -330,7 +328,7 @@ impl DirInode { let clusters = ClusterIterator::new(fat.as_ref(), Cluster::from_ino(inode.ino)); for cluster in clusters { - let page = PageExcl::alloc(); + let page = FolioOwned::alloc(); fs.read_cluster(cluster, &page).await?; dir_pages.push(page); @@ -343,7 +341,7 @@ impl DirInode { &self, sb: &SbUse, inode: &InodeUse, - ) -> KResult> + use<'_>> { + ) -> KResult> + use<'_>> { { let dir_pages = self.dir_pages.read().await; if !dir_pages.is_empty() { diff --git a/src/fs/fat32/file.rs b/src/fs/fat32/file.rs deleted file mode 100644 index 2df69728..00000000 --- a/src/fs/fat32/file.rs +++ /dev/null @@ -1,24 +0,0 @@ -use futures::Stream; - -use crate::{kernel::mem::Page, prelude::KResult}; - -use super::{ClusterIterator, FatFs}; - -pub trait ReadClusters { - fn read_clusters(self, fs: &FatFs) -> impl Stream> + Send; -} - -impl ReadClusters for ClusterIterator<'_> { - fn read_clusters(self, fs: &FatFs) -> impl Stream> + Send { - futures::stream::unfold(self, move |mut me| async { - let cluster = me.next()?; - let page = Page::alloc(); - - if let Err(err) = fs.read_cluster(cluster, &page).await { - return Some((Err(err), me)); - } - - Some((Ok(page), me)) - }) - } -} diff --git a/src/fs/tmpfs/file.rs b/src/fs/tmpfs/file.rs index a1755908..d560a672 100644 --- a/src/fs/tmpfs/file.rs +++ b/src/fs/tmpfs/file.rs @@ -125,7 +125,7 @@ impl InodeOps for FileInode { page: &mut CachePage, _: PageOffset, ) -> KResult<()> { - page.as_bytes_mut().fill(0); + page.lock().as_bytes_mut().fill(0); Ok(()) } diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 8e017336..be2146f8 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -8,8 +8,7 @@ use async_trait::async_trait; use mbr::MBRPartTable; use super::constants::ENOENT; -use super::mem::paging::Page; -use super::mem::PageExt; +use super::mem::Folio; use super::vfs::types::DeviceId; use crate::io::{Buffer, Chunks, FillResult}; use 
crate::kernel::constants::{EEXIST, EINVAL}; @@ -202,15 +201,15 @@ impl BlockDevice { let (page_slice, page, mut page_vec); match nr_batch { ..=8 => { - page = Page::alloc(); + page = Folio::alloc(); page_slice = core::slice::from_ref(&page); } ..=16 => { - page = Page::alloc_order(1); + page = Folio::alloc_order(1); page_slice = core::slice::from_ref(&page); } ..=32 => { - page = Page::alloc_order(2); + page = Folio::alloc_order(2); page_slice = core::slice::from_ref(&page); } count => { @@ -220,8 +219,8 @@ impl BlockDevice { let nr_pages = nr_huge_pages + nr_small_pages; page_vec = Vec::with_capacity(nr_pages); - page_vec.resize_with(nr_huge_pages, || Page::alloc_order(2)); - page_vec.resize_with(nr_pages, || Page::alloc()); + page_vec.resize_with(nr_huge_pages, || Folio::alloc_order(2)); + page_vec.resize_with(nr_pages, || Folio::alloc()); page_slice = &page_vec; } } @@ -266,7 +265,7 @@ pub enum BlockDeviceRequest<'lt> { /// Number of sectors to read count: u64, /// Buffer pages to read into - buffer: &'lt [Page], + buffer: &'lt [Folio], }, Write { /// Sector to write to, in 512-byte blocks @@ -274,6 +273,6 @@ pub enum BlockDeviceRequest<'lt> { /// Number of sectors to write count: u64, /// Buffer pages to write from - buffer: &'lt [Page], + buffer: &'lt [Folio], }, } diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs index f8b5dc0b..47b864bb 100644 --- a/src/kernel/mem.rs +++ b/src/kernel/mem.rs @@ -3,14 +3,16 @@ pub mod paging; mod access; mod address; mod allocator; +mod folio; mod mm_area; mod mm_list; mod page_alloc; mod page_cache; pub use access::PhysAccess; +pub use folio::{Folio, FolioOwned, LockedFolio}; pub(self) use mm_area::MMArea; pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission}; pub use page_alloc::{GlobalPageAlloc, RawPage}; pub use page_cache::{CachePage, PageCache, PageOffset}; -pub use paging::{Page, PageBuffer, PageExcl, PageExt}; +pub use paging::PageBuffer; diff --git a/src/kernel/mem/allocator.rs 
b/src/kernel/mem/allocator.rs index a3676ce0..3a70a8c2 100644 --- a/src/kernel/mem/allocator.rs +++ b/src/kernel/mem/allocator.rs @@ -3,11 +3,12 @@ use core::ptr::NonNull; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::PhysAccess; -use eonix_mm::paging::{PAGE_SIZE_BITS, PFN}; +use eonix_mm::paging::{Folio as _, PAGE_SIZE_BITS, PFN}; use eonix_sync::LazyLock; use slab_allocator::SlabAlloc; -use super::{GlobalPageAlloc, Page, PageExt}; +use super::folio::Folio; +use super::GlobalPageAlloc; static SLAB_ALLOCATOR: LazyLock> = LazyLock::new(|| SlabAlloc::new_in(GlobalPageAlloc)); @@ -18,19 +19,15 @@ unsafe impl GlobalAlloc for Allocator { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { let size = layout.size().next_power_of_two(); - let result = if size <= 2048 { - SLAB_ALLOCATOR.alloc(size) + if size <= 2048 { + SLAB_ALLOCATOR.alloc(size).as_ptr() } else { - let page_count = size >> PAGE_SIZE_BITS; - let page = Page::alloc_at_least(page_count); - - let ptr = page.get_ptr(); - page.into_raw(); + let folio = Folio::alloc_at_least(size >> PAGE_SIZE_BITS); + let ptr = folio.get_ptr(); + folio.into_raw(); - ptr - }; - - result.as_ptr() + ptr.as_ptr() + } } unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { @@ -45,7 +42,8 @@ unsafe impl GlobalAlloc for Allocator { } else { let paddr = ArchPhysAccess::from_ptr(ptr); let pfn = PFN::from(paddr); - Page::from_raw(pfn); + + Folio::from_raw(pfn); }; } } diff --git a/src/kernel/mem/folio.rs b/src/kernel/mem/folio.rs new file mode 100644 index 00000000..6647e1af --- /dev/null +++ b/src/kernel/mem/folio.rs @@ -0,0 +1,210 @@ +use core::fmt; +use core::mem::ManuallyDrop; +use core::ops::Deref; +use core::ptr::NonNull; +use core::sync::atomic::Ordering; + +use eonix_mm::paging::{Folio as FolioTrait, FrameAlloc, GlobalFrameAlloc, Zone, PFN}; + +use super::page_alloc::ZONE; +use super::{GlobalPageAlloc, PhysAccess as _, RawPage}; + +#[repr(transparent)] +pub struct Folio(NonNull); + +#[derive(Debug)] 
+#[repr(transparent)] +pub struct FolioOwned(Folio); + +#[repr(transparent)] +pub struct LockedFolio<'a>(&'a Folio); + +unsafe impl Send for Folio {} +unsafe impl Sync for Folio {} + +impl Folio { + pub(super) const fn from_mut_page(raw_page: &'static mut RawPage) -> Self { + Self(NonNull::new(raw_page).unwrap()) + } + + /// Allocate a folio of the given *order*. + pub fn alloc_order(order: u32) -> Self { + GlobalPageAlloc::GLOBAL + .alloc_order(order) + .expect("Out of memory") + } + + /// Allocate a folio of order 0 + pub fn alloc() -> Self { + Self::alloc_order(0) + } + + /// Allocate a folio consisting of at least [`count`] pages. + pub fn alloc_at_least(count: usize) -> Self { + GlobalPageAlloc::GLOBAL + .alloc_at_least(count) + .expect("Out of memory") + } + + /// Acquire the ownership of the folio pointed to by [`pfn`], leaving + /// [`refcount`] untouched. + /// + /// # Panic + /// This function will panic if the folio is not within the global zone. + /// + /// # Safety + /// This function is unsafe because it assumes that the caller has to ensure + /// that [`pfn`] points to a valid folio allocated through [`Self::alloc()`] + /// and that the folio have not been freed or deallocated yet. + pub unsafe fn from_raw(pfn: PFN) -> Self { + unsafe { + // SAFETY: The caller ensures that [`pfn`] points to a folio within + // the global zone. + Self(ZONE.get_page(pfn).unwrap_unchecked()) + } + } + + /// Do some work with the folio without touching the reference count with + /// the same restrictions as [`Self::from_raw()`]. + /// + /// # Safety + /// Check [`Self::from_raw()`] for safety requirements. + pub unsafe fn with_raw(pfn: PFN, func: F) -> O + where + F: FnOnce(&Self) -> O, + { + unsafe { + let me = ManuallyDrop::new(Self::from_raw(pfn)); + func(&me) + } + } + + pub fn lock(&self) -> LockedFolio { + // TODO: actually perform the lock... + LockedFolio(self) + } + + /// Get a vmem pointer to the folio data as a byte slice. 
+ pub fn get_bytes_ptr(&self) -> NonNull<[u8]> { + unsafe { + // SAFETY: `self.start()` can't be null. + NonNull::slice_from_raw_parts(self.start().as_ptr(), self.len()) + } + } + + /// Get a vmem pointer to the start of the folio. + pub fn get_ptr(&self) -> NonNull { + self.get_bytes_ptr().cast() + } +} + +impl Deref for Folio { + type Target = RawPage; + + fn deref(&self) -> &Self::Target { + unsafe { + // SAFETY: We don't expose mutable references to the folio. + self.0.as_ref() + } + } +} + +impl Clone for Folio { + fn clone(&self) -> Self { + // SAFETY: Memory order here can be Relaxed is for the same reason as + // that in the copy constructor of `std::shared_ptr`. + self.refcount.fetch_add(1, Ordering::Relaxed); + + Self(self.0) + } +} + +impl Drop for Folio { + fn drop(&mut self) { + match self.refcount.fetch_sub(1, Ordering::AcqRel) { + 0 => unreachable!("Refcount for an in-use page is 0"), + 1 => unsafe { GlobalPageAlloc::GLOBAL.dealloc_raw(self.0.as_mut()) }, + _ => {} + } + } +} + +impl fmt::Debug for Folio { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Page({:?}, order={})", self.pfn(), self.order) + } +} + +impl FolioTrait for Folio { + fn pfn(&self) -> PFN { + ZONE.get_pfn(self.0.as_ptr()) + } + + fn order(&self) -> u32 { + self.order + } +} + +impl LockedFolio<'_> { + pub fn as_bytes(&self) -> &[u8] { + unsafe { + // SAFETY: `self.start()` points to valid memory of length `self.len()`. + core::slice::from_raw_parts(self.start().as_ptr().as_ptr(), self.len()) + } + } + + pub fn as_bytes_mut(&mut self) -> &mut [u8] { + unsafe { + // SAFETY: `self.start()` points to valid memory of length `self.len()`. 
+ core::slice::from_raw_parts_mut(self.start().as_ptr().as_ptr(), self.len()) + } + } +} + +impl Deref for LockedFolio<'_> { + type Target = Folio; + + fn deref(&self) -> &Self::Target { + self.0 + } +} + +impl FolioOwned { + pub fn alloc() -> Self { + Self(Folio::alloc()) + } + + pub fn alloc_order(order: u32) -> Self { + Self(Folio::alloc_order(order)) + } + + pub fn alloc_at_least(count: usize) -> Self { + Self(Folio::alloc_at_least(count)) + } + + pub fn as_bytes(&self) -> &[u8] { + unsafe { + // SAFETY: The page is exclusively owned by us. + self.get_bytes_ptr().as_ref() + } + } + + pub fn as_bytes_mut(&mut self) -> &mut [u8] { + unsafe { + // SAFETY: The page is exclusively owned by us. + self.get_bytes_ptr().as_mut() + } + } + + pub fn share(self) -> Folio { + self.0 + } +} + +impl Deref for FolioOwned { + type Target = Folio; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index 2891dad8..782c5ef7 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -1,15 +1,16 @@ use core::borrow::Borrow; use core::cell::UnsafeCell; use core::cmp; +use core::sync::atomic::Ordering; use eonix_mm::address::{AddrOps as _, VAddr, VRange}; use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; -use eonix_mm::paging::PFN; +use eonix_mm::paging::{Folio as _, PFN}; use super::mm_list::EMPTY_PAGE; -use super::{Mapping, Page, Permission}; -use crate::kernel::mem::page_cache::PageOffset; -use crate::kernel::mem::{CachePage, PageExcl, PageExt}; +use super::{Mapping, Permission}; +use crate::kernel::mem::folio::Folio; +use crate::kernel::mem::{CachePage, FolioOwned, PageOffset}; use crate::prelude::KResult; #[derive(Debug)] @@ -98,8 +99,10 @@ impl MMArea { attr.remove(PageAttribute::COPY_ON_WRITE); attr.set(PageAttribute::WRITE, self.permission.write); - let page = unsafe { Page::from_raw(*pfn) }; - if page.is_exclusive() { + let page = unsafe { Folio::from_raw(*pfn) }; + + // XXX: 
Change me!!! + if page.refcount.load(Ordering::Relaxed) == 1 { // SAFETY: This is actually safe. If we read `1` here and we have `MMList` lock // held, there couldn't be neither other processes sharing the page, nor other // threads making the page COW at the same time. @@ -109,9 +112,13 @@ impl MMArea { let mut new_page; if *pfn == EMPTY_PAGE.pfn() { - new_page = PageExcl::zeroed(); + new_page = { + let mut folio = FolioOwned::alloc(); + folio.as_bytes_mut().fill(0); + folio + }; } else { - new_page = PageExcl::alloc(); + new_page = FolioOwned::alloc(); unsafe { // SAFETY: `page` is CoW, which means that others won't write to it. @@ -123,7 +130,7 @@ impl MMArea { } attr.remove(PageAttribute::ACCESSED); - *pfn = new_page.into_page().into_raw(); + *pfn = new_page.share().into_raw(); } /// # Arguments @@ -143,11 +150,11 @@ impl MMArea { let file_offset = file_mapping.offset + offset; - let map_page = |page: &Page, cache_page: &CachePage| { + let map_page = |cache_page: &CachePage| { if !self.permission.write { assert!(!write, "Write fault on read-only mapping"); - *pfn = page.clone().into_raw(); + *pfn = cache_page.add_mapping(); return; } @@ -157,26 +164,26 @@ impl MMArea { // So here we can set the dirty flag now. cache_page.set_dirty(true); attr.insert(PageAttribute::WRITE); - *pfn = page.clone().into_raw(); + *pfn = cache_page.add_mapping(); return; } if !write { // Delay the copy-on-write until write fault happens. attr.insert(PageAttribute::COPY_ON_WRITE); - *pfn = page.clone().into_raw(); + *pfn = cache_page.add_mapping(); return; } // XXX: Change this. Let's handle mapped pages before CoW pages. // Nah, we are writing to a mapped private mapping... 
- let mut new_page = PageExcl::zeroed(); + let mut new_page = FolioOwned::alloc(); new_page .as_bytes_mut() - .copy_from_slice(page.lock().as_bytes()); + .copy_from_slice(cache_page.lock().as_bytes()); attr.insert(PageAttribute::WRITE); - *pfn = new_page.into_page().into_raw(); + *pfn = new_page.share().into_raw(); }; file_mapping diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index 17dc1b05..5221c73b 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -1,31 +1,33 @@ mod mapping; mod page_fault; +mod page_table; use alloc::collections::btree_set::BTreeSet; use core::fmt; use core::sync::atomic::{AtomicUsize, Ordering}; use eonix_hal::mm::{ - flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchPagingMode, - ArchPhysAccess, GLOBAL_PAGE_TABLE, + flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, GLOBAL_PAGE_TABLE, }; use eonix_mm::address::{Addr as _, AddrOps as _, PAddr, VAddr, VRange}; -use eonix_mm::page_table::{PageAttribute, PageTable, RawAttribute, PTE}; -use eonix_mm::paging::{PAGE_SIZE, PFN}; +use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; +use eonix_mm::paging::{Folio as _, PAGE_SIZE, PFN}; use eonix_sync::{LazyLock, Mutex}; pub use mapping::{FileMapping, Mapping}; pub use page_fault::handle_kernel_page_fault; +use page_table::KernelPageTable; use super::address::{VAddrExt as _, VRangeExt as _}; -use super::page_alloc::GlobalPageAlloc; -use super::paging::AllocZeroed as _; -use super::{MMArea, Page, PageExt}; +use super::{Folio, FolioOwned, MMArea}; use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; -use crate::kernel::mem::page_alloc::RawPagePtr; use crate::prelude::*; use crate::sync::ArcSwap; -pub static EMPTY_PAGE: LazyLock = LazyLock::new(|| Page::zeroed()); +pub static EMPTY_PAGE: LazyLock = LazyLock::new(|| { + let mut folio = FolioOwned::alloc(); + folio.as_bytes_mut().fill(0); + folio.share() +}); #[derive(Debug, Clone, Copy)] pub struct Permission { 
@@ -34,23 +36,21 @@ pub struct Permission { pub execute: bool, } -pub type KernelPageTable<'a> = PageTable<'a, ArchPagingMode, GlobalPageAlloc, ArchPhysAccess>; - -struct MMListInner<'a> { +struct MMListInner { areas: BTreeSet, - page_table: KernelPageTable<'a>, + page_table: KernelPageTable, break_start: Option, break_pos: Option, } pub struct MMList { - inner: ArcSwap>>, + inner: ArcSwap>, user_count: AtomicUsize, /// Only used in kernel space to switch page tables on context switch. root_page_table: AtomicUsize, } -impl MMListInner<'_> { +impl MMListInner { fn overlapping_addr(&self, addr: VAddr) -> Option<&MMArea> { self.areas.get(&VRange::from(addr)) } @@ -96,7 +96,7 @@ impl MMListInner<'_> { } } - fn unmap(&mut self, start: VAddr, len: usize) -> KResult> { + fn unmap(&mut self, start: VAddr, len: usize) -> KResult> { assert_eq!(start.floor(), start); let end = (start + len).ceil(); let range_to_unmap = VRange::new(start, end); @@ -120,7 +120,7 @@ impl MMListInner<'_> { let (pfn, _) = pte.take(); pages_to_free.push(unsafe { // SAFETY: We got the pfn from a valid page table entry, so it should be valid. 
- Page::from_raw(pfn) + Folio::from_raw(pfn) }); } @@ -275,23 +275,23 @@ impl MMListInner<'_> { } } -impl Drop for MMListInner<'_> { +impl Drop for MMListInner { fn drop(&mut self) { // May buggy for area in &self.areas { if area.is_shared { for pte in self.page_table.iter_user(area.range()) { - let (pfn, _) = pte.take(); - let raw_page = RawPagePtr::from(pfn); - if raw_page.refcount().fetch_sub(1, Ordering::Relaxed) == 1 { - // Wrong here - // unsafe { Page::from_raw(pfn) }; - } + // XXX: Fix me + let _ = pte.take(); + // let raw_page = RawPagePtr::from(pfn); + // if raw_page.refcount().fetch_sub(1, Ordering::Relaxed) == 1 { + // unsafe { Page::from_raw(pfn) }; + // } } } else { for pte in self.page_table.iter_user(area.range()) { let (pfn, _) = pte.take(); - unsafe { Page::from_raw(pfn) }; + unsafe { Folio::from_raw(pfn) }; } } } @@ -327,7 +327,7 @@ impl MMList { } pub fn new() -> Self { - let page_table = GLOBAL_PAGE_TABLE.clone_global(); + let page_table = KernelPageTable::new(); Self { root_page_table: AtomicUsize::from(page_table.addr().addr()), user_count: AtomicUsize::new(0), @@ -344,7 +344,7 @@ impl MMList { let inner = self.inner.borrow(); let mut inner = inner.lock().await; - let page_table = GLOBAL_PAGE_TABLE.clone_global(); + let page_table = KernelPageTable::new(); let list = Self { root_page_table: AtomicUsize::from(page_table.addr().addr()), user_count: AtomicUsize::new(0), @@ -392,7 +392,7 @@ impl MMList { } pub fn deactivate(&self) { - set_root_page_table_pfn(PFN::from(GLOBAL_PAGE_TABLE.addr())); + set_root_page_table_pfn(PFN::from(GLOBAL_PAGE_TABLE.start())); let old_user_count = self.user_count.fetch_sub(1, Ordering::Release); assert_ne!(old_user_count, 0); @@ -444,7 +444,7 @@ impl MMList { let new_root_page_table = match &new { Some(new_mm) => new_mm.root_page_table.load(Ordering::Relaxed), - None => GLOBAL_PAGE_TABLE.addr().addr(), + None => GLOBAL_PAGE_TABLE.start().addr(), }; 
set_root_page_table_pfn(PFN::from(PAddr::from(new_root_page_table))); @@ -693,7 +693,7 @@ impl MMList { unsafe { // SAFETY: We are sure that the page is valid and we have the right to access it. - Page::with_raw(pte.get_pfn(), |page| { + Folio::with_raw(pte.get_pfn(), |page| { let mut pg = page.lock(); let page_data = &mut pg.as_bytes_mut()[start_offset..end_offset]; @@ -724,7 +724,7 @@ trait PageTableExt { fn set_copied(&self, from: &Self, range: VRange); } -impl PageTableExt for KernelPageTable<'_> { +impl PageTableExt for KernelPageTable { fn set_anonymous(&self, range: VRange, permission: Permission) { for pte in self.iter_user(range) { pte.set_anonymous(permission.execute); @@ -805,7 +805,7 @@ where let pfn = unsafe { // SAFETY: We get the pfn from a valid page table entry, so it should be valid as well. - Page::with_raw(from.get_pfn(), |page| page.clone().into_raw()) + Folio::with_raw(from.get_pfn(), |page| page.clone().into_raw()) }; self.set(pfn, T::Attr::from(from_attr & !PageAttribute::ACCESSED)); diff --git a/src/kernel/mem/mm_list/page_table.rs b/src/kernel/mem/mm_list/page_table.rs new file mode 100644 index 00000000..8a2acc13 --- /dev/null +++ b/src/kernel/mem/mm_list/page_table.rs @@ -0,0 +1,40 @@ +use core::ops::Deref; + +use eonix_hal::arch_exported::mm::{ArchPagingMode, PageAccessImpl}; +use eonix_hal::mm::GLOBAL_PAGE_TABLE; +use eonix_mm::page_table::PageTable; +use eonix_mm::paging::{Folio, GlobalFrameAlloc}; + +use crate::kernel::mem::{FolioOwned, GlobalPageAlloc, PhysAccess}; + +#[repr(transparent)] +pub struct KernelPageTable(PageTable<'static, ArchPagingMode, GlobalPageAlloc, PageAccessImpl>); + +impl KernelPageTable { + pub fn new() -> Self { + let global_page_table = unsafe { + // SAFETY: The region is valid and read only after initialization. 
+ GLOBAL_PAGE_TABLE.start().as_ptr::<[u8; 4096]>().as_ref() + }; + + let mut table_page = FolioOwned::alloc(); + let entries = table_page.as_bytes_mut().len(); + table_page.as_bytes_mut()[..(entries / 2)].fill(0); + table_page.as_bytes_mut()[(entries / 2)..] + .copy_from_slice(&global_page_table[(entries / 2)..]); + + Self(PageTable::new( + table_page.share(), + GlobalPageAlloc::GLOBAL, + PageAccessImpl, + )) + } +} + +impl Deref for KernelPageTable { + type Target = PageTable<'static, ArchPagingMode, GlobalPageAlloc, PageAccessImpl>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/src/kernel/mem/page_alloc.rs b/src/kernel/mem/page_alloc.rs index 37344fc2..ac2485da 100644 --- a/src/kernel/mem/page_alloc.rs +++ b/src/kernel/mem/page_alloc.rs @@ -5,14 +5,14 @@ use core::sync::atomic::Ordering; use buddy_allocator::BuddyAllocator; use eonix_mm::address::PRange; -use eonix_mm::paging::{ - GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PageList, PageListSized as _, -}; +use eonix_mm::page_table::PageTableAlloc; +use eonix_mm::paging::{FolioList, FolioListSized as _, FrameAlloc, GlobalFrameAlloc, PFN}; use eonix_preempt::PreemptGuard; use eonix_sync::{NoContext, Spin}; -use raw_page::{PageFlags, RawPageList}; -pub use raw_page::{RawPage, RawPagePtr}; -pub use zones::GlobalZone; +pub use raw_page::{PageFlags, RawPage, RawPageList}; +pub use zones::{GlobalZone, ZONE}; + +use super::folio::Folio; const COSTLY_ORDER: u32 = 3; const AREAS: usize = COSTLY_ORDER as usize + 1; @@ -27,9 +27,6 @@ static PERCPU_PAGE_ALLOC: PerCpuPageAlloc = PerCpuPageAlloc::new(); #[derive(Clone)] pub struct GlobalPageAlloc; -#[derive(Clone)] -pub struct BuddyPageAlloc(); - struct PerCpuPageAlloc { batch: u32, free_areas: [RawPageList; AREAS], @@ -72,11 +69,6 @@ impl PerCpuPageAlloc { } impl GlobalPageAlloc { - #[allow(dead_code)] - pub const fn buddy_alloc() -> BuddyPageAlloc { - BuddyPageAlloc() - } - /// Add the pages in the PAddr range `range` to the global allocator. 
/// /// This function is only to be called on system initialization when `eonix_preempt` @@ -88,15 +80,11 @@ impl GlobalPageAlloc { pub unsafe fn add_pages(range: PRange) { BUDDY_ALLOC .lock_with_context(NoContext) - .create_pages(range.start(), range.end()) + .create_folios(range.start(), range.end()) } -} - -impl PageAlloc for GlobalPageAlloc { - type RawPage = RawPagePtr; - fn alloc_order(&self, order: u32) -> Option { - let raw_page = if order > COSTLY_ORDER { + pub fn alloc_raw_order(&self, order: u32) -> Option<&'static mut RawPage> { + if order > COSTLY_ORDER { BUDDY_ALLOC.lock().alloc_order(order) } else { unsafe { @@ -106,61 +94,54 @@ impl PageAlloc for GlobalPageAlloc { page } - }; - - raw_page.map(|raw_page| { - // SAFETY: Memory order here can be Relaxed is for the same reason - // as that in the copy constructor of `std::shared_ptr`. - raw_page.refcount.fetch_add(1, Ordering::Relaxed); - - RawPagePtr::from_ref(raw_page) - }) + } } - unsafe fn dealloc(&self, page_ptr: RawPagePtr) { + pub unsafe fn dealloc_raw(&self, raw_page: &'static mut RawPage) { assert_eq!( - page_ptr.refcount().load(Ordering::Relaxed), + raw_page.refcount.load(Ordering::Relaxed), 0, "Trying to free a page with refcount > 0" ); - if page_ptr.order() > COSTLY_ORDER { - BUDDY_ALLOC.lock().dealloc(page_ptr.as_mut()); + if raw_page.order > COSTLY_ORDER { + BUDDY_ALLOC.lock().dealloc(raw_page); } else { - let order = page_ptr.order(); + let order = raw_page.order; unsafe { - PreemptGuard::new(PERCPU_PAGE_ALLOC.as_mut()).free_pages(page_ptr.as_mut(), order); + PreemptGuard::new(PERCPU_PAGE_ALLOC.as_mut()).free_pages(raw_page, order); } } } - - fn has_management_over(&self, page_ptr: RawPagePtr) -> bool { - page_ptr.order() > COSTLY_ORDER || page_ptr.flags().has(PageFlags::LOCAL) - } } -impl GlobalPageAllocTrait for GlobalPageAlloc { - fn global() -> Self { - GlobalPageAlloc +impl FrameAlloc for GlobalPageAlloc { + type Folio = Folio; + + fn alloc_order(&self, order: u32) -> Option { + 
self.alloc_raw_order(order).map(|raw_page| { + // SAFETY: Memory order here can be Relaxed is for the same reason + // as that in the copy constructor of `std::shared_ptr`. + + raw_page.refcount.fetch_add(1, Ordering::Relaxed); + Folio::from_mut_page(raw_page) + }) } } -impl PageAlloc for BuddyPageAlloc { - type RawPage = RawPagePtr; +impl GlobalFrameAlloc for GlobalPageAlloc { + const GLOBAL: Self = GlobalPageAlloc; +} - fn alloc_order(&self, order: u32) -> Option { - BUDDY_ALLOC - .lock() - .alloc_order(order) - .map(|raw_page| RawPagePtr::from_ref(raw_page)) - } +impl PageTableAlloc for GlobalPageAlloc { + type Folio = Folio; - unsafe fn dealloc(&self, page_ptr: RawPagePtr) { - BUDDY_ALLOC.lock().dealloc(page_ptr.as_mut()); + fn alloc(&self) -> Self::Folio { + FrameAlloc::alloc(self).unwrap() } - fn has_management_over(&self, _: RawPagePtr) -> bool { - true + unsafe fn from_raw(&self, pfn: PFN) -> Self::Folio { + unsafe { Folio::from_raw(pfn) } } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 074f82c7..0d775245 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -1,20 +1,17 @@ use core::ptr::NonNull; use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering}; -use buddy_allocator::BuddyPage; +use buddy_allocator::BuddyFolio; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::{PAddr, PhysAccess as _}; -use eonix_mm::paging::{PageAlloc, PageList, PageListSized, RawPage as RawPageTrait, PFN}; +use eonix_mm::paging::{FolioList, FolioListSized, Zone, PFN}; use intrusive_list::{container_of, Link, List}; use slab_allocator::{SlabPage, SlabPageAlloc, SlabSlot}; +use super::zones::ZONE; use super::{GlobalPageAlloc, PerCpuPage}; -use crate::kernel::mem::page_cache::PageCacheRawPage; use crate::kernel::mem::PhysAccess; -pub const PAGE_ARRAY: NonNull = - unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) }; - pub struct PageFlags(AtomicU32); #[derive(Clone, 
Copy)] @@ -41,11 +38,11 @@ pub struct RawPage { /// This can be used for LRU page swap in the future. /// /// Now only used for free page links in the buddy system. - link: Link, + pub link: Link, /// # Safety /// This field is only used in buddy system and is protected by the global lock. - order: u32, - flags: PageFlags, + pub order: u32, + pub flags: PageFlags, pub refcount: AtomicUsize, shared_data: PageData, @@ -55,9 +52,6 @@ pub struct RawPage { unsafe impl Send for RawPage {} unsafe impl Sync for RawPage {} -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct RawPagePtr(NonNull); - impl PageFlags { pub const LOCKED: u32 = 1 << 1; pub const BUDDY: u32 = 1 << 2; @@ -85,80 +79,9 @@ impl PageFlags { } } -impl RawPagePtr { - pub const fn from_ref(raw_page_ref: &RawPage) -> Self { - Self::new(unsafe { - // SAFETY: Rust references always points to non-null addresses. - NonNull::new_unchecked(&raw const *raw_page_ref as *mut _) - }) - } - - pub const fn new(ptr: NonNull) -> Self { - Self(ptr) - } - - /// Get a raw pointer to the underlying `RawPage` struct. - /// - /// # Safety - /// Doing arithmetic on the pointer returned will cause immediate undefined behavior. 
- pub const unsafe fn as_ptr(self) -> *mut RawPage { - self.0.as_ptr() - } - - pub const fn as_ref<'a>(self) -> &'a RawPage { - unsafe { &*self.as_ptr() } - } - - pub const fn as_mut<'a>(self) -> &'a mut RawPage { - unsafe { &mut *self.as_ptr() } - } - - pub const fn order(&self) -> u32 { - self.as_ref().order - } - - pub const fn flags(&self) -> &PageFlags { - &self.as_ref().flags - } - - pub const fn refcount(&self) -> &AtomicUsize { - &self.as_ref().refcount - } - - // return the ptr point to the actually raw page - pub fn real_ptr(&self) -> NonNull { - let pfn = unsafe { PFN::from(RawPagePtr(NonNull::new_unchecked(self.as_ptr()))) }; - unsafe { PAddr::from(pfn).as_ptr::() } - } -} - -impl From for PFN { - fn from(value: RawPagePtr) -> Self { - let idx = unsafe { value.as_ptr().offset_from(PAGE_ARRAY.as_ptr()) as usize }; - Self::from(idx) - } -} - -impl From for RawPagePtr { - fn from(pfn: PFN) -> Self { - let raw_page_ptr = unsafe { PAGE_ARRAY.add(usize::from(pfn)) }; - Self::new(raw_page_ptr) - } -} - -impl RawPageTrait for RawPagePtr { - fn order(&self) -> u32 { - self.order() - } - - fn refcount(&self) -> &AtomicUsize { - self.refcount() - } -} - -impl BuddyPage for RawPage { +impl BuddyFolio for RawPage { fn pfn(&self) -> PFN { - PFN::from(RawPagePtr::from_ref(self)) + ZONE.get_pfn(self) } fn get_order(&self) -> u32 { @@ -184,8 +107,7 @@ impl BuddyPage for RawPage { impl SlabPage for RawPage { fn get_data_ptr(&self) -> NonNull<[u8]> { - let raw_page_ptr = RawPagePtr::from_ref(self); - let paddr_start = PAddr::from(PFN::from(raw_page_ptr)); + let paddr_start = PAddr::from(ZONE.get_pfn(self)); let page_data_ptr = unsafe { paddr_start.as_ptr() }; NonNull::slice_from_raw_parts(page_data_ptr, 1 << (self.order + 12)) @@ -233,21 +155,9 @@ impl SlabPage for RawPage { let paddr = ArchPhysAccess::from_ptr(ptr); let pfn = PFN::from(paddr); - RawPagePtr::from(pfn).as_mut() - } - } -} - -impl PageCacheRawPage for RawPagePtr { - fn is_dirty(&self) -> bool { - 
self.flags().has(PageFlags::DIRTY) - } - - fn set_dirty(&self, dirty: bool) { - if dirty { - self.flags().set(PageFlags::DIRTY); - } else { - self.flags().clear(PageFlags::DIRTY); + ZONE.get_page(pfn) + .expect("Page outside of the global zone") + .as_mut() } } } @@ -264,14 +174,16 @@ impl PerCpuPage for RawPage { pub struct RawPageList(List); -impl PageList for RawPageList { - type Page = RawPage; +unsafe impl Send for RawPageList {} + +impl FolioList for RawPageList { + type Folio = RawPage; fn is_empty(&self) -> bool { self.0.is_empty() } - fn peek_head(&mut self) -> Option<&mut Self::Page> { + fn peek_head(&mut self) -> Option<&mut Self::Folio> { unsafe { let link = self.0.head()?; let mut raw_page_ptr = container_of!(link, RawPage, link); @@ -280,7 +192,7 @@ impl PageList for RawPageList { } } - fn pop_head(&mut self) -> Option<&'static mut Self::Page> { + fn pop_head(&mut self) -> Option<&'static mut Self::Folio> { unsafe { let link = self.0.pop()?; let mut raw_page_ptr = container_of!(link, RawPage, link); @@ -289,25 +201,25 @@ impl PageList for RawPageList { } } - fn push_tail(&mut self, page: &'static mut Self::Page) { + fn push_tail(&mut self, page: &'static mut Self::Folio) { self.0.insert(&mut page.link); } - fn remove(&mut self, page: &mut Self::Page) { + fn remove(&mut self, page: &mut Self::Folio) { self.0.remove(&mut page.link) } } -impl PageListSized for RawPageList { +impl FolioListSized for RawPageList { const NEW: Self = RawPageList(List::new()); } -impl SlabPageAlloc for GlobalPageAlloc { +unsafe impl SlabPageAlloc for GlobalPageAlloc { type Page = RawPage; type PageList = RawPageList; - unsafe fn alloc_uninit(&self) -> &'static mut RawPage { - let raw_page = self.alloc().expect("Out of memory").as_mut(); + fn alloc_slab_page(&self) -> &'static mut RawPage { + let raw_page = self.alloc_raw_order(0).expect("Out of memory"); raw_page.flags.set(PageFlags::SLAB); raw_page.shared_data.slab = SlabPageData::new(); diff --git 
a/src/kernel/mem/page_alloc/zones.rs b/src/kernel/mem/page_alloc/zones.rs index 7a2e4e33..032b9cd0 100644 --- a/src/kernel/mem/page_alloc/zones.rs +++ b/src/kernel/mem/page_alloc/zones.rs @@ -1,13 +1,23 @@ -use core::cell::UnsafeCell; +use core::ptr::NonNull; use eonix_mm::address::PRange; use eonix_mm::paging::{Zone, PFN}; use super::RawPage; -use crate::kernel::mem::page_alloc::RawPagePtr; + +pub static ZONE: GlobalZone = GlobalZone(); + +const PAGE_ARRAY: NonNull = + unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) }; pub struct GlobalZone(); +impl GlobalZone { + pub fn get_pfn(&self, page_ptr: *const RawPage) -> PFN { + PFN::from(unsafe { page_ptr.offset_from(PAGE_ARRAY.as_ptr()) as usize }) + } +} + impl Zone for GlobalZone { type Page = RawPage; @@ -15,11 +25,7 @@ impl Zone for GlobalZone { true } - fn get_page(&self, pfn: PFN) -> Option<&UnsafeCell> { - unsafe { - // SAFETY: The pointer returned by [`RawPagePtr::as_ptr()`] is valid. - // And so is it wrapped with [`UnsafeCell`] - Some(&*(RawPagePtr::from(pfn).as_ptr() as *const UnsafeCell)) - } + fn get_page(&self, pfn: PFN) -> Option> { + Some(unsafe { PAGE_ARRAY.add(usize::from(pfn)) }) } } diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 214c65a5..3fe33d5b 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -1,19 +1,16 @@ use alloc::collections::btree_map::{BTreeMap, Entry}; use core::future::Future; -use core::mem::ManuallyDrop; +use core::ops::{Deref, DerefMut}; -use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::address::{PAddr, PhysAccess}; -use eonix_mm::paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN}; +use eonix_mm::paging::{Folio as _, PAGE_SIZE, PAGE_SIZE_BITS, PFN}; use eonix_sync::Mutex; -use super::Page; +use super::page_alloc::PageFlags; +use super::{Folio, FolioOwned}; use crate::io::{Buffer, Stream}; use crate::kernel::constants::EINVAL; -use crate::kernel::mem::page_alloc::RawPagePtr; use 
crate::kernel::vfs::inode::InodeUse; use crate::prelude::KResult; -use crate::GlobalPageAlloc; #[repr(transparent)] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -24,14 +21,7 @@ pub struct PageCache { inode: InodeUse, } -unsafe impl Send for PageCache {} -unsafe impl Sync for PageCache {} - -#[derive(Clone, Copy)] -pub struct CachePage(RawPagePtr); - -unsafe impl Send for CachePage {} -unsafe impl Sync for CachePage {} +pub struct CachePage(Folio); impl PageOffset { pub const fn from_byte_floor(offset: usize) -> Self { @@ -57,39 +47,47 @@ impl PageOffset { impl CachePage { pub fn new() -> Self { - Self(GlobalPageAlloc.alloc().unwrap()) + CachePage(Folio::alloc()) } - pub fn as_bytes(&self) -> &[u8] { - unsafe { - core::slice::from_raw_parts( - // SAFETY: The page is owned by us, so we can safely access its data. - ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(), - PAGE_SIZE, - ) - } - } + pub fn new_zeroed() -> Self { + CachePage({ + let mut folio = FolioOwned::alloc(); + folio.as_bytes_mut().fill(0); - pub fn as_bytes_mut(&mut self) -> &mut [u8] { - unsafe { - core::slice::from_raw_parts_mut( - // SAFETY: The page is exclusively owned by us, so we can safely access its data. 
- ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(), - PAGE_SIZE, - ) - } + folio.share() + }) } pub fn is_dirty(&self) -> bool { - self.0.is_dirty() + self.flags.has(PageFlags::DIRTY) } pub fn set_dirty(&self, dirty: bool) { - self.0.set_dirty(dirty); + if dirty { + self.flags.set(PageFlags::DIRTY); + } else { + self.flags.clear(PageFlags::DIRTY); + } + } + + pub fn add_mapping(&self) -> PFN { + // TODO: Increase map_count + self.0.clone().into_raw() } +} + +impl Deref for CachePage { + type Target = Folio; - pub fn get_page(&self) -> Page { - unsafe { Page::with_raw(PFN::from(self.0), |page| page.clone()) } + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for CachePage { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 } } @@ -124,11 +122,7 @@ impl PageCache { } // TODO: Remove this. - pub async fn with_page( - &self, - pgoff: PageOffset, - func: impl FnOnce(&Page, &CachePage), - ) -> KResult<()> { + pub async fn with_page(&self, pgoff: PageOffset, func: impl FnOnce(&CachePage)) -> KResult<()> { let mut pages = self.pages.lock().await; if pgoff > PageOffset::from_byte_ceil(self.len()) { return Err(EINVAL); @@ -136,11 +130,7 @@ impl PageCache { let cache_page = self.get_page_locked(&mut pages, pgoff).await?; - unsafe { - let page = ManuallyDrop::new(Page::from_raw_unchecked(PFN::from(cache_page.0))); - - func(&page, cache_page); - } + func(cache_page); Ok(()) } @@ -166,7 +156,7 @@ impl PageCache { let data_len = real_end - offset; if buffer - .fill(&page.as_bytes()[inner_offset..inner_offset + data_len])? + .fill(&page.lock().as_bytes()[inner_offset..inner_offset + data_len])? .should_stop() || buffer.available() == 0 { @@ -195,7 +185,7 @@ impl PageCache { let inner_offset = offset % PAGE_SIZE; let written = stream - .poll_data(&mut page.as_bytes_mut()[inner_offset..])? + .poll_data(&mut page.lock().as_bytes_mut()[inner_offset..])? 
.map(|b| b.len()) .unwrap_or(0); @@ -237,14 +227,9 @@ impl core::fmt::Debug for PageCache { } } -pub trait PageCacheRawPage: RawPage { - fn is_dirty(&self) -> bool; - fn set_dirty(&self, dirty: bool); -} - impl Drop for PageCache { fn drop(&mut self) { - // TODO: Write back dirty pages... - // let _ = self.fsync(); + // XXX: Send the PageCache to some flusher worker. + let _ = self.fsync(); } } diff --git a/src/kernel/mem/paging.rs b/src/kernel/mem/paging.rs index bca573fb..1b95ce79 100644 --- a/src/kernel/mem/paging.rs +++ b/src/kernel/mem/paging.rs @@ -1,48 +1,22 @@ -use core::ops::Deref; -use core::ptr::NonNull; +use eonix_mm::paging::Folio as _; -use eonix_mm::paging::Page as GenericPage; - -use super::page_alloc::GlobalPageAlloc; -use super::PhysAccess; +use super::folio::FolioOwned; use crate::io::{Buffer, FillResult}; -pub type Page = GenericPage; - /// A buffer that wraps a page and provides a `Buffer` interface. pub struct PageBuffer { - page: PageExcl, + page: FolioOwned, offset: usize, } -pub struct PageLocked<'a> { - page: &'a Page, -} - -/// A page that is exclusively owned. -#[repr(transparent)] -pub struct PageExcl(Page); - pub trait AllocZeroed { fn zeroed() -> Self; } -pub trait PageExt { - fn lock(&self) -> PageLocked; - - /// Get a vmem pointer to the page data as a byte slice. - fn get_bytes_ptr(&self) -> NonNull<[u8]>; - - /// Get a vmem pointer to the start of the page. - fn get_ptr(&self) -> NonNull { - self.get_bytes_ptr().cast() - } -} - impl PageBuffer { pub fn new() -> Self { Self { - page: PageExcl::alloc(), + page: FolioOwned::alloc(), offset: 0, } } @@ -86,91 +60,3 @@ impl Buffer for PageBuffer { } } } - -impl AllocZeroed for Page { - fn zeroed() -> Self { - let page = Self::alloc(); - - page.lock().as_bytes_mut().fill(0); - - page - } -} - -impl PageExt for Page { - fn lock(&self) -> PageLocked { - // TODO: Actually perform the lock. 
- PageLocked { page: self } - } - - fn get_bytes_ptr(&self) -> NonNull<[u8]> { - unsafe { - // SAFETY: `self.start()` can't be null. - NonNull::slice_from_raw_parts(self.start().as_ptr(), self.len()) - } - } -} - -impl PageLocked<'_> { - pub fn as_bytes(&self) -> &[u8] { - unsafe { - // SAFETY: `self.start()` points to valid memory of length `self.len()`. - core::slice::from_raw_parts(self.start().as_ptr().as_ptr(), self.len()) - } - } - - pub fn as_bytes_mut(&mut self) -> &mut [u8] { - unsafe { - // SAFETY: `self.start()` points to valid memory of length `self.len()`. - core::slice::from_raw_parts_mut(self.start().as_ptr().as_ptr(), self.len()) - } - } -} - -impl Deref for PageLocked<'_> { - type Target = Page; - - fn deref(&self) -> &Self::Target { - self.page - } -} - -impl PageExcl { - pub fn alloc() -> Self { - Self(Page::alloc()) - } - - pub fn alloc_order(order: u32) -> Self { - Self(Page::alloc_order(order)) - } - - pub fn zeroed() -> Self { - Self(Page::zeroed()) - } - - pub fn as_bytes(&self) -> &[u8] { - unsafe { - // SAFETY: The page is exclusively owned by us. - self.get_bytes_ptr().as_ref() - } - } - - pub fn as_bytes_mut(&mut self) -> &mut [u8] { - unsafe { - // SAFETY: The page is exclusively owned by us. 
- self.get_bytes_ptr().as_mut() - } - } - - pub fn into_page(self) -> Page { - self.0 - } -} - -impl Deref for PageExcl { - type Target = Page; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} diff --git a/src/kernel/task/kernel_stack.rs b/src/kernel/task/kernel_stack.rs index d3e9de2f..f00b91bd 100644 --- a/src/kernel/task/kernel_stack.rs +++ b/src/kernel/task/kernel_stack.rs @@ -1,11 +1,12 @@ -use crate::kernel::mem::{paging::Page, PhysAccess as _}; -use core::{num::NonZero, ptr::NonNull}; +use core::ptr::NonNull; + use eonix_runtime::executor::Stack; +use crate::kernel::mem::FolioOwned; + #[derive(Debug)] pub struct KernelStack { - _pages: Page, - bottom: NonZero, + folio: FolioOwned, } impl KernelStack { @@ -14,15 +15,8 @@ impl KernelStack { const KERNEL_STACK_ORDER: u32 = 7; pub fn new() -> Self { - let pages = Page::alloc_order(Self::KERNEL_STACK_ORDER); - let bottom = unsafe { - // SAFETY: The paddr is from a page, which should be valid. - pages.range().end().as_ptr::().addr() - }; - Self { - _pages: pages, - bottom, + folio: FolioOwned::alloc_order(Self::KERNEL_STACK_ORDER), } } } @@ -33,7 +27,10 @@ impl Stack for KernelStack { } fn get_bottom(&self) -> NonNull<()> { - // SAFETY: The stack is allocated and `bottom` is non-zero. - unsafe { NonNull::new_unchecked(self.bottom.get() as *mut _) } + let ptr = self.folio.get_bytes_ptr(); + let len = ptr.len(); + + // SAFETY: The vaddr of the folio is guaranteed to be non-zero. 
+ unsafe { ptr.cast().byte_add(len) } } } diff --git a/src/kernel/vfs/file/mod.rs b/src/kernel/vfs/file/mod.rs index eb00cc4c..799b9848 100644 --- a/src/kernel/vfs/file/mod.rs +++ b/src/kernel/vfs/file/mod.rs @@ -15,7 +15,7 @@ pub use terminal_file::TerminalFile; use crate::io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}; use crate::kernel::constants::{EBADF, EINTR, EINVAL, ENOTTY}; -use crate::kernel::mem::PageExcl; +use crate::kernel::mem::FolioOwned; use crate::kernel::task::Thread; use crate::kernel::CharDevice; use crate::prelude::KResult; @@ -94,7 +94,7 @@ impl FileType { } pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { - let mut buffer_page = PageExcl::alloc(); + let mut buffer_page = FolioOwned::alloc(); let buffer = buffer_page.as_bytes_mut(); self.sendfile_check()?; diff --git a/src/kernel_init.rs b/src/kernel_init.rs index 93b6da20..65af41e4 100644 --- a/src/kernel_init.rs +++ b/src/kernel_init.rs @@ -1,32 +1,26 @@ +use eonix_hal::arch_exported::mm::{ArchPagingMode, PageAccessImpl}; use eonix_hal::bootstrap::BootStrapData; -use eonix_hal::mm::{ArchMemory, ArchPagingMode, GLOBAL_PAGE_TABLE}; +use eonix_hal::mm::{ArchMemory, BasicPageAllocRef, GLOBAL_PAGE_TABLE}; use eonix_hal::traits::mm::Memory; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr, VRange}; -use eonix_mm::page_table::{PageAttribute, PagingMode as _, PTE}; -use eonix_mm::paging::{Page as GenericPage, PAGE_SIZE, PFN}; +use eonix_mm::page_table::{PageAttribute, PageTable, PTE}; +use eonix_mm::paging::{Folio as _, FrameAlloc, PAGE_SIZE, PFN}; use crate::kernel::mem::{GlobalPageAlloc, RawPage}; -pub fn setup_memory(data: &mut BootStrapData) { - let addr_max = ArchMemory::present_ram() - .map(|range| range.end()) - .max() - .expect("No free memory"); - - let pfn_max = PFN::from(addr_max.ceil()); - let len_bytes_page_array = usize::from(pfn_max) * size_of::(); - let count_pages = len_bytes_page_array.div_ceil(PAGE_SIZE); - - let alloc = 
data.get_alloc().unwrap(); +fn setup_kernel_page_array(alloc: BasicPageAllocRef, count_pages: usize) { + // TODO: This should be done by the global Zone + let global_page_table = PageTable::::new( + GLOBAL_PAGE_TABLE.clone(), + alloc.clone(), + PageAccessImpl, + ); // Map kernel page array. const V_KERNEL_PAGE_ARRAY_START: VAddr = VAddr::from(0xffffff8040000000); - for pte in GLOBAL_PAGE_TABLE.iter_kernel_in( - VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages), - ArchPagingMode::LEVELS, - &alloc, - ) { + let range = VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages); + for pte in global_page_table.iter_kernel(range) { let attr = PageAttribute::PRESENT | PageAttribute::WRITE | PageAttribute::READ @@ -34,10 +28,15 @@ pub fn setup_memory(data: &mut BootStrapData) { | PageAttribute::ACCESSED | PageAttribute::DIRTY; - let page = GenericPage::alloc_in(&alloc); + let page = alloc.alloc().unwrap(); pte.set(page.into_raw(), attr.into()); } + // TODO!!!: Construct the global zone with all present ram. + // for range in ArchMemory::present_ram() { + // GlobalPageAlloc::mark_present(range); + // } + unsafe { // SAFETY: We've just mapped the area with sufficient length. core::ptr::write_bytes( @@ -47,10 +46,21 @@ pub fn setup_memory(data: &mut BootStrapData) { ); } - // TODO!!!: Construct the global zone with all present ram. 
- // for range in ArchMemory::present_ram() { - // GlobalPageAlloc::mark_present(range); - // } + core::mem::forget(global_page_table); +} + +pub fn setup_memory(data: &mut BootStrapData) { + let addr_max = ArchMemory::present_ram() + .map(|range| range.end()) + .max() + .expect("No free memory"); + + let pfn_max = PFN::from(addr_max.ceil()); + let len_bytes_page_array = usize::from(pfn_max) * size_of::(); + let count_pages = len_bytes_page_array.div_ceil(PAGE_SIZE); + + let alloc = data.get_alloc().unwrap(); + setup_kernel_page_array(alloc, count_pages); if let Some(early_alloc) = data.take_alloc() { for range in early_alloc.into_iter() { From 30d6c9d64551eba97070fb1071dbacb42271a469 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 01:09:33 +0800 Subject: [PATCH 40/54] script: add a script to help translate stacktraces - Add script/backtrace to translate backtraces. - Add a cut sign in the kernel panic routine to indicate the start of stack backtrace. Signed-off-by: greatbridf --- script/backtrace | 100 +++++++++++++++++++++++++++++++++++++++++++++++ src/panic.rs | 6 +++ 2 files changed, 106 insertions(+) create mode 100755 script/backtrace diff --git a/script/backtrace b/script/backtrace new file mode 100755 index 00000000..8a60c829 --- /dev/null +++ b/script/backtrace @@ -0,0 +1,100 @@ +#!/bin/bash + +ADDR2LINE=${ADDR2LINE:-riscv64-unknown-elf-addr2line} + +ksym=build/riscv64gc-unknown-none-elf/debug/eonix_kernel + +usage() { + cat < Use the given kernel symbol file + -o, --only-gbos Show kernel function calls only + -h, --help Show this message +EOF + exit "$1" +} + +# $1: instruction address +parse_pos() { + addr="$1" + shift + + "$ADDR2LINE" -e "$ksym" -i "$addr" "$@" 2>/dev/null +} + +filter_col() { + [ "$1" -eq 0 ] || awk "{ print \$$1; }" +} + +str_contains() { + grep -E "$1" >/dev/null 2>&1 +} + +filter_stacktrace() { + NL=$'\n' + _state=nonstart + _out= + while [ $_state != "end" ]; do + read -r _line + case $_state in + nonstart) + 
str_contains "8< CUT HERE" <<< "$_line" && _state=save + ;; + save) + if str_contains "8< CUT HERE" <<< "$_line"; then + _state=end + else + _out="$_out$_line$NL" + fi + ;; + esac + done + + echo "$_out" +} + +while [ "$#" -gt 0 ]; do + case "$1" in + -s|--ksym) + shift + ksym="$1" + ;; + -o|--only-gbos) + only_gb=y + ;; + --) + shift + break + ;; + -h|--help) + usage 0 + ;; + *) + usage 1 + ;; + esac + shift +done + +stacktrace="$(filter_stacktrace)" + +i=1 +for addr in $(filter_col 3 <<< "$stacktrace"); do + pos="$(parse_pos "$addr" "$@")" + + if [ -n "$only_gb" ]; then + if ! str_contains "greatbridf_os" <<< "$pos"; then + continue + fi + fi + + printf "========== %4d ==========\n" "$i" + + parse_pos "$addr" "$@" + + i=$((i + 1)) +done diff --git a/src/panic.rs b/src/panic.rs index 3c9c5f34..4a9ef92c 100644 --- a/src/panic.rs +++ b/src/panic.rs @@ -24,6 +24,12 @@ pub fn stack_trace() { UnwindReasonCode::NO_REASON } + println_fatal!("--------------8< CUT HERE 8<--------------"); + println_fatal!("Stacktrace:"); + println_fatal!(); + let mut data = CallbackData { counter: 0 }; _Unwind_Backtrace(callback, &raw mut data as *mut c_void); + + println_fatal!("--------------8< CUT HERE 8<--------------"); } From ee468628be9bf740bba5171b9f0313719a4d527f Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 02:35:05 +0800 Subject: [PATCH 41/54] style: fix or suppress warnings No functional changes - Add `{extern_,}symbol_addr` macro to retrieve symbol address. 
- Remove manual impl Send and Sync for RawPage - Make elided '_ lifetimes in return types explicit - Suppress unused warnings by allowing them - Remove really unused functions - Refactor `println_trace` macro to suppress unused variable warnings Signed-off-by: greatbridf --- crates/eonix_hal/src/lib.rs | 26 +++++++++ crates/eonix_log/src/lib.rs | 22 ++++++-- crates/eonix_percpu/src/lib.rs | 31 +++++------ crates/eonix_runtime/src/scheduler.rs | 35 +++++------- crates/eonix_sync/eonix_spin/src/lib.rs | 14 +++-- .../eonix_sync/eonix_sync_rt/src/spin_irq.rs | 4 +- crates/posix_types/src/poll.rs | 2 + macros/src/lib.rs | 4 +- src/driver/ahci/slot.rs | 4 +- src/driver/e1000e.rs | 2 + src/driver/serial/io.rs | 15 ++++-- src/fs/procfs.rs | 2 + src/fs/tmpfs/file.rs | 6 +-- src/kernel/constants.rs | 2 +- src/kernel/mem/folio.rs | 2 +- src/kernel/mem/mm_list/page_fault.rs | 19 +++---- src/kernel/mem/page_alloc/raw_page.rs | 4 -- src/kernel/pcie/device.rs | 24 +++++---- src/kernel/pcie/header.rs | 17 +++--- src/kernel/pcie/init.rs | 5 +- src/kernel/syscall.rs | 23 ++++---- src/kernel/task.rs | 3 +- src/kernel/task/process.rs | 54 +++++++++---------- src/kernel/vfs/dentry.rs | 6 ++- src/kernel/vfs/types/device_id.rs | 11 ---- src/lib.rs | 5 +- src/path.rs | 6 ++- src/sync/arcswap.rs | 11 ++-- 28 files changed, 188 insertions(+), 171 deletions(-) diff --git a/crates/eonix_hal/src/lib.rs b/crates/eonix_hal/src/lib.rs index b9c7d053..d3bf7825 100644 --- a/crates/eonix_hal/src/lib.rs +++ b/crates/eonix_hal/src/lib.rs @@ -43,3 +43,29 @@ pub mod arch_exported { pub use eonix_hal_macros::{ap_main, default_trap_handler, main}; pub use eonix_hal_traits as traits; + +#[macro_export] +macro_rules! symbol_addr { + ($sym:expr) => {{ + ($sym) as *const () as usize + }}; + ($sym:expr, $type:ty) => {{ + ($sym) as *const () as *const $type + }}; +} + +#[macro_export] +macro_rules! 
extern_symbol_addr { + ($sym:ident) => {{ + unsafe extern "C" { + fn $sym(); + } + $crate::symbol_addr!($sym) + }}; + ($sym:ident, $type:ty) => {{ + unsafe extern "C" { + fn $sym(); + } + $crate::symbol_addr!($sym, $type) + }}; +} diff --git a/crates/eonix_log/src/lib.rs b/crates/eonix_log/src/lib.rs index 92b1639f..01b6a587 100644 --- a/crates/eonix_log/src/lib.rs +++ b/crates/eonix_log/src/lib.rs @@ -2,6 +2,7 @@ use alloc::sync::Arc; use core::fmt::{self, Write}; + use eonix_sync::{Spin, SpinIrq as _}; extern crate alloc; @@ -91,18 +92,31 @@ macro_rules! println_fatal { #[macro_export] macro_rules! println_trace { - ($feat:literal) => { + (feat:$feat:literal) => { #[deny(unexpected_cfgs)] { #[cfg(feature = $feat)] - $crate::println!("[kernel:trace] ") + $crate::println!("[kernel:trace]") } }; - ($feat:literal, $($arg:tt)*) => {{ + (feat:$feat:literal, $fmt:literal) => {{ #[deny(unexpected_cfgs)] { #[cfg(feature = $feat)] - $crate::println!("[kernel:trace] {}", format_args!($($arg)*)) + $crate::println!(concat!("[kernel:trace] ", $feat)) } }}; + (feat:$feat:literal, $fmt:literal, $($arg:expr $(,)?)*) => { + #[deny(unexpected_cfgs)] + { + // Suppress unused variables warning + #[cfg(not(feature = $feat))] + { + $(let _ = $arg;)* + } + + #[cfg(feature = $feat)] + $crate::println!("[kernel:trace] {}", format_args!($fmt, $($arg,)*)) + } + }; } diff --git a/crates/eonix_percpu/src/lib.rs b/crates/eonix_percpu/src/lib.rs index 1fc7ffb8..a00b5c05 100644 --- a/crates/eonix_percpu/src/lib.rs +++ b/crates/eonix_percpu/src/lib.rs @@ -1,28 +1,21 @@ #![no_std] use core::alloc::Layout; -use core::ptr::null_mut; -use core::ptr::NonNull; -use core::sync::atomic::AtomicPtr; -use core::sync::atomic::Ordering; - -#[cfg(target_arch = "x86_64")] -pub use eonix_percpu_macros::define_percpu_x86_64 as define_percpu; - -#[cfg(target_arch = "x86_64")] -pub use eonix_percpu_macros::define_percpu_shared_x86_64 as define_percpu_shared; - -#[cfg(target_arch = "riscv64")] -pub use 
eonix_percpu_macros::define_percpu_riscv64 as define_percpu; - -#[cfg(target_arch = "riscv64")] -pub use eonix_percpu_macros::define_percpu_shared_riscv64 as define_percpu_shared; +use core::ptr::{null_mut, NonNull}; +use core::sync::atomic::{AtomicPtr, Ordering}; #[cfg(target_arch = "loongarch64")] pub use eonix_percpu_macros::define_percpu_loongarch64 as define_percpu; - +#[cfg(target_arch = "riscv64")] +pub use eonix_percpu_macros::define_percpu_riscv64 as define_percpu; #[cfg(target_arch = "loongarch64")] pub use eonix_percpu_macros::define_percpu_shared_loongarch64 as define_percpu_shared; +#[cfg(target_arch = "riscv64")] +pub use eonix_percpu_macros::define_percpu_shared_riscv64 as define_percpu_shared; +#[cfg(target_arch = "x86_64")] +pub use eonix_percpu_macros::define_percpu_shared_x86_64 as define_percpu_shared; +#[cfg(target_arch = "x86_64")] +pub use eonix_percpu_macros::define_percpu_x86_64 as define_percpu; const MAX_CPUS: usize = 256; @@ -41,7 +34,7 @@ impl PercpuArea { unsafe extern "C" { fn PERCPU_LENGTH(); } - let len = PERCPU_LENGTH as usize; + let len = PERCPU_LENGTH as *const () as usize; assert_ne!(len, 0, "Percpu length should not be zero."); len @@ -52,7 +45,7 @@ impl PercpuArea { fn PERCPU_DATA_START(); } - let addr = PERCPU_DATA_START as usize; + let addr = PERCPU_DATA_START as *const () as usize; NonNull::new(addr as *mut _).expect("Percpu data should not be null.") } diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs index 3f72fbf4..b4b7960d 100644 --- a/crates/eonix_runtime/src/scheduler.rs +++ b/crates/eonix_runtime/src/scheduler.rs @@ -1,20 +1,19 @@ -use crate::{ - executor::OutputHandle, - ready_queue::{local_rq, ReadyQueue}, - task::{Task, TaskAdapter, TaskHandle, TaskState}, -}; -use alloc::{sync::Arc, task::Wake}; -use core::{ - ops::{Deref, DerefMut}, - ptr::NonNull, - task::{Context, Poll, Waker}, -}; +use alloc::sync::Arc; +use alloc::task::Wake; +use core::ops::{Deref, DerefMut}; +use 
core::ptr::NonNull; +use core::task::{Context, Poll, Waker}; + use eonix_hal::processor::halt; use eonix_log::println_trace; use eonix_sync::{LazyLock, Spin, SpinIrq as _}; use intrusive_collections::RBTree; use pointers::BorrowedArc; +use crate::executor::OutputHandle; +use crate::ready_queue::{local_rq, ReadyQueue}; +use crate::task::{Task, TaskAdapter, TaskHandle, TaskState}; + #[eonix_percpu::define_percpu] static CURRENT_TASK: Option> = None; @@ -93,12 +92,6 @@ impl Runtime { } } - fn current(&self) -> Option> { - CURRENT_TASK - .get() - .map(|ptr| unsafe { BorrowedArc::from_raw(ptr) }) - } - fn remove_and_enqueue_current(&self, rq: &mut impl DerefMut) { let Some(current) = CURRENT_TASK .swap(None) @@ -116,7 +109,7 @@ impl Runtime { }) { Ok(TaskState::READY_RUNNING) => { println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Re-enqueueing task {:?} (CPU{})", current.id, eonix_hal::processor::CPU::local().cpuid(), @@ -126,7 +119,7 @@ impl Runtime { } Ok(_) => { println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Current task {:?} (CPU{}) is blocked, not re-enqueueing", current.id, eonix_hal::processor::CPU::local().cpuid(), @@ -184,7 +177,7 @@ impl Runtime { }; println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Switching to task {:?} (CPU{})", next.id, eonix_hal::processor::CPU::local().cpuid(), @@ -212,7 +205,7 @@ impl Runtime { ); println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Task {:?} finished execution, removing...", Task::current().id, ); diff --git a/crates/eonix_sync/eonix_spin/src/lib.rs b/crates/eonix_sync/eonix_spin/src/lib.rs index 4718b867..7225aceb 100644 --- a/crates/eonix_sync/eonix_spin/src/lib.rs +++ b/crates/eonix_sync/eonix_spin/src/lib.rs @@ -2,13 +2,11 @@ mod guard; -use core::{ - cell::UnsafeCell, - marker::PhantomData, - sync::atomic::{AtomicBool, Ordering}, -}; -use eonix_sync_base::{Relax, SpinRelax}; +use core::cell::UnsafeCell; +use core::marker::PhantomData; +use 
core::sync::atomic::{AtomicBool, Ordering}; +use eonix_sync_base::{Relax, SpinRelax}; pub use guard::{SpinGuard, UnlockedSpinGuard}; pub trait SpinContext { @@ -84,7 +82,7 @@ where T: ?Sized, R: Relax, { - pub fn lock_with_context(&self, context: C) -> SpinGuard + pub fn lock_with_context(&self, context: C) -> SpinGuard<'_, T, C, R> where C: SpinContext, { @@ -100,7 +98,7 @@ where ) } - pub fn lock(&self) -> SpinGuard { + pub fn lock(&self) -> SpinGuard<'_, T, DisablePreemption, R> { self.lock_with_context(DisablePreemption::save()) } diff --git a/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs b/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs index 76a28682..b70cdc3d 100644 --- a/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs +++ b/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs @@ -12,7 +12,7 @@ pub trait SpinIrq { type Context: SpinContext; type Relax; - fn lock_irq(&self) -> SpinGuard; + fn lock_irq(&self) -> SpinGuard<'_, Self::Value, Self::Context, Self::Relax>; } impl SpinContext for IrqContext { @@ -50,7 +50,7 @@ where type Context = IrqContext; type Relax = R; - fn lock_irq(&self) -> SpinGuard { + fn lock_irq(&self) -> SpinGuard<'_, Self::Value, Self::Context, Self::Relax> { self.lock_with_context(IrqContext::save()) } } diff --git a/crates/posix_types/src/poll.rs b/crates/posix_types/src/poll.rs index 781f589f..dcf5f9b2 100644 --- a/crates/posix_types/src/poll.rs +++ b/crates/posix_types/src/poll.rs @@ -1,5 +1,7 @@ pub const FDSET_LENGTH: usize = 1024 / (8 * size_of::()); +// TODO: Implement syscall pselect +#[allow(unused)] pub struct FDSet { fds_bits: [usize; FDSET_LENGTH], } diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 722fa5da..09e12f99 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -123,7 +123,7 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { Box::new_in( async move { eonix_log::println_trace!( - "trace_syscall", + feat: "trace_syscall", "tid{}: {}({}) => {{", thd.tid, #syscall_name_str, @@ 
-133,7 +133,7 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { let retval = #real_fn(thd, #(#args_call),*).await.into_retval(); eonix_log::println_trace!( - "trace_syscall", + feat: "trace_syscall", "}} => {:x?}", retval, ); diff --git a/src/driver/ahci/slot.rs b/src/driver/ahci/slot.rs index dd096f57..fdb61f96 100644 --- a/src/driver/ahci/slot.rs +++ b/src/driver/ahci/slot.rs @@ -39,6 +39,8 @@ enum SlotState { Idle, Working, Finished, + // TODO: Implement AHCI error handling + #[allow(unused)] Error, } @@ -67,7 +69,7 @@ impl CommandList { + (size_of::>() + size_of::>()) * 32 } - pub fn get(&self, index: usize) -> CommandSlot { + pub fn get(&self, index: usize) -> CommandSlot<'_> { CommandSlot { cmdheader: &self.cmdheaders()[index], control: &self.controls()[index], diff --git a/src/driver/e1000e.rs b/src/driver/e1000e.rs index 6d6ca353..923a4594 100644 --- a/src/driver/e1000e.rs +++ b/src/driver/e1000e.rs @@ -61,6 +61,8 @@ struct E1000eDev { tx_tail: Option, rx_buffers: Box<[FolioOwned; RX_DESC_SIZE]>, + // TODO: Implement E1000e send + #[allow(unused)] tx_buffers: Box<[Option; TX_DESC_SIZE]>, } diff --git a/src/driver/serial/io.rs b/src/driver/serial/io.rs index aec18f20..57e61c56 100644 --- a/src/driver/serial/io.rs +++ b/src/driver/serial/io.rs @@ -1,10 +1,11 @@ -use super::SerialRegister; use core::ptr::NonNull; -use eonix_hal::{fence::memory_barrier, mm::ArchPhysAccess}; -use eonix_mm::address::{PAddr, PhysAccess}; #[cfg(target_arch = "x86_64")] use eonix_hal::arch_exported::io::Port8; +use eonix_hal::mm::ArchPhysAccess; +use eonix_mm::address::{PAddr, PhysAccess}; + +use super::SerialRegister; #[cfg(target_arch = "x86_64")] pub struct SerialIO { @@ -73,10 +74,12 @@ impl SerialIO { self.line_status } + #[allow(unused)] pub fn modem_status(&self) -> impl SerialRegister { self.modem_status } + #[allow(unused)] pub fn scratch(&self) -> impl SerialRegister { self.scratch } @@ -100,7 +103,7 @@ impl SerialRegister for NonNull { let 
retval = unsafe { self.as_ptr().read_volatile() }; #[cfg(target_arch = "loongarch64")] - memory_barrier(); + eonix_hal::fence::memory_barrier(); retval } @@ -110,7 +113,7 @@ impl SerialRegister for NonNull { unsafe { self.as_ptr().write_volatile(data) }; #[cfg(target_arch = "loongarch64")] - memory_barrier(); + eonix_hal::fence::memory_barrier(); } } @@ -155,10 +158,12 @@ impl SerialIO { unsafe { self.base_addr.add(5) } } + #[allow(unused)] pub fn modem_status(&self) -> impl SerialRegister { unsafe { self.base_addr.add(6) } } + #[allow(unused)] pub fn scratch(&self) -> impl SerialRegister { unsafe { self.base_addr.add(7) } } diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 32ede420..9a3933bb 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -26,6 +26,8 @@ enum NodeKind { struct FileInode { read: Option KResult<()> + Send + Sync>>, + // TODO: Implement writes to procfs files + #[allow(unused)] write: Option<()>, } diff --git a/src/fs/tmpfs/file.rs b/src/fs/tmpfs/file.rs index d560a672..aafae539 100644 --- a/src/fs/tmpfs/file.rs +++ b/src/fs/tmpfs/file.rs @@ -177,7 +177,6 @@ impl InodeOps for FileInode { } pub struct DeviceInode { - is_block: bool, devid: DeviceId, } @@ -199,10 +198,7 @@ impl DeviceInode { ctime: now, mtime: now, }, - Self { - is_block: mode.format() == Format::BLK, - devid, - }, + Self { devid }, ) } } diff --git a/src/kernel/constants.rs b/src/kernel/constants.rs index 4e11d66e..b96387b0 100644 --- a/src/kernel/constants.rs +++ b/src/kernel/constants.rs @@ -36,7 +36,7 @@ pub const ENOTDIR: u32 = 20; pub const EISDIR: u32 = 21; pub const EINVAL: u32 = 22; pub const ENOTTY: u32 = 25; -pub const ENOSPC: u32 = 28; +// pub const ENOSPC: u32 = 28; pub const ESPIPE: u32 = 29; // pub const EROFS: u32 = 30; pub const EPIPE: u32 = 32; diff --git a/src/kernel/mem/folio.rs b/src/kernel/mem/folio.rs index 6647e1af..8ab4d6be 100644 --- a/src/kernel/mem/folio.rs +++ b/src/kernel/mem/folio.rs @@ -79,7 +79,7 @@ impl Folio { } } - pub fn lock(&self) -> 
LockedFolio { + pub fn lock(&self) -> LockedFolio<'_> { // TODO: actually perform the lock... LockedFolio(self) } diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs index 7aac141d..5a56efbc 100644 --- a/src/kernel/mem/mm_list/page_fault.rs +++ b/src/kernel/mem/mm_list/page_fault.rs @@ -1,3 +1,4 @@ +use eonix_hal::extern_symbol_addr; use eonix_hal::mm::flush_tlb; use eonix_hal::traits::fault::PageFaultErrorCode; use eonix_mm::address::{Addr as _, AddrOps as _, VRange}; @@ -24,27 +25,19 @@ impl FixEntry { VAddr::from((self.start + self.length) as usize) } - #[allow(dead_code)] - fn range(&self) -> VRange { - VRange::new(self.start(), self.end()) - } - fn jump_address(&self) -> VAddr { VAddr::from(self.jump_address as usize) } fn entries() -> &'static [FixEntry] { - extern "C" { - fn FIX_START(); - fn FIX_END(); - } + let fix_seg_len_bytes = extern_symbol_addr!(FIX_END) - extern_symbol_addr!(FIX_START); unsafe { - // SAFETY: `FIX_START` and `FIX_END` are defined in the - // linker script in `.rodata` section. + // SAFETY: `FIX_START` and `FIX_END` are defined in the linker script + // in `.rodata` section. core::slice::from_raw_parts( - FIX_START as usize as *const FixEntry, - (FIX_END as usize - FIX_START as usize) / size_of::(), + extern_symbol_addr!(FIX_START, FixEntry), + fix_seg_len_bytes / size_of::(), ) } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 0d775245..16d57714 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -48,10 +48,6 @@ pub struct RawPage { shared_data: PageData, } -// XXX: introduce Folio and remove this. 
-unsafe impl Send for RawPage {} -unsafe impl Sync for RawPage {} - impl PageFlags { pub const LOCKED: u32 = 1 << 1; pub const BUDDY: u32 = 1 << 2; diff --git a/src/kernel/pcie/device.rs b/src/kernel/pcie/device.rs index 085e7b9a..2a8f150d 100644 --- a/src/kernel/pcie/device.rs +++ b/src/kernel/pcie/device.rs @@ -1,14 +1,17 @@ -use super::{ - header::{Bar, Command}, - CommonHeader, Header, -}; -use crate::kernel::mem::PhysAccess as _; +use alloc::collections::btree_map::BTreeMap; +use alloc::sync::Arc; +use alloc::vec::Vec; +use core::num::NonZero; +use core::ops::RangeInclusive; + use align_ext::AlignExt; -use alloc::{collections::btree_map::BTreeMap, sync::Arc, vec::Vec}; -use core::{num::NonZero, ops::RangeInclusive}; use eonix_mm::address::{Addr, PAddr, PRange}; use eonix_sync::Spin; +use super::header::{Bar, Command}; +use super::{CommonHeader, Header}; +use crate::kernel::mem::PhysAccess as _; + pub(super) static PCIE_DEVICES: Spin>>> = Spin::new(BTreeMap::new()); @@ -20,7 +23,7 @@ pub struct PCIDevice<'a> { pub device_id: u16, } -#[allow(dead_code)] +#[allow(unused)] #[derive(Clone)] pub struct SegmentGroup { id: usize, @@ -28,6 +31,7 @@ pub struct SegmentGroup { base_address: PAddr, } +#[allow(unused)] #[derive(Clone)] pub struct ConfigSpace { pub bus: u8, @@ -180,10 +184,12 @@ impl PCIDevice<'_> { ); } + #[allow(unused)] pub fn config_space(&self) -> &ConfigSpace { &self.config_space } + #[allow(unused)] pub fn segment_group(&self) -> &SegmentGroup { &self.segment_group } @@ -209,7 +215,7 @@ impl PciMemoryAllocator { self.start += size; eonix_log::println_trace!( - "trace_pci", + feat: "trace_pci", "PciMemoryAllocator: Allocated {} bytes at {:#x}", size, base diff --git a/src/kernel/pcie/header.rs b/src/kernel/pcie/header.rs index 889795d3..0a44ea28 100644 --- a/src/kernel/pcie/header.rs +++ b/src/kernel/pcie/header.rs @@ -1,10 +1,9 @@ +use core::marker::PhantomData; +use core::num::NonZero; +use core::ops::{BitAnd, BitOr, Deref, Not}; +use 
core::sync::atomic::{AtomicU16, AtomicU32, Ordering}; + use bitflags::bitflags; -use core::{ - marker::PhantomData, - num::NonZero, - ops::{BitAnd, BitOr, Deref, Not}, - sync::atomic::{AtomicU16, AtomicU32, Ordering}, -}; use eonix_hal::fence::memory_barrier; pub trait BitFlag: Sized + Copy { @@ -215,14 +214,14 @@ where } impl CommonHeader { - pub fn command(&self) -> Register { + pub fn command(&self) -> Register<'_, Command> { Register { register: unsafe { AtomicU16::from_ptr((&raw const self._command) as *mut u16) }, _phantom: PhantomData, } } - pub fn status(&self) -> Register { + pub fn status(&self) -> Register<'_, Status> { Register { register: unsafe { AtomicU16::from_ptr((&raw const self._status) as *mut u16) }, _phantom: PhantomData, @@ -231,7 +230,7 @@ impl CommonHeader { } impl Bars<'_> { - pub fn iter(&self) -> impl Iterator + '_ { + pub fn iter(&self) -> impl Iterator> + use<'_> { struct BarsIterator<'a> { bars: &'a [AtomicU32], pos: usize, diff --git a/src/kernel/pcie/init.rs b/src/kernel/pcie/init.rs index 4c183bc5..74a490b4 100644 --- a/src/kernel/pcie/init.rs +++ b/src/kernel/pcie/init.rs @@ -10,6 +10,7 @@ use super::error::PciError; use crate::kernel::mem::PhysAccess as _; use crate::kernel::pcie::device::PciMemoryAllocator; +#[allow(unused)] #[derive(Clone)] struct AcpiHandlerImpl; @@ -34,7 +35,6 @@ pub fn init_pcie() -> Result<(), PciError> { #[cfg(target_arch = "x86_64")] { use acpi::{AcpiTables, PciConfigRegions}; - use eonix_mm::address::PAddr; let acpi_tables = unsafe { // SAFETY: Our impl should be correct. 
@@ -69,7 +69,6 @@ pub fn init_pcie() -> Result<(), PciError> { #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { use eonix_hal::arch_exported::fdt::FDT; - use eonix_mm::address::PRange; use crate::kernel::constants::{EINVAL, EIO, ENOENT}; @@ -88,7 +87,7 @@ pub fn init_pcie() -> Result<(), PciError> { let size = u64::from_be_bytes(entry[20..28].try_into().unwrap()); println_trace!( - "trace_pci", + feat: "trace_pci", "PCIe range: PCI address = {:#x}, CPU address = {:#x}, size = {:#x}", pci_address, cpu_address, diff --git a/src/kernel/syscall.rs b/src/kernel/syscall.rs index 78ddcd1c..d06c5d88 100644 --- a/src/kernel/syscall.rs +++ b/src/kernel/syscall.rs @@ -1,11 +1,17 @@ -use super::task::ThreadAlloc; -use crate::kernel::task::Thread; use alloc::boxed::Box; -use core::{future::Future, marker::PhantomData, ops::Deref, pin::Pin}; +use core::future::Future; +use core::marker::PhantomData; +use core::ops::Deref; +use core::pin::Pin; + +use eonix_hal::extern_symbol_addr; use eonix_mm::address::{Addr, VAddr}; use eonix_sync::LazyLock; use posix_types::ctypes::PtrT; +use super::task::ThreadAlloc; +use crate::kernel::task::Thread; + pub mod file_rw; pub mod mm; pub mod net; @@ -280,12 +286,6 @@ impl core::fmt::Debug for UserMut { } static SYSCALL_HANDLERS: LazyLock<[Option; MAX_SYSCALL_NO]> = LazyLock::new(|| { - extern "C" { - // SAFETY: `SYSCALL_HANDLERS` is defined in linker script. - fn RAW_SYSCALL_HANDLERS(); - fn RAW_SYSCALL_HANDLERS_SIZE(); - } - // DO NOT TOUCH THESE FUNCTIONS!!! // THEY ARE USED FOR KEEPING THE OBJECTS NOT STRIPPED BY THE LINKER!!! 
file_rw::keep_alive(); @@ -294,15 +294,14 @@ static SYSCALL_HANDLERS: LazyLock<[Option; MAX_SYSCALL_NO]> = La procops::keep_alive(); sysinfo::keep_alive(); - let raw_handlers_addr = RAW_SYSCALL_HANDLERS as *const (); - let raw_handlers_size_byte = RAW_SYSCALL_HANDLERS_SIZE as usize; + let raw_handlers_size_byte = extern_symbol_addr!(RAW_SYSCALL_HANDLERS_SIZE); assert!(raw_handlers_size_byte % size_of::() == 0); let raw_handlers_count = raw_handlers_size_byte / size_of::(); let raw_handlers = unsafe { core::slice::from_raw_parts( - raw_handlers_addr as *const RawSyscallHandler, + extern_symbol_addr!(RAW_SYSCALL_HANDLERS, RawSyscallHandler), raw_handlers_count, ) }; diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 3fe6fe97..b0966046 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -11,6 +11,7 @@ mod thread; mod user_tls; pub use clone::{do_clone, CloneArgs, CloneFlags}; +use eonix_hal::symbol_addr; pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; pub use kernel_stack::KernelStack; pub use loader::ProgramLoader; @@ -185,7 +186,7 @@ where trap_ctx.set_user_mode(false); trap_ctx.set_interrupt_enabled(true); let _ = trap_ctx.set_user_call_frame( - execute:: as usize, + symbol_addr!(execute::), Some(sp.addr().get()), None, &[(&raw mut future) as usize, output.get() as usize], diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 421e4b8b..3eff5949 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -1,34 +1,30 @@ -use super::{ - process_group::ProcessGroupBuilder, signal::RaiseResult, thread::ThreadBuilder, ProcessGroup, - ProcessList, Session, Thread, -}; -use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; -use crate::kernel::task::{CloneArgs, CloneFlags}; -use crate::rcu::call_rcu; -use crate::{ - kernel::mem::MMList, - prelude::*, - rcu::{RCUPointer, RCUReadGuard}, - sync::CondVar, -}; -use alloc::{ - collections::{btree_map::BTreeMap, 
vec_deque::VecDeque}, - sync::{Arc, Weak}, -}; +use alloc::collections::btree_map::BTreeMap; +use alloc::collections::vec_deque::VecDeque; +use alloc::sync::{Arc, Weak}; use core::sync::atomic::{AtomicU32, Ordering}; -use eonix_mm::address::VAddr; + use eonix_sync::{ AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, SpinGuard, UnlockableGuard as _, UnlockedGuard as _, }; use pointers::BorrowedArc; use posix_types::constants::{ - CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_PGID, P_PIDFD, + CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_ALL, P_PGID, P_PID, P_PIDFD, }; -use posix_types::constants::{P_ALL, P_PID}; use posix_types::signal::Signal; use posix_types::SIGNAL_COREDUMP; +use super::process_group::ProcessGroupBuilder; +use super::signal::RaiseResult; +use super::thread::ThreadBuilder; +use super::{ProcessGroup, ProcessList, Session, Thread}; +use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; +use crate::kernel::mem::MMList; +use crate::kernel::task::{CloneArgs, CloneFlags}; +use crate::prelude::*; +use crate::rcu::{call_rcu, RCUPointer, RCUReadGuard}; +use crate::sync::CondVar; + pub struct ProcessBuilder { mm_list: Option, exit_signal: Option, @@ -51,8 +47,6 @@ pub struct Process { pub exit_signal: Option, - pub shm_areas: Spin>, - /// Parent process /// /// `parent` must be valid during the whole life of the process. @@ -256,7 +250,6 @@ impl ProcessBuilder { pid: self.pid.expect("should set pid before building"), wait_list: WaitList::new(), mm_list, - shm_areas: Spin::new(BTreeMap::new()), exit_signal: self.exit_signal, parent: RCUPointer::empty(), pgroup: RCUPointer::empty(), @@ -522,17 +515,17 @@ impl Process { } /// Provide RCU locked (maybe inconsistent) access to the session. 
- pub fn session_rcu(&self) -> RCUReadGuard<'_, BorrowedArc> { + pub fn session_rcu(&self) -> RCUReadGuard<'_, BorrowedArc<'_, Session>> { self.session.load().unwrap() } /// Provide RCU locked (maybe inconsistent) access to the process group. - pub fn pgroup_rcu(&self) -> RCUReadGuard<'_, BorrowedArc> { + pub fn pgroup_rcu(&self) -> RCUReadGuard<'_, BorrowedArc<'_, ProcessGroup>> { self.pgroup.load().unwrap() } /// Provide RCU locked (maybe inconsistent) access to the parent process. - pub fn parent_rcu(&self) -> Option>> { + pub fn parent_rcu(&self) -> Option>> { self.parent.load() } @@ -569,7 +562,7 @@ impl WaitList { self.cv_wait_procs.notify_all(); } - pub fn drain_exited(&self) -> DrainExited { + pub fn drain_exited(&self) -> DrainExited<'_> { DrainExited { wait_procs: self.wait_procs.lock(), } @@ -578,7 +571,12 @@ impl WaitList { /// # Safety /// Locks `ProcessList` and `WaitList` at the same time. When `wait` is called, /// releases the lock on `ProcessList` and `WaitList` and waits on `cv_wait_procs`. 
- pub async fn entry(&self, wait_id: WaitId, want_stop: bool, want_continue: bool) -> Entry { + pub async fn entry( + &self, + wait_id: WaitId, + want_stop: bool, + want_continue: bool, + ) -> Entry<'_, '_, '_> { Entry { process_list: ProcessList::get().read().await, wait_procs: self.wait_procs.lock(), diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs index 22760de9..a401f4f7 100644 --- a/src/kernel/vfs/dentry.rs +++ b/src/kernel/vfs/dentry.rs @@ -27,6 +27,8 @@ use crate::path::Path; use crate::prelude::*; use crate::rcu::{rcu_read_lock, RCUNode, RCUPointer, RCUReadGuard}; +// TODO: Implement slab reclaim +#[allow(unused)] const D_INVALID: u8 = 0; const D_REGULAR: u8 = 1; const D_DIRECTORY: u8 = 2; @@ -159,7 +161,7 @@ impl Dentry { && &***self.name() == &***other.name() } - pub fn name(&self) -> RCUReadGuard>> { + pub fn name(&self) -> RCUReadGuard<'_, BorrowedArc<'_, Arc<[u8]>>> { self.name.load().expect("Dentry has no name") } @@ -167,7 +169,7 @@ impl Dentry { (***self.name()).clone() } - pub fn parent<'a>(&self) -> RCUReadGuard<'a, BorrowedArc> { + pub fn parent<'a>(&self) -> RCUReadGuard<'a, BorrowedArc<'_, Dentry>> { self.parent.load().expect("Dentry has no parent") } diff --git a/src/kernel/vfs/types/device_id.rs b/src/kernel/vfs/types/device_id.rs index cf3ea886..6dd128ee 100644 --- a/src/kernel/vfs/types/device_id.rs +++ b/src/kernel/vfs/types/device_id.rs @@ -10,17 +10,6 @@ impl DeviceId { pub const fn new(major: u16, minor: u16) -> Self { Self { major, minor } } - - pub const fn from_raw(raw: u32) -> Self { - Self { - major: (raw >> 16) as u16, - minor: (raw & 0xFFFF) as u16, - } - } - - pub const fn to_raw(self) -> u32 { - ((self.major as u32) << 16) | (self.minor as u32) - } } impl Debug for DeviceId { diff --git a/src/lib.rs b/src/lib.rs index 8457169c..4f7fb262 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,6 +38,7 @@ use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use eonix_hal::arch_exported::bootstrap::shutdown; use 
eonix_hal::context::TaskContext; use eonix_hal::processor::{halt, CPU, CPU_COUNT}; +use eonix_hal::symbol_addr; use eonix_hal::traits::context::RawTaskContext; use eonix_hal::traits::trap::IrqState; use eonix_hal::trap::disable_irqs_save; @@ -136,7 +137,7 @@ fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! { bottom }; ctx.set_interrupt_enabled(true); - ctx.set_program_counter(standard_main as usize); + ctx.set_program_counter(symbol_addr!(standard_main)); ctx.set_stack_pointer(stack_bottom); unsafe { @@ -162,7 +163,7 @@ fn kernel_ap_main(_stack_range: PRange) -> ! { bottom }; ctx.set_interrupt_enabled(true); - ctx.set_program_counter(standard_main as usize); + ctx.set_program_counter(symbol_addr!(standard_main)); ctx.set_stack_pointer(stack_bottom); unsafe { diff --git a/src/path.rs b/src/path.rs index b342ef5f..47b9a4b6 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,6 +1,8 @@ -use crate::{kernel::constants::ENOENT, prelude::*}; use core::fmt::{self, Debug, Formatter}; +use crate::kernel::constants::ENOENT; +use crate::prelude::*; + #[repr(transparent)] pub struct Path { all: [u8], @@ -23,7 +25,7 @@ impl Path { self.all.starts_with(&['/' as u8]) } - pub fn iter(&self) -> PathIterator { + pub fn iter(&self) -> PathIterator<'_> { PathIterator::new(&self.all) } } diff --git a/src/sync/arcswap.rs b/src/sync/arcswap.rs index fb8219b2..7421659f 100644 --- a/src/sync/arcswap.rs +++ b/src/sync/arcswap.rs @@ -1,9 +1,8 @@ use alloc::sync::Arc; -use core::{ - fmt::{self, Debug, Formatter}, - ptr::NonNull, - sync::atomic::{AtomicPtr, Ordering}, -}; +use core::fmt::{self, Debug, Formatter}; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicPtr, Ordering}; + use pointers::BorrowedArc; unsafe impl Send for ArcSwap where T: Send + Sync {} @@ -33,7 +32,7 @@ impl ArcSwap { } } - pub fn borrow(&self) -> BorrowedArc { + pub fn borrow(&self) -> BorrowedArc<'_, T> { unsafe { BorrowedArc::from_raw( NonNull::new(self.pointer.load(Ordering::Acquire)) From 
3b832e38d622f42a7e51839268dfd5d6db0b94d1 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 02:56:56 +0800 Subject: [PATCH 42/54] user, init: update riscv64 init script Make print message prettier. Also panic when we have an error. Signed-off-by: greatbridf --- user-programs/init_script_riscv64.sh | 88 ++++++++++++++++------------ 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/user-programs/init_script_riscv64.sh b/user-programs/init_script_riscv64.sh index 52b2628c..b5ce95d7 100644 --- a/user-programs/init_script_riscv64.sh +++ b/user-programs/init_script_riscv64.sh @@ -1,60 +1,75 @@ #!/mnt/busybox sh BUSYBOX=/mnt/busybox +TERMINAL=/dev/ttyS0 +VERBOSE= -freeze() { - echo "an error occurred while executing '''$@''', freezing..." >&2 +error() { + printf "\033[91merror: \033[0m%s\n" "$1" >&2 +} + +warn() { + printf "\033[93mwarn : \033[0m%s\n" "$1" >&2 +} + +info() { + printf "\033[92minfo : \033[0m%s\n" "$1" >&2 +} + +die() { + error "$1" && freeze +} +freeze() { + info "freezing..." >&2 while true; do - true + : done + + exit 1 } -do_or_freeze() { - if $@; then - return - fi +unrecoverable() { + die "unrecoverable error occurred. check the message above." +} - freeze $@ +busybox() { + $BUSYBOX "$@" } -do_or_freeze $BUSYBOX mkdir -p /dev +trap unrecoverable EXIT + +set -euo pipefail -do_or_freeze $BUSYBOX mknod -m 666 /dev/console c 5 1 -do_or_freeze $BUSYBOX mknod -m 666 /dev/null c 1 3 -do_or_freeze $BUSYBOX mknod -m 666 /dev/zero c 1 5 -do_or_freeze $BUSYBOX mknod -m 666 /dev/vda b 8 0 -do_or_freeze $BUSYBOX mknod -m 666 /dev/vda1 b 8 1 -do_or_freeze $BUSYBOX mknod -m 666 /dev/vdb b 8 16 -do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS0 c 4 64 -do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS1 c 4 65 +if [ -n "$VERBOSE" ]; then + set -x +fi -echo -n -e "deploying busybox... 
" >&2 +busybox mkdir -p /dev -do_or_freeze $BUSYBOX mkdir -p /bin -do_or_freeze $BUSYBOX --install -s /bin -do_or_freeze $BUSYBOX mkdir -p /lib +busybox mknod -m 666 /dev/console c 5 1 +busybox mknod -m 666 /dev/null c 1 3 +busybox mknod -m 666 /dev/zero c 1 5 +busybox mknod -m 666 /dev/vda b 8 0 +busybox mknod -m 666 /dev/vda1 b 8 1 +busybox mknod -m 666 /dev/vdb b 8 16 +busybox mknod -m 666 /dev/ttyS0 c 4 64 +busybox mknod -m 666 /dev/ttyS1 c 4 65 -export PATH="/bin" +info "deploying busybox..." -echo ok >&2 +busybox mkdir -p /bin /lib +busybox --install -s /bin -do_or_freeze mkdir -p /etc /root /proc -do_or_freeze mount -t procfs proc proc +info "done" -# Check if the device /dev/vdb is available and can be read -if dd if=/dev/vdb of=/dev/null bs=512 count=1; then - echo -n -e "Mounting the ext4 image... " >&2 - do_or_freeze mkdir -p /mnt1 - do_or_freeze mount -t ext4 /dev/vdb /mnt1 - echo ok >&2 -fi +export PATH="/bin" -cp /mnt/ld-musl-i386.so.1 /lib/ld-musl-i386.so.1 -ln -s /lib/ld-musl-i386.so.1 /lib/libc.so +mkdir -p /etc /root /proc +mount -t procfs proc proc cat > /etc/passwd < /etc/group < /dev/ttyS0 2> /dev/ttyS0 +# shellcheck disable=SC2094 +exec sh -l < "$TERMINAL" > "$TERMINAL" 2> "$TERMINAL" # We don't have a working init yet, so we use busybox sh directly for now. 
# exec /mnt/init /bin/sh -c 'exec sh -l < /dev/ttyS0 > /dev/ttyS0 2> /dev/ttyS0' From b2c4c54dbf037cfa0074b780529c6dc79848615c Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 14:33:38 +0800 Subject: [PATCH 43/54] chore: remove and ignore vscode settings from svc Signed-off-by: greatbridf --- .gitignore | 1 + .vscode/settings.json | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index fbc2a9b1..4684b698 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ build/ .idea/ +.vscode/settings.json test/ diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 10b4a8b4..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "makefile.configureOnOpen": false, - "editor.formatOnSave": true, -} From 59f044422cd47b405d9190786333e7fa226905e8 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 21:35:39 +0800 Subject: [PATCH 44/54] mm, proc: add an exited thread reaper - thd.exit() will set thd.dead and send it to the reaper - delay the release of process mm until we reap it - extract futex logic out of exit and exec routine Signed-off-by: greatbridf --- src/kernel/mem/mm_list.rs | 30 +++---- src/kernel/syscall/procops.rs | 26 ++---- src/kernel/task.rs | 5 +- src/kernel/task/futex.rs | 53 +++++++++--- src/kernel/task/process_list.rs | 144 ++++++++++++++++++++------------ src/kernel/task/signal.rs | 20 ++--- src/kernel/task/thread.rs | 49 ++++++++--- src/lib.rs | 23 +---- 8 files changed, 209 insertions(+), 141 deletions(-) diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index 5221c73b..f073025b 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -398,20 +398,6 @@ impl MMList { assert_ne!(old_user_count, 0); } - /// Deactivate `self` and activate `to` with root page table changed only once. - /// This might reduce the overhead of switching page tables twice. 
- #[allow(dead_code)] - pub fn switch(&self, to: &Self) { - self.user_count.fetch_add(1, Ordering::Acquire); - - let root_page_table = self.root_page_table.load(Ordering::Relaxed); - assert_ne!(root_page_table, 0); - set_root_page_table_pfn(PFN::from(PAddr::from(root_page_table))); - - let old_user_count = to.user_count.fetch_sub(1, Ordering::Release); - assert_ne!(old_user_count, 0); - } - /// Replace the current page table with a new one. /// /// # Safety @@ -454,10 +440,24 @@ impl MMList { // TODO: Check whether we should wake someone up if they've been put // to sleep when calling `vfork`. - self.inner + let old_mm = self + .inner .swap(new.map(|new_mm| new_mm.inner.swap(None)).flatten()); eonix_preempt::enable(); + + // This could take long... + drop(old_mm); + } + + pub fn release(&self) { + let old_mm = self.inner.swap(None); + let old_table = self.root_page_table.swap(0, Ordering::Relaxed); + + // TODO: Remove this completely... + // XXX: `ArcSwap` is broken and never safe to use. Check `replace` above. + assert_ne!(old_table, 0, "Already released?"); + assert!(old_mm.is_some(), "Already released?"); } /// No need to do invalidation manually, `PageTable` already does it. 
diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 1359d0ab..3e815f25 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -22,8 +22,8 @@ use crate::kernel::constants::{ use crate::kernel::mem::PageBuffer; use crate::kernel::syscall::{User, UserMut}; use crate::kernel::task::{ - do_clone, futex_wait, futex_wake, parse_futexop, yield_now, CloneArgs, FutexFlags, FutexOp, - ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, + do_clone, futex_exec, futex_wait, futex_wake, parse_futexop, yield_now, CloneArgs, FutexFlags, + FutexOp, ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, }; use crate::kernel::timer::sleep; use crate::kernel::user::{UserBuffer, UserPointer, UserPointerMut, UserString}; @@ -213,10 +213,7 @@ async fn execve(exec: User, argv: User, envp: User) -> KResult, argv: User, envp: User) -> KResult SyscallNoReturn { - let mut procs = ProcessList::get().write().await; - - unsafe { - procs - .do_exit(&thread, WaitType::Exited(status), false) - .await; - } + thread.exit(WaitType::Exited(status)); SyscallNoReturn } #[eonix_macros::define_syscall(SYS_EXIT_GROUP)] async fn exit_group(status: u32) -> SyscallNoReturn { - let mut procs = ProcessList::get().write().await; - - unsafe { - procs.do_exit(&thread, WaitType::Exited(status), true).await; - } + // XXX: Send SIGKILL to our sibling threads. 
+ thread.exit(WaitType::Exited(status)); SyscallNoReturn } @@ -856,7 +844,7 @@ async fn rt_sigreturn() -> KResult { "`rt_sigreturn` failed in thread {} with error {err}!", thread.tid ); - thread.force_kill(Signal::SIGSEGV).await; + thread.force_kill(Signal::SIGSEGV); return Err(err); } diff --git a/src/kernel/task.rs b/src/kernel/task.rs index b0966046..6505666c 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -12,7 +12,10 @@ mod user_tls; pub use clone::{do_clone, CloneArgs, CloneFlags}; use eonix_hal::symbol_addr; -pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; +pub use futex::{ + futex_exec, futex_exit, futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, + RobustListHead, +}; pub use kernel_stack::KernelStack; pub use loader::ProgramLoader; pub use process::{alloc_pid, Process, ProcessBuilder, WaitId, WaitObject, WaitType}; diff --git a/src/kernel/task/futex.rs b/src/kernel/task/futex.rs index a04d7091..4dd57615 100644 --- a/src/kernel/task/futex.rs +++ b/src/kernel/task/futex.rs @@ -1,19 +1,17 @@ -use core::pin::pin; - use alloc::sync::Arc; use alloc::vec::Vec; +use core::pin::pin; + use bitflags::bitflags; +use eonix_mm::address::Addr; use eonix_sync::{LazyLock, Mutex, MutexGuard, WaitList}; use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink}; -use crate::{ - kernel::{ - constants::{EAGAIN, EINVAL}, - syscall::User, - user::UserPointer, - }, - prelude::KResult, -}; +use super::Thread; +use crate::kernel::constants::{EAGAIN, EINVAL}; +use crate::kernel::syscall::User; +use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::prelude::KResult; #[derive(PartialEq, Debug, Clone, Copy)] #[repr(u32)] @@ -318,3 +316,38 @@ impl RobustListHead { Ok(()) } } + +async fn do_futex_exit(thread: &Thread) -> KResult<()> { + if let Some(clear_ctid) = thread.get_clear_ctid() { + UserPointerMut::new(clear_ctid)?.write(0u32)?; + + futex_wake(clear_ctid.addr(), None, 
1).await?; + } + + if let Some(robust_list) = thread.get_robust_list() { + robust_list.wake_all().await?; + } + + Ok(()) +} + +pub async fn futex_exit(thread: &Thread) { + // We don't care about any error happened inside. + // If they've set up a wrong pointer, good luck to them... + let _ = do_futex_exit(thread); +} + +async fn do_futex_exec(thread: &Thread) -> KResult<()> { + if let Some(robust_list) = thread.get_robust_list() { + robust_list.wake_all().await?; + thread.set_robust_list(None); + } + + Ok(()) +} + +pub async fn futex_exec(thread: &Thread) { + // We don't care about any error happened inside. + // If they've set up a wrong pointer, good luck to them... + let _ = do_futex_exec(thread); +} diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs index af073e84..c676d22e 100644 --- a/src/kernel/task/process_list.rs +++ b/src/kernel/task/process_list.rs @@ -1,16 +1,17 @@ +use alloc::collections::btree_map::BTreeMap; +use alloc::collections::vec_deque::VecDeque; +use alloc::sync::{Arc, Weak}; +use core::pin::pin; use core::sync::atomic::Ordering; -use super::{Process, ProcessGroup, Session, Thread, WaitObject, WaitType}; -use crate::{ - kernel::{task::futex_wake, user::UserPointerMut}, - rcu::rcu_sync, -}; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, +use eonix_runtime::scheduler::RUNTIME; +use eonix_sync::{AsProof as _, AsProofMut as _, RwLock, Spin, WaitList}; + +use super::loader::LoadInfo; +use super::{ + alloc_pid, Process, ProcessBuilder, ProcessGroup, Session, Thread, ThreadBuilder, WaitObject, }; -use eonix_mm::address::Addr; -use eonix_sync::{AsProof as _, AsProofMut as _, RwLock}; +use crate::rcu::rcu_sync; pub struct ProcessList { /// The init process. 
@@ -78,7 +79,7 @@ impl ProcessList { } } - pub fn set_init_process(&mut self, init: Arc) { + fn set_init_process(&mut self, init: Arc) { let old_init = self.init.replace(init); assert!(old_init.is_none(), "Init process already set"); } @@ -103,45 +104,66 @@ impl ProcessList { self.sessions.get(&sid).and_then(Weak::upgrade) } - /// Make the process a zombie and notify the parent. - /// # Safety - /// This function will destroy the process and all its threads. - /// It is the caller's responsibility to ensure that the process is not - /// running or will not run after this function is called. - pub async unsafe fn do_exit( - &mut self, - thread: &Thread, - exit_status: WaitType, - is_exiting_group: bool, - ) { - let process = thread.process.clone(); - - if process.pid == 1 { - panic!("init exited"); - } + pub async fn sys_init(load_info: LoadInfo) { + let thread_builder = ThreadBuilder::new() + .name(Arc::from(&b"busybox"[..])) + .entry(load_info.entry_ip, load_info.sp); - let inner = process.inner.access_mut(self.prove_mut()); + let mut process_list = ProcessList::get().write().await; + let (thread, process) = ProcessBuilder::new() + .pid(alloc_pid()) + .mm_list(load_info.mm_list) + .thread_builder(thread_builder) + .build(&mut process_list); - thread.dead.store(true, Ordering::SeqCst); + process_list.set_init_process(process); - if is_exiting_group { - // TODO: Send SIGKILL to all threads. - // todo!() - } + // TODO!!!: Remove this. 
+ thread.files.open_console(); - if thread.tid != process.pid { - self.threads.remove(&thread.tid); - inner.threads.remove(&thread.tid).unwrap(); - } + RUNTIME.spawn(Reaper::daemon()); + RUNTIME.spawn(thread.run()); + } + + pub fn send_to_reaper(thread: Arc) { + GLOBAL_REAPER.reap_list.lock().push_back(thread); + GLOBAL_REAPER.wait.notify_one(); + } +} + +struct Reaper { + reap_list: Spin>>, + wait: WaitList, +} + +static GLOBAL_REAPER: Reaper = Reaper { + reap_list: Spin::new(VecDeque::new()), + wait: WaitList::new(), +}; - if let Some(clear_ctid) = thread.get_clear_ctid() { - let _ = UserPointerMut::new(clear_ctid).unwrap().write(0u32); +impl Reaper { + async fn reap(&self, thread: Arc) { + let exit_status = thread + .exit_status + .lock() + .take() + .expect("Exited thread with no exit status"); - let _ = futex_wake(clear_ctid.addr(), None, 1).await; + let process = &thread.process; + + if process.pid == 1 && thread.tid == process.pid { + panic!("init exited"); } - if let Some(robust_list) = thread.get_robust_list() { - let _ = robust_list.wake_all().await; + let mut procs = ProcessList::get().write().await; + + let inner = process.inner.access_mut(procs.prove_mut()); + + thread.dead.store(true, Ordering::SeqCst); + + if thread.tid != process.pid { + procs.threads.remove(&thread.tid); + inner.threads.remove(&thread.tid).unwrap(); } // main thread exit @@ -151,48 +173,62 @@ impl ProcessList { thread.files.close_all().await; // If we are the session leader, we should drop the control terminal. - if process.session(self.prove()).sid == process.pid { - if let Some(terminal) = process.session(self.prove()).drop_control_terminal().await + if process.session(procs.prove()).sid == process.pid { + if let Some(terminal) = process.session(procs.prove()).drop_control_terminal().await { terminal.drop_session().await; } } // Release the MMList as well as the page table. - unsafe { - // SAFETY: We are exiting the process, so no one might be using it. 
- process.mm_list.replace(None); - } + process.mm_list.release(); // Make children orphans (adopted by init) { - let init = self.init_process(); + let init = procs.init_process(); inner.children.retain(|_, child| { let child = child.upgrade().unwrap(); // SAFETY: `child.parent` must be ourself. So we don't need to free it. unsafe { child.parent.swap(Some(init.clone())) }; - init.add_child(&child, self.prove_mut()); + init.add_child(&child, procs.prove_mut()); false }); } - let mut init_notify = self.init_process().notify_batch(); + let mut init_notify = procs.init_process().notify_batch(); process .wait_list .drain_exited() .into_iter() .for_each(|item| init_notify.notify(item)); - init_notify.finish(self.prove()); + init_notify.finish(procs.prove()); - process.parent(self.prove()).notify( + process.parent(procs.prove()).notify( process.exit_signal, WaitObject { pid: process.pid, code: exit_status, }, - self.prove(), + procs.prove(), ); } } + + async fn daemon() { + let me = &GLOBAL_REAPER; + + loop { + let mut wait = pin!(me.wait.prepare_to_wait()); + wait.as_mut().add_to_wait_list(); + + let thd_to_reap = me.reap_list.lock().pop_front(); + if let Some(thd_to_reap) = thd_to_reap { + me.reap(thd_to_reap).await; + continue; + } + + wait.await; + } + } } diff --git a/src/kernel/task/signal.rs b/src/kernel/task/signal.rs index d9970cad..0a7b580d 100644 --- a/src/kernel/task/signal.rs +++ b/src/kernel/task/signal.rs @@ -1,11 +1,10 @@ mod signal_action; -use super::{ProcessList, Thread, WaitObject, WaitType}; -use crate::kernel::constants::{EFAULT, EINVAL}; -use crate::{kernel::user::UserPointer, prelude::*}; use alloc::collections::binary_heap::BinaryHeap; use alloc::sync::Arc; -use core::{cmp::Reverse, task::Waker}; +use core::cmp::Reverse; +use core::task::Waker; + use eonix_hal::fpu::FpuState; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; @@ -14,9 +13,13 @@ use eonix_sync::AsProof as _; use intrusive_collections::UnsafeRef; use 
posix_types::signal::{SigSet, Signal}; use posix_types::{SIGNAL_IGNORE, SIGNAL_NOW, SIGNAL_STOP}; +pub use signal_action::SignalAction; use signal_action::SignalActionList; -pub use signal_action::SignalAction; +use super::{ProcessList, Thread, WaitObject, WaitType}; +use crate::kernel::constants::{EFAULT, EINVAL}; +use crate::kernel::user::UserPointer; +use crate::prelude::*; pub(self) const SAVED_DATA_SIZE: usize = size_of::() + size_of::() + size_of::(); @@ -168,10 +171,7 @@ impl SignalList { pub async fn handle(&self, trap_ctx: &mut TrapContext, fpu_state: &mut FpuState) { loop { let signal = { - let signal = match self.inner.lock().pop() { - Some(signal) => signal, - None => return, - }; + let Some(signal) = self.inner.lock().pop() else { return }; let handler = self.inner.lock().actions.get(signal); if let SignalAction::SimpleHandler { mask, .. } = &handler { @@ -246,7 +246,7 @@ impl SignalList { } signal => { // Default to terminate the thread. - Thread::current().force_kill(signal).await; + Thread::current().force_kill(signal); return; } } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 77e8e618..7e005875 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -24,8 +24,7 @@ use super::{stackful, Process, ProcessList, WaitType}; use crate::kernel::interrupt::default_irq_handler; use crate::kernel::syscall::{syscall_handlers, SyscallHandler, User, UserMut}; use crate::kernel::task::clone::CloneArgs; -use crate::kernel::task::futex::RobustListHead; -use crate::kernel::task::CloneFlags; +use crate::kernel::task::{futex_exit, CloneFlags, RobustListHead}; use crate::kernel::timer::{should_reschedule, timer_interrupt}; use crate::kernel::user::{UserPointer, UserPointerMut}; use crate::kernel::vfs::filearray::FileArray; @@ -83,6 +82,7 @@ pub struct Thread { pub fpu_state: AtomicUniqueRefCell, pub dead: AtomicBool, + pub exit_status: Spin>, inner: Spin, } @@ -240,6 +240,7 @@ impl ThreadBuilder { trap_ctx: 
AtomicUniqueRefCell::new(trap_ctx), fpu_state: AtomicUniqueRefCell::new(fpu_state), dead: AtomicBool::new(false), + exit_status: Spin::new(None), inner: Spin::new(ThreadInner { name, tls: self.tls, @@ -331,18 +332,26 @@ impl Thread { } } - pub async fn force_kill(&self, signal: Signal) { - let mut proc_list = ProcessList::get().write().await; - unsafe { - // SAFETY: Preemption is disabled. - proc_list - .do_exit(self, WaitType::Signaled(signal), false) - .await; + pub fn exit(&self, exit_status: WaitType) { + { + let mut self_status = self.exit_status.lock(); + if self_status.is_some() { + // Someone has got here before us. + return; + } + + *self_status = Some(exit_status); } + + self.dead.store(true, Ordering::Release); + } + + pub fn force_kill(&self, signal: Signal) { + self.exit(WaitType::Signaled(signal)); } pub fn is_dead(&self) -> bool { - self.dead.load(Ordering::SeqCst) + self.dead.load(Ordering::Acquire) } async fn real_run(&self) { @@ -385,6 +394,10 @@ impl Thread { error_code, address: addr, }) => { + if self.is_dead() { + return; + } + let mms = &self.process.mm_list; if let Err(signal) = mms.handle_user_page_fault(addr, error_code).await { self.signal_list.raise(signal); @@ -407,6 +420,10 @@ impl Thread { } } TrapType::Syscall { no, args } => { + if self.is_dead() { + return; + } + if let Some(retval) = self.handle_syscall(thd_alloc, no, args).await { let mut trap_ctx = self.trap_ctx.borrow(); trap_ctx.set_user_return_value(retval); @@ -447,7 +464,17 @@ impl Thread { } pub fn run(self: Arc) -> impl Future + Send + 'static { - async move { self.contexted(stackful(self.real_run())).await } + async move { + self.contexted(async { + stackful(self.real_run()).await; + + futex_exit(&self).await; + }) + .await; + + assert!(self.is_dead(), "`real_run` returned before the thread die?"); + ProcessList::send_to_reaper(self); + } } } diff --git a/src/lib.rs b/src/lib.rs index 4f7fb262..2e28db24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,7 +31,6 @@ mod rcu; 
mod sync; use alloc::ffi::CString; -use alloc::sync::Arc; use core::hint::spin_loop; use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; @@ -46,7 +45,7 @@ use eonix_mm::address::PRange; use eonix_runtime::executor::Stack; use eonix_runtime::scheduler::RUNTIME; use kernel::mem::GlobalPageAlloc; -use kernel::task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}; +use kernel::task::{KernelStack, ProcessList, ProgramLoader}; use kernel::vfs::dentry::Dentry; use kernel::vfs::mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}; use kernel::vfs::types::Permission; @@ -56,8 +55,6 @@ use kernel_init::setup_memory; use path::Path; use prelude::*; -use crate::kernel::task::alloc_pid; - #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] fn do_panic() -> ! { #[cfg(target_arch = "riscv64")] @@ -276,21 +273,5 @@ async fn init_process(early_kstack: PRange) { .expect("Failed to load init program") }; - let thread_builder = ThreadBuilder::new() - .name(Arc::from(&b"busybox"[..])) - .entry(load_info.entry_ip, load_info.sp); - - let mut process_list = ProcessList::get().write().await; - let (thread, process) = ProcessBuilder::new() - .pid(alloc_pid()) - .mm_list(load_info.mm_list) - .thread_builder(thread_builder) - .build(&mut process_list); - - process_list.set_init_process(process); - - // TODO!!!: Remove this. - thread.files.open_console(); - - RUNTIME.spawn(thread.run()); + ProcessList::sys_init(load_info).await; } From dae6e754108815661efd292e997e8a7243c1e1bc Mon Sep 17 00:00:00 2001 From: greatbridf Date: Wed, 21 Jan 2026 00:41:58 +0800 Subject: [PATCH 45/54] proc: rewrite process list organization - Use intrusive lists to store and organize the process hierarchy. - Remove `FileArray::open_console()`. Do it in the init script instead. - Fix open logic: acquire controlling terminals only if O_NOCTTY is not set. Put this into TerminalFile::open(). 
- Send SIGHUP and then SIGCONT to foreground pgroup procs when the controlling terminal is dropped. - Set the controlling terminal of sessions in Terminal. - Limit max line width to 80. Format some codes. Signed-off-by: greatbridf --- .rustfmt.toml | 4 +- Cargo.toml | 4 +- crates/posix_types/src/open.rs | 6 +- src/kernel/chardev.rs | 84 ++++----- src/kernel/syscall/file_rw.rs | 144 +++++++++++--- src/kernel/task/process.rs | 270 ++++++++++++++++++--------- src/kernel/task/process_group.rs | 146 +++++++++------ src/kernel/task/process_list.rs | 151 ++++++++------- src/kernel/task/session.rs | 187 ++++++++++++------- src/kernel/task/thread.rs | 106 +++++++++-- src/kernel/terminal.rs | 156 ++++++++++------ src/kernel/vfs/file/terminal_file.rs | 68 +++++-- src/kernel/vfs/filearray.rs | 88 ++++----- user-programs/init_script_riscv64.sh | 6 +- 14 files changed, 927 insertions(+), 493 deletions(-) diff --git a/.rustfmt.toml b/.rustfmt.toml index 17b2bbc5..85b1cfc7 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,4 +1,4 @@ -max_width = 100 +max_width = 80 hard_tabs = false tab_spaces = 4 newline_style = "Auto" @@ -14,7 +14,7 @@ single_line_if_else_max_width = 60 single_line_let_else_max_width = 60 wrap_comments = false format_code_in_doc_comments = false -doc_comment_code_block_width = 100 +doc_comment_code_block_width = 80 comment_width = 80 normalize_comments = false normalize_doc_attributes = false diff --git a/Cargo.toml b/Cargo.toml index 4fcb6f70..214e5941 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,8 +27,10 @@ pointers = { path = "./crates/pointers" } posix_types = { path = "./crates/posix_types" } slab_allocator = { path = "./crates/slab_allocator" } +intrusive-collections = { version = "0.9.8", features = [ + "nightly", +], git = "https://github.com/greatbridf/intrusive-rs" } bitflags = "2.6.0" -intrusive-collections = { version = "0.9.8", git = "https://github.com/greatbridf/intrusive-rs" } itertools = { version = "0.13.0", default-features = false } acpi 
= "5.2.0" align_ext = "0.1.0" diff --git a/crates/posix_types/src/open.rs b/crates/posix_types/src/open.rs index 758ea331..7135e5b8 100644 --- a/crates/posix_types/src/open.rs +++ b/crates/posix_types/src/open.rs @@ -11,6 +11,8 @@ bitflags! { const O_CREAT = 0x40; /// Exclusive access, fail if file exists const O_EXCL = 0x80; + /// Don't set controlling terminal. + const O_NOCTTY = 0x100; /// Truncate file to zero length if it exists const O_TRUNC = 0x200; /// Open file in append mode @@ -116,6 +118,8 @@ impl AtFlags { } pub fn statx_default_sync(&self) -> bool { - !self.intersects(AtFlags::AT_STATX_FORCE_SYNC | AtFlags::AT_STATX_DONT_SYNC) + !self.intersects( + AtFlags::AT_STATX_FORCE_SYNC | AtFlags::AT_STATX_DONT_SYNC, + ) } } diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs index 4e01d83a..e4a6e1b3 100644 --- a/src/kernel/chardev.rs +++ b/src/kernel/chardev.rs @@ -1,22 +1,18 @@ -use super::{ - console::get_console, - constants::{EEXIST, EIO}, - task::{block_on, ProcessList, Thread}, - terminal::Terminal, - vfs::{types::DeviceId, File, FileType, TerminalFile}, -}; -use crate::{ - io::{Buffer, Stream, StreamRead}, - prelude::*, -}; -use alloc::{ - boxed::Box, - collections::btree_map::{BTreeMap, Entry}, - sync::Arc, -}; -use eonix_sync::AsProof as _; +use alloc::boxed::Box; +use alloc::collections::btree_map::{BTreeMap, Entry}; +use alloc::sync::Arc; + use posix_types::open::OpenFlags; +use super::console::get_console; +use super::constants::{EEXIST, EIO}; +use super::task::{block_on, Thread}; +use super::terminal::Terminal; +use super::vfs::types::DeviceId; +use super::vfs::{File, FileType, TerminalFile}; +use crate::io::{Buffer, Stream, StreamRead}; +use crate::prelude::*; + pub trait VirtualCharDevice: Send + Sync { fn read(&self, buffer: &mut dyn Buffer) -> KResult; fn write(&self, stream: &mut dyn Stream) -> KResult; @@ -33,12 +29,15 @@ pub struct CharDevice { device: CharDeviceType, } -static CHAR_DEVICES: Spin>> = Spin::new(BTreeMap::new()); 
+static CHAR_DEVICES: Spin>> = + Spin::new(BTreeMap::new()); impl CharDevice { pub fn read(&self, buffer: &mut dyn Buffer) -> KResult { match &self.device { - CharDeviceType::Terminal(terminal) => block_on(terminal.read(buffer)), + CharDeviceType::Terminal(terminal) => { + block_on(terminal.read(buffer)) + } CharDeviceType::Virtual(device) => device.read(buffer), } } @@ -46,10 +45,12 @@ impl CharDevice { pub fn write(&self, stream: &mut dyn Stream) -> KResult { match &self.device { CharDeviceType::Virtual(device) => device.write(stream), - CharDeviceType::Terminal(terminal) => stream.read_till_end(&mut [0; 128], |data| { - terminal.write(data); - Ok(()) - }), + CharDeviceType::Terminal(terminal) => { + stream.read_till_end(&mut [0; 128], |data| { + terminal.write(data); + Ok(()) + }) + } } } @@ -57,7 +58,11 @@ impl CharDevice { CHAR_DEVICES.lock().get(&devid).cloned() } - pub fn register(devid: DeviceId, name: Arc, device: CharDeviceType) -> KResult<()> { + pub fn register( + devid: DeviceId, + name: Arc, + device: CharDeviceType, + ) -> KResult<()> { match CHAR_DEVICES.lock().entry(devid) { Entry::Vacant(entry) => { entry.insert(Arc::new(CharDevice { name, device })); @@ -67,26 +72,21 @@ impl CharDevice { } } - pub fn open(self: &Arc, flags: OpenFlags) -> KResult { - Ok(match &self.device { + pub async fn open( + self: &Arc, + thread: &Thread, + flags: OpenFlags, + ) -> KResult { + let file = match &self.device { + CharDeviceType::Virtual(_) => { + File::new(flags, FileType::CharDev(self.clone())) + } CharDeviceType::Terminal(terminal) => { - let procs = block_on(ProcessList::get().read()); - let current = Thread::current(); - let session = current.process.session(procs.prove()); - // We only set the control terminal if the process is the session leader. - if session.sid == Thread::current().process.pid { - // Silently fail if we can't set the control terminal. 
- dont_check!(block_on(session.set_control_terminal( - &terminal, - false, - procs.prove() - ))); - } - - TerminalFile::new(terminal.clone(), flags) + TerminalFile::open(thread, terminal, flags).await } - CharDeviceType::Virtual(_) => File::new(flags, FileType::CharDev(self.clone())), - }) + }; + + Ok(file) } } diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 93a543d7..8ac9c22a 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -12,7 +12,8 @@ use posix_types::syscall_no::*; use super::{FromSyscallArg, User}; use crate::io::{Buffer, BufferFill, IntoStream}; use crate::kernel::constants::{ - EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, + EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, + SEEK_SET, }; use crate::kernel::syscall::UserMut; use crate::kernel::task::Thread; @@ -61,7 +62,13 @@ async fn dentry_from( let dir_file = thread.files.get(dirfd).ok_or(EBADF)?; let dir_dentry = dir_file.as_path().ok_or(ENOTDIR)?; - Dentry::open_at(&thread.fs_context, dir_dentry, path, follow_symlink).await + Dentry::open_at( + &thread.fs_context, + dir_dentry, + path, + follow_symlink, + ) + .await } } } @@ -79,7 +86,12 @@ async fn read(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_PREAD64)] -async fn pread64(fd: FD, buffer: UserMut, bufsize: usize, offset: usize) -> KResult { +async fn pread64( + fd: FD, + buffer: UserMut, + bufsize: usize, + offset: usize, +) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread @@ -104,7 +116,12 @@ async fn write(fd: FD, buffer: User, count: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_PWRITE64)] -async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KResult { +async fn pwrite64( + fd: FD, + buffer: User, + count: usize, + offset: usize, +) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = 
buffer.into_stream(); @@ -117,11 +134,17 @@ async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KRes } #[eonix_macros::define_syscall(SYS_OPENAT)] -async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mode: Mode) -> KResult { - let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink()).await?; +async fn openat( + dirfd: FD, + pathname: User, + flags: OpenFlags, + mode: Mode, +) -> KResult { + let dentry = + dentry_from(thread, dirfd, pathname, flags.follow_symlink()).await?; let perm = mode.perm().mask_with(*thread.fs_context.umask.lock()); - thread.files.open(&dentry, flags, perm).await + thread.files.open(thread, &dentry, flags, perm).await } #[cfg(target_arch = "x86_64")] @@ -156,7 +179,8 @@ async fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { #[eonix_macros::define_syscall(SYS_PIPE2)] async fn pipe2(pipe_fd: UserMut<[FD; 2]>, flags: OpenFlags) -> KResult<()> { - let mut buffer = UserBuffer::new(pipe_fd.cast(), core::mem::size_of::<[FD; 2]>())?; + let mut buffer = + UserBuffer::new(pipe_fd.cast(), core::mem::size_of::<[FD; 2]>())?; let (read_fd, write_fd) = thread.files.pipe(flags)?; buffer.copy(&[read_fd, write_fd])?.ok_or(EFAULT) @@ -170,7 +194,11 @@ async fn pipe(pipe_fd: UserMut<[FD; 2]>) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETDENTS)] -async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { +async fn getdents( + fd: FD, + buffer: UserMut, + bufsize: usize, +) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread @@ -184,7 +212,11 @@ async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult } #[eonix_macros::define_syscall(SYS_GETDENTS64)] -async fn getdents64(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { +async fn getdents64( + fd: FD, + buffer: UserMut, + bufsize: usize, +) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread @@ -230,7 +262,8 @@ async fn newfstatat( )] 
#[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTAT64))] async fn newfstat(fd: FD, statbuf: UserMut) -> KResult<()> { - sys_newfstatat(thread, fd, User::null(), statbuf, AtFlags::AT_EMPTY_PATH).await + sys_newfstatat(thread, fd, User::null(), statbuf, AtFlags::AT_EMPTY_PATH) + .await } #[eonix_macros::define_syscall(SYS_STATX)] @@ -307,7 +340,11 @@ async fn unlink(pathname: User) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_SYMLINKAT)] -async fn symlinkat(target: User, dirfd: FD, linkpath: User) -> KResult<()> { +async fn symlinkat( + target: User, + dirfd: FD, + linkpath: User, +) -> KResult<()> { let target = UserString::new(target)?; let dentry = dentry_from(thread, dirfd, linkpath, false).await?; @@ -341,7 +378,12 @@ impl UserDeviceId { } #[eonix_macros::define_syscall(SYS_MKNODAT)] -async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: UserDeviceId) -> KResult<()> { +async fn mknodat( + dirfd: FD, + pathname: User, + mut mode: Mode, + dev: UserDeviceId, +) -> KResult<()> { if !mode.is_blk() && !mode.is_chr() { return Err(EINVAL); } @@ -354,7 +396,11 @@ async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: UserDeviceI #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKNOD)] -async fn mknod(pathname: User, mode: Mode, dev: UserDeviceId) -> KResult<()> { +async fn mknod( + pathname: User, + mode: Mode, + dev: UserDeviceId, +) -> KResult<()> { sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev).await } @@ -373,11 +419,20 @@ async fn readlinkat( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_READLINK)] -async fn readlink(pathname: User, buffer: UserMut, bufsize: usize) -> KResult { +async fn readlink( + pathname: User, + buffer: UserMut, + bufsize: usize, +) -> KResult { sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize).await } -async fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { +async fn do_lseek( + thread: &Thread, + fd: FD, + 
offset: u64, + whence: u32, +) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; Ok(match whence { @@ -403,7 +458,8 @@ async fn llseek( result: UserMut, whence: u32, ) -> KResult<()> { - let mut result = UserBuffer::new(result.cast(), core::mem::size_of::())?; + let mut result = + UserBuffer::new(result.cast(), core::mem::size_of::())?; let offset = ((offset_high as u64) << 32) | (offset_low as u64); let new_offset = do_lseek(thread, fd, offset, whence).await?; @@ -434,9 +490,10 @@ async fn readv(fd: FD, iov_user: User, iovcnt: u32) -> KResult { Ok(IoVec { len: Long::ZERO, .. }) => None, - Ok(IoVec { base, len }) => { - Some(UserBuffer::new(UserMut::with_addr(base.addr()), len.get())) - } + Ok(IoVec { base, len }) => Some(UserBuffer::new( + UserMut::with_addr(base.addr()), + len.get(), + )), }) .collect::>>()?; @@ -471,8 +528,11 @@ async fn writev(fd: FD, iov_user: User, iovcnt: u32) -> KResult { len: Long::ZERO, .. }) => None, Ok(IoVec { base, len }) => Some( - CheckedUserPointer::new(User::with_addr(base.addr()), len.get()) - .map(|ptr| ptr.into_stream()), + CheckedUserPointer::new( + User::with_addr(base.addr()), + len.get(), + ) + .map(|ptr| ptr.into_stream()), ), }) .collect::>>()?; @@ -491,7 +551,12 @@ async fn writev(fd: FD, iov_user: User, iovcnt: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_FACCESSAT)] -async fn faccessat(dirfd: FD, pathname: User, _mode: u32, flags: AtFlags) -> KResult<()> { +async fn faccessat( + dirfd: FD, + pathname: User, + _mode: u32, + flags: AtFlags, +) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -522,7 +587,12 @@ async fn access(pathname: User, mode: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_SENDFILE64)] -async fn sendfile64(out_fd: FD, in_fd: FD, offset: UserMut, count: usize) -> KResult { +async fn sendfile64( + out_fd: FD, + in_fd: FD, + offset: UserMut, + count: usize, +) -> KResult { let 
in_file = thread.files.get(in_fd).ok_or(EBADF)?; let out_file = thread.files.get(out_fd).ok_or(EBADF)?; @@ -627,7 +697,11 @@ async fn pselect6( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_POLL)] -async fn poll(fds: UserMut, nfds: u32, timeout: u32) -> KResult { +async fn poll( + fds: UserMut, + nfds: u32, + timeout: u32, +) -> KResult { do_poll(thread, fds, nfds, timeout).await } @@ -639,7 +713,8 @@ async fn fchownat( gid: u32, flags: AtFlags, ) -> KResult<()> { - let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?; + let dentry = + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?; if !dentry.is_valid() { return Err(ENOENT); } @@ -648,7 +723,12 @@ async fn fchownat( } #[eonix_macros::define_syscall(SYS_FCHMODAT)] -async fn fchmodat(dirfd: FD, pathname: User, mode: Mode, flags: AtFlags) -> KResult<()> { +async fn fchmodat( + dirfd: FD, + pathname: User, + mode: Mode, + flags: AtFlags, +) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -709,12 +789,16 @@ async fn renameat2( let flags = RenameFlags::from_bits(flags).ok_or(EINVAL)?; // The two flags RENAME_NOREPLACE and RENAME_EXCHANGE are mutually exclusive. 
- if flags.contains(RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE) { + if flags + .contains(RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE) + { Err(EINVAL)?; } - let old_dentry = dentry_from(thread, old_dirfd, old_pathname, false).await?; - let new_dentry = dentry_from(thread, new_dirfd, new_pathname, false).await?; + let old_dentry = + dentry_from(thread, old_dirfd, old_pathname, false).await?; + let new_dentry = + dentry_from(thread, new_dirfd, new_pathname, false).await?; old_dentry.rename(&new_dentry, flags).await } diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 3eff5949..1385235d 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -1,22 +1,24 @@ -use alloc::collections::btree_map::BTreeMap; use alloc::collections::vec_deque::VecDeque; -use alloc::sync::{Arc, Weak}; +use alloc::sync::Arc; use core::sync::atomic::{AtomicU32, Ordering}; use eonix_sync::{ - AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, SpinGuard, - UnlockableGuard as _, UnlockedGuard as _, + AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, + SpinGuard, UnlockableGuard as _, UnlockedGuard as _, +}; +use intrusive_collections::{ + intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink, }; use pointers::BorrowedArc; use posix_types::constants::{ - CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_ALL, P_PGID, P_PID, P_PIDFD, + CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_ALL, + P_PGID, P_PID, P_PIDFD, }; use posix_types::signal::Signal; use posix_types::SIGNAL_COREDUMP; -use super::process_group::ProcessGroupBuilder; use super::signal::RaiseResult; -use super::thread::ThreadBuilder; +use super::thread::{ProcessThreads, ThreadBuilder}; use super::{ProcessGroup, ProcessList, Session, Thread}; use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; use crate::kernel::mem::MMList; @@ -35,7 +37,6 @@ pub struct ProcessBuilder 
{ pid: Option, } -#[derive(Debug)] pub struct Process { /// Process id /// @@ -66,14 +67,55 @@ pub struct Process { /// The only case where it may be `None` is when the process is kernel thread. pub(super) session: RCUPointer, - /// All things related to the process list. - pub(super) inner: Locked, + pub children: Locked, ProcessList>, + pub threads: Locked, ProcessList>, + + all_procs_link: RBTreeAtomicLink, + group_procs_link: RBTreeAtomicLink, + siblings_link: RBTreeAtomicLink, } -#[derive(Debug)] -pub(super) struct ProcessInner { - pub(super) children: BTreeMap>, - pub(super) threads: BTreeMap>, +intrusive_adapter!(pub AllProcs = Arc: Process { + all_procs_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub GroupProcs = Arc: Process { + group_procs_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub ProcessChildren = Arc: Process { + siblings_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllProcs { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pid + } +} + +impl KeyAdapter<'_> for GroupProcs { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pid + } +} + +impl KeyAdapter<'_> for ProcessChildren { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pid + } } #[derive(Debug)] @@ -148,7 +190,9 @@ impl WaitType { pub fn to_wstatus(self) -> u32 { match self { WaitType::Exited(status) => (status & 0xff) << 8, - WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => signal.into_raw() | 0x80, + WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => { + signal.into_raw() | 0x80 + } WaitType::Signaled(signal) => signal.into_raw(), WaitType::Stopped(signal) => 0x7f | (signal.into_raw() << 8), WaitType::Continued => 0xffff, @@ -159,7 +203,9 @@ impl WaitType { // TODO: CLD_TRAPPED match self { WaitType::Exited(status) => (status, CLD_EXITED), - WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => (signal.into_raw(), CLD_DUMPED), + 
WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => { + (signal.into_raw(), CLD_DUMPED) + } WaitType::Signaled(signal) => (signal.into_raw(), CLD_KILLED), WaitType::Stopped(signal) => (signal.into_raw(), CLD_STOPPED), WaitType::Continued => (Signal::SIGCONT.into_raw(), CLD_CONTINUED), @@ -194,7 +240,11 @@ impl ProcessBuilder { } } - pub async fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { + pub async fn clone_from( + mut self, + process: Arc, + clone_args: &CloneArgs, + ) -> Self { let mm_list = if clone_args.flags.contains(CloneFlags::CLONE_VM) { process.mm_list.new_shared().await } else { @@ -243,7 +293,10 @@ impl ProcessBuilder { self } - pub fn build(self, process_list: &mut ProcessList) -> (Arc, Arc) { + pub fn build( + self, + process_list: &mut ProcessList, + ) -> (Arc, Arc) { let mm_list = self.mm_list.unwrap_or_else(|| MMList::new()); let process = Arc::new(Process { @@ -254,18 +307,23 @@ impl ProcessBuilder { parent: RCUPointer::empty(), pgroup: RCUPointer::empty(), session: RCUPointer::empty(), - inner: Locked::new( - ProcessInner { - children: BTreeMap::new(), - threads: BTreeMap::new(), - }, + children: Locked::new( + RBTree::new(ProcessChildren::NEW), + process_list, + ), + threads: Locked::new( + RBTree::new(ProcessThreads::NEW), process_list, ), + all_procs_link: RBTreeAtomicLink::new(), + group_procs_link: RBTreeAtomicLink::new(), + siblings_link: RBTreeAtomicLink::new(), }); process_list.add_process(&process); - let thread_builder = self.thread_builder.expect("Thread builder is not set"); + let thread_builder = + self.thread_builder.expect("Thread builder is not set"); let thread = thread_builder .process(process.clone()) .tid(process.pid) @@ -281,10 +339,7 @@ impl ProcessBuilder { pgroup.add_member(&process, process_list.prove_mut()); pgroup } - None => ProcessGroupBuilder::new() - .leader(&process) - .session(session.clone()) - .build(process_list), + None => ProcessGroup::new(&process, &session, process_list), }; if let 
Some(parent) = &self.parent { @@ -304,30 +359,30 @@ impl ProcessBuilder { impl Process { pub fn raise(&self, signal: Signal, procs: Proof<'_, ProcessList>) { - let inner = self.inner.access(procs); - for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) { + let threads = self.threads.access(procs); + for thread in threads.iter() { if let RaiseResult::Finished = thread.raise(signal) { break; } } } - pub(super) fn add_child(&self, child: &Arc, procs: ProofMut<'_, ProcessList>) { - assert!(self - .inner - .access_mut(procs) - .children - .insert(child.pid, Arc::downgrade(child)) - .is_none()); + pub fn add_child( + &self, + child: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_procs_link.is_linked(), "Dead process"); + self.children.access_mut(procs).insert(child.clone()); } - pub(super) fn add_thread(&self, thread: &Arc, procs: ProofMut<'_, ProcessList>) { - assert!(self - .inner - .access_mut(procs) - .threads - .insert(thread.tid, Arc::downgrade(thread)) - .is_none()); + pub fn add_thread( + &self, + thread: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_procs_link.is_linked(), "Dead process"); + self.threads.access_mut(procs).insert(thread.clone()); } pub async fn wait( @@ -354,12 +409,7 @@ impl Process { break object; } - if self - .inner - .access(waits.process_list.prove()) - .children - .is_empty() - { + if self.children.access(waits.process_list.prove()).is_empty() { return Err(ECHILD); } @@ -375,12 +425,12 @@ impl Process { Ok(Some(wait_object)) } else { let mut procs = ProcessList::get().write().await; - procs.remove_process(wait_object.pid).await; + procs.remove_process(wait_object.pid); assert!(self - .inner - .access_mut(procs.prove_mut()) .children - .remove(&wait_object.pid) + .access_mut(procs.prove_mut()) + .find_mut(&wait_object.pid) + .remove() .is_some()); Ok(Some(wait_object)) @@ -396,15 +446,17 @@ impl Process { if process_list.try_find_session(self.pid).is_some() { return Err(EPERM); } + + 
self.pgroup(process_list.prove()) + .remove_member(self, &mut process_list); + let session = Session::new(self, &mut process_list); - let pgroup = ProcessGroupBuilder::new() - .leader(self) - .session(session.clone()) - .build(&mut process_list); + let pgroup = ProcessGroup::new(self, &session, &mut process_list); - let old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap(); - let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); - old_pgroup.remove_member(self.pid, process_list.prove_mut()); + let old_session = + unsafe { self.session.swap(Some(session.clone())) }.unwrap(); + let old_pgroup = + unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); call_rcu(move || { drop(old_session); @@ -417,47 +469,56 @@ impl Process { /// Set the process group id of the process to `pgid`. /// /// This function does the actual work. - fn do_setpgid(self: &Arc, pgid: u32, procs: &mut ProcessList) -> KResult<()> { + fn do_setpgid( + self: &Arc, + pgid: u32, + procs: &mut ProcessList, + ) -> KResult<()> { // SAFETY: We are holding the process list lock. let session = unsafe { self.session.load_locked().unwrap() }; - let pgroup = unsafe { self.pgroup.load_locked().unwrap() }; // Changing the process group of a session leader is not allowed. if session.sid == self.pid { return Err(EPERM); } - let new_pgroup = if let Some(new_pgroup) = procs.try_find_pgroup(pgid) { + let cur_pgroup = self.pgroup(procs.prove()).clone(); + let existing_pgroup = procs.try_find_pgroup(pgid); + + if let Some(new_pgroup) = &existing_pgroup { // Move us to an existing process group. // Check that the two groups are in the same session. - if new_pgroup.session.upgrade().unwrap().sid != session.sid { + if new_pgroup.session.sid != session.sid { return Err(EPERM); } // If we are already in the process group, we are done. 
- if new_pgroup.pgid == pgroup.pgid { + if new_pgroup.pgid == cur_pgroup.pgid { return Ok(()); } - - new_pgroup.add_member(self, procs.prove_mut()); - - new_pgroup } else { // Create a new process group only if `pgid` matches our `pid`. if pgid != self.pid { return Err(EPERM); } + } - ProcessGroupBuilder::new() - .leader(self) - .session(session.clone()) - .build(procs) - }; + // Permission checks done. Let's do the actual work. + cur_pgroup.remove_member(self, procs); - pgroup.remove_member(self.pid, procs.prove_mut()); + let new_pgroup; + if let Some(pgroup) = existing_pgroup { + pgroup.add_member(self, procs.prove_mut()); + new_pgroup = pgroup; + } else { + new_pgroup = ProcessGroup::new(self, &session, procs); + } - let old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap(); - call_rcu(move || drop(old_pgroup)); + unsafe { + // SAFETY: `cur_pgroup` held above. + self.pgroup.swap(Some(new_pgroup)); + } + call_rcu(move || drop(cur_pgroup)); Ok(()) } @@ -475,15 +536,14 @@ impl Process { let child = { // If `pid` refers to one of our children, the thread leaders must be // in out children list. - let children = &self.inner.access(procs.prove()).children; - let child = { - let child = children.get(&pid); - child.and_then(Weak::upgrade).ok_or(ESRCH)? - }; + let children = self.children.access(procs.prove()); + let child = children.find(&pid).clone_pointer().ok_or(ESRCH)?; // Changing the process group of a child is only allowed // if we are in the same session. - if child.session(procs.prove()).sid != self.session(procs.prove()).sid { + if child.session(procs.prove()).sid + != self.session(procs.prove()).sid + { return Err(EPERM); } @@ -497,19 +557,28 @@ impl Process { } /// Provide locked (consistent) access to the session. 
- pub fn session<'r>(&'r self, _procs: Proof<'r, ProcessList>) -> BorrowedArc<'r, Session> { + pub fn session<'r>( + &'r self, + _procs: Proof<'r, ProcessList>, + ) -> BorrowedArc<'r, Session> { // SAFETY: We are holding the process list lock. unsafe { self.session.load_locked() }.unwrap() } /// Provide locked (consistent) access to the process group. - pub fn pgroup<'r>(&'r self, _procs: Proof<'r, ProcessList>) -> BorrowedArc<'r, ProcessGroup> { + pub fn pgroup<'r>( + &'r self, + _procs: Proof<'r, ProcessList>, + ) -> BorrowedArc<'r, ProcessGroup> { // SAFETY: We are holding the process list lock. unsafe { self.pgroup.load_locked() }.unwrap() } /// Provide locked (consistent) access to the parent process. - pub fn parent<'r>(&'r self, _procs: Proof<'r, ProcessList>) -> BorrowedArc<'r, Process> { + pub fn parent<'r>( + &'r self, + _procs: Proof<'r, ProcessList>, + ) -> BorrowedArc<'r, Process> { // SAFETY: We are holding the process list lock. unsafe { self.parent.load_locked() }.unwrap() } @@ -520,16 +589,25 @@ impl Process { } /// Provide RCU locked (maybe inconsistent) access to the process group. - pub fn pgroup_rcu(&self) -> RCUReadGuard<'_, BorrowedArc<'_, ProcessGroup>> { + pub fn pgroup_rcu( + &self, + ) -> RCUReadGuard<'_, BorrowedArc<'_, ProcessGroup>> { self.pgroup.load().unwrap() } /// Provide RCU locked (maybe inconsistent) access to the parent process. 
- pub fn parent_rcu(&self) -> Option>> { + pub fn parent_rcu( + &self, + ) -> Option>> { self.parent.load() } - pub fn notify(&self, signal: Option, wait: WaitObject, procs: Proof<'_, ProcessList>) { + pub fn notify( + &self, + signal: Option, + wait: WaitObject, + procs: Proof<'_, ProcessList>, + ) { self.wait_list.notify(wait); if let Some(signal) = signal { @@ -607,8 +685,11 @@ impl Entry<'_, '_, '_> { WaitId::Any => true, WaitId::Pid(pid) => item.pid == pid, WaitId::Pgid(pgid) => { - if let Some(process) = self.process_list.try_find_process(item.pid) { - return process.pgroup(self.process_list.prove()).pgid == pgid; + if let Some(process) = + self.process_list.try_find_process(item.pid) + { + return process.pgroup(self.process_list.prove()).pgid + == pgid; } false } @@ -622,7 +703,10 @@ impl Entry<'_, '_, '_> { } } - pub fn wait(self, no_block: bool) -> impl core::future::Future> + Send { + pub fn wait( + self, + no_block: bool, + ) -> impl core::future::Future> + Send { let wait_procs = self.wait_procs.unlock(); async move { diff --git a/src/kernel/task/process_group.rs b/src/kernel/task/process_group.rs index 137c5191..8c708b5c 100644 --- a/src/kernel/task/process_group.rs +++ b/src/kernel/task/process_group.rs @@ -1,87 +1,121 @@ -use super::{Process, ProcessList, Session}; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, +use alloc::sync::{Arc, Weak}; + +use eonix_sync::{AsProofMut, Locked, Proof, ProofMut}; +use intrusive_collections::{ + intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink, }; -use eonix_sync::{Locked, Proof, ProofMut}; use posix_types::signal::Signal; -pub struct ProcessGroupBuilder { - pgid: Option, - leader: Option>, - session: Option>, -} +use super::process::GroupProcs; +use super::{Process, ProcessList, Session}; -#[derive(Debug)] pub struct ProcessGroup { pub pgid: u32, - pub _leader: Weak, - pub session: Weak, + pub leader: Weak, + pub session: Arc, - pub processes: Locked>, ProcessList>, + pub procs: 
Locked, ProcessList>, + + all_groups_link: RBTreeAtomicLink, + session_groups_link: RBTreeAtomicLink, } -impl ProcessGroupBuilder { - pub const fn new() -> Self { - Self { - pgid: None, - leader: None, - session: None, - } - } +intrusive_adapter!(pub AllGroups = Arc: ProcessGroup { + all_groups_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub SessionGroups = Arc: ProcessGroup { + session_groups_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllGroups { + type Key = u32; - pub fn leader(mut self, leader: &Arc) -> Self { - self.pgid = Some(leader.pid); - self.leader = Some(Arc::downgrade(leader)); - self + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pgid } +} + +impl KeyAdapter<'_> for SessionGroups { + type Key = u32; - pub fn session(mut self, session: Arc) -> Self { - self.session = Some(session); - self + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pgid } +} - pub fn build(self, process_list: &mut ProcessList) -> Arc { - let pgid = self.pgid.expect("PGID is not set"); - let leader = self.leader.expect("Leader is not set"); - let session = self.session.expect("Session is not set"); +impl ProcessGroup { + /// Create a pgroup and add it to the global pgroup list. + /// Add the pgroup to the session. + /// + /// # Panics + /// Panics if `leader` is already in some pgroup. 
+ pub fn new( + leader: &Arc, + session: &Arc, + procs: &mut ProcessList, + ) -> Arc { + let pgid = leader.pid; + let pgroup_procs = { + let mut list = RBTree::new(GroupProcs::new()); + list.insert(leader.clone()); + list + }; let pgroup = Arc::new(ProcessGroup { pgid, - session: Arc::downgrade(&session), - processes: Locked::new(BTreeMap::from([(pgid, leader.clone())]), process_list), - _leader: leader, + session: session.clone(), + procs: Locked::new(pgroup_procs, procs), + leader: Arc::downgrade(leader), + all_groups_link: RBTreeAtomicLink::new(), + session_groups_link: RBTreeAtomicLink::new(), }); - process_list.add_pgroup(&pgroup); - session.add_member(process_list, &pgroup); + procs.add_pgroup(&pgroup); + session.add_member(&pgroup, procs.prove_mut()); pgroup } -} -impl ProcessGroup { - pub(super) fn add_member(&self, process: &Arc, procs: ProofMut<'_, ProcessList>) { - assert!(self - .processes - .access_mut(procs) - .insert(process.pid, Arc::downgrade(process)) - .is_none()); + /// Add `process` to the pgroup. + /// + /// # Panics + /// Panics if `process` is already in some pgroup or the pgroup is dead. 
+ pub fn add_member( + &self, + process: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_groups_link.is_linked(), "Dead pgroup"); + self.procs.access_mut(procs).insert(process.clone()); } - pub(super) fn remove_member(&self, pid: u32, procs: ProofMut<'_, ProcessList>) { - let processes = self.processes.access_mut(procs); - assert!(processes.remove(&pid).is_some()); - if processes.is_empty() { - self.session - .upgrade() - .unwrap() - .remove_member(self.pgid, procs); + pub fn remove_member( + self: &Arc, + process: &Arc, + procs: &mut ProcessList, + ) { + let members = self.procs.access_mut(procs.prove_mut()); + assert!( + members.find_mut(&process.pid).remove().is_some(), + "Not a member" + ); + + if !members.is_empty() { + return; } + + self.session.remove_member(self, procs); + procs.remove_pgroup(self); } pub fn raise(&self, signal: Signal, procs: Proof<'_, ProcessList>) { - let processes = self.processes.access(procs); - for process in processes.values().map(|p| p.upgrade().unwrap()) { + let members = self.procs.access(procs); + for process in members.iter() { process.raise(signal, procs); } } diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs index c676d22e..f3371f25 100644 --- a/src/kernel/task/process_list.rs +++ b/src/kernel/task/process_list.rs @@ -1,37 +1,41 @@ -use alloc::collections::btree_map::BTreeMap; use alloc::collections::vec_deque::VecDeque; -use alloc::sync::{Arc, Weak}; +use alloc::sync::Arc; use core::pin::pin; -use core::sync::atomic::Ordering; use eonix_runtime::scheduler::RUNTIME; use eonix_sync::{AsProof as _, AsProofMut as _, RwLock, Spin, WaitList}; +use intrusive_collections::RBTree; use super::loader::LoadInfo; +use super::process::AllProcs; +use super::process_group::AllGroups; +use super::session::AllSessions; +use super::thread::AllThreads; use super::{ - alloc_pid, Process, ProcessBuilder, ProcessGroup, Session, Thread, ThreadBuilder, WaitObject, + alloc_pid, Process, ProcessBuilder, 
ProcessGroup, Session, Thread, + ThreadBuilder, WaitObject, }; -use crate::rcu::rcu_sync; +use crate::rcu::call_rcu; pub struct ProcessList { /// The init process. init: Option>, /// All threads. - threads: BTreeMap>, + threads: RBTree, /// All processes. - processes: BTreeMap>, + procs: RBTree, /// All process groups. - pgroups: BTreeMap>, + pgroups: RBTree, /// All sessions. - sessions: BTreeMap>, + sessions: RBTree, } static GLOBAL_PROC_LIST: RwLock = RwLock::new(ProcessList { init: None, - threads: BTreeMap::new(), - processes: BTreeMap::new(), - pgroups: BTreeMap::new(), - sessions: BTreeMap::new(), + threads: RBTree::new(AllThreads::NEW), + procs: RBTree::new(AllProcs::NEW), + pgroups: RBTree::new(AllGroups::NEW), + sessions: RBTree::new(AllSessions::NEW), }); impl ProcessList { @@ -40,43 +44,64 @@ impl ProcessList { } pub fn add_session(&mut self, session: &Arc) { - self.sessions.insert(session.sid, Arc::downgrade(session)); + self.sessions.insert(session.clone()); } pub fn add_pgroup(&mut self, pgroup: &Arc) { - self.pgroups.insert(pgroup.pgid, Arc::downgrade(pgroup)); + self.pgroups.insert(pgroup.clone()); } pub fn add_process(&mut self, process: &Arc) { - self.processes.insert(process.pid, Arc::downgrade(process)); + self.procs.insert(process.clone()); } pub fn add_thread(&mut self, thread: &Arc) { - self.threads.insert(thread.tid, thread.clone()); + self.threads.insert(thread.clone()); } - pub async fn remove_process(&mut self, pid: u32) { + pub fn remove_process(&mut self, pid: u32) { // Thread group leader has the same tid as the pid. - if let Some(thread) = self.threads.remove(&pid) { - self.processes.remove(&pid); - - // SAFETY: We wait until all references are dropped below with `rcu_sync()`. 
- let session = unsafe { thread.process.session.swap(None) }.unwrap(); - let pgroup = unsafe { thread.process.pgroup.swap(None) }.unwrap(); - let _parent = unsafe { thread.process.parent.swap(None) }.unwrap(); - pgroup.remove_member(pid, self.prove_mut()); - rcu_sync().await; - - if Arc::strong_count(&pgroup) == 1 { - self.pgroups.remove(&pgroup.pgid); - } + let Some(_) = self.threads.find_mut(&pid).remove() else { + panic!("Thread {} not found", pid); + }; - if Arc::strong_count(&session) == 1 { - self.sessions.remove(&session.sid); - } - } else { + let Some(proc) = self.procs.find_mut(&pid).remove() else { panic!("Process {} not found", pid); - } + }; + + // SAFETY: `call_rcu` below. + let session = unsafe { proc.session.swap(None) }.unwrap(); + let pgroup = unsafe { proc.pgroup.swap(None) }.unwrap(); + let parent = unsafe { proc.parent.swap(None) }.unwrap(); + + pgroup.remove_member(&proc, self); + + call_rcu(move || { + drop(session); + drop(pgroup); + drop(parent); + }); + } + + pub fn remove_thread(&mut self, thread: &Arc) { + assert!( + self.threads.find_mut(&thread.tid).remove().is_some(), + "Double remove" + ); + } + + pub fn remove_session(&mut self, session: &Arc) { + assert!( + self.sessions.find_mut(&session.sid).remove().is_some(), + "Double remove" + ); + } + + pub fn remove_pgroup(&mut self, pgroup: &Arc) { + assert!( + self.pgroups.find_mut(&pgroup.pgid).remove().is_some(), + "Double remove" + ); } fn set_init_process(&mut self, init: Arc) { @@ -88,20 +113,20 @@ impl ProcessList { self.init.as_ref().unwrap() } - pub fn try_find_thread(&self, tid: u32) -> Option<&Arc> { - self.threads.get(&tid) + pub fn try_find_thread(&self, tid: u32) -> Option> { + self.threads.find(&tid).clone_pointer() } pub fn try_find_process(&self, pid: u32) -> Option> { - self.processes.get(&pid).and_then(Weak::upgrade) + self.procs.find(&pid).clone_pointer() } pub fn try_find_pgroup(&self, pgid: u32) -> Option> { - self.pgroups.get(&pgid).and_then(Weak::upgrade) + 
self.pgroups.find(&pgid).clone_pointer() } pub fn try_find_session(&self, sid: u32) -> Option> { - self.sessions.get(&sid).and_then(Weak::upgrade) + self.sessions.find(&sid).clone_pointer() } pub async fn sys_init(load_info: LoadInfo) { @@ -118,9 +143,6 @@ impl ProcessList { process_list.set_init_process(process); - // TODO!!!: Remove this. - thread.files.open_console(); - RUNTIME.spawn(Reaper::daemon()); RUNTIME.spawn(thread.run()); } @@ -152,18 +174,19 @@ impl Reaper { let process = &thread.process; if process.pid == 1 && thread.tid == process.pid { - panic!("init exited"); + panic!("init exited: {}", alloc_pid()); } let mut procs = ProcessList::get().write().await; - let inner = process.inner.access_mut(procs.prove_mut()); - - thread.dead.store(true, Ordering::SeqCst); - if thread.tid != process.pid { - procs.threads.remove(&thread.tid); - inner.threads.remove(&thread.tid).unwrap(); + let threads = process.threads.access_mut(procs.prove_mut()); + assert!( + threads.find_mut(&thread.tid).remove().is_some(), + "Thread gone?" + ); + + procs.remove_thread(&thread); } // main thread exit @@ -172,11 +195,11 @@ impl Reaper { thread.files.close_all().await; + let session = process.session(procs.prove()).clone(); // If we are the session leader, we should drop the control terminal. - if process.session(procs.prove()).sid == process.pid { - if let Some(terminal) = process.session(procs.prove()).drop_control_terminal().await - { - terminal.drop_session().await; + if session.sid == process.pid { + if let Some(terminal) = session.control_terminal() { + terminal.drop_session(procs.prove()); } } @@ -184,16 +207,14 @@ impl Reaper { process.mm_list.release(); // Make children orphans (adopted by init) - { - let init = procs.init_process(); - inner.children.retain(|_, child| { - let child = child.upgrade().unwrap(); - // SAFETY: `child.parent` must be ourself. So we don't need to free it. 
- unsafe { child.parent.swap(Some(init.clone())) }; - init.add_child(&child, procs.prove_mut()); - - false - }); + let init = procs.init_process(); + let children = process.children.access_mut(procs.prove_mut()); + for child in children.take() { + // XXX: May buggy. Check here again. + // SAFETY: `child.parent` must be ourself. + // So we don't need to free it. + unsafe { child.parent.swap(Some(init.clone())) }; + init.add_child(&child, procs.prove_mut()); } let mut init_notify = procs.init_process().notify_batch(); diff --git a/src/kernel/task/session.rs b/src/kernel/task/session.rs index a7b57afd..899aa395 100644 --- a/src/kernel/task/session.rs +++ b/src/kernel/task/session.rs @@ -1,117 +1,170 @@ -use super::{Process, ProcessGroup, ProcessList, Thread}; -use crate::kernel::constants::EPERM; -use crate::{kernel::Terminal, prelude::*}; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, +use alloc::sync::{Arc, Weak}; + +use eonix_sync::{AsProof as _, AsProofMut, Locked, Proof, ProofMut}; +use intrusive_collections::{ + intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink, }; -use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLock}; use posix_types::signal::Signal; -#[derive(Debug)] +use super::process_group::SessionGroups; +use super::{Process, ProcessGroup, ProcessList}; +use crate::kernel::constants::EPERM; +use crate::kernel::Terminal; +use crate::prelude::*; + struct SessionJobControl { - /// Foreground process group - foreground: Weak, + foreground: Option>, control_terminal: Option>, } -#[allow(dead_code)] -#[derive(Debug)] pub struct Session { pub sid: u32, pub leader: Weak, - job_control: RwLock, + job_control: Spin, + groups: Locked, ProcessList>, + all_sessions_link: RBTreeAtomicLink, +} - groups: Locked>, ProcessList>, +intrusive_adapter!(pub AllSessions = Arc: Session { + all_sessions_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllSessions { + type Key = u32; + + fn get_key( + &self, + value: 
&'_ ::Value, + ) -> Self::Key { + value.sid + } } impl Session { /// Create a session and add it to the global session list. - pub fn new(leader: &Arc, process_list: &mut ProcessList) -> Arc { + pub fn new(leader: &Arc, proclist: &mut ProcessList) -> Arc { let session = Arc::new(Self { sid: leader.pid, leader: Arc::downgrade(leader), - job_control: RwLock::new(SessionJobControl { - foreground: Weak::new(), + job_control: Spin::new(SessionJobControl { + foreground: None, control_terminal: None, }), - groups: Locked::new( - BTreeMap::new(), - // SAFETY: `procs` must be the global process list, which won't be moved. - process_list, - ), + groups: Locked::new(RBTree::new(SessionGroups::NEW), proclist), + all_sessions_link: RBTreeAtomicLink::new(), }); - process_list.add_session(&session); + proclist.add_session(&session); session } - pub(super) fn add_member(&self, procs: &mut ProcessList, pgroup: &Arc) { - let groups = self.groups.access_mut(procs.prove_mut()); - let old = groups.insert(pgroup.pgid, Arc::downgrade(pgroup)); - assert!(old.is_none(), "Process group already exists"); + pub fn add_member( + &self, + pgroup: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_sessions_link.is_linked(), "Dead session"); + self.groups.access_mut(procs).insert(pgroup.clone()); } - pub(super) fn remove_member(&self, pgid: u32, procs: ProofMut<'_, ProcessList>) { - assert!(self.groups.access_mut(procs).remove(&pgid).is_some()); + pub fn remove_member( + self: &Arc, + pgroup: &Arc, + procs: &mut ProcessList, + ) { + let members = self.groups.access_mut(procs.prove_mut()); + assert!( + members.find_mut(&pgroup.pgid).remove().is_some(), + "Not a member" + ); + + if let Some(fg_pgroup) = self.foreground_pgroup() { + if fg_pgroup.pgid == pgroup.pgid { + let _ = self.set_foreground_pgroup(None); + } + } + + if !members.is_empty() { + return; + } + + // Recycle dead session. 
+ procs.remove_session(self); } - pub async fn foreground(&self) -> Option> { - self.job_control.read().await.foreground.upgrade() + pub fn leader(&self) -> Option> { + self.leader.upgrade() + } + + pub fn foreground_pgroup(&self) -> Option> { + self.job_control.lock().foreground.clone() + } + + pub fn control_terminal(&self) -> Option> { + self.job_control.lock().control_terminal.clone() } /// Set the foreground process group identified by `pgid`. /// The process group must belong to the session. - pub async fn set_foreground_pgid( + pub fn set_foreground_pgroup( &self, - pgid: u32, - procs: Proof<'_, ProcessList>, + pgroup: Option<&Arc>, ) -> KResult<()> { - if let Some(group) = self.groups.access(procs).get(&pgid) { - self.job_control.write().await.foreground = group.clone(); - Ok(()) - } else { - // TODO: Check if the process group refers to an existing process group. - // That's not a problem though, the operation will fail anyway. - Err(EPERM) + if let Some(pgroup) = pgroup { + if pgroup.session.sid != self.sid { + return Err(EPERM); + } } + + self.job_control.lock().foreground = pgroup.cloned(); + Ok(()) } - /// Only session leaders can set the control terminal. - /// Make sure we've checked that before calling this function. - pub async fn set_control_terminal( + /// Set our controlling terminal to `terminal`. Only meant to be called by + /// the session leader. The pgroup that the session leader is in becomes the + /// new foreground pgroup. + /// + /// # Panics + /// Panics if we have a controlling terminal already + /// or the session leader is gone. 
+ pub fn _set_control_terminal( self: &Arc, terminal: &Arc, - forced: bool, procs: Proof<'_, ProcessList>, - ) -> KResult<()> { - let mut job_control = self.job_control.write().await; - if let Some(_) = job_control.control_terminal.as_ref() { - if let Some(session) = terminal.session().await.as_ref() { - if session.sid == self.sid { - return Ok(()); - } - } - return Err(EPERM); - } - terminal.set_session(self, forced).await?; + ) { + let mut job_control = self.job_control.lock(); + let leader = self.leader().expect("Leader is gone?"); + + assert!( + job_control.control_terminal.is_none(), + "We have a controlling terminal already" + ); + job_control.control_terminal = Some(terminal.clone()); - job_control.foreground = Arc::downgrade(&Thread::current().process.pgroup(procs)); - Ok(()) + job_control.foreground = Some(leader.pgroup(procs).clone()); } /// Drop the control terminal reference inside the session. - /// DO NOT TOUCH THE TERMINAL'S SESSION FIELD. - pub async fn drop_control_terminal(&self) -> Option> { - let mut inner = self.job_control.write().await; - inner.foreground = Weak::new(); - inner.control_terminal.take() + /// Send SIGHUP and then SIGCONT to our foreground pgroup. 
+ pub fn _drop_control_terminal(&self, procs: Proof<'_, ProcessList>) { + let foreground = { + let mut inner = self.job_control.lock(); + inner.control_terminal = None; + inner.foreground.take() + }; + + if let Some(foreground) = foreground { + foreground.raise(Signal::SIGHUP, procs); + foreground.raise(Signal::SIGCHLD, procs); + } } pub async fn raise_foreground(&self, signal: Signal) { - if let Some(fg) = self.foreground().await { - let procs = ProcessList::get().read().await; - fg.raise(signal, procs.prove()); - } + let Some(fg) = self.foreground_pgroup() else { + return; + }; + + let procs = ProcessList::get().read().await; + fg.raise(signal, procs.prove()); } } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 7e005875..76c56dcc 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -14,6 +14,7 @@ use eonix_hal::traits::trap::{RawTrapContext, TrapReturn, TrapType}; use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::AsProofMut as _; +use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTreeAtomicLink}; use pointers::BorrowedArc; use posix_types::signal::Signal; use stalloc::UnsafeStalloc; @@ -84,9 +85,44 @@ pub struct Thread { pub dead: AtomicBool, pub exit_status: Spin>, + /// Link in the global thread list. + all_threads_link: RBTreeAtomicLink, + + /// Link in the process's thread list. 
+ process_threads_link: RBTreeAtomicLink, + inner: Spin, } +intrusive_adapter!(pub AllThreads = Arc: Thread { + all_threads_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub ProcessThreads = Arc: Thread { + process_threads_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllThreads { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.tid + } +} + +impl KeyAdapter<'_> for ProcessThreads { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.tid + } +} + impl ThreadBuilder { pub fn new() -> Self { Self { @@ -139,12 +175,18 @@ impl ThreadBuilder { self } - pub fn set_child_tid(mut self, set_child_tid: Option>) -> Self { + pub fn set_child_tid( + mut self, + set_child_tid: Option>, + ) -> Self { self.set_child_tid = set_child_tid; self } - pub fn clear_child_tid(mut self, clear_child_tid: Option>) -> Self { + pub fn clear_child_tid( + mut self, + clear_child_tid: Option>, + ) -> Self { self.clear_child_tid = clear_child_tid; self } @@ -171,7 +213,11 @@ impl ThreadBuilder { } /// Clone the thread from another thread. 
- pub fn clone_from(self, thread: &Thread, clone_args: &CloneArgs) -> KResult { + pub fn clone_from( + self, + thread: &Thread, + clone_args: &CloneArgs, + ) -> KResult { let inner = thread.inner.lock(); let mut trap_ctx = thread.trap_ctx.borrow().clone(); @@ -199,11 +245,12 @@ impl ThreadBuilder { FileArray::new_cloned(&thread.files) }; - let signal_list = if clone_args.flags.contains(CloneFlags::CLONE_SIGHAND) { - SignalList::new_shared(&thread.signal_list) - } else { - SignalList::new_cloned(&thread.signal_list) - }; + let signal_list = + if clone_args.flags.contains(CloneFlags::CLONE_SIGHAND) { + SignalList::new_shared(&thread.signal_list) + } else { + SignalList::new_cloned(&thread.signal_list) + }; Ok(self .files(files) @@ -241,6 +288,8 @@ impl ThreadBuilder { fpu_state: AtomicUniqueRefCell::new(fpu_state), dead: AtomicBool::new(false), exit_status: Spin::new(None), + all_threads_link: RBTreeAtomicLink::new(), + process_threads_link: RBTreeAtomicLink::new(), inner: Spin::new(ThreadInner { name, tls: self.tls, @@ -281,7 +330,10 @@ impl Thread { Ok(()) } - pub fn set_robust_list(&self, robust_list_address: Option>) { + pub fn set_robust_list( + &self, + robust_list_address: Option>, + ) { self.inner.lock().robust_list_address = robust_list_address; } @@ -371,7 +423,10 @@ impl Thread { while !self.is_dead() { if self.signal_list.has_pending_signal() { self.signal_list - .handle(&mut self.trap_ctx.borrow(), &mut self.fpu_state.borrow()) + .handle( + &mut self.trap_ctx.borrow(), + &mut self.fpu_state.borrow(), + ) .await; } @@ -399,7 +454,9 @@ impl Thread { } let mms = &self.process.mm_list; - if let Err(signal) = mms.handle_user_page_fault(addr, error_code).await { + if let Err(signal) = + mms.handle_user_page_fault(addr, error_code).await + { self.signal_list.raise(signal); } } @@ -409,8 +466,12 @@ impl Thread { TrapType::Fault(Fault::InvalidOp) => { self.signal_list.raise(Signal::SIGILL); } - TrapType::Fault(Fault::Unknown(_)) => unimplemented!("Unhandled 
fault"), - TrapType::Breakpoint => unimplemented!("Breakpoint in user space"), + TrapType::Fault(Fault::Unknown(_)) => { + unimplemented!("Unhandled fault") + } + TrapType::Breakpoint => { + unimplemented!("Breakpoint in user space") + } TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => { callback(timer_interrupt); @@ -424,11 +485,16 @@ impl Thread { return; } - if let Some(retval) = self.handle_syscall(thd_alloc, no, args).await { + if let Some(retval) = + self.handle_syscall(thd_alloc, no, args).await + { let mut trap_ctx = self.trap_ctx.borrow(); trap_ctx.set_user_return_value(retval); - #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] + #[cfg(any( + target_arch = "riscv64", + target_arch = "loongarch64" + ))] { let pc = trap_ctx.get_program_counter(); trap_ctx.set_program_counter(pc + 4); @@ -472,7 +538,10 @@ impl Thread { }) .await; - assert!(self.is_dead(), "`real_run` returned before the thread die?"); + assert!( + self.is_dead(), + "`real_run` returned before the thread die?" 
+ ); ProcessList::send_to_reaper(self); } } @@ -499,7 +568,10 @@ pub async fn yield_now() { impl Future for Yield { type Output = (); - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + fn poll( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll { if self.as_mut().yielded { Poll::Ready(()) } else { diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 86024338..33dc8066 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -1,18 +1,19 @@ -use super::{ - task::{ProcessList, Session, Thread}, - user::{UserPointer, UserPointerMut}, -}; -use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; -use crate::{io::Buffer, prelude::*, sync::CondVar}; -use alloc::{ - collections::vec_deque::VecDeque, - sync::{Arc, Weak}, -}; +use alloc::collections::vec_deque::VecDeque; +use alloc::sync::{Arc, Weak}; + use bitflags::bitflags; use eonix_log::ConsoleWrite; -use eonix_sync::{AsProof as _, Mutex}; +use eonix_sync::{Mutex, Proof}; use posix_types::signal::Signal; +use super::constants::ESRCH; +use super::task::{ProcessList, Session, Thread}; +use super::user::{UserPointer, UserPointerMut}; +use crate::io::Buffer; +use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; +use crate::prelude::*; +use crate::sync::CondVar; + const BUFFER_SIZE: usize = 4096; const NCCS: usize = 32; @@ -351,12 +352,12 @@ pub trait TerminalDevice: Send + Sync { struct TerminalInner { termio: Termios, - session: Weak, buffer: VecDeque, } pub struct Terminal { inner: Mutex, + session: Spin>, device: Arc, cv: CondVar, } @@ -400,9 +401,9 @@ impl Terminal { Arc::new(Self { inner: Mutex::new(TerminalInner { termio: Termios::new_standard(), - session: Weak::new(), buffer: VecDeque::with_capacity(BUFFER_SIZE), }), + session: Spin::new(Weak::new()), cv: CondVar::new(), device, }) @@ -447,15 +448,21 @@ impl Terminal { } async fn signal(&self, inner: &mut TerminalInner, signal: Signal) { - if let Some(session) = inner.session.upgrade() { + if let Some(session) = 
self.session() { session.raise_foreground(signal).await; } + if !inner.termio.noflsh() { self.clear_read_buffer(inner); } } - async fn echo_and_signal(&self, inner: &mut TerminalInner, ch: u8, signal: Signal) { + async fn echo_and_signal( + &self, + inner: &mut TerminalInner, + ch: u8, + signal: Signal, + ) { self.echo_char(inner, ch); self.signal(inner, signal).await; } @@ -481,13 +488,19 @@ impl Terminal { match ch { 0xff => {} ch if ch == inner.termio.vintr() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGINT).await + return self + .echo_and_signal(&mut inner, ch, Signal::SIGINT) + .await } ch if ch == inner.termio.vquit() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGQUIT).await + return self + .echo_and_signal(&mut inner, ch, Signal::SIGQUIT) + .await } ch if ch == inner.termio.vsusp() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGTSTP).await + return self + .echo_and_signal(&mut inner, ch, Signal::SIGTSTP) + .await } _ => {} } @@ -517,8 +530,12 @@ impl Terminal { match ch { b'\r' if inner.termio.igncr() => {} - b'\r' if inner.termio.icrnl() => return self.do_commit_char(&mut inner, b'\n'), - b'\n' if inner.termio.inlcr() => return self.do_commit_char(&mut inner, b'\r'), + b'\r' if inner.termio.icrnl() => { + return self.do_commit_char(&mut inner, b'\n') + } + b'\n' if inner.termio.inlcr() => { + return self.do_commit_char(&mut inner, b'\r') + } _ => self.do_commit_char(&mut inner, ch), } } @@ -589,26 +606,30 @@ impl Terminal { pub async fn ioctl(&self, request: TerminalIORequest<'_>) -> KResult<()> { match request { TerminalIORequest::GetProcessGroup(pgid_pointer) => { - if let Some(session) = self.inner.lock().await.session.upgrade() { - if let Some(pgroup) = session.foreground().await { - return pgid_pointer.write(pgroup.pgid); - } - } + let Some(session) = self.session() else { + return Err(ENOTTY); + }; + + let Some(pgroup) = session.foreground_pgroup() else { + return Err(ENOTTY); + }; - Err(ENOTTY) + 
pgid_pointer.write(pgroup.pgid) } TerminalIORequest::SetProcessGroup(pgid) => { let pgid = pgid.read()?; let procs = ProcessList::get().read().await; - let inner = self.inner.lock().await; - let session = inner.session.upgrade(); + let Some(session) = self.session() else { + return Err(ENOTTY); + }; - if let Some(session) = session { - session.set_foreground_pgid(pgid, procs.prove()).await - } else { - Err(ENOTTY) - } + let Some(pgroup) = procs.try_find_pgroup(pgid) else { + return Err(ESRCH); + }; + + session.set_foreground_pgroup(Some(&pgroup))?; + Ok(()) } TerminalIORequest::GetWindowSize(ptr) => { // TODO: Get the actual window size @@ -630,9 +651,12 @@ impl Terminal { let mut inner = self.inner.lock().await; // TODO: We ignore unknown bits for now. - inner.termio.iflag = TermioIFlags::from_bits_truncate(user_termios.iflag as u16); - inner.termio.oflag = TermioOFlags::from_bits_truncate(user_termios.oflag as u16); - inner.termio.lflag = TermioLFlags::from_bits_truncate(user_termios.lflag as u16); + inner.termio.iflag = + TermioIFlags::from_bits_truncate(user_termios.iflag as u16); + inner.termio.oflag = + TermioOFlags::from_bits_truncate(user_termios.oflag as u16); + inner.termio.lflag = + TermioLFlags::from_bits_truncate(user_termios.lflag as u16); inner.termio.cflag = user_termios.cflag; inner.termio.line = user_termios.line; inner.termio.cc = user_termios.cc; @@ -642,30 +666,52 @@ impl Terminal { } } - /// Assign the `session` to this terminal. Drop the previous session if `forced` is true. - pub async fn set_session(&self, session: &Arc, forced: bool) -> KResult<()> { - let mut inner = self.inner.lock().await; - if let Some(session) = inner.session.upgrade() { + pub fn session(&self) -> Option> { + self.session.lock().upgrade() + } + + /// Drop our current controlled session. The old session lose its controlling + /// terminal and all processes in it will receive a SIGHUP and then SIGCONT. 
+ pub fn drop_session(&self, procs: Proof<'_, ProcessList>) { + let session = + core::mem::replace(&mut *self.session.lock(), Weak::new()); + let Some(old_session) = session.upgrade() else { + return; + }; + + old_session._drop_control_terminal(procs); + } + + /// Assign the `session` to this terminal. + /// Drop the previous session if `forced` is true. + pub async fn set_session( + self: &Arc, + session: &Arc, + forced: bool, + procs: Proof<'_, ProcessList>, + ) -> KResult<()> { + let mut cur_session = self.session.lock(); + + // XXX: Holding spinlock for too long? + if let Some(old_session) = cur_session.upgrade() { + if old_session.sid == session.sid { + return Ok(()); + } + if !forced { - Err(EPERM) - } else { - session.drop_control_terminal().await; - inner.session = Arc::downgrade(&session); - Ok(()) + return Err(EPERM); } - } else { - // Sessions should set their `control_terminal` field. - inner.session = Arc::downgrade(&session); - Ok(()) + + // TODO: Check whether the caller has the CAP_SYS_ADMIN capability. + + // We've stolen the terminal from the old session. 
+ old_session._drop_control_terminal(procs); } - } - pub async fn drop_session(&self) { - self.inner.lock().await.session = Weak::new(); - } + *cur_session = Arc::downgrade(session); + session._set_control_terminal(self, procs); - pub async fn session(&self) -> Option> { - self.inner.lock().await.session.upgrade() + Ok(()) } } diff --git a/src/kernel/vfs/file/terminal_file.rs b/src/kernel/vfs/file/terminal_file.rs index f318c5b2..04a022b5 100644 --- a/src/kernel/vfs/file/terminal_file.rs +++ b/src/kernel/vfs/file/terminal_file.rs @@ -1,24 +1,46 @@ -use super::{File, FileType, PollEvent}; -use crate::{ - io::{Buffer, Stream, StreamRead}, - kernel::{ - constants::{EINVAL, TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, - terminal::TerminalIORequest, - user::{UserPointer, UserPointerMut}, - Terminal, - }, - prelude::KResult, -}; use alloc::sync::Arc; + +use eonix_sync::AsProof; use posix_types::open::OpenFlags; +use super::{File, FileType, PollEvent}; +use crate::io::{Buffer, Stream, StreamRead}; +use crate::kernel::constants::{ + EINVAL, TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP, +}; +use crate::kernel::task::{ProcessList, Thread}; +use crate::kernel::terminal::TerminalIORequest; +use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::Terminal; +use crate::prelude::KResult; + pub struct TerminalFile { terminal: Arc, } impl TerminalFile { - pub fn new(tty: Arc, flags: OpenFlags) -> File { - File::new(flags, FileType::Terminal(TerminalFile { terminal: tty })) + pub async fn open( + thread: &Thread, + terminal: &Arc, + flags: OpenFlags, + ) -> File { + let set_control_tty = !flags.contains(OpenFlags::O_NOCTTY); + + let procs = ProcessList::get().read().await; + let session = thread.process.session(procs.prove()); + + // We only set the control terminal if the process is the session leader. + if set_control_tty && session.sid == thread.process.pid { + // Silently fail if we can't set the control terminal. 
+ let _ = terminal.set_session(&session, false, procs.prove()).await; + } + + File::new( + flags, + FileType::Terminal(TerminalFile { + terminal: terminal.clone(), + }), + ) } pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { @@ -43,11 +65,21 @@ impl TerminalFile { pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { self.terminal .ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), + TCGETS => TerminalIORequest::GetTermios( + UserPointerMut::with_addr(arg3)?, + ), + TCSETS => { + TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?) + } + TIOCGPGRP => TerminalIORequest::GetProcessGroup( + UserPointerMut::with_addr(arg3)?, + ), + TIOCSPGRP => TerminalIORequest::SetProcessGroup( + UserPointer::with_addr(arg3)?, + ), + TIOCGWINSZ => TerminalIORequest::GetWindowSize( + UserPointerMut::with_addr(arg3)?, + ), _ => return Err(EINVAL), }) .await diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index 609d969c..c0b6a49e 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -1,19 +1,22 @@ use alloc::sync::Arc; use intrusive_collections::rbtree::Entry; -use intrusive_collections::{intrusive_adapter, Bound, KeyAdapter, RBTree, RBTreeAtomicLink}; +use intrusive_collections::{ + intrusive_adapter, Bound, KeyAdapter, RBTree, RBTreeAtomicLink, +}; use itertools::FoldWhile::{Continue, Done}; use itertools::Itertools; use posix_types::open::{FDFlags, OpenFlags}; use super::file::{File, InodeFile, Pipe}; use super::types::{Format, Permission}; -use super::{Spin, TerminalFile}; -use crate::kernel::console::get_console; +use 
super::Spin; use crate::kernel::constants::{ - EBADF, EISDIR, ENOTDIR, ENXIO, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, F_SETFD, F_SETFL, + EBADF, EISDIR, ENOTDIR, ENXIO, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, + F_SETFD, F_SETFL, }; use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; +use crate::kernel::task::Thread; use crate::kernel::vfs::dentry::Dentry; use crate::kernel::CharDevice; use crate::prelude::*; @@ -80,7 +83,11 @@ impl FDAllocator { self.min_avail = FD(0); } - fn find_available(&mut self, from: FD, files: &RBTree) -> FD { + fn find_available( + &mut self, + from: FD, + files: &RBTree, + ) -> FD { files .range(Bound::Included(&from), Bound::Unbounded) .fold_while(from, |current, OpenFile { fd, .. }| { @@ -143,7 +150,8 @@ impl FileArray { let other_inner = other.inner.lock(); for file in other_inner.files.iter() { - let new_file = OpenFile::new(file.fd, file.flags, file.file.dup()); + let new_file = + OpenFile::new(file.fd, file.flags, file.file.dup()); new_files.insert(new_file); } (new_files, other_inner.fd_alloc.clone()) @@ -223,7 +231,12 @@ impl FileArray { /// Duplicates the file to a new file descriptor, returning the old file /// description to be dropped. - fn dup_to_no_close(&self, old_fd: FD, new_fd: FD, fd_flags: FDFlags) -> KResult> { + fn dup_to_no_close( + &self, + old_fd: FD, + new_fd: FD, + fd_flags: FDFlags, + ) -> KResult> { let mut inner = self.inner.lock(); let (files, fd_alloc) = inner.split_borrow(); @@ -240,7 +253,8 @@ impl FileArray { Entry::Occupied(mut entry) => { let mut file = entry.remove().unwrap(); file.flags = fd_flags; - let old_file = core::mem::replace(&mut file.file, new_file_data); + let old_file = + core::mem::replace(&mut file.file, new_file_data); entry.insert(file); @@ -249,8 +263,15 @@ impl FileArray { } } - pub async fn dup_to(&self, old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { - if let Some(old_file) = self.dup_to_no_close(old_fd, new_fd, flags.as_fd_flags())? 
{ + pub async fn dup_to( + &self, + old_fd: FD, + new_fd: FD, + flags: OpenFlags, + ) -> KResult { + if let Some(old_file) = + self.dup_to_no_close(old_fd, new_fd, flags.as_fd_flags())? + { old_file.close().await; } @@ -277,6 +298,7 @@ impl FileArray { pub async fn open( &self, + thread: &Thread, dentry: &Arc, flags: OpenFlags, perm: Permission, @@ -300,7 +322,7 @@ impl FileArray { let file = if inode.format == Format::CHR { let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?; - device.open(flags)? + device.open(thread, flags).await? } else { InodeFile::new(dentry.clone(), flags) }; @@ -323,7 +345,8 @@ impl FileArray { F_DUPFD | F_DUPFD_CLOEXEC => { let ofile = cursor.get().ok_or(EBADF)?; - let cloexec = cmd == F_DUPFD_CLOEXEC || ofile.flags.close_on_exec(); + let cloexec = + cmd == F_DUPFD_CLOEXEC || ofile.flags.close_on_exec(); let flags = cloexec .then_some(FDFlags::FD_CLOEXEC) .unwrap_or(FDFlags::empty()); @@ -342,7 +365,9 @@ impl FileArray { cursor.insert(ofile); 0 } - F_GETFL => cursor.get().ok_or(EBADF)?.file.get_flags().bits() as usize, + F_GETFL => { + cursor.get().ok_or(EBADF)?.file.get_flags().bits() as usize + } F_SETFL => { cursor .get() @@ -357,35 +382,6 @@ impl FileArray { Ok(ret) } - - /// Only used for init process. 
- pub fn open_console(&self) { - let mut inner = self.inner.lock(); - let (files, fd_alloc) = inner.split_borrow(); - - let (stdin, stdout, stderr) = ( - fd_alloc.next_fd(files), - fd_alloc.next_fd(files), - fd_alloc.next_fd(files), - ); - let console_terminal = get_console().expect("No console terminal"); - - inner.do_insert( - stdin, - FDFlags::FD_CLOEXEC, - TerminalFile::new(console_terminal.clone(), OpenFlags::empty()), - ); - inner.do_insert( - stdout, - FDFlags::FD_CLOEXEC, - TerminalFile::new(console_terminal.clone(), OpenFlags::empty()), - ); - inner.do_insert( - stderr, - FDFlags::FD_CLOEXEC, - TerminalFile::new(console_terminal.clone(), OpenFlags::empty()), - ); - } } impl FileArrayInner { @@ -397,7 +393,9 @@ impl FileArrayInner { fn do_insert(&mut self, fd: FD, flags: FDFlags, file: File) { match self.files.entry(&fd) { Entry::Occupied(_) => { - panic!("File descriptor {fd:?} already exists in the file array."); + panic!( + "File descriptor {fd:?} already exists in the file array." + ); } Entry::Vacant(insert_cursor) => { insert_cursor.insert(OpenFile::new(fd, flags, file)); @@ -405,7 +403,9 @@ impl FileArrayInner { } } - fn split_borrow(&mut self) -> (&mut RBTree, &mut FDAllocator) { + fn split_borrow( + &mut self, + ) -> (&mut RBTree, &mut FDAllocator) { let Self { files, fd_alloc } = self; (files, fd_alloc) } diff --git a/user-programs/init_script_riscv64.sh b/user-programs/init_script_riscv64.sh index b5ce95d7..f67e2a27 100644 --- a/user-programs/init_script_riscv64.sh +++ b/user-programs/init_script_riscv64.sh @@ -56,6 +56,9 @@ busybox mknod -m 666 /dev/vdb b 8 16 busybox mknod -m 666 /dev/ttyS0 c 4 64 busybox mknod -m 666 /dev/ttyS1 c 4 65 +exec < "$TERMINAL" +exec > "$TERMINAL" 2>&1 + info "deploying busybox..." busybox mkdir -p /bin /lib @@ -106,8 +109,7 @@ int main() { } EOF -# shellcheck disable=SC2094 -exec sh -l < "$TERMINAL" > "$TERMINAL" 2> "$TERMINAL" +exec sh -l # We don't have a working init yet, so we use busybox sh directly for now. 
# exec /mnt/init /bin/sh -c 'exec sh -l < /dev/ttyS0 > /dev/ttyS0 2> /dev/ttyS0' From e1bf8cecee909c1ac0bebf5641180d676cdf6bd0 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 01:31:13 +0800 Subject: [PATCH 46/54] style: reformat the files related to next patches Reformat the files with new the format style to make the real changes clearer. Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 50 +++++++++++++----- crates/eonix_hal/src/arch/riscv64/mm.rs | 52 ++++++++++++------- src/kernel_init.rs | 3 +- 3 files changed, 70 insertions(+), 35 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index b2305f99..4e5afcfb 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -7,8 +7,12 @@ use core::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; use eonix_hal_traits::mm::Memory; use eonix_mm::address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}; -use eonix_mm::page_table::{PageAttribute, PageTable, PagingMode, TableAttribute, PTE as _}; -use eonix_mm::paging::{Folio, FrameAlloc, PageAccess, PageBlock, PAGE_SIZE, PFN}; +use eonix_mm::page_table::{ + PageAttribute, PageTable, PagingMode, TableAttribute, PTE as _, +}; +use eonix_mm::paging::{ + Folio, FrameAlloc, PageAccess, PageBlock, PAGE_SIZE, PFN, +}; use eonix_percpu::PercpuArea; use fdt::Fdt; use riscv::asm::sfence_vma_all; @@ -25,11 +29,13 @@ use super::time::set_next_timer; use crate::arch::cpu::CPU; use crate::arch::fdt::{init_dtb_and_fdt, FdtExt, FDT}; use crate::arch::mm::{ - ArchPagingMode, ArchPhysAccess, FreeRam, PageAccessImpl, PageAttribute64, RawPageTableSv48, - GLOBAL_PAGE_TABLE, + ArchPagingMode, ArchPhysAccess, FreeRam, PageAccessImpl, PageAttribute64, + RawPageTableSv48, GLOBAL_PAGE_TABLE, }; use crate::bootstrap::BootStrapData; -use crate::mm::{ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}; 
+use crate::mm::{ + ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator, +}; #[unsafe(link_section = ".bootstrap.stack")] static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16]; @@ -64,7 +70,8 @@ static PT1: BootPageTable = { BootPageTable(arr) }; -static BSP_PAGE_ALLOC: AtomicPtr> = AtomicPtr::new(core::ptr::null_mut()); +static BSP_PAGE_ALLOC: AtomicPtr> = + AtomicPtr::new(core::ptr::null_mut()); static AP_COUNT: AtomicUsize = AtomicUsize::new(0); static AP_STACK: AtomicUsize = AtomicUsize::new(0); @@ -130,11 +137,14 @@ pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! { } let start = unsafe { - ((&BOOT_STACK_START) as *const &'static [u8; 4096 * 16]).read_volatile() as *const _ - as usize + ((&BOOT_STACK_START) as *const &'static [u8; 4096 * 16]).read_volatile() + as *const _ as usize }; let bootstrap_data = BootStrapData { - early_stack: PRange::new(PAddr::from(start), PAddr::from(start + 4096 * 16)), + early_stack: PRange::new( + PAddr::from(start), + PAddr::from(start + 4096 * 16), + ), allocator: Some(real_allocator), }; @@ -179,7 +189,11 @@ fn setup_kernel_page_table(alloc: BasicPageAllocRef) { sfence_vma_all(); unsafe { - core::ptr::write_bytes(KERNEL_BSS_START.addr() as *mut (), 0, BSS_LENGTH as usize); + core::ptr::write_bytes( + KERNEL_BSS_START.addr() as *mut (), + 0, + BSS_LENGTH as usize, + ); } unsafe { @@ -255,7 +269,8 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) { stack_range }; - let old = BSP_PAGE_ALLOC.swap((&raw const *page_alloc) as *mut _, Ordering::Release); + let old = BSP_PAGE_ALLOC + .swap((&raw const *page_alloc) as *mut _, Ordering::Release); assert!(old.is_null()); while AP_STACK @@ -324,7 +339,12 @@ unsafe extern "C" fn _ap_start(hart_id: usize) { fn get_ap_stack() -> usize { while AP_SEM - .compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed) + .compare_exchange_weak( + false, + true, + Ordering::Acquire, + Ordering::Relaxed, + ) .is_err() { 
core::hint::spin_loop(); @@ -344,12 +364,14 @@ fn get_ap_stack() -> usize { } fn ap_entry(hart_id: usize, stack_bottom: PAddr) -> ! { - let stack_range = PRange::new(stack_bottom - (1 << 3) * PAGE_SIZE, stack_bottom); + let stack_range = + PRange::new(stack_bottom - (1 << 3) * PAGE_SIZE, stack_bottom); { // SAFETY: Acquire all the work done by the BSP and other APs. let alloc = loop { - let alloc = BSP_PAGE_ALLOC.swap(core::ptr::null_mut(), Ordering::AcqRel); + let alloc = + BSP_PAGE_ALLOC.swap(core::ptr::null_mut(), Ordering::AcqRel); if !alloc.is_null() { break alloc; diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index f67646cf..45d44c6f 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -4,8 +4,8 @@ use core::ptr::NonNull; use eonix_hal_traits::mm::Memory; use eonix_mm::address::{Addr as _, AddrOps, PAddr, PRange, PhysAccess, VAddr}; use eonix_mm::page_table::{ - PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, RawPageTable, - TableAttribute, PTE, + PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, + RawPageTable, TableAttribute, PTE, }; use eonix_mm::paging::{BasicFolio, Folio, PageAccess, PageBlock, PFN}; use eonix_sync_base::LazyLock; @@ -115,7 +115,9 @@ impl RawAttribute for PageAttribute64 { table_attr |= TableAttribute::PRESENT; } - if table_attr.contains(TableAttribute::PRESENT) && self.0 & (PA_R | PA_W | PA_X) != 0 { + if table_attr.contains(TableAttribute::PRESENT) + && self.0 & (PA_R | PA_W | PA_X) != 0 + { return None; } @@ -139,7 +141,9 @@ impl RawAttribute for PageAttribute64 { page_attr |= PageAttribute::PRESENT; } - if page_attr.contains(PageAttribute::PRESENT) && (self.0 & (PA_R | PA_W | PA_X) == 0) { + if page_attr.contains(PageAttribute::PRESENT) + && (self.0 & (PA_R | PA_W | PA_X) == 0) + { return None; } @@ -278,18 +282,22 @@ impl Memory for ArchMemory { let kernel_end = PAddr::from(__kernel_end as usize - 
KIMAGE_OFFSET); let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)).chain( - Self::present_ram() - .filter(move |range| range.end() > paddr_after_kimage_aligned) - .map(move |range| { - if range.start() < paddr_after_kimage_aligned { - let (_, right) = range.split_at(paddr_after_kimage_aligned); - right - } else { - range - } - }), - ) + core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) + .chain( + Self::present_ram() + .filter(move |range| { + range.end() > paddr_after_kimage_aligned + }) + .map(move |range| { + if range.start() < paddr_after_kimage_aligned { + let (_, right) = + range.split_at(paddr_after_kimage_aligned); + right + } else { + range + } + }), + ) } } @@ -314,17 +322,21 @@ where let kernel_end = PAddr::from(__kernel_end as usize - KIMAGE_OFFSET); let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)).chain( - self.filter(move |range| range.end() > paddr_after_kimage_aligned) + core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) + .chain( + self.filter(move |range| { + range.end() > paddr_after_kimage_aligned + }) .map(move |range| { if range.start() < paddr_after_kimage_aligned { - let (_, right) = range.split_at(paddr_after_kimage_aligned); + let (_, right) = + range.split_at(paddr_after_kimage_aligned); right } else { range } }), - ) + ) } } diff --git a/src/kernel_init.rs b/src/kernel_init.rs index 65af41e4..64c220b2 100644 --- a/src/kernel_init.rs +++ b/src/kernel_init.rs @@ -19,7 +19,8 @@ fn setup_kernel_page_array(alloc: BasicPageAllocRef, count_pages: usize) { // Map kernel page array. 
const V_KERNEL_PAGE_ARRAY_START: VAddr = VAddr::from(0xffffff8040000000); - let range = VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages); + let range = + VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages); for pte in global_page_table.iter_kernel(range) { let attr = PageAttribute::PRESENT | PageAttribute::WRITE From db0836fbe0a0e25ab10d0a166b065465b6c188da Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 01:32:55 +0800 Subject: [PATCH 47/54] riscv64, hal: simplify ArchMemory::free_ram implementation We already have `FDT.present_ram().free_ram()`. Remove the impl in `ArchMemory` to avoid confusion. Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/mm.rs | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 45d44c6f..3b6df07c 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -274,30 +274,7 @@ impl Memory for ArchMemory { } fn free_ram() -> impl Iterator { - unsafe extern "C" { - fn __kernel_start(); - fn __kernel_end(); - } - - let kernel_end = PAddr::from(__kernel_end as usize - KIMAGE_OFFSET); - let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) - .chain( - Self::present_ram() - .filter(move |range| { - range.end() > paddr_after_kimage_aligned - }) - .map(move |range| { - if range.start() < paddr_after_kimage_aligned { - let (_, right) = - range.split_at(paddr_after_kimage_aligned); - right - } else { - range - } - }), - ) + FDT.present_ram().free_ram() } } From 504934d663a186125210dabf17c9a008103ae3d0 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 01:51:32 +0800 Subject: [PATCH 48/54] style: add helper macros to retrieve symbol constants - Add `extern_symbol_value` to retrieve far relative symbol values. 
- Get `BSS_LENGTH` and `__kernel_end` using `extern_symbol_addr`. - Get `_ap_start` using `extern_symbol_value`. Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 28 +++++++------------ crates/eonix_hal/src/arch/riscv64/mm.rs | 8 ++---- crates/eonix_hal/src/lib.rs | 14 ++++++++++ 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 4e5afcfb..d317e448 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -36,6 +36,7 @@ use crate::bootstrap::BootStrapData; use crate::mm::{ ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator, }; +use crate::{extern_symbol_addr, extern_symbol_value}; #[unsafe(link_section = ".bootstrap.stack")] static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16]; @@ -156,10 +157,6 @@ pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! { } } -unsafe extern "C" { - fn BSS_LENGTH(); -} - /// TODO: /// 对kernel image添加更细的控制,或者不加也行 fn setup_kernel_page_table(alloc: BasicPageAllocRef) { @@ -171,17 +168,17 @@ fn setup_kernel_page_table(alloc: BasicPageAllocRef) { let attr = PageAttribute::WRITE | PageAttribute::READ - | PageAttribute::EXECUTE | PageAttribute::GLOBAL | PageAttribute::PRESENT; const KERNEL_BSS_START: VAddr = VAddr::from(0xffffffff40000000); + let bss_length = extern_symbol_addr!(BSS_LENGTH); + // Map kernel BSS - let bss_range = VRange::from(KERNEL_BSS_START).grow(BSS_LENGTH as usize); + let bss_range = VRange::from(KERNEL_BSS_START).grow(bss_length); for pte in global_page_table.iter_kernel(bss_range) { let page = alloc.alloc().unwrap(); - let attr = attr.difference(PageAttribute::EXECUTE); pte.set(page.into_raw(), attr.into()); } @@ -192,7 +189,7 @@ fn setup_kernel_page_table(alloc: BasicPageAllocRef) { core::ptr::write_bytes( KERNEL_BSS_START.addr() as *mut (), 0, - BSS_LENGTH as usize, + bss_length, ); } @@ 
-247,15 +244,6 @@ fn setup_cpu(alloc: impl FrameAlloc, hart_id: usize) { percpu_area.register(cpu.cpuid()); } -fn get_ap_start_addr() -> usize { - unsafe extern "C" { - fn _ap_start(); - } - static AP_START_VALUE: &'static unsafe extern "C" fn() = - &(_ap_start as unsafe extern "C" fn()); - unsafe { (AP_START_VALUE as *const _ as *const usize).read_volatile() } -} - fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) { let local_hart_id = CPU::local().cpuid(); let mut ap_count = 0; @@ -286,7 +274,11 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) { } unsafe { - hart_start(hart_id, PhysicalAddress::new(get_ap_start_addr()), 0); + hart_start( + hart_id, + PhysicalAddress::new(extern_symbol_value!(_ap_start)), + 0, + ); } while AP_COUNT.load(Ordering::Acquire) == ap_count { diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 3b6df07c..6362cdca 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -16,6 +16,7 @@ use riscv::register::satp; use super::config::mm::{PHYS_MAP_VIRT, ROOT_PAGE_TABLE_PFN}; use super::fdt::{FdtExt, FDT}; use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; +use crate::extern_symbol_addr; use crate::mm::BasicPageAlloc; const PAGE_TABLE_BASE: PFN = PFN::from_val(ROOT_PAGE_TABLE_PFN); @@ -291,12 +292,9 @@ where T: PresentRam, { fn free_ram(self) -> impl Iterator { - unsafe extern "C" { - fn __kernel_start(); - fn __kernel_end(); - } + let kernel_end = extern_symbol_addr!(__kernel_end) - KIMAGE_OFFSET; + let kernel_end = PAddr::from(kernel_end).ceil(); - let kernel_end = PAddr::from(__kernel_end as usize - KIMAGE_OFFSET); let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) diff --git a/crates/eonix_hal/src/lib.rs b/crates/eonix_hal/src/lib.rs index d3bf7825..3f49a326 100644 --- a/crates/eonix_hal/src/lib.rs +++ b/crates/eonix_hal/src/lib.rs @@ 
-69,3 +69,17 @@ macro_rules! extern_symbol_addr { $crate::symbol_addr!($sym, $type) }}; } + +#[macro_export] +macro_rules! extern_symbol_value { + ($sym:ident) => {{ + unsafe extern "C" { + fn $sym(); + } + + static SYMBOL_ADDR: &'static unsafe extern "C" fn() = + &($sym as unsafe extern "C" fn()); + + unsafe { (SYMBOL_ADDR as *const _ as *const usize).read_volatile() } + }}; +} From 49a8fa8a9dc91c37bb8b37c5a8404a35ebf1cb01 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 01:53:22 +0800 Subject: [PATCH 49/54] style: reformat files Signed-off-by: greatbridf --- crates/eonix_hal/src/lib.rs | 4 +- src/kernel/task/signal/signal_action.rs | 52 +++++++++++++++---------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/crates/eonix_hal/src/lib.rs b/crates/eonix_hal/src/lib.rs index 3f49a326..d8696994 100644 --- a/crates/eonix_hal/src/lib.rs +++ b/crates/eonix_hal/src/lib.rs @@ -11,7 +11,9 @@ pub mod mm; pub mod trap; pub mod fence { - pub use crate::arch::fence::{memory_barrier, read_memory_barrier, write_memory_barrier}; + pub use crate::arch::fence::{ + memory_barrier, read_memory_barrier, write_memory_barrier, + }; } pub mod fpu { diff --git a/src/kernel/task/signal/signal_action.rs b/src/kernel/task/signal/signal_action.rs index 708f9802..cbf81935 100644 --- a/src/kernel/task/signal/signal_action.rs +++ b/src/kernel/task/signal/signal_action.rs @@ -1,22 +1,24 @@ -use super::{KResult, SAVED_DATA_SIZE}; -use crate::{ - io::BufferFill as _, - kernel::{ - constants::{EFAULT, EINVAL}, - syscall::UserMut, - user::UserBuffer, - }, -}; -use alloc::{collections::btree_map::BTreeMap, sync::Arc}; +use alloc::collections::btree_map::BTreeMap; +use alloc::sync::Arc; use core::arch::naked_asm; -use eonix_hal::{fpu::FpuState, traits::trap::RawTrapContext, trap::TrapContext}; + +use eonix_hal::fpu::FpuState; +use eonix_hal::traits::trap::RawTrapContext; +use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr}; use 
eonix_sync::Spin; -use posix_types::{ - ctypes::Long, - signal::{SigAction, SigActionHandler, SigActionRestorer, SigSet, Signal, TryFromSigAction}, - SIGNAL_NOW, +use posix_types::ctypes::Long; +use posix_types::signal::{ + SigAction, SigActionHandler, SigActionRestorer, SigSet, Signal, + TryFromSigAction, }; +use posix_types::SIGNAL_NOW; + +use super::{KResult, SAVED_DATA_SIZE}; +use crate::io::BufferFill as _; +use crate::kernel::constants::{EFAULT, EINVAL}; +use crate::kernel::syscall::UserMut; +use crate::kernel::user::UserBuffer; #[cfg(target_arch = "x86_64")] #[unsafe(naked)] @@ -139,7 +141,9 @@ impl SignalAction { handler, restorer, .. } = self else { - unreachable!("Default and Ignore actions should not be handled here"); + unreachable!( + "Default and Ignore actions should not be handled here" + ); }; let current_sp = VAddr::from(trap_ctx.get_stack_pointer()); @@ -167,7 +171,9 @@ impl SignalAction { target_arch = "riscv64", target_arch = "loongarch64" )))] - compile_error!("`vdso_sigreturn` is not implemented for this architecture"); + compile_error!( + "`vdso_sigreturn` is not implemented for this architecture" + ); #[cfg(target_arch = "x86_64")] { @@ -178,19 +184,22 @@ impl SignalAction { unsafe { // SAFETY: To prevent the compiler from optimizing this into `la` instructions // and causing a linking error. - (VDSO_SIGRETURN_ADDR as *const _ as *const usize).read_volatile() + (VDSO_SIGRETURN_ADDR as *const _ as *const usize) + .read_volatile() } } #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { - static VDSO_RT_SIGRETURN_ADDR: &'static unsafe extern "C" fn() = + static VDSO_RT_SIGRETURN_ADDR: + &'static unsafe extern "C" fn() = &(vdso_rt_sigreturn as unsafe extern "C" fn()); unsafe { // SAFETY: To prevent the compiler from optimizing this into `la` instructions // and causing a linking error. 
- (VDSO_RT_SIGRETURN_ADDR as *const _ as *const usize).read_volatile() + (VDSO_RT_SIGRETURN_ADDR as *const _ as *const usize) + .read_volatile() } } }; @@ -201,7 +210,8 @@ impl SignalAction { Some(return_address), &[Long::new_val(signal.into_raw() as _).get()], |vaddr, data| -> Result<(), u32> { - let mut buffer = UserBuffer::new(UserMut::new(vaddr), data.len())?; + let mut buffer = + UserBuffer::new(UserMut::new(vaddr), data.len())?; for ch in data.iter() { buffer.copy(&ch)?.ok_or(EFAULT)?; } From bb3d5e5cd0f14a0391cddcd3ab4f5549bd9ef5eb Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 02:07:08 +0800 Subject: [PATCH 50/54] riscv64, linker: make sure vdso lies inside .data With current ldscript, linkers will put vdso data after `__kernel_end`, which is buggy since we use the symbol to indicate the end of our kernel image and newly allocated pages may overwrite those positions. Change by place the vdso inside REGION_DATA. Remove old VDSO memory region. Align .data section end to page size border. Add a helper macro to retrieve .vdso section symbol addresses. Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/link.x | 4 ++- crates/eonix_hal/src/arch/riscv64/memory.x | 1 - crates/eonix_hal/src/link.x.in | 4 +-- src/kernel/task/signal/signal_action.rs | 31 +++++++++------------- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/link.x b/crates/eonix_hal/src/arch/riscv64/link.x index e348e1be..a74f0d0d 100644 --- a/crates/eonix_hal/src/arch/riscv64/link.x +++ b/crates/eonix_hal/src/arch/riscv64/link.x @@ -81,10 +81,12 @@ INSERT AFTER .rodata; SECTIONS { .vdso ALIGN(0x1000) : ALIGN(0x1000) { + VDSO_START = ABSOLUTE(.); + KEEP(*(.vdso .vdso.*)); . 
= ALIGN(0x1000); - } > VDSO AT> RAM + } > REGION_DATA AT> RAM VDSO_PADDR = LOADADDR(.vdso); } diff --git a/crates/eonix_hal/src/arch/riscv64/memory.x b/crates/eonix_hal/src/arch/riscv64/memory.x index 0dc7c4ff..f2029c9a 100644 --- a/crates/eonix_hal/src/arch/riscv64/memory.x +++ b/crates/eonix_hal/src/arch/riscv64/memory.x @@ -3,7 +3,6 @@ ENTRY(_start) MEMORY { RAM : org = 0x0000000080200000, len = 8M - VDSO : org = 0x00007f0000000000, len = 4K KBSS : org = 0xffffffff40000000, len = 2M KIMAGE : org = 0xffffffff80200000, len = 8M } diff --git a/crates/eonix_hal/src/link.x.in b/crates/eonix_hal/src/link.x.in index 81c269c2..eaabdfda 100644 --- a/crates/eonix_hal/src/link.x.in +++ b/crates/eonix_hal/src/link.x.in @@ -18,7 +18,7 @@ SECTIONS { __srodata = .; *(.rodata .rodata.*); - + . = ALIGN(8); PROVIDE(__eh_frame = .); @@ -41,7 +41,7 @@ SECTIONS { } > REGION_DATA AT> LINK_REGION_DATA - .data.after : + .data.after : ALIGN(0x1000) { __data_after = .; } > REGION_DATA AT> LINK_REGION_DATA diff --git a/src/kernel/task/signal/signal_action.rs b/src/kernel/task/signal/signal_action.rs index cbf81935..18348c32 100644 --- a/src/kernel/task/signal/signal_action.rs +++ b/src/kernel/task/signal/signal_action.rs @@ -20,6 +20,16 @@ use crate::kernel::constants::{EFAULT, EINVAL}; use crate::kernel::syscall::UserMut; use crate::kernel::user::UserBuffer; +macro_rules! vdso_sym_addr { + ($sym:expr) => {{ + const VDSO_START_VADDR: VAddr = VAddr::from(0x7f00_0000_0000); + let vdso_link_start = eonix_hal::extern_symbol_addr!(VDSO_START); + + eonix_hal::symbol_addr!($sym) - vdso_link_start + + VDSO_START_VADDR.addr() + }}; +} + #[cfg(target_arch = "x86_64")] #[unsafe(naked)] #[unsafe(link_section = ".vdso.sigreturn")] @@ -178,29 +188,12 @@ impl SignalAction { #[cfg(target_arch = "x86_64")] { // TODO: Check and use `vdso_rt_sigreturn` for x86 as well. 
- static VDSO_SIGRETURN_ADDR: &'static unsafe extern "C" fn() = - &(vdso_rt_sigreturn as unsafe extern "C" fn()); - - unsafe { - // SAFETY: To prevent the compiler from optimizing this into `la` instructions - // and causing a linking error. - (VDSO_SIGRETURN_ADDR as *const _ as *const usize) - .read_volatile() - } + vdso_sym_addr!(vdso_rt_sigreturn) } #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { - static VDSO_RT_SIGRETURN_ADDR: - &'static unsafe extern "C" fn() = - &(vdso_rt_sigreturn as unsafe extern "C" fn()); - - unsafe { - // SAFETY: To prevent the compiler from optimizing this into `la` instructions - // and causing a linking error. - (VDSO_RT_SIGRETURN_ADDR as *const _ as *const usize) - .read_volatile() - } + vdso_sym_addr!(vdso_rt_sigreturn) } }; From dac24f9754471c15e2d8251e1e766384073a2088 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 03:08:28 +0800 Subject: [PATCH 51/54] riscv64: rewrite FDT and present free memory parsing Strip out the memory used by the kernel and FDT data out of free memory block returned by FDT. 
Closes: #54 ("Random kernel freezing on process creation / exiting") Fixes: 4351cf55739f3 ("partial work: fix riscv64 bootstrap") Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 7 +- crates/eonix_hal/src/arch/riscv64/fdt.rs | 115 +++++++++++------- crates/eonix_hal/src/arch/riscv64/mm.rs | 36 +----- 3 files changed, 78 insertions(+), 80 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index d317e448..ccb32527 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -29,7 +29,7 @@ use super::time::set_next_timer; use crate::arch::cpu::CPU; use crate::arch::fdt::{init_dtb_and_fdt, FdtExt, FDT}; use crate::arch::mm::{ - ArchPagingMode, ArchPhysAccess, FreeRam, PageAccessImpl, PageAttribute64, + ArchPagingMode, ArchPhysAccess, PageAccessImpl, PageAttribute64, RawPageTableSv48, GLOBAL_PAGE_TABLE, }; use crate::bootstrap::BootStrapData; @@ -113,13 +113,12 @@ unsafe extern "C" fn _start(hart_id: usize, dtb_addr: usize) { } pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! 
{ - let fdt = Fdt::from_ptr(ArchPhysAccess::as_ptr(dtb_addr).as_ptr()) - .expect("Failed to parse DTB from static memory."); + let fdt = unsafe { FdtExt::new(dtb_addr) }; let real_allocator = RefCell::new(BasicPageAlloc::new()); let alloc = BasicPageAllocRef::new(&real_allocator); - for range in fdt.present_ram().free_ram() { + for range in fdt.free_ram() { real_allocator.borrow_mut().add_range(range); } diff --git a/crates/eonix_hal/src/arch/riscv64/fdt.rs b/crates/eonix_hal/src/arch/riscv64/fdt.rs index 5efcc98d..908256c7 100644 --- a/crates/eonix_hal/src/arch/riscv64/fdt.rs +++ b/crates/eonix_hal/src/arch/riscv64/fdt.rs @@ -1,62 +1,95 @@ -use super::mm::{ArchPhysAccess, PresentRam}; -use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; -use core::sync::atomic::{AtomicPtr, Ordering}; -use eonix_mm::address::{PAddr, PRange, PhysAccess}; +use core::ops::Deref; +use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering}; + +use eonix_mm::address::{Addr, AddrOps, PAddr, PRange, PhysAccess}; use eonix_sync_base::LazyLock; use fdt::Fdt; -static DTB_VIRT_PTR: AtomicPtr = AtomicPtr::new(core::ptr::null_mut()); -pub static FDT: LazyLock> = LazyLock::new(|| unsafe { - Fdt::from_ptr(DTB_VIRT_PTR.load(Ordering::Acquire)) - .expect("Failed to parse DTB from static memory.") +use super::mm::ArchPhysAccess; +use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; +use crate::extern_symbol_addr; + +static DTB_PADDR: AtomicUsize = AtomicUsize::new(0); +pub static FDT: LazyLock = LazyLock::new(|| unsafe { + FdtExt::new(PAddr::from_val(DTB_PADDR.load(Ordering::Relaxed))) }); -pub trait FdtExt { - fn harts(&self) -> impl Iterator; +pub struct FdtExt { + fdt: Fdt<'static>, + range: PRange, +} - fn hart_count(&self) -> usize { - self.harts().count() - } +impl FdtExt { + /// # Safety + /// The caller MUST ensure that [`addr`] points to valid FDT. 
+ pub unsafe fn new(addr: PAddr) -> Self { + let fdt = unsafe { + Fdt::from_ptr(ArchPhysAccess::as_ptr(addr).as_ptr()) + .expect("Failed to parse DTB from static memory.") + }; - fn present_ram(&self) -> impl Iterator; -} + Self { + range: PRange::from(addr).grow(fdt.total_size()), + fdt, + } + } -impl FdtExt for Fdt<'_> { - fn harts(&self) -> impl Iterator { + pub fn harts(&self) -> impl Iterator { self.cpus().map(|cpu| cpu.ids().all()).flatten() } - fn present_ram(&self) -> impl Iterator + PresentRam { - struct Present(I); - impl PresentRam for Present where I: Iterator {} - impl Iterator for Present - where - I: Iterator, - { - type Item = PRange; - - fn next(&mut self) -> Option { - self.0.next() - } - } + pub fn hart_count(&self) -> usize { + self.harts().count() + } + pub fn present_ram(&self) -> impl Iterator { let mut index = 0; - Present(core::iter::from_fn(move || { - self.memory() + + core::iter::from_fn(move || { + let item = self + .memory() .regions() .filter_map(|region| { - region.size.map(|len| { - PRange::from(PAddr::from(region.starting_address as usize)).grow(len) - }) + let start = PAddr::from(region.starting_address as usize); + Some(start).zip(region.size) }) - .skip(index) - .next() - .inspect(|_| index += 1) - })) + .map(|(start, len)| PRange::from(start).grow(len)) + .nth(index); + + index += 1; + item + }) + } + + pub fn free_ram(&self) -> impl Iterator { + let kernel_end = extern_symbol_addr!(__kernel_end) - KIMAGE_OFFSET; + let kernel_end = PAddr::from(kernel_end).ceil(); + + // TODO: move this to some platform-specific crate + self.present_ram().map(move |mut range| { + // Strip out parts before __kernel_end + if range.overlap_with(&PRange::from(kernel_end)) { + (_, range) = range.split_at(kernel_end); + } + + // Strip out part after the FDT + if range.overlap_with(&self.range) { + (range, _) = range.split_at(self.range.start()); + } + + range + }) + } +} + +impl Deref for FdtExt { + type Target = Fdt<'static>; + + fn deref(&self) -> 
&Self::Target { + &self.fdt } } pub unsafe fn init_dtb_and_fdt(dtb_paddr: PAddr) { - let dtb_virt_ptr = ArchPhysAccess::as_ptr(dtb_paddr); - DTB_VIRT_PTR.store(dtb_virt_ptr.as_ptr(), Ordering::Release); + DTB_PADDR.store(dtb_paddr.addr(), Ordering::Relaxed); } diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 6362cdca..7891f094 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -275,46 +275,12 @@ impl Memory for ArchMemory { } fn free_ram() -> impl Iterator { - FDT.present_ram().free_ram() + FDT.free_ram() } } pub type DefaultPagingMode = PagingModeSv48; -pub trait PresentRam: Iterator {} - -pub trait FreeRam: PresentRam { - fn free_ram(self) -> impl Iterator; -} - -impl FreeRam for T -where - T: PresentRam, -{ - fn free_ram(self) -> impl Iterator { - let kernel_end = extern_symbol_addr!(__kernel_end) - KIMAGE_OFFSET; - let kernel_end = PAddr::from(kernel_end).ceil(); - - let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) - .chain( - self.filter(move |range| { - range.end() > paddr_after_kimage_aligned - }) - .map(move |range| { - if range.start() < paddr_after_kimage_aligned { - let (_, right) = - range.split_at(paddr_after_kimage_aligned); - right - } else { - range - } - }), - ) - } -} - #[inline(always)] pub fn flush_tlb(vaddr: usize) { sfence_vma(0, vaddr); From 1f4230d53b7c73b05d7f50612d7572c8351654e0 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 03:12:29 +0800 Subject: [PATCH 52/54] sysinit: pointee type should be u8 when using ptr::write_bytes Using *const (), no bytes are written to the position, which might result in uninitialized memory access. 
Fixes: ebd3d1224c01 ("change(x86): optimize bootstrap code, remove kinit.cpp") Fixes: 191877a3acd0 ("feat(hal): impl basic single hart bootstrap for riscv64") Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/bootstrap.rs | 2 +- src/kernel_init.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index ccb32527..b0c235aa 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -186,7 +186,7 @@ fn setup_kernel_page_table(alloc: BasicPageAllocRef) { unsafe { core::ptr::write_bytes( - KERNEL_BSS_START.addr() as *mut (), + KERNEL_BSS_START.addr() as *mut u8, 0, bss_length, ); diff --git a/src/kernel_init.rs b/src/kernel_init.rs index 64c220b2..2259f6cf 100644 --- a/src/kernel_init.rs +++ b/src/kernel_init.rs @@ -41,7 +41,7 @@ fn setup_kernel_page_array(alloc: BasicPageAllocRef, count_pages: usize) { unsafe { // SAFETY: We've just mapped the area with sufficient length. 
core::ptr::write_bytes( - V_KERNEL_PAGE_ARRAY_START.addr() as *mut (), + V_KERNEL_PAGE_ARRAY_START.addr() as *mut u8, 0, count_pages * PAGE_SIZE, ); From 31ab01eb71d474965697a8dd1b0a40a13f4caf45 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 03:21:52 +0800 Subject: [PATCH 53/54] style: reformat file Signed-off-by: greatbridf --- crates/eonix_hal/src/mm.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/eonix_hal/src/mm.rs b/crates/eonix_hal/src/mm.rs index c4b9bb74..c6c6a369 100644 --- a/crates/eonix_hal/src/mm.rs +++ b/crates/eonix_hal/src/mm.rs @@ -7,8 +7,8 @@ use eonix_mm::page_table::PageTableAlloc; use eonix_mm::paging::{BasicFolio, FrameAlloc, PAGE_SIZE, PFN}; pub use crate::arch::mm::{ - flush_tlb, flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchMemory, - ArchPhysAccess, GLOBAL_PAGE_TABLE, + flush_tlb, flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, + ArchMemory, ArchPhysAccess, GLOBAL_PAGE_TABLE, }; pub struct BasicPageAlloc { @@ -87,7 +87,8 @@ impl BasicPageAlloc { panic!("Page allocator is full"); } - self.ranges[tail] = Some(PRange::new(range.start().ceil(), range.end().floor())); + self.ranges[tail] = + Some(PRange::new(range.start().ceil(), range.end().floor())); } pub fn alloc(&mut self, order: u32) -> PFN { @@ -147,7 +148,10 @@ impl<'a> ScopedAllocator<'a> { } } - pub fn with_alloc<'b, 'r, O>(&'r self, func: impl FnOnce(&'b ScopedAllocator<'a>) -> O) -> O + pub fn with_alloc<'b, 'r, O>( + &'r self, + func: impl FnOnce(&'b ScopedAllocator<'a>) -> O, + ) -> O where 'a: 'b, 'r: 'b, From 4f6289a568fabe1a7742b755c8093ab672e7dedc Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 03:24:11 +0800 Subject: [PATCH 54/54] hal, mm: alloc basic folios from low to high addr This can shorten qemu memory map. 
Signed-off-by: greatbridf --- crates/eonix_hal/src/mm.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/eonix_hal/src/mm.rs b/crates/eonix_hal/src/mm.rs index c6c6a369..ef006cb5 100644 --- a/crates/eonix_hal/src/mm.rs +++ b/crates/eonix_hal/src/mm.rs @@ -41,9 +41,8 @@ impl BasicPageAlloc { fn alloc_one(&mut self) -> PFN { assert_ne!(self.head, self.tail, "No free pages available"); let mut range = self.ranges[self.head].take().unwrap(); - range = range.shrink(PAGE_SIZE); - - let pfn = PFN::from(range.end()); + let pfn = PFN::from(range.start()); + range = PRange::new(range.start() + PAGE_SIZE, range.end()); if range.len() != 0 { self.ranges[self.head] = Some(range);