From f6c26c956c97da2506c4e69b9dc4f121da39af0b Mon Sep 17 00:00:00 2001 From: Heinz Date: Wed, 23 Jul 2025 23:40:26 +0800 Subject: [PATCH 01/29] perf: replace the annoyed ext4 crate with a new choice (called another ext4 crate --- Cargo.lock | 41 +++++++++++----------- Cargo.toml | 2 +- src/fs/ext4.rs | 94 ++++++++++++++++++++++++++++++-------------------- 3 files changed, 78 insertions(+), 59 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b8b8e1b9..04b25a21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,15 +19,24 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c330e503236d0b06386ae6cc42a513ef1ccc23c52b603c1b52f018564faf44" +[[package]] +name = "another_ext4" +version = "0.1.0" +source = "git+https://github.com/SMS-Derfflinger/another_ext4?branch=main#ed6d91718db721eb4a744483c289cc44a6f34bf4" +dependencies = [ + "bitflags", + "log", +] + [[package]] name = "atomic_unique_refcell" version = "0.1.0" [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bit_field" @@ -51,9 +60,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "critical-section" @@ -133,6 +142,7 @@ version = "0.1.0" dependencies = [ "acpi", "align_ext", + "another_ext4", "atomic_unique_refcell", "bitflags", "buddy_allocator", @@ -144,7 +154,6 @@ dependencies = [ "eonix_preempt", "eonix_runtime", "eonix_sync", - "ext4_rs", "intrusive-collections", "intrusive_list", "itertools", @@ -247,16 +256,6 @@ dependencies = [ 
"intrusive-collections", ] -[[package]] -name = "ext4_rs" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1a97344bde15b0ace15e265dab27228d4bdc37a0bfa8548c5645d7cfa6a144" -dependencies = [ - "bitflags", - "log", -] - [[package]] name = "fdt" version = "0.1.5" @@ -392,9 +391,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.101" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", @@ -459,18 +458,18 @@ checksum = "2fe21bcc34ca7fe6dd56cc2cb1261ea59d6b93620215aefb5ea6032265527784" [[package]] name = "zerocopy" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index edc2c319..ceea1cf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ itertools = { version = "0.13.0", default-features = false } acpi = "5.2.0" align_ext = "0.1.0" xmas-elf = "0.10.0" -ext4_rs = "1.3.2" +another_ext4 = { git = "https://github.com/SMS-Derfflinger/another_ext4", branch = "main" } [target.'cfg(target_arch = "riscv64")'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 3ffc2fe0..56928623 100644 --- a/src/fs/ext4.rs +++ 
b/src/fs/ext4.rs @@ -22,10 +22,11 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; +use another_ext4::{ + Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, +}; use eonix_runtime::task::Task; use eonix_sync::RwLock; -use ext4_rs::{BlockDevice as Ext4BlockDeviceTrait, Ext4Error}; -use ext4_rs::{Errno, Ext4}; pub struct Ext4BlockDevice { device: Arc, @@ -38,19 +39,22 @@ impl Ext4BlockDevice { } impl Ext4BlockDeviceTrait for Ext4BlockDevice { - fn read_offset(&self, offset: usize) -> Vec { - let mut buffer = vec![0u8; 4096]; + fn read_block(&self, block_id: PBlockId) -> Block { + let mut buffer = [0u8; 4096]; let mut byte_buffer = ByteBuffer::new(buffer.as_mut_slice()); let _ = self .device - .read_some(offset, &mut byte_buffer) + .read_some((block_id as usize) * 4096, &mut byte_buffer) .expect("Failed to read from block device"); - buffer + Block { + id: block_id, + data: buffer, + } } - fn write_offset(&self, _offset: usize, _data: &[u8]) { + fn write_block(&self, block: &another_ext4::Block) { todo!() } } @@ -115,7 +119,7 @@ impl Ext4Fs { impl Ext4Fs { pub fn create(device: Arc) -> KResult<(Arc, Arc)> { let ext4_device = Ext4BlockDevice::new(device.clone()); - let ext4 = Ext4::open(Arc::new(ext4_device)); + let ext4 = Ext4::load(Arc::new(ext4_device)).unwrap(); let ext4fs = Arc::new(Self { inner: ext4, @@ -125,28 +129,28 @@ impl Ext4Fs { let root_inode = { let mut icache = Task::block_on(ext4fs.icache.write()); - let root_inode = ext4fs.inner.get_inode_ref(2); + let root_inode = ext4fs.inner.read_root_inode(); ext4fs.get_or_insert( &mut icache, InodeData { - ino: root_inode.inode_num as Ino, + ino: root_inode.id as Ino, size: AtomicU64::new(root_inode.inode.size()), - nlink: AtomicNlink::new(root_inode.inode.links_count() as _), - uid: AtomicU32::new(root_inode.inode.uid() as _), - gid: AtomicU32::new(root_inode.inode.gid() as _), - mode: AtomicU32::new(root_inode.inode.mode() as _), + nlink: 
AtomicNlink::new(root_inode.inode.link_count() as u64), + uid: AtomicU32::new(root_inode.inode.uid()), + gid: AtomicU32::new(root_inode.inode.gid()), + mode: AtomicU32::new(root_inode.inode.mode().bits() as u32), atime: Spin::new(Instant::new( root_inode.inode.atime() as _, - root_inode.inode.i_atime_extra() as _, + root_inode.inode.atime_extra() as _, )), ctime: Spin::new(Instant::new( root_inode.inode.ctime() as _, - root_inode.inode.i_ctime_extra() as _, + root_inode.inode.ctime_extra() as _, )), mtime: Spin::new(Instant::new( root_inode.inode.mtime() as _, - root_inode.inode.i_mtime_extra() as _, + root_inode.inode.mtime_extra() as _, )), rwsem: RwLock::new(()), vfs: Arc::downgrade(&ext4fs) as _, @@ -187,12 +191,12 @@ impl Inode for FileInode { let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let mut temp_buf = vec![0u8; buffer.total()]; - match ext4fs.inner.read_at(self.ino as u32, offset, &mut temp_buf) { + match ext4fs.inner.read(self.ino as u32, offset, &mut temp_buf) { Ok(bytes_read) => { let _ = buffer.fill(&temp_buf[..bytes_read])?; Ok(buffer.wrote()) } - Err(e) => Err(e.error() as u32), + Err(e) => Err(e.code() as u32), } } } @@ -204,13 +208,14 @@ impl Inode for DirInode { let name = dentry.get_name(); let name = String::from_utf8_lossy(&name); - let lookup_result = ext4fs.inner.fuse_lookup(self.ino, &name); + let lookup_result = ext4fs.inner.lookup(self.ino as u32, &name); - const EXT4_ERROR_ENOENT: Ext4Error = Ext4Error::new(Errno::ENOENT); + // TODO: wtf + //const EXT4_ERROR_ENOENT: Ext4Error_ = Ext4Error_::new(ErrCode::ENOENT); let attr = match lookup_result { - Ok(attr) => attr, - Err(EXT4_ERROR_ENOENT) => return Ok(None), - Err(error) => return Err(error.error() as u32), + Ok(inode_id) => ext4fs.inner.getattr(inode_id).unwrap(), + //Err(EXT4_ERROR_ENOENT) => return Ok(None), + Err(error) => return Err(error.code() as u32), }; // Fast path: if the inode is already in the cache, return it. 
@@ -219,9 +224,19 @@ impl Inode for DirInode { return Ok(Some(inode)); } - let extra_perm = attr.perm.bits() as u32 & 0o7000; - let perm = attr.perm.bits() as u32 & 0o0700; - let real_perm = extra_perm | perm | perm >> 3 | perm >> 6; + let file_type_bits = match attr.ftype { + FileType::RegularFile => InodeMode::FILE.bits(), + FileType::Directory => InodeMode::DIRECTORY.bits(), + FileType::CharacterDev => InodeMode::CHARDEV.bits(), + FileType::BlockDev => InodeMode::BLOCKDEV.bits(), + FileType::Fifo => InodeMode::FIFO.bits(), + FileType::Socket => InodeMode::SOCKET.bits(), + FileType::SymLink => InodeMode::SOFTLINK.bits(), + FileType::Unknown => 0, + }; + + let perm_bits = attr.perm.bits() & InodeMode::PERM_MASK.bits(); + let mode = file_type_bits | perm_bits; // Create a new inode based on the attributes. let mut icache = Task::block_on(ext4fs.icache.write()); @@ -230,10 +245,10 @@ impl Inode for DirInode { InodeData { ino: attr.ino as Ino, size: AtomicU64::new(attr.size), - nlink: AtomicNlink::new(attr.nlink as _), + nlink: AtomicNlink::new(attr.links as _), uid: AtomicU32::new(attr.uid), gid: AtomicU32::new(attr.gid), - mode: AtomicU32::new(attr.kind.bits() as u32 | real_perm), + mode: AtomicU32::new(mode as u32), atime: Spin::new(Instant::new(attr.atime as _, 0)), ctime: Spin::new(Instant::new(attr.ctime as _, 0)), mtime: Spin::new(Instant::new(attr.mtime as _, 0)), @@ -255,18 +270,23 @@ impl Inode for DirInode { let entries = ext4fs .inner - .fuse_readdir(self.ino as u64, 0, offset as i64) - .map_err(|err| err.error() as u32)?; - let mut current_offset = 0; + .listdir(self.ino as u32) + .map_err(|err| err.code() as u32)?; - for entry in entries { - let name_len = entry.name_len as usize; - let name = &entry.name[..name_len]; + let entries_to_process = if offset < entries.len() { + &entries[offset..] 
+ } else { + &entries[0..0] + }; + let mut current_offset = 0; + for entry in entries_to_process { + let name_string = entry.name(); + let name = name_string.as_bytes(); + let inode = entry.inode() as Ino; - if callback(name, entry.inode as Ino)?.is_break() { + if callback(name, inode)?.is_break() { break; } - current_offset += 1; } Ok(current_offset) From f05037374c20c896a0249ffda6edebb7bd562d9c Mon Sep 17 00:00:00 2001 From: Heinz Date: Fri, 25 Jul 2025 22:47:02 +0800 Subject: [PATCH 02/29] feat(fs): impl write, create and mkdir for ext4 fs --- src/driver/virtio/virtio_blk.rs | 18 +++- src/fs/ext4.rs | 158 ++++++++++++++++++++++++++++++-- src/kernel/block.rs | 97 ++++++++++++++++++++ 3 files changed, 266 insertions(+), 7 deletions(-) diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs index 57026d2a..3b15063d 100644 --- a/src/driver/virtio/virtio_blk.rs +++ b/src/driver/virtio/virtio_blk.rs @@ -18,7 +18,23 @@ impl BlockRequestQueue for Spin>> { fn submit(&self, req: BlockDeviceRequest) -> KResult<()> { match req { - BlockDeviceRequest::Write { .. } => todo!(), + BlockDeviceRequest::Write { + sector, + count, + buffer, + } => { + let mut dev = self.lock(); + for ((start, len), buffer_page) in + Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter()) + { + let buffer = unsafe { + // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us. 
+ &buffer_page.as_memblk().as_bytes()[..len as usize * 512] + }; + + dev.write_blocks(start, buffer).map_err(|_| EIO)?; + } + } BlockDeviceRequest::Read { sector, count, diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 56928623..f7ca7578 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,14 +1,14 @@ -use core::sync::atomic::{AtomicU32, AtomicU64}; +use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use crate::{ - io::{Buffer, ByteBuffer}, + io::{Buffer, ByteBuffer, Stream}, kernel::{ block::BlockDevice, - constants::EIO, + constants::{EIO, S_IFDIR, S_IFREG}, timer::Instant, vfs::{ dentry::Dentry, - inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData}, + inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData, Mode, WriteOffset}, mount::{register_filesystem, Mount, MountCreator}, s_isdir, s_isreg, vfs::Vfs, @@ -20,7 +20,7 @@ use crate::{ }; use alloc::{ collections::btree_map::{BTreeMap, Entry}, - sync::Arc, + sync::{Arc, Weak}, }; use another_ext4::{ Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, @@ -55,7 +55,9 @@ impl Ext4BlockDeviceTrait for Ext4BlockDevice { } fn write_block(&self, block: &another_ext4::Block) { - todo!() + let _ = self + .device + .write_some((block.id as usize) * 4096, &block.data); } } @@ -85,6 +87,20 @@ impl Ext4Fs { icache.get(&ino).cloned().map(Ext4Inode::into_inner) } + fn update_modify_inode(&self, ino: u64, size: u64, mtime: u32) { + let _ = self.inner.setattr( + ino as u32, + None, + None, + None, + Some(size), + None, + Some(mtime), + None, + None, + ); + } + fn get_or_insert( &self, icache: &mut BTreeMap, @@ -185,6 +201,21 @@ define_struct_inode! 
{ struct DirInode; } +impl FileInode { + pub fn new(ino: Ino, vfs: Weak, mode: Mode) -> Arc { + Arc::new_cyclic(|_| FileInode { + idata: { + let inode_data = InodeData::new(ino, vfs); + inode_data + .mode + .store(S_IFREG | (mode & 0o777), Ordering::Relaxed); + inode_data.nlink.store(1, Ordering::Relaxed); + inode_data + }, + }) + } +} + impl Inode for FileInode { fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { let vfs = self.vfs.upgrade().ok_or(EIO)?; @@ -199,6 +230,68 @@ impl Inode for FileInode { Err(e) => Err(e.code() as u32), } } + + fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + let _lock = Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let mut temp_buf = vec![0u8; 4096]; + let mut total_written = 0; + + let offset = match offset { + WriteOffset::Position(offset) => offset, + // TODO: here need to add some operate + WriteOffset::End(end) => *end, + }; + + while let Some(data) = stream.poll_data(&mut temp_buf)? 
{ + let written = ext4fs + .inner + .write(self.ino as u32, offset + total_written, data) + .unwrap(); + total_written += written; + if written < data.len() { + break; + } + } + + let mtime = Instant::now(); + *self.mtime.lock() = mtime; + let new_size = (offset + total_written) as u64; + self.size + .store(offset as u64 + total_written as u64, Ordering::Relaxed); + ext4fs.update_modify_inode(self.ino, new_size, mtime.since_epoch().as_secs() as u32); + + Ok(total_written) + } + + // TODO + fn truncate(&self, length: usize) -> KResult<()> { + Ok(()) + } +} + +impl DirInode { + fn new(idata: InodeData) -> Arc { + let inode = Arc::new(Self { idata }); + + inode + } + + fn link(&self, file: &dyn Inode) { + let now = Instant::now(); + + // SAFETY: Only `unlink` will do something based on `nlink` count + // No need to synchronize here + file.nlink.fetch_add(1, Ordering::Relaxed); + *self.ctime.lock() = now; + + // SAFETY: `rwsem` has done the synchronization + self.size.fetch_add(1, Ordering::Relaxed); + *self.mtime.lock() = now; + } } impl Inode for DirInode { @@ -291,6 +384,59 @@ impl Inode for DirInode { } Ok(current_offset) } + + fn creat(&self, at: &Arc, mode: Mode) -> KResult<()> { + let _lock = Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let name = at.get_name(); + let name = String::from_utf8_lossy(&name); + + let new_ino = ext4fs + .inner + .create( + self.ino as u32, + &name, + InodeMode::from_bits_retain((mode | S_IFREG) as u16), + ) + .unwrap(); + + let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode); + let now = Instant::now(); + + *self.ctime.lock() = now; + // SAFETY: `rwsem` has done the synchronization + self.size.fetch_add(1, Ordering::Relaxed); + *self.mtime.lock() = now; + + at.save_reg(file) + } + + fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { + let _lock = Task::block_on(self.rwsem.write()); + + let vfs = 
self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let name = at.get_name(); + let name = String::from_utf8_lossy(&name); + + let new_ino = ext4fs + .inner + .mkdir( + self.ino as u32, + &name, + InodeMode::from_bits_retain((mode | S_IFDIR) as u16), + ) + .unwrap(); + + let newdir = DirInode::new(InodeData::new(new_ino as u64, self.vfs.clone())); + + self.link(newdir.as_ref()); + at.save_dir(newdir) + } } struct Ext4MountCreator; diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 4a10e4c7..069ae4ca 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -285,6 +285,103 @@ impl BlockDevice { Ok(FillResult::Partial(nfilled)) } } + + /// Write some data to the block device, may involve some copy and fragmentation + /// + /// # Arguments + /// `offset` - offset in bytes + /// `data` - data to write + /// + pub fn write_some(&self, offset: usize, data: &[u8]) -> KResult { + let mut sector_start = offset as u64 / 512; + let mut first_sector_offset = offset as u64 % 512; + let mut remaining_data = data; + let mut nwritten = 0; + + while !remaining_data.is_empty() { + let pages: &[Page]; + let page: Option; + let page_vec: Option>; + + // Calculate sectors needed for this write + let write_end = first_sector_offset + remaining_data.len() as u64; + let sector_count = ((write_end + 511) / 512).min(self.queue().max_request_pages()); + + match sector_count { + count if count <= 8 => { + let _page = Page::alloc(); + page = Some(_page); + pages = core::slice::from_ref(page.as_ref().unwrap()); + } + count if count <= 16 => { + let _pages = Page::alloc_order(1); + page = Some(_pages); + pages = core::slice::from_ref(page.as_ref().unwrap()); + } + count => { + let npages = (count + 15) / 16; + let mut _page_vec = Vec::with_capacity(npages as usize); + for _ in 0..npages { + _page_vec.push(Page::alloc_order(1)); + } + page_vec = Some(_page_vec); + pages = page_vec.as_ref().unwrap().as_slice(); + } + } + + if first_sector_offset 
!= 0 || remaining_data.len() < (sector_count * 512) as usize { + let read_req = BlockDeviceRequest::Read { + sector: sector_start, + count: sector_count, + buffer: pages, + }; + self.commit_request(read_req)?; + } + + let mut data_offset = 0; + let mut page_offset = first_sector_offset as usize; + + for page in pages.iter() { + // SAFETY: We own the page and can modify it + let page_data = unsafe { + let memblk = page.as_memblk(); + core::slice::from_raw_parts_mut(memblk.addr().get() as *mut u8, memblk.len()) + }; + + let copy_len = + (remaining_data.len() - data_offset).min(page_data.len() - page_offset); + + if copy_len == 0 { + break; + } + + page_data[page_offset..page_offset + copy_len] + .copy_from_slice(&remaining_data[data_offset..data_offset + copy_len]); + + data_offset += copy_len; + page_offset = 0; // Only first page has offset + + if data_offset >= remaining_data.len() { + break; + } + } + + let write_req = BlockDeviceRequest::Write { + sector: sector_start, + count: sector_count, + buffer: pages, + }; + self.commit_request(write_req)?; + + let bytes_written = data_offset; + nwritten += bytes_written; + remaining_data = &remaining_data[bytes_written..]; + sector_start += sector_count; + first_sector_offset = 0; + } + + Ok(nwritten) + } } pub enum BlockDeviceRequest<'lt> { From d59a550880297abbe5447b10e2214f4ad822cdec Mon Sep 17 00:00:00 2001 From: Heinz Date: Sat, 26 Jul 2025 16:33:52 +0800 Subject: [PATCH 03/29] feat(fs): impl remove file and dir. 
--- src/fs/ext4.rs | 61 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index f7ca7578..b321fbf1 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -4,10 +4,10 @@ use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ block::BlockDevice, - constants::{EIO, S_IFDIR, S_IFREG}, + constants::{EIO, EISDIR, S_IFDIR, S_IFREG}, timer::Instant, vfs::{ - dentry::Dentry, + dentry::{dcache, Dentry}, inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData, Mode, WriteOffset}, mount::{register_filesystem, Mount, MountCreator}, s_isdir, s_isreg, @@ -26,7 +26,7 @@ use another_ext4::{ Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, }; use eonix_runtime::task::Task; -use eonix_sync::RwLock; +use eonix_sync::{AsProofMut, ProofMut, RwLock}; pub struct Ext4BlockDevice { device: Arc, @@ -292,6 +292,37 @@ impl DirInode { self.size.fetch_add(1, Ordering::Relaxed); *self.mtime.lock() = now; } + + fn unlink( + &self, + file: &Arc, + decrease_size: bool, + _dir_lock: ProofMut<()>, + _file_lock: ProofMut<()>, + ) -> KResult<()> { + let now = Instant::now(); + + // SAFETY: `file_lock` has done the synchronization + if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { + return Err(EISDIR); + } + + if decrease_size { + // SAFETY: `dir_lock` has done the synchronization + self.size.fetch_sub(1, Ordering::Relaxed); + } + + *self.mtime.lock() = now; + + // The last reference to the inode is held by some dentry + // and will be released when the dentry is released + + // SAFETY: `file_lock` has done the synchronization + file.nlink.fetch_sub(1, Ordering::Relaxed); + *file.ctime.lock() = now; + + Ok(()) + } } impl Inode for DirInode { @@ -437,6 +468,30 @@ impl Inode for DirInode { self.link(newdir.as_ref()); at.save_dir(newdir) } + + fn unlink(&self, at: &Arc) -> KResult<()> { + let dir_lock = Task::block_on(self.rwsem.write()); + + let vfs = 
self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let file = at.get_inode()?; + + let name = at.get_name(); + let name = String::from_utf8_lossy(&name); + let file_lock = Task::block_on(file.rwsem.write()); + + if file.is_dir() { + let _ = ext4fs.inner.rmdir(self.ino as u32, &name); + } else { + let _ = ext4fs.inner.unlink(self.ino as u32, &name); + } + + self.unlink(&file, true, dir_lock.prove_mut(), file_lock.prove_mut())?; + dcache::d_remove(at); + + Ok(()) + } } struct Ext4MountCreator; From 5c4016615ac79f980c070514acada82ade8f188f Mon Sep 17 00:00:00 2001 From: Heinz Date: Sat, 26 Jul 2025 18:06:31 +0800 Subject: [PATCH 04/29] fix(fs): fix some informations --- src/fs/ext4.rs | 121 ++++++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index b321fbf1..cb4bc136 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -4,7 +4,7 @@ use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ block::BlockDevice, - constants::{EIO, EISDIR, S_IFDIR, S_IFREG}, + constants::{EIO, S_IFDIR, S_IFREG}, timer::Instant, vfs::{ dentry::{dcache, Dentry}, @@ -26,7 +26,7 @@ use another_ext4::{ Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, }; use eonix_runtime::task::Task; -use eonix_sync::{AsProofMut, ProofMut, RwLock}; +use eonix_sync::RwLock; pub struct Ext4BlockDevice { device: Arc, @@ -87,18 +87,27 @@ impl Ext4Fs { icache.get(&ino).cloned().map(Ext4Inode::into_inner) } - fn update_modify_inode(&self, ino: u64, size: u64, mtime: u32) { + fn modify_inode_stat(&self, ino: u32, size: Option, mtime: u32) { + let _ = self + .inner + .setattr(ino, None, None, None, size, None, Some(mtime), None, None); + } + + fn create_inode_stat(&self, parent: u32, child: u32, mtime: u32) { let _ = self.inner.setattr( - ino as u32, + parent, + None, None, None, None, - Some(size), None, Some(mtime), None, None, ); + let _ = self + .inner + 
.setattr(child, None, None, None, None, None, Some(mtime), None, None); } fn get_or_insert( @@ -262,7 +271,11 @@ impl Inode for FileInode { let new_size = (offset + total_written) as u64; self.size .store(offset as u64 + total_written as u64, Ordering::Relaxed); - ext4fs.update_modify_inode(self.ino, new_size, mtime.since_epoch().as_secs() as u32); + ext4fs.modify_inode_stat( + self.ino as u32, + Some(new_size), + mtime.since_epoch().as_secs() as u32, + ); Ok(total_written) } @@ -274,54 +287,44 @@ impl Inode for FileInode { } impl DirInode { - fn new(idata: InodeData) -> Arc { - let inode = Arc::new(Self { idata }); - - inode + fn new(ino: Ino, vfs: Weak, mode: Mode) -> Arc { + Arc::new_cyclic(|_| DirInode { + idata: { + let inode_data = InodeData::new(ino, vfs); + inode_data + .mode + .store(S_IFDIR | (mode & 0o777), Ordering::Relaxed); + inode_data.nlink.store(2, Ordering::Relaxed); + inode_data.size.store(4096, Ordering::Relaxed); + inode_data + }, + }) } - fn link(&self, file: &dyn Inode) { - let now = Instant::now(); + fn update_time(&self, time: Instant) { + *self.ctime.lock() = time; + *self.mtime.lock() = time; + } - // SAFETY: Only `unlink` will do something based on `nlink` count - // No need to synchronize here - file.nlink.fetch_add(1, Ordering::Relaxed); - *self.ctime.lock() = now; + fn update_child_time(&self, child: &dyn Inode, time: Instant) { + self.update_time(time); + *child.ctime.lock() = time; + *child.mtime.lock() = time; + } - // SAFETY: `rwsem` has done the synchronization + fn link_file(&self) { + // TODO self.size.fetch_add(1, Ordering::Relaxed); - *self.mtime.lock() = now; } - fn unlink( - &self, - file: &Arc, - decrease_size: bool, - _dir_lock: ProofMut<()>, - _file_lock: ProofMut<()>, - ) -> KResult<()> { - let now = Instant::now(); - - // SAFETY: `file_lock` has done the synchronization - if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { - return Err(EISDIR); - } - - if decrease_size { - // SAFETY: `dir_lock` has done the 
synchronization - self.size.fetch_sub(1, Ordering::Relaxed); - } - - *self.mtime.lock() = now; - - // The last reference to the inode is held by some dentry - // and will be released when the dentry is released - - // SAFETY: `file_lock` has done the synchronization - file.nlink.fetch_sub(1, Ordering::Relaxed); - *file.ctime.lock() = now; + fn link_dir(&self) { + // TODO + self.nlink.fetch_add(1, Ordering::Relaxed); + self.size.fetch_add(1, Ordering::Relaxed); + } - Ok(()) + fn unlink_dir(&self) { + self.nlink.fetch_sub(1, Ordering::Relaxed); } } @@ -436,11 +439,10 @@ impl Inode for DirInode { let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode); let now = Instant::now(); + self.update_child_time(file.as_ref(), now); + self.link_file(); - *self.ctime.lock() = now; - // SAFETY: `rwsem` has done the synchronization - self.size.fetch_add(1, Ordering::Relaxed); - *self.mtime.lock() = now; + ext4fs.create_inode_stat(self.ino as u32, new_ino, now.since_epoch().as_secs() as u32); at.save_reg(file) } @@ -463,14 +465,18 @@ impl Inode for DirInode { ) .unwrap(); - let newdir = DirInode::new(InodeData::new(new_ino as u64, self.vfs.clone())); + let new_dir = DirInode::new(new_ino as u64, self.vfs.clone(), mode); + let now = Instant::now(); + self.update_child_time(new_dir.as_ref(), now); + self.link_dir(); + + ext4fs.create_inode_stat(self.ino as u32, new_ino, now.since_epoch().as_secs() as u32); - self.link(newdir.as_ref()); - at.save_dir(newdir) + at.save_dir(new_dir) } fn unlink(&self, at: &Arc) -> KResult<()> { - let dir_lock = Task::block_on(self.rwsem.write()); + let _dir_lock = Task::block_on(self.rwsem.write()); let vfs = self.vfs.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); @@ -479,15 +485,18 @@ impl Inode for DirInode { let name = at.get_name(); let name = String::from_utf8_lossy(&name); - let file_lock = Task::block_on(file.rwsem.write()); + let _file_lock = Task::block_on(file.rwsem.write()); if file.is_dir() { let _ = 
ext4fs.inner.rmdir(self.ino as u32, &name); + self.unlink_dir(); } else { let _ = ext4fs.inner.unlink(self.ino as u32, &name); } + let now = Instant::now(); + self.update_time(now); + ext4fs.modify_inode_stat(self.ino as u32, None, now.since_epoch().as_secs() as u32); - self.unlink(&file, true, dir_lock.prove_mut(), file_lock.prove_mut())?; dcache::d_remove(at); Ok(()) From 1d1a0257ba9c895b5eb54d15ddf7d3f397fb00df Mon Sep 17 00:00:00 2001 From: Heinz Date: Tue, 29 Jul 2025 23:12:06 +0800 Subject: [PATCH 05/29] feat(fs): impl rename --- src/fs/ext4.rs | 115 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 4 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index cb4bc136..39df849f 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -4,11 +4,14 @@ use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ block::BlockDevice, - constants::{EIO, S_IFDIR, S_IFREG}, + constants::{EEXIST, EINVAL, EIO, ENOSYS, S_IFDIR, S_IFREG}, timer::Instant, vfs::{ dentry::{dcache, Dentry}, - inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData, Mode, WriteOffset}, + inode::{ + define_struct_inode, AtomicNlink, Ino, Inode, InodeData, Mode, RenameData, + WriteOffset, + }, mount::{register_filesystem, Mount, MountCreator}, s_isdir, s_isreg, vfs::Vfs, @@ -27,6 +30,7 @@ use another_ext4::{ }; use eonix_runtime::task::Task; use eonix_sync::RwLock; +use xmas_elf::dynamic::FLAG_1_NOW; pub struct Ext4BlockDevice { device: Arc, @@ -110,6 +114,20 @@ impl Ext4Fs { .setattr(child, None, None, None, None, None, Some(mtime), None, None); } + fn chmod_stat(&self, ino: u32, new_mode: u16, ctime: u32) { + let _ = self.inner.setattr( + ino, + Some(InodeMode::from_bits_retain(new_mode.try_into().unwrap())), + None, + None, + None, + None, + None, + Some(ctime), + None, + ); + } + fn get_or_insert( &self, icache: &mut BTreeMap, @@ -280,6 +298,28 @@ impl Inode for FileInode { Ok(total_written) } + fn chmod(&self, mode: Mode) -> KResult<()> { + let _lock = 
Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + let old_mode = self.mode.load(Ordering::Relaxed); + let new_mode = (old_mode & !0o777) | (mode & 0o777); + + let now = Instant::now(); + ext4fs.chmod_stat( + self.ino as u32, + new_mode as u16, + now.since_epoch().as_secs() as u32, + ); + + // SAFETY: `rwsem` has done the synchronization + self.mode.store(new_mode, Ordering::Relaxed); + *self.ctime.lock() = now; + + Ok(()) + } + // TODO fn truncate(&self, length: usize) -> KResult<()> { Ok(()) @@ -313,12 +353,10 @@ impl DirInode { } fn link_file(&self) { - // TODO self.size.fetch_add(1, Ordering::Relaxed); } fn link_dir(&self) { - // TODO self.nlink.fetch_add(1, Ordering::Relaxed); self.size.fetch_add(1, Ordering::Relaxed); } @@ -501,6 +539,75 @@ impl Inode for DirInode { Ok(()) } + + fn chmod(&self, mode: Mode) -> KResult<()> { + let _lock = Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + let old_mode = self.mode.load(Ordering::Relaxed); + let new_mode = (old_mode & !0o777) | (mode & 0o777); + + let now = Instant::now(); + ext4fs.chmod_stat( + self.ino as u32, + new_mode as u16, + now.since_epoch().as_secs() as u32, + ); + + // SAFETY: `rwsem` has done the synchronization + self.mode.store(new_mode, Ordering::Relaxed); + *self.ctime.lock() = now; + + Ok(()) + } + + fn rename(&self, rename_data: RenameData) -> KResult<()> { + let RenameData { + old_dentry, + new_dentry, + new_parent, + is_exchange, + no_replace, + vfs, + } = rename_data; + + if is_exchange { + println_warn!("Ext4Fs does not support exchange rename for now"); + return Err(ENOSYS); + } + + // TODO: may need another lock + let _lock = Task::block_on(self.rwsem.write()); + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let old_file = old_dentry.get_inode()?; + let new_file 
= new_dentry.get_inode(); + if no_replace && new_file.is_ok() { + return Err(EEXIST); + } + + let name = old_dentry.name(); + let name = core::str::from_utf8(&*name).map_err(|_| EINVAL)?; + let new_name = new_dentry.name(); + let new_name = core::str::from_utf8(&*new_name).map_err(|_| EINVAL)?; + + ext4fs + .inner + .rename(self.ino as u32, name, new_parent.ino as u32, new_name) + .map_err(|err| err.code() as u32)?; + + // TODO: may need more operations + let now = Instant::now(); + *self.mtime.lock() = now; + *old_file.ctime.lock() = now; + self.size.fetch_sub(1, Ordering::Relaxed); + + Task::block_on(dcache::d_exchange(old_dentry, new_dentry)); + + Ok(()) + } } struct Ext4MountCreator; From 22458ed33cd225123a8a23b74ae86c32984d2943 Mon Sep 17 00:00:00 2001 From: Heinz Date: Tue, 29 Jul 2025 23:34:09 +0800 Subject: [PATCH 06/29] fix(fs): fix rename's metadata --- src/fs/ext4.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 39df849f..25810bdf 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -30,7 +30,6 @@ use another_ext4::{ }; use eonix_runtime::task::Task; use eonix_sync::RwLock; -use xmas_elf::dynamic::FLAG_1_NOW; pub struct Ext4BlockDevice { device: Arc, @@ -600,9 +599,24 @@ impl Inode for DirInode { // TODO: may need more operations let now = Instant::now(); - *self.mtime.lock() = now; *old_file.ctime.lock() = now; - self.size.fetch_sub(1, Ordering::Relaxed); + *self.mtime.lock() = now; + + let same_parent = Arc::as_ptr(&new_parent) == &raw const *self; + if !same_parent { + *new_parent.mtime.lock() = now; + if old_file.is_dir() { + self.nlink.fetch_sub(1, Ordering::Relaxed); + new_parent.nlink.fetch_add(1, Ordering::Relaxed); + } + } + + if let Ok(replaced_file) = new_dentry.get_inode() { + if !no_replace { + *replaced_file.ctime.lock() = now; + replaced_file.nlink.fetch_sub(1, Ordering::Relaxed); + } + } Task::block_on(dcache::d_exchange(old_dentry, new_dentry)); From 
806c4fe0acfd8172c62e0111aee4544a8b6a05f5 Mon Sep 17 00:00:00 2001 From: Heinz Date: Thu, 31 Jul 2025 14:49:30 +0800 Subject: [PATCH 07/29] fix(fs): fix ext4's write offset update --- src/fs/ext4.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 25810bdf..1923d218 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -266,10 +266,14 @@ impl Inode for FileInode { let mut temp_buf = vec![0u8; 4096]; let mut total_written = 0; + let mut store_new_end = None; let offset = match offset { WriteOffset::Position(offset) => offset, // TODO: here need to add some operate - WriteOffset::End(end) => *end, + WriteOffset::End(end) => { + store_new_end = Some(end); + self.size.load(Ordering::Relaxed) as usize + } }; while let Some(data) = stream.poll_data(&mut temp_buf)? { @@ -283,6 +287,9 @@ impl Inode for FileInode { } } + if let Some(store_end) = store_new_end { + *store_end = offset + total_written; + } let mtime = Instant::now(); *self.mtime.lock() = mtime; let new_size = (offset + total_written) as u64; From db1caebde5063a6cb0c2c5f93c4689d5ece5e0e5 Mon Sep 17 00:00:00 2001 From: Heinz Date: Mon, 4 Aug 2025 22:59:58 +0800 Subject: [PATCH 08/29] feat(fs): partial work for ext4's page cache Fix page cache's bug, add size check in read function. Add page cache's base operations for ext4, but the cachepage will not be dropped until kernel stop, so we need to call fsync function manually, consider use some strategy such as LRU. 
--- src/fs/ext4.rs | 53 +++++++++++++++++------------ src/fs/fat32.rs | 8 ++--- src/fs/tmpfs.rs | 10 +++--- src/kernel/mem.rs | 2 +- src/kernel/mem/page_cache.rs | 66 +++++++++++++++++++++++++++++++----- src/kernel/vfs/inode.rs | 2 +- 6 files changed, 101 insertions(+), 40 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 0853e69a..763f1caa 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,6 +1,6 @@ use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; -use crate::kernel::mem::{PageCache, PageCacheBackend}; +use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ @@ -83,7 +83,7 @@ impl Vfs for Ext4Fs { } fn is_read_only(&self) -> bool { - true + false } } @@ -257,12 +257,12 @@ impl FileInode { } impl PageCacheBackend for FileInode { - fn read_page(&self, page: &mut crate::kernel::mem::CachePage, offset: usize) -> KResult { + fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { self.read_direct(page, offset) } - fn write_page(&self, page: &crate::kernel::mem::CachePage, offset: usize) -> KResult { - todo!() + fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult { + self.write_direct(page, offset) } fn size(&self) -> usize { @@ -296,12 +296,6 @@ impl Inode for FileInode { fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { let _lock = Task::block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let ext4fs = vfs.as_any().downcast_ref::().unwrap(); - - let mut temp_buf = vec![0u8; 4096]; - let mut total_written = 0; - let mut store_new_end = None; let offset = match offset { WriteOffset::Position(offset) => offset, @@ -312,6 +306,31 @@ impl Inode for FileInode { } }; + let total_written = Task::block_on(self.page_cache.write(stream, offset))?; + let cursor_end = offset + total_written; + if let Some(store_end) = store_new_end { + *store_end = cursor_end; + } + + let mtime = 
Instant::now(); + *self.mtime.lock() = mtime; + self.size.store(cursor_end as u64, Ordering::Relaxed); + + // TODO: change this with some update strategy such as LRU + let _ = Task::block_on(self.page_cache.fsync()); + + Ok(total_written) + } + + fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { + //let _lock = Task::block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let mut temp_buf = vec![0u8; 4096]; + let mut total_written = 0; + while let Some(data) = stream.poll_data(&mut temp_buf)? { let written = ext4fs .inner @@ -323,18 +342,10 @@ impl Inode for FileInode { } } - if let Some(store_end) = store_new_end { - *store_end = offset + total_written; - } - let mtime = Instant::now(); - *self.mtime.lock() = mtime; - let new_size = (offset + total_written) as u64; - self.size - .store(offset as u64 + total_written as u64, Ordering::Relaxed); ext4fs.modify_inode_stat( self.ino as u32, - Some(new_size), - mtime.since_epoch().as_secs() as u32, + Some(self.size() as u64), + self.mtime.lock().since_epoch().as_secs() as u32, ); Ok(total_written) diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 852d8673..fb4a3e2e 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -3,7 +3,7 @@ mod file; use crate::io::Stream; use crate::kernel::constants::EIO; -use crate::kernel::mem::AsMemoryBlock; +use crate::kernel::mem::{AsMemoryBlock, CachePageStream}; use crate::kernel::vfs::inode::WriteOffset; use crate::{ io::{Buffer, ByteBuffer, UninitBuffer}, @@ -308,11 +308,11 @@ impl Inode for FileInode { Ok(buffer.wrote()) } - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { todo!() } - fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write_direct(&self, _stream: &mut dyn Stream, _offset: usize) -> KResult { todo!() } } @@ -322,7 +322,7 @@ 
impl PageCacheBackend for FileInode { self.read_direct(page, offset) } - fn write_page(&self, page: &CachePage, offset: usize) -> KResult { + fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { todo!() } diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs index 334e2781..13a01de5 100644 --- a/src/fs/tmpfs.rs +++ b/src/fs/tmpfs.rs @@ -1,6 +1,6 @@ use crate::io::Stream; use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR}; -use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend}; +use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; use crate::kernel::timer::Instant; use crate::kernel::vfs::inode::InodeData; use crate::kernel::vfs::inode::RenameData; @@ -496,7 +496,7 @@ impl PageCacheBackend for FileInode { Ok(PAGE_SIZE) } - fn write_page(&self, _page: &CachePage, _offset: usize) -> KResult { + fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { Ok(PAGE_SIZE) } @@ -511,13 +511,13 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let lock = Task::block_on(self.rwsem.write()); + let _lock = Task::block_on(self.rwsem.write()); Task::block_on(self.pages.read(buffer, offset)) } fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { // TODO: We don't need that strong guarantee, find some way to avoid locks - let lock = Task::block_on(self.rwsem.write()); + let _lock = Task::block_on(self.rwsem.write()); let mut store_new_end = None; let offset = match offset { @@ -545,7 +545,7 @@ impl Inode for FileInode { } fn truncate(&self, length: usize) -> KResult<()> { - let lock = Task::block_on(self.rwsem.write()); + let _lock = Task::block_on(self.rwsem.write()); Task::block_on(self.pages.resize(length))?; self.size.store(length as u64, Ordering::Relaxed); *self.mtime.lock() = Instant::now(); diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs index ce705cff..efd06824 100644 --- 
a/src/kernel/mem.rs +++ b/src/kernel/mem.rs @@ -12,5 +12,5 @@ pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess}; pub(self) use mm_area::MMArea; pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission}; pub use page_alloc::{GlobalPageAlloc, RawPage}; -pub use page_cache::{CachePage, PageCache, PageCacheBackend}; +pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackend}; pub use paging::{Page, PageBuffer}; diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 863e538e..fa475f9b 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -125,27 +125,32 @@ impl PageCache { pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult { let mut pages = self.pages.lock().await; + let size = self.backend.upgrade().unwrap().size(); loop { + if offset >= size { + break; + } let page_id = offset >> PAGE_SIZE_BITS; let page = pages.get(&page_id); match page { Some(page) => { let inner_offset = offset % PAGE_SIZE; + let available_in_file = size.saturating_sub(offset); // TODO: still cause unnecessary IO if valid_size < PAGESIZE // and fill result is Done - if page.valid_size() == 0 - || buffer - .fill(&page.valid_data()[inner_offset..])? 
- .should_stop() + let page_data = &page.valid_data()[inner_offset..]; + let read_size = page_data.len().min(available_in_file); + + if read_size == 0 + || buffer.fill(&page_data[..read_size])?.should_stop() || buffer.available() == 0 { break; } - - offset += PAGE_SIZE - inner_offset; + offset += read_size; } None => { let mut new_page = CachePage::new(); @@ -217,7 +222,7 @@ impl PageCache { self.backend .upgrade() .unwrap() - .write_page(page, page_id << PAGE_SIZE_BITS)?; + .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)?; page.clear_dirty(); } } @@ -293,6 +298,51 @@ impl PageCache { } } +pub struct CachePageStream { + page: CachePage, + cur: usize, +} + +impl CachePageStream { + pub fn new(page: CachePage) -> Self { + Self { page, cur: 0 } + } + + pub fn remaining(&self) -> usize { + self.page.valid_size().saturating_sub(self.cur) + } + + pub fn is_drained(&self) -> bool { + self.cur >= self.page.valid_size() + } +} + +impl Stream for CachePageStream { + fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult> { + if self.cur >= self.page.valid_size() { + return Ok(None); + } + + let page_data = &self.page.all()[self.cur..self.page.valid_size()]; + let to_read = buf.len().min(page_data.len()); + + buf[..to_read].copy_from_slice(&page_data[..to_read]); + self.cur += to_read; + + Ok(Some(&mut buf[..to_read])) + } + + fn ignore(&mut self, len: usize) -> KResult> { + if self.cur >= self.page.valid_size() { + return Ok(None); + } + + let to_ignore = len.min(self.page.valid_size() - self.cur); + self.cur += to_ignore; + Ok(Some(to_ignore)) + } +} + // with this trait, "page cache" and "block cache" are unified, // for fs, offset is file offset (floor algin to PAGE_SIZE) // for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE) @@ -300,7 +350,7 @@ impl PageCache { pub trait PageCacheBackend { fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult; - fn write_page(&self, page: &CachePage, offset: usize) -> KResult; + 
fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult; fn size(&self) -> usize; } diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs index 2b52043d..0f73c910 100644 --- a/src/kernel/vfs/inode.rs +++ b/src/kernel/vfs/inode.rs @@ -136,7 +136,7 @@ pub trait Inode: Send + Sync + InodeInner + Any { Err(if self.is_dir() { EISDIR } else { EINVAL }) } - fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { Err(if self.is_dir() { EISDIR } else { EINVAL }) } From a2c50b9a112948f448b5142dee375303c2e3e15a Mon Sep 17 00:00:00 2001 From: Heinz Date: Tue, 5 Aug 2025 22:13:54 +0800 Subject: [PATCH 09/29] feat(fs): temporary cache write back strategy for ext4 temporary write back by timer, when write function is called, check if the time since the last write back is greater than 10 seconds. If it is, then write back. --- src/fs/ext4.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 763f1caa..9d315980 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,6 +1,7 @@ use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; +use crate::kernel::timer::Ticks; use crate::{ io::{Buffer, ByteBuffer, Stream}, kernel::{ @@ -223,6 +224,7 @@ impl Ext4Inode { define_struct_inode! 
{ struct FileInode { + last_sync: AtomicU64, page_cache: PageCache, } } @@ -235,6 +237,7 @@ impl FileInode { fn with_idata(idata: InodeData) -> Arc { let inode = Arc::new_cyclic(|weak_self: &Weak| Self { idata, + last_sync: AtomicU64::new(0), page_cache: PageCache::new(weak_self.clone()), }); @@ -251,9 +254,22 @@ impl FileInode { inode_data.nlink.store(1, Ordering::Relaxed); inode_data }, + last_sync: AtomicU64::new(0), page_cache: PageCache::new(weak_self.clone()), }) } + + fn sync_if_needed(&self) { + let now = Ticks::now().in_secs(); + let last = self.last_sync.load(Ordering::Relaxed); + + // TODO: this is a temporary implement, + // consider change this with some update strategy such as LRU future + if now - last > 10 { + self.last_sync.store(now, Ordering::Relaxed); + let _ = Task::block_on(self.page_cache.fsync()); + } + } } impl PageCacheBackend for FileInode { @@ -316,8 +332,7 @@ impl Inode for FileInode { *self.mtime.lock() = mtime; self.size.store(cursor_end as u64, Ordering::Relaxed); - // TODO: change this with some update strategy such as LRU - let _ = Task::block_on(self.page_cache.fsync()); + self.sync_if_needed(); Ok(total_written) } From e89a28610421c25b8a86203e10445fb811d1d13c Mon Sep 17 00:00:00 2001 From: greatbridf Date: Wed, 6 Aug 2025 01:56:03 +0800 Subject: [PATCH 10/29] runtime: rework the whole runtime arch. (partial) Remove old Scheduler. Add Runtime as replacement. Use stackless coroutine as the low level tasking mechanism and build the stackful tasks on top of it. Redesign of the task state system. Rework the executor. Remove Run trait and anything related. 
Signed-off-by: greatbridf --- crates/eonix_runtime/src/executor.rs | 158 +++++------ crates/eonix_runtime/src/executor/builder.rs | 6 +- .../src/executor/execute_status.rs | 4 - crates/eonix_runtime/src/lib.rs | 1 - crates/eonix_runtime/src/run.rs | 34 --- crates/eonix_runtime/src/run/future_run.rs | 34 --- crates/eonix_runtime/src/scheduler.rs | 265 +++++++----------- crates/eonix_runtime/src/task.rs | 182 ++++-------- crates/eonix_runtime/src/task/adapter.rs | 3 +- crates/eonix_runtime/src/task/task_state.rs | 29 +- 10 files changed, 241 insertions(+), 475 deletions(-) delete mode 100644 crates/eonix_runtime/src/executor/execute_status.rs delete mode 100644 crates/eonix_runtime/src/run.rs delete mode 100644 crates/eonix_runtime/src/run/future_run.rs diff --git a/crates/eonix_runtime/src/executor.rs b/crates/eonix_runtime/src/executor.rs index 12eb9556..7be70eb9 100644 --- a/crates/eonix_runtime/src/executor.rs +++ b/crates/eonix_runtime/src/executor.rs @@ -1,125 +1,105 @@ -mod builder; -mod execute_status; +// mod builder; mod output_handle; mod stack; -use crate::{ - run::{Contexted, Run, RunState}, - scheduler::Scheduler, - task::Task, +use alloc::{ + boxed::Box, + sync::{Arc, Weak}, }; -use alloc::sync::Weak; use core::{ + marker::PhantomData, pin::Pin, - sync::atomic::{compiler_fence, fence, AtomicBool, Ordering}, - task::Waker, + task::{Context, Poll}, }; use eonix_sync::Spin; -pub use builder::ExecutorBuilder; -pub use execute_status::ExecuteStatus; pub use output_handle::OutputHandle; pub use stack::Stack; -/// An `Executor` executes a `Run` object in a separate thread of execution -/// where we have a dedicated stack and context. -pub trait Executor: Send { - fn progress(&self) -> ExecuteStatus; +/// An `Executor` executes a Future object in a separate thread of execution. +/// +/// When the Future is finished, the `Executor` will call the `OutputHandle` to commit the output. +/// Then the `Executor` will release the resources associated with the Future. 
+pub struct Executor(Option>>); + +trait TypeErasedExecutor: Send { + /// # Returns + /// Whether the executor has finished. + fn run(self: Pin<&mut Self>, cx: &mut Context<'_>) -> bool; } -struct RealExecutor +struct RealExecutor<'a, F> where - R: Run + Send + Contexted + 'static, - R::Output: Send, + F: Future + Send + 'a, + F::Output: Send + 'a, { - _stack: S, - runnable: R, - output_handle: Weak>>, - finished: AtomicBool, + future: F, + output_handle: Weak>>, + _phantom: PhantomData<&'a ()>, } -impl RealExecutor +impl TypeErasedExecutor for RealExecutor<'_, F> where - R: Run + Send + Contexted + 'static, - R::Output: Send, + F: Future + Send, + F::Output: Send, { - extern "C" fn execute(self: Pin<&Self>) -> ! { - // We get here with preempt count == 1. - eonix_preempt::enable(); - - { - let waker = Waker::from(Task::current().clone()); + fn run(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> bool { + if self.output_handle.as_ptr().is_null() { + return true; + } - let output_data = loop { - // TODO!!!!!!: CHANGE THIS. - let runnable_pointer = &raw const self.get_ref().runnable; + let future = unsafe { + // SAFETY: We don't move the future. + self.as_mut().map_unchecked_mut(|me| &mut me.future) + }; - // SAFETY: We don't move the runnable object and we MIGHT not be using the - // part that is used in `pinned_run` in the runnable...? - let mut pinned_runnable = - unsafe { Pin::new_unchecked(&mut *(runnable_pointer as *mut R)) }; + match future.poll(cx) { + Poll::Ready(output) => { + if let Some(output_handle) = self.output_handle.upgrade() { + output_handle.lock().commit_output(output); - match pinned_runnable.as_mut().run(&waker) { - RunState::Finished(output) => break output, - RunState::Running => Task::park(), + unsafe { + // SAFETY: `output_handle` is Unpin. 
+ self.get_unchecked_mut().output_handle = Weak::new(); + } } - }; - if let Some(output_handle) = self.output_handle.upgrade() { - output_handle.lock().commit_output(output_data); + true } - } - - // SAFETY: We are on the same CPU as the task. - self.finished.store(true, Ordering::Relaxed); - - unsafe { - // SAFETY: `preempt::count()` == 1. - eonix_preempt::disable(); - Scheduler::goto_scheduler_noreturn() + Poll::Pending => false, } } } -impl Executor for RealExecutor -where - S: Send, - R: Run + Contexted + Send, - R::Output: Send, -{ - fn progress(&self) -> ExecuteStatus { - // TODO!!!: If the task comes from another cpu, we need to sync. - // - // The other cpu should see the changes of kernel stack of the target thread - // made in this cpu. - // - // Can we find a better way other than `fence`s? - // - // An alternative way is to use an atomic variable to store the cpu id of - // the current task. Then we can use acquire release swap to ensure that the - // other cpu sees the changes. - fence(Ordering::SeqCst); - compiler_fence(Ordering::SeqCst); - - // TODO!!!: We should load the context only if the previous task is - // different from the current task. - - self.runnable.load_running_context(); - - unsafe { - // SAFETY: We are in the scheduler context and we are not preempted. - Scheduler::go_from_scheduler(&Task::current().execution_context); - } - - self.runnable.restore_running_context(); +impl Executor { + pub fn new(future: F) -> (Self, Arc>>) + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + let output_handle = OutputHandle::new(); + + // TODO: accept futures with non 'static lifetimes. 
+ ( + Executor(Some(Box::pin(RealExecutor { + future, + output_handle: Arc::downgrade(&output_handle), + _phantom: PhantomData, + }))), + output_handle, + ) + } - compiler_fence(Ordering::SeqCst); - fence(Ordering::SeqCst); + pub fn run(&mut self, cx: &mut Context<'_>) -> bool { + if let Some(executor) = self.0.as_mut() { + let finished = executor.as_mut().run(cx); + if finished { + self.0.take(); + } - if self.finished.load(Ordering::Acquire) { - ExecuteStatus::Finished + finished } else { - ExecuteStatus::Executing + true } } } diff --git a/crates/eonix_runtime/src/executor/builder.rs b/crates/eonix_runtime/src/executor/builder.rs index eb073dc8..2729270b 100644 --- a/crates/eonix_runtime/src/executor/builder.rs +++ b/crates/eonix_runtime/src/executor/builder.rs @@ -1,8 +1,5 @@ use super::{Executor, OutputHandle, RealExecutor, Stack}; -use crate::{ - context::ExecutionContext, - run::{Contexted, Run}, -}; +use crate::context::ExecutionContext; use alloc::{boxed::Box, sync::Arc}; use core::{pin::Pin, sync::atomic::AtomicBool}; use eonix_sync::Spin; @@ -15,7 +12,6 @@ pub struct ExecutorBuilder { impl ExecutorBuilder where S: Stack, - R: Run + Contexted + Send + 'static, R::Output: Send, { pub fn new() -> Self { diff --git a/crates/eonix_runtime/src/executor/execute_status.rs b/crates/eonix_runtime/src/executor/execute_status.rs deleted file mode 100644 index 9c95aa6f..00000000 --- a/crates/eonix_runtime/src/executor/execute_status.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub enum ExecuteStatus { - Executing, - Finished, -} diff --git a/crates/eonix_runtime/src/lib.rs b/crates/eonix_runtime/src/lib.rs index 1df43fa4..458e01d9 100644 --- a/crates/eonix_runtime/src/lib.rs +++ b/crates/eonix_runtime/src/lib.rs @@ -3,7 +3,6 @@ pub mod context; pub mod executor; mod ready_queue; -pub mod run; pub mod scheduler; pub mod task; diff --git a/crates/eonix_runtime/src/run.rs b/crates/eonix_runtime/src/run.rs deleted file mode 100644 index 368b567b..00000000 --- 
a/crates/eonix_runtime/src/run.rs +++ /dev/null @@ -1,34 +0,0 @@ -mod future_run; - -use core::{pin::Pin, task::Waker}; -pub use future_run::FutureRun; - -pub enum RunState { - Running, - Finished(Output), -} - -pub trait Contexted { - /// # Safety - /// This function should be called in a preemption disabled context. - fn load_running_context(&self) {} - - /// # Safety - /// This function should be called in a preemption disabled context. - fn restore_running_context(&self) {} -} - -pub trait Run { - type Output; - - fn run(self: Pin<&mut Self>, waker: &Waker) -> RunState; - - fn join(mut self: Pin<&mut Self>, waker: &Waker) -> Self::Output { - loop { - match self.as_mut().run(waker) { - RunState::Running => continue, - RunState::Finished(output) => break output, - } - } - } -} diff --git a/crates/eonix_runtime/src/run/future_run.rs b/crates/eonix_runtime/src/run/future_run.rs deleted file mode 100644 index 813f8d2f..00000000 --- a/crates/eonix_runtime/src/run/future_run.rs +++ /dev/null @@ -1,34 +0,0 @@ -use super::{Contexted, Run, RunState}; -use core::{ - pin::Pin, - task::{Context, Poll, Waker}, -}; - -pub struct FutureRun(F); - -impl FutureRun -where - F: Future, -{ - pub const fn new(future: F) -> Self { - Self(future) - } -} - -impl Contexted for FutureRun where F: Future {} -impl Run for FutureRun -where - F: Future + 'static, -{ - type Output = F::Output; - - fn run(self: Pin<&mut Self>, waker: &Waker) -> RunState { - let mut future = unsafe { self.map_unchecked_mut(|me| &mut me.0) }; - let mut context = Context::from_waker(waker); - - match future.as_mut().poll(&mut context) { - Poll::Ready(output) => RunState::Finished(output), - Poll::Pending => RunState::Running, - } - } -} diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs index 9031d3a1..dcd8cfc1 100644 --- a/crates/eonix_runtime/src/scheduler.rs +++ b/crates/eonix_runtime/src/scheduler.rs @@ -1,20 +1,16 @@ use crate::{ - context::ExecutionContext, - 
executor::{ExecuteStatus, OutputHandle, Stack}, - ready_queue::{cpu_rq, local_rq}, - run::{Contexted, Run}, - task::{Task, TaskAdapter, TaskHandle}, + executor::OutputHandle, + ready_queue::{cpu_rq, local_rq, ReadyQueue}, + task::{Task, TaskAdapter, TaskHandle, TaskState}, }; use alloc::sync::Arc; use core::{ - mem::forget, + ops::{Deref, DerefMut}, ptr::NonNull, - sync::atomic::{compiler_fence, Ordering}, + sync::atomic::Ordering, task::Waker, }; use eonix_hal::processor::halt; -use eonix_log::println_trace; -use eonix_preempt::assert_preempt_count_eq; use eonix_sync::{LazyLock, Spin, SpinIrq as _}; use intrusive_collections::RBTree; use pointers::BorrowedArc; @@ -22,13 +18,12 @@ use pointers::BorrowedArc; #[eonix_percpu::define_percpu] static CURRENT_TASK: Option> = None; -#[eonix_percpu::define_percpu] -static LOCAL_SCHEDULER_CONTEXT: ExecutionContext = ExecutionContext::new(); - static TASKS: LazyLock>> = LazyLock::new(|| Spin::new(RBTree::new(TaskAdapter::new()))); -pub struct Scheduler; +pub static RUNTIME: Runtime = Runtime(); + +pub struct Runtime(); pub struct JoinHandle(Arc>>) where @@ -68,74 +63,7 @@ where } } -impl Scheduler { - /// `Scheduler` might be used in various places. Do not hold it for a long time. - /// - /// # Safety - /// The locked returned by this function should be locked with `lock_irq` to prevent from - /// rescheduling during access to the scheduler. Disabling preemption will do the same. - /// - /// Drop the lock before calling `schedule`. - pub fn get() -> &'static Self { - static GLOBAL_SCHEDULER: Scheduler = Scheduler; - &GLOBAL_SCHEDULER - } - - pub fn init_local_scheduler() - where - S: Stack, - { - let stack = S::new(); - - unsafe { - eonix_preempt::disable(); - // SAFETY: Preemption is disabled. 
- let context: &mut ExecutionContext = LOCAL_SCHEDULER_CONTEXT.as_mut(); - context.set_ip(local_scheduler as _); - context.set_sp(stack.get_bottom().addr().get() as usize); - context.set_interrupt(true); - eonix_preempt::enable(); - } - - // We don't need to keep the stack around. - forget(stack); - } - - /// # Safety - /// This function must not be called inside of the scheulder context. - /// - /// The caller must ensure that `preempt::count` == 1. - pub unsafe fn go_from_scheduler(to: &ExecutionContext) { - // SAFETY: Preemption is disabled. - unsafe { LOCAL_SCHEDULER_CONTEXT.as_ref() }.switch_to(to); - } - - /// # Safety - /// This function must not be called inside of the scheulder context. - /// - /// The caller must ensure that `preempt::count` == 1. - pub unsafe fn goto_scheduler(from: &ExecutionContext) { - // SAFETY: Preemption is disabled. - from.switch_to(unsafe { LOCAL_SCHEDULER_CONTEXT.as_ref() }); - } - - /// # Safety - /// This function must not be called inside of the scheulder context. - /// - /// The caller must ensure that `preempt::count` == 1. - pub unsafe fn goto_scheduler_noreturn() -> ! { - // SAFETY: Preemption is disabled. 
- unsafe { LOCAL_SCHEDULER_CONTEXT.as_ref().switch_noreturn() } - } - - fn add_task(task: Arc) { - TASKS.lock().insert(task); - } - - fn remove_task(task: &Task) { - unsafe { TASKS.lock().cursor_mut_from_ptr(task as *const _).remove() }; - } - +impl Runtime { fn select_cpu_for_task(&self, task: &Task) -> usize { task.cpu.load(Ordering::Relaxed) as _ } @@ -165,112 +93,127 @@ impl Scheduler { } } - pub fn spawn(&self, runnable: R) -> JoinHandle + pub fn spawn(&self, future: F) -> JoinHandle where - S: Stack + 'static, - R: Run + Contexted + Send + 'static, - R::Output: Send + 'static, + F: Future + Send + 'static, + F::Output: Send + 'static, { let TaskHandle { task, output_handle, - } = Task::new::(runnable); + } = Task::new(future); - Self::add_task(task.clone()); + self.add_task(task.clone()); self.activate(&task); JoinHandle(output_handle) } - /// Go to idle task. Call this with `preempt_count == 1`. - /// The preempt count will be decremented by this function. - /// - /// # Safety - /// We might never return from here. - /// Drop all variables that take ownership of some resource before calling this function. - pub fn schedule() { - assert_preempt_count_eq!(1, "Scheduler::schedule"); + // /// Go to idle task. Call this with `preempt_count == 1`. + // /// The preempt count will be decremented by this function. + // /// + // /// # Safety + // /// We might never return from here. + // /// Drop all variables that take ownership of some resource before calling this function. + // pub fn schedule() { + // assert_preempt_count_eq!(1, "Scheduler::schedule"); + + // // Make sure all works are done before scheduling. + // compiler_fence(Ordering::SeqCst); + + // // TODO!!!!!: Use of reference here needs further consideration. + // // + // // Since we might never return to here, we can't take ownership of `current()`. + // // Is it safe to believe that `current()` will never change across calls? + // unsafe { + // // SAFETY: Preemption is disabled. 
+ // Scheduler::goto_scheduler(&Task::current().execution_context); + // } + // eonix_preempt::enable(); + // } +} - // Make sure all works are done before scheduling. - compiler_fence(Ordering::SeqCst); +impl Runtime { + fn add_task(&self, task: Arc) { + TASKS.lock_irq().insert(task); + } - // TODO!!!!!: Use of reference here needs further consideration. - // - // Since we might never return to here, we can't take ownership of `current()`. - // Is it safe to believe that `current()` will never change across calls? + fn remove_task(&self, task: &impl Deref>) { unsafe { - // SAFETY: Preemption is disabled. - Scheduler::goto_scheduler(&Task::current().execution_context); + TASKS + .lock_irq() + .cursor_mut_from_ptr(Arc::as_ptr(task)) + .remove(); } - eonix_preempt::enable(); } -} - -extern "C" fn local_scheduler() -> ! { - loop { - assert_preempt_count_eq!(1, "Scheduler::idle_task"); - let mut rq = local_rq().lock_irq(); - let previous_task = CURRENT_TASK + fn current(&self) -> Option> { + CURRENT_TASK .get() - .map(|ptr| unsafe { Arc::from_raw(ptr.as_ptr()) }); - let next_task = rq.get(); + .map(|ptr| unsafe { BorrowedArc::from_raw(ptr) }) + } + + fn remove_and_enqueue_current(&self, rq: &mut impl DerefMut) { + let Some(current) = self.current() else { + return; + }; + + match current.state.cmpxchg(TaskState::RUNNING, TaskState::READY) { + Ok(_) => { + let current = unsafe { + Arc::from_raw( + CURRENT_TASK + .get() + .expect("Current task should be present") + .as_ptr(), + ) + }; + + rq.put(current); + } + Err(old) => { + assert_eq!( + old, + TaskState::PARKED, + "Current task should be in PARKED state" + ); + } + } + } - match (previous_task, next_task) { - (None, None) => { - // Nothing to do, halt the cpu and rerun the loop. + /// Enter the runtime with an "init" future and run till its completion. + /// + /// The "init" future has the highest priority and when it completes, + /// the runtime will exit immediately and yield its output. 
+ pub fn enter(&self) { + loop { + let mut rq = local_rq().lock_irq(); + + self.remove_and_enqueue_current(&mut rq); + + let Some(next) = rq.get() else { drop(rq); halt(); continue; - } - (None, Some(next)) => { - CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); - } - (Some(previous), None) => { - if previous.state.is_running() { - // Previous thread is `Running`, return to the current running thread. - println_trace!( - "trace_scheduler", - "Returning to task id({}) without doing context switch", - previous.id - ); - CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _)); - } else { - // Nothing to do, halt the cpu and rerun the loop. - CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _)); - drop(rq); - halt(); - continue; - } - } - (Some(previous), Some(next)) => { - println_trace!( - "trace_scheduler", - "Switching from task id({}) to task id({})", - previous.id, - next.id - ); + }; - debug_assert_ne!(previous.id, next.id, "Switching to the same task"); + let old_state = next.state.swap(TaskState::RUNNING); + assert_eq!( + old_state, + TaskState::READY, + "Next task should be in READY state" + ); - if previous.state.is_running() || !previous.state.try_park() { - rq.put(previous); - } else { - previous.on_rq.store(false, Ordering::Release); - } + CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); + drop(rq); - CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); - } - } + // TODO: MAYBE we can move the release of finished tasks to some worker thread. + if Task::current().run() { + Task::current().state.set(TaskState::DEAD); + CURRENT_TASK.set(None); - drop(rq); - // TODO: We can move the release of finished tasks to some worker thread. 
- if let ExecuteStatus::Finished = Task::current().run() { - let current = CURRENT_TASK - .swap(None) - .map(|ptr| unsafe { Arc::from_raw(ptr.as_ptr()) }) - .expect("Current task should be present"); - Scheduler::remove_task(¤t); + self.remove_task(&Task::current()); + } } } } diff --git a/crates/eonix_runtime/src/task.rs b/crates/eonix_runtime/src/task.rs index 66746ba2..c193d774 100644 --- a/crates/eonix_runtime/src/task.rs +++ b/crates/eonix_runtime/src/task.rs @@ -2,25 +2,22 @@ mod adapter; mod task_state; use crate::{ - context::ExecutionContext, - executor::{ExecuteStatus, Executor, ExecutorBuilder, OutputHandle, Stack}, - run::{Contexted, Run}, - scheduler::Scheduler, + executor::{Executor, OutputHandle}, + ready_queue::{cpu_rq, ReadyQueue}, }; -use alloc::{boxed::Box, sync::Arc, task::Wake}; +use alloc::{sync::Arc, task::Wake}; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ - pin::{pin, Pin}, - sync::atomic::{AtomicBool, AtomicU32, Ordering}, - task::{Context, Poll, Waker}, + ops::DerefMut, + sync::atomic::{AtomicU32, Ordering}, + task::{Context, Waker}, }; use eonix_hal::processor::CPU; -use eonix_preempt::assert_preempt_enabled; -use eonix_sync::Spin; -use intrusive_collections::RBTreeAtomicLink; -use task_state::TaskState; +use eonix_sync::{Spin, SpinIrq}; +use intrusive_collections::{LinkedListAtomicLink, RBTreeAtomicLink}; -pub use adapter::TaskAdapter; +pub use adapter::{TaskAdapter, TaskRqAdapter}; +pub(crate) use task_state::TaskState; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct TaskId(u32); @@ -33,159 +30,70 @@ where pub(crate) output_handle: Arc>>, } -/// A `Task` represents a schedulable unit. 
-/// -/// Initial: state = Running, unparked = false -/// -/// Task::park() => swap state <- Parking, assert prev == Running -/// => swap unparked <- false -/// -> true => store state <- Running => return -/// -> false => goto scheduler => get rq lock => load state -/// -> Running => enqueue -/// -> Parking => cmpxchg Parking -> Parked -/// -> Running => enqueue -/// -> Parking => on_rq <- false -/// -> Parked => ??? -/// -/// Task::unpark() => swap unparked <- true -/// -> true => return -/// -> false => swap state <- Running -/// -> Running => return -/// -> Parking | Parked => Scheduler::activate pub struct Task { /// Unique identifier of the task. pub id: TaskId, - /// Whether the task is on some run queue (a.k.a ready). - pub(crate) on_rq: AtomicBool, - /// Whether someone has called `unpark` on this task. - pub(crate) unparked: AtomicBool, /// The last cpu that the task was executed on. /// If `on_rq` is `false`, we can't assume that this task is still on the cpu. pub(crate) cpu: AtomicU32, /// Task state. pub(crate) state: TaskState, - /// Task execution context. - pub(crate) execution_context: ExecutionContext, /// Executor object. - executor: AtomicUniqueRefCell>>>, + executor: AtomicUniqueRefCell, /// Link in the global task list. link_task_list: RBTreeAtomicLink, + /// Link in the ready queue. 
+ link_ready_queue: LinkedListAtomicLink, } impl Task { - pub fn new(runnable: R) -> TaskHandle + pub fn new(future: F) -> TaskHandle where - S: Stack + 'static, - R: Run + Contexted + Send + 'static, - R::Output: Send + 'static, + F: Future + Send + 'static, + F::Output: Send + 'static, { static ID: AtomicU32 = AtomicU32::new(0); - let (executor, execution_context, output) = ExecutorBuilder::new() - .stack(S::new()) - .runnable(runnable) - .build(); + let (executor, output_handle) = Executor::new(future); let task = Arc::new(Self { id: TaskId(ID.fetch_add(1, Ordering::Relaxed)), - on_rq: AtomicBool::new(false), - unparked: AtomicBool::new(false), cpu: AtomicU32::new(CPU::local().cpuid() as u32), state: TaskState::new(TaskState::RUNNING), - executor: AtomicUniqueRefCell::new(Some(executor)), - execution_context, + executor: AtomicUniqueRefCell::new(executor), link_task_list: RBTreeAtomicLink::new(), + link_ready_queue: LinkedListAtomicLink::new(), }); TaskHandle { task, - output_handle: output, + output_handle, } } - pub fn run(&self) -> ExecuteStatus { + /// # Returns + /// Whether the task has finished. + pub fn run(self: &Arc) -> bool { let mut executor_borrow = self.executor.borrow(); + let waker = Waker::from(self.clone()); + let mut cx = Context::from_waker(&waker); - let executor = executor_borrow - .as_ref() - .expect("Executor should be present") - .as_ref() - .get_ref(); - - if let ExecuteStatus::Finished = executor.progress() { - executor_borrow.take(); - ExecuteStatus::Finished - } else { - ExecuteStatus::Executing - } - } - - pub fn unpark(self: &Arc) { - if self.unparked.swap(true, Ordering::Release) { - return; - } - - eonix_preempt::disable(); - - match self.state.swap(TaskState::RUNNING) { - TaskState::RUNNING => {} - TaskState::PARKED | TaskState::PARKING => { - // We are waking up from sleep or someone else is parking this task. - // Try to wake it up. 
- Scheduler::get().activate(self); - } - _ => unreachable!(), - } - - eonix_preempt::enable(); + executor_borrow.run(&mut cx) } - pub fn park() { - eonix_preempt::disable(); - Self::park_preempt_disabled(); - } - - /// Park the current task with `preempt::count() == 1`. - pub fn park_preempt_disabled() { - let task = Task::current(); - - let old_state = task.state.swap(TaskState::PARKING); - assert_eq!( - old_state, - TaskState::RUNNING, - "Parking a task that is not running." - ); - - if task.unparked.swap(false, Ordering::AcqRel) { - // Someone has called `unpark` on this task previously. - task.state.swap(TaskState::RUNNING); - } else { - unsafe { - // SAFETY: Preemption is disabled. - Scheduler::goto_scheduler(&Task::current().execution_context) - }; - assert!(task.unparked.swap(false, Ordering::Acquire)); - } - - eonix_preempt::enable(); - } - - pub fn block_on(future: F) -> F::Output - where - F: Future, - { - assert_preempt_enabled!("block_on() must be called with preemption enabled"); - - let waker = Waker::from(Task::current().clone()); - let mut context = Context::from_waker(&waker); - let mut future = pin!(future); - + /// Get the stabilized lock for the task's run queue. 
+ fn rq(&self) -> Option + 'static> { loop { - if let Poll::Ready(output) = future.as_mut().poll(&mut context) { - break output; + let cpu = self.cpu.load(Ordering::Relaxed); + let rq = cpu_rq(cpu as usize).lock_irq(); + + if cpu == self.cpu.load(Ordering::Acquire) { + if self.link_ready_queue.is_linked() { + return Some(rq); + } else { + return None; + } } - - Task::park(); } } } @@ -196,6 +104,20 @@ impl Wake for Task { } fn wake_by_ref(self: &Arc) { - self.unpark(); + if self + .state + .cmpxchg(TaskState::PARKED, TaskState::READY) + .is_err() + { + return; + } + + if let Some(mut rq) = self.rq() { + if self.state.get() != TaskState::PARKED { + return; + } + + rq.put(self.clone()); + } } } diff --git a/crates/eonix_runtime/src/task/adapter.rs b/crates/eonix_runtime/src/task/adapter.rs index de1d0bad..3b5d1583 100644 --- a/crates/eonix_runtime/src/task/adapter.rs +++ b/crates/eonix_runtime/src/task/adapter.rs @@ -1,8 +1,9 @@ use super::{Task, TaskId}; use alloc::sync::Arc; -use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTreeAtomicLink}; +use intrusive_collections::{intrusive_adapter, KeyAdapter, LinkedListAtomicLink, RBTreeAtomicLink}; intrusive_adapter!(pub TaskAdapter = Arc: Task { link_task_list: RBTreeAtomicLink }); +intrusive_adapter!(pub TaskRqAdapter = Arc: Task { link_ready_queue: LinkedListAtomicLink }); impl<'a> KeyAdapter<'a> for TaskAdapter { type Key = TaskId; diff --git a/crates/eonix_runtime/src/task/task_state.rs b/crates/eonix_runtime/src/task/task_state.rs index b22ad889..ec9d67ad 100644 --- a/crates/eonix_runtime/src/task/task_state.rs +++ b/crates/eonix_runtime/src/task/task_state.rs @@ -4,32 +4,29 @@ use core::sync::atomic::{AtomicU32, Ordering}; pub struct TaskState(AtomicU32); impl TaskState { - pub const RUNNING: u32 = 0; - pub const PARKING: u32 = 1; + pub const READY: u32 = 0; + pub const RUNNING: u32 = 1; pub const PARKED: u32 = 2; + pub const DEAD: u32 = 1 << 31; pub(crate) const fn new(state: u32) -> Self { 
Self(AtomicU32::new(state)) } pub(crate) fn swap(&self, state: u32) -> u32 { - self.0.swap(state, Ordering::AcqRel) + self.0.swap(state, Ordering::SeqCst) } - pub(crate) fn try_park(&self) -> bool { - match self.0.compare_exchange( - TaskState::PARKING, - TaskState::PARKED, - Ordering::AcqRel, - Ordering::Acquire, - ) { - Ok(_) => true, - Err(TaskState::RUNNING) => false, - Err(_) => unreachable!("Invalid task state while trying to park."), - } + pub(crate) fn set(&self, state: u32) { + self.0.store(state, Ordering::SeqCst); } - pub(crate) fn is_running(&self) -> bool { - self.0.load(Ordering::Acquire) == Self::RUNNING + pub(crate) fn get(&self) -> u32 { + self.0.load(Ordering::SeqCst) + } + + pub(crate) fn cmpxchg(&self, current: u32, new: u32) -> Result { + self.0 + .compare_exchange(current, new, Ordering::SeqCst, Ordering::SeqCst) } } From e23c9eb1f24c572e19c9d275acf62e4ce0e2a1e0 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Thu, 7 Aug 2025 16:59:47 +0800 Subject: [PATCH 11/29] runtime: new task sleep-wakeup method and some adaption We use RUNNING to indicate that the task is on the cpu, and READY to indicate that the task can be run again and should therefore be put back into the ready queue after one poll() call. When the task is taken from the ready queue and put onto a cpu, it's marked as RUNNING only, so that it gets suspended if the poll() call returns Poll::Pending. If we (or others) call Waker::wake() during the run, the READY flag is set as well. When we return from the poll() call, we detect this with a CAS and put the task back into the ready queue. We've also adapted the rest of the kernel, mainly to remove *SOME* of the Task::block_on calls. Removing them completely is not possible for now; that should be solved in the next few commits. 
Signed-off-by: greatbridf --- crates/eonix_runtime/src/executor.rs | 39 +++--- crates/eonix_runtime/src/scheduler.rs | 129 +++++++++----------- crates/eonix_runtime/src/task.rs | 52 ++++---- crates/eonix_runtime/src/task/task_state.rs | 19 +-- src/driver/serial.rs | 6 +- src/driver/virtio/riscv64.rs | 12 +- src/kernel/interrupt.rs | 13 +- src/kernel/mem/page_alloc/raw_page.rs | 1 - src/kernel/syscall/mm.rs | 2 +- src/kernel/task.rs | 2 +- src/kernel/task/clone.rs | 9 +- src/kernel/task/process.rs | 12 +- src/kernel/task/process_list.rs | 10 +- src/kernel/task/session.rs | 4 +- src/kernel/task/signal.rs | 13 +- src/kernel/task/thread.rs | 108 +++++----------- src/kernel/terminal.rs | 18 +-- src/lib.rs | 63 +++++----- 18 files changed, 210 insertions(+), 302 deletions(-) diff --git a/crates/eonix_runtime/src/executor.rs b/crates/eonix_runtime/src/executor.rs index 7be70eb9..3b858a47 100644 --- a/crates/eonix_runtime/src/executor.rs +++ b/crates/eonix_runtime/src/executor.rs @@ -23,9 +23,7 @@ pub use stack::Stack; pub struct Executor(Option>>); trait TypeErasedExecutor: Send { - /// # Returns - /// Whether the executor has finished. 
- fn run(self: Pin<&mut Self>, cx: &mut Context<'_>) -> bool; + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()>; } struct RealExecutor<'a, F> @@ -43,9 +41,9 @@ where F: Future + Send, F::Output: Send, { - fn run(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> bool { + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { if self.output_handle.as_ptr().is_null() { - return true; + return Poll::Ready(()); } let future = unsafe { @@ -53,21 +51,16 @@ where self.as_mut().map_unchecked_mut(|me| &mut me.future) }; - match future.poll(cx) { - Poll::Ready(output) => { - if let Some(output_handle) = self.output_handle.upgrade() { - output_handle.lock().commit_output(output); + future.poll(cx).map(|output| { + if let Some(output_handle) = self.output_handle.upgrade() { + output_handle.lock().commit_output(output); - unsafe { - // SAFETY: `output_handle` is Unpin. - self.get_unchecked_mut().output_handle = Weak::new(); - } + unsafe { + // SAFETY: `output_handle` is Unpin. + self.get_unchecked_mut().output_handle = Weak::new(); } - - true } - Poll::Pending => false, - } + }) } } @@ -79,7 +72,6 @@ impl Executor { { let output_handle = OutputHandle::new(); - // TODO: accept futures with non 'static lifetimes. 
( Executor(Some(Box::pin(RealExecutor { future, @@ -90,16 +82,13 @@ impl Executor { ) } - pub fn run(&mut self, cx: &mut Context<'_>) -> bool { + pub fn poll(&mut self, cx: &mut Context<'_>) -> Poll<()> { if let Some(executor) = self.0.as_mut() { - let finished = executor.as_mut().run(cx); - if finished { + executor.as_mut().poll(cx).map(|_| { self.0.take(); - } - - finished + }) } else { - true + Poll::Ready(()) } } } diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs index dcd8cfc1..c9c73ea5 100644 --- a/crates/eonix_runtime/src/scheduler.rs +++ b/crates/eonix_runtime/src/scheduler.rs @@ -1,14 +1,13 @@ use crate::{ executor::OutputHandle, - ready_queue::{cpu_rq, local_rq, ReadyQueue}, + ready_queue::{local_rq, ReadyQueue}, task::{Task, TaskAdapter, TaskHandle, TaskState}, }; -use alloc::sync::Arc; +use alloc::{sync::Arc, task::Wake}; use core::{ ops::{Deref, DerefMut}, ptr::NonNull, - sync::atomic::Ordering, - task::Waker, + task::{Context, Poll, Waker}, }; use eonix_hal::processor::halt; use eonix_sync::{LazyLock, Spin, SpinIrq as _}; @@ -64,35 +63,6 @@ where } impl Runtime { - fn select_cpu_for_task(&self, task: &Task) -> usize { - task.cpu.load(Ordering::Relaxed) as _ - } - - pub fn activate(&self, task: &Arc) { - // Only one cpu can be activating the task at a time. - // TODO: Add some checks. - - if task.on_rq.swap(true, Ordering::Acquire) { - // Lock the rq and check whether the task is on the rq again. - let cpuid = task.cpu.load(Ordering::Acquire); - let mut rq = cpu_rq(cpuid as _).lock_irq(); - - if !task.on_rq.load(Ordering::Acquire) { - // Task has just got off the rq. Put it back. - rq.put(task.clone()); - } else { - // Task is already on the rq. Do nothing. - return; - } - } else { - // Task not on some rq. Select one and put it here. 
- let cpu = self.select_cpu_for_task(&task); - let mut rq = cpu_rq(cpu).lock_irq(); - task.cpu.store(cpu as _, Ordering::Release); - rq.put(task.clone()); - } - } - pub fn spawn(&self, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -104,36 +74,11 @@ impl Runtime { } = Task::new(future); self.add_task(task.clone()); - self.activate(&task); + task.wake_by_ref(); JoinHandle(output_handle) } - // /// Go to idle task. Call this with `preempt_count == 1`. - // /// The preempt count will be decremented by this function. - // /// - // /// # Safety - // /// We might never return from here. - // /// Drop all variables that take ownership of some resource before calling this function. - // pub fn schedule() { - // assert_preempt_count_eq!(1, "Scheduler::schedule"); - - // // Make sure all works are done before scheduling. - // compiler_fence(Ordering::SeqCst); - - // // TODO!!!!!: Use of reference here needs further consideration. - // // - // // Since we might never return to here, we can't take ownership of `current()`. - // // Is it safe to believe that `current()` will never change across calls? - // unsafe { - // // SAFETY: Preemption is disabled. 
- // Scheduler::goto_scheduler(&Task::current().execution_context); - // } - // eonix_preempt::enable(); - // } -} - -impl Runtime { fn add_task(&self, task: Arc) { TASKS.lock_irq().insert(task); } @@ -158,12 +103,18 @@ impl Runtime { return; }; - match current.state.cmpxchg(TaskState::RUNNING, TaskState::READY) { - Ok(_) => { + match current.state.update(|state| match state { + TaskState::READY_RUNNING => Some(TaskState::READY), + TaskState::RUNNING => Some(TaskState::BLOCKED), + _ => { + unreachable!("Current task should be at least in RUNNING state, but got {state:?}") + } + }) { + Ok(TaskState::READY_RUNNING) => { let current = unsafe { Arc::from_raw( CURRENT_TASK - .get() + .swap(None) .expect("Current task should be present") .as_ptr(), ) @@ -171,14 +122,40 @@ impl Runtime { rq.put(current); } - Err(old) => { - assert_eq!( - old, - TaskState::PARKED, - "Current task should be in PARKED state" - ); + Ok(_) => {} + _ => unreachable!(), + } + } + + pub fn block_till_woken(set_waker: impl FnOnce(&Waker)) -> impl Future { + struct BlockTillWoken { + set_waker: Option, + slept: bool, + } + + impl Future for BlockTillWoken { + type Output = (); + + fn poll(self: core::pin::Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + if self.slept { + Poll::Ready(()) + } else { + let (set_waker, slept) = unsafe { + let me = self.get_unchecked_mut(); + (me.set_waker.take().unwrap(), &mut me.slept) + }; + + set_waker(cx.waker()); + *slept = true; + Poll::Pending + } } } + + BlockTillWoken { + set_waker: Some(set_waker), + slept: false, + } } /// Enter the runtime with an "init" future and run till its completion. @@ -204,15 +181,23 @@ impl Runtime { "Next task should be in READY state" ); - CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); + unsafe { + CURRENT_TASK.set(Some(NonNull::new_unchecked(Arc::into_raw(next) as *mut _))); + } + drop(rq); // TODO: MAYBE we can move the release of finished tasks to some worker thread. 
- if Task::current().run() { - Task::current().state.set(TaskState::DEAD); - CURRENT_TASK.set(None); + if Task::current().poll().is_ready() { + let old_state = Task::current().state.swap(TaskState::DEAD); + assert!( + old_state & TaskState::RUNNING != 0, + "Current task should be at least in RUNNING state" + ); self.remove_task(&Task::current()); + + CURRENT_TASK.set(None); } } } diff --git a/crates/eonix_runtime/src/task.rs b/crates/eonix_runtime/src/task.rs index c193d774..8f4062d8 100644 --- a/crates/eonix_runtime/src/task.rs +++ b/crates/eonix_runtime/src/task.rs @@ -10,7 +10,7 @@ use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ ops::DerefMut, sync::atomic::{AtomicU32, Ordering}, - task::{Context, Waker}, + task::{Context, Poll, Waker}, }; use eonix_hal::processor::CPU; use eonix_sync::{Spin, SpinIrq}; @@ -59,7 +59,7 @@ impl Task { let task = Arc::new(Self { id: TaskId(ID.fetch_add(1, Ordering::Relaxed)), cpu: AtomicU32::new(CPU::local().cpuid() as u32), - state: TaskState::new(TaskState::RUNNING), + state: TaskState::new(TaskState::BLOCKED), executor: AtomicUniqueRefCell::new(executor), link_task_list: RBTreeAtomicLink::new(), link_ready_queue: LinkedListAtomicLink::new(), @@ -71,31 +71,35 @@ impl Task { } } - /// # Returns - /// Whether the task has finished. - pub fn run(self: &Arc) -> bool { + pub fn poll(self: &Arc) -> Poll<()> { let mut executor_borrow = self.executor.borrow(); let waker = Waker::from(self.clone()); let mut cx = Context::from_waker(&waker); - executor_borrow.run(&mut cx) + executor_borrow.poll(&mut cx) } /// Get the stabilized lock for the task's run queue. - fn rq(&self) -> Option + 'static> { + pub fn rq(&self) -> impl DerefMut + 'static { loop { let cpu = self.cpu.load(Ordering::Relaxed); let rq = cpu_rq(cpu as usize).lock_irq(); - if cpu == self.cpu.load(Ordering::Acquire) { - if self.link_ready_queue.is_linked() { - return Some(rq); - } else { - return None; - } + // We stabilize the task cpu with the cpu rq here for now. 
+ if cpu != self.cpu.load(Ordering::Acquire) { + continue; } + + return rq; } } + + pub fn block_on(future: F) -> F::Output + where + F: Future, + { + todo!() + } } impl Wake for Task { @@ -104,20 +108,18 @@ impl Wake for Task { } fn wake_by_ref(self: &Arc) { - if self - .state - .cmpxchg(TaskState::PARKED, TaskState::READY) - .is_err() - { + let Ok(old) = self.state.update(|state| match state { + TaskState::BLOCKED => Some(TaskState::READY), + TaskState::RUNNING => Some(TaskState::READY | TaskState::RUNNING), + TaskState::READY | TaskState::READY_RUNNING => None, + state => unreachable!("Waking a {state:?} task"), + }) else { return; - } - - if let Some(mut rq) = self.rq() { - if self.state.get() != TaskState::PARKED { - return; - } + }; - rq.put(self.clone()); + if old == TaskState::BLOCKED { + // If the task was blocked, we need to put it back to the ready queue. + self.rq().put(self.clone()); } } } diff --git a/crates/eonix_runtime/src/task/task_state.rs b/crates/eonix_runtime/src/task/task_state.rs index ec9d67ad..074acfb4 100644 --- a/crates/eonix_runtime/src/task/task_state.rs +++ b/crates/eonix_runtime/src/task/task_state.rs @@ -4,9 +4,10 @@ use core::sync::atomic::{AtomicU32, Ordering}; pub struct TaskState(AtomicU32); impl TaskState { - pub const READY: u32 = 0; - pub const RUNNING: u32 = 1; - pub const PARKED: u32 = 2; + pub const BLOCKED: u32 = 0; + pub const READY: u32 = 1; + pub const RUNNING: u32 = 2; + pub const READY_RUNNING: u32 = TaskState::READY | TaskState::RUNNING; pub const DEAD: u32 = 1 << 31; pub(crate) const fn new(state: u32) -> Self { @@ -17,16 +18,8 @@ impl TaskState { self.0.swap(state, Ordering::SeqCst) } - pub(crate) fn set(&self, state: u32) { - self.0.store(state, Ordering::SeqCst); - } - - pub(crate) fn get(&self) -> u32 { - self.0.load(Ordering::SeqCst) - } - - pub(crate) fn cmpxchg(&self, current: u32, new: u32) -> Result { + pub(crate) fn update(&self, func: impl FnMut(u32) -> Option) -> Result { self.0 - 
.compare_exchange(current, new, Ordering::SeqCst, Ordering::SeqCst) + .fetch_update(Ordering::SeqCst, Ordering::SeqCst, func) } } diff --git a/src/driver/serial.rs b/src/driver/serial.rs index d7fabbbd..d69965f4 100644 --- a/src/driver/serial.rs +++ b/src/driver/serial.rs @@ -3,14 +3,14 @@ mod io; use crate::{ kernel::{ block::make_device, console::set_console, constants::EIO, interrupt::register_irq_handler, - task::KernelStack, CharDevice, CharDeviceType, Terminal, TerminalDevice, + CharDevice, CharDeviceType, Terminal, TerminalDevice, }, prelude::*, }; use alloc::{collections::vec_deque::VecDeque, format, sync::Arc}; use bitflags::bitflags; use core::pin::pin; -use eonix_runtime::{run::FutureRun, scheduler::Scheduler}; +use eonix_runtime::scheduler::RUNTIME; use eonix_sync::{SpinIrq as _, WaitList}; use io::SerialIO; @@ -161,7 +161,7 @@ impl Serial { })?; } - Scheduler::get().spawn::(FutureRun::new(Self::worker(port.clone()))); + RUNTIME.spawn(Self::worker(port.clone())); let _ = set_console(terminal.clone()); eonix_log::set_console(terminal.clone()); diff --git a/src/driver/virtio/riscv64.rs b/src/driver/virtio/riscv64.rs index 66f150c3..ad132569 100644 --- a/src/driver/virtio/riscv64.rs +++ b/src/driver/virtio/riscv64.rs @@ -1,23 +1,15 @@ use super::virtio_blk::HAL; -use crate::kernel::{ - block::{make_device, BlockDevice}, - mem::{AsMemoryBlock, MemoryBlock, Page}, -}; +use crate::kernel::block::{make_device, BlockDevice}; use alloc::{sync::Arc, vec::Vec}; -use core::num::NonZero; use eonix_hal::arch_exported::fdt::FDT; use eonix_hal::mm::ArchPhysAccess; use eonix_log::{println_info, println_warn}; -use eonix_mm::{ - address::{Addr, PAddr, PhysAccess}, - paging::PFN, -}; +use eonix_mm::address::{PAddr, PhysAccess}; use eonix_runtime::task::Task; use eonix_sync::Spin; use virtio_drivers::{ device::blk::VirtIOBlk, transport::{mmio::MmioTransport, Transport}, - Hal, }; pub fn init() { diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs index 
1a84d534..4b55f182 100644 --- a/src/kernel/interrupt.rs +++ b/src/kernel/interrupt.rs @@ -1,5 +1,5 @@ use super::mem::handle_kernel_page_fault; -use super::timer::{should_reschedule, timer_interrupt}; +use super::timer::timer_interrupt; use crate::kernel::constants::EINVAL; use crate::prelude::*; use alloc::sync::Arc; @@ -7,7 +7,6 @@ use eonix_hal::traits::fault::Fault; use eonix_hal::traits::trap::{RawTrapContext, TrapType}; use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; -use eonix_runtime::scheduler::Scheduler; use eonix_sync::SpinIrq as _; static IRQ_HANDLERS: Spin<[Vec>; 16]> = @@ -51,15 +50,7 @@ pub fn interrupt_handler(trap_ctx: &mut TrapContext) { TrapType::Syscall { no, .. } => unreachable!("Syscall {} in kernel space.", no), TrapType::Fault(fault) => default_fault_handler(fault, trap_ctx), TrapType::Irq { callback } => callback(default_irq_handler), - TrapType::Timer { callback } => { - callback(timer_interrupt); - - if eonix_preempt::count() == 0 && should_reschedule() { - // To make scheduler satisfied. 
- eonix_preempt::disable(); - Scheduler::schedule(); - } - } + TrapType::Timer { callback } => callback(timer_interrupt), } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 4b420255..54d4d590 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -6,7 +6,6 @@ use core::{ sync::atomic::{AtomicU32, AtomicUsize, Ordering}, }; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::paging::PAGE_SIZE; use eonix_mm::{ address::{PAddr, PhysAccess as _}, paging::{RawPage as RawPageTrait, PFN}, diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index b639650d..dd263e6b 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -1,6 +1,6 @@ use super::FromSyscallArg; use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; -use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT, ENOMEM}; +use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; use crate::kernel::mem::FileMapping; use crate::kernel::task::Thread; use crate::kernel::vfs::filearray::FD; diff --git a/src/kernel/task.rs b/src/kernel/task.rs index e8d36e51..1b47923e 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -18,4 +18,4 @@ pub use process_group::ProcessGroup; pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; -pub use thread::{new_thread_runnable, yield_now, Thread, ThreadBuilder}; +pub use thread::{yield_now, Thread, ThreadBuilder}; diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index c8efe5e8..48e34f96 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,10 +1,7 @@ use crate::{ kernel::{ syscall::procops::parse_user_tls, - task::{ - alloc_pid, new_thread_runnable, KernelStack, ProcessBuilder, ProcessList, Thread, - ThreadBuilder, - }, + task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}, user::UserPointerMut, }, KResult, @@ -12,7 +9,7 @@ use crate::{ use 
bitflags::bitflags; use core::num::NonZero; use eonix_hal::processor::UserTLS; -use eonix_runtime::{scheduler::Scheduler, task::Task}; +use eonix_runtime::{scheduler::RUNTIME, task::Task}; use eonix_sync::AsProof; use posix_types::signal::Signal; @@ -166,7 +163,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)? } - Scheduler::get().spawn::(new_thread_runnable(new_thread)); + RUNTIME.spawn(new_thread.run()); Ok(new_pid) } diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 53499a06..bf2edc95 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -350,7 +350,11 @@ impl Process { trace_continue: bool, ) -> KResult> { let wait_object = { - let mut waits = self.wait_list.entry(wait_id, trace_stop, trace_continue); + let mut waits = self + .wait_list + .entry(wait_id, trace_stop, trace_continue) + .await; + loop { if let Some(object) = waits.get() { break object; @@ -377,7 +381,7 @@ impl Process { Ok(Some(wait_object)) } else { let mut procs = ProcessList::get().write().await; - procs.remove_process(wait_object.pid); + procs.remove_process(wait_object.pid).await; assert!(self .inner .access_mut(procs.prove_mut()) @@ -572,9 +576,9 @@ impl WaitList { /// # Safety /// Locks `ProcessList` and `WaitList` at the same time. When `wait` is called, /// releases the lock on `ProcessList` and `WaitList` and waits on `cv_wait_procs`. 
- pub fn entry(&self, wait_id: WaitId, want_stop: bool, want_continue: bool) -> Entry { + pub async fn entry(&self, wait_id: WaitId, want_stop: bool, want_continue: bool) -> Entry { Entry { - process_list: Task::block_on(ProcessList::get().read()), + process_list: ProcessList::get().read().await, wait_procs: self.wait_procs.lock(), cv: &self.cv_wait_procs, want_stop, diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs index 2832dae5..5293b4b7 100644 --- a/src/kernel/task/process_list.rs +++ b/src/kernel/task/process_list.rs @@ -9,7 +9,6 @@ use alloc::{ collections::btree_map::BTreeMap, sync::{Arc, Weak}, }; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, AsProofMut as _, RwLock}; pub struct ProcessList { @@ -54,7 +53,7 @@ impl ProcessList { self.threads.insert(thread.tid, thread.clone()); } - pub fn remove_process(&mut self, pid: u32) { + pub async fn remove_process(&mut self, pid: u32) { // Thread group leader has the same tid as the pid. if let Some(thread) = self.threads.remove(&pid) { self.processes.remove(&pid); @@ -64,7 +63,7 @@ impl ProcessList { let pgroup = unsafe { thread.process.pgroup.swap(None) }.unwrap(); let _parent = unsafe { thread.process.parent.swap(None) }.unwrap(); pgroup.remove_member(pid, self.prove_mut()); - Task::block_on(rcu_sync()); + rcu_sync().await; if Arc::strong_count(&pgroup) == 1 { self.pgroups.remove(&pgroup.pgid); @@ -154,10 +153,9 @@ impl ProcessList { // If we are the session leader, we should drop the control terminal. 
if process.session(self.prove()).sid == process.pid { - if let Some(terminal) = - Task::block_on(process.session(self.prove()).drop_control_terminal()) + if let Some(terminal) = process.session(self.prove()).drop_control_terminal().await { - terminal.drop_session(); + terminal.drop_session().await; } } diff --git a/src/kernel/task/session.rs b/src/kernel/task/session.rs index 261a60c0..a7b57afd 100644 --- a/src/kernel/task/session.rs +++ b/src/kernel/task/session.rs @@ -87,14 +87,14 @@ impl Session { ) -> KResult<()> { let mut job_control = self.job_control.write().await; if let Some(_) = job_control.control_terminal.as_ref() { - if let Some(session) = terminal.session().as_ref() { + if let Some(session) = terminal.session().await.as_ref() { if session.sid == self.sid { return Ok(()); } } return Err(EPERM); } - terminal.set_session(self, forced)?; + terminal.set_session(self, forced).await?; job_control.control_terminal = Some(terminal.clone()); job_control.foreground = Arc::downgrade(&Thread::current().process.pgroup(procs)); Ok(()) diff --git a/src/kernel/task/signal.rs b/src/kernel/task/signal.rs index 5cff2fe6..b6ed34bf 100644 --- a/src/kernel/task/signal.rs +++ b/src/kernel/task/signal.rs @@ -9,7 +9,7 @@ use core::{cmp::Reverse, task::Waker}; use eonix_hal::fpu::FpuState; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; -use eonix_runtime::task::Task; +use eonix_runtime::scheduler::Runtime; use eonix_sync::AsProof as _; use intrusive_collections::UnsafeRef; use posix_types::signal::{SigSet, Signal}; @@ -226,15 +226,12 @@ impl SignalList { // `SIGSTOP` can only be waken up by `SIGCONT` or `SIGKILL`. // SAFETY: Preempt disabled above. 
- { + Runtime::block_till_woken(|waker| { let mut inner = self.inner.lock(); - let waker = Waker::from(Task::current().clone()); - - let old_waker = inner.stop_waker.replace(waker); + let old_waker = inner.stop_waker.replace(waker.clone()); assert!(old_waker.is_none(), "We should not have a waker here"); - } - - Task::park_preempt_disabled(); + }) + .await; if let Some(parent) = thread.process.parent.load() { parent.notify( diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index cccbb918..e3b3a967 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -17,10 +17,10 @@ use alloc::sync::Arc; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ future::Future, - pin::Pin, + pin::{pin, Pin}, ptr::NonNull, sync::atomic::{AtomicBool, Ordering}, - task::{Context, Poll, Waker}, + task::{Context, Poll}, }; use eonix_hal::{ fpu::FpuState, @@ -33,7 +33,6 @@ use eonix_hal::{ trap::{disable_irqs_save, TrapContext}, }; use eonix_mm::address::{Addr as _, VAddr}; -use eonix_runtime::run::{Contexted, Run, RunState}; use eonix_sync::AsProofMut as _; use pointers::BorrowedArc; use posix_types::signal::Signal; @@ -41,11 +40,6 @@ use posix_types::signal::Signal; #[eonix_percpu::define_percpu] static CURRENT_THREAD: Option> = None; -pub struct ThreadRunnable { - thread: Arc, - future: F, -} - pub struct ThreadBuilder { tid: Option, name: Option>, @@ -421,28 +415,44 @@ impl Thread { } } - pub async fn run(self: Arc) { - struct ContextedRun<'a, F: Future>(F, &'a Thread); + pub fn run(self: Arc) -> impl Future + Send + 'static { + async fn real_run_with_context(me: &Arc) { + let mut future = pin!(me.real_run()); + + core::future::poll_fn(|cx| { + me.process.mm_list.activate(); - impl Future for ContextedRun<'_, F> { - type Output = F::Output; + CURRENT_THREAD.set(NonNull::new(Arc::as_ptr(me) as *mut _)); + + unsafe { + // SAFETY: Preemption is disabled. 
+ me.load_thread_area32(); + } + + unsafe { + let trap_ctx_ptr: *const TrapContext = &raw const *me.trap_ctx.borrow(); + // SAFETY: + CPU::local() + .as_mut() + .load_interrupt_stack(trap_ctx_ptr as u64); + } - fn poll(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { let irq_state = disable_irqs_save(); - let (future, _) = unsafe { - // SAFETY: We construct a pinned future and `&Thread` is `Unpin`. - let me = self.as_mut().get_unchecked_mut(); - (Pin::new_unchecked(&mut me.0), me.1) - }; - let retval = future.poll(ctx); + let result = future.as_mut().poll(cx); irq_state.restore(); - retval - } + + me.process.mm_list.deactivate(); + + CURRENT_THREAD.set(None); + + result + }) + .await } - ContextedRun(self.real_run(), &self).await + async move { real_run_with_context(&self).await } } } @@ -467,57 +477,3 @@ pub async fn yield_now() { Yield { yielded: false }.await; } - -pub fn new_thread_runnable( - thread: Arc, -) -> ThreadRunnable + Send + 'static> { - ThreadRunnable { - thread: thread.clone(), - future: thread.run(), - } -} - -impl Contexted for ThreadRunnable { - fn load_running_context(&self) { - self.thread.process.mm_list.activate(); - - let raw_ptr: *const Thread = &raw const *self.thread; - CURRENT_THREAD.set(NonNull::new(raw_ptr as *mut _)); - - unsafe { - // SAFETY: Preemption is disabled. 
- self.thread.load_thread_area32(); - } - - unsafe { - let trap_ctx_ptr: *const TrapContext = &raw const *self.thread.trap_ctx.borrow(); - // SAFETY: - CPU::local() - .as_mut() - .load_interrupt_stack(trap_ctx_ptr as u64); - } - } - - fn restore_running_context(&self) { - self.thread.process.mm_list.deactivate(); - - CURRENT_THREAD.set(None); - } -} - -impl Run for ThreadRunnable { - type Output = F::Output; - - fn run(mut self: Pin<&mut Self>, waker: &Waker) -> RunState { - let mut ctx = Context::from_waker(waker); - - match unsafe { - self.as_mut() - .map_unchecked_mut(|me| &mut me.future) - .poll(&mut ctx) - } { - Poll::Ready(output) => RunState::Finished(output), - Poll::Pending => RunState::Running, - } - } -} diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 31c08ea2..5532a2e1 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -623,12 +623,12 @@ impl Terminal { ptr.write(window_size) } TerminalIORequest::GetTermios(ptr) => { - let termios = Task::block_on(self.inner.lock()).termio.get_user(); + let termios = self.inner.lock().await.termio.get_user(); ptr.write(termios) } TerminalIORequest::SetTermios(ptr) => { let user_termios = ptr.read()?; - let mut inner = Task::block_on(self.inner.lock()); + let mut inner = self.inner.lock().await; // TODO: We ignore unknown bits for now. inner.termio.iflag = TermioIFlags::from_bits_truncate(user_termios.iflag as u16); @@ -644,13 +644,13 @@ impl Terminal { } /// Assign the `session` to this terminal. Drop the previous session if `forced` is true. 
- pub fn set_session(&self, session: &Arc, forced: bool) -> KResult<()> { - let mut inner = Task::block_on(self.inner.lock()); + pub async fn set_session(&self, session: &Arc, forced: bool) -> KResult<()> { + let mut inner = self.inner.lock().await; if let Some(session) = inner.session.upgrade() { if !forced { Err(EPERM) } else { - Task::block_on(session.drop_control_terminal()); + session.drop_control_terminal().await; inner.session = Arc::downgrade(&session); Ok(()) } @@ -661,12 +661,12 @@ impl Terminal { } } - pub fn drop_session(&self) { - Task::block_on(self.inner.lock()).session = Weak::new(); + pub async fn drop_session(&self) { + self.inner.lock().await.session = Weak::new(); } - pub fn session(&self) -> Option> { - Task::block_on(self.inner.lock()).session.upgrade() + pub async fn session(&self) -> Option> { + self.inner.lock().await.session.upgrade() } } diff --git a/src/lib.rs b/src/lib.rs index 6fd82c40..2900772a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,19 +24,19 @@ use crate::kernel::task::alloc_pid; use alloc::{ffi::CString, sync::Arc}; use core::{ hint::spin_loop, - sync::atomic::{AtomicBool, Ordering}, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, }; use eonix_hal::{ - arch_exported::bootstrap::shutdown, processor::CPU, traits::trap::IrqState, + arch_exported::bootstrap::shutdown, + processor::{halt, CPU, CPU_COUNT}, + traits::trap::IrqState, trap::disable_irqs_save, }; use eonix_mm::address::PRange; -use eonix_runtime::{run::FutureRun, scheduler::Scheduler, task::Task}; +use eonix_runtime::scheduler::RUNTIME; use kernel::{ mem::GlobalPageAlloc, - task::{ - new_thread_runnable, KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder, - }, + task::{ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, @@ -80,6 +80,25 @@ fn panic(info: &core::panic::PanicInfo) -> ! 
{ } static BSP_OK: AtomicBool = AtomicBool::new(false); +static CPU_SHUTTING_DOWN: AtomicUsize = AtomicUsize::new(0); + +fn shutdown_system() -> ! { + let cpu_count = CPU_COUNT.load(Ordering::Relaxed); + + if CPU_SHUTTING_DOWN.fetch_add(1, Ordering::AcqRel) + 1 == cpu_count { + println_info!("All CPUs are shutting down. Gracefully powering off..."); + shutdown(); + } else { + println_info!( + "CPU {} is shutting down. Waiting for other CPUs...", + CPU::local().cpuid() + ); + + loop { + halt(); + } + } +} #[eonix_hal::main] fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! { @@ -90,22 +109,14 @@ fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! { driver::sbi_console::init_console(); } - // To satisfy the `Scheduler` "preempt count == 0" assertion. - eonix_preempt::disable(); - - // We need root dentry to be present in constructor of `FsContext`. - // So call `init_vfs` first, then `init_multitasking`. - Scheduler::init_local_scheduler::(); - - Scheduler::get().spawn::(FutureRun::new(init_process(data.get_early_stack()))); - BSP_OK.store(true, Ordering::Release); + RUNTIME.spawn(init_process(data.get_early_stack())); + drop(data); - unsafe { - // SAFETY: `preempt::count()` == 1. - Scheduler::goto_scheduler_noreturn() - } + + RUNTIME.enter(); + shutdown_system(); } #[eonix_hal::ap_main] @@ -115,16 +126,10 @@ fn kernel_ap_main(_stack_range: PRange) -> ! { spin_loop(); } - Scheduler::init_local_scheduler::(); println_debug!("AP{} started", CPU::local().cpuid()); - eonix_preempt::disable(); - - // TODO!!!!!: Free the stack after having switched to idle task. - unsafe { - // SAFETY: `preempt::count()` == 1. 
- Scheduler::goto_scheduler_noreturn() - } + RUNTIME.enter(); + shutdown_system(); } async fn init_process(early_kstack: PRange) { @@ -223,7 +228,7 @@ async fn init_process(early_kstack: PRange) { .name(Arc::from(&b"busybox"[..])) .entry(load_info.entry_ip, load_info.sp); - let mut process_list = Task::block_on(ProcessList::get().write()); + let mut process_list = ProcessList::get().write().await; let (thread, process) = ProcessBuilder::new() .pid(alloc_pid()) .mm_list(load_info.mm_list) @@ -235,5 +240,5 @@ async fn init_process(early_kstack: PRange) { // TODO!!!: Remove this. thread.files.open_console(); - Scheduler::get().spawn::(new_thread_runnable(thread)); + RUNTIME.spawn(thread.run()); } From fb9a175e70c182681ad3467f70e18b7c2fa08f8a Mon Sep 17 00:00:00 2001 From: greatbridf Date: Fri, 8 Aug 2025 00:44:41 +0800 Subject: [PATCH 12/29] runtime: add trace logs and fix few bugs Add tracing logs in Runtime::enter and other critical points. Pass trace_scheduler feature down to eonix_runtime crate, fixing the problem that the feature is not working. When the task is blocked, we set CURRENT_TASK to None as well. In the early initialization stage, the stack is placed at an identity-mapped physical address. VirtIO driver might try converting the given buffer paths back to physical ones, which will generate errors. So BSP and AP should allocate another stack and switch to it. We use TaskContext for the fix. 
Signed-off-by: greatbridf --- Cargo.toml | 2 +- crates/eonix_runtime/src/scheduler.rs | 42 ++++++++++++++++++++------- src/lib.rs | 42 +++++++++++++++++++++++---- 3 files changed, 70 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 15df5f15..5231dbb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ virtio-drivers = { version = "0.11.0" } default = [] trace_pci = [] trace_syscall = [] -trace_scheduler = [] +trace_scheduler = ["eonix_runtime/trace_scheduler"] log_trace = ["trace_pci", "trace_syscall", "trace_scheduler"] log_debug = [] smp = [] diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs index c9c73ea5..3f72fbf4 100644 --- a/crates/eonix_runtime/src/scheduler.rs +++ b/crates/eonix_runtime/src/scheduler.rs @@ -10,6 +10,7 @@ use core::{ task::{Context, Poll, Waker}, }; use eonix_hal::processor::halt; +use eonix_log::println_trace; use eonix_sync::{LazyLock, Spin, SpinIrq as _}; use intrusive_collections::RBTree; use pointers::BorrowedArc; @@ -99,7 +100,10 @@ impl Runtime { } fn remove_and_enqueue_current(&self, rq: &mut impl DerefMut) { - let Some(current) = self.current() else { + let Some(current) = CURRENT_TASK + .swap(None) + .map(|cur| unsafe { Arc::from_raw(cur.as_ptr()) }) + else { return; }; @@ -111,18 +115,23 @@ impl Runtime { } }) { Ok(TaskState::READY_RUNNING) => { - let current = unsafe { - Arc::from_raw( - CURRENT_TASK - .swap(None) - .expect("Current task should be present") - .as_ptr(), - ) - }; + println_trace!( + "trace_scheduler", + "Re-enqueueing task {:?} (CPU{})", + current.id, + eonix_hal::processor::CPU::local().cpuid(), + ); rq.put(current); } - Ok(_) => {} + Ok(_) => { + println_trace!( + "trace_scheduler", + "Current task {:?} (CPU{}) is blocked, not re-enqueueing", + current.id, + eonix_hal::processor::CPU::local().cpuid(), + ); + } _ => unreachable!(), } } @@ -174,6 +183,13 @@ impl Runtime { continue; }; + println_trace!( + "trace_scheduler", + "Switching to 
task {:?} (CPU{})", + next.id, + eonix_hal::processor::CPU::local().cpuid(), + ); + let old_state = next.state.swap(TaskState::RUNNING); assert_eq!( old_state, @@ -195,6 +211,12 @@ impl Runtime { "Current task should be at least in RUNNING state" ); + println_trace!( + "trace_scheduler", + "Task {:?} finished execution, removing...", + Task::current().id, + ); + self.remove_task(&Task::current()); CURRENT_TASK.set(None); diff --git a/src/lib.rs b/src/lib.rs index 2900772a..beebe7c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,15 +28,16 @@ use core::{ }; use eonix_hal::{ arch_exported::bootstrap::shutdown, + context::TaskContext, processor::{halt, CPU, CPU_COUNT}, - traits::trap::IrqState, + traits::{context::RawTaskContext, trap::IrqState}, trap::disable_irqs_save, }; use eonix_mm::address::PRange; -use eonix_runtime::scheduler::RUNTIME; +use eonix_runtime::{executor::Stack, scheduler::RUNTIME}; use kernel::{ mem::GlobalPageAlloc, - task::{ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, + task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, @@ -115,8 +116,21 @@ fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! { drop(data); - RUNTIME.enter(); - shutdown_system(); + let mut ctx = TaskContext::new(); + let stack_bottom = { + let stack = KernelStack::new(); + let bottom = stack.get_bottom().addr().get(); + core::mem::forget(stack); + + bottom + }; + ctx.set_interrupt_enabled(true); + ctx.set_program_counter(standard_main as usize); + ctx.set_stack_pointer(stack_bottom); + + unsafe { + TaskContext::switch_to_noreturn(&mut ctx); + } } #[eonix_hal::ap_main] @@ -128,6 +142,24 @@ fn kernel_ap_main(_stack_range: PRange) -> ! 
{ println_debug!("AP{} started", CPU::local().cpuid()); + let mut ctx = TaskContext::new(); + let stack_bottom = { + let stack = KernelStack::new(); + let bottom = stack.get_bottom().addr().get(); + core::mem::forget(stack); + + bottom + }; + ctx.set_interrupt_enabled(true); + ctx.set_program_counter(standard_main as usize); + ctx.set_stack_pointer(stack_bottom); + + unsafe { + TaskContext::switch_to_noreturn(&mut ctx); + } +} + +fn standard_main() -> ! { RUNTIME.enter(); shutdown_system(); } From 3ab454f6df811741e346cbb7951502151b170f3a Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 02:00:39 +0800 Subject: [PATCH 13/29] riscv64, trap: remove load_interrupt_stack impl This is used only by Thread when we enter user execution context, when we need to save the "interrupt stack" to the local CPU so we can get the information needed to capture the trap. We need to support nested captured trap returns. So instead of setting that manually, we save the needed information when trap_return() is called (since we have precisely the trap context needed) and restore it after the trap is captured. 
Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/cpu.rs | 6 +----- crates/eonix_hal/src/arch/riscv64/trap/mod.rs | 8 ++++++++ src/kernel/task/thread.rs | 8 -------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/cpu.rs b/crates/eonix_hal/src/arch/riscv64/cpu.rs index 8d62e741..7e6e3ac0 100644 --- a/crates/eonix_hal/src/arch/riscv64/cpu.rs +++ b/crates/eonix_hal/src/arch/riscv64/cpu.rs @@ -59,11 +59,7 @@ impl CPU { sscratch::write(TRAP_SCRATCH.as_ptr() as usize); } - pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) { - TRAP_SCRATCH - .as_mut() - .set_trap_context(NonNull::new(sp as *mut _).unwrap()); - } + pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) {} pub fn set_tls32(self: Pin<&mut Self>, _user_tls: &UserTLS) { // nothing diff --git a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs index 2d359759..58566ebe 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs @@ -283,9 +283,15 @@ impl TrapReturn for TrapContext { unsafe fn trap_return(&mut self) { let irq_states = disable_irqs_save(); + let old_handler = core::mem::replace(&mut TRAP_SCRATCH.as_mut().handler, captured_trap_handler); + let old_trap_context = core::mem::replace( + &mut TRAP_SCRATCH.as_mut().trap_context, + Some(NonNull::from(&mut *self)), + ); + let mut to_ctx = TaskContext::new(); to_ctx.set_program_counter(captured_trap_return as usize); to_ctx.set_stack_pointer(&raw mut *self as usize); @@ -296,6 +302,8 @@ impl TrapReturn for TrapContext { } TRAP_SCRATCH.as_mut().handler = old_handler; + TRAP_SCRATCH.as_mut().trap_context = old_trap_context; + irq_states.restore(); } } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index e3b3a967..ef71657f 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -429,14 +429,6 @@ impl Thread { me.load_thread_area32(); } - 
unsafe { - let trap_ctx_ptr: *const TrapContext = &raw const *me.trap_ctx.borrow(); - // SAFETY: - CPU::local() - .as_mut() - .load_interrupt_stack(trap_ctx_ptr as u64); - } - let irq_state = disable_irqs_save(); let result = future.as_mut().poll(cx); From 6b152c74dd16cf54ea60d9d074b1e4baadbc0839 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 02:06:15 +0800 Subject: [PATCH 14/29] riscv64, trap: fix kernel space trap returns On riscv64 platforms, we load the kernel tp only if we've come from U mode to reduce overhead. But we would restore the tp saved in TrapContext even if we are returning to kernel space, which causes problems because the default tp is zero. We should save kernel tp register to the field in TrapContext structs when we set privilege mode to kernel. Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs index 776fab2a..369eef3e 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs @@ -224,7 +224,15 @@ impl RawTrapContext for TrapContext { fn set_user_mode(&mut self, user: bool) { match user { true => self.sstatus.set_spp(SPP::User), - false => self.sstatus.set_spp(SPP::Supervisor), + false => { + unsafe { + core::arch::asm!( + "mv {}, tp", + out(reg) self.regs.tp, + ); + }; + self.sstatus.set_spp(SPP::Supervisor); + } } } From 33ff3156a046af1843ab7afe73fc6daa79cd9557 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 02:52:57 +0800 Subject: [PATCH 15/29] task: brand new block_on and stackful wrapper We provide a simple block_on to constantly poll the given future and block the current execution thread as before. We also introduce a new future wrapper named `stackful` to convert any future into a stackful one. 
We allocate a stack and keep polling the future on the stack by constructing a TrapContext and calling trap_return() to get into the stackful environment. Then we capture the timer interrupt to make preemption work. Signed-off-by: greatbridf --- crates/eonix_runtime/src/task.rs | 7 -- src/kernel/task.rs | 137 +++++++++++++++++++++++++++++++ src/kernel/task/clone.rs | 7 +- src/lib.rs | 4 +- 4 files changed, 143 insertions(+), 12 deletions(-) diff --git a/crates/eonix_runtime/src/task.rs b/crates/eonix_runtime/src/task.rs index 8f4062d8..7b89d3fe 100644 --- a/crates/eonix_runtime/src/task.rs +++ b/crates/eonix_runtime/src/task.rs @@ -93,13 +93,6 @@ impl Task { return rq; } } - - pub fn block_on(future: F) -> F::Output - where - F: Future, - { - todo!() - } } impl Wake for Task { diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 1b47923e..e2bbcb3f 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -19,3 +19,140 @@ pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; pub use thread::{yield_now, Thread, ThreadBuilder}; + +fn do_block_on(mut future: core::pin::Pin<&mut F>) -> F::Output +where + F: core::future::Future, +{ + let waker = core::task::Waker::noop(); + let mut cx = core::task::Context::from_waker(&waker); + + loop { + match future.as_mut().poll(&mut cx) { + core::task::Poll::Ready(output) => return output, + core::task::Poll::Pending => {} + } + } +} + +/// Constantly poll the given future until it is ready, blocking the current thread. +/// +/// # Warning +/// This function will block the current thread and should not be used in async +/// contexts as it might cause infinite blocking or deadlocks. The following is +/// a bad example: +/// +/// ```ignore +/// block_on(async { +/// // This will block the current thread forever. +/// loop { +/// println_debug!("This will never end!"); +/// } +/// }); +/// +/// // The code below will never be reached. 
+/// println_debug!("You'll never see this message!"); +/// ``` +/// +/// Use [`stackful`] instead to run async (or computational) code in a separate +/// stackful (and preemptive) context or `RUNTIME.spawn` to run async code in +/// the runtime's executor. +pub fn block_on(future: F) -> F::Output +where + F: core::future::Future, +{ + do_block_on(core::pin::pin!(future)) +} + +/// Run the given future in a stackful context, allowing it to be preempted by +/// timer interrupts. +/// +/// ```ignore +/// RUNTIME.spawn(stackful(async { +/// // Some simulated computation heavy task. +/// loop { +/// println_debug!("Hello from stackful future!"); +/// } +/// })); +/// ``` +pub async fn stackful(mut future: F) -> F::Output +where + F: core::future::Future, +{ + use core::cell::UnsafeCell; + use eonix_hal::traits::fault::Fault; + use eonix_hal::traits::trap::RawTrapContext; + use eonix_hal::traits::trap::TrapReturn; + use eonix_hal::trap::TrapContext; + use eonix_log::println_debug; + use eonix_runtime::executor::Stack; + + use crate::kernel::{ + interrupt::{default_fault_handler, default_irq_handler}, + timer::{should_reschedule, timer_interrupt}, + }; + + let stack = KernelStack::new(); + + fn execute( + future: core::pin::Pin<&mut F>, + output_ptr: core::ptr::NonNull>, + ) -> ! + where + F: core::future::Future, + { + let output = do_block_on(future); + + unsafe { + output_ptr.write(Some(output)); + } + + unsafe { + core::arch::asm!("ebreak"); + } + + unreachable!() + } + + let sp = stack.get_bottom(); + let output = UnsafeCell::new(None); + + let mut trap_ctx = TrapContext::new(); + + trap_ctx.set_user_mode(false); + trap_ctx.set_interrupt_enabled(true); + let _ = trap_ctx.set_user_call_frame( + execute:: as usize, + Some(sp.addr().get()), + None, + &[(&raw mut future) as usize, output.get() as usize], + |_, _| Ok::<(), u32>(()), + ); + + loop { + unsafe { + trap_ctx.trap_return(); + } + + match trap_ctx.trap_type() { + eonix_hal::traits::trap::TrapType::Syscall { .. 
} => {} + eonix_hal::traits::trap::TrapType::Fault(fault) => { + // Breakpoint + if let Fault::Unknown(3) = &fault { + println_debug!("Breakpoint hit, returning output"); + break output.into_inner().unwrap(); + } + + default_fault_handler(fault, &mut trap_ctx) + } + eonix_hal::traits::trap::TrapType::Irq { callback } => callback(default_irq_handler), + eonix_hal::traits::trap::TrapType::Timer { callback } => { + callback(timer_interrupt); + + if should_reschedule() { + yield_now().await; + } + } + } + } +} diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index 48e34f96..2a16ce56 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,3 +1,4 @@ +use super::{block_on, stackful}; use crate::{ kernel::{ syscall::procops::parse_user_tls, @@ -9,7 +10,7 @@ use crate::{ use bitflags::bitflags; use core::num::NonZero; use eonix_hal::processor::UserTLS; -use eonix_runtime::{scheduler::RUNTIME, task::Task}; +use eonix_runtime::scheduler::RUNTIME; use eonix_sync::AsProof; use posix_types::signal::Signal; @@ -131,7 +132,7 @@ impl CloneArgs { } pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { - let mut procs = Task::block_on(ProcessList::get().write()); + let mut procs = block_on(ProcessList::get().write()); let thread_builder = ThreadBuilder::new().clone_from(&thread, &clone_args)?; let current_process = thread.process.clone(); @@ -163,7 +164,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)? 
} - RUNTIME.spawn(new_thread.run()); + RUNTIME.spawn(stackful(new_thread.run())); Ok(new_pid) } diff --git a/src/lib.rs b/src/lib.rs index beebe7c1..cbe7bc5d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,7 +37,7 @@ use eonix_mm::address::PRange; use eonix_runtime::{executor::Stack, scheduler::RUNTIME}; use kernel::{ mem::GlobalPageAlloc, - task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, + task::{stackful, KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, @@ -272,5 +272,5 @@ async fn init_process(early_kstack: PRange) { // TODO!!!: Remove this. thread.files.open_console(); - RUNTIME.spawn(thread.run()); + RUNTIME.spawn(stackful(thread.run())); } From 5ada0d063410c21ad08a9cbda3a4b93993bae910 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 02:55:56 +0800 Subject: [PATCH 16/29] build, Makefile: remove --feature if none is present If we don't pass in FEATURES or SMP, we will have no feature enabled. In this scenario, the dangling --features argument will cause cargo to panic. We provide the features and the --features flag together to avoid this... 
Signed-off-by: greatbridf --- Makefile.src | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile.src b/Makefile.src index 2701ecd6..ab13f5b8 100644 --- a/Makefile.src +++ b/Makefile.src @@ -22,7 +22,15 @@ KERNEL_CARGO_MANIFESTS += $(shell find src macros crates -name Cargo.toml -type KERNEL_DEPS := $(KERNEL_SOURCES) $(KERNEL_CARGO_MANIFESTS) QEMU_ARGS ?= -no-reboot -CARGO_FLAGS := --profile $(PROFILE) --features $(FEATURES)$(if $(SMP),$(COMMA)smp,) +CARGO_FLAGS := --profile $(PROFILE) + +ifneq ($(SMP),) +CARGO_FLAGS += --features smp +endif + +ifneq ($(FEATURES),) +CARGO_FLAGS += --features $(FEATURES) +endif ifeq ($(HOST),darwin) QEMU_ACCEL ?= -accel tcg From 21dd5ea1c754e5ab9a334960dc5ddfb5f04106e6 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 03:00:04 +0800 Subject: [PATCH 17/29] rcu: provide call_rcu() to call rcu drop asynchronously We can pass a function to be called after a successful rcu_sync call. Signed-off-by: greatbridf --- src/kernel/task/process.rs | 24 +++++++++++++----------- src/rcu.rs | 21 ++++++++++++++++----- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index bf2edc95..fb53f4fa 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -4,10 +4,11 @@ use super::{ }; use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; use crate::kernel::task::{CloneArgs, CloneFlags}; +use crate::rcu::call_rcu; use crate::{ kernel::mem::MMList, prelude::*, - rcu::{rcu_sync, RCUPointer, RCUReadGuard}, + rcu::{RCUPointer, RCUReadGuard}, sync::CondVar, }; use alloc::{ @@ -408,12 +409,14 @@ impl Process { .session(session.clone()) .build(&mut process_list); - { - let _old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap(); - let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); - old_pgroup.remove_member(self.pid, process_list.prove_mut()); - Task::block_on(rcu_sync()); - } 
+ let old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap(); + let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); + old_pgroup.remove_member(self.pid, process_list.prove_mut()); + + call_rcu(move || { + drop(old_session); + drop(old_pgroup); + }); Ok(pgroup.pgid) } @@ -459,10 +462,9 @@ impl Process { }; pgroup.remove_member(self.pid, procs.prove_mut()); - { - let _old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap(); - Task::block_on(rcu_sync()); - } + + let old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap(); + call_rcu(move || drop(old_pgroup)); Ok(()) } diff --git a/src/rcu.rs b/src/rcu.rs index f018d3f3..32ff7657 100644 --- a/src/rcu.rs +++ b/src/rcu.rs @@ -1,11 +1,11 @@ -use crate::prelude::*; +use crate::{kernel::task::block_on, prelude::*}; use alloc::sync::Arc; use core::{ ops::Deref, ptr::NonNull, sync::atomic::{AtomicPtr, Ordering}, }; -use eonix_runtime::task::Task; +use eonix_runtime::scheduler::RUNTIME; use eonix_sync::{Mutex, RwLock, RwLockReadGuard}; use pointers::BorrowedArc; @@ -21,7 +21,7 @@ impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> { fn lock(value: BorrowedArc<'data, T>) -> Self { Self { value, - _guard: Task::block_on(GLOBAL_RCU_SEM.read()), + _guard: block_on(GLOBAL_RCU_SEM.read()), _phantom: PhantomData, } } @@ -48,6 +48,14 @@ pub async fn rcu_sync() { let _ = GLOBAL_RCU_SEM.write().await; } +pub fn call_rcu(func: impl FnOnce() + Send + 'static) { + RUNTIME.spawn(async move { + // Wait for all readers to finish. + rcu_sync().await; + func(); + }); +} + pub trait RCUNode { fn rcu_prev(&self) -> &AtomicPtr; fn rcu_next(&self) -> &AtomicPtr; @@ -154,7 +162,7 @@ impl> RCUList { } pub fn iter(&self) -> RCUIterator { - let _lck = Task::block_on(self.reader_lock.read()); + let _lck = block_on(self.reader_lock.read()); RCUIterator { // SAFETY: We have a read lock, so the node is still alive. 
@@ -264,7 +272,10 @@ impl Drop for RCUPointer { if let Some(arc) = unsafe { self.swap(None) } { // We only wait if there are other references. if Arc::strong_count(&arc) == 1 { - Task::block_on(rcu_sync()); + call_rcu(move || { + let _ = arc; + todo!(); + }); } } } From 874a4fa000dfbe95a12781a4fa72cc00082baa60 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 9 Aug 2025 03:01:37 +0800 Subject: [PATCH 18/29] task: migrate all Task::block_on calls to task::block_on Simple renamings... Further work is needed to make the system work. Signed-off-by: greatbridf --- src/driver/ahci/mod.rs | 4 +-- src/driver/ahci/port.rs | 6 ++-- src/driver/virtio/riscv64.rs | 8 +++-- src/fs/ext4.rs | 11 +++---- src/fs/fat32.rs | 10 +++--- src/fs/procfs.rs | 10 +++--- src/fs/tmpfs.rs | 46 ++++++++++++++-------------- src/kernel/chardev.rs | 11 +++---- src/kernel/mem/mm_area.rs | 4 +-- src/kernel/mem/mm_list.rs | 14 ++++----- src/kernel/mem/mm_list/page_fault.rs | 5 ++- src/kernel/syscall/file_rw.rs | 26 ++++++++-------- src/kernel/syscall/mm.rs | 22 ++++++------- src/kernel/syscall/procops.rs | 41 ++++++++++++------------- src/kernel/task/process.rs | 14 ++++----- src/kernel/terminal.rs | 5 ++- src/kernel/vfs/dentry/dcache.rs | 4 +-- src/kernel/vfs/file.rs | 19 ++++++------ src/kernel/vfs/filearray.rs | 11 ++++++- src/kernel/vfs/inode.rs | 4 +-- 20 files changed, 137 insertions(+), 138 deletions(-) diff --git a/src/driver/ahci/mod.rs b/src/driver/ahci/mod.rs index e988c9c3..c3b1cfa0 100644 --- a/src/driver/ahci/mod.rs +++ b/src/driver/ahci/mod.rs @@ -6,6 +6,7 @@ use crate::{ constants::{EINVAL, EIO}, interrupt::register_irq_handler, pcie::{self, Header, PCIDevice, PCIDriver, PciError}, + task::block_on, }, prelude::*, }; @@ -13,7 +14,6 @@ use alloc::{format, sync::Arc}; use control::AdapterControl; use defs::*; use eonix_mm::address::{AddrOps as _, PAddr}; -use eonix_runtime::task::Task; use eonix_sync::SpinIrq as _; use port::AdapterPort; @@ -133,7 +133,7 @@ impl Device<'static> { 
port, )?; - Task::block_on(port.partprobe())?; + block_on(port.partprobe())?; Ok(()) })() { diff --git a/src/driver/ahci/port.rs b/src/driver/ahci/port.rs index 27333d5d..f558f6e1 100644 --- a/src/driver/ahci/port.rs +++ b/src/driver/ahci/port.rs @@ -9,11 +9,11 @@ use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; use crate::kernel::constants::{EINVAL, EIO}; use crate::kernel::mem::paging::Page; use crate::kernel::mem::AsMemoryBlock as _; +use crate::kernel::task::block_on; use crate::prelude::*; use alloc::collections::vec_deque::VecDeque; use core::pin::pin; use eonix_mm::address::{Addr as _, PAddr}; -use eonix_runtime::task::Task; use eonix_sync::{SpinIrq as _, WaitList}; /// An `AdapterPort` is an HBA device in AHCI mode. @@ -156,7 +156,7 @@ impl AdapterPort<'_> { wait.as_mut().add_to_wait_list(); drop(free_list); - Task::block_on(wait); + block_on(wait); } } @@ -222,7 +222,7 @@ impl AdapterPort<'_> { self.stats.inc_cmd_sent(); - if let Err(_) = Task::block_on(slot.wait_finish()) { + if let Err(_) = block_on(slot.wait_finish()) { self.stats.inc_cmd_error(); return Err(EIO); }; diff --git a/src/driver/virtio/riscv64.rs b/src/driver/virtio/riscv64.rs index ad132569..9bdbf6ce 100644 --- a/src/driver/virtio/riscv64.rs +++ b/src/driver/virtio/riscv64.rs @@ -1,11 +1,13 @@ use super::virtio_blk::HAL; -use crate::kernel::block::{make_device, BlockDevice}; +use crate::kernel::{ + block::{make_device, BlockDevice}, + task::block_on, +}; use alloc::{sync::Arc, vec::Vec}; use eonix_hal::arch_exported::fdt::FDT; use eonix_hal::mm::ArchPhysAccess; use eonix_log::{println_info, println_warn}; use eonix_mm::address::{PAddr, PhysAccess}; -use eonix_runtime::task::Task; use eonix_sync::Spin; use virtio_drivers::{ device::blk::VirtIOBlk, @@ -47,7 +49,7 @@ pub fn init() { ) .expect("Failed to register VirtIO Block device"); - Task::block_on(block_device.partprobe()) + block_on(block_device.partprobe()) .expect("Failed to probe partitions for VirtIO Block device"); 
disk_id += 1; diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index a2138a09..b4953491 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,6 +1,7 @@ use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use crate::kernel::mem::{PageCache, PageCacheBackend}; +use crate::kernel::task::block_on; use crate::{ io::{Buffer, ByteBuffer}, kernel::{ @@ -24,7 +25,6 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; -use eonix_runtime::task::Task; use eonix_sync::RwLock; use ext4_rs::{BlockDevice as Ext4BlockDeviceTrait, Ext4Error}; use ext4_rs::{Errno, Ext4}; @@ -126,7 +126,7 @@ impl Ext4Fs { }); let root_inode = { - let mut icache = Task::block_on(ext4fs.icache.write()); + let mut icache = block_on(ext4fs.icache.write()); let root_inode = ext4fs.inner.get_inode_ref(2); ext4fs.get_or_insert( @@ -216,7 +216,7 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - Task::block_on(self.page_cache.read(buffer, offset)) + block_on(self.page_cache.read(buffer, offset)) } fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { @@ -251,8 +251,7 @@ impl Inode for DirInode { }; // Fast path: if the inode is already in the cache, return it. - if let Some(inode) = ext4fs.try_get(&Task::block_on(ext4fs.icache.read()), attr.ino as u64) - { + if let Some(inode) = ext4fs.try_get(&block_on(ext4fs.icache.read()), attr.ino as u64) { return Ok(Some(inode)); } @@ -261,7 +260,7 @@ impl Inode for DirInode { let real_perm = extra_perm | perm | perm >> 3 | perm >> 6; // Create a new inode based on the attributes. 
- let mut icache = Task::block_on(ext4fs.icache.write()); + let mut icache = block_on(ext4fs.icache.write()); let inode = ext4fs.get_or_insert( &mut icache, InodeData { diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 852d8673..f328dc74 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -4,6 +4,7 @@ mod file; use crate::io::Stream; use crate::kernel::constants::EIO; use crate::kernel::mem::AsMemoryBlock; +use crate::kernel::task::block_on; use crate::kernel::vfs::inode::WriteOffset; use crate::{ io::{Buffer, ByteBuffer, UninitBuffer}, @@ -32,7 +33,6 @@ use alloc::{ }; use core::{ops::ControlFlow, sync::atomic::Ordering}; use dir::Dirs as _; -use eonix_runtime::task::Task; use eonix_sync::RwLock; use file::ClusterRead; @@ -266,13 +266,13 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - Task::block_on(self.page_cache.read(buffer, offset)) + block_on(self.page_cache.read(buffer, offset)) } fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { let vfs = self.vfs.upgrade().ok_or(EIO)?; let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = Task::block_on(vfs.fat.read()); + let fat = block_on(vfs.fat.read()); if self.size.load(Ordering::Relaxed) as usize == 0 { return Ok(0); @@ -354,7 +354,7 @@ impl Inode for DirInode { fn lookup(&self, dentry: &Arc) -> KResult>> { let vfs = self.vfs.upgrade().ok_or(EIO)?; let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = Task::block_on(vfs.fat.read()); + let fat = block_on(vfs.fat.read()); let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) .read(vfs, 0) @@ -385,7 +385,7 @@ impl Inode for DirInode { ) -> KResult { let vfs = self.vfs.upgrade().ok_or(EIO)?; let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = Task::block_on(vfs.fat.read()); + let fat = block_on(vfs.fat.read()); let cluster_iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) .read(vfs, offset) diff --git a/src/fs/procfs.rs 
b/src/fs/procfs.rs index 85c0ecbb..82f597b8 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,4 +1,5 @@ use crate::kernel::constants::{EACCES, ENOTDIR}; +use crate::kernel::task::block_on; use crate::kernel::timer::Instant; use crate::{ io::Buffer, @@ -17,7 +18,6 @@ use crate::{ }; use alloc::sync::{Arc, Weak}; use core::{ops::ControlFlow, sync::atomic::Ordering}; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, AsProofMut as _, LazyLock, Locked}; use itertools::Itertools; @@ -134,7 +134,7 @@ impl DirInode { impl Inode for DirInode { fn lookup(&self, dentry: &Arc) -> KResult>> { - let lock = Task::block_on(self.rwsem.read()); + let lock = block_on(self.rwsem.read()); Ok(self .entries .access(lock.prove()) @@ -147,7 +147,7 @@ impl Inode for DirInode { offset: usize, callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, ) -> KResult { - let lock = Task::block_on(self.rwsem.read()); + let lock = block_on(self.rwsem.read()); self.entries .access(lock.prove()) .iter() @@ -234,7 +234,7 @@ pub fn creat( let inode = FileInode::new(ino, Arc::downgrade(&fs), file); { - let lock = Task::block_on(parent.idata.rwsem.write()); + let lock = block_on(parent.idata.rwsem.write()); parent .entries .access_mut(lock.prove_mut()) @@ -258,7 +258,7 @@ pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult { parent .entries - .access_mut(Task::block_on(inode.rwsem.write()).prove_mut()) + .access_mut(block_on(inode.rwsem.write()).prove_mut()) .push((Arc::from(name), ProcFsNode::Dir(inode.clone()))); Ok(ProcFsNode::Dir(inode)) diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs index 334e2781..840f97b1 100644 --- a/src/fs/tmpfs.rs +++ b/src/fs/tmpfs.rs @@ -1,6 +1,7 @@ use crate::io::Stream; use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR}; use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend}; +use crate::kernel::task::block_on; use crate::kernel::timer::Instant; use crate::kernel::vfs::inode::InodeData; use 
crate::kernel::vfs::inode::RenameData; @@ -21,7 +22,6 @@ use alloc::sync::{Arc, Weak}; use core::fmt::Debug; use core::{ops::ControlFlow, sync::atomic::Ordering}; use eonix_mm::paging::PAGE_SIZE; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, ProofMut}; use itertools::Itertools; @@ -138,7 +138,7 @@ impl Inode for DirectoryInode { offset: usize, callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, ) -> KResult { - let lock = Task::block_on(self.rwsem.read()); + let lock = block_on(self.rwsem.read()); self.entries .access(lock.prove()) .iter() @@ -153,7 +153,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let file = FileInode::new(ino, self.vfs.clone(), 0, mode); @@ -170,7 +170,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let file = NodeInode::new( @@ -188,7 +188,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let file = SymlinkInode::new(ino, self.vfs.clone(), target.into()); @@ -201,7 +201,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode); @@ -213,11 +213,11 @@ impl Inode for DirectoryInode { fn unlink(&self, at: &Arc) -> KResult<()> { let _vfs = acquire(&self.vfs)?; - let dir_lock = Task::block_on(self.rwsem.write()); + let dir_lock = block_on(self.rwsem.write()); let file = at.get_inode()?; let filename = 
at.get_name(); - let file_lock = Task::block_on(file.rwsem.write()); + let file_lock = block_on(file.rwsem.write()); let entries = self.entries.access_mut(dir_lock.prove_mut()); @@ -240,7 +240,7 @@ impl Inode for DirectoryInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _vfs = acquire(&self.vfs)?; - let _lock = Task::block_on(self.rwsem.write()); + let _lock = block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization let old = self.mode.load(Ordering::Relaxed); @@ -271,7 +271,7 @@ impl Inode for DirectoryInode { .downcast_ref::() .expect("vfs must be a TmpFs"); - let _rename_lock = Task::block_on(vfs.rename_lock.lock()); + let _rename_lock = block_on(vfs.rename_lock.lock()); let old_file = old_dentry.get_inode()?; let new_file = new_dentry.get_inode(); @@ -284,7 +284,7 @@ impl Inode for DirectoryInode { if same_parent { // Same directory rename // Remove from old location and add to new location - let parent_lock = Task::block_on(self.rwsem.write()); + let parent_lock = block_on(self.rwsem.write()); let entries = self.entries.access_mut(parent_lock.prove_mut()); fn rename_old( @@ -328,7 +328,7 @@ impl Inode for DirectoryInode { if let Some(new_idx) = new_entry_idx { // Replace existing file (i.e. 
rename the old and unlink the new) let new_file = new_file.unwrap(); - let _new_file_lock = Task::block_on(new_file.rwsem.write()); + let _new_file_lock = block_on(new_file.rwsem.write()); // SAFETY: `new_file_lock` has done the synchronization if new_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { @@ -364,8 +364,8 @@ impl Inode for DirectoryInode { .downcast_ref::() .expect("new parent must be a DirectoryInode"); - let old_parent_lock = Task::block_on(self.rwsem.write()); - let new_parent_lock = Task::block_on(new_parent_inode.rwsem.write()); + let old_parent_lock = block_on(self.rwsem.write()); + let new_parent_lock = block_on(new_parent_inode.rwsem.write()); let old_ino = old_file.ino; let new_ino = new_file.as_ref().ok().map(|f| f.ino); @@ -391,7 +391,7 @@ impl Inode for DirectoryInode { if has_new { // Replace existing file (i.e. move the old and unlink the new) let new_file = new_file.unwrap(); - let new_file_lock = Task::block_on(new_file.rwsem.write()); + let new_file_lock = block_on(new_file.rwsem.write()); if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 && new_file.mode.load(Ordering::Relaxed) & S_IFDIR == 0 @@ -424,7 +424,7 @@ impl Inode for DirectoryInode { *old_file.ctime.lock() = now; } - Task::block_on(dcache::d_exchange(old_dentry, new_dentry)); + block_on(dcache::d_exchange(old_dentry, new_dentry)); Ok(()) } @@ -511,13 +511,13 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let lock = Task::block_on(self.rwsem.write()); - Task::block_on(self.pages.read(buffer, offset)) + let _lock = block_on(self.rwsem.write()); + block_on(self.pages.read(buffer, offset)) } fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { // TODO: We don't need that strong guarantee, find some way to avoid locks - let lock = Task::block_on(self.rwsem.write()); + let _lock = block_on(self.rwsem.write()); let mut store_new_end = None; let offset = match offset { @@ -530,7 +530,7 @@ impl Inode 
for FileInode { } }; - let wrote = Task::block_on(self.pages.write(stream, offset))?; + let wrote = block_on(self.pages.write(stream, offset))?; let cursor_end = offset + wrote; if let Some(store_end) = store_new_end { @@ -545,8 +545,8 @@ impl Inode for FileInode { } fn truncate(&self, length: usize) -> KResult<()> { - let lock = Task::block_on(self.rwsem.write()); - Task::block_on(self.pages.resize(length))?; + let _lock = block_on(self.rwsem.write()); + block_on(self.pages.resize(length))?; self.size.store(length as u64, Ordering::Relaxed); *self.mtime.lock() = Instant::now(); Ok(()) @@ -554,7 +554,7 @@ impl Inode for FileInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _vfs = acquire(&self.vfs)?; - let _lock = Task::block_on(self.rwsem.write()); + let _lock = block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization let old = self.mode.load(Ordering::Relaxed); diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs index cd23fc14..4e0d9d0b 100644 --- a/src/kernel/chardev.rs +++ b/src/kernel/chardev.rs @@ -2,7 +2,7 @@ use super::{ block::make_device, console::get_console, constants::{EEXIST, EIO}, - task::{ProcessList, Thread}, + task::{block_on, ProcessList, Thread}, terminal::Terminal, vfs::{ file::{File, FileType, TerminalFile}, @@ -18,7 +18,6 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; -use eonix_runtime::task::Task; use eonix_sync::AsProof as _; use posix_types::open::OpenFlags; @@ -43,7 +42,7 @@ static CHAR_DEVICES: Spin>> = Spin::new(BTreeMap impl CharDevice { pub fn read(&self, buffer: &mut dyn Buffer) -> KResult { match &self.device { - CharDeviceType::Terminal(terminal) => Task::block_on(terminal.read(buffer)), + CharDeviceType::Terminal(terminal) => block_on(terminal.read(buffer)), CharDeviceType::Virtual(device) => device.read(buffer), } } @@ -75,13 +74,13 @@ impl CharDevice { pub fn open(self: &Arc, flags: OpenFlags) -> KResult> { Ok(match &self.device { CharDeviceType::Terminal(terminal) => 
{ - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); let current = Thread::current(); let session = current.process.session(procs.prove()); // We only set the control terminal if the process is the session leader. if session.sid == Thread::current().process.pid { // Silently fail if we can't set the control terminal. - dont_check!(Task::block_on(session.set_control_terminal( + dont_check!(block_on(session.set_control_terminal( &terminal, false, procs.prove() @@ -123,7 +122,7 @@ struct ConsoleDevice; impl VirtualCharDevice for ConsoleDevice { fn read(&self, buffer: &mut dyn Buffer) -> KResult { let console_terminal = get_console().ok_or(EIO)?; - Task::block_on(console_terminal.read(buffer)) + block_on(console_terminal.read(buffer)) } fn write(&self, stream: &mut dyn Stream) -> KResult { diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index 956ae7e4..528d79ad 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -2,6 +2,7 @@ use super::mm_list::EMPTY_PAGE; use super::paging::AllocZeroed as _; use super::{AsMemoryBlock, Mapping, Page, Permission}; use crate::kernel::constants::EINVAL; +use crate::kernel::task::block_on; use crate::prelude::KResult; use core::borrow::Borrow; use core::cell::UnsafeCell; @@ -9,7 +10,6 @@ use core::cmp; use eonix_mm::address::{AddrOps as _, VAddr, VRange}; use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; use eonix_mm::paging::{PAGE_SIZE, PFN}; -use eonix_runtime::task::Task; #[derive(Debug)] pub struct MMArea { @@ -209,7 +209,7 @@ impl MMArea { } if attr.contains(PageAttribute::MAPPED) { - Task::block_on(self.handle_mmap(&mut pfn, &mut attr, offset, write))?; + block_on(self.handle_mmap(&mut pfn, &mut attr, offset, write))?; } attr.insert(PageAttribute::ACCESSED); diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index 1d142546..6593624b 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -7,6 
+7,7 @@ use super::paging::AllocZeroed as _; use super::{AsMemoryBlock, MMArea, Page}; use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; use crate::kernel::mem::page_alloc::RawPagePtr; +use crate::kernel::task::block_on; use crate::{prelude::*, sync::ArcSwap}; use alloc::collections::btree_set::BTreeSet; use core::fmt; @@ -23,7 +24,6 @@ use eonix_mm::{ page_table::{PageTable, RawAttribute, PTE}, paging::PAGE_SIZE, }; -use eonix_runtime::task::Task; use eonix_sync::{LazyLock, Mutex}; pub use mapping::{FileMapping, Mapping}; @@ -507,7 +507,7 @@ impl MMList { const VDSO_SIZE: usize = 0x1000; let inner = self.inner.borrow(); - let inner = Task::block_on(inner.lock()); + let inner = block_on(inner.lock()); let mut pte_iter = inner .page_table @@ -538,7 +538,7 @@ impl MMList { is_shared: bool, ) -> KResult { let inner = self.inner.borrow(); - let mut inner = Task::block_on(inner.lock()); + let mut inner = block_on(inner.lock()); if hint == VAddr::NULL { let at = inner.find_available(hint, len).ok_or(ENOMEM)?; @@ -565,14 +565,14 @@ impl MMList { permission: Permission, is_shared: bool, ) -> KResult { - Task::block_on(self.inner.borrow().lock()) + block_on(self.inner.borrow().lock()) .mmap(at, len, mapping.clone(), permission, is_shared) .map(|_| at) } pub fn set_break(&self, pos: Option) -> VAddr { let inner = self.inner.borrow(); - let mut inner = Task::block_on(inner.lock()); + let mut inner = block_on(inner.lock()); // SAFETY: `set_break` is only called in syscalls, where program break should be valid. assert!(inner.break_start.is_some() && inner.break_pos.is_some()); @@ -631,7 +631,7 @@ impl MMList { /// This should be called only **once** for every thread. 
pub fn register_break(&self, start: VAddr) { let inner = self.inner.borrow(); - let mut inner = Task::block_on(inner.lock()); + let mut inner = block_on(inner.lock()); assert!(inner.break_start.is_none() && inner.break_pos.is_none()); inner.break_start = Some(start.into()); @@ -651,7 +651,7 @@ impl MMList { } let inner = self.inner.borrow(); - let inner = Task::block_on(inner.lock()); + let inner = block_on(inner.lock()); let mut offset = 0; let mut remaining = len; diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs index bb62b388..089fdf06 100644 --- a/src/kernel/mem/mm_list/page_fault.rs +++ b/src/kernel/mem/mm_list/page_fault.rs @@ -1,10 +1,9 @@ use super::{MMList, VAddr}; -use crate::kernel::task::Thread; +use crate::kernel::task::{block_on, Thread}; use eonix_hal::mm::flush_tlb; use eonix_hal::traits::fault::PageFaultErrorCode; use eonix_mm::address::{Addr as _, AddrOps as _, VRange}; use eonix_mm::paging::PAGE_SIZE; -use eonix_runtime::task::Task; use posix_types::signal::Signal; #[repr(C)] @@ -149,7 +148,7 @@ pub fn handle_kernel_page_fault( let mms = &Thread::current().process.mm_list; let inner = mms.inner.borrow(); - let inner = Task::block_on(inner.lock()); + let inner = block_on(inner.lock()); let area = match inner.areas.get(&VRange::from(addr)) { Some(area) => area, diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 5683b27e..97d47c48 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,11 +1,9 @@ -use core::time::Duration; - use super::FromSyscallArg; use crate::io::IntoStream; use crate::kernel::constants::{ EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, S_IFBLK, S_IFCHR, }; -use crate::kernel::task::Thread; +use crate::kernel::task::{block_on, Thread}; use crate::kernel::timer::sleep; use crate::kernel::vfs::filearray::FD; use crate::{ @@ -24,7 +22,7 @@ use crate::{ prelude::*, }; use alloc::sync::Arc; -use 
eonix_runtime::task::Task; +use core::time::Duration; use posix_types::ctypes::{Long, PtrT}; use posix_types::namei::RenameFlags; use posix_types::open::{AtFlags, OpenFlags}; @@ -77,14 +75,14 @@ fn dentry_from( fn read(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - Task::block_on(thread.files.get(fd).ok_or(EBADF)?.read(&mut buffer, None)) + block_on(thread.files.get(fd).ok_or(EBADF)?.read(&mut buffer, None)) } #[eonix_macros::define_syscall(SYS_PREAD64)] fn pread64(fd: FD, buffer: *mut u8, bufsize: usize, offset: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - Task::block_on( + block_on( thread .files .get(fd) @@ -98,7 +96,7 @@ fn write(fd: FD, buffer: *const u8, count: usize) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); - Task::block_on(thread.files.get(fd).ok_or(EBADF)?.write(&mut stream, None)) + block_on(thread.files.get(fd).ok_or(EBADF)?.write(&mut stream, None)) } #[eonix_macros::define_syscall(SYS_PWRITE64)] @@ -106,7 +104,7 @@ fn pwrite64(fd: FD, buffer: *const u8, count: usize, offset: usize) -> KResult KResult { let mut tot = 0usize; for mut buffer in iov_buffers.into_iter() { // TODO!!!: `readv` - let nread = Task::block_on(file.read(&mut buffer, None))?; + let nread = block_on(file.read(&mut buffer, None))?; tot += nread; if nread != buffer.total() { @@ -426,7 +424,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { let mut tot = 0usize; for mut stream in iov_streams.into_iter() { - let nread = Task::block_on(file.write(&mut stream, None))?; + let nread = block_on(file.write(&mut stream, None))?; tot += nread; if nread == 0 || !stream.is_drained() { @@ -477,7 +475,7 @@ fn sendfile64(out_fd: FD, in_fd: FD, offset: *mut u8, count: usize) -> KResult K let mut fd = fds.read()?; let file = thread.files.get(fd.fd).ok_or(EBADF)?; - fd.revents = 
Task::block_on(file.poll(PollEvent::from_bits_retain(fd.events)))?.bits(); + fd.revents = block_on(file.poll(PollEvent::from_bits_retain(fd.events)))?.bits(); fds.write(fd)?; Ok(1) @@ -550,11 +548,11 @@ fn pselect6( } let timeout = UserPointerMut::new(timeout)?; - + // Read here to check for invalid pointers. let _timeout_value = timeout.read()?; - Task::block_on(sleep(Duration::from_millis(10))); + block_on(sleep(Duration::from_millis(10))); timeout.write(TimeSpec { tv_sec: 0, diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index dd263e6b..b6ba5fdc 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -2,7 +2,7 @@ use super::FromSyscallArg; use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; use crate::kernel::mem::FileMapping; -use crate::kernel::task::Thread; +use crate::kernel::task::{block_on, Thread}; use crate::kernel::vfs::filearray::FD; use crate::{ kernel::{ @@ -14,7 +14,6 @@ use crate::{ use align_ext::AlignExt; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr}; use eonix_mm::paging::PAGE_SIZE; -use eonix_runtime::task::Task; use posix_types::syscall_no::*; impl FromSyscallArg for UserMmapProtocol { @@ -67,11 +66,8 @@ fn do_mmap2( Mapping::Anonymous } else { // The mode is unimportant here, since we are checking prot in mm_area. - let shared_area = Task::block_on(SHM_MANAGER.lock()).create_shared_area( - len, - thread.process.pid, - 0x777, - ); + let shared_area = + block_on(SHM_MANAGER.lock()).create_shared_area(len, thread.process.pid, 0x777); Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) } } else { @@ -94,7 +90,7 @@ fn do_mmap2( // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether // `addr` is above user reachable memory. 
let addr = if flags.contains(UserMmapFlags::MAP_FIXED) { - Task::block_on(mm_list.unmap(addr, len)); + block_on(mm_list.unmap(addr, len)); mm_list.mmap_fixed(addr, len, mapping, permission, is_shared) } else { mm_list.mmap_hint(addr, len, mapping, permission, is_shared) @@ -137,7 +133,7 @@ fn munmap(addr: usize, len: usize) -> KResult { } let len = len.align_up(PAGE_SIZE); - Task::block_on(thread.process.mm_list.unmap(addr, len)).map(|_| 0) + block_on(thread.process.mm_list.unmap(addr, len)).map(|_| 0) } #[eonix_macros::define_syscall(SYS_BRK)] @@ -160,7 +156,7 @@ fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { let len = len.align_up(PAGE_SIZE); - Task::block_on(thread.process.mm_list.protect( + block_on(thread.process.mm_list.protect( addr, len, Permission { @@ -175,7 +171,7 @@ fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { let size = size.align_up(PAGE_SIZE); - let mut shm_manager = Task::block_on(SHM_MANAGER.lock()); + let mut shm_manager = block_on(SHM_MANAGER.lock()); let shmid = gen_shm_id(key)?; let mode = shmflg & 0o777; @@ -207,7 +203,7 @@ fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { #[eonix_macros::define_syscall(SYS_SHMAT)] fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { let mm_list = &thread.process.mm_list; - let shm_manager = Task::block_on(SHM_MANAGER.lock()); + let shm_manager = block_on(SHM_MANAGER.lock()); let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?; let mode = shmflg & 0o777; @@ -256,7 +252,7 @@ fn shmdt(addr: usize) -> KResult { let size = *shm_areas.get(&addr).ok_or(EINVAL)?; shm_areas.remove(&addr); drop(shm_areas); - return Task::block_on(thread.process.mm_list.unmap(addr, size)).map(|_| 0); + return block_on(thread.process.mm_list.unmap(addr, size)).map(|_| 0); } #[eonix_macros::define_syscall(SYS_SHMCTL)] diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 
c21aade5..62194691 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -8,8 +8,8 @@ use crate::kernel::constants::{ }; use crate::kernel::mem::PageBuffer; use crate::kernel::task::{ - do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, ProgramLoader, - RobustListHead, SignalAction, Thread, WaitId, WaitType, + block_on, do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, + ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, }; use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; @@ -27,7 +27,6 @@ use eonix_hal::processor::UserTLS; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; -use eonix_runtime::task::Task; use eonix_sync::AsProof as _; use posix_types::ctypes::PtrT; use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; @@ -59,7 +58,7 @@ fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - Task::block_on(sleep(duration)); + block_on(sleep(duration)); if let Some(rem) = rem { rem.write((0, 0))?; @@ -90,7 +89,7 @@ fn clock_nanosleep( }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - Task::block_on(sleep(duration)); + block_on(sleep(duration)); if let Some(rem) = rem { rem.write((0, 0))?; @@ -212,7 +211,7 @@ fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult KResult SyscallNoReturn { unsafe { - let mut procs = Task::block_on(ProcessList::get().write()); - Task::block_on(procs.do_exit(&thread, WaitType::Exited(status), false)); + let mut procs = block_on(ProcessList::get().write()); + block_on(procs.do_exit(&thread, WaitType::Exited(status), false)); } SyscallNoReturn @@ -249,8 +248,8 @@ fn exit(status: u32) -> SyscallNoReturn { #[eonix_macros::define_syscall(SYS_EXIT_GROUP)] fn 
exit_group(status: u32) -> SyscallNoReturn { unsafe { - let mut procs = Task::block_on(ProcessList::get().write()); - Task::block_on(procs.do_exit(&thread, WaitType::Exited(status), true)); + let mut procs = block_on(ProcessList::get().write()); + block_on(procs.do_exit(&thread, WaitType::Exited(status), true)); } SyscallNoReturn @@ -278,7 +277,7 @@ fn do_waitid( Some(options) => options, }; - let Some(wait_object) = Task::block_on(thread.process.wait( + let Some(wait_object) = block_on(thread.process.wait( wait_id, options.contains(UserWaitOptions::WNOHANG), options.contains(UserWaitOptions::WUNTRACED), @@ -377,7 +376,7 @@ fn getsid(pid: u32) -> KResult { if pid == 0 { Ok(thread.process.session_rcu().sid) } else { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); procs .try_find_process(pid) .map(|proc| proc.session(procs.prove()).sid) @@ -390,7 +389,7 @@ fn getpgid(pid: u32) -> KResult { if pid == 0 { Ok(thread.process.pgroup_rcu().pgid) } else { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); procs .try_find_process(pid) .map(|proc| proc.pgroup(procs.prove()).pgid) @@ -476,7 +475,7 @@ fn getrandom(buf: *mut u8, buflen: usize, _flags: u32) -> isize { #[eonix_macros::define_syscall(SYS_SCHED_YIELD)] fn sched_yield() -> KResult<()> { - Task::block_on(yield_now()); + block_on(yield_now()); Ok(()) } @@ -572,7 +571,7 @@ fn prctl(option: u32, arg2: usize) -> KResult<()> { #[eonix_macros::define_syscall(SYS_KILL)] fn kill(pid: i32, sig: u32) -> KResult<()> { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); match pid { // Send signal to every process for which the calling process has // permission to send signals. 
@@ -599,7 +598,7 @@ fn kill(pid: i32, sig: u32) -> KResult<()> { #[eonix_macros::define_syscall(SYS_TKILL)] fn tkill(tid: u32, sig: u32) -> KResult<()> { - Task::block_on(ProcessList::get().read()) + block_on(ProcessList::get().read()) .try_find_thread(tid) .ok_or(ESRCH)? .raise(Signal::try_from_raw(sig)?); @@ -608,7 +607,7 @@ fn tkill(tid: u32, sig: u32) -> KResult<()> { #[eonix_macros::define_syscall(SYS_TGKILL)] fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); let thread_to_kill = procs.try_find_thread(tid).ok_or(ESRCH)?; if thread_to_kill.process.pid != tgid { @@ -867,11 +866,11 @@ fn futex( match futex_op { FutexOp::FUTEX_WAIT => { - Task::block_on(futex_wait(uaddr, pid, val as u32, None))?; + block_on(futex_wait(uaddr, pid, val as u32, None))?; return Ok(0); } FutexOp::FUTEX_WAKE => { - return Task::block_on(futex_wake(uaddr, pid, val as u32)); + return block_on(futex_wake(uaddr, pid, val as u32)); } FutexOp::FUTEX_REQUEUE => { todo!() @@ -906,7 +905,7 @@ fn rt_sigreturn() -> KResult { "`rt_sigreturn` failed in thread {} with error {err}!", thread.tid ); - Task::block_on(thread.force_kill(Signal::SIGSEGV)); + block_on(thread.force_kill(Signal::SIGSEGV)); })?; Ok(SyscallNoReturn) @@ -927,7 +926,7 @@ fn sigreturn() -> KResult { "`sigreturn` failed in thread {} with error {err}!", thread.tid ); - Task::block_on(thread.force_kill(Signal::SIGSEGV)); + block_on(thread.force_kill(Signal::SIGSEGV)); })?; Ok(SyscallNoReturn) diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index fb53f4fa..3e69fc4b 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -1,3 +1,4 @@ +use super::block_on; use super::{ process_group::ProcessGroupBuilder, signal::RaiseResult, thread::ThreadBuilder, ProcessGroup, ProcessList, Session, Thread, @@ -17,7 +18,6 @@ use alloc::{ }; use core::sync::atomic::{AtomicU32, Ordering}; use 
eonix_mm::address::VAddr; -use eonix_runtime::task::Task; use eonix_sync::{ AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, SpinGuard, UnlockableGuard as _, UnlockedGuard as _, @@ -134,7 +134,7 @@ impl WaitId { } else if id == -1 { WaitId::Any } else if id == 0 { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); WaitId::Pgid(thread.process.pgroup(procs.prove()).pgid) } else { WaitId::Pid(id.cast_unsigned()) @@ -208,9 +208,9 @@ impl ProcessBuilder { pub fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { let mm_list = if clone_args.flags.contains(CloneFlags::CLONE_VM) { - Task::block_on(process.mm_list.new_shared()) + block_on(process.mm_list.new_shared()) } else { - Task::block_on(process.mm_list.new_cloned()) + block_on(process.mm_list.new_cloned()) }; if let Some(exit_signal) = clone_args.exit_signal { @@ -396,7 +396,7 @@ impl Process { /// Create a new session for the process. pub fn setsid(self: &Arc) -> KResult { - let mut process_list = Task::block_on(ProcessList::get().write()); + let mut process_list = block_on(ProcessList::get().write()); // If there exists a session that has the same sid as our pid, we can't create a new // session. The standard says that we should create a new process group and be the // only process in the new process group and session. @@ -474,7 +474,7 @@ impl Process { /// This function should be called on the process that issued the syscall in order to do /// permission checks. pub fn setpgid(self: &Arc, pid: u32, pgid: u32) -> KResult<()> { - let mut procs = Task::block_on(ProcessList::get().write()); + let mut procs = block_on(ProcessList::get().write()); // We may set pgid of either the calling process or a child process. 
if pid == self.pid { self.do_setpgid(pgid, &mut procs) @@ -609,7 +609,7 @@ impl Entry<'_, '_, '_> { WaitId::Any => true, WaitId::Pid(pid) => item.pid == pid, WaitId::Pgid(pgid) => { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); if let Some(process) = procs.try_find_process(item.pid) { return process.pgroup(procs.prove()).pgid == pgid; } diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 5532a2e1..134021a8 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -1,5 +1,5 @@ use super::{ - task::{ProcessList, Session, Thread}, + task::{block_on, ProcessList, Session, Thread}, user::{UserPointer, UserPointerMut}, }; use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; @@ -10,7 +10,6 @@ use alloc::{ }; use bitflags::bitflags; use eonix_log::ConsoleWrite; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, Mutex}; use posix_types::signal::Signal; @@ -449,7 +448,7 @@ impl Terminal { fn signal(&self, inner: &mut TerminalInner, signal: Signal) { if let Some(session) = inner.session.upgrade() { - Task::block_on(session.raise_foreground(signal)); + block_on(session.raise_foreground(signal)); } if !inner.termio.noflsh() { self.clear_read_buffer(inner); diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs index 9dfdbddc..06a4e14d 100644 --- a/src/kernel/vfs/dentry/dcache.rs +++ b/src/kernel/vfs/dentry/dcache.rs @@ -1,5 +1,6 @@ use super::{Dentry, Inode}; use crate::kernel::constants::ENOENT; +use crate::kernel::task::block_on; use crate::rcu::RCUPointer; use crate::{ kernel::vfs::{s_isdir, s_islnk}, @@ -8,7 +9,6 @@ use crate::{ }; use alloc::sync::Arc; use core::sync::atomic::Ordering; -use eonix_runtime::task::Task; use eonix_sync::Mutex; const DCACHE_HASH_BITS: u32 = 8; @@ -42,7 +42,7 @@ pub fn d_find_fast(dentry: &Dentry) -> Option> { /// /// Silently fail without any side effects pub fn d_try_revalidate(dentry: &Arc) { - let _lock = 
Task::block_on(D_EXCHANGE_LOCK.lock()); + let _lock = block_on(D_EXCHANGE_LOCK.lock()); (|| -> KResult<()> { let parent = dentry.parent().get_inode()?; diff --git a/src/kernel/vfs/file.rs b/src/kernel/vfs/file.rs index 49cb1d44..75e4df2f 100644 --- a/src/kernel/vfs/file.rs +++ b/src/kernel/vfs/file.rs @@ -8,7 +8,7 @@ use crate::{ kernel::{ constants::{TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, mem::{paging::Page, AsMemoryBlock as _}, - task::Thread, + task::{block_on, Thread}, terminal::{Terminal, TerminalIORequest}, user::{UserPointer, UserPointerMut}, vfs::inode::Inode, @@ -29,7 +29,6 @@ use core::{ ops::{ControlFlow, Deref}, sync::atomic::{AtomicU32, Ordering}, }; -use eonix_runtime::task::Task; use eonix_sync::Mutex; use posix_types::{open::OpenFlags, signal::Signal, stat::StatX}; @@ -159,7 +158,7 @@ impl Pipe { } fn close_read(&self) { - let mut inner = Task::block_on(self.inner.lock()); + let mut inner = block_on(self.inner.lock()); if inner.read_closed { return; } @@ -169,7 +168,7 @@ impl Pipe { } fn close_write(&self) { - let mut inner = Task::block_on(self.inner.lock()); + let mut inner = block_on(self.inner.lock()); if inner.write_closed { return; } @@ -318,7 +317,7 @@ impl InodeFile { } fn seek(&self, option: SeekOption) -> KResult { - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); let new_cursor = match option { SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, @@ -339,7 +338,7 @@ impl InodeFile { return Err(EBADF); } - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); if self.append { let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; @@ -367,7 +366,7 @@ impl InodeFile { let nread = self.dentry.read(buffer, offset)?; nread } else { - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); let nread = self.dentry.read(buffer, *cursor)?; @@ -379,7 
+378,7 @@ impl InodeFile { } fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); let nread = self.dentry.readdir(*cursor, |filename, ino| { // Filename length + 1 for padding '\0' @@ -409,7 +408,7 @@ impl InodeFile { } fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = Task::block_on(self.cursor.lock()); + let mut cursor = block_on(self.cursor.lock()); let nread = self.dentry.readdir(*cursor, |filename, ino| { // + 1 for filename length padding '\0', + 1 for d_type. @@ -466,7 +465,7 @@ impl TerminalFile { } fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { - Task::block_on(self.terminal.ioctl(match request as u32 { + block_on(self.terminal.ioctl(match request as u32 { TCGETS => TerminalIORequest::GetTermios(UserPointerMut::new_vaddr(arg3)?), TCSETS => TerminalIORequest::SetTermios(UserPointer::new_vaddr(arg3)?), TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::new_vaddr(arg3)?), diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index f8b06a12..0fb9205d 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -29,7 +29,7 @@ use itertools::{ }; use posix_types::open::{FDFlags, OpenFlags}; -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct FD(u32); #[derive(Clone)] @@ -323,6 +323,15 @@ impl FD { pub const AT_FDCWD: FD = FD(-100i32 as u32); } +impl core::fmt::Debug for FD { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + &Self::AT_FDCWD => f.write_str("FD(AT_FDCWD)"), + FD(no) => f.debug_tuple("FD").field(&no).finish(), + } + } +} + impl FromSyscallArg for FD { fn from_arg(value: usize) -> Self { Self(value as u32) diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs index 2b52043d..3eb6c8dc 100644 --- a/src/kernel/vfs/inode.rs +++ 
b/src/kernel/vfs/inode.rs @@ -5,6 +5,7 @@ use crate::kernel::constants::{ STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR, S_IFMT, }; use crate::kernel::mem::PageCache; +use crate::kernel::task::block_on; use crate::kernel::timer::Instant; use crate::{io::Buffer, prelude::*}; use alloc::sync::{Arc, Weak}; @@ -14,7 +15,6 @@ use core::{ ptr::addr_of_mut, sync::atomic::{AtomicU32, AtomicU64, Ordering}, }; -use eonix_runtime::task::Task; use eonix_sync::RwLock; use posix_types::stat::StatX; @@ -280,7 +280,7 @@ pub trait Inode: Send + Sync + InodeInner + Any { f( uninit_mut.as_mut_ptr(), // SAFETY: `idata` is initialized and we will never move the lock. - &Task::block_on(unsafe { idata.assume_init_ref() }.rwsem.read()), + &block_on(unsafe { idata.assume_init_ref() }.rwsem.read()), ); // Safety: `uninit` is initialized From 661a15940badf992d7836eb93c7ab293590c6ad4 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 10 Aug 2025 00:23:12 +0800 Subject: [PATCH 19/29] riscv64, trap: rework to fix nested captured traps The previous implementation has some bugs inside that will cause kernel space nested traps to lose some required information: - In kernel mode, trap contexts are saved above the current stack frame without exception, which is not what we want. We expect to read the trap data in the CAPTURED context. - The capturer task context is not saved as well, which will mess up the nested traps completely. - We are reading page fault virtual addresses in TrapContext::trap_type, which won't work since if the inner trap is captured, and the outer trap interleaves with the trap_type() call, we will lose the stval data in the inner trap. The solution is to separate our "normal" trap handling procedure from the captured trap handling procedure. We swap the stvec CSR when we set up captured traps and restore it afterwards so the two approaches don't have to tell them apart in trap entries.
Then, we can store the TrapContext pointer in sscratch without having to distinguish between trap handling types. This way, we keep the procedure simple. The register stval is saved together with other registers to be used in page faults. Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 8 - crates/eonix_hal/src/arch/riscv64/cpu.rs | 13 +- .../src/arch/riscv64/trap/captured.rs | 177 +++++++++++ .../src/arch/riscv64/trap/default.rs | 134 ++++++++ crates/eonix_hal/src/arch/riscv64/trap/mod.rs | 291 ++---------------- .../src/arch/riscv64/trap/trap_context.rs | 97 +++--- 6 files changed, 389 insertions(+), 331 deletions(-) create mode 100644 crates/eonix_hal/src/arch/riscv64/trap/captured.rs create mode 100644 crates/eonix_hal/src/arch/riscv64/trap/default.rs diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 7c6a6ae0..0f1dff63 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -3,7 +3,6 @@ use super::{ console::write_str, cpu::{CPUID, CPU_COUNT}, time::set_next_timer, - trap::TRAP_SCRATCH, }; use crate::{ arch::{ @@ -234,13 +233,6 @@ fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) { } percpu_area.register(cpu.cpuid()); - - unsafe { - // SAFETY: Interrupts are disabled.
- TRAP_SCRATCH - .as_mut() - .set_kernel_tp(PercpuArea::get_for(cpu.cpuid()).unwrap().cast()); - } } fn get_ap_start_addr() -> usize { diff --git a/crates/eonix_hal/src/arch/riscv64/cpu.rs b/crates/eonix_hal/src/arch/riscv64/cpu.rs index 7e6e3ac0..9c843eaf 100644 --- a/crates/eonix_hal/src/arch/riscv64/cpu.rs +++ b/crates/eonix_hal/src/arch/riscv64/cpu.rs @@ -1,9 +1,13 @@ use super::{ interrupt::InterruptControl, - trap::{setup_trap, TRAP_SCRATCH}, + trap::{setup_trap, TrapContext}, }; use crate::arch::fdt::{FdtExt, FDT}; -use core::{arch::asm, pin::Pin, ptr::NonNull, sync::atomic::AtomicUsize}; +use core::{ + arch::asm, cell::UnsafeCell, mem::MaybeUninit, pin::Pin, ptr::NonNull, + sync::atomic::AtomicUsize, +}; +use eonix_hal_traits::trap::RawTrapContext; use eonix_preempt::PreemptGuard; use eonix_sync_base::LazyLock; use riscv::register::{ @@ -17,6 +21,9 @@ pub static CPU_COUNT: AtomicUsize = AtomicUsize::new(0); #[eonix_percpu::define_percpu] pub static CPUID: usize = 0; +#[eonix_percpu::define_percpu] +static DEFAULT_TRAP_CONTEXT: MaybeUninit = MaybeUninit::uninit(); + #[eonix_percpu::define_percpu] static LOCAL_CPU: LazyLock = LazyLock::new(|| CPU::new(CPUID.get())); @@ -56,7 +63,7 @@ impl CPU { interrupt.init(); sstatus::set_sum(); - sscratch::write(TRAP_SCRATCH.as_ptr() as usize); + sscratch::write(DEFAULT_TRAP_CONTEXT.as_ptr() as usize); } pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) {} diff --git a/crates/eonix_hal/src/arch/riscv64/trap/captured.rs b/crates/eonix_hal/src/arch/riscv64/trap/captured.rs new file mode 100644 index 00000000..d4c00e9f --- /dev/null +++ b/crates/eonix_hal/src/arch/riscv64/trap/captured.rs @@ -0,0 +1,177 @@ +use crate::{arch::trap::Registers, context::TaskContext, trap::TrapContext}; +use core::{arch::naked_asm, mem::MaybeUninit}; +use eonix_hal_traits::context::RawTaskContext; + +static mut DIRTY_TASK_CONTEXT: MaybeUninit = MaybeUninit::uninit(); + +// If captured trap context is present, we use it directly. 
+// We need to restore the kernel tp from that TrapContext but sp is +// fine since we will use TaskContext::switch. +#[unsafe(naked)] +pub(super) unsafe extern "C" fn _captured_trap_entry() -> ! { + naked_asm!( + "csrrw t0, sscratch, t0", + "sd tp, {tp}(t0)", + "ld tp, {ra}(t0)", // Load kernel tp from trap_ctx.ra + "sd ra, {ra}(t0)", + "ld ra, {sp}(t0)", // Load capturer task context from trap_ctx.sp + "sd sp, {sp}(t0)", + "sd gp, {gp}(t0)", + "sd a0, {a0}(t0)", + "sd a1, {a1}(t0)", + "sd a2, {a2}(t0)", + "sd a3, {a3}(t0)", + "sd a4, {a4}(t0)", + "sd t1, {t1}(t0)", + "sd a5, {a5}(t0)", + "sd a6, {a6}(t0)", + "sd a7, {a7}(t0)", + "sd t3, {t3}(t0)", + "sd t4, {t4}(t0)", + "sd t5, {t5}(t0)", + "sd t2, {t2}(t0)", + "sd t6, {t6}(t0)", + "sd s0, {s0}(t0)", + "sd s1, {s1}(t0)", + "sd s2, {s2}(t0)", + "sd s3, {s3}(t0)", + "sd s4, {s4}(t0)", + "sd s5, {s5}(t0)", + "sd s6, {s6}(t0)", + "sd s7, {s7}(t0)", + "sd s8, {s8}(t0)", + "sd s9, {s9}(t0)", + "sd s10, {s10}(t0)", + "sd s11, {s11}(t0)", + "csrr t2, sstatus", + "csrr t3, sepc", + "csrr t4, scause", + "csrr t5, stval", + "csrrw t6, sscratch, t0", + "sd t6, {t0}(t0)", + "sd t2, {sstatus}(t0)", + "sd t3, {sepc}(t0)", + "sd t4, {scause}(t0)", + "sd t5, {stval}(t0)", + "la a0, {dirty_task_context}", + "mv a1, ra", + "j {task_context_switch}", + ra = const Registers::OFFSET_RA, + sp = const Registers::OFFSET_SP, + gp = const Registers::OFFSET_GP, + tp = const Registers::OFFSET_TP, + t1 = const Registers::OFFSET_T1, + t2 = const Registers::OFFSET_T2, + t0 = const Registers::OFFSET_T0, + a0 = const Registers::OFFSET_A0, + a1 = const Registers::OFFSET_A1, + a2 = const Registers::OFFSET_A2, + a3 = const Registers::OFFSET_A3, + a4 = const Registers::OFFSET_A4, + a5 = const Registers::OFFSET_A5, + a6 = const Registers::OFFSET_A6, + a7 = const Registers::OFFSET_A7, + t3 = const Registers::OFFSET_T3, + t4 = const Registers::OFFSET_T4, + t5 = const Registers::OFFSET_T5, + t6 = const Registers::OFFSET_T6, + s0 = const 
Registers::OFFSET_S0, + s1 = const Registers::OFFSET_S1, + s2 = const Registers::OFFSET_S2, + s3 = const Registers::OFFSET_S3, + s4 = const Registers::OFFSET_S4, + s5 = const Registers::OFFSET_S5, + s6 = const Registers::OFFSET_S6, + s7 = const Registers::OFFSET_S7, + s8 = const Registers::OFFSET_S8, + s9 = const Registers::OFFSET_S9, + s10 = const Registers::OFFSET_S10, + s11 = const Registers::OFFSET_S11, + sstatus = const TrapContext::OFFSET_SSTATUS, + sepc = const TrapContext::OFFSET_SEPC, + scause = const TrapContext::OFFSET_SCAUSE, + stval = const TrapContext::OFFSET_STVAL, + dirty_task_context = sym DIRTY_TASK_CONTEXT, + task_context_switch = sym TaskContext::switch, + ); +} + +#[unsafe(naked)] +pub(super) unsafe extern "C" fn _captured_trap_return(ctx: &mut TrapContext) -> ! { + naked_asm!( + "csrr t0, sscratch", + "ld t1, {sstatus}(t0)", + "ld t2, {sepc}(t0)", + "csrw sstatus, t1", + "csrw sepc, t2", + "mv t4, tp", + "mv t5, sp", + "ld tp, {tp}(t0)", + "ld ra, {ra}(t0)", + "ld sp, {sp}(t0)", + "sd t4, {ra}(t0)", // Store kernel tp to trap_ctx.ra + "sd t5, {sp}(t0)", // Store capturer task context to trap_ctx.sp + "ld gp, {gp}(t0)", + "ld a0, {a0}(t0)", + "ld a1, {a1}(t0)", + "ld a2, {a2}(t0)", + "ld a3, {a3}(t0)", + "ld a4, {a4}(t0)", + "ld t1, {t1}(t0)", + "ld a5, {a5}(t0)", + "ld a6, {a6}(t0)", + "ld a7, {a7}(t0)", + "ld t3, {t3}(t0)", + "ld t4, {t4}(t0)", + "ld t5, {t5}(t0)", + "ld t2, {t2}(t0)", + "ld t6, {t6}(t0)", + "ld s0, {s0}(t0)", + "ld s1, {s1}(t0)", + "ld s2, {s2}(t0)", + "ld s3, {s3}(t0)", + "ld s4, {s4}(t0)", + "ld s5, {s5}(t0)", + "ld s6, {s6}(t0)", + "ld s7, {s7}(t0)", + "ld s8, {s8}(t0)", + "ld s9, {s9}(t0)", + "ld s10, {s10}(t0)", + "ld s11, {s11}(t0)", + "ld t0, {t0}(t0)", + "sret", + ra = const Registers::OFFSET_RA, + sp = const Registers::OFFSET_SP, + gp = const Registers::OFFSET_GP, + tp = const Registers::OFFSET_TP, + t1 = const Registers::OFFSET_T1, + t2 = const Registers::OFFSET_T2, + t0 = const Registers::OFFSET_T0, + a0 = const 
Registers::OFFSET_A0, + a1 = const Registers::OFFSET_A1, + a2 = const Registers::OFFSET_A2, + a3 = const Registers::OFFSET_A3, + a4 = const Registers::OFFSET_A4, + a5 = const Registers::OFFSET_A5, + a6 = const Registers::OFFSET_A6, + a7 = const Registers::OFFSET_A7, + t3 = const Registers::OFFSET_T3, + t4 = const Registers::OFFSET_T4, + t5 = const Registers::OFFSET_T5, + t6 = const Registers::OFFSET_T6, + s0 = const Registers::OFFSET_S0, + s1 = const Registers::OFFSET_S1, + s2 = const Registers::OFFSET_S2, + s3 = const Registers::OFFSET_S3, + s4 = const Registers::OFFSET_S4, + s5 = const Registers::OFFSET_S5, + s6 = const Registers::OFFSET_S6, + s7 = const Registers::OFFSET_S7, + s8 = const Registers::OFFSET_S8, + s9 = const Registers::OFFSET_S9, + s10 = const Registers::OFFSET_S10, + s11 = const Registers::OFFSET_S11, + sstatus = const TrapContext::OFFSET_SSTATUS, + sepc = const TrapContext::OFFSET_SEPC, + ); +} diff --git a/crates/eonix_hal/src/arch/riscv64/trap/default.rs b/crates/eonix_hal/src/arch/riscv64/trap/default.rs new file mode 100644 index 00000000..4025b719 --- /dev/null +++ b/crates/eonix_hal/src/arch/riscv64/trap/default.rs @@ -0,0 +1,134 @@ +use super::Registers; +use crate::trap::TrapContext; +use core::arch::naked_asm; + +unsafe extern "C" { + fn _default_trap_handler(trap_context: &mut TrapContext); +} + +#[unsafe(naked)] +pub(super) unsafe extern "C" fn _default_trap_entry() -> ! 
{ + naked_asm!( + "csrrw t0, sscratch, t0", + "sd tp, {tp}(t0)", + "sd ra, {ra}(t0)", + "sd sp, {sp}(t0)", + "sd gp, {gp}(t0)", + "sd a0, {a0}(t0)", + "sd a1, {a1}(t0)", + "sd a2, {a2}(t0)", + "sd a3, {a3}(t0)", + "sd a4, {a4}(t0)", + "sd t1, {t1}(t0)", + "sd a5, {a5}(t0)", + "sd a6, {a6}(t0)", + "sd a7, {a7}(t0)", + "sd t3, {t3}(t0)", + "sd t4, {t4}(t0)", + "sd t5, {t5}(t0)", + "sd t2, {t2}(t0)", + "sd t6, {t6}(t0)", + "sd s0, {s0}(t0)", + "sd s1, {s1}(t0)", + "sd s2, {s2}(t0)", + "sd s3, {s3}(t0)", + "sd s4, {s4}(t0)", + "sd s5, {s5}(t0)", + "sd s6, {s6}(t0)", + "sd s7, {s7}(t0)", + "sd s8, {s8}(t0)", + "sd s9, {s9}(t0)", + "sd s10, {s10}(t0)", + "sd s11, {s11}(t0)", + "mv a0, t0", + "csrrw t0, sscratch, t0", + "sd t0, {t0}(a0)", + "csrr t0, sepc", + "csrr t1, scause", + "csrr t2, sstatus", + "csrr t3, stval", + "sd t0, {sepc}(a0)", + "sd t1, {scause}(a0)", + "sd t2, {sstatus}(a0)", + "sd t3, {stval}(a0)", + + "la t0, {default_trap_handler}", + "jalr t0", + + "csrr t0, sscratch", + "ld t1, {sepc}(t0)", + "ld t2, {sstatus}(t0)", + "ld tp, {tp}(t0)", + "ld ra, {ra}(t0)", + "ld sp, {sp}(t0)", + "ld gp, {gp}(t0)", + "ld a0, {a0}(t0)", + "ld a1, {a1}(t0)", + "ld a2, {a2}(t0)", + "ld a3, {a3}(t0)", + "ld a4, {a4}(t0)", + + "csrw sepc, t1", + "csrw sstatus, t2", + + "ld t1, {t1}(t0)", + "ld a5, {a5}(t0)", + "ld a6, {a6}(t0)", + "ld a7, {a7}(t0)", + "ld t3, {t3}(t0)", + "ld t4, {t4}(t0)", + "ld t5, {t5}(t0)", + "ld t2, {t2}(t0)", + "ld t6, {t6}(t0)", + "ld s0, {s0}(t0)", + "ld s1, {s1}(t0)", + "ld s2, {s2}(t0)", + "ld s3, {s3}(t0)", + "ld s4, {s4}(t0)", + "ld s5, {s5}(t0)", + "ld s6, {s6}(t0)", + "ld s7, {s7}(t0)", + "ld s8, {s8}(t0)", + "ld s9, {s9}(t0)", + "ld s10, {s10}(t0)", + "ld s11, {s11}(t0)", + "ld t0, {t0}(t0)", + "sret", + tp = const Registers::OFFSET_TP, + ra = const Registers::OFFSET_RA, + sp = const Registers::OFFSET_SP, + gp = const Registers::OFFSET_GP, + t0 = const Registers::OFFSET_T0, + t1 = const Registers::OFFSET_T1, + t2 = const 
Registers::OFFSET_T2, + t3 = const Registers::OFFSET_T3, + t4 = const Registers::OFFSET_T4, + t5 = const Registers::OFFSET_T5, + t6 = const Registers::OFFSET_T6, + a0 = const Registers::OFFSET_A0, + a1 = const Registers::OFFSET_A1, + a2 = const Registers::OFFSET_A2, + a3 = const Registers::OFFSET_A3, + a4 = const Registers::OFFSET_A4, + a5 = const Registers::OFFSET_A5, + a6 = const Registers::OFFSET_A6, + a7 = const Registers::OFFSET_A7, + s0 = const Registers::OFFSET_S0, + s1 = const Registers::OFFSET_S1, + s2 = const Registers::OFFSET_S2, + s3 = const Registers::OFFSET_S3, + s4 = const Registers::OFFSET_S4, + s5 = const Registers::OFFSET_S5, + s6 = const Registers::OFFSET_S6, + s7 = const Registers::OFFSET_S7, + s8 = const Registers::OFFSET_S8, + s9 = const Registers::OFFSET_S9, + s10 = const Registers::OFFSET_S10, + s11 = const Registers::OFFSET_S11, + sepc = const TrapContext::OFFSET_SEPC, + scause = const TrapContext::OFFSET_SCAUSE, + sstatus = const TrapContext::OFFSET_SSTATUS, + stval = const TrapContext::OFFSET_STVAL, + default_trap_handler = sym _default_trap_handler, + ); +} diff --git a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs index 58566ebe..28689111 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs @@ -1,18 +1,22 @@ +mod captured; +mod default; mod trap_context; use super::config::platform::virt::*; use super::context::TaskContext; +use captured::{_captured_trap_entry, _captured_trap_return}; use core::arch::{global_asm, naked_asm}; use core::mem::{offset_of, size_of}; use core::num::NonZero; use core::ptr::NonNull; +use default::_default_trap_entry; use eonix_hal_traits::{ context::RawTaskContext, trap::{IrqState as IrqStateTrait, TrapReturn}, }; use riscv::register::sstatus::{self, Sstatus}; use riscv::register::stvec::TrapMode; -use riscv::register::{scause, sepc, stval}; +use riscv::register::{scause, sepc, sscratch, stval}; use 
riscv::{ asm::sfence_vma_all, register::stvec::{self, Stvec}, @@ -21,288 +25,35 @@ use sbi::SbiError; pub use trap_context::*; -#[repr(C)] -pub struct TrapScratch { - t1: u64, - t2: u64, - kernel_tp: Option>, - trap_context: Option>, - handler: unsafe extern "C" fn(), - capturer_context: TaskContext, -} - -#[eonix_percpu::define_percpu] -pub(crate) static TRAP_SCRATCH: TrapScratch = TrapScratch { - t1: 0, - t2: 0, - kernel_tp: None, - trap_context: None, - handler: default_trap_handler, - capturer_context: TaskContext::new(), -}; - -static mut DIRTY_TASK_CONTEXT: TaskContext = TaskContext::new(); - -#[unsafe(naked)] -unsafe extern "C" fn _raw_trap_entry() -> ! { - naked_asm!( - "csrrw t0, sscratch, t0", // Swap t0 and sscratch - "sd t1, 0(t0)", - "sd t2, 8(t0)", - "csrr t1, sstatus", - "andi t1, t1, 0x100", - "beqz t1, 2f", - // else SPP = 1, supervisor mode - "addi t1, sp, -{trap_context_size}", - "mv t2, tp", - "sd ra, {ra}(t1)", - "sd sp, {sp}(t1)", - "mv sp, t1", - "j 4f", - // SPP = 0, user mode - "2:", - "ld t1, 24(t0)", // Load captured TrapContext address - "mv t2, tp", - "ld tp, 16(t0)", // Restore kernel tp - // t0: &mut TrapScratch, t1: &mut TrapContext, t2: tp before trap - "3:", - "sd ra, {ra}(t1)", - "sd sp, {sp}(t1)", - "4:", - "sd gp, {gp}(t1)", - "sd t2, {tp}(t1)", - "ld ra, 0(t0)", - "ld t2, 8(t0)", - "sd ra, {t1}(t1)", // Save t1 - "sd t2, {t2}(t1)", // Save t2 - "ld ra, 32(t0)", // Load handler address - "csrrw t2, sscratch, t0", // Swap t0 and sscratch - "sd t2, {t0}(t1)", - "sd a0, {a0}(t1)", - "sd a1, {a1}(t1)", - "sd a2, {a2}(t1)", - "sd a3, {a3}(t1)", - "sd a4, {a4}(t1)", - "sd a5, {a5}(t1)", - "sd a6, {a6}(t1)", - "sd a7, {a7}(t1)", - "sd t3, {t3}(t1)", - "sd t4, {t4}(t1)", - "sd t5, {t5}(t1)", - "sd t6, {t6}(t1)", - "sd s0, {s0}(t1)", - "sd s1, {s1}(t1)", - "sd s2, {s2}(t1)", - "sd s3, {s3}(t1)", - "sd s4, {s4}(t1)", - "sd s5, {s5}(t1)", - "sd s6, {s6}(t1)", - "sd s7, {s7}(t1)", - "sd s8, {s8}(t1)", - "sd s9, {s9}(t1)", - "sd s10, 
{s10}(t1)", - "sd s11, {s11}(t1)", - "csrr t2, sstatus", - "csrr t3, sepc", - "csrr t4, scause", - "sd t2, {sstatus}(t1)", - "sd t3, {sepc}(t1)", - "sd t4, {scause}(t1)", - "ret", - trap_context_size = const size_of::(), - ra = const Registers::OFFSET_RA, - sp = const Registers::OFFSET_SP, - gp = const Registers::OFFSET_GP, - tp = const Registers::OFFSET_TP, - t1 = const Registers::OFFSET_T1, - t2 = const Registers::OFFSET_T2, - t0 = const Registers::OFFSET_T0, - a0 = const Registers::OFFSET_A0, - a1 = const Registers::OFFSET_A1, - a2 = const Registers::OFFSET_A2, - a3 = const Registers::OFFSET_A3, - a4 = const Registers::OFFSET_A4, - a5 = const Registers::OFFSET_A5, - a6 = const Registers::OFFSET_A6, - a7 = const Registers::OFFSET_A7, - t3 = const Registers::OFFSET_T3, - t4 = const Registers::OFFSET_T4, - t5 = const Registers::OFFSET_T5, - t6 = const Registers::OFFSET_T6, - s0 = const Registers::OFFSET_S0, - s1 = const Registers::OFFSET_S1, - s2 = const Registers::OFFSET_S2, - s3 = const Registers::OFFSET_S3, - s4 = const Registers::OFFSET_S4, - s5 = const Registers::OFFSET_S5, - s6 = const Registers::OFFSET_S6, - s7 = const Registers::OFFSET_S7, - s8 = const Registers::OFFSET_S8, - s9 = const Registers::OFFSET_S9, - s10 = const Registers::OFFSET_S10, - s11 = const Registers::OFFSET_S11, - sstatus = const TrapContext::OFFSET_SSTATUS, - sepc = const TrapContext::OFFSET_SEPC, - scause = const TrapContext::OFFSET_SCAUSE, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn _raw_trap_return(ctx: &mut TrapContext) -> ! 
{ - naked_asm!( - "ld ra, {ra}(a0)", - "ld sp, {sp}(a0)", - "ld gp, {gp}(a0)", - "ld tp, {tp}(a0)", - "ld t1, {t1}(a0)", - "ld t2, {t2}(a0)", - "ld t0, {t0}(a0)", - "ld a1, {a1}(a0)", - "ld a2, {a2}(a0)", - "ld a3, {a3}(a0)", - "ld a4, {a4}(a0)", - "ld a5, {a5}(a0)", - "ld a6, {a6}(a0)", - "ld a7, {a7}(a0)", - "ld t3, {t3}(a0)", - "ld t4, {sepc}(a0)", // Load sepc from TrapContext - "ld t5, {sstatus}(a0)", // Load sstatus from TrapContext - "ld s0, {s0}(a0)", - "ld s1, {s1}(a0)", - "ld s2, {s2}(a0)", - "ld s3, {s3}(a0)", - "ld s4, {s4}(a0)", - "ld s5, {s5}(a0)", - "ld s6, {s6}(a0)", - "ld s7, {s7}(a0)", - "ld s8, {s8}(a0)", - "ld s9, {s9}(a0)", - "ld s10, {s10}(a0)", - "ld s11, {s11}(a0)", - "csrw sepc, t4", // Restore sepc - "csrw sstatus, t5", // Restore sstatus - "ld t4, {t4}(a0)", - "ld t5, {t5}(a0)", - "ld t6, {t6}(a0)", - "ld a0, {a0}(a0)", - "sret", - ra = const Registers::OFFSET_RA, - sp = const Registers::OFFSET_SP, - gp = const Registers::OFFSET_GP, - tp = const Registers::OFFSET_TP, - t1 = const Registers::OFFSET_T1, - t2 = const Registers::OFFSET_T2, - t0 = const Registers::OFFSET_T0, - a0 = const Registers::OFFSET_A0, - a1 = const Registers::OFFSET_A1, - a2 = const Registers::OFFSET_A2, - a3 = const Registers::OFFSET_A3, - a4 = const Registers::OFFSET_A4, - a5 = const Registers::OFFSET_A5, - a6 = const Registers::OFFSET_A6, - a7 = const Registers::OFFSET_A7, - t3 = const Registers::OFFSET_T3, - t4 = const Registers::OFFSET_T4, - t5 = const Registers::OFFSET_T5, - t6 = const Registers::OFFSET_T6, - s0 = const Registers::OFFSET_S0, - s1 = const Registers::OFFSET_S1, - s2 = const Registers::OFFSET_S2, - s3 = const Registers::OFFSET_S3, - s4 = const Registers::OFFSET_S4, - s5 = const Registers::OFFSET_S5, - s6 = const Registers::OFFSET_S6, - s7 = const Registers::OFFSET_S7, - s8 = const Registers::OFFSET_S8, - s9 = const Registers::OFFSET_S9, - s10 = const Registers::OFFSET_S10, - s11 = const Registers::OFFSET_S11, - sstatus = const 
TrapContext::OFFSET_SSTATUS, - sepc = const TrapContext::OFFSET_SEPC, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn default_trap_handler() { - unsafe extern "C" { - fn _default_trap_handler(trap_context: &mut TrapContext); - } - - naked_asm!( - "andi sp, sp, -16", // Align stack pointer to 16 bytes - "addi sp, sp, -16", - "mv a0, t1", // TrapContext pointer in t1 - "sd a0, 0(sp)", // Save TrapContext pointer - "", - "call {default_handler}", - "", - "ld a0, 0(sp)", // Restore TrapContext pointer - "j {trap_return}", - default_handler = sym _default_trap_handler, - trap_return = sym _raw_trap_return, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn captured_trap_handler() { - naked_asm!( - "la a0, {dirty_task_context}", - "addi a1, t0, {capturer_context_offset}", - "j {switch}", - dirty_task_context = sym DIRTY_TASK_CONTEXT, - capturer_context_offset = const offset_of!(TrapScratch, capturer_context), - switch = sym TaskContext::switch, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn captured_trap_return(trap_context: usize) -> ! 
{ - naked_asm!( - "mv a0, sp", - "j {raw_trap_return}", - raw_trap_return = sym _raw_trap_return, - ); -} - -impl TrapScratch { - pub fn set_trap_context(&mut self, ctx: NonNull) { - self.trap_context = Some(ctx); - } - - pub fn clear_trap_context(&mut self) { - self.trap_context = None; - } - - pub fn set_kernel_tp(&mut self, tp: NonNull) { - self.kernel_tp = Some(NonZero::new(tp.addr().get() as u64).unwrap()); - } -} - impl TrapReturn for TrapContext { type TaskContext = TaskContext; unsafe fn trap_return(&mut self) { let irq_states = disable_irqs_save(); - let old_handler = - core::mem::replace(&mut TRAP_SCRATCH.as_mut().handler, captured_trap_handler); + let old_stvec = stvec::read(); + stvec::write({ + let mut stvec_val = Stvec::from_bits(0); + stvec_val.set_address(_captured_trap_entry as usize); + stvec_val.set_trap_mode(TrapMode::Direct); + stvec_val + }); - let old_trap_context = core::mem::replace( - &mut TRAP_SCRATCH.as_mut().trap_context, - Some(NonNull::from(&mut *self)), - ); + let old_trap_ctx = sscratch::read(); + sscratch::write(&raw mut *self as usize); + let mut from_ctx = TaskContext::new(); let mut to_ctx = TaskContext::new(); - to_ctx.set_program_counter(captured_trap_return as usize); - to_ctx.set_stack_pointer(&raw mut *self as usize); + to_ctx.set_program_counter(_captured_trap_return as usize); + to_ctx.set_stack_pointer(&raw mut from_ctx as usize); to_ctx.set_interrupt_enabled(false); unsafe { - TaskContext::switch(&mut TRAP_SCRATCH.as_mut().capturer_context, &mut to_ctx); + TaskContext::switch(&mut from_ctx, &mut to_ctx); } - TRAP_SCRATCH.as_mut().handler = old_handler; - TRAP_SCRATCH.as_mut().trap_context = old_trap_context; + sscratch::write(old_trap_ctx); + stvec::write(old_stvec); irq_states.restore(); } @@ -319,7 +70,7 @@ fn setup_trap_handler(trap_entry_addr: usize) { } pub fn setup_trap() { - setup_trap_handler(_raw_trap_entry as usize); + setup_trap_handler(_default_trap_entry as usize); } #[derive(Debug, Clone, Copy, PartialEq, 
Eq)] diff --git a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs index 369eef3e..73ed34b2 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs @@ -1,5 +1,5 @@ use crate::{arch::time::set_next_timer, processor::CPU}; -use core::arch::asm; +use core::{arch::asm, mem::offset_of}; use eonix_hal_traits::{ fault::{Fault, PageFaultErrorCode}, trap::{RawTrapContext, TrapType}, @@ -18,24 +18,23 @@ use riscv::{ #[repr(C)] #[derive(Default, Clone, Copy)] pub struct Registers { + tp: u64, ra: u64, sp: u64, gp: u64, - tp: u64, - t1: u64, - t2: u64, - t0: u64, a0: u64, a1: u64, a2: u64, a3: u64, a4: u64, + t1: u64, a5: u64, a6: u64, a7: u64, t3: u64, t4: u64, t5: u64, + t2: u64, t6: u64, s0: u64, s1: u64, @@ -49,10 +48,11 @@ pub struct Registers { s9: u64, s10: u64, s11: u64, + t0: u64, } /// Saved CPU context when a trap (interrupt or exception) occurs on RISC-V 64. 
-#[repr(C)] +#[repr(C, align(16))] #[derive(Clone, Copy)] pub struct TrapContext { regs: Registers, @@ -60,46 +60,48 @@ pub struct TrapContext { sstatus: Sstatus, sepc: usize, scause: Scause, + stval: usize, } impl Registers { - pub const OFFSET_RA: usize = 0 * 8; - pub const OFFSET_SP: usize = 1 * 8; - pub const OFFSET_GP: usize = 2 * 8; - pub const OFFSET_TP: usize = 3 * 8; - pub const OFFSET_T1: usize = 4 * 8; - pub const OFFSET_T2: usize = 5 * 8; - pub const OFFSET_T0: usize = 6 * 8; - pub const OFFSET_A0: usize = 7 * 8; - pub const OFFSET_A1: usize = 8 * 8; - pub const OFFSET_A2: usize = 9 * 8; - pub const OFFSET_A3: usize = 10 * 8; - pub const OFFSET_A4: usize = 11 * 8; - pub const OFFSET_A5: usize = 12 * 8; - pub const OFFSET_A6: usize = 13 * 8; - pub const OFFSET_A7: usize = 14 * 8; - pub const OFFSET_T3: usize = 15 * 8; - pub const OFFSET_T4: usize = 16 * 8; - pub const OFFSET_T5: usize = 17 * 8; - pub const OFFSET_T6: usize = 18 * 8; - pub const OFFSET_S0: usize = 19 * 8; - pub const OFFSET_S1: usize = 20 * 8; - pub const OFFSET_S2: usize = 21 * 8; - pub const OFFSET_S3: usize = 22 * 8; - pub const OFFSET_S4: usize = 23 * 8; - pub const OFFSET_S5: usize = 24 * 8; - pub const OFFSET_S6: usize = 25 * 8; - pub const OFFSET_S7: usize = 26 * 8; - pub const OFFSET_S8: usize = 27 * 8; - pub const OFFSET_S9: usize = 28 * 8; - pub const OFFSET_S10: usize = 29 * 8; - pub const OFFSET_S11: usize = 30 * 8; + pub const OFFSET_TP: usize = offset_of!(Registers, tp); + pub const OFFSET_SP: usize = offset_of!(Registers, sp); + pub const OFFSET_RA: usize = offset_of!(Registers, ra); + pub const OFFSET_GP: usize = offset_of!(Registers, gp); + pub const OFFSET_T1: usize = offset_of!(Registers, t1); + pub const OFFSET_T2: usize = offset_of!(Registers, t2); + pub const OFFSET_T0: usize = offset_of!(Registers, t0); + pub const OFFSET_A0: usize = offset_of!(Registers, a0); + pub const OFFSET_A1: usize = offset_of!(Registers, a1); + pub const OFFSET_A2: usize = 
offset_of!(Registers, a2); + pub const OFFSET_A3: usize = offset_of!(Registers, a3); + pub const OFFSET_A4: usize = offset_of!(Registers, a4); + pub const OFFSET_A5: usize = offset_of!(Registers, a5); + pub const OFFSET_A6: usize = offset_of!(Registers, a6); + pub const OFFSET_A7: usize = offset_of!(Registers, a7); + pub const OFFSET_T3: usize = offset_of!(Registers, t3); + pub const OFFSET_T4: usize = offset_of!(Registers, t4); + pub const OFFSET_T5: usize = offset_of!(Registers, t5); + pub const OFFSET_T6: usize = offset_of!(Registers, t6); + pub const OFFSET_S0: usize = offset_of!(Registers, s0); + pub const OFFSET_S1: usize = offset_of!(Registers, s1); + pub const OFFSET_S2: usize = offset_of!(Registers, s2); + pub const OFFSET_S3: usize = offset_of!(Registers, s3); + pub const OFFSET_S4: usize = offset_of!(Registers, s4); + pub const OFFSET_S5: usize = offset_of!(Registers, s5); + pub const OFFSET_S6: usize = offset_of!(Registers, s6); + pub const OFFSET_S7: usize = offset_of!(Registers, s7); + pub const OFFSET_S8: usize = offset_of!(Registers, s8); + pub const OFFSET_S9: usize = offset_of!(Registers, s9); + pub const OFFSET_S10: usize = offset_of!(Registers, s10); + pub const OFFSET_S11: usize = offset_of!(Registers, s11); } impl TrapContext { - pub const OFFSET_SSTATUS: usize = 31 * 8; - pub const OFFSET_SEPC: usize = 32 * 8; - pub const OFFSET_SCAUSE: usize = 33 * 8; + pub const OFFSET_SSTATUS: usize = offset_of!(TrapContext, sstatus); + pub const OFFSET_SEPC: usize = offset_of!(TrapContext, sepc); + pub const OFFSET_SCAUSE: usize = offset_of!(TrapContext, scause); + pub const OFFSET_STVAL: usize = offset_of!(TrapContext, stval); fn syscall_no(&self) -> usize { self.regs.a7 as usize @@ -131,6 +133,7 @@ impl RawTrapContext for TrapContext { sstatus, sepc: 0, scause: Scause::from_bits(0), + stval: 0, } } @@ -176,16 +179,10 @@ impl RawTrapContext for TrapContext { }, exception @ (Exception::InstructionPageFault | Exception::LoadPageFault - | 
Exception::StorePageFault) => { - #[inline(always)] - fn get_page_fault_address() -> VAddr { - VAddr::from(stval::read()) - } - TrapType::Fault(Fault::PageFault { - error_code: self.get_page_fault_error_code(exception), - address: get_page_fault_address(), - }) - } + | Exception::StorePageFault) => TrapType::Fault(Fault::PageFault { + error_code: self.get_page_fault_error_code(exception), + address: VAddr::from(self.stval), + }), // breakpoint and supervisor env call _ => TrapType::Fault(Fault::Unknown(e)), } From 9c900be22500f03319d30bdd42f1709b1cc27dc5 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 10 Aug 2025 00:36:27 +0800 Subject: [PATCH 20/29] task, thread: working version of threads We've got everything done in order to make the system run. Add Thread::contexted to load the context needed for the thread to run. Wrap the Thread::real_run() with contexted(stackful(...)) in Thread::run(). We would use this for now. Later, we will make the thread completely asynchronous. This way we don't have to change its interface then. 
Signed-off-by: greatbridf --- src/kernel/task.rs | 88 +++++++++++++++++++++++++++++---------- src/kernel/task/clone.rs | 4 +- src/kernel/task/thread.rs | 64 +++++++++++++++++----------- src/lib.rs | 4 +- 4 files changed, 109 insertions(+), 51 deletions(-) diff --git a/src/kernel/task.rs b/src/kernel/task.rs index e2bbcb3f..2cdb8c22 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -79,29 +79,68 @@ pub async fn stackful(mut future: F) -> F::Output where F: core::future::Future, { + use crate::kernel::{ + interrupt::{default_fault_handler, default_irq_handler}, + timer::{should_reschedule, timer_interrupt}, + }; + use alloc::sync::Arc; + use alloc::task::Wake; use core::cell::UnsafeCell; + use core::future::Future; + use core::pin::Pin; + use core::ptr::NonNull; + use core::sync::atomic::AtomicBool; + use core::sync::atomic::Ordering; + use core::task::Context; + use core::task::Poll; + use core::task::Waker; use eonix_hal::traits::fault::Fault; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::traits::trap::TrapReturn; + use eonix_hal::traits::trap::TrapType; use eonix_hal::trap::TrapContext; - use eonix_log::println_debug; + use eonix_preempt::assert_preempt_enabled; use eonix_runtime::executor::Stack; - - use crate::kernel::{ - interrupt::{default_fault_handler, default_irq_handler}, - timer::{should_reschedule, timer_interrupt}, - }; + use thread::wait_for_wakeups; let stack = KernelStack::new(); - fn execute( - future: core::pin::Pin<&mut F>, - output_ptr: core::ptr::NonNull>, - ) -> ! + fn execute(mut future: Pin<&mut F>, output_ptr: NonNull>) -> ! 
where - F: core::future::Future, + F: Future, { - let output = do_block_on(future); + struct WokenUp(AtomicBool); + + impl Wake for WokenUp { + fn wake(self: Arc) { + self.wake_by_ref(); + } + + fn wake_by_ref(self: &Arc) { + self.0.swap(true, Ordering::AcqRel); + } + } + + let woken_up = Arc::new(WokenUp(AtomicBool::new(false))); + let waker = Waker::from(woken_up.clone()); + let mut cx = Context::from_waker(&waker); + + let output = loop { + match future.as_mut().poll(&mut cx) { + Poll::Ready(output) => break output, + Poll::Pending => { + if woken_up.0.swap(false, Ordering::Acquire) { + continue; + } + + assert_preempt_enabled!("Blocking in stackful futures is not allowed."); + + unsafe { + core::arch::asm!("ebreak"); + } + } + } + }; unsafe { output_ptr.write(Some(output)); @@ -115,7 +154,7 @@ where } let sp = stack.get_bottom(); - let output = UnsafeCell::new(None); + let mut output = UnsafeCell::new(None); let mut trap_ctx = TrapContext::new(); @@ -135,21 +174,26 @@ where } match trap_ctx.trap_type() { - eonix_hal::traits::trap::TrapType::Syscall { .. } => {} - eonix_hal::traits::trap::TrapType::Fault(fault) => { + TrapType::Syscall { .. 
} => {} + TrapType::Fault(fault) => { // Breakpoint if let Fault::Unknown(3) = &fault { - println_debug!("Breakpoint hit, returning output"); - break output.into_inner().unwrap(); + if let Some(output) = output.get_mut().take() { + break output; + } else { + wait_for_wakeups().await; + } + + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 2); + } else { + default_fault_handler(fault, &mut trap_ctx) } - - default_fault_handler(fault, &mut trap_ctx) } - eonix_hal::traits::trap::TrapType::Irq { callback } => callback(default_irq_handler), - eonix_hal::traits::trap::TrapType::Timer { callback } => { + TrapType::Irq { callback } => callback(default_irq_handler), + TrapType::Timer { callback } => { callback(timer_interrupt); - if should_reschedule() { + if eonix_preempt::count() == 0 && should_reschedule() { yield_now().await; } } diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index 2a16ce56..574cdfc9 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,4 +1,4 @@ -use super::{block_on, stackful}; +use super::block_on; use crate::{ kernel::{ syscall::procops::parse_user_tls, @@ -164,7 +164,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)? 
} - RUNTIME.spawn(stackful(new_thread.run())); + RUNTIME.spawn(new_thread.run()); Ok(new_pid) } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index ef71657f..be3a6077 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -1,6 +1,6 @@ use super::{ signal::{RaiseResult, SignalList}, - Process, ProcessList, WaitType, + stackful, Process, ProcessList, WaitType, }; use crate::{ kernel::{ @@ -16,8 +16,8 @@ use crate::{ use alloc::sync::Arc; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ - future::Future, - pin::{pin, Pin}, + future::{poll_fn, Future}, + pin::Pin, ptr::NonNull, sync::atomic::{AtomicBool, Ordering}, task::{Context, Poll}, @@ -28,9 +28,9 @@ use eonix_hal::{ traits::{ fault::Fault, fpu::RawFpuState as _, - trap::{IrqState as _, RawTrapContext, TrapReturn, TrapType}, + trap::{RawTrapContext, TrapReturn, TrapType}, }, - trap::{disable_irqs_save, TrapContext}, + trap::TrapContext, }; use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::AsProofMut as _; @@ -415,36 +415,39 @@ impl Thread { } } - pub fn run(self: Arc) -> impl Future + Send + 'static { - async fn real_run_with_context(me: &Arc) { - let mut future = pin!(me.real_run()); + async fn contexted(&self, future: F) -> F::Output + where + F: Future, + { + let mut future = core::pin::pin!(future); - core::future::poll_fn(|cx| { - me.process.mm_list.activate(); + core::future::poll_fn(|cx| { + self.process.mm_list.activate(); - CURRENT_THREAD.set(NonNull::new(Arc::as_ptr(me) as *mut _)); + CURRENT_THREAD.set(NonNull::new(&raw const *self as *mut _)); - unsafe { - // SAFETY: Preemption is disabled. - me.load_thread_area32(); - } + unsafe { + eonix_preempt::disable(); - let irq_state = disable_irqs_save(); + // SAFETY: Preemption is disabled. 
+ self.load_thread_area32(); - let result = future.as_mut().poll(cx); + eonix_preempt::enable(); + } - irq_state.restore(); + let result = future.as_mut().poll(cx); - me.process.mm_list.deactivate(); + self.process.mm_list.deactivate(); - CURRENT_THREAD.set(None); + CURRENT_THREAD.set(None); - result - }) - .await - } + result + }) + .await + } - async move { real_run_with_context(&self).await } + pub fn run(self: Arc) -> impl Future + Send + 'static { + async move { self.contexted(stackful(self.real_run())).await } } } @@ -469,3 +472,14 @@ pub async fn yield_now() { Yield { yielded: false }.await; } + +pub fn wait_for_wakeups() -> impl Future { + let mut waited = false; + poll_fn(move |_| match waited { + true => Poll::Ready(()), + false => { + waited = true; + Poll::Pending + } + }) +} diff --git a/src/lib.rs b/src/lib.rs index cbe7bc5d..beebe7c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,7 +37,7 @@ use eonix_mm::address::PRange; use eonix_runtime::{executor::Stack, scheduler::RUNTIME}; use kernel::{ mem::GlobalPageAlloc, - task::{stackful, KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, + task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, @@ -272,5 +272,5 @@ async fn init_process(early_kstack: PRange) { // TODO!!!: Remove this. thread.files.open_console(); - RUNTIME.spawn(stackful(thread.run())); + RUNTIME.spawn(thread.run()); } From 30bfc5a0db6d705f15800d4eaccc6a3d62726786 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 10 Aug 2025 03:44:38 +0800 Subject: [PATCH 21/29] loongarch64, trap: rework to fix nested captured traps Similar to 661a15940badf992d7836eb93c7ab293590c6ad4: - Save previous {trap, task}_ctx and restore them afterwards. - Set kernel tp when setting trap context user mode. - Advance the program counter by 4 bytes on breakpoints. 
Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/loongarch64/trap/mod.rs | 13 +++++++++++-- .../src/arch/loongarch64/trap/trap_context.rs | 14 ++++++++++++-- src/driver/virtio/loongarch64.rs | 4 ++-- src/kernel/task.rs | 12 ++++++++++++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs b/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs index f529bf61..4a6c4754 100644 --- a/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs +++ b/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs @@ -278,11 +278,18 @@ impl TrapReturn for TrapContext { to_ctx.set_interrupt_enabled(false); unsafe { + let mut old_trap_ctx: usize; + let mut old_task_ctx: usize; + asm!( + "csrrd {old_trap_ctx}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", + "csrrd {old_task_ctx}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", "csrwr {captured_trap_context}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", "csrwr {capturer_task_context}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", captured_trap_context = inout(reg) &raw mut *self => _, capturer_task_context = inout(reg) &raw mut capturer_ctx => _, + old_trap_ctx = out(reg) old_trap_ctx, + old_task_ctx = out(reg) old_task_ctx, CSR_CAPTURED_TRAP_CONTEXT_ADDR = const CSR_CAPTURED_TRAP_CONTEXT_ADDR, CSR_CAPTURER_TASK_CONTEXT_ADDR = const CSR_CAPTURER_TASK_CONTEXT_ADDR, options(nomem, nostack, preserves_flags), @@ -291,8 +298,10 @@ impl TrapReturn for TrapContext { TaskContext::switch(&mut capturer_ctx, &mut to_ctx); asm!( - "csrwr $zero, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", - "csrwr $zero, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", + "csrwr {old_trap_ctx}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", + "csrwr {old_task_ctx}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", + old_trap_ctx = inout(reg) old_trap_ctx, + old_task_ctx = inout(reg) old_task_ctx, CSR_CAPTURED_TRAP_CONTEXT_ADDR = const CSR_CAPTURED_TRAP_CONTEXT_ADDR, CSR_CAPTURER_TASK_CONTEXT_ADDR = const CSR_CAPTURER_TASK_CONTEXT_ADDR, options(nomem, nostack, preserves_flags), diff --git 
a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs index 83e28cfa..70a2bdc2 100644 --- a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs @@ -1,4 +1,4 @@ -use crate::processor::CPU; +use crate::{arch::trap::CSR_KERNEL_TP, processor::CPU}; use core::{arch::asm, mem::offset_of}; use eonix_hal_traits::{ fault::{Fault, PageFaultErrorCode}, @@ -226,7 +226,17 @@ impl RawTrapContext for TrapContext { fn set_user_mode(&mut self, user: bool) { match user { true => self.prmd |= 0x3, - false => self.prmd &= !0x3, + false => { + unsafe { + asm!( + "csrrd {tp}, {CSR_KERNEL_TP}", + tp = out(reg) self.regs.tp, + CSR_KERNEL_TP = const CSR_KERNEL_TP, + options(nomem, nostack, preserves_flags), + ) + } + self.prmd &= !0x3; + } } } diff --git a/src/driver/virtio/loongarch64.rs b/src/driver/virtio/loongarch64.rs index bcd7e713..996683bd 100644 --- a/src/driver/virtio/loongarch64.rs +++ b/src/driver/virtio/loongarch64.rs @@ -3,13 +3,13 @@ use crate::kernel::{ block::{make_device, BlockDevice}, constants::EIO, pcie::{self, PCIDevice, PCIDriver, PciError, SegmentGroup}, + task::block_on, }; use alloc::sync::Arc; use core::sync::atomic::{AtomicUsize, Ordering}; use eonix_hal::{fence::memory_barrier, mm::ArchPhysAccess}; use eonix_log::println_warn; use eonix_mm::address::PhysAccess; -use eonix_runtime::task::Task; use eonix_sync::Spin; use virtio_drivers::{ device::blk::VirtIOBlk, @@ -134,7 +134,7 @@ impl PCIDriver for VirtIODriver { Arc::new(Spin::new(virtio_block)), )?; - Task::block_on(block_device.partprobe()).map_err(|err| { + block_on(block_device.partprobe()).map_err(|err| { println_warn!( "Failed to probe partitions for VirtIO Block device: {}", err diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 2cdb8c22..9c900a64 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -136,7 +136,11 @@ where 
assert_preempt_enabled!("Blocking in stackful futures is not allowed."); unsafe { + #[cfg(target_arch = "riscv64")] core::arch::asm!("ebreak"); + + #[cfg(target_arch = "loongarch64")] + core::arch::asm!("break 1"); } } } @@ -147,7 +151,11 @@ where } unsafe { + #[cfg(target_arch = "riscv64")] core::arch::asm!("ebreak"); + + #[cfg(target_arch = "loongarch64")] + core::arch::asm!("break 1"); } unreachable!() @@ -184,7 +192,11 @@ where wait_for_wakeups().await; } + #[cfg(target_arch = "riscv64")] trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 2); + + #[cfg(target_arch = "loongarch64")] + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 4); } else { default_fault_handler(fault, &mut trap_ctx) } From a6221725870605955b606ca5329b384cdefe5414 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 10 Aug 2025 03:48:24 +0800 Subject: [PATCH 22/29] trap: introduce Breakpoint fault type TODO: hide changes to the program counter in the HAL crate. Signed-off-by: greatbridf --- crates/eonix_hal/eonix_hal_traits/src/trap.rs | 1 + .../src/arch/loongarch64/trap/trap_context.rs | 1 + .../src/arch/riscv64/trap/trap_context.rs | 1 + src/kernel/interrupt.rs | 1 + src/kernel/task.rs | 33 ++++++++----------- src/kernel/task/thread.rs | 1 + 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/crates/eonix_hal/eonix_hal_traits/src/trap.rs b/crates/eonix_hal/eonix_hal_traits/src/trap.rs index d698dbaa..e51a9eb4 100644 --- a/crates/eonix_hal/eonix_hal_traits/src/trap.rs +++ b/crates/eonix_hal/eonix_hal_traits/src/trap.rs @@ -66,6 +66,7 @@ where { Syscall { no: usize, args: [usize; 6] }, Fault(Fault), + Breakpoint, Irq { callback: FIrq }, Timer { callback: FTimer }, } diff --git a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs index 70a2bdc2..56bf59b6 100644 --- a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs 
@@ -173,6 +173,7 @@ impl RawTrapContext for TrapContext { | Exception::MemoryAccessAddressError | Exception::PagePrivilegeIllegal, ) => TrapType::Fault(Fault::BadAccess), + Trap::Exception(Exception::Breakpoint) => TrapType::Breakpoint, Trap::Exception(Exception::InstructionNotExist) => TrapType::Fault(Fault::InvalidOp), Trap::Exception(Exception::Syscall) => TrapType::Syscall { no: self.syscall_no(), diff --git a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs index 73ed34b2..ab4ca9a2 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs @@ -166,6 +166,7 @@ impl RawTrapContext for TrapContext { } Trap::Exception(e) => { match Exception::from_number(e).unwrap() { + Exception::Breakpoint => TrapType::Breakpoint, Exception::InstructionMisaligned | Exception::LoadMisaligned | Exception::InstructionFault diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs index 4b55f182..9394f64c 100644 --- a/src/kernel/interrupt.rs +++ b/src/kernel/interrupt.rs @@ -48,6 +48,7 @@ pub fn default_fault_handler(fault_type: Fault, trap_ctx: &mut TrapContext) { pub fn interrupt_handler(trap_ctx: &mut TrapContext) { match trap_ctx.trap_type() { TrapType::Syscall { no, .. 
} => unreachable!("Syscall {} in kernel space.", no), + TrapType::Breakpoint => unreachable!("Breakpoint in kernel space."), TrapType::Fault(fault) => default_fault_handler(fault, trap_ctx), TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => callback(timer_interrupt), diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 9c900a64..35d293e3 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -94,7 +94,6 @@ where use core::task::Context; use core::task::Poll; use core::task::Waker; - use eonix_hal::traits::fault::Fault; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::traits::trap::TrapReturn; use eonix_hal::traits::trap::TrapType; @@ -183,24 +182,7 @@ where match trap_ctx.trap_type() { TrapType::Syscall { .. } => {} - TrapType::Fault(fault) => { - // Breakpoint - if let Fault::Unknown(3) = &fault { - if let Some(output) = output.get_mut().take() { - break output; - } else { - wait_for_wakeups().await; - } - - #[cfg(target_arch = "riscv64")] - trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 2); - - #[cfg(target_arch = "loongarch64")] - trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 4); - } else { - default_fault_handler(fault, &mut trap_ctx) - } - } + TrapType::Fault(fault) => default_fault_handler(fault, &mut trap_ctx), TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => { callback(timer_interrupt); @@ -209,6 +191,19 @@ where yield_now().await; } } + TrapType::Breakpoint => { + if let Some(output) = output.get_mut().take() { + break output; + } else { + wait_for_wakeups().await; + } + + #[cfg(target_arch = "riscv64")] + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 2); + + #[cfg(target_arch = "loongarch64")] + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 4); + } } } } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index be3a6077..50600436 100644 --- a/src/kernel/task/thread.rs +++ 
b/src/kernel/task/thread.rs @@ -391,6 +391,7 @@ impl Thread { self.signal_list.raise(Signal::SIGILL); } TrapType::Fault(Fault::Unknown(_)) => unimplemented!("Unhandled fault"), + TrapType::Breakpoint => unimplemented!("Breakpoint in user space"), TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => { callback(timer_interrupt); From 21b765092ffa27bcf7ed2e53a29ff64420e8f9c7 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Mon, 11 Aug 2025 00:55:18 +0800 Subject: [PATCH 23/29] task: fix stackful waker implementation The current implementation uses the WokenUp object to detect whether the stackful task is woken up somewhere. This is WRONG since we might lose wakeups as the runtime has no idea what we have done. If someone wakes us up, the task won't be enqueued so we will never have a second chance to get to the foreground. The fix is to use an Arc<Task> to create the waker and check whether the task is ready each time we get back to the stackful poll loop. Signed-off-by: greatbridf --- crates/eonix_runtime/src/task.rs | 4 ++++ crates/eonix_runtime/src/task/task_state.rs | 4 ++++ src/kernel/task.rs | 24 ++++----------------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/crates/eonix_runtime/src/task.rs b/crates/eonix_runtime/src/task.rs index 7b89d3fe..8a1d6ef4 100644 --- a/crates/eonix_runtime/src/task.rs +++ b/crates/eonix_runtime/src/task.rs @@ -93,6 +93,10 @@ impl Task { return rq; } } + + pub fn is_ready(&self) -> bool { + self.state.is_ready() + } } impl Wake for Task { diff --git a/crates/eonix_runtime/src/task/task_state.rs b/crates/eonix_runtime/src/task/task_state.rs index 074acfb4..473310d7 100644 --- a/crates/eonix_runtime/src/task/task_state.rs +++ b/crates/eonix_runtime/src/task/task_state.rs @@ -22,4 +22,8 @@ impl TaskState { self.0 .fetch_update(Ordering::SeqCst, Ordering::SeqCst, func) } + + pub(crate) fn is_ready(&self) -> bool { + self.0.load(Ordering::SeqCst) & Self::READY == Self::READY + } } diff --git 
a/src/kernel/task.rs b/src/kernel/task.rs index 35d293e3..13e2ec93 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -10,6 +10,7 @@ mod signal; mod thread; pub use clone::{do_clone, CloneArgs, CloneFlags}; +use eonix_runtime::task::Task; pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; pub use kernel_stack::KernelStack; pub use loader::ProgramLoader; @@ -18,7 +19,7 @@ pub use process_group::ProcessGroup; pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; -pub use thread::{yield_now, Thread, ThreadBuilder}; +pub use thread::{yield_now, Thread, ThreadAlloc, ThreadBuilder}; fn do_block_on(mut future: core::pin::Pin<&mut F>) -> F::Output where @@ -83,14 +84,10 @@ where interrupt::{default_fault_handler, default_irq_handler}, timer::{should_reschedule, timer_interrupt}, }; - use alloc::sync::Arc; - use alloc::task::Wake; use core::cell::UnsafeCell; use core::future::Future; use core::pin::Pin; use core::ptr::NonNull; - use core::sync::atomic::AtomicBool; - use core::sync::atomic::Ordering; use core::task::Context; use core::task::Poll; use core::task::Waker; @@ -108,27 +105,14 @@ where where F: Future, { - struct WokenUp(AtomicBool); - - impl Wake for WokenUp { - fn wake(self: Arc) { - self.wake_by_ref(); - } - - fn wake_by_ref(self: &Arc) { - self.0.swap(true, Ordering::AcqRel); - } - } - - let woken_up = Arc::new(WokenUp(AtomicBool::new(false))); - let waker = Waker::from(woken_up.clone()); + let waker = Waker::from(Task::current().clone()); let mut cx = Context::from_waker(&waker); let output = loop { match future.as_mut().poll(&mut cx) { Poll::Ready(output) => break output, Poll::Pending => { - if woken_up.0.swap(false, Ordering::Acquire) { + if Task::current().is_ready() { continue; } From dee96a3a6aac134ee95ce4217c7067c3236eb203 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Mon, 11 Aug 2025 00:59:52 +0800 Subject: [PATCH 24/29] syscall: migrate all syscalls to async... 
We introduced a per-thread allocator inside the future object to allocate space for the syscalls. This ensures performance and saves memory. The allocator takes up 8K for now and is enough for current use. Signed-off-by: greatbridf --- Cargo.lock | 11 +- Cargo.toml | 3 + crates/eonix_hal/src/arch/loongarch64/mm.rs | 2 + crates/eonix_hal/src/arch/riscv64/mm.rs | 2 + crates/eonix_mm/src/page_table/page_table.rs | 2 +- crates/posix_types/src/result.rs | 10 + macros/src/lib.rs | 61 ++-- src/fs/fat32.rs | 6 +- src/io.rs | 6 +- src/kernel/block.rs | 15 - src/kernel/interrupt.rs | 3 +- src/kernel/mem/mm_area.rs | 8 +- src/kernel/mem/mm_list.rs | 30 +- src/kernel/mem/mm_list/page_fault.rs | 20 +- src/kernel/mem/page_cache.rs | 2 + src/kernel/syscall.rs | 165 ++++++++- src/kernel/syscall/file_rw.rs | 259 ++++++++------ src/kernel/syscall/mm.rs | 105 +++--- src/kernel/syscall/net.rs | 2 +- src/kernel/syscall/procops.rs | 348 +++++++++---------- src/kernel/syscall/sysinfo.rs | 17 +- src/kernel/task/clone.rs | 20 +- src/kernel/task/futex.rs | 13 +- src/kernel/task/loader/elf.rs | 189 +++++----- src/kernel/task/loader/mod.rs | 4 +- src/kernel/task/process.rs | 57 ++- src/kernel/task/process_list.rs | 7 +- src/kernel/task/signal.rs | 6 +- src/kernel/task/signal/signal_action.rs | 5 +- src/kernel/task/thread.rs | 64 +++- src/kernel/terminal.rs | 16 +- src/kernel/user.rs | 8 +- src/kernel/user/dataflow.rs | 115 +++--- src/kernel/vfs/file.rs | 10 +- src/lib.rs | 2 + 35 files changed, 948 insertions(+), 645 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2caa0bad..484f2796 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -152,6 +152,7 @@ dependencies = [ "pointers", "posix_types", "slab_allocator", + "stalloc", "virtio-drivers", "xmas-elf", ] @@ -401,11 +402,17 @@ dependencies = [ "intrusive_list", ] +[[package]] +name = "stalloc" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a37f0ead4094eeb54c6893316aa139e48b252f1c07511e5124fa1f9414df5b6c" + [[package]] name = "syn" -version = "2.0.103" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 5231dbb2..25768c83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,9 @@ acpi = "5.2.0" align_ext = "0.1.0" xmas-elf = "0.10.0" ext4_rs = "1.3.2" +stalloc = { version = "0.6.1", default-features = false, features = [ + "allocator-api", +] } [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/crates/eonix_hal/src/arch/loongarch64/mm.rs b/crates/eonix_hal/src/arch/loongarch64/mm.rs index 91a2aae5..d5b00a6b 100644 --- a/crates/eonix_hal/src/arch/loongarch64/mm.rs +++ b/crates/eonix_hal/src/arch/loongarch64/mm.rs @@ -87,6 +87,8 @@ impl PagingMode for PagingMode48 { pub type ArchPagingMode = PagingMode48; +unsafe impl Send for RawPageTable48<'_> {} + impl<'a> RawPageTable<'a> for RawPageTable48<'a> { type Entry = PTE64; diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 74ebc349..46dd9437 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -88,6 +88,8 @@ impl PagingMode for PagingModeSv48 { pub type ArchPagingMode = PagingModeSv48; +unsafe impl Send for RawPageTableSv48<'_> {} + impl<'a> RawPageTable<'a> for RawPageTableSv48<'a> { type Entry = PTE64; diff --git a/crates/eonix_mm/src/page_table/page_table.rs b/crates/eonix_mm/src/page_table/page_table.rs index 24253dc9..8318049f 100644 --- a/crates/eonix_mm/src/page_table/page_table.rs +++ b/crates/eonix_mm/src/page_table/page_table.rs @@ -11,7 +11,7 @@ use crate::{ }; use 
core::{marker::PhantomData, ptr::NonNull}; -pub trait RawPageTable<'a>: 'a { +pub trait RawPageTable<'a>: Send + 'a { type Entry: PTE + 'a; /// Return the entry at the given index. diff --git a/crates/posix_types/src/result.rs b/crates/posix_types/src/result.rs index fb251472..a10ff0ad 100644 --- a/crates/posix_types/src/result.rs +++ b/crates/posix_types/src/result.rs @@ -13,3 +13,13 @@ impl From for u32 { } } } + +impl core::fmt::Debug for PosixError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::EFAULT => write!(f, "EFAULT"), + Self::EXDEV => write!(f, "EXDEV"), + Self::EINVAL => write!(f, "EINVAL"), + } + } +} diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 022160a2..722fa5da 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -18,6 +18,11 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { let args = item.sig.inputs.iter(); let ty_ret = item.sig.output; + assert!( + item.sig.asyncness.is_some(), + "Syscall must be async function" + ); + let args_mapped = item .sig .inputs @@ -100,36 +105,50 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { }; #[link_section = #syscall_fn_section] - fn #helper_fn ( - thd: &crate::kernel::task::Thread, + fn #helper_fn <'thd, 'alloc>( + thd: &'thd crate::kernel::task::Thread, + thd_alloc: crate::kernel::task::ThreadAlloc<'alloc>, args: [usize; 6] - ) -> Option { + ) -> core::pin::Pin> + Send + 'thd, + crate::kernel::task::ThreadAlloc<'alloc> + >> { use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; + use alloc::boxed::Box; #(#args_mapped)* - eonix_log::println_trace!( - "trace_syscall", - "tid{}: {}({}) => {{", - thd.tid, - #syscall_name_str, - format_args!(#trace_format_string, #trace_format_args), - ); - - let retval = #real_fn(thd, #(#args_call),*).into_retval(); - - eonix_log::println_trace!( - "trace_syscall", - "}} => {:x?}", - retval, - ); - - retval + unsafe { + 
core::pin::Pin::new_unchecked( + Box::new_in( + async move { + eonix_log::println_trace!( + "trace_syscall", + "tid{}: {}({}) => {{", + thd.tid, + #syscall_name_str, + format_args!(#trace_format_string, #trace_format_args), + ); + + let retval = #real_fn(thd, #(#args_call),*).await.into_retval(); + + eonix_log::println_trace!( + "trace_syscall", + "}} => {:x?}", + retval, + ); + + retval + }, + thd_alloc + ) + ) + } } #(#attrs)* #[link_section = #syscall_fn_section] - #vis fn #real_fn( + #vis async fn #real_fn( thread: &crate::kernel::task::Thread, #(#args),* ) #ty_ret #body diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index f328dc74..9f0adac5 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -308,11 +308,11 @@ impl Inode for FileInode { Ok(buffer.wrote()) } - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { todo!() } - fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write_direct(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { todo!() } } @@ -322,7 +322,7 @@ impl PageCacheBackend for FileInode { self.read_direct(page, offset) } - fn write_page(&self, page: &CachePage, offset: usize) -> KResult { + fn write_page(&self, _page: &CachePage, _offset: usize) -> KResult { todo!() } diff --git a/src/io.rs b/src/io.rs index f1eae9b9..85675dea 100644 --- a/src/io.rs +++ b/src/io.rs @@ -30,7 +30,7 @@ impl FillResult { } } -pub trait Buffer { +pub trait Buffer: Send { fn total(&self) -> usize; fn wrote(&self) -> usize; @@ -49,7 +49,7 @@ pub trait Buffer { } } -pub trait Stream { +pub trait Stream: Send { fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult>; fn ignore(&mut self, len: usize) -> KResult>; } @@ -131,6 +131,8 @@ pub struct UninitBuffer<'lt, T: Copy + Sized> { buffer: ByteBuffer<'lt>, } +unsafe impl<'lt, T: Copy> Send for UninitBuffer<'lt, T> {} + impl<'lt, T: Copy + Sized> UninitBuffer<'lt, 
T> { pub fn new() -> Self { let mut data = Box::new(MaybeUninit::uninit()); diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 4a10e4c7..ccd43c68 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -48,21 +48,6 @@ enum BlockDeviceType { }, } -#[derive(Debug, Clone)] -pub enum FileSystemType { - Ext4, - Fat32, -} - -impl FileSystemType { - pub fn as_str(&self) -> &'static str { - match self { - FileSystemType::Ext4 => "ext4", - FileSystemType::Fat32 => "fat32", - } - } -} - pub struct BlockDevice { /// Unique device identifier, major and minor numbers devid: DevId, diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs index 9394f64c..742727cb 100644 --- a/src/kernel/interrupt.rs +++ b/src/kernel/interrupt.rs @@ -1,4 +1,5 @@ use super::mem::handle_kernel_page_fault; +use super::task::block_on; use super::timer::timer_interrupt; use crate::kernel::constants::EINVAL; use crate::prelude::*; @@ -36,7 +37,7 @@ pub fn default_fault_handler(fault_type: Fault, trap_ctx: &mut TrapContext) { } => { let fault_pc = VAddr::from(trap_ctx.get_program_counter()); - if let Some(new_pc) = handle_kernel_page_fault(fault_pc, vaddr, error_code) { + if let Some(new_pc) = block_on(handle_kernel_page_fault(fault_pc, vaddr, error_code)) { trap_ctx.set_program_counter(new_pc.addr()); } } diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index 528d79ad..731c5303 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -2,7 +2,6 @@ use super::mm_list::EMPTY_PAGE; use super::paging::AllocZeroed as _; use super::{AsMemoryBlock, Mapping, Page, Permission}; use crate::kernel::constants::EINVAL; -use crate::kernel::task::block_on; use crate::prelude::KResult; use core::borrow::Borrow; use core::cell::UnsafeCell; @@ -19,6 +18,9 @@ pub struct MMArea { pub is_shared: bool, } +unsafe impl Send for MMArea {} +unsafe impl Sync for MMArea {} + impl Clone for MMArea { fn clone(&self) -> Self { Self { @@ -200,7 +202,7 @@ impl MMArea { Ok(()) } - 
pub fn handle(&self, pte: &mut impl PTE, offset: usize, write: bool) -> KResult<()> { + pub async fn handle(&self, pte: &mut impl PTE, offset: usize, write: bool) -> KResult<()> { let mut attr = pte.get_attr().as_page_attr().expect("Not a page attribute"); let mut pfn = pte.get_pfn(); @@ -209,7 +211,7 @@ impl MMArea { } if attr.contains(PageAttribute::MAPPED) { - block_on(self.handle_mmap(&mut pfn, &mut attr, offset, write))?; + self.handle_mmap(&mut pfn, &mut attr, offset, write).await?; } attr.insert(PageAttribute::ACCESSED); diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index 6593624b..ad1e45c2 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -7,7 +7,6 @@ use super::paging::AllocZeroed as _; use super::{AsMemoryBlock, MMArea, Page}; use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; use crate::kernel::mem::page_alloc::RawPagePtr; -use crate::kernel::task::block_on; use crate::{prelude::*, sync::ArcSwap}; use alloc::collections::btree_set::BTreeSet; use core::fmt; @@ -488,7 +487,7 @@ impl MMList { Ok(()) } - pub fn map_vdso(&self) -> KResult<()> { + pub async fn map_vdso(&self) -> KResult<()> { unsafe extern "C" { fn VDSO_PADDR(); } @@ -507,7 +506,7 @@ impl MMList { const VDSO_SIZE: usize = 0x1000; let inner = self.inner.borrow(); - let inner = block_on(inner.lock()); + let inner = inner.lock().await; let mut pte_iter = inner .page_table @@ -529,7 +528,7 @@ impl MMList { Ok(()) } - pub fn mmap_hint( + pub async fn mmap_hint( &self, hint: VAddr, len: usize, @@ -538,7 +537,7 @@ impl MMList { is_shared: bool, ) -> KResult { let inner = self.inner.borrow(); - let mut inner = block_on(inner.lock()); + let mut inner = inner.lock().await; if hint == VAddr::NULL { let at = inner.find_available(hint, len).ok_or(ENOMEM)?; @@ -557,7 +556,7 @@ impl MMList { } } - pub fn mmap_fixed( + pub async fn mmap_fixed( &self, at: VAddr, len: usize, @@ -565,14 +564,17 @@ impl MMList { permission: Permission, is_shared: bool, ) 
-> KResult { - block_on(self.inner.borrow().lock()) + self.inner + .borrow() + .lock() + .await .mmap(at, len, mapping.clone(), permission, is_shared) .map(|_| at) } - pub fn set_break(&self, pos: Option) -> VAddr { + pub async fn set_break(&self, pos: Option) -> VAddr { let inner = self.inner.borrow(); - let mut inner = block_on(inner.lock()); + let mut inner = inner.lock().await; // SAFETY: `set_break` is only called in syscalls, where program break should be valid. assert!(inner.break_start.is_some() && inner.break_pos.is_some()); @@ -629,9 +631,9 @@ impl MMList { } /// This should be called only **once** for every thread. - pub fn register_break(&self, start: VAddr) { + pub async fn register_break(&self, start: VAddr) { let inner = self.inner.borrow(); - let mut inner = block_on(inner.lock()); + let mut inner = inner.lock().await; assert!(inner.break_start.is_none() && inner.break_pos.is_none()); inner.break_start = Some(start.into()); @@ -640,7 +642,7 @@ impl MMList { /// Access the memory area with the given function. /// The function will be called with the offset of the area and the slice of the area. - pub fn access_mut(&self, start: VAddr, len: usize, func: F) -> KResult<()> + pub async fn access_mut(&self, start: VAddr, len: usize, func: F) -> KResult<()> where F: Fn(usize, &mut [u8]), { @@ -651,7 +653,7 @@ impl MMList { } let inner = self.inner.borrow(); - let inner = block_on(inner.lock()); + let inner = inner.lock().await; let mut offset = 0; let mut remaining = len; @@ -676,7 +678,7 @@ impl MMList { let page_end = page_start + 0x1000; // Prepare for the worst case that we might write to the page... 
- area.handle(pte, page_start - area_start, true)?; + area.handle(pte, page_start - area_start, true).await?; let start_offset; if page_start < current { diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs index 089fdf06..6f14583d 100644 --- a/src/kernel/mem/mm_list/page_fault.rs +++ b/src/kernel/mem/mm_list/page_fault.rs @@ -1,5 +1,5 @@ use super::{MMList, VAddr}; -use crate::kernel::task::{block_on, Thread}; +use crate::kernel::task::Thread; use eonix_hal::mm::flush_tlb; use eonix_hal::traits::fault::PageFaultErrorCode; use eonix_mm::address::{Addr as _, AddrOps as _, VRange}; @@ -94,6 +94,7 @@ impl MMList { addr.floor() - area.range().start(), error.contains(PageFaultErrorCode::Write), ) + .await .map_err(|_| Signal::SIGBUS)?; flush_tlb(addr.floor().addr()); @@ -128,7 +129,7 @@ fn kernel_page_fault_die(vaddr: VAddr, pc: VAddr) -> ! { ) } -pub fn handle_kernel_page_fault( +pub async fn handle_kernel_page_fault( fault_pc: VAddr, addr: VAddr, error: PageFaultErrorCode, @@ -148,7 +149,7 @@ pub fn handle_kernel_page_fault( let mms = &Thread::current().process.mm_list; let inner = mms.inner.borrow(); - let inner = block_on(inner.lock()); + let inner = inner.lock().await; let area = match inner.areas.get(&VRange::from(addr)) { Some(area) => area, @@ -163,11 +164,14 @@ pub fn handle_kernel_page_fault( .next() .expect("If we can find the mapped area, we should be able to find the PTE"); - if let Err(_) = area.handle( - pte, - addr.floor() - area.range().start(), - error.contains(PageFaultErrorCode::Write), - ) { + if let Err(_) = area + .handle( + pte, + addr.floor() - area.range().start(), + error.contains(PageFaultErrorCode::Write), + ) + .await + { return Some(try_page_fault_fix(fault_pc, addr)); } diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 863e538e..e0567d21 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -26,6 +26,8 @@ unsafe impl Sync for PageCache {} 
#[derive(Clone, Copy)] pub struct CachePage(RawPagePtr); +unsafe impl Send for CachePage {} + impl Buffer for CachePage { fn total(&self) -> usize { PAGE_SIZE diff --git a/src/kernel/syscall.rs b/src/kernel/syscall.rs index 0276ebdf..4131f3c4 100644 --- a/src/kernel/syscall.rs +++ b/src/kernel/syscall.rs @@ -1,5 +1,10 @@ +use super::task::ThreadAlloc; use crate::kernel::task::Thread; +use alloc::boxed::Box; +use core::{future::Future, marker::PhantomData, ops::Deref, pin::Pin}; +use eonix_mm::address::{Addr, VAddr}; use eonix_sync::LazyLock; +use posix_types::ctypes::PtrT; pub mod file_rw; pub mod mm; @@ -12,15 +17,33 @@ const MAX_SYSCALL_NO: usize = 512; #[derive(Debug, Clone, Copy)] pub struct SyscallNoReturn; +#[derive(Clone, Copy)] +pub struct User(VAddr, PhantomData); + +#[derive(Clone, Copy)] +pub struct UserMut(VAddr, PhantomData); + #[repr(C)] pub(self) struct RawSyscallHandler { no: usize, - handler: fn(&Thread, [usize; 6]) -> Option, + handler: for<'thd, 'alloc> fn( + &'thd Thread, + ThreadAlloc<'alloc>, + [usize; 6], + ) -> Pin< + Box> + Send + 'thd, ThreadAlloc<'alloc>>, + >, name: &'static str, } pub struct SyscallHandler { - pub handler: fn(&Thread, [usize; 6]) -> Option, + pub handler: for<'thd, 'alloc> fn( + &'thd Thread, + ThreadAlloc<'alloc>, + [usize; 6], + ) -> Pin< + Box> + Send + 'thd, ThreadAlloc<'alloc>>, + >, pub name: &'static str, } @@ -80,6 +103,18 @@ impl SyscallRetVal for SyscallNoReturn { } } +impl SyscallRetVal for User { + fn into_retval(self) -> Option { + Some(self.0.addr()) + } +} + +impl SyscallRetVal for UserMut { + fn into_retval(self) -> Option { + Some(self.0.addr()) + } +} + #[cfg(not(target_arch = "x86_64"))] impl SyscallRetVal for u64 { fn into_retval(self) -> Option { @@ -112,15 +147,129 @@ impl FromSyscallArg for usize { } } -impl FromSyscallArg for *const T { - fn from_arg(value: usize) -> *const T { - value as *const T +impl FromSyscallArg for PtrT { + fn from_arg(value: usize) -> Self { + 
PtrT::new(value).expect("Invalid user pointer value") + } +} + +impl FromSyscallArg for User { + fn from_arg(value: usize) -> User { + User(VAddr::from(value), PhantomData) + } +} + +impl FromSyscallArg for UserMut { + fn from_arg(value: usize) -> UserMut { + UserMut(VAddr::from(value), PhantomData) + } +} + +impl User { + pub const fn new(addr: VAddr) -> Self { + Self(addr, PhantomData) + } + + pub const fn with_addr(addr: usize) -> Self { + Self::new(VAddr::from(addr)) + } + + pub const fn null() -> Self { + Self(VAddr::NULL, PhantomData) + } + + pub fn is_null(&self) -> bool { + self.0.addr() == 0 + } + + pub const fn cast(self) -> User { + User(self.0, PhantomData) + } + + pub fn offset(self, off: isize) -> Self { + Self( + VAddr::from( + self.0 + .addr() + .checked_add_signed(off) + .expect("offset overflow"), + ), + PhantomData, + ) + } + + pub const unsafe fn as_mut(self) -> UserMut { + UserMut(self.0, PhantomData) + } +} + +impl UserMut { + pub const fn new(addr: VAddr) -> Self { + Self(addr, PhantomData) + } + + pub const fn with_addr(addr: usize) -> Self { + Self::new(VAddr::from(addr)) + } + + pub const fn null() -> Self { + Self(VAddr::NULL, PhantomData) + } + + pub fn is_null(&self) -> bool { + self.0.addr() == 0 + } + + pub const fn cast(self) -> UserMut { + UserMut(self.0, PhantomData) + } + + pub fn offset(self, off: isize) -> Self { + Self( + VAddr::from( + self.0 + .addr() + .checked_add_signed(off) + .expect("offset overflow"), + ), + PhantomData, + ) + } + + pub const fn as_const(self) -> User { + User(self.0, PhantomData) + } + + pub const fn vaddr(&self) -> VAddr { + self.0 + } +} + +impl Deref for User { + type Target = VAddr; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Deref for UserMut { + type Target = VAddr; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl core::fmt::Debug for User { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "User({:#x?})", self.0.addr()) } } 
-impl FromSyscallArg for *mut T { - fn from_arg(value: usize) -> *mut T { - value as *mut T +impl core::fmt::Debug for UserMut { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "UserMut({:#x?})", self.0.addr()) } } diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 97d47c48..ef222123 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,18 +1,16 @@ -use super::FromSyscallArg; +use super::{FromSyscallArg, User}; use crate::io::IntoStream; use crate::kernel::constants::{ EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, S_IFBLK, S_IFCHR, }; -use crate::kernel::task::{block_on, Thread}; +use crate::kernel::syscall::UserMut; +use crate::kernel::task::Thread; use crate::kernel::timer::sleep; use crate::kernel::vfs::filearray::FD; use crate::{ io::{Buffer, BufferFill}, kernel::{ - user::{ - dataflow::{CheckedUserPointer, UserBuffer, UserString}, - UserPointer, UserPointerMut, - }, + user::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}, vfs::{ dentry::Dentry, file::{PollEvent, SeekOption}, @@ -47,7 +45,7 @@ impl FromSyscallArg for AtFlags { fn dentry_from( thread: &Thread, dirfd: FD, - pathname: *const u8, + pathname: User, follow_symlink: bool, ) -> KResult> { let path = UserString::new(pathname)?; @@ -72,83 +70,91 @@ fn dentry_from( } #[eonix_macros::define_syscall(SYS_READ)] -fn read(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { +async fn read(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - block_on(thread.files.get(fd).ok_or(EBADF)?.read(&mut buffer, None)) + thread + .files + .get(fd) + .ok_or(EBADF)? 
+ .read(&mut buffer, None) + .await } #[eonix_macros::define_syscall(SYS_PREAD64)] -fn pread64(fd: FD, buffer: *mut u8, bufsize: usize, offset: usize) -> KResult { +async fn pread64(fd: FD, buffer: UserMut, bufsize: usize, offset: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - block_on( - thread - .files - .get(fd) - .ok_or(EBADF)? - .read(&mut buffer, Some(offset)), - ) + thread + .files + .get(fd) + .ok_or(EBADF)? + .read(&mut buffer, Some(offset)) + .await } #[eonix_macros::define_syscall(SYS_WRITE)] -fn write(fd: FD, buffer: *const u8, count: usize) -> KResult { +async fn write(fd: FD, buffer: User, count: usize) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); - block_on(thread.files.get(fd).ok_or(EBADF)?.write(&mut stream, None)) + thread + .files + .get(fd) + .ok_or(EBADF)? + .write(&mut stream, None) + .await } #[eonix_macros::define_syscall(SYS_PWRITE64)] -fn pwrite64(fd: FD, buffer: *const u8, count: usize, offset: usize) -> KResult { +async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); - block_on( - thread - .files - .get(fd) - .ok_or(EBADF)? - .write(&mut stream, Some(offset)), - ) + thread + .files + .get(fd) + .ok_or(EBADF)? 
+ .write(&mut stream, Some(offset)) + .await } #[eonix_macros::define_syscall(SYS_OPENAT)] -fn openat(dirfd: FD, pathname: *const u8, flags: OpenFlags, mode: u32) -> KResult { +async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mode: u32) -> KResult { let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink())?; thread.files.open(&dentry, flags, mode) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_OPEN)] -fn open(path: *const u8, flags: OpenFlags, mode: u32) -> KResult { - sys_openat(thread, FD::AT_FDCWD, path, flags, mode) +async fn open(path: User, flags: OpenFlags, mode: u32) -> KResult { + sys_openat(thread, FD::AT_FDCWD, path, flags, mode).await } #[eonix_macros::define_syscall(SYS_CLOSE)] -fn close(fd: FD) -> KResult<()> { +async fn close(fd: FD) -> KResult<()> { thread.files.close(fd) } #[eonix_macros::define_syscall(SYS_DUP)] -fn dup(fd: FD) -> KResult { +async fn dup(fd: FD) -> KResult { thread.files.dup(fd) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_DUP2)] -fn dup2(old_fd: FD, new_fd: FD) -> KResult { +async fn dup2(old_fd: FD, new_fd: FD) -> KResult { thread.files.dup_to(old_fd, new_fd, OpenFlags::empty()) } #[eonix_macros::define_syscall(SYS_DUP3)] -fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { +async fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { thread.files.dup_to(old_fd, new_fd, flags) } #[eonix_macros::define_syscall(SYS_PIPE2)] -fn pipe2(pipe_fd: *mut [FD; 2], flags: OpenFlags) -> KResult<()> { - let mut buffer = UserBuffer::new(pipe_fd as *mut u8, core::mem::size_of::<[FD; 2]>())?; +async fn pipe2(pipe_fd: UserMut<[FD; 2]>, flags: OpenFlags) -> KResult<()> { + let mut buffer = UserBuffer::new(pipe_fd.cast(), core::mem::size_of::<[FD; 2]>())?; let (read_fd, write_fd) = thread.files.pipe(flags)?; buffer.copy(&[read_fd, write_fd])?.ok_or(EFAULT) @@ -156,13 +162,13 @@ fn pipe2(pipe_fd: *mut [FD; 2], flags: OpenFlags) -> KResult<()> { 
#[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_PIPE)] -fn pipe(pipe_fd: *mut [FD; 2]) -> KResult<()> { - sys_pipe2(thread, pipe_fd, OpenFlags::empty()) +async fn pipe(pipe_fd: UserMut<[FD; 2]>) -> KResult<()> { + sys_pipe2(thread, pipe_fd, OpenFlags::empty()).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETDENTS)] -fn getdents(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { +async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread.files.get(fd).ok_or(EBADF)?.getdents(&mut buffer)?; @@ -170,7 +176,7 @@ fn getdents(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_GETDENTS64)] -fn getdents64(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { +async fn getdents64(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread.files.get(fd).ok_or(EBADF)?.getdents64(&mut buffer)?; @@ -182,7 +188,12 @@ fn getdents64(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { eonix_macros::define_syscall(SYS_NEWFSTATAT) )] #[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTATAT64))] -fn newfstatat(dirfd: FD, pathname: *const u8, statbuf: *mut Stat, flags: AtFlags) -> KResult<()> { +async fn newfstatat( + dirfd: FD, + pathname: User, + statbuf: UserMut, + flags: AtFlags, +) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -205,23 +216,17 @@ fn newfstatat(dirfd: FD, pathname: *const u8, statbuf: *mut Stat, flags: AtFlags eonix_macros::define_syscall(SYS_NEWFSTAT) )] #[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTAT64))] -fn newfstat(fd: FD, statbuf: *mut Stat) -> KResult<()> { - sys_newfstatat( - thread, - fd, - core::ptr::null(), - statbuf, - AtFlags::AT_EMPTY_PATH, - ) +async fn newfstat(fd: FD, statbuf: UserMut) -> 
KResult<()> { + sys_newfstatat(thread, fd, User::null(), statbuf, AtFlags::AT_EMPTY_PATH).await } #[eonix_macros::define_syscall(SYS_STATX)] -fn statx( +async fn statx( dirfd: FD, - pathname: *const u8, + pathname: User, flags: AtFlags, mask: u32, - buffer: *mut StatX, + buffer: UserMut, ) -> KResult<()> { if !flags.statx_default_sync() { unimplemented!("statx with no default sync flags: {:?}", flags); @@ -244,7 +249,7 @@ fn statx( } #[eonix_macros::define_syscall(SYS_MKDIRAT)] -fn mkdirat(dirfd: FD, pathname: *const u8, mode: u32) -> KResult<()> { +async fn mkdirat(dirfd: FD, pathname: User, mode: u32) -> KResult<()> { let umask = *thread.fs_context.umask.lock(); let mode = mode & !umask & 0o777; @@ -254,19 +259,19 @@ fn mkdirat(dirfd: FD, pathname: *const u8, mode: u32) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKDIR)] -fn mkdir(pathname: *const u8, mode: u32) -> KResult<()> { - sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode) +async fn mkdir(pathname: User, mode: u32) -> KResult<()> { + sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode).await } #[eonix_macros::define_syscall(SYS_FTRUNCATE64)] -fn truncate64(fd: FD, length: usize) -> KResult<()> { +async fn truncate64(fd: FD, length: usize) -> KResult<()> { let file = thread.files.get(fd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.truncate(length) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_TRUNCATE)] -fn truncate(pathname: *const u8, length: usize) -> KResult<()> { +async fn truncate(pathname: User, length: usize) -> KResult<()> { let path = UserString::new(pathname)?; let path = Path::new(path.as_cstr().to_bytes())?; @@ -276,18 +281,18 @@ fn truncate(pathname: *const u8, length: usize) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_UNLINKAT)] -fn unlinkat(dirfd: FD, pathname: *const u8) -> KResult<()> { +async fn unlinkat(dirfd: FD, pathname: User) -> KResult<()> { dentry_from(thread, dirfd, pathname, false)?.unlink() } #[cfg(target_arch = 
"x86_64")] #[eonix_macros::define_syscall(SYS_UNLINK)] -fn unlink(pathname: *const u8) -> KResult<()> { +async fn unlink(pathname: User) -> KResult<()> { sys_unlinkat(thread, FD::AT_FDCWD, pathname) } #[eonix_macros::define_syscall(SYS_SYMLINKAT)] -fn symlinkat(target: *const u8, dirfd: FD, linkpath: *const u8) -> KResult<()> { +async fn symlinkat(target: User, dirfd: FD, linkpath: User) -> KResult<()> { let target = UserString::new(target)?; let dentry = dentry_from(thread, dirfd, linkpath, false)?; @@ -296,12 +301,12 @@ fn symlinkat(target: *const u8, dirfd: FD, linkpath: *const u8) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SYMLINK)] -fn symlink(target: *const u8, linkpath: *const u8) -> KResult<()> { +async fn symlink(target: User, linkpath: User) -> KResult<()> { sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath) } #[eonix_macros::define_syscall(SYS_MKNODAT)] -fn mknodat(dirfd: FD, pathname: *const u8, mode: u32, dev: u32) -> KResult<()> { +async fn mknodat(dirfd: FD, pathname: User, mode: u32, dev: u32) -> KResult<()> { let dentry = dentry_from(thread, dirfd, pathname, true)?; let umask = *thread.fs_context.umask.lock(); @@ -312,12 +317,17 @@ fn mknodat(dirfd: FD, pathname: *const u8, mode: u32, dev: u32) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKNOD)] -fn mknod(pathname: *const u8, mode: u32, dev: u32) -> KResult<()> { - sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev) +async fn mknod(pathname: User, mode: u32, dev: u32) -> KResult<()> { + sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev).await } #[eonix_macros::define_syscall(SYS_READLINKAT)] -fn readlinkat(dirfd: FD, pathname: *const u8, buffer: *mut u8, bufsize: usize) -> KResult { +async fn readlinkat( + dirfd: FD, + pathname: User, + buffer: UserMut, + bufsize: usize, +) -> KResult { let dentry = dentry_from(thread, dirfd, pathname, false)?; let mut buffer = UserBuffer::new(buffer, bufsize)?; @@ -326,11 
+336,11 @@ fn readlinkat(dirfd: FD, pathname: *const u8, buffer: *mut u8, bufsize: usize) - #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_READLINK)] -fn readlink(pathname: *const u8, buffer: *mut u8, bufsize: usize) -> KResult { - sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize) +async fn readlink(pathname: User, buffer: UserMut, bufsize: usize) -> KResult { + sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize).await } -fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { +async fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; Ok(match whence { @@ -343,17 +353,23 @@ fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { #[cfg(not(target_arch = "x86_64"))] #[eonix_macros::define_syscall(SYS_LSEEK)] -fn lseek(fd: FD, offset: u64, whence: u32) -> KResult { - do_lseek(thread, fd, offset, whence) +async fn lseek(fd: FD, offset: u64, whence: u32) -> KResult { + do_lseek(thread, fd, offset, whence).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_LLSEEK)] -fn llseek(fd: FD, offset_high: u32, offset_low: u32, result: *mut u64, whence: u32) -> KResult<()> { - let mut result = UserBuffer::new(result as *mut u8, core::mem::size_of::())?; +fn llseek( + fd: FD, + offset_high: u32, + offset_low: u32, + result: UserMut, + whence: u32, +) -> KResult<()> { + let mut result = UserBuffer::new(result.cast(), core::mem::size_of::())?; let offset = ((offset_high as u64) << 32) | (offset_low as u64); - let new_offset = do_lseek(thread, fd, offset, whence)?; + let new_offset = do_lseek(thread, fd, offset, whence).await?; result.copy(&new_offset)?.ok_or(EFAULT) } @@ -366,7 +382,7 @@ struct IoVec { } #[eonix_macros::define_syscall(SYS_READV)] -fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { +async fn readv(fd: FD, iov_user: User, iovcnt: u32) -> KResult { let file = 
thread.files.get(fd).ok_or(EBADF)?; let mut iov_user = UserPointer::new(iov_user)?; @@ -381,14 +397,16 @@ fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { Ok(IoVec { len: Long::ZERO, .. }) => None, - Ok(IoVec { base, len }) => Some(UserBuffer::new(base.addr() as *mut u8, len.get())), + Ok(IoVec { base, len }) => { + Some(UserBuffer::new(UserMut::with_addr(base.addr()), len.get())) + } }) .collect::>>()?; let mut tot = 0usize; for mut buffer in iov_buffers.into_iter() { // TODO!!!: `readv` - let nread = block_on(file.read(&mut buffer, None))?; + let nread = file.read(&mut buffer, None).await?; tot += nread; if nread != buffer.total() { @@ -400,7 +418,7 @@ fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_WRITEV)] -fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { +async fn writev(fd: FD, iov_user: User, iovcnt: u32) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; let mut iov_user = UserPointer::new(iov_user)?; @@ -416,7 +434,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { len: Long::ZERO, .. 
}) => None, Ok(IoVec { base, len }) => Some( - CheckedUserPointer::new(base.addr() as *mut u8, len.get()) + CheckedUserPointer::new(User::with_addr(base.addr()), len.get()) .map(|ptr| ptr.into_stream()), ), }) @@ -424,7 +442,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { let mut tot = 0usize; for mut stream in iov_streams.into_iter() { - let nread = block_on(file.write(&mut stream, None))?; + let nread = file.write(&mut stream, None).await?; tot += nread; if nread == 0 || !stream.is_drained() { @@ -436,7 +454,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_FACCESSAT)] -fn faccessat(dirfd: FD, pathname: *const u8, _mode: u32, flags: AtFlags) -> KResult<()> { +async fn faccessat(dirfd: FD, pathname: User, _mode: u32, flags: AtFlags) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -462,12 +480,12 @@ fn faccessat(dirfd: FD, pathname: *const u8, _mode: u32, flags: AtFlags) -> KRes #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_ACCESS)] -fn access(pathname: *const u8, mode: u32) -> KResult<()> { - sys_faccessat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()) +async fn access(pathname: User, mode: u32) -> KResult<()> { + sys_faccessat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()).await } #[eonix_macros::define_syscall(SYS_SENDFILE64)] -fn sendfile64(out_fd: FD, in_fd: FD, offset: *mut u8, count: usize) -> KResult { +async fn sendfile64(out_fd: FD, in_fd: FD, offset: UserMut, count: usize) -> KResult { let in_file = thread.files.get(in_fd).ok_or(EBADF)?; let out_file = thread.files.get(out_fd).ok_or(EBADF)?; @@ -475,18 +493,18 @@ fn sendfile64(out_fd: FD, in_fd: FD, offset: *mut u8, count: usize) -> KResult KResult { +async fn ioctl(fd: FD, request: usize, arg3: usize) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; file.ioctl(request, arg3) 
} #[eonix_macros::define_syscall(SYS_FCNTL64)] -fn fcntl64(fd: FD, cmd: u32, arg: usize) -> KResult { +async fn fcntl64(fd: FD, cmd: u32, arg: usize) -> KResult { thread.files.fcntl(fd, cmd, arg) } @@ -498,7 +516,12 @@ struct UserPollFd { revents: u16, } -fn do_poll(thread: &Thread, fds: *mut UserPollFd, nfds: u32, _timeout: u32) -> KResult { +async fn do_poll( + thread: &Thread, + fds: UserMut, + nfds: u32, + _timeout: u32, +) -> KResult { match nfds { 0 => Ok(0), 2.. => unimplemented!("Poll with {} fds", nfds), @@ -511,7 +534,10 @@ fn do_poll(thread: &Thread, fds: *mut UserPollFd, nfds: u32, _timeout: u32) -> K let mut fd = fds.read()?; let file = thread.files.get(fd.fd).ok_or(EBADF)?; - fd.revents = block_on(file.poll(PollEvent::from_bits_retain(fd.events)))?.bits(); + fd.revents = file + .poll(PollEvent::from_bits_retain(fd.events)) + .await? + .bits(); fds.write(fd)?; Ok(1) @@ -520,24 +546,24 @@ fn do_poll(thread: &Thread, fds: *mut UserPollFd, nfds: u32, _timeout: u32) -> K } #[eonix_macros::define_syscall(SYS_PPOLL)] -fn ppoll( - fds: *mut UserPollFd, +async fn ppoll( + fds: UserMut, nfds: u32, - _timeout_ptr: *const TimeSpec, - _sigmask: *const SigSet, + _timeout_ptr: User, + _sigmask: User, ) -> KResult { // TODO: Implement ppoll with signal mask and timeout - do_poll(thread, fds, nfds, 0) + do_poll(thread, fds, nfds, 0).await } #[eonix_macros::define_syscall(SYS_PSELECT6)] -fn pselect6( +async fn pselect6( nfds: u32, - _readfds: *mut FDSet, - _writefds: *mut FDSet, - _exceptfds: *mut FDSet, - timeout: *mut TimeSpec, - _sigmask: *const (), + _readfds: UserMut, + _writefds: UserMut, + _exceptfds: UserMut, + timeout: UserMut, + _sigmask: User<()>, ) -> KResult { // According to [pthread6(2)](https://linux.die.net/man/2/pselect6): // Some code calls select() with all three sets empty, nfds zero, and @@ -552,7 +578,7 @@ fn pselect6( // Read here to check for invalid pointers. 
let _timeout_value = timeout.read()?; - block_on(sleep(Duration::from_millis(10))); + sleep(Duration::from_millis(10)).await; timeout.write(TimeSpec { tv_sec: 0, @@ -564,12 +590,18 @@ fn pselect6( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_POLL)] -fn poll(fds: *mut UserPollFd, nfds: u32, timeout: u32) -> KResult { - do_poll(thread, fds, nfds, timeout) +async fn poll(fds: UserMut, nfds: u32, timeout: u32) -> KResult { + do_poll(thread, fds, nfds, timeout).await } #[eonix_macros::define_syscall(SYS_FCHOWNAT)] -fn fchownat(dirfd: FD, pathname: *const u8, uid: u32, gid: u32, flags: AtFlags) -> KResult<()> { +async fn fchownat( + dirfd: FD, + pathname: User, + uid: u32, + gid: u32, + flags: AtFlags, +) -> KResult<()> { let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow())?; if !dentry.is_valid() { return Err(ENOENT); @@ -579,7 +611,7 @@ fn fchownat(dirfd: FD, pathname: *const u8, uid: u32, gid: u32, flags: AtFlags) } #[eonix_macros::define_syscall(SYS_FCHMODAT)] -fn fchmodat(dirfd: FD, pathname: *const u8, mode: u32, flags: AtFlags) -> KResult<()> { +async fn fchmodat(dirfd: FD, pathname: User, mode: u32, flags: AtFlags) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -595,15 +627,15 @@ fn fchmodat(dirfd: FD, pathname: *const u8, mode: u32, flags: AtFlags) -> KResul } #[eonix_macros::define_syscall(SYS_FCHMOD)] -fn chmod(pathname: *const u8, mode: u32) -> KResult<()> { - sys_fchmodat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()) +async fn chmod(pathname: User, mode: u32) -> KResult<()> { + sys_fchmodat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()).await } #[eonix_macros::define_syscall(SYS_UTIMENSAT)] -fn utimensat( +async fn utimensat( dirfd: FD, - pathname: *const u8, - times: *const TimeSpec, + pathname: User, + times: User, flags: AtFlags, ) -> KResult<()> { let dentry = if flags.at_empty_path() { @@ -630,11 
+662,11 @@ fn utimensat( } #[eonix_macros::define_syscall(SYS_RENAMEAT2)] -fn renameat2( +async fn renameat2( old_dirfd: FD, - old_pathname: *const u8, + old_pathname: User, new_dirfd: FD, - new_pathname: *const u8, + new_pathname: User, flags: u32, ) -> KResult<()> { let flags = RenameFlags::from_bits(flags).ok_or(EINVAL)?; @@ -652,7 +684,7 @@ fn renameat2( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_RENAME)] -fn rename(old_pathname: *const u8, new_pathname: *const u8) -> KResult<()> { +async fn rename(old_pathname: User, new_pathname: User) -> KResult<()> { sys_renameat2( thread, FD::AT_FDCWD, @@ -661,6 +693,7 @@ fn rename(old_pathname: *const u8, new_pathname: *const u8) -> KResult<()> { new_pathname, 0, ) + .await } pub fn keep_alive() {} diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index b6ba5fdc..547635d4 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -2,7 +2,7 @@ use super::FromSyscallArg; use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; use crate::kernel::mem::FileMapping; -use crate::kernel::task::{block_on, Thread}; +use crate::kernel::task::Thread; use crate::kernel::vfs::filearray::FD; use crate::{ kernel::{ @@ -39,7 +39,7 @@ fn check_impl(condition: bool, err: u32) -> KResult<()> { } } -fn do_mmap2( +async fn do_mmap2( thread: &Thread, addr: usize, len: usize, @@ -67,7 +67,10 @@ fn do_mmap2( } else { // The mode is unimportant here, since we are checking prot in mm_area. let shared_area = - block_on(SHM_MANAGER.lock()).create_shared_area(len, thread.process.pid, 0x777); + SHM_MANAGER + .lock() + .await + .create_shared_area(len, thread.process.pid, 0x777); Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) } } else { @@ -90,10 +93,14 @@ fn do_mmap2( // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether // `addr` is above user reachable memory. 
let addr = if flags.contains(UserMmapFlags::MAP_FIXED) { - block_on(mm_list.unmap(addr, len)); - mm_list.mmap_fixed(addr, len, mapping, permission, is_shared) + mm_list.unmap(addr, len).await?; + mm_list + .mmap_fixed(addr, len, mapping, permission, is_shared) + .await } else { - mm_list.mmap_hint(addr, len, mapping, permission, is_shared) + mm_list + .mmap_hint(addr, len, mapping, permission, is_shared) + .await }; addr.map(|addr| addr.addr()) @@ -101,7 +108,7 @@ fn do_mmap2( #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] #[eonix_macros::define_syscall(SYS_MMAP)] -fn mmap( +async fn mmap( addr: usize, len: usize, prot: UserMmapProtocol, @@ -109,12 +116,12 @@ fn mmap( fd: FD, offset: usize, ) -> KResult { - do_mmap2(thread, addr, len, prot, flags, fd, offset) + do_mmap2(thread, addr, len, prot, flags, fd, offset).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MMAP2)] -fn mmap2( +async fn mmap2( addr: usize, len: usize, prot: UserMmapProtocol, @@ -122,33 +129,33 @@ fn mmap2( fd: FD, pgoffset: usize, ) -> KResult { - do_mmap2(thread, addr, len, prot, flags, fd, pgoffset) + do_mmap2(thread, addr, len, prot, flags, fd, pgoffset).await } #[eonix_macros::define_syscall(SYS_MUNMAP)] -fn munmap(addr: usize, len: usize) -> KResult { +async fn munmap(addr: usize, len: usize) -> KResult<()> { let addr = VAddr::from(addr); if !addr.is_page_aligned() || len == 0 { return Err(EINVAL); } let len = len.align_up(PAGE_SIZE); - block_on(thread.process.mm_list.unmap(addr, len)).map(|_| 0) + thread.process.mm_list.unmap(addr, len).await } #[eonix_macros::define_syscall(SYS_BRK)] -fn brk(addr: usize) -> KResult { +async fn brk(addr: usize) -> KResult { let vaddr = if addr == 0 { None } else { Some(VAddr::from(addr)) }; - Ok(thread.process.mm_list.set_break(vaddr).addr()) + Ok(thread.process.mm_list.set_break(vaddr).await.addr()) } #[eonix_macros::define_syscall(SYS_MADVISE)] -fn madvise(_addr: usize, _len: usize, _advice: u32) -> KResult<()> 
{ +async fn madvise(_addr: usize, _len: usize, _advice: u32) -> KResult<()> { Ok(()) } #[eonix_macros::define_syscall(SYS_MPROTECT)] -fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { +async fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { let addr = VAddr::from(addr); if !addr.is_page_aligned() || len == 0 { return Err(EINVAL); @@ -156,22 +163,26 @@ fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { let len = len.align_up(PAGE_SIZE); - block_on(thread.process.mm_list.protect( - addr, - len, - Permission { - read: prot.contains(UserMmapProtocol::PROT_READ), - write: prot.contains(UserMmapProtocol::PROT_WRITE), - execute: prot.contains(UserMmapProtocol::PROT_EXEC), - }, - )) + thread + .process + .mm_list + .protect( + addr, + len, + Permission { + read: prot.contains(UserMmapProtocol::PROT_READ), + write: prot.contains(UserMmapProtocol::PROT_WRITE), + execute: prot.contains(UserMmapProtocol::PROT_EXEC), + }, + ) + .await } #[eonix_macros::define_syscall(SYS_SHMGET)] -fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { +async fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { let size = size.align_up(PAGE_SIZE); - let mut shm_manager = block_on(SHM_MANAGER.lock()); + let mut shm_manager = SHM_MANAGER.lock().await; let shmid = gen_shm_id(key)?; let mode = shmflg & 0o777; @@ -197,16 +208,17 @@ fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { return Ok(shmid); } - return Err(ENOENT); + Err(ENOENT) } #[eonix_macros::define_syscall(SYS_SHMAT)] -fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { +async fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { let mm_list = &thread.process.mm_list; - let shm_manager = block_on(SHM_MANAGER.lock()); + let shm_manager = SHM_MANAGER.lock().await; let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?; - let mode = shmflg & 0o777; + // Why is this not used? 
+ let _mode = shmflg & 0o777; let shmflg = ShmFlags::from_bits_truncate(shmflg); let mut permission = Permission { @@ -235,9 +247,13 @@ fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { return Err(EINVAL); } let addr = VAddr::from(addr.align_down(PAGE_SIZE)); - mm_list.mmap_fixed(addr, size, mapping, permission, true) + mm_list + .mmap_fixed(addr, size, mapping, permission, true) + .await } else { - mm_list.mmap_hint(VAddr::NULL, size, mapping, permission, true) + mm_list + .mmap_hint(VAddr::NULL, size, mapping, permission, true) + .await }?; thread.process.shm_areas.lock().insert(addr, size); @@ -246,22 +262,29 @@ fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_SHMDT)] -fn shmdt(addr: usize) -> KResult { +async fn shmdt(addr: usize) -> KResult<()> { let addr = VAddr::from(addr); - let mut shm_areas = thread.process.shm_areas.lock(); - let size = *shm_areas.get(&addr).ok_or(EINVAL)?; - shm_areas.remove(&addr); - drop(shm_areas); - return block_on(thread.process.mm_list.unmap(addr, size)).map(|_| 0); + + let size = { + let mut shm_areas = thread.process.shm_areas.lock(); + let size = *shm_areas.get(&addr).ok_or(EINVAL)?; + shm_areas.remove(&addr); + + size + }; + + thread.process.mm_list.unmap(addr, size).await } #[eonix_macros::define_syscall(SYS_SHMCTL)] -fn shmctl(shmid: u32, op: i32, shmid_ds: usize) -> KResult { +async fn shmctl(_shmid: u32, _op: i32, _shmid_ds: usize) -> KResult { + // TODO Ok(0) } #[eonix_macros::define_syscall(SYS_MEMBARRIER)] -fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> { +async fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> { + // TODO Ok(()) } diff --git a/src/kernel/syscall/net.rs b/src/kernel/syscall/net.rs index 82ec9152..41ac58e6 100644 --- a/src/kernel/syscall/net.rs +++ b/src/kernel/syscall/net.rs @@ -3,7 +3,7 @@ use crate::prelude::*; use posix_types::syscall_no::*; #[eonix_macros::define_syscall(SYS_SOCKET)] -fn socket(_domain: u32, _socket_type: 
u32, _protocol: u32) -> KResult { +async fn socket(_domain: u32, _socket_type: u32, _protocol: u32) -> KResult { Err(EINVAL) } diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 62194691..1dee462d 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -7,26 +7,26 @@ use crate::kernel::constants::{ ENOSYS, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, }; use crate::kernel::mem::PageBuffer; +use crate::kernel::syscall::{User, UserMut}; use crate::kernel::task::{ - block_on, do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, - ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, + do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, ProgramLoader, + RobustListHead, SignalAction, Thread, WaitId, WaitType, }; use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; -use crate::kernel::user::dataflow::UserString; +use crate::kernel::user::UserString; use crate::kernel::user::{UserPointer, UserPointerMut}; use crate::kernel::vfs::{self, dentry::Dentry}; use crate::path::Path; -use crate::{kernel::user::dataflow::UserBuffer, prelude::*}; +use crate::{kernel::user::UserBuffer, prelude::*}; use alloc::borrow::ToOwned; use alloc::ffi::CString; use bitflags::bitflags; -use core::ptr::NonNull; use core::time::Duration; use eonix_hal::processor::UserTLS; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; -use eonix_mm::address::{Addr as _, VAddr}; +use eonix_mm::address::Addr as _; use eonix_sync::AsProof as _; use posix_types::ctypes::PtrT; use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; @@ -49,7 +49,7 @@ bitflags! 
{ } #[eonix_macros::define_syscall(SYS_NANOSLEEP)] -fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { +async fn nanosleep(req: User<(u32, u32)>, rem: UserMut<(u32, u32)>) -> KResult { let req = UserPointer::new(req)?.read()?; let rem = if rem.is_null() { None @@ -58,7 +58,7 @@ fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - block_on(sleep(duration)); + sleep(duration).await; if let Some(rem) = rem { rem.write((0, 0))?; @@ -68,11 +68,11 @@ fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { } #[eonix_macros::define_syscall(SYS_CLOCK_NANOSLEEP)] -fn clock_nanosleep( +async fn clock_nanosleep( clock_id: u32, - flags: u32, - req: *const (u32, u32), - rem: *mut (u32, u32), + _flags: u32, + req: User<(u32, u32)>, + rem: UserMut<(u32, u32)>, ) -> KResult { if clock_id != CLOCK_REALTIME && clock_id != CLOCK_REALTIME_COARSE @@ -89,7 +89,7 @@ fn clock_nanosleep( }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - block_on(sleep(duration)); + sleep(duration).await; if let Some(rem) = rem { rem.write((0, 0))?; @@ -99,7 +99,7 @@ fn clock_nanosleep( } #[eonix_macros::define_syscall(SYS_UMASK)] -fn umask(mask: u32) -> KResult { +async fn umask(mask: u32) -> KResult { let mut umask = thread.fs_context.umask.lock(); let old = *umask; @@ -108,7 +108,7 @@ fn umask(mask: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_GETCWD)] -fn getcwd(buffer: *mut u8, bufsize: usize) -> KResult { +async fn getcwd(buffer: UserMut, bufsize: usize) -> KResult { let mut user_buffer = UserBuffer::new(buffer, bufsize)?; let mut buffer = PageBuffer::new(); @@ -121,7 +121,7 @@ fn getcwd(buffer: *mut u8, bufsize: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_CHDIR)] -fn chdir(path: *const u8) -> KResult<()> { +async fn chdir(path: User) -> KResult<()> { let path = UserString::new(path)?; let 
path = Path::new(path.as_cstr().to_bytes())?; @@ -139,7 +139,7 @@ fn chdir(path: *const u8) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_UMOUNT)] -fn umount(source: *const u8) -> KResult<()> { +async fn umount(source: User) -> KResult<()> { let source = UserString::new(source)?; if source.as_cstr().to_str().unwrap() == "./mnt" { return Ok(()); @@ -148,7 +148,7 @@ fn umount(source: *const u8) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_MOUNT)] -fn mount(source: *const u8, target: *const u8, fstype: *const u8, flags: usize) -> KResult<()> { +async fn mount(source: User, target: User, fstype: User, flags: usize) -> KResult<()> { let source = UserString::new(source)?; if source.as_cstr().to_str().unwrap() == "/dev/vda2" { return Ok(()); @@ -184,7 +184,7 @@ fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult> break; } - let user_string = UserString::new(ptr.addr() as *const u8)?; + let user_string = UserString::new(User::with_addr(ptr.addr()))?; strings.push(user_string.as_cstr().to_owned()); ptr_strings = ptr_strings.offset(1)?; } @@ -193,7 +193,7 @@ fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult> } #[eonix_macros::define_syscall(SYS_EXECVE)] -fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult { +async fn execve(exec: User, argv: User, envp: User) -> KResult { let exec = UserString::new(exec)?; let exec = exec.as_cstr().to_owned(); @@ -207,11 +207,12 @@ fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult KResult SyscallNoReturn { +async fn exit(status: u32) -> SyscallNoReturn { + let mut procs = ProcessList::get().write().await; + unsafe { - let mut procs = block_on(ProcessList::get().write()); - block_on(procs.do_exit(&thread, WaitType::Exited(status), false)); + procs + .do_exit(&thread, WaitType::Exited(status), false) + .await; } SyscallNoReturn } #[eonix_macros::define_syscall(SYS_EXIT_GROUP)] -fn exit_group(status: u32) -> SyscallNoReturn { +async fn 
exit_group(status: u32) -> SyscallNoReturn { + let mut procs = ProcessList::get().write().await; + unsafe { - let mut procs = block_on(ProcessList::get().write()); - block_on(procs.do_exit(&thread, WaitType::Exited(status), true)); + procs.do_exit(&thread, WaitType::Exited(status), true).await; } SyscallNoReturn } enum WaitInfo { - SigInfo(NonNull), - Status(NonNull), + SigInfo(UserMut), + Status(UserMut), None, } -fn do_waitid( +async fn do_waitid( thread: &Thread, wait_id: WaitId, info: WaitInfo, options: u32, - rusage: *mut RUsage, + rusage: UserMut, ) -> KResult { if !rusage.is_null() { unimplemented!("waitid with rusage pointer"); @@ -277,12 +282,15 @@ fn do_waitid( Some(options) => options, }; - let Some(wait_object) = block_on(thread.process.wait( - wait_id, - options.contains(UserWaitOptions::WNOHANG), - options.contains(UserWaitOptions::WUNTRACED), - options.contains(UserWaitOptions::WCONTINUED), - ))? + let Some(wait_object) = thread + .process + .wait( + wait_id, + options.contains(UserWaitOptions::WNOHANG), + options.contains(UserWaitOptions::WUNTRACED), + options.contains(UserWaitOptions::WCONTINUED), + ) + .await? 
else { return Ok(0); }; @@ -298,11 +306,11 @@ fn do_waitid( siginfo.si_status = status; siginfo.si_code = code; - UserPointerMut::new(siginfo_ptr.as_ptr())?.write(siginfo)?; + UserPointerMut::new(siginfo_ptr)?.write(siginfo)?; Ok(0) } WaitInfo::Status(status_ptr) => { - UserPointerMut::new(status_ptr.as_ptr())?.write(wait_object.code.to_wstatus())?; + UserPointerMut::new(status_ptr)?.write(wait_object.code.to_wstatus())?; Ok(wait_object.pid) } WaitInfo::None => Ok(wait_object.pid), @@ -310,18 +318,16 @@ fn do_waitid( } #[eonix_macros::define_syscall(SYS_WAITID)] -fn waitid( +async fn waitid( id_type: u32, id: u32, - info: *mut SigInfo, + info: UserMut, options: u32, - rusage: *mut RUsage, + rusage: UserMut, ) -> KResult { let wait_id = WaitId::from_type_and_id(id_type, id)?; - if let Some(info) = NonNull::new(info) { - do_waitid(thread, wait_id, WaitInfo::SigInfo(info), options, rusage) - } else { + if info.is_null() { /* * According to POSIX.1-2008, an application calling waitid() must * ensure that infop points to a siginfo_t structure (i.e., that it @@ -332,34 +338,41 @@ fn waitid( */ unimplemented!("waitid with null info pointer"); } + + do_waitid(thread, wait_id, WaitInfo::SigInfo(info), options, rusage).await } #[eonix_macros::define_syscall(SYS_WAIT4)] -fn wait4(wait_id: i32, arg1: *mut u32, options: u32, rusage: *mut RUsage) -> KResult { - let waitinfo = if let Some(status) = NonNull::new(arg1) { - WaitInfo::Status(status) - } else { +async fn wait4( + wait_id: i32, + arg1: UserMut, + options: u32, + rusage: UserMut, +) -> KResult { + let waitinfo = if arg1.is_null() { WaitInfo::None + } else { + WaitInfo::Status(arg1) }; let wait_id = WaitId::from_id(wait_id, thread); - do_waitid(thread, wait_id, waitinfo, options, rusage) + do_waitid(thread, wait_id, waitinfo, options, rusage).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_WAITPID)] -fn waitpid(waitpid: i32, arg1: *mut u32, options: u32) -> KResult { - sys_wait4(thread, waitpid, 
arg1, options, core::ptr::null_mut()) +async fn waitpid(waitpid: i32, arg1: UserMut, options: u32) -> KResult { + sys_wait4(thread, waitpid, arg1, options, core::ptr::null_mut()).await } #[eonix_macros::define_syscall(SYS_SETSID)] -fn setsid() -> KResult { - thread.process.setsid() +async fn setsid() -> KResult { + thread.process.setsid().await } #[eonix_macros::define_syscall(SYS_SETPGID)] -fn setpgid(pid: u32, pgid: i32) -> KResult<()> { +async fn setpgid(pid: u32, pgid: i32) -> KResult<()> { let pid = if pid == 0 { thread.process.pid } else { pid }; let pgid = match pgid { @@ -368,15 +381,15 @@ fn setpgid(pid: u32, pgid: i32) -> KResult<()> { _ => return Err(EINVAL), }; - thread.process.setpgid(pid, pgid) + thread.process.setpgid(pid, pgid).await } #[eonix_macros::define_syscall(SYS_GETSID)] -fn getsid(pid: u32) -> KResult { +async fn getsid(pid: u32) -> KResult { if pid == 0 { Ok(thread.process.session_rcu().sid) } else { - let procs = block_on(ProcessList::get().read()); + let procs = ProcessList::get().read().await; procs .try_find_process(pid) .map(|proc| proc.session(procs.prove()).sid) @@ -385,11 +398,11 @@ fn getsid(pid: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_GETPGID)] -fn getpgid(pid: u32) -> KResult { +async fn getpgid(pid: u32) -> KResult { if pid == 0 { Ok(thread.process.pgroup_rcu().pgid) } else { - let procs = block_on(ProcessList::get().read()); + let procs = ProcessList::get().read().await; procs .try_find_process(pid) .map(|proc| proc.pgroup(procs.prove()).pgid) @@ -398,12 +411,12 @@ fn getpgid(pid: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_GETPID)] -fn getpid() -> KResult { +async fn getpid() -> KResult { Ok(thread.process.pid) } #[eonix_macros::define_syscall(SYS_GETPPID)] -fn getppid() -> KResult { +async fn getppid() -> KResult { Ok(thread.process.parent_rcu().map_or(0, |x| x.pid)) } @@ -419,78 +432,61 @@ fn do_getuid(_thread: &Thread) -> KResult { #[cfg(target_arch = "x86_64")] 
#[eonix_macros::define_syscall(SYS_GETUID32)] -fn getuid32() -> KResult { +async fn getuid32() -> KResult { do_getuid(thread) } #[eonix_macros::define_syscall(SYS_GETUID)] -fn getuid() -> KResult { +async fn getuid() -> KResult { do_getuid(thread) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETEUID32)] -fn geteuid32() -> KResult { +async fn geteuid32() -> KResult { do_geteuid(thread) } #[eonix_macros::define_syscall(SYS_GETEUID)] -fn geteuid() -> KResult { +async fn geteuid() -> KResult { do_geteuid(thread) } #[eonix_macros::define_syscall(SYS_GETEGID)] -fn getegid() -> KResult { +async fn getegid() -> KResult { // All users are root for now. Ok(0) } #[eonix_macros::define_syscall(SYS_GETGID)] -fn getgid() -> KResult { - sys_getegid(thread) +async fn getgid() -> KResult { + sys_getegid(thread).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETGID32)] -fn getgid32() -> KResult { - sys_getegid(thread) -} - -#[eonix_macros::define_syscall(SYS_GETRANDOM)] -fn getrandom(buf: *mut u8, buflen: usize, _flags: u32) -> isize { - if buf.is_null() || buflen == 0 { - return -14; - } - - static mut SEED: u64 = 1; - unsafe { - for i in 0..buflen { - SEED = SEED.wrapping_mul(1103515245).wrapping_add(12345); - *buf.add(i) = (SEED >> 8) as u8; - } - } - - buflen as isize +async fn getgid32() -> KResult { + sys_getegid(thread).await } #[eonix_macros::define_syscall(SYS_SCHED_YIELD)] -fn sched_yield() -> KResult<()> { - block_on(yield_now()); +async fn sched_yield() -> KResult<()> { + yield_now().await; Ok(()) } #[eonix_macros::define_syscall(SYS_SYNC)] -fn sync() -> KResult<()> { +async fn sync() -> KResult<()> { Ok(()) } #[eonix_macros::define_syscall(SYS_FSYNC)] -fn fsync() -> KResult<()> { +async fn fsync() -> KResult<()> { Ok(()) } #[eonix_macros::define_syscall(SYS_GETTID)] -fn gettid() -> KResult { +async fn gettid() -> KResult { Ok(thread.tid) } @@ -530,7 +526,7 @@ pub fn parse_user_tls(arch_tls: usize) -> KResult { 
#[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SET_THREAD_AREA)] -fn set_thread_area(arch_tls: usize) -> KResult<()> { +async fn set_thread_area(arch_tls: usize) -> KResult<()> { thread.set_user_tls(parse_user_tls(arch_tls)?)?; // SAFETY: Preemption is disabled on calling `load_thread_area32()`. @@ -544,16 +540,16 @@ fn set_thread_area(arch_tls: usize) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_SET_TID_ADDRESS)] -fn set_tid_address(tidptr: usize) -> KResult { +async fn set_tid_address(tidptr: UserMut) -> KResult { thread.clear_child_tid(Some(tidptr)); Ok(thread.tid) } #[eonix_macros::define_syscall(SYS_PRCTL)] -fn prctl(option: u32, arg2: usize) -> KResult<()> { +async fn prctl(option: u32, arg2: PtrT) -> KResult<()> { match option { PR_SET_NAME => { - let name = UserPointer::new(arg2 as *mut [u8; 16])?.read()?; + let name = UserPointer::<[u8; 16]>::new(User::with_addr(arg2.addr()))?.read()?; let len = name.iter().position(|&c| c == 0).unwrap_or(15); thread.set_name(name[..len].into()); Ok(()) @@ -562,7 +558,7 @@ fn prctl(option: u32, arg2: usize) -> KResult<()> { let name = thread.get_name(); let len = name.len().min(15); let name: [u8; 16] = core::array::from_fn(|i| if i < len { name[i] } else { 0 }); - UserPointerMut::new(arg2 as *mut [u8; 16])?.write(name)?; + UserPointerMut::<[u8; 16]>::new(UserMut::with_addr(arg2.addr()))?.write(name)?; Ok(()) } _ => Err(EINVAL), @@ -570,8 +566,8 @@ fn prctl(option: u32, arg2: usize) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_KILL)] -fn kill(pid: i32, sig: u32) -> KResult<()> { - let procs = block_on(ProcessList::get().read()); +async fn kill(pid: i32, sig: u32) -> KResult<()> { + let procs = ProcessList::get().read().await; match pid { // Send signal to every process for which the calling process has // permission to send signals. 
@@ -597,8 +593,10 @@ fn kill(pid: i32, sig: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_TKILL)] -fn tkill(tid: u32, sig: u32) -> KResult<()> { - block_on(ProcessList::get().read()) +async fn tkill(tid: u32, sig: u32) -> KResult<()> { + ProcessList::get() + .read() + .await .try_find_thread(tid) .ok_or(ESRCH)? .raise(Signal::try_from_raw(sig)?); @@ -606,8 +604,8 @@ fn tkill(tid: u32, sig: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_TGKILL)] -fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { - let procs = block_on(ProcessList::get().read()); +async fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { + let procs = ProcessList::get().read().await; let thread_to_kill = procs.try_find_thread(tid).ok_or(ESRCH)?; if thread_to_kill.process.pid != tgid { @@ -619,10 +617,10 @@ fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_RT_SIGPROCMASK)] -fn rt_sigprocmask( +async fn rt_sigprocmask( how: u32, - set: *mut SigSet, - oldset: *mut SigSet, + set: UserMut, + oldset: UserMut, sigsetsize: usize, ) -> KResult<()> { if sigsetsize != size_of::() { @@ -635,7 +633,7 @@ fn rt_sigprocmask( } let new_mask = if !set.is_null() { - UserPointer::new(set)?.read()? + UserPointer::new(set.as_const())?.read()? 
} else { return Ok(()); }; @@ -657,27 +655,21 @@ struct TimeSpec32 { tv_nsec: i32, } -impl TimeSpec32 { - fn to_duration(&self) -> Duration { - Duration::new(self.tv_sec as u64, self.tv_nsec as u32) - } -} - #[eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT_TIME32)] -fn rt_sigtimedwait_time32( - _uthese: *const SigSet, - _uinfo: *mut SigInfo, - _uts: *const TimeSpec32, +async fn rt_sigtimedwait_time32( + _uthese: User, + _uinfo: UserMut, + _uts: User, ) -> KResult { // TODO Ok(0) } #[eonix_macros::define_syscall(SYS_RT_SIGACTION)] -fn rt_sigaction( +async fn rt_sigaction( signum: u32, - act: *const SigAction, - oldact: *mut SigAction, + act: User, + oldact: UserMut, sigsetsize: usize, ) -> KResult<()> { let signal = Signal::try_from_raw(signum)?; @@ -706,11 +698,11 @@ fn rt_sigaction( } #[eonix_macros::define_syscall(SYS_PRLIMIT64)] -fn prlimit64( +async fn prlimit64( pid: u32, resource: u32, - new_limit: *const RLimit, - old_limit: *mut RLimit, + new_limit: User, + old_limit: UserMut, ) -> KResult<()> { if pid != 0 { return Err(ENOSYS); @@ -742,13 +734,13 @@ fn prlimit64( } #[eonix_macros::define_syscall(SYS_GETRLIMIT)] -fn getrlimit(resource: u32, rlimit: *mut RLimit) -> KResult<()> { - sys_prlimit64(thread, 0, resource, core::ptr::null(), rlimit) +async fn getrlimit(resource: u32, rlimit: UserMut) -> KResult<()> { + sys_prlimit64(thread, 0, resource, User::null(), rlimit).await } #[eonix_macros::define_syscall(SYS_SETRLIMIT)] -fn setrlimit(resource: u32, rlimit: *const RLimit) -> KResult<()> { - sys_prlimit64(thread, 0, resource, rlimit, core::ptr::null_mut()) +async fn setrlimit(resource: u32, rlimit: User) -> KResult<()> { + sys_prlimit64(thread, 0, resource, rlimit, UserMut::null()).await } #[repr(C)] @@ -773,7 +765,7 @@ struct RUsage { } #[eonix_macros::define_syscall(SYS_GETRUSAGE)] -fn getrusage(who: u32, rusage: *mut RUsage) -> KResult<()> { +async fn getrusage(who: u32, rusage: UserMut) -> KResult<()> { if who != 0 { return Err(ENOSYS); } @@ -803,52 
+795,52 @@ fn getrusage(who: u32, rusage: *mut RUsage) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_VFORK)] -fn vfork() -> KResult { +async fn vfork() -> KResult { let clone_args = CloneArgs::for_vfork(); - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_FORK)] -fn fork() -> KResult { +async fn fork() -> KResult { let clone_args = CloneArgs::for_fork(); - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } // Some old platforms including x86_32, riscv and arm have the last two arguments // swapped, so we need to define two versions of `clone` syscall. #[cfg(not(target_arch = "loongarch64"))] #[eonix_macros::define_syscall(SYS_CLONE)] -fn clone( +async fn clone( clone_flags: usize, new_sp: usize, - parent_tidptr: usize, + parent_tidptr: UserMut, tls: usize, - child_tidptr: usize, + child_tidptr: UserMut, ) -> KResult { let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?; - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } #[cfg(target_arch = "loongarch64")] #[eonix_macros::define_syscall(SYS_CLONE)] -fn clone( +async fn clone( clone_flags: usize, new_sp: usize, - parent_tidptr: usize, - child_tidptr: usize, + parent_tidptr: UserMut, + child_tidptr: UserMut, tls: usize, ) -> KResult { let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?; - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } #[eonix_macros::define_syscall(SYS_FUTEX)] -fn futex( +async fn futex( uaddr: usize, op: u32, val: u32, @@ -866,11 +858,11 @@ fn futex( match futex_op { FutexOp::FUTEX_WAIT => { - block_on(futex_wait(uaddr, pid, val as u32, None))?; + futex_wait(uaddr, pid, val as u32, None).await?; return Ok(0); } FutexOp::FUTEX_WAKE => { - return block_on(futex_wake(uaddr, pid, val as u32)); + return futex_wake(uaddr, pid, val as u32).await; } 
FutexOp::FUTEX_REQUEUE => { todo!() @@ -882,60 +874,56 @@ fn futex( } #[eonix_macros::define_syscall(SYS_SET_ROBUST_LIST)] -fn set_robust_list(head: usize, len: usize) -> KResult<()> { +async fn set_robust_list(head: User, len: usize) -> KResult<()> { if len != size_of::() { return Err(EINVAL); } - thread.set_robust_list(Some(VAddr::from(head))); + thread.set_robust_list(Some(head)); Ok(()) } #[eonix_macros::define_syscall(SYS_RT_SIGRETURN)] -fn rt_sigreturn() -> KResult { - thread - .signal_list - .restore( - &mut thread.trap_ctx.borrow(), - &mut thread.fpu_state.borrow(), - false, - ) - .inspect_err(|err| { - println_warn!( - "`rt_sigreturn` failed in thread {} with error {err}!", - thread.tid - ); - block_on(thread.force_kill(Signal::SIGSEGV)); - })?; +async fn rt_sigreturn() -> KResult { + if let Err(err) = thread.signal_list.restore( + &mut thread.trap_ctx.borrow(), + &mut thread.fpu_state.borrow(), + false, + ) { + println_warn!( + "`rt_sigreturn` failed in thread {} with error {err}!", + thread.tid + ); + thread.force_kill(Signal::SIGSEGV).await; + return Err(err); + } Ok(SyscallNoReturn) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SIGRETURN)] -fn sigreturn() -> KResult { - thread - .signal_list - .restore( - &mut thread.trap_ctx.borrow(), - &mut thread.fpu_state.borrow(), - true, - ) - .inspect_err(|err| { - println_warn!( - "`sigreturn` failed in thread {} with error {err}!", - thread.tid - ); - block_on(thread.force_kill(Signal::SIGSEGV)); - })?; +async fn sigreturn() -> KResult { + if let Err(err) = thread.signal_list.restore( + &mut thread.trap_ctx.borrow(), + &mut thread.fpu_state.borrow(), + true, + ) { + println_warn!( + "`sigreturn` failed in thread {} with error {err}!", + thread.tid + ); + thread.force_kill(Signal::SIGSEGV).await; + return Err(err); + } Ok(SyscallNoReturn) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_ARCH_PRCTL)] -fn arch_prctl(option: u32, addr: u32) -> KResult { - 
sys_arch_prctl(thread, option, addr) +async fn arch_prctl(option: u32, addr: u32) -> KResult { + sys_arch_prctl(thread, option, addr).await } pub fn keep_alive() {} diff --git a/src/kernel/syscall/sysinfo.rs b/src/kernel/syscall/sysinfo.rs index 5092c8a6..69316b2a 100644 --- a/src/kernel/syscall/sysinfo.rs +++ b/src/kernel/syscall/sysinfo.rs @@ -2,6 +2,7 @@ use crate::{ io::Buffer as _, kernel::{ constants::{CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINTR, EINVAL}, + syscall::UserMut, task::Thread, timer::{Instant, Ticks}, user::{UserBuffer, UserPointerMut}, @@ -30,7 +31,7 @@ fn copy_cstr_to_array(cstr: &[u8], array: &mut [u8]) { } #[eonix_macros::define_syscall(SYS_NEWUNAME)] -fn newuname(buffer: *mut NewUTSName) -> KResult<()> { +async fn newuname(buffer: UserMut) -> KResult<()> { let buffer = UserPointerMut::new(buffer)?; let mut uname = NewUTSName { sysname: [0; 65], @@ -62,7 +63,7 @@ fn newuname(buffer: *mut NewUTSName) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_GETTIMEOFDAY)] -fn gettimeofday(timeval: *mut TimeVal, timezone: *mut ()) -> KResult<()> { +async fn gettimeofday(timeval: UserMut, timezone: UserMut<()>) -> KResult<()> { if !timezone.is_null() { return Err(EINVAL); } @@ -81,7 +82,7 @@ fn gettimeofday(timeval: *mut TimeVal, timezone: *mut ()) -> KResult<()> { Ok(()) } -fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> { +fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: UserMut) -> KResult<()> { let timespec = UserPointerMut::new(timespec)?; match clock_id { @@ -106,13 +107,13 @@ fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: *mut TimeSpec) #[cfg(not(target_arch = "x86_64"))] #[eonix_macros::define_syscall(SYS_CLOCK_GETTIME)] -fn clock_gettime(clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> { +async fn clock_gettime(clock_id: u32, timespec: UserMut) -> KResult<()> { do_clock_gettime64(thread, clock_id, timespec) } #[cfg(target_arch = 
"x86_64")] #[eonix_macros::define_syscall(SYS_CLOCK_GETTIME64)] -fn clock_gettime64(clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> { +async fn clock_gettime64(clock_id: u32, timespec: UserMut) -> KResult<()> { do_clock_gettime64(thread, clock_id, timespec) } @@ -135,7 +136,7 @@ struct Sysinfo { } #[eonix_macros::define_syscall(SYS_SYSINFO)] -fn sysinfo(info: *mut Sysinfo) -> KResult<()> { +async fn sysinfo(info: UserMut) -> KResult<()> { let info = UserPointerMut::new(info)?; info.write(Sysinfo { uptime: Ticks::since_boot().as_secs() as u32, @@ -164,7 +165,7 @@ struct TMS { } #[eonix_macros::define_syscall(SYS_TIMES)] -fn times(tms: *mut TMS) -> KResult<()> { +async fn times(tms: UserMut) -> KResult<()> { let tms = UserPointerMut::new(tms)?; tms.write(TMS { tms_utime: 0, @@ -175,7 +176,7 @@ fn times(tms: *mut TMS) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_GETRANDOM)] -fn get_random(buf: *mut u8, len: usize, flags: u32) -> KResult { +async fn get_random(buf: UserMut, len: usize, flags: u32) -> KResult { if flags != 0 { return Err(EINVAL); } diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index 574cdfc9..e0d578c1 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,7 +1,6 @@ -use super::block_on; use crate::{ kernel::{ - syscall::procops::parse_user_tls, + syscall::{procops::parse_user_tls, UserMut}, task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}, user::UserPointerMut, }, @@ -49,9 +48,9 @@ pub struct CloneArgs { pub flags: CloneFlags, pub sp: Option>, // Stack pointer for the new thread. pub exit_signal: Option, // Signal to send to the parent on exit. - pub set_tid_ptr: Option, // Pointer to set child TID in user space. - pub clear_tid_ptr: Option, // Pointer to clear child TID in user space. - pub parent_tid_ptr: Option, // Pointer to parent TID in user space. + pub set_tid_ptr: Option>, // Pointer to set child TID in user space. 
+ pub clear_tid_ptr: Option>, // Pointer to clear child TID in user space. + pub parent_tid_ptr: Option>, // Pointer to parent TID in user space. pub tls: Option, // Pointer to TLS information. } @@ -61,8 +60,8 @@ impl CloneArgs { pub fn for_clone( flags: usize, sp: usize, - child_tid_ptr: usize, - parent_tid_ptr: usize, + child_tid_ptr: UserMut, + parent_tid_ptr: UserMut, tls: usize, ) -> KResult { let clone_flags = CloneFlags::from_bits_truncate(flags & !Self::MASK); @@ -131,8 +130,8 @@ impl CloneArgs { } } -pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { - let mut procs = block_on(ProcessList::get().write()); +pub async fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { + let mut procs = ProcessList::get().write().await; let thread_builder = ThreadBuilder::new().clone_from(&thread, &clone_args)?; let current_process = thread.process.clone(); @@ -152,6 +151,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { let (new_thread, _) = ProcessBuilder::new() .clone_from(current_process, &clone_args) + .await .pid(new_pid) .pgroup(current_pgroup) .session(current_session) @@ -161,7 +161,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { }; if let Some(parent_tid_ptr) = clone_args.parent_tid_ptr { - UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)? + UserPointerMut::new(parent_tid_ptr)?.write(new_pid)? 
} RUNTIME.spawn(new_thread.run()); diff --git a/src/kernel/task/futex.rs b/src/kernel/task/futex.rs index af42a396..a04d7091 100644 --- a/src/kernel/task/futex.rs +++ b/src/kernel/task/futex.rs @@ -9,6 +9,7 @@ use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicL use crate::{ kernel::{ constants::{EAGAIN, EINVAL}, + syscall::User, user::UserPointer, }, prelude::KResult, @@ -174,7 +175,7 @@ pub async fn futex_wait( let (_, futex_bucket_ref) = FUTEX_TABLE.get_bucket(&futex_key); let mut futex_bucket = futex_bucket_ref.lock().await; - let val = UserPointer::new(uaddr as *const u32)?.read()?; + let val = UserPointer::new(User::::with_addr(uaddr))?.read()?; if val != expected_val { return Err(EAGAIN); @@ -238,20 +239,20 @@ async fn futex_requeue( pid: Option, wake_count: u32, requeue_uaddr: usize, - requeue_count: u32, + _requeue_count: u32, ) -> KResult { let futex_key = FutexKey::new(uaddr, pid); let futex_requeue_key = FutexKey::new(requeue_uaddr, pid); - let (bucket_idx0, bucket_ref0) = FUTEX_TABLE.get_bucket(&futex_key); - let (bucket_idx1, bucket_ref1) = FUTEX_TABLE.get_bucket(&futex_requeue_key); + let (bucket_idx0, _bucket_ref0) = FUTEX_TABLE.get_bucket(&futex_key); + let (bucket_idx1, _bucket_ref1) = FUTEX_TABLE.get_bucket(&futex_requeue_key); if bucket_idx0 == bucket_idx1 { // If the keys are the same, we can just wake up the waiters. return futex_wake(uaddr, pid, wake_count).await; } - let (futex_bucket, futex_requeue_bucket) = + let (_futex_bucket, _futex_requeue_bucket) = double_lock_bucket(futex_key, futex_requeue_key).await; todo!() @@ -299,7 +300,7 @@ impl RobustListHead { futex_wake(futex_addr, None, usize::MAX as u32).await?; // Move to the next entry in the robust list. 
- let robust_list = UserPointer::new(entry_ptr as *const RobustList)?.read()?; + let robust_list = UserPointer::new(User::::with_addr(entry_ptr))?.read()?; entry_ptr = robust_list.next; diff --git a/src/kernel/task/loader/elf.rs b/src/kernel/task/loader/elf.rs index 073026a9..859e0010 100644 --- a/src/kernel/task/loader/elf.rs +++ b/src/kernel/task/loader/elf.rs @@ -215,20 +215,20 @@ impl Elf { }) } - fn load(&self, args: Vec, envs: Vec) -> KResult { + async fn load(&self, args: Vec, envs: Vec) -> KResult { let mm_list = MMList::new(); // Load Segments - let (elf_base, data_segment_end) = self.load_segments(&mm_list)?; + let (elf_base, data_segment_end) = self.load_segments(&mm_list).await?; // Load ldso (if any) - let ldso_load_info = self.load_ldso(&mm_list)?; + let ldso_load_info = self.load_ldso(&mm_list).await?; // Load vdso - self.load_vdso(&mm_list)?; + self.load_vdso(&mm_list).await?; // Heap - mm_list.register_break(data_segment_end + 0x10000); + mm_list.register_break(data_segment_end + 0x10000).await; let aux_vec = self.init_aux_vec( elf_base, @@ -238,7 +238,9 @@ impl Elf { )?; // Map stack - let sp = self.create_and_init_stack(&mm_list, args, envs, aux_vec)?; + let sp = self + .create_and_init_stack(&mm_list, args, envs, aux_vec) + .await?; let entry_ip = if let Some(ldso_load_info) = ldso_load_info { // Normal shared object(DYN) @@ -258,26 +260,30 @@ impl Elf { }) } - fn create_and_init_stack( + async fn create_and_init_stack( &self, mm_list: &MMList, args: Vec, envs: Vec, aux_vec: AuxVec, ) -> KResult { - mm_list.mmap_fixed( - VAddr::from(E::STACK_BASE_ADDR - INIT_STACK_SIZE), - INIT_STACK_SIZE, - Mapping::Anonymous, - Permission { - read: true, - write: true, - execute: false, - }, - false, - )?; + mm_list + .mmap_fixed( + VAddr::from(E::STACK_BASE_ADDR - INIT_STACK_SIZE), + INIT_STACK_SIZE, + Mapping::Anonymous, + Permission { + read: true, + write: true, + execute: false, + }, + false, + ) + .await?; - StackInitializer::new(&mm_list, 
E::STACK_BASE_ADDR, args, envs, aux_vec).init() + StackInitializer::new(&mm_list, E::STACK_BASE_ADDR, args, envs, aux_vec) + .init() + .await } fn init_aux_vec(&self, elf_base: VAddr, ldso_base: Option) -> KResult> { @@ -309,7 +315,7 @@ impl Elf { Ok(aux_vec) } - fn load_segments(&self, mm_list: &MMList) -> KResult<(VAddr, VAddr)> { + async fn load_segments(&self, mm_list: &MMList) -> KResult<(VAddr, VAddr)> { let base: VAddr = if self.is_shared_object() { E::DYN_BASE_ADDR } else { 0 }.into(); let mut segments_end = VAddr::NULL; @@ -318,7 +324,7 @@ impl Elf { let type_ = program_header.type_().map_err(|_| ENOEXEC)?; if type_ == program::Type::Load { - let segment_end = self.load_segment(program_header, mm_list, base)?; + let segment_end = self.load_segment(program_header, mm_list, base).await?; if segment_end > segments_end { segments_end = segment_end; @@ -329,7 +335,7 @@ impl Elf { Ok((base, segments_end)) } - fn load_segment( + async fn load_segment( &self, program_header: &E::Ph, mm_list: &MMList, @@ -353,33 +359,37 @@ impl Elf { if file_len != 0 { let real_file_length = load_vaddr_end - vmap_start; - mm_list.mmap_fixed( - vmap_start, - file_len, - Mapping::File(FileMapping::new( - self.file.get_inode()?, - file_offset, - real_file_length, - )), - permission, - false, - )?; + mm_list + .mmap_fixed( + vmap_start, + file_len, + Mapping::File(FileMapping::new( + self.file.get_inode()?, + file_offset, + real_file_length, + )), + permission, + false, + ) + .await?; } if vmem_len > file_len { - mm_list.mmap_fixed( - vmap_start + file_len, - vmem_len - file_len, - Mapping::Anonymous, - permission, - false, - )?; + mm_list + .mmap_fixed( + vmap_start + file_len, + vmem_len - file_len, + Mapping::Anonymous, + permission, + false, + ) + .await?; } Ok(vmap_start + vmem_len) } - fn load_ldso(&self, mm_list: &MMList) -> KResult> { + async fn load_ldso(&self, mm_list: &MMList) -> KResult> { let ldso_path = self.ldso_path()?; if let Some(ldso_path) = ldso_path { @@ -393,7 
+403,7 @@ impl Elf { let type_ = program_header.type_().map_err(|_| ENOEXEC)?; if type_ == program::Type::Load { - ldso_elf.load_segment(program_header, mm_list, base)?; + ldso_elf.load_segment(program_header, mm_list, base).await?; } } @@ -406,8 +416,8 @@ impl Elf { Ok(None) } - fn load_vdso(&self, mm_list: &MMList) -> KResult<()> { - mm_list.map_vdso() + async fn load_vdso(&self, mm_list: &MMList) -> KResult<()> { + mm_list.map_vdso().await } fn ldso_path(&self) -> KResult> { @@ -449,10 +459,10 @@ impl ELF { } } - pub fn load(&self, args: Vec, envs: Vec) -> KResult { + pub async fn load(&self, args: Vec, envs: Vec) -> KResult { match &self { - ELF::Elf32(elf32) => elf32.load(args, envs), - ELF::Elf64(elf64) => elf64.load(args, envs), + ELF::Elf32(elf32) => elf32.load(args, envs).await, + ELF::Elf64(elf64) => elf64.load(args, envs).await, } } } @@ -483,21 +493,21 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { } // return sp after stack init - fn init(mut self) -> KResult { - let env_pointers = self.push_envs()?; - let arg_pointers = self.push_args()?; + async fn init(mut self) -> KResult { + let env_pointers = self.push_envs().await?; + let arg_pointers = self.push_args().await?; self.stack_alignment(); - self.push_aux_vec()?; - self.push_pointers(env_pointers)?; - self.push_pointers(arg_pointers)?; - self.push_argc(T::from_usize(self.args.len()))?; + self.push_aux_vec().await?; + self.push_pointers(env_pointers).await?; + self.push_pointers(arg_pointers).await?; + self.push_argc(T::from_usize(self.args.len())).await?; assert_eq!(self.sp.align_down(16), self.sp); Ok(VAddr::from(self.sp)) } - fn push_envs(&mut self) -> KResult> { + async fn push_envs(&mut self) -> KResult> { let mut addrs = Vec::with_capacity(self.envs.len()); for string in self.envs.iter().rev() { let len = string.as_bytes_with_nul().len(); @@ -505,14 +515,15 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.mm_list .access_mut(VAddr::from(self.sp), len, 
|offset, data| { data.copy_from_slice(&string.as_bytes_with_nul()[offset..offset + data.len()]) - })?; + }) + .await?; addrs.push(T::from_usize(self.sp)); } addrs.reverse(); Ok(addrs) } - fn push_args(&mut self) -> KResult> { + async fn push_args(&mut self) -> KResult> { let mut addrs = Vec::with_capacity(self.args.len()); for string in self.args.iter().rev() { let len = string.as_bytes_with_nul().len(); @@ -520,7 +531,8 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.mm_list .access_mut(VAddr::from(self.sp), len, |offset, data| { data.copy_from_slice(&string.as_bytes_with_nul()[offset..offset + data.len()]) - })?; + }) + .await?; addrs.push(T::from_usize(self.sp)); } addrs.reverse(); @@ -538,27 +550,29 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.sp = align_sp + all_size; } - fn push_pointers(&mut self, mut pointers: Vec) -> KResult<()> { + async fn push_pointers(&mut self, mut pointers: Vec) -> KResult<()> { pointers.push(T::from_usize(0)); self.sp -= pointers.len() * size_of::(); - self.mm_list.access_mut( - VAddr::from(self.sp), - pointers.len() * size_of::(), - |offset, data| { - data.copy_from_slice(unsafe { - core::slice::from_raw_parts( - pointers.as_ptr().byte_add(offset) as *const u8, - data.len(), - ) - }) - }, - )?; + self.mm_list + .access_mut( + VAddr::from(self.sp), + pointers.len() * size_of::(), + |offset, data| { + data.copy_from_slice(unsafe { + core::slice::from_raw_parts( + pointers.as_ptr().byte_add(offset) as *const u8, + data.len(), + ) + }) + }, + ) + .await?; Ok(()) } - fn push_argc(&mut self, val: T) -> KResult<()> { + async fn push_argc(&mut self, val: T) -> KResult<()> { self.sp -= size_of::(); self.mm_list @@ -566,12 +580,13 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { data.copy_from_slice(unsafe { core::slice::from_raw_parts(&val as *const _ as *const u8, data.len()) }) - })?; + }) + .await?; Ok(()) } - fn push_aux_vec(&mut self) -> KResult<()> { + async fn 
push_aux_vec(&mut self) -> KResult<()> { let mut longs: Vec = vec![]; // Write Auxiliary vectors @@ -593,18 +608,20 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.sp -= longs.len() * size_of::(); - self.mm_list.access_mut( - VAddr::from(self.sp), - longs.len() * size_of::(), - |offset, data| { - data.copy_from_slice(unsafe { - core::slice::from_raw_parts( - longs.as_ptr().byte_add(offset) as *const u8, - data.len(), - ) - }) - }, - )?; + self.mm_list + .access_mut( + VAddr::from(self.sp), + longs.len() * size_of::(), + |offset, data| { + data.copy_from_slice(unsafe { + core::slice::from_raw_parts( + longs.as_ptr().byte_add(offset) as *const u8, + data.len(), + ) + }) + }, + ) + .await?; Ok(()) } diff --git a/src/kernel/task/loader/mod.rs b/src/kernel/task/loader/mod.rs index fc9374be..4e3f4db1 100644 --- a/src/kernel/task/loader/mod.rs +++ b/src/kernel/task/loader/mod.rs @@ -106,9 +106,9 @@ impl ProgramLoader { }) } - pub fn load(self) -> KResult { + pub async fn load(self) -> KResult { match self.object { - Object::ELF(elf) => elf.load(self.args, self.envs), + Object::ELF(elf) => elf.load(self.args, self.envs).await, } } } diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 3e69fc4b..421e4b8b 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -1,4 +1,3 @@ -use super::block_on; use super::{ process_group::ProcessGroupBuilder, signal::RaiseResult, thread::ThreadBuilder, ProcessGroup, ProcessList, Session, Thread, @@ -109,6 +108,7 @@ pub struct DrainExited<'waitlist> { wait_procs: SpinGuard<'waitlist, VecDeque>, } +#[derive(Debug, Clone, Copy)] pub enum WaitId { Any, Pid(u32), @@ -121,23 +121,17 @@ impl WaitId { P_ALL => Ok(WaitId::Any), P_PID => Ok(WaitId::Pid(id)), P_PGID => Ok(WaitId::Pgid(id)), - P_PIDFD => { - panic!("PDIFD type is unsupported") - } + P_PIDFD => panic!("P_PIDFD type is not supported"), _ => Err(EINVAL), } } pub fn from_id(id: i32, thread: &Thread) -> Self { - if id < -1 { - 
WaitId::Pgid((-id).cast_unsigned()) - } else if id == -1 { - WaitId::Any - } else if id == 0 { - let procs = block_on(ProcessList::get().read()); - WaitId::Pgid(thread.process.pgroup(procs.prove()).pgid) - } else { - WaitId::Pid(id.cast_unsigned()) + match id { + ..-1 => WaitId::Pgid((-id).cast_unsigned()), + -1 => WaitId::Any, + 0 => WaitId::Pgid(thread.process.pgroup_rcu().pgid), + _ => WaitId::Pid(id.cast_unsigned()), } } } @@ -206,11 +200,11 @@ impl ProcessBuilder { } } - pub fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { + pub async fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { let mm_list = if clone_args.flags.contains(CloneFlags::CLONE_VM) { - block_on(process.mm_list.new_shared()) + process.mm_list.new_shared().await } else { - block_on(process.mm_list.new_cloned()) + process.mm_list.new_cloned().await }; if let Some(exit_signal) = clone_args.exit_signal { @@ -351,12 +345,18 @@ impl Process { trace_continue: bool, ) -> KResult> { let wait_object = { - let mut waits = self - .wait_list - .entry(wait_id, trace_stop, trace_continue) - .await; + let mut unlocked_waits = None; loop { + let mut waits = match unlocked_waits { + Some(wait) => wait.await?, + None => { + self.wait_list + .entry(wait_id, trace_stop, trace_continue) + .await + } + }; + if let Some(object) = waits.get() { break object; } @@ -374,7 +374,7 @@ impl Process { return Ok(None); } - waits = waits.wait(no_block).await?; + unlocked_waits = Some(waits.wait(no_block)); } }; @@ -395,8 +395,8 @@ impl Process { } /// Create a new session for the process. - pub fn setsid(self: &Arc) -> KResult { - let mut process_list = block_on(ProcessList::get().write()); + pub async fn setsid(self: &Arc) -> KResult { + let mut process_list = ProcessList::get().write().await; // If there exists a session that has the same sid as our pid, we can't create a new // session. 
The standard says that we should create a new process group and be the // only process in the new process group and session. @@ -473,8 +473,8 @@ impl Process { /// /// This function should be called on the process that issued the syscall in order to do /// permission checks. - pub fn setpgid(self: &Arc, pid: u32, pgid: u32) -> KResult<()> { - let mut procs = block_on(ProcessList::get().write()); + pub async fn setpgid(self: &Arc, pid: u32, pgid: u32) -> KResult<()> { + let mut procs = ProcessList::get().write().await; // We may set pgid of either the calling process or a child process. if pid == self.pid { self.do_setpgid(pgid, &mut procs) @@ -609,9 +609,8 @@ impl Entry<'_, '_, '_> { WaitId::Any => true, WaitId::Pid(pid) => item.pid == pid, WaitId::Pgid(pgid) => { - let procs = block_on(ProcessList::get().read()); - if let Some(process) = procs.try_find_process(item.pid) { - return process.pgroup(procs.prove()).pgid == pgid; + if let Some(process) = self.process_list.try_find_process(item.pid) { + return process.pgroup(self.process_list.prove()).pgid == pgid; } false } @@ -625,7 +624,7 @@ impl Entry<'_, '_, '_> { } } - pub fn wait(self, no_block: bool) -> impl core::future::Future> { + pub fn wait(self, no_block: bool) -> impl core::future::Future> + Send { let wait_procs = self.wait_procs.unlock(); async move { diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs index 5293b4b7..856030ba 100644 --- a/src/kernel/task/process_list.rs +++ b/src/kernel/task/process_list.rs @@ -9,6 +9,7 @@ use alloc::{ collections::btree_map::BTreeMap, sync::{Arc, Weak}, }; +use eonix_mm::address::Addr; use eonix_sync::{AsProof as _, AsProofMut as _, RwLock}; pub struct ProcessList { @@ -134,11 +135,9 @@ impl ProcessList { } if let Some(clear_ctid) = thread.get_clear_ctid() { - let _ = UserPointerMut::new(clear_ctid as *mut u32) - .unwrap() - .write(0u32); + let _ = UserPointerMut::new(clear_ctid).unwrap().write(0u32); - let _ = futex_wake(clear_ctid, None, 
1).await; + let _ = futex_wake(clear_ctid.addr(), None, 1).await; } if let Some(robust_list) = thread.get_robust_list() { diff --git a/src/kernel/task/signal.rs b/src/kernel/task/signal.rs index b6ed34bf..d9970cad 100644 --- a/src/kernel/task/signal.rs +++ b/src/kernel/task/signal.rs @@ -293,15 +293,15 @@ impl SignalList { let old_fpu_state_vaddr = old_trap_ctx_vaddr + size_of::(); let old_mask_vaddr = old_fpu_state_vaddr + size_of::(); - *trap_ctx = UserPointer::::new_vaddr(old_trap_ctx_vaddr)?.read()?; + *trap_ctx = UserPointer::::with_addr(old_trap_ctx_vaddr)?.read()?; // Make sure that at least we won't crash the kernel. if !trap_ctx.is_user_mode() || !trap_ctx.is_interrupt_enabled() { return Err(EFAULT)?; } - *fpu_state = UserPointer::::new_vaddr(old_fpu_state_vaddr)?.read()?; - self.inner.lock().mask = UserPointer::::new_vaddr(old_mask_vaddr)?.read()?; + *fpu_state = UserPointer::::with_addr(old_fpu_state_vaddr)?.read()?; + self.inner.lock().mask = UserPointer::::with_addr(old_mask_vaddr)?.read()?; Ok(()) } diff --git a/src/kernel/task/signal/signal_action.rs b/src/kernel/task/signal/signal_action.rs index 98682547..708f9802 100644 --- a/src/kernel/task/signal/signal_action.rs +++ b/src/kernel/task/signal/signal_action.rs @@ -3,6 +3,7 @@ use crate::{ io::BufferFill as _, kernel::{ constants::{EFAULT, EINVAL}, + syscall::UserMut, user::UserBuffer, }, }; @@ -152,7 +153,7 @@ impl SignalAction { let saved_data_addr = (current_sp - SAVED_DATA_SIZE).floor_to(16); let mut saved_data_buffer = - UserBuffer::new(saved_data_addr.addr() as *mut u8, SAVED_DATA_SIZE)?; + UserBuffer::new(UserMut::new(saved_data_addr), SAVED_DATA_SIZE)?; saved_data_buffer.copy(trap_ctx)?.ok_or(EFAULT)?; saved_data_buffer.copy(fpu_state)?.ok_or(EFAULT)?; @@ -200,7 +201,7 @@ impl SignalAction { Some(return_address), &[Long::new_val(signal.into_raw() as _).get()], |vaddr, data| -> Result<(), u32> { - let mut buffer = UserBuffer::new(vaddr.addr() as *mut u8, data.len())?; + let mut buffer = 
UserBuffer::new(UserMut::new(vaddr), data.len())?; for ch in data.iter() { buffer.copy(&ch)?.ok_or(EFAULT)?; } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 50600436..3132a9a9 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -5,7 +5,7 @@ use super::{ use crate::{ kernel::{ interrupt::default_irq_handler, - syscall::{syscall_handlers, SyscallHandler}, + syscall::{syscall_handlers, SyscallHandler, User, UserMut}, task::{clone::CloneArgs, futex::RobustListHead, CloneFlags}, timer::{should_reschedule, timer_interrupt}, user::{UserPointer, UserPointerMut}, @@ -13,7 +13,7 @@ use crate::{ }, prelude::*, }; -use alloc::sync::Arc; +use alloc::{alloc::Allocator, sync::Arc}; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ future::{poll_fn, Future}, @@ -36,10 +36,14 @@ use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::AsProofMut as _; use pointers::BorrowedArc; use posix_types::signal::Signal; +use stalloc::UnsafeStalloc; #[eonix_percpu::define_percpu] static CURRENT_THREAD: Option> = None; +#[derive(Clone, Copy)] +pub struct ThreadAlloc<'a>(pub &'a UnsafeStalloc<255, 32>); + pub struct ThreadBuilder { tid: Option, name: Option>, @@ -48,8 +52,8 @@ pub struct ThreadBuilder { fs_context: Option>, signal_list: Option, tls: Option, - set_child_tid: Option, - clear_child_tid: Option, + set_child_tid: Option>, + clear_child_tid: Option>, trap_ctx: Option, fpu_state: Option, @@ -65,11 +69,11 @@ struct ThreadInner { /// User pointer /// Store child thread's tid when child thread returns to user space. 
- set_child_tid: Option, + set_child_tid: Option>, - clear_child_tid: Option, + clear_child_tid: Option>, - robust_list_address: Option, + robust_list_address: Option>, } pub struct Thread { @@ -141,12 +145,12 @@ impl ThreadBuilder { self } - pub fn set_child_tid(mut self, set_child_tid: Option) -> Self { + pub fn set_child_tid(mut self, set_child_tid: Option>) -> Self { self.set_child_tid = set_child_tid; self } - pub fn clear_child_tid(mut self, clear_child_tid: Option) -> Self { + pub fn clear_child_tid(mut self, clear_child_tid: Option>) -> Self { self.clear_child_tid = clear_child_tid; self } @@ -285,13 +289,13 @@ impl Thread { Ok(()) } - pub fn set_robust_list(&self, robust_list_address: Option) { + pub fn set_robust_list(&self, robust_list_address: Option>) { self.inner.lock().robust_list_address = robust_list_address; } pub fn get_robust_list(&self) -> Option { let addr = self.inner.lock().robust_list_address?; - let user_pointer = UserPointer::new(addr.addr() as *const RobustListHead).ok()?; + let user_pointer = UserPointer::new(addr).ok()?; user_pointer.read().ok() } @@ -304,25 +308,30 @@ impl Thread { self.inner.lock().name.clone() } - pub fn clear_child_tid(&self, clear_child_tid: Option) { + pub fn clear_child_tid(&self, clear_child_tid: Option>) { self.inner.lock().clear_child_tid = clear_child_tid; } - pub fn get_set_ctid(&self) -> Option { + pub fn get_set_ctid(&self) -> Option> { self.inner.lock().set_child_tid } - pub fn get_clear_ctid(&self) -> Option { + pub fn get_clear_ctid(&self) -> Option> { self.inner.lock().clear_child_tid } - pub fn handle_syscall(&self, no: usize, args: [usize; 6]) -> Option { + pub async fn handle_syscall( + &self, + thd_alloc: ThreadAlloc<'_>, + no: usize, + args: [usize; 6], + ) -> Option { match syscall_handlers().get(no) { Some(Some(SyscallHandler { handler, name: _name, .. 
- })) => handler(self, args), + })) => handler(self, thd_alloc, args).await, _ => { println_warn!("Syscall {no}({no:#x}) isn't implemented."); self.raise(Signal::SIGSYS); @@ -347,12 +356,18 @@ impl Thread { async fn real_run(&self) { if let Some(set_ctid) = self.get_set_ctid() { - UserPointerMut::new(set_ctid as *mut u32) + UserPointerMut::new(set_ctid) .expect("set_child_tid pointer is invalid") .write(self.tid) .expect("set_child_tid write failed"); } + let stack_alloc = unsafe { + // SAFETY: The allocator will only be used within the context of this thread. + UnsafeStalloc::new() + }; + let thd_alloc = ThreadAlloc(&stack_alloc); + while !self.is_dead() { if self.signal_list.has_pending_signal() { self.signal_list @@ -401,7 +416,7 @@ impl Thread { } } TrapType::Syscall { no, args } => { - if let Some(retval) = self.handle_syscall(no, args) { + if let Some(retval) = self.handle_syscall(thd_alloc, no, args).await { let mut trap_ctx = self.trap_ctx.borrow(); trap_ctx.set_user_return_value(retval); @@ -452,6 +467,19 @@ impl Thread { } } +unsafe impl Allocator for ThreadAlloc<'_> { + fn allocate( + &self, + layout: core::alloc::Layout, + ) -> Result, alloc::alloc::AllocError> { + self.0.allocate(layout) + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: core::alloc::Layout) { + self.0.deallocate(ptr, layout); + } +} + pub async fn yield_now() { struct Yield { yielded: bool, diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 134021a8..86024338 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -1,5 +1,5 @@ use super::{ - task::{block_on, ProcessList, Session, Thread}, + task::{ProcessList, Session, Thread}, user::{UserPointer, UserPointerMut}, }; use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; @@ -446,18 +446,18 @@ impl Terminal { } } - fn signal(&self, inner: &mut TerminalInner, signal: Signal) { + async fn signal(&self, inner: &mut TerminalInner, signal: Signal) { if let Some(session) = inner.session.upgrade() { - 
block_on(session.raise_foreground(signal)); + session.raise_foreground(signal).await; } if !inner.termio.noflsh() { self.clear_read_buffer(inner); } } - fn echo_and_signal(&self, inner: &mut TerminalInner, ch: u8, signal: Signal) { + async fn echo_and_signal(&self, inner: &mut TerminalInner, ch: u8, signal: Signal) { self.echo_char(inner, ch); - self.signal(inner, signal); + self.signal(inner, signal).await; } fn do_commit_char(&self, inner: &mut TerminalInner, ch: u8) { @@ -481,13 +481,13 @@ impl Terminal { match ch { 0xff => {} ch if ch == inner.termio.vintr() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGINT) + return self.echo_and_signal(&mut inner, ch, Signal::SIGINT).await } ch if ch == inner.termio.vquit() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGQUIT) + return self.echo_and_signal(&mut inner, ch, Signal::SIGQUIT).await } ch if ch == inner.termio.vsusp() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGTSTP) + return self.echo_and_signal(&mut inner, ch, Signal::SIGTSTP).await } _ => {} } diff --git a/src/kernel/user.rs b/src/kernel/user.rs index b3701507..5e410c81 100644 --- a/src/kernel/user.rs +++ b/src/kernel/user.rs @@ -1,7 +1,3 @@ -pub mod dataflow; +mod dataflow; -#[allow(unused_imports)] -pub use dataflow::{UserBuffer, UserString}; - -pub type UserPointer<'a, T> = dataflow::UserPointer<'a, T, true>; -pub type UserPointerMut<'a, T> = dataflow::UserPointer<'a, T, false>; +pub use dataflow::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}; diff --git a/src/kernel/user/dataflow.rs b/src/kernel/user/dataflow.rs index 17dbd4c9..02e7d791 100644 --- a/src/kernel/user/dataflow.rs +++ b/src/kernel/user/dataflow.rs @@ -1,17 +1,20 @@ +use crate::{ + io::{Buffer, FillResult}, + prelude::*, +}; use crate::{ io::{IntoStream, Stream}, - kernel::constants::{EFAULT, EINVAL}, + kernel::{ + constants::{EFAULT, EINVAL}, + syscall::{User, UserMut}, + }, }; use core::{arch::asm, ffi::CStr, 
marker::PhantomData}; +use eonix_mm::address::Addr; use eonix_preempt::assert_preempt_enabled; -use crate::{ - io::{Buffer, FillResult}, - prelude::*, -}; - pub struct CheckedUserPointer<'a> { - ptr: *const u8, + ptr: User, len: usize, _phantom: PhantomData<&'a ()>, } @@ -27,7 +30,12 @@ pub struct UserString<'a> { len: usize, } -pub struct UserPointer<'a, T: Copy, const CONST: bool> { +pub struct UserPointer<'a, T: Copy> { + pointer: CheckedUserPointer<'a>, + _phantom: PhantomData, +} + +pub struct UserPointerMut<'a, T: Copy> { pointer: CheckedUserPointer<'a>, _phantom: PhantomData, } @@ -37,9 +45,9 @@ pub struct UserStream<'a> { cur: usize, } -impl UserPointer<'_, T, CONST> { - pub fn new(ptr: *const T) -> KResult { - let pointer = CheckedUserPointer::new(ptr as *const u8, core::mem::size_of::())?; +impl UserPointer<'_, T> { + pub fn new(ptr: User) -> KResult { + let pointer = CheckedUserPointer::new(ptr.cast(), core::mem::size_of::())?; Ok(Self { pointer, @@ -47,8 +55,8 @@ impl UserPointer<'_, T, CONST> { }) } - pub fn new_vaddr(vaddr: usize) -> KResult { - Self::new(vaddr as *mut T) + pub fn with_addr(vaddr: usize) -> KResult { + Self::new(User::with_addr(vaddr)) } /// # Might Sleep @@ -60,22 +68,48 @@ impl UserPointer<'_, T, CONST> { } pub fn offset(&self, offset: isize) -> KResult { - let new_vaddr = self.pointer.ptr as isize + offset * size_of::() as isize; - Self::new_vaddr(new_vaddr as usize) + let new_ptr = self.pointer.ptr.offset(offset * size_of::() as isize); + Self::new(new_ptr.cast()) } } -impl<'a, T: Copy> UserPointer<'a, T, false> { +impl<'a, T: Copy> UserPointerMut<'a, T> { + pub fn new(ptr: UserMut) -> KResult { + let pointer = CheckedUserPointer::new(ptr.cast().as_const(), core::mem::size_of::())?; + + Ok(Self { + pointer, + _phantom: PhantomData, + }) + } + + pub fn with_addr(vaddr: usize) -> KResult { + Self::new(UserMut::with_addr(vaddr)) + } + + /// # Might Sleep + pub fn read(&self) -> KResult { + let mut value = 
core::mem::MaybeUninit::::uninit(); + self.pointer + .read(value.as_mut_ptr() as *mut (), core::mem::size_of::())?; + Ok(unsafe { value.assume_init() }) + } + + pub fn offset(&self, offset: isize) -> KResult { + let new_ptr = self.pointer.ptr.offset(offset * size_of::() as isize); + Self::new(unsafe { new_ptr.cast().as_mut() }) + } + pub fn write(&self, value: T) -> KResult<()> { self.pointer - .write(&value as *const T as *mut (), core::mem::size_of::()) + .write(&raw const value as *mut (), core::mem::size_of::()) } } impl CheckedUserPointer<'_> { - pub fn new(ptr: *const u8, len: usize) -> KResult { + pub fn new(ptr: User, len: usize) -> KResult { const USER_MAX_ADDR: usize = 0x7ff_fff_fff_fff; - let end = (ptr as usize).checked_add(len); + let end = ptr.addr().checked_add(len); if ptr.is_null() || end.ok_or(EFAULT)? > USER_MAX_ADDR { Err(EFAULT) } else { @@ -89,19 +123,10 @@ impl CheckedUserPointer<'_> { pub fn forward(&mut self, offset: usize) { assert!(offset <= self.len); - self.ptr = self.ptr.wrapping_offset(offset as isize); + self.ptr = self.ptr.offset(offset as isize); self.len -= offset; } - pub fn get_const(&self) -> *const T { - self.ptr as *const T - } - - pub fn as_slice(&self) -> &[u8] { - // SAFETY: the pointer's validity is checked in `new` - unsafe { core::slice::from_raw_parts(self.ptr, self.len) } - } - /// # Might Sleep pub fn read(&self, buffer: *mut (), total: usize) -> KResult<()> { assert_preempt_enabled!("UserPointer::read"); @@ -126,7 +151,7 @@ impl CheckedUserPointer<'_> { ".quad 0x3", // type: load ".popsection", inout("rcx") total => error_bytes, - inout("rsi") self.ptr => _, + inout("rsi") self.ptr.addr() => _, inout("rdi") buffer => _, ); @@ -148,7 +173,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x3", // type: load ".popsection", inout("a0") total => error_bytes, - inout("a1") self.ptr => _, + inout("a1") self.ptr.addr() => _, inout("a2") buffer => _, out("t0") _, ); @@ -171,7 +196,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x3", 
// type: load ".popsection", inout("$a0") total => error_bytes, - inout("$a1") self.ptr => _, + inout("$a1") self.ptr.addr() => _, inout("$a2") buffer => _, out("$t0") _, ); @@ -210,7 +235,7 @@ impl CheckedUserPointer<'_> { ".popsection", inout("rcx") total => error_bytes, inout("rsi") data => _, - inout("rdi") self.ptr => _, + inout("rdi") self.ptr.addr() => _, ); #[cfg(target_arch = "riscv64")] @@ -232,7 +257,7 @@ impl CheckedUserPointer<'_> { ".popsection", inout("a0") total => error_bytes, inout("a1") data => _, - inout("a2") self.ptr => _, + inout("a2") self.ptr.addr() => _, out("t0") _, ); @@ -255,7 +280,7 @@ impl CheckedUserPointer<'_> { ".popsection", inout("$a0") total => error_bytes, inout("$a1") data => _, - inout("$a2") self.ptr => _, + inout("$a2") self.ptr.addr() => _, out("$t0") _, ); }; @@ -293,7 +318,7 @@ impl CheckedUserPointer<'_> { ".popsection", in("rax") 0, inout("rcx") self.len => error_bytes, - inout("rdi") self.ptr => _, + inout("rdi") self.ptr.addr() => _, options(att_syntax) ); @@ -313,7 +338,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x1", // type: store ".popsection", inout("a0") self.len => error_bytes, - inout("a1") self.ptr => _, + inout("a1") self.ptr.addr() => _, ); #[cfg(target_arch = "loongarch64")] @@ -332,7 +357,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x1", // type: store ".popsection", inout("$a0") self.len => error_bytes, - inout("$a1") self.ptr => _, + inout("$a1") self.ptr.addr() => _, ); }; @@ -345,8 +370,8 @@ impl CheckedUserPointer<'_> { } impl UserBuffer<'_> { - pub fn new(ptr: *mut u8, size: usize) -> KResult { - let ptr = CheckedUserPointer::new(ptr, size)?; + pub fn new(ptr: UserMut, size: usize) -> KResult { + let ptr = CheckedUserPointer::new(ptr.as_const(), size)?; Ok(Self { ptr, size, cur: 0 }) } @@ -388,7 +413,7 @@ impl<'lt> Buffer for UserBuffer<'lt> { impl<'lt> UserString<'lt> { /// # Might Sleep - pub fn new(ptr: *const u8) -> KResult { + pub fn new(ptr: User) -> KResult { 
assert_preempt_enabled!("UserString::new"); const MAX_LEN: usize = 4096; @@ -416,7 +441,7 @@ impl<'lt> UserString<'lt> { ".popsection", out("al") _, inout("rcx") MAX_LEN => result, - ptr = inout(reg) ptr.ptr => _, + ptr = inout(reg) ptr.ptr.addr() => _, options(att_syntax), ); @@ -439,7 +464,7 @@ impl<'lt> UserString<'lt> { ".popsection", out("t0") _, inout("a0") MAX_LEN => result, - inout("a1") ptr.ptr => _, + inout("a1") ptr.ptr.addr() => _, ); #[cfg(target_arch = "loongarch64")] @@ -461,7 +486,7 @@ impl<'lt> UserString<'lt> { ".popsection", out("$t0") _, inout("$a0") MAX_LEN => result, - inout("$a1") ptr.ptr => _, + inout("$a1") ptr.ptr.addr() => _, ); }; @@ -478,7 +503,7 @@ impl<'lt> UserString<'lt> { pub fn as_cstr(&self) -> &'lt CStr { unsafe { CStr::from_bytes_with_nul_unchecked(core::slice::from_raw_parts( - self.ptr.get_const(), + self.ptr.ptr.addr() as *const u8, self.len + 1, )) } diff --git a/src/kernel/vfs/file.rs b/src/kernel/vfs/file.rs index 75e4df2f..d3739146 100644 --- a/src/kernel/vfs/file.rs +++ b/src/kernel/vfs/file.rs @@ -466,11 +466,11 @@ impl TerminalFile { fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { block_on(self.terminal.ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::new_vaddr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::new_vaddr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::new_vaddr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::new_vaddr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::new_vaddr(arg3)?), + TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), + TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), + TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), + TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), + TIOCGWINSZ => 
TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), _ => return Err(EINVAL), })) } diff --git a/src/lib.rs b/src/lib.rs index beebe7c1..fe4796de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #![no_std] #![no_main] +#![feature(allocator_api)] #![feature(c_size_t)] #![feature(concat_idents)] #![feature(arbitrary_self_types)] @@ -253,6 +254,7 @@ async fn init_process(early_kstack: PRange) { ProgramLoader::parse(fs_context, init_name, init.clone(), argv, envp) .expect("Failed to parse init program") .load() + .await .expect("Failed to load init program") }; From 973f6f2c710f651add6756405998ab9b5bc5ebd8 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Tue, 12 Aug 2025 00:09:30 +0800 Subject: [PATCH 25/29] partial work: vfs asynchronize Signed-off-by: greatbridf --- .../eonix_sync_base/src/locked/proof.rs | 3 + src/kernel/syscall/file_rw.rs | 15 ++-- src/kernel/task/thread.rs | 2 +- src/kernel/vfs/file.rs | 70 ++++++++++--------- 4 files changed, 50 insertions(+), 40 deletions(-) diff --git a/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs b/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs index bac02301..52a5db49 100644 --- a/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs +++ b/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs @@ -25,6 +25,9 @@ where _phantom: PhantomData<&'pos ()>, } +unsafe impl Send for Proof<'_, T> {} +unsafe impl Send for ProofMut<'_, T> {} + /// A trait for types that can be converted to a proof of mutable access. 
/// /// This is used to prove that a mutable reference is valid for the lifetime `'pos` diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index ef222123..3c23b6e9 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -179,7 +179,12 @@ async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult async fn getdents64(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - thread.files.get(fd).ok_or(EBADF)?.getdents64(&mut buffer)?; + thread + .files + .get(fd) + .ok_or(EBADF)? + .getdents64(&mut buffer) + .await?; Ok(buffer.wrote()) } @@ -344,9 +349,9 @@ async fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult< let file = thread.files.get(fd).ok_or(EBADF)?; Ok(match whence { - SEEK_SET => file.seek(SeekOption::Set(offset as usize))?, - SEEK_CUR => file.seek(SeekOption::Current(offset as isize))?, - SEEK_END => file.seek(SeekOption::End(offset as isize))?, + SEEK_SET => file.seek(SeekOption::Set(offset as usize)).await?, + SEEK_CUR => file.seek(SeekOption::Current(offset as isize)).await?, + SEEK_END => file.seek(SeekOption::End(offset as isize)).await?, _ => return Err(EINVAL), } as u64) } @@ -500,7 +505,7 @@ async fn sendfile64(out_fd: FD, in_fd: FD, offset: UserMut, count: usize) -> async fn ioctl(fd: FD, request: usize, arg3: usize) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; - file.ioctl(request, arg3) + file.ioctl(request, arg3).await } #[eonix_macros::define_syscall(SYS_FCNTL64)] diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 3132a9a9..11348e51 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -42,7 +42,7 @@ use stalloc::UnsafeStalloc; static CURRENT_THREAD: Option> = None; #[derive(Clone, Copy)] -pub struct ThreadAlloc<'a>(pub &'a UnsafeStalloc<255, 32>); +pub struct ThreadAlloc<'a>(pub &'a UnsafeStalloc<1023, 32>); pub struct ThreadBuilder { tid: Option, diff 
--git a/src/kernel/vfs/file.rs b/src/kernel/vfs/file.rs index d3739146..6616cbcc 100644 --- a/src/kernel/vfs/file.rs +++ b/src/kernel/vfs/file.rs @@ -8,7 +8,7 @@ use crate::{ kernel::{ constants::{TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, mem::{paging::Page, AsMemoryBlock as _}, - task::{block_on, Thread}, + task::Thread, terminal::{Terminal, TerminalIORequest}, user::{UserPointer, UserPointerMut}, vfs::inode::Inode, @@ -157,8 +157,8 @@ impl Pipe { ) } - fn close_read(&self) { - let mut inner = block_on(self.inner.lock()); + async fn close_read(&self) { + let mut inner = self.inner.lock().await; if inner.read_closed { return; } @@ -167,8 +167,8 @@ impl Pipe { self.cv_write.notify_all(); } - fn close_write(&self) { - let mut inner = block_on(self.inner.lock()); + async fn close_write(&self) { + let mut inner = self.inner.lock().await; if inner.write_closed { return; } @@ -316,8 +316,8 @@ impl InodeFile { }) } - fn seek(&self, option: SeekOption) -> KResult { - let mut cursor = block_on(self.cursor.lock()); + async fn seek(&self, option: SeekOption) -> KResult { + let mut cursor = self.cursor.lock().await; let new_cursor = match option { SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, @@ -333,12 +333,12 @@ impl InodeFile { Ok(new_cursor) } - fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { + async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { if !self.write { return Err(EBADF); } - let mut cursor = block_on(self.cursor.lock()); + let mut cursor = self.cursor.lock().await; if self.append { let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; @@ -357,7 +357,7 @@ impl InodeFile { } } - fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { + async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { if !self.read { return Err(EBADF); } @@ -366,7 +366,7 @@ impl InodeFile { let nread = self.dentry.read(buffer, offset)?; nread } else { - let 
mut cursor = block_on(self.cursor.lock()); + let mut cursor = self.cursor.lock().await; let nread = self.dentry.read(buffer, *cursor)?; @@ -377,8 +377,8 @@ impl InodeFile { Ok(nread) } - fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = block_on(self.cursor.lock()); + async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let mut cursor = self.cursor.lock().await; let nread = self.dentry.readdir(*cursor, |filename, ino| { // Filename length + 1 for padding '\0' @@ -407,8 +407,8 @@ impl InodeFile { Ok(()) } - fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = block_on(self.cursor.lock()); + async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let mut cursor = self.cursor.lock().await; let nread = self.dentry.readdir(*cursor, |filename, ino| { // + 1 for filename length padding '\0', + 1 for d_type. @@ -464,22 +464,24 @@ impl TerminalFile { self.terminal.poll_in().await.map(|_| PollEvent::Readable) } - fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { - block_on(self.terminal.ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), - _ => return Err(EINVAL), - })) + async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { + self.terminal + .ioctl(match request as u32 { + TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), + TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), + TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), + TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), + 
TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), + _ => return Err(EINVAL), + }) + .await } } impl FileType { pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { match self { - FileType::Inode(inode) => inode.read(buffer, offset), + FileType::Inode(inode) => inode.read(buffer, offset).await, FileType::PipeRead(pipe) => pipe.pipe.read(buffer).await, FileType::TTY(tty) => tty.read(buffer).await, FileType::CharDev(device) => device.read(buffer), @@ -504,7 +506,7 @@ impl FileType { pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { match self { - FileType::Inode(inode) => inode.write(stream, offset), + FileType::Inode(inode) => inode.write(stream, offset).await, FileType::PipeWrite(pipe) => pipe.pipe.write(stream).await, FileType::TTY(tty) => tty.write(stream), FileType::CharDev(device) => device.write(stream), @@ -512,23 +514,23 @@ impl FileType { } } - pub fn seek(&self, option: SeekOption) -> KResult { + pub async fn seek(&self, option: SeekOption) -> KResult { match self { - FileType::Inode(inode) => inode.seek(option), + FileType::Inode(inode) => inode.seek(option).await, _ => Err(ESPIPE), } } - pub fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { + pub async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { match self { - FileType::Inode(inode) => inode.getdents(buffer), + FileType::Inode(inode) => inode.getdents(buffer).await, _ => Err(ENOTDIR), } } - pub fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { + pub async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { match self { - FileType::Inode(inode) => inode.getdents64(buffer), + FileType::Inode(inode) => inode.getdents64(buffer).await, _ => Err(ENOTDIR), } } @@ -568,9 +570,9 @@ impl FileType { Ok(nsent) } - pub fn ioctl(&self, request: usize, arg3: usize) -> KResult { + pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult { match self { - FileType::TTY(tty) => 
tty.ioctl(request, arg3).map(|_| 0), + FileType::TTY(tty) => tty.ioctl(request, arg3).await.map(|_| 0), _ => Err(ENOTTY), } } From db931a80384bc3f379105024719c204b866a3cc7 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Fri, 15 Aug 2025 02:05:07 +0800 Subject: [PATCH 26/29] partial work: file array rework and asynchronize Signed-off-by: greatbridf --- Cargo.lock | 14 +- Cargo.toml | 2 +- crates/posix_types/src/getdent.rs | 28 ++ crates/posix_types/src/lib.rs | 1 + src/fs/ext4.rs | 37 +- src/fs/fat32.rs | 7 +- src/fs/procfs.rs | 12 +- src/fs/tmpfs.rs | 56 +-- src/kernel/chardev.rs | 7 +- src/kernel/syscall.rs | 10 +- src/kernel/syscall/file_rw.rs | 35 +- src/kernel/syscall/mm.rs | 13 +- src/kernel/syscall/procops.rs | 9 +- src/kernel/task/process_list.rs | 2 +- src/kernel/vfs/dentry.rs | 24 +- src/kernel/vfs/dentry/dcache.rs | 8 +- src/kernel/vfs/file.rs | 637 --------------------------- src/kernel/vfs/file/inode_file.rs | 223 ++++++++++ src/kernel/vfs/file/mod.rs | 232 ++++++++++ src/kernel/vfs/file/pipe.rs | 211 +++++++++ src/kernel/vfs/file/terminal_file.rs | 55 +++ src/kernel/vfs/filearray.rs | 387 ++++++++++------ src/kernel/vfs/inode.rs | 156 ++++++- src/kernel/vfs/mod.rs | 27 +- src/lib.rs | 3 +- 25 files changed, 1280 insertions(+), 916 deletions(-) create mode 100644 crates/posix_types/src/getdent.rs delete mode 100644 src/kernel/vfs/file.rs create mode 100644 src/kernel/vfs/file/inode_file.rs create mode 100644 src/kernel/vfs/file/mod.rs create mode 100644 src/kernel/vfs/file/pipe.rs create mode 100644 src/kernel/vfs/file/terminal_file.rs diff --git a/Cargo.lock b/Cargo.lock index 484f2796..f4ed3bd8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,7 +146,7 @@ dependencies = [ "eonix_runtime", "eonix_sync", "ext4_rs", - "intrusive-collections", + "intrusive-collections 0.9.8", "intrusive_list", "itertools", "pointers", @@ -213,7 +213,7 @@ dependencies = [ "eonix_percpu", "eonix_preempt", "eonix_sync", - "intrusive-collections", + "intrusive-collections 
0.9.7", "pointers", ] @@ -246,7 +246,7 @@ dependencies = [ "eonix_preempt", "eonix_spin", "eonix_sync_base", - "intrusive-collections", + "intrusive-collections 0.9.7", ] [[package]] @@ -274,6 +274,14 @@ dependencies = [ "memoffset", ] +[[package]] +name = "intrusive-collections" +version = "0.9.8" +source = "git+https://github.com/greatbridf/intrusive-rs#0e2d88bffc9df606566fba2d61d1217182b06975" +dependencies = [ + "memoffset", +] + [[package]] name = "intrusive_list" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 25768c83..4bc8bbe8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ posix_types = { path = "./crates/posix_types" } slab_allocator = { path = "./crates/slab_allocator" } bitflags = "2.6.0" -intrusive-collections = "0.9.7" +intrusive-collections = { version = "0.9.8", git = "https://github.com/greatbridf/intrusive-rs" } itertools = { version = "0.13.0", default-features = false } acpi = "5.2.0" align_ext = "0.1.0" diff --git a/crates/posix_types/src/getdent.rs b/crates/posix_types/src/getdent.rs new file mode 100644 index 00000000..922121f6 --- /dev/null +++ b/crates/posix_types/src/getdent.rs @@ -0,0 +1,28 @@ +#[derive(Copy, Clone, Debug)] +#[repr(C, packed)] +pub struct UserDirent64 { + /// Inode number + pub d_ino: u64, + /// Implementation defined. We ignore it + pub d_off: u64, + /// Length of this record + pub d_reclen: u16, + /// File type. Set to 0 + pub d_type: u8, + /// Filename with a padding '\0' + pub d_name: [u8; 0], +} + +/// File type is at offset `d_reclen - 1`. Set it to 0 +#[derive(Copy, Clone, Debug)] +#[repr(C, packed)] +pub struct UserDirent { + /// Inode number + pub d_ino: u32, + /// Implementation defined. 
We ignore it + pub d_off: u32, + /// Length of this record + pub d_reclen: u16, + /// Filename with a padding '\0' + pub d_name: [u8; 0], +} diff --git a/crates/posix_types/src/lib.rs b/crates/posix_types/src/lib.rs index dfe8d089..49d2ac5f 100644 --- a/crates/posix_types/src/lib.rs +++ b/crates/posix_types/src/lib.rs @@ -2,6 +2,7 @@ pub mod constants; pub mod ctypes; +pub mod getdent; pub mod namei; pub mod open; pub mod poll; diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index b4953491..7225d99b 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -2,6 +2,7 @@ use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use crate::kernel::mem::{PageCache, PageCacheBackend}; use crate::kernel::task::block_on; +use crate::kernel::vfs::inode::{AtomicMode, Mode}; use crate::{ io::{Buffer, ByteBuffer}, kernel::{ @@ -12,7 +13,6 @@ use crate::{ dentry::Dentry, inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData}, mount::{register_filesystem, Mount, MountCreator}, - s_isdir, s_isreg, vfs::Vfs, DevId, FsContext, }, @@ -86,30 +86,27 @@ impl Ext4Fs { fn get_or_insert( &self, icache: &mut BTreeMap, - mut idata: InodeData, + idata: InodeData, ) -> Arc { match icache.entry(idata.ino) { Entry::Occupied(occupied) => occupied.get().clone().into_inner(), - Entry::Vacant(vacant) => { - let mode = *idata.mode.get_mut(); - if s_isreg(mode) { - vacant - .insert(Ext4Inode::File(FileInode::new(idata))) - .clone() - .into_inner() - } else if s_isdir(mode) { - vacant - .insert(Ext4Inode::Dir(Arc::new(DirInode { idata }))) - .clone() - .into_inner() - } else { - println_warn!("ext4: Unsupported inode type: {mode:#o}"); + Entry::Vacant(vacant) => match idata.mode.load().format() { + Mode::REG => vacant + .insert(Ext4Inode::File(FileInode::new(idata))) + .clone() + .into_inner(), + Mode::DIR => vacant + .insert(Ext4Inode::Dir(Arc::new(DirInode { idata }))) + .clone() + .into_inner(), + mode => { + println_warn!("ext4: Unsupported inode type: {:#o}", mode.format_bits()); vacant 
.insert(Ext4Inode::File(FileInode::new(idata))) .clone() .into_inner() } - } + }, } } } @@ -137,7 +134,7 @@ impl Ext4Fs { nlink: AtomicNlink::new(root_inode.inode.links_count() as _), uid: AtomicU32::new(root_inode.inode.uid() as _), gid: AtomicU32::new(root_inode.inode.gid() as _), - mode: AtomicU32::new(root_inode.inode.mode() as _), + mode: AtomicMode::new(root_inode.inode.mode() as _), atime: Spin::new(Instant::new( root_inode.inode.atime() as _, root_inode.inode.i_atime_extra() as _, @@ -201,7 +198,7 @@ impl PageCacheBackend for FileInode { self.read_direct(page, offset) } - fn write_page(&self, page: &crate::kernel::mem::CachePage, offset: usize) -> KResult { + fn write_page(&self, _page: &crate::kernel::mem::CachePage, _offset: usize) -> KResult { todo!() } @@ -269,7 +266,7 @@ impl Inode for DirInode { nlink: AtomicNlink::new(attr.nlink as _), uid: AtomicU32::new(attr.uid), gid: AtomicU32::new(attr.gid), - mode: AtomicU32::new(attr.kind.bits() as u32 | real_perm), + mode: AtomicMode::new(attr.kind.bits() as u32 | real_perm), atime: Spin::new(Instant::new(attr.atime as _, 0)), ctime: Spin::new(Instant::new(attr.ctime as _, 0)), mtime: Spin::new(Instant::new(attr.mtime as _, 0)), diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 9f0adac5..781d539b 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -5,12 +5,11 @@ use crate::io::Stream; use crate::kernel::constants::EIO; use crate::kernel::mem::AsMemoryBlock; use crate::kernel::task::block_on; -use crate::kernel::vfs::inode::WriteOffset; +use crate::kernel::vfs::inode::{Mode, WriteOffset}; use crate::{ io::{Buffer, ByteBuffer, UninitBuffer}, kernel::{ block::{make_device, BlockDevice, BlockDeviceRequest}, - constants::{S_IFDIR, S_IFREG}, mem::{ paging::Page, {CachePage, PageCache, PageCacheBackend}, @@ -253,7 +252,7 @@ impl FileInode { // Safety: We are initializing the inode inode.nlink.store(1, Ordering::Relaxed); - inode.mode.store(S_IFREG | 0o777, Ordering::Relaxed); + 
inode.mode.store(Mode::REG.perm(0o777)); inode.size.store(size as u64, Ordering::Relaxed); inode @@ -343,7 +342,7 @@ impl DirInode { // Safety: We are initializing the inode inode.nlink.store(2, Ordering::Relaxed); - inode.mode.store(S_IFDIR | 0o777, Ordering::Relaxed); + inode.mode.store(Mode::DIR.perm(0o777)); inode.size.store(size as u64, Ordering::Relaxed); inode diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 82f597b8..2ed24613 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,10 +1,10 @@ use crate::kernel::constants::{EACCES, ENOTDIR}; use crate::kernel::task::block_on; use crate::kernel::timer::Instant; +use crate::kernel::vfs::inode::{AtomicMode, Mode}; use crate::{ io::Buffer, kernel::{ - constants::{S_IFDIR, S_IFREG}, mem::paging::PageBuffer, vfs::{ dentry::Dentry, @@ -69,12 +69,12 @@ define_struct_inode! { impl FileInode { pub fn new(ino: Ino, vfs: Weak, file: Box) -> Arc { - let mut mode = S_IFREG; + let mut mode = Mode::REG; if file.can_read() { - mode |= 0o444; + mode.set_perm(0o444); } if file.can_write() { - mode |= 0o200; + mode.set_perm(0o222); } let mut inode = Self { @@ -82,7 +82,7 @@ impl FileInode { file, }; - inode.idata.mode.store(mode, Ordering::Relaxed); + inode.idata.mode.store(mode); inode.idata.nlink.store(1, Ordering::Relaxed); *inode.ctime.get_mut() = Instant::now(); *inode.mtime.get_mut() = Instant::now(); @@ -123,7 +123,7 @@ impl DirInode { pub fn new(ino: Ino, vfs: Weak) -> Arc { Self::new_locked(ino, vfs, |inode, rwsem| unsafe { addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem)); - addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | 0o755).into()); + addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::DIR.perm(0o755))); addr_of_mut_field!(&mut *inode, nlink).write(1.into()); addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs index 
840f97b1..5bac591f 100644 --- a/src/fs/tmpfs.rs +++ b/src/fs/tmpfs.rs @@ -3,16 +3,14 @@ use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOT use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend}; use crate::kernel::task::block_on; use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::InodeData; use crate::kernel::vfs::inode::RenameData; +use crate::kernel::vfs::inode::{AtomicMode, InodeData}; use crate::{ io::Buffer, - kernel::constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFREG}, kernel::vfs::{ dentry::{dcache, Dentry}, inode::{define_struct_inode, AtomicIno, Ino, Inode, Mode, WriteOffset}, mount::{register_filesystem, Mount, MountCreator, MS_RDONLY}, - s_isblk, s_ischr, vfs::Vfs, DevId, }, @@ -46,7 +44,7 @@ impl NodeInode { Self::new_locked(ino, vfs, |inode, _| unsafe { addr_of_mut_field!(inode, devid).write(devid); - addr_of_mut_field!(&mut *inode, mode).write(mode.into()); + addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(mode)); addr_of_mut_field!(&mut *inode, nlink).write(1.into()); addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -74,7 +72,8 @@ impl DirectoryInode { .write(Locked::new(vec![(Arc::from(b".".as_slice()), ino)], rwsem)); addr_of_mut_field!(&mut *inode, size).write(1.into()); - addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | (mode & 0o777)).into()); + addr_of_mut_field!(&mut *inode, mode) + .write(AtomicMode::from(Mode::DIR.perm(mode.non_format_bits()))); addr_of_mut_field!(&mut *inode, nlink).write(1.into()); // link from `.` to itself addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -108,7 +107,7 @@ impl DirectoryInode { _file_lock: ProofMut<()>, ) -> KResult<()> { // SAFETY: `file_lock` has done the synchronization - if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { + 
if file.mode.load().is_dir() { return Err(EISDIR); } @@ -163,7 +162,7 @@ impl Inode for DirectoryInode { } fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> { - if !s_ischr(mode) && !s_isblk(mode) { + if !mode.is_chr() && !mode.is_blk() { return Err(EINVAL); } @@ -173,12 +172,7 @@ impl Inode for DirectoryInode { let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); - let file = NodeInode::new( - ino, - self.vfs.clone(), - mode & (0o777 | S_IFBLK | S_IFCHR), - dev, - ); + let file = NodeInode::new(ino, self.vfs.clone(), mode, dev); self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); at.save_reg(file) @@ -243,9 +237,8 @@ impl Inode for DirectoryInode { let _lock = block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(Ordering::Relaxed); - self.mode - .store((old & !0o777) | (mode & 0o777), Ordering::Relaxed); + let old = self.mode.load(); + self.mode.store(old.perm(mode.non_format_bits())); *self.ctime.lock() = Instant::now(); Ok(()) @@ -331,12 +324,10 @@ impl Inode for DirectoryInode { let _new_file_lock = block_on(new_file.rwsem.write()); // SAFETY: `new_file_lock` has done the synchronization - if new_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { - return Err(EISDIR); - } else { - if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { - return Err(ENOTDIR); - } + match (new_file.mode.load(), old_file.mode.load()) { + (Mode::DIR, _) => return Err(EISDIR), + (_, Mode::DIR) => return Err(ENOTDIR), + _ => {} } entries.remove(new_idx); @@ -393,10 +384,10 @@ impl Inode for DirectoryInode { let new_file = new_file.unwrap(); let new_file_lock = block_on(new_file.rwsem.write()); - if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 - && new_file.mode.load(Ordering::Relaxed) & S_IFDIR == 0 - { - return Err(ENOTDIR); + match (old_file.mode.load(), new_file.mode.load()) { + (Mode::DIR, Mode::DIR) => {} + (Mode::DIR, _) => return Err(ENOTDIR), + (_, _) => {} } // Unlink the 
old file that was replaced @@ -442,7 +433,7 @@ impl SymlinkInode { let len = target.len(); addr_of_mut_field!(inode, target).write(target); - addr_of_mut_field!(&mut *inode, mode).write((S_IFLNK | 0o777).into()); + addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::LNK.perm(0o777))); addr_of_mut_field!(&mut *inode, size).write((len as u64).into()); addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -482,9 +473,7 @@ impl FileInode { pages: PageCache::new(weak_self.clone()), }); - inode - .mode - .store(S_IFREG | (mode & 0o777), Ordering::Relaxed); + inode.mode.store(Mode::REG.perm(mode.non_format_bits())); inode.nlink.store(1, Ordering::Relaxed); inode.size.store(size as u64, Ordering::Relaxed); inode @@ -557,9 +546,8 @@ impl Inode for FileInode { let _lock = block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(Ordering::Relaxed); - self.mode - .store((old & !0o777) | (mode & 0o777), Ordering::Relaxed); + let old = self.mode.load(); + self.mode.store(old.perm(mode.non_format_bits())); *self.ctime.lock() = Instant::now(); Ok(()) @@ -600,7 +588,7 @@ impl TmpFs { }); let weak = Arc::downgrade(&tmpfs); - let root_dir = DirectoryInode::new(0, weak, 0o755); + let root_dir = DirectoryInode::new(0, weak, Mode::new(0o755)); Ok((tmpfs, root_dir)) } diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs index 4e0d9d0b..aff3271e 100644 --- a/src/kernel/chardev.rs +++ b/src/kernel/chardev.rs @@ -4,10 +4,7 @@ use super::{ constants::{EEXIST, EIO}, task::{block_on, ProcessList, Thread}, terminal::Terminal, - vfs::{ - file::{File, FileType, TerminalFile}, - DevId, - }, + vfs::{DevId, File, FileType, TerminalFile}, }; use crate::{ io::{Buffer, Stream, StreamRead}, @@ -71,7 +68,7 @@ impl CharDevice { } } - pub fn open(self: &Arc, flags: OpenFlags) -> KResult> { + pub fn open(self: &Arc, flags: OpenFlags) -> KResult { 
Ok(match &self.device { CharDeviceType::Terminal(terminal) => { let procs = block_on(ProcessList::get().read()); diff --git a/src/kernel/syscall.rs b/src/kernel/syscall.rs index 4131f3c4..78ddcd1c 100644 --- a/src/kernel/syscall.rs +++ b/src/kernel/syscall.rs @@ -263,13 +263,19 @@ impl Deref for UserMut { impl core::fmt::Debug for User { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "User({:#x?})", self.0.addr()) + match self.0 { + VAddr::NULL => write!(f, "User(NULL)"), + _ => write!(f, "User({:#018x?})", self.0.addr()), + } } } impl core::fmt::Debug for UserMut { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "UserMut({:#x?})", self.0.addr()) + match self.0 { + VAddr::NULL => write!(f, "UserMut(NULL)"), + _ => write!(f, "UserMut({:#018x?})", self.0.addr()), + } } } diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 3c23b6e9..1a48b255 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,20 +1,19 @@ use super::{FromSyscallArg, User}; use crate::io::IntoStream; use crate::kernel::constants::{ - EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, S_IFBLK, S_IFCHR, + EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, }; use crate::kernel::syscall::UserMut; use crate::kernel::task::Thread; use crate::kernel::timer::sleep; use crate::kernel::vfs::filearray::FD; +use crate::kernel::vfs::inode::Mode; +use crate::kernel::vfs::{PollEvent, SeekOption}; use crate::{ io::{Buffer, BufferFill}, kernel::{ user::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}, - vfs::{ - dentry::Dentry, - file::{PollEvent, SeekOption}, - }, + vfs::dentry::Dentry, }, path::Path, prelude::*, @@ -120,8 +119,12 @@ async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KRes } #[eonix_macros::define_syscall(SYS_OPENAT)] -async fn openat(dirfd: FD, pathname: User, flags: 
OpenFlags, mode: u32) -> KResult { +async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mut mode: Mode) -> KResult { let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink())?; + + let umask = *thread.fs_context.umask.lock(); + mode.mask_perm(!umask.non_format_bits()); + thread.files.open(&dentry, flags, mode) } @@ -133,7 +136,7 @@ async fn open(path: User, flags: OpenFlags, mode: u32) -> KResult { #[eonix_macros::define_syscall(SYS_CLOSE)] async fn close(fd: FD) -> KResult<()> { - thread.files.close(fd) + thread.files.close(fd).await } #[eonix_macros::define_syscall(SYS_DUP)] @@ -149,7 +152,7 @@ async fn dup2(old_fd: FD, new_fd: FD) -> KResult { #[eonix_macros::define_syscall(SYS_DUP3)] async fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { - thread.files.dup_to(old_fd, new_fd, flags) + thread.files.dup_to(old_fd, new_fd, flags).await } #[eonix_macros::define_syscall(SYS_PIPE2)] @@ -254,9 +257,9 @@ async fn statx( } #[eonix_macros::define_syscall(SYS_MKDIRAT)] -async fn mkdirat(dirfd: FD, pathname: User, mode: u32) -> KResult<()> { +async fn mkdirat(dirfd: FD, pathname: User, mut mode: Mode) -> KResult<()> { let umask = *thread.fs_context.umask.lock(); - let mode = mode & !umask & 0o777; + mode.mask_perm(!umask.non_format_bits()); let dentry = dentry_from(thread, dirfd, pathname, true)?; dentry.mkdir(mode) @@ -311,11 +314,15 @@ async fn symlink(target: User, linkpath: User) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_MKNODAT)] -async fn mknodat(dirfd: FD, pathname: User, mode: u32, dev: u32) -> KResult<()> { +async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: u32) -> KResult<()> { + if !mode.is_blk() && !mode.is_chr() { + return Err(EINVAL); + } + let dentry = dentry_from(thread, dirfd, pathname, true)?; let umask = *thread.fs_context.umask.lock(); - let mode = mode & ((!umask & 0o777) | (S_IFBLK | S_IFCHR)); + mode.mask_perm(!umask.non_format_bits()); dentry.mknod(mode, dev) } @@ -616,7 +623,7 @@ 
async fn fchownat( } #[eonix_macros::define_syscall(SYS_FCHMODAT)] -async fn fchmodat(dirfd: FD, pathname: User, mode: u32, flags: AtFlags) -> KResult<()> { +async fn fchmodat(dirfd: FD, pathname: User, mode: Mode, flags: AtFlags) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -632,7 +639,7 @@ async fn fchmodat(dirfd: FD, pathname: User, mode: u32, flags: AtFlags) -> K } #[eonix_macros::define_syscall(SYS_FCHMOD)] -async fn chmod(pathname: User, mode: u32) -> KResult<()> { +async fn chmod(pathname: User, mode: Mode) -> KResult<()> { sys_fchmodat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()).await } diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index 547635d4..c6300ac7 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -4,6 +4,7 @@ use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; use crate::kernel::mem::FileMapping; use crate::kernel::task::Thread; use crate::kernel::vfs::filearray::FD; +use crate::kernel::vfs::inode::Mode; use crate::{ kernel::{ constants::{UserMmapFlags, UserMmapProtocol}, @@ -66,11 +67,11 @@ async fn do_mmap2( Mapping::Anonymous } else { // The mode is unimportant here, since we are checking prot in mm_area. 
- let shared_area = - SHM_MANAGER - .lock() - .await - .create_shared_area(len, thread.process.pid, 0x777); + let shared_area = SHM_MANAGER.lock().await.create_shared_area( + len, + thread.process.pid, + Mode::REG.perm(0o777), + ); Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) } } else { @@ -185,7 +186,7 @@ async fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { let mut shm_manager = SHM_MANAGER.lock().await; let shmid = gen_shm_id(key)?; - let mode = shmflg & 0o777; + let mode = Mode::REG.perm(shmflg); let shmflg = ShmFlags::from_bits_truncate(shmflg); if key == IPC_PRIVATE { diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 1dee462d..7dd573cc 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -16,6 +16,7 @@ use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; use crate::kernel::user::UserString; use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::vfs::inode::Mode; use crate::kernel::vfs::{self, dentry::Dentry}; use crate::path::Path; use crate::{kernel::user::UserBuffer, prelude::*}; @@ -99,12 +100,10 @@ async fn clock_nanosleep( } #[eonix_macros::define_syscall(SYS_UMASK)] -async fn umask(mask: u32) -> KResult { +async fn umask(mask: Mode) -> KResult { let mut umask = thread.fs_context.umask.lock(); - let old = *umask; - *umask = mask & 0o777; - Ok(old) + Ok(core::mem::replace(&mut *umask, mask.non_format())) } #[eonix_macros::define_syscall(SYS_GETCWD)] @@ -221,7 +220,7 @@ async fn execve(exec: User, argv: User, envp: User) -> KResult Err(EISDIR), - mode if s_isreg(mode) => inode.read(buffer, offset), - mode if s_isblk(mode) => { + match inode.mode.load().format() { + Mode::DIR => Err(EISDIR), + Mode::REG => inode.read(buffer, offset), + Mode::BLK => { let device = BlockDevice::get(inode.devid()?)?; Ok(device.read_some(offset, buffer)?.allow_partial()) } - mode if s_ischr(mode) => { + Mode::CHR => { let device = 
CharDevice::get(inode.devid()?).ok_or(EPERM)?; device.read(buffer) } @@ -427,11 +427,11 @@ impl Dentry { pub fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load(Ordering::Relaxed) { - mode if s_isdir(mode) => Err(EISDIR), - mode if s_isreg(mode) => inode.write(stream, offset), - mode if s_isblk(mode) => Err(EINVAL), // TODO - mode if s_ischr(mode) => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), + match inode.mode.load().format() { + Mode::DIR => Err(EISDIR), + Mode::REG => inode.write(stream, offset), + Mode::BLK => Err(EINVAL), // TODO + Mode::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), _ => Err(EINVAL), } } diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs index 06a4e14d..188a1cfc 100644 --- a/src/kernel/vfs/dentry/dcache.rs +++ b/src/kernel/vfs/dentry/dcache.rs @@ -1,9 +1,9 @@ use super::{Dentry, Inode}; use crate::kernel::constants::ENOENT; use crate::kernel::task::block_on; +use crate::kernel::vfs::inode::Mode; use crate::rcu::RCUPointer; use crate::{ - kernel::vfs::{s_isdir, s_islnk}, prelude::*, rcu::{RCUIterator, RCUList}, }; @@ -57,9 +57,9 @@ pub fn d_try_revalidate(dentry: &Arc) { /// /// Dentry flags will be determined by the inode's mode. 
pub fn d_save(dentry: &Arc, inode: Arc) -> KResult<()> { - match inode.mode.load(Ordering::Acquire) { - mode if s_isdir(mode) => dentry.save_dir(inode), - mode if s_islnk(mode) => dentry.save_symlink(inode), + match inode.mode.load().format() { + Mode::DIR => dentry.save_dir(inode), + Mode::LNK => dentry.save_symlink(inode), _ => dentry.save_reg(inode), } } diff --git a/src/kernel/vfs/file.rs b/src/kernel/vfs/file.rs deleted file mode 100644 index 6616cbcc..00000000 --- a/src/kernel/vfs/file.rs +++ /dev/null @@ -1,637 +0,0 @@ -use super::{ - dentry::Dentry, - inode::{Mode, WriteOffset}, - s_isblk, s_isreg, -}; -use crate::{ - io::{Buffer, BufferFill, ByteBuffer, Chunks, IntoStream}, - kernel::{ - constants::{TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, - mem::{paging::Page, AsMemoryBlock as _}, - task::Thread, - terminal::{Terminal, TerminalIORequest}, - user::{UserPointer, UserPointerMut}, - vfs::inode::Inode, - CharDevice, - }, - prelude::*, - sync::CondVar, -}; -use crate::{ - io::{Stream, StreamRead}, - kernel::constants::{ - EBADF, EFAULT, EINTR, EINVAL, ENOTDIR, ENOTTY, EOVERFLOW, EPIPE, ESPIPE, S_IFMT, - }, -}; -use alloc::{collections::vec_deque::VecDeque, sync::Arc}; -use bitflags::bitflags; -use core::{ - ops::{ControlFlow, Deref}, - sync::atomic::{AtomicU32, Ordering}, -}; -use eonix_sync::Mutex; -use posix_types::{open::OpenFlags, signal::Signal, stat::StatX}; - -pub struct InodeFile { - read: bool, - write: bool, - append: bool, - /// Only a few modes those won't possibly change are cached here to speed up file operations. - /// Specifically, `S_IFMT` masked bits. 
- mode: Mode, - cursor: Mutex, - dentry: Arc, -} - -pub struct PipeInner { - buffer: VecDeque, - read_closed: bool, - write_closed: bool, -} - -pub struct Pipe { - inner: Mutex, - cv_read: CondVar, - cv_write: CondVar, -} - -pub struct PipeReadEnd { - pipe: Arc, -} - -pub struct PipeWriteEnd { - pipe: Arc, -} - -pub struct TerminalFile { - terminal: Arc, -} - -// TODO: We should use `File` as the base type, instead of `Arc` -// If we need shared states, like for `InodeFile`, the files themselves should -// have their own shared semantics. All `File` variants will just keep the -// `Clone` semantics. -// -// e.g. The `CharDevice` itself is stateless. -pub enum FileType { - Inode(InodeFile), - PipeRead(PipeReadEnd), - PipeWrite(PipeWriteEnd), - TTY(TerminalFile), - CharDev(Arc), -} - -pub struct File { - flags: AtomicU32, - file_type: FileType, -} - -impl File { - pub fn get_inode(&self) -> KResult>> { - match &self.file_type { - FileType::Inode(inode_file) => Ok(Some(inode_file.dentry.get_inode()?)), - _ => Ok(None), - } - } -} - -pub enum SeekOption { - Set(usize), - Current(isize), - End(isize), -} - -bitflags! 
{ - pub struct PollEvent: u16 { - const Readable = 0x0001; - const Writable = 0x0002; - } -} - -impl Drop for PipeReadEnd { - fn drop(&mut self) { - self.pipe.close_read(); - } -} - -impl Drop for PipeWriteEnd { - fn drop(&mut self) { - self.pipe.close_write(); - } -} - -fn send_sigpipe_to_current() { - let current = Thread::current(); - current.raise(Signal::SIGPIPE); -} - -impl Pipe { - const PIPE_SIZE: usize = 4096; - - /// # Return - /// `(read_end, write_end)` - pub fn new(flags: OpenFlags) -> (Arc, Arc) { - let pipe = Arc::new(Self { - inner: Mutex::new(PipeInner { - buffer: VecDeque::with_capacity(Self::PIPE_SIZE), - read_closed: false, - write_closed: false, - }), - cv_read: CondVar::new(), - cv_write: CondVar::new(), - }); - - let read_flags = flags.difference(OpenFlags::O_WRONLY | OpenFlags::O_RDWR); - let mut write_flags = read_flags; - write_flags.insert(OpenFlags::O_WRONLY); - - ( - Arc::new(File { - flags: AtomicU32::new(read_flags.bits()), - file_type: FileType::PipeRead(PipeReadEnd { pipe: pipe.clone() }), - }), - Arc::new(File { - flags: AtomicU32::new(write_flags.bits()), - file_type: FileType::PipeWrite(PipeWriteEnd { pipe }), - }), - ) - } - - async fn close_read(&self) { - let mut inner = self.inner.lock().await; - if inner.read_closed { - return; - } - - inner.read_closed = true; - self.cv_write.notify_all(); - } - - async fn close_write(&self) { - let mut inner = self.inner.lock().await; - if inner.write_closed { - return; - } - - inner.write_closed = true; - self.cv_read.notify_all(); - } - - async fn poll(&self, event: PollEvent) -> KResult { - if !event.contains(PollEvent::Readable) { - unimplemented!("Poll event not supported."); - } - - let mut inner = self.inner.lock().await; - while inner.buffer.is_empty() && !inner.write_closed { - inner = self.cv_read.wait(inner).await; - } - - if Thread::current().signal_list.has_pending_signal() { - return Err(EINTR); - } - - let mut retval = PollEvent::empty(); - if inner.write_closed { - retval 
|= PollEvent::Writable; - } - - if !inner.buffer.is_empty() { - retval |= PollEvent::Readable; - } - - Ok(retval) - } - - async fn read(&self, buffer: &mut dyn Buffer) -> KResult { - let mut inner = self.inner.lock().await; - - while !inner.write_closed && inner.buffer.is_empty() { - inner = self.cv_read.wait(inner).await; - if Thread::current().signal_list.has_pending_signal() { - return Err(EINTR); - } - } - - let (data1, data2) = inner.buffer.as_slices(); - let nread = buffer.fill(data1)?.allow_partial() + buffer.fill(data2)?.allow_partial(); - inner.buffer.drain(..nread); - - self.cv_write.notify_all(); - Ok(nread) - } - - async fn write_atomic(&self, data: &[u8]) -> KResult { - let mut inner = self.inner.lock().await; - - if inner.read_closed { - send_sigpipe_to_current(); - return Err(EPIPE); - } - - while inner.buffer.len() + data.len() > Self::PIPE_SIZE { - inner = self.cv_write.wait(inner).await; - if Thread::current().signal_list.has_pending_signal() { - return Err(EINTR); - } - - if inner.read_closed { - send_sigpipe_to_current(); - return Err(EPIPE); - } - } - - inner.buffer.extend(data); - - self.cv_read.notify_all(); - return Ok(data.len()); - } - - async fn write(&self, stream: &mut dyn Stream) -> KResult { - let mut buffer = [0; Self::PIPE_SIZE]; - let mut total = 0; - while let Some(data) = stream.poll_data(&mut buffer)? { - let nwrote = self.write_atomic(data).await?; - total += nwrote; - if nwrote != data.len() { - break; - } - } - Ok(total) - } -} - -#[derive(Copy, Clone, Debug)] -#[repr(C, packed)] -struct UserDirent64 { - /// Inode number - d_ino: u64, - /// Implementation defined. We ignore it - d_off: u64, - /// Length of this record - d_reclen: u16, - /// File type. Set to 0 - d_type: u8, - /// Filename with a padding '\0' - d_name: [u8; 0], -} - -/// File type is at offset `d_reclen - 1`. Set it to 0 -#[derive(Copy, Clone, Debug)] -#[repr(C, packed)] -struct UserDirent { - /// Inode number - d_ino: u32, - /// Implementation defined. 
We ignore it - d_off: u32, - /// Length of this record - d_reclen: u16, - /// Filename with a padding '\0' - d_name: [u8; 0], -} - -impl InodeFile { - pub fn new(dentry: Arc, flags: OpenFlags) -> Arc { - // SAFETY: `dentry` used to create `InodeFile` is valid. - // SAFETY: `mode` should never change with respect to the `S_IFMT` fields. - let cached_mode = dentry - .get_inode() - .expect("`dentry` is invalid") - .mode - .load(Ordering::Relaxed) - & S_IFMT; - - let (read, write, append) = flags.as_rwa(); - - Arc::new(File { - flags: AtomicU32::new(flags.bits()), - file_type: FileType::Inode(InodeFile { - dentry, - read, - write, - append, - mode: cached_mode, - cursor: Mutex::new(0), - }), - }) - } - - async fn seek(&self, option: SeekOption) -> KResult { - let mut cursor = self.cursor.lock().await; - - let new_cursor = match option { - SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, - SeekOption::Set(n) => n, - SeekOption::End(off) => { - let inode = self.dentry.get_inode()?; - let size = inode.size.load(Ordering::Relaxed) as usize; - size.checked_add_signed(off).ok_or(EOVERFLOW)? - } - }; - - *cursor = new_cursor; - Ok(new_cursor) - } - - async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { - if !self.write { - return Err(EBADF); - } - - let mut cursor = self.cursor.lock().await; - - if self.append { - let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; - - Ok(nwrote) - } else { - let nwrote = if let Some(offset) = offset { - self.dentry.write(stream, WriteOffset::Position(offset))? 
- } else { - let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?; - *cursor += nwrote; - nwrote - }; - - Ok(nwrote) - } - } - - async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { - if !self.read { - return Err(EBADF); - } - - let nread = if let Some(offset) = offset { - let nread = self.dentry.read(buffer, offset)?; - nread - } else { - let mut cursor = self.cursor.lock().await; - - let nread = self.dentry.read(buffer, *cursor)?; - - *cursor += nread; - nread - }; - - Ok(nread) - } - - async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = self.cursor.lock().await; - - let nread = self.dentry.readdir(*cursor, |filename, ino| { - // Filename length + 1 for padding '\0' - let real_record_len = core::mem::size_of::() + filename.len() + 1; - - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } - - let record = UserDirent64 { - d_ino: ino, - d_off: 0, - d_reclen: real_record_len as u16, - d_type: 0, - d_name: [0; 0], - }; - - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0])?.ok_or(EFAULT)?; - - Ok(ControlFlow::Continue(())) - })?; - - *cursor += nread; - Ok(()) - } - - async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = self.cursor.lock().await; - - let nread = self.dentry.readdir(*cursor, |filename, ino| { - // + 1 for filename length padding '\0', + 1 for d_type. 
- let real_record_len = core::mem::size_of::() + filename.len() + 2; - - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } - - let record = UserDirent { - d_ino: ino as u32, - d_off: 0, - d_reclen: real_record_len as u16, - d_name: [0; 0], - }; - - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0, 0])?.ok_or(EFAULT)?; - - Ok(ControlFlow::Continue(())) - })?; - - *cursor += nread; - Ok(()) - } -} - -impl TerminalFile { - pub fn new(tty: Arc, flags: OpenFlags) -> Arc { - Arc::new(File { - flags: AtomicU32::new(flags.bits()), - file_type: FileType::TTY(TerminalFile { terminal: tty }), - }) - } - - async fn read(&self, buffer: &mut dyn Buffer) -> KResult { - self.terminal.read(buffer).await - } - - fn write(&self, stream: &mut dyn Stream) -> KResult { - stream.read_till_end(&mut [0; 128], |data| { - self.terminal.write(data); - Ok(()) - }) - } - - async fn poll(&self, event: PollEvent) -> KResult { - if !event.contains(PollEvent::Readable) { - unimplemented!("Poll event not supported.") - } - - self.terminal.poll_in().await.map(|_| PollEvent::Readable) - } - - async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { - self.terminal - .ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), - _ => return Err(EINVAL), - }) - .await - } -} - -impl FileType { - pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { - match self { - FileType::Inode(inode) => inode.read(buffer, offset).await, - FileType::PipeRead(pipe) => pipe.pipe.read(buffer).await, - FileType::TTY(tty) => 
tty.read(buffer).await, - FileType::CharDev(device) => device.read(buffer), - _ => Err(EBADF), - } - } - - // TODO - // /// Read from the file into the given buffers. - // /// - // /// Reads are atomic, not intermingled with other reads or writes. - // pub fn readv<'r, 'i, I: Iterator>( - // &'r self, - // buffers: I, - // ) -> KResult { - // match self { - // File::Inode(inode) => inode.readv(buffers), - // File::PipeRead(pipe) => pipe.pipe.readv(buffers), - // _ => Err(EBADF), - // } - // } - - pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { - match self { - FileType::Inode(inode) => inode.write(stream, offset).await, - FileType::PipeWrite(pipe) => pipe.pipe.write(stream).await, - FileType::TTY(tty) => tty.write(stream), - FileType::CharDev(device) => device.write(stream), - _ => Err(EBADF), - } - } - - pub async fn seek(&self, option: SeekOption) -> KResult { - match self { - FileType::Inode(inode) => inode.seek(option).await, - _ => Err(ESPIPE), - } - } - - pub async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - match self { - FileType::Inode(inode) => inode.getdents(buffer).await, - _ => Err(ENOTDIR), - } - } - - pub async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - match self { - FileType::Inode(inode) => inode.getdents64(buffer).await, - _ => Err(ENOTDIR), - } - } - - pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { - let buffer_page = Page::alloc(); - // SAFETY: We are the only owner of the page. 
- let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() }; - - match self { - FileType::Inode(file) if s_isblk(file.mode) || s_isreg(file.mode) => (), - _ => return Err(EINVAL), - } - - let mut nsent = 0; - for (cur, len) in Chunks::new(0, count, buffer.len()) { - if Thread::current().signal_list.has_pending_signal() { - return if cur == 0 { Err(EINTR) } else { Ok(cur) }; - } - let nread = self - .read(&mut ByteBuffer::new(&mut buffer[..len]), None) - .await?; - if nread == 0 { - break; - } - - let nwrote = dest_file - .write(&mut buffer[..nread].into_stream(), None) - .await?; - nsent += nwrote; - - if nwrote != len { - break; - } - } - - Ok(nsent) - } - - pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult { - match self { - FileType::TTY(tty) => tty.ioctl(request, arg3).await.map(|_| 0), - _ => Err(ENOTTY), - } - } - - pub async fn poll(&self, event: PollEvent) -> KResult { - match self { - FileType::Inode(_) => Ok(event), - FileType::TTY(tty) => tty.poll(event).await, - FileType::PipeRead(PipeReadEnd { pipe }) - | FileType::PipeWrite(PipeWriteEnd { pipe }) => pipe.poll(event).await, - _ => unimplemented!("Poll event not supported."), - } - } - - pub fn statx(&self, buffer: &mut StatX, mask: u32) -> KResult<()> { - match self { - FileType::Inode(inode) => inode.dentry.statx(buffer, mask), - _ => Err(EBADF), - } - } - - pub fn as_path(&self) -> Option<&Arc> { - match self { - FileType::Inode(inode_file) => Some(&inode_file.dentry), - _ => None, - } - } -} - -impl File { - pub fn new(flags: OpenFlags, file_type: FileType) -> Arc { - Arc::new(Self { - flags: AtomicU32::new(flags.bits()), - file_type, - }) - } - - pub fn get_flags(&self) -> OpenFlags { - OpenFlags::from_bits_retain(self.flags.load(Ordering::Relaxed)) - } - - pub fn set_flags(&self, flags: OpenFlags) { - let flags = flags.difference( - OpenFlags::O_WRONLY - | OpenFlags::O_RDWR - | OpenFlags::O_CREAT - | OpenFlags::O_TRUNC - | OpenFlags::O_EXCL, - // | OpenFlags::O_NOCTTY, - ); - 
- self.flags.store(flags.bits(), Ordering::Relaxed); - } -} - -impl Deref for File { - type Target = FileType; - - fn deref(&self) -> &Self::Target { - &self.file_type - } -} diff --git a/src/kernel/vfs/file/inode_file.rs b/src/kernel/vfs/file/inode_file.rs new file mode 100644 index 00000000..6386ba92 --- /dev/null +++ b/src/kernel/vfs/file/inode_file.rs @@ -0,0 +1,223 @@ +use super::{File, FileType, SeekOption}; +use crate::{ + io::{Buffer, BufferFill, Stream}, + kernel::{ + constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE}, + vfs::{ + dentry::Dentry, + inode::{Inode, Mode, WriteOffset}, + }, + }, + prelude::KResult, +}; +use alloc::sync::Arc; +use core::{ops::ControlFlow, sync::atomic::Ordering}; +use eonix_sync::Mutex; +use posix_types::{ + getdent::{UserDirent, UserDirent64}, + open::OpenFlags, + stat::StatX, +}; + +pub struct InodeFile { + pub r: bool, + pub w: bool, + pub a: bool, + /// Only a few modes those won't possibly change are cached here to speed up file operations. + /// Specifically, `S_IFMT` masked bits. + pub mode: Mode, + cursor: Mutex, + dentry: Arc, +} + +impl InodeFile { + pub fn new(dentry: Arc, flags: OpenFlags) -> File { + // SAFETY: `dentry` used to create `InodeFile` is valid. + // SAFETY: `mode` should never change with respect to the `S_IFMT` fields. 
+ let cached_mode = dentry + .get_inode() + .expect("`dentry` is invalid") + .mode + .load() + .format(); + + let (r, w, a) = flags.as_rwa(); + + File::new( + flags, + FileType::Inode(InodeFile { + dentry, + r, + w, + a, + mode: cached_mode, + cursor: Mutex::new(0), + }), + ) + } + + pub fn sendfile_check(&self) -> KResult<()> { + match self.mode { + Mode::REG | Mode::BLK => Ok(()), + _ => Err(EBADF), + } + } + + pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { + if !self.w { + return Err(EBADF); + } + + let mut cursor = self.cursor.lock().await; + + if self.a { + let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; + + Ok(nwrote) + } else { + let nwrote = if let Some(offset) = offset { + self.dentry.write(stream, WriteOffset::Position(offset))? + } else { + let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?; + *cursor += nwrote; + nwrote + }; + + Ok(nwrote) + } + } + + pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { + if !self.r { + return Err(EBADF); + } + + let nread = if let Some(offset) = offset { + let nread = self.dentry.read(buffer, offset)?; + nread + } else { + let mut cursor = self.cursor.lock().await; + + let nread = self.dentry.read(buffer, *cursor)?; + + *cursor += nread; + nread + }; + + Ok(nread) + } +} + +impl File { + pub fn get_inode(&self) -> KResult>> { + if let FileType::Inode(inode_file) = &**self { + Ok(Some(inode_file.dentry.get_inode()?)) + } else { + Ok(None) + } + } + + pub async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let FileType::Inode(inode_file) = &**self else { + return Err(ENOTDIR); + }; + + let mut cursor = inode_file.cursor.lock().await; + + let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { + // + 1 for filename length padding '\0', + 1 for d_type. 
+ let real_record_len = core::mem::size_of::() + filename.len() + 2; + + if buffer.available() < real_record_len { + return Ok(ControlFlow::Break(())); + } + + let record = UserDirent { + d_ino: ino as u32, + d_off: 0, + d_reclen: real_record_len as u16, + d_name: [0; 0], + }; + + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0, 0])?.ok_or(EFAULT)?; + + Ok(ControlFlow::Continue(())) + })?; + + *cursor += nread; + Ok(()) + } + + pub async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let FileType::Inode(inode_file) = &**self else { + return Err(ENOTDIR); + }; + + let mut cursor = inode_file.cursor.lock().await; + + let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { + // Filename length + 1 for padding '\0' + let real_record_len = core::mem::size_of::() + filename.len() + 1; + + if buffer.available() < real_record_len { + return Ok(ControlFlow::Break(())); + } + + let record = UserDirent64 { + d_ino: ino, + d_off: 0, + d_reclen: real_record_len as u16, + d_type: 0, + d_name: [0; 0], + }; + + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0])?.ok_or(EFAULT)?; + + Ok(ControlFlow::Continue(())) + })?; + + *cursor += nread; + Ok(()) + } + + pub async fn seek(&self, option: SeekOption) -> KResult { + let FileType::Inode(inode_file) = &**self else { + return Err(ESPIPE); + }; + + let mut cursor = inode_file.cursor.lock().await; + + let new_cursor = match option { + SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, + SeekOption::Set(n) => n, + SeekOption::End(off) => { + let inode = inode_file.dentry.get_inode()?; + let size = inode.size.load(Ordering::Relaxed) as usize; + size.checked_add_signed(off).ok_or(EOVERFLOW)? 
+ } + }; + + *cursor = new_cursor; + Ok(new_cursor) + } + + pub fn statx(&self, buffer: &mut StatX, mask: u32) -> KResult<()> { + if let FileType::Inode(inode) = &**self { + inode.dentry.statx(buffer, mask) + } else { + Err(EBADF) + } + } + + pub fn as_path(&self) -> Option<&Arc> { + if let FileType::Inode(inode_file) = &**self { + Some(&inode_file.dentry) + } else { + None + } + } +} diff --git a/src/kernel/vfs/file/mod.rs b/src/kernel/vfs/file/mod.rs new file mode 100644 index 00000000..bb1c66ec --- /dev/null +++ b/src/kernel/vfs/file/mod.rs @@ -0,0 +1,232 @@ +mod inode_file; +mod pipe; +mod terminal_file; + +use crate::{ + io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}, + kernel::{ + constants::{EBADF, EINTR, EINVAL, ENOTTY}, + mem::{AsMemoryBlock, Page}, + task::Thread, + CharDevice, + }, + prelude::KResult, +}; +use alloc::sync::Arc; +use bitflags::bitflags; +use core::{ + ops::Deref, + sync::atomic::{AtomicI32, AtomicU32, Ordering}, +}; +use pipe::{PipeReadEnd, PipeWriteEnd}; +use posix_types::open::OpenFlags; + +pub use inode_file::InodeFile; +pub use pipe::Pipe; +pub use terminal_file::TerminalFile; + +pub enum FileType { + Inode(InodeFile), + PipeRead(PipeReadEnd), + PipeWrite(PipeWriteEnd), + Terminal(TerminalFile), + CharDev(Arc), +} + +struct FileData { + flags: AtomicU32, + open_count: AtomicI32, + file_type: FileType, +} + +#[derive(Clone)] +pub struct File(Arc); + +pub enum SeekOption { + Set(usize), + Current(isize), + End(isize), +} + +bitflags! 
{ + pub struct PollEvent: u16 { + const Readable = 0x0001; + const Writable = 0x0002; + } +} + +impl FileType { + pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { + match self { + FileType::Inode(inode) => inode.read(buffer, offset).await, + FileType::PipeRead(pipe) => pipe.read(buffer).await, + FileType::Terminal(tty) => tty.read(buffer).await, + FileType::CharDev(device) => device.read(buffer), + _ => Err(EBADF), + } + } + + // TODO + // /// Read from the file into the given buffers. + // /// + // /// Reads are atomic, not intermingled with other reads or writes. + // pub fn readv<'r, 'i, I: Iterator>( + // &'r self, + // buffers: I, + // ) -> KResult { + // match self { + // File::Inode(inode) => inode.readv(buffers), + // File::PipeRead(pipe) => pipe.pipe.readv(buffers), + // _ => Err(EBADF), + // } + // } + + pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { + match self { + FileType::Inode(inode) => inode.write(stream, offset).await, + FileType::PipeWrite(pipe) => pipe.write(stream).await, + FileType::Terminal(tty) => tty.write(stream), + FileType::CharDev(device) => device.write(stream), + _ => Err(EBADF), + } + } + + fn sendfile_check(&self) -> KResult<()> { + match self { + FileType::Inode(file) => file.sendfile_check(), + _ => Err(EINVAL), + } + } + + pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { + let buffer_page = Page::alloc(); + // SAFETY: We are the only owner of the page. 
+ let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() }; + + self.sendfile_check()?; + + let mut nsent = 0; + for (cur, len) in Chunks::new(0, count, buffer.len()) { + if Thread::current().signal_list.has_pending_signal() { + return if cur == 0 { Err(EINTR) } else { Ok(cur) }; + } + let nread = self + .read(&mut ByteBuffer::new(&mut buffer[..len]), None) + .await?; + if nread == 0 { + break; + } + + let nwrote = dest_file + .write(&mut buffer[..nread].into_stream(), None) + .await?; + nsent += nwrote; + + if nwrote != len { + break; + } + } + + Ok(nsent) + } + + pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult { + match self { + FileType::Terminal(tty) => tty.ioctl(request, arg3).await.map(|_| 0), + _ => Err(ENOTTY), + } + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + match self { + FileType::Inode(_) => Ok(event), + FileType::Terminal(tty) => tty.poll(event).await, + FileType::PipeRead(pipe) => pipe.poll(event).await, + FileType::PipeWrite(pipe) => pipe.poll(event).await, + _ => unimplemented!("Poll event not supported."), + } + } +} + +impl File { + pub fn new(flags: OpenFlags, file_type: FileType) -> Self { + Self(Arc::new(FileData { + flags: AtomicU32::new(flags.bits()), + open_count: AtomicI32::new(1), + file_type, + })) + } + + pub fn get_flags(&self) -> OpenFlags { + OpenFlags::from_bits_retain(self.0.flags.load(Ordering::Relaxed)) + } + + pub fn set_flags(&self, flags: OpenFlags) { + let flags = flags.difference( + OpenFlags::O_WRONLY + | OpenFlags::O_RDWR + | OpenFlags::O_CREAT + | OpenFlags::O_TRUNC + | OpenFlags::O_EXCL, + // | OpenFlags::O_NOCTTY, + ); + + self.0.flags.store(flags.bits(), Ordering::Relaxed); + } + + /// Duplicate the file descriptor in order to store it in some [FileArray]. + /// + /// The [`File`]s stored in [FileArray]s hold an "open count", which is used + /// to track how many references to the file are currently open. 
+ /// + /// # Panics + /// The [`File`]s stored in [FileArray]s MUST be retrieved by calling this + /// method. Otherwise, when the last reference to the file is dropped, + /// something bad will happen. ;) + /// + /// [FileArray]: crate::kernel::vfs::filearray::FileArray + pub fn dup(&self) -> Self { + self.0.open_count.fetch_add(1, Ordering::Relaxed); + Self(self.0.clone()) + } + + /// Close the file descriptor, decrementing the open count. + pub async fn close(self) { + // Due to rust async drop limits, we have to do this manually... + // + // Users of files can clone and drop it freely, but references held by + // file arrays must be dropped by calling this function (in order to + // await for the async close operation of the inner FileType). + match self.0.open_count.fetch_sub(1, Ordering::Relaxed) { + ..1 => panic!("File open count underflow."), + 1 => {} + _ => return, + } + + match &self.0.file_type { + FileType::PipeRead(pipe) => pipe.close().await, + FileType::PipeWrite(pipe) => pipe.close().await, + _ => {} + } + } +} + +impl Drop for FileData { + fn drop(&mut self) { + // If you're "lucky" enough to see this, it means that you've violated + // the file reference counting rules. Check File::close() for details. ;) + assert_eq!( + self.open_count.load(Ordering::Relaxed), + 0, + "File dropped with open count 0, check the comments for details." 
+ ); + } +} + +impl Deref for File { + type Target = FileType; + + fn deref(&self) -> &Self::Target { + &self.0.file_type + } +} diff --git a/src/kernel/vfs/file/pipe.rs b/src/kernel/vfs/file/pipe.rs new file mode 100644 index 00000000..910f04fa --- /dev/null +++ b/src/kernel/vfs/file/pipe.rs @@ -0,0 +1,211 @@ +use super::{File, FileType, PollEvent}; +use crate::{ + io::{Buffer, Stream}, + kernel::{ + constants::{EINTR, EPIPE}, + task::Thread, + }, + prelude::KResult, + sync::CondVar, +}; +use alloc::{collections::vec_deque::VecDeque, sync::Arc}; +use eonix_sync::Mutex; +use posix_types::{open::OpenFlags, signal::Signal}; + +struct PipeInner { + buffer: VecDeque, + read_closed: bool, + write_closed: bool, +} + +pub struct Pipe { + inner: Mutex, + cv_read: CondVar, + cv_write: CondVar, +} + +pub struct PipeReadEnd { + pipe: Arc, +} + +pub struct PipeWriteEnd { + pipe: Arc, +} + +fn send_sigpipe_to_current() { + let current = Thread::current(); + current.raise(Signal::SIGPIPE); +} + +impl Pipe { + const PIPE_SIZE: usize = 4096; + + /// # Return + /// `(read_end, write_end)` + pub fn new(flags: OpenFlags) -> (File, File) { + let pipe = Arc::new(Self { + inner: Mutex::new(PipeInner { + buffer: VecDeque::with_capacity(Self::PIPE_SIZE), + read_closed: false, + write_closed: false, + }), + cv_read: CondVar::new(), + cv_write: CondVar::new(), + }); + + let read_flags = flags.difference(OpenFlags::O_WRONLY | OpenFlags::O_RDWR); + let mut write_flags = read_flags; + write_flags.insert(OpenFlags::O_WRONLY); + + let read_pipe = pipe.clone(); + let write_pipe = pipe; + + ( + File::new( + read_flags, + FileType::PipeRead(PipeReadEnd { pipe: read_pipe }), + ), + File::new( + write_flags, + FileType::PipeWrite(PipeWriteEnd { pipe: write_pipe }), + ), + ) + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + if !event.contains(PollEvent::Readable) { + unimplemented!("Poll event not supported."); + } + + let mut inner = self.inner.lock().await; + while 
inner.buffer.is_empty() && !inner.write_closed { + inner = self.cv_read.wait(inner).await; + } + + if Thread::current().signal_list.has_pending_signal() { + return Err(EINTR); + } + + let mut retval = PollEvent::empty(); + if inner.write_closed { + retval |= PollEvent::Writable; + } + + if !inner.buffer.is_empty() { + retval |= PollEvent::Readable; + } + + Ok(retval) + } + + pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { + let mut inner = self.inner.lock().await; + + while !inner.write_closed && inner.buffer.is_empty() { + inner = self.cv_read.wait(inner).await; + if Thread::current().signal_list.has_pending_signal() { + return Err(EINTR); + } + } + + let (data1, data2) = inner.buffer.as_slices(); + let nread = buffer.fill(data1)?.allow_partial() + buffer.fill(data2)?.allow_partial(); + inner.buffer.drain(..nread); + + self.cv_write.notify_all(); + Ok(nread) + } + + async fn write_atomic(&self, data: &[u8]) -> KResult { + let mut inner = self.inner.lock().await; + + if inner.read_closed { + send_sigpipe_to_current(); + return Err(EPIPE); + } + + while inner.buffer.len() + data.len() > Self::PIPE_SIZE { + inner = self.cv_write.wait(inner).await; + if Thread::current().signal_list.has_pending_signal() { + return Err(EINTR); + } + + if inner.read_closed { + send_sigpipe_to_current(); + return Err(EPIPE); + } + } + + inner.buffer.extend(data); + + self.cv_read.notify_all(); + return Ok(data.len()); + } + + pub async fn write(&self, stream: &mut dyn Stream) -> KResult { + let mut buffer = [0; Self::PIPE_SIZE]; + let mut total = 0; + while let Some(data) = stream.poll_data(&mut buffer)? 
{ + let nwrote = self.write_atomic(data).await?; + total += nwrote; + if nwrote != data.len() { + break; + } + } + Ok(total) + } +} + +impl PipeReadEnd { + pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { + self.pipe.read(buffer).await + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + self.pipe.poll(event).await + } + + pub async fn close(&self) { + let mut inner = self.pipe.inner.lock().await; + if inner.read_closed { + return; + } + + inner.read_closed = true; + self.pipe.cv_write.notify_all(); + } +} + +impl PipeWriteEnd { + pub async fn write(&self, stream: &mut dyn Stream) -> KResult { + self.pipe.write(stream).await + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + self.pipe.poll(event).await + } + + pub async fn close(&self) { + let mut inner = self.pipe.inner.lock().await; + if inner.write_closed { + return; + } + + inner.write_closed = true; + self.pipe.cv_read.notify_all(); + } +} + +impl Drop for Pipe { + fn drop(&mut self) { + debug_assert!( + self.inner.get_mut().read_closed, + "Pipe read end should be closed before dropping (check File::close())." + ); + + debug_assert!( + self.inner.get_mut().write_closed, + "Pipe write end should be closed before dropping (check File::close())." 
+ ); + } +} diff --git a/src/kernel/vfs/file/terminal_file.rs b/src/kernel/vfs/file/terminal_file.rs new file mode 100644 index 00000000..f318c5b2 --- /dev/null +++ b/src/kernel/vfs/file/terminal_file.rs @@ -0,0 +1,55 @@ +use super::{File, FileType, PollEvent}; +use crate::{ + io::{Buffer, Stream, StreamRead}, + kernel::{ + constants::{EINVAL, TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, + terminal::TerminalIORequest, + user::{UserPointer, UserPointerMut}, + Terminal, + }, + prelude::KResult, +}; +use alloc::sync::Arc; +use posix_types::open::OpenFlags; + +pub struct TerminalFile { + terminal: Arc, +} + +impl TerminalFile { + pub fn new(tty: Arc, flags: OpenFlags) -> File { + File::new(flags, FileType::Terminal(TerminalFile { terminal: tty })) + } + + pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { + self.terminal.read(buffer).await + } + + pub fn write(&self, stream: &mut dyn Stream) -> KResult { + stream.read_till_end(&mut [0; 128], |data| { + self.terminal.write(data); + Ok(()) + }) + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + if !event.contains(PollEvent::Readable) { + unimplemented!("Poll event not supported.") + } + + self.terminal.poll_in().await.map(|_| PollEvent::Readable) + } + + pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { + self.terminal + .ioctl(match request as u32 { + TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), + TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), + TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), + TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), + TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), + _ => return Err(EINVAL), + }) + .await + } +} diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index 0fb9205d..b457a425 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -1,7 
+1,7 @@ use super::{ - file::{File, InodeFile, TerminalFile}, + file::{File, InodeFile, Pipe}, inode::Mode, - s_ischr, Spin, + Spin, TerminalFile, }; use crate::kernel::{ constants::{ @@ -10,19 +10,13 @@ use crate::kernel::{ syscall::{FromSyscallArg, SyscallRetVal}, }; use crate::{ - kernel::{ - console::get_console, - constants::ENXIO, - vfs::{dentry::Dentry, file::Pipe, s_isdir, s_isreg}, - CharDevice, - }, + kernel::{console::get_console, constants::ENXIO, vfs::dentry::Dentry, CharDevice}, prelude::*, }; -use alloc::{ - collections::btree_map::{BTreeMap, Entry}, - sync::Arc, +use alloc::sync::Arc; +use intrusive_collections::{ + intrusive_adapter, rbtree::Entry, Bound, KeyAdapter, RBTree, RBTreeAtomicLink, }; -use core::sync::atomic::Ordering; use itertools::{ FoldWhile::{Continue, Done}, Itertools, @@ -34,14 +28,33 @@ pub struct FD(u32); #[derive(Clone)] struct OpenFile { + fd: FD, flags: FDFlags, - file: Arc, + file: File, + + link: RBTreeAtomicLink, +} + +intrusive_adapter!( + OpenFileAdapter = Box: OpenFile { link: RBTreeAtomicLink } +); + +impl<'a> KeyAdapter<'a> for OpenFileAdapter { + type Key = FD; + + fn get_key(&self, value: &'a OpenFile) -> Self::Key { + value.fd + } } #[derive(Clone)] +struct FDAllocator { + min_avail: FD, +} + struct FileArrayInner { - files: BTreeMap, - fd_min_avail: FD, + files: RBTree, + fd_alloc: FDAllocator, } pub struct FileArray { @@ -49,109 +62,202 @@ pub struct FileArray { } impl OpenFile { + fn new(fd: FD, flags: FDFlags, file: File) -> Box { + Box::new(Self { + fd, + flags, + file, + link: RBTreeAtomicLink::new(), + }) + } + pub fn close_on_exec(&self) -> bool { self.flags.contains(FDFlags::FD_CLOEXEC) } } +impl FDAllocator { + const fn new() -> Self { + Self { min_avail: FD(0) } + } + + fn reinit(&mut self) { + self.min_avail = FD(0); + } + + fn find_available(&mut self, from: FD, files: &RBTree) -> FD { + files + .range(Bound::Included(&from), Bound::Unbounded) + .fold_while(from, |current, OpenFile { fd, .. 
}| { + if current == *fd { + Continue(FD(current.0 + 1)) + } else { + Done(current) + } + }) + .into_inner() + } + + /// Allocate a new file descriptor starting from `from`. + /// + /// Returned file descriptor should be used immediately. + /// + fn allocate_fd(&mut self, from: FD, files: &RBTree) -> FD { + let from = FD::max(from, self.min_avail); + + if from == self.min_avail { + let next_min_avail = self.find_available(FD(from.0 + 1), files); + let allocated = self.min_avail; + self.min_avail = next_min_avail; + allocated + } else { + self.find_available(from, files) + } + } + + fn release_fd(&mut self, fd: FD) { + if fd < self.min_avail { + self.min_avail = fd; + } + } + + fn next_fd(&mut self, files: &RBTree) -> FD { + self.allocate_fd(self.min_avail, files) + } +} + impl FileArray { pub fn new() -> Arc { Arc::new(FileArray { inner: Spin::new(FileArrayInner { - files: BTreeMap::new(), - fd_min_avail: FD(0), + files: RBTree::new(OpenFileAdapter::new()), + fd_alloc: FDAllocator::new(), }), }) } - #[allow(dead_code)] pub fn new_shared(other: &Arc) -> Arc { other.clone() } pub fn new_cloned(other: &Self) -> Arc { Arc::new(Self { - inner: Spin::new(other.inner.lock().clone()), + inner: Spin::new({ + let (new_files, new_fd_alloc) = { + let mut new_files = RBTree::new(OpenFileAdapter::new()); + let other_inner = other.inner.lock(); + + for file in other_inner.files.iter() { + let new_file = OpenFile::new(file.fd, file.flags, file.file.dup()); + new_files.insert(new_file); + } + (new_files, other_inner.fd_alloc.clone()) + }; + + FileArrayInner { + files: new_files, + fd_alloc: new_fd_alloc, + } + }), }) } /// Acquires the file array lock. 
- pub fn get(&self, fd: FD) -> Option> { + pub fn get(&self, fd: FD) -> Option { self.inner.lock().get(fd) } - pub fn close_all(&self) { - let _old_files = { + pub async fn close_all(&self) { + let old_files = { let mut inner = self.inner.lock(); - inner.fd_min_avail = FD(0); - core::mem::take(&mut inner.files) + inner.fd_alloc.reinit(); + inner.files.take() }; + + for file in old_files.into_iter() { + file.file.close().await; + } } - pub fn close(&self, fd: FD) -> KResult<()> { - let _file = { + pub async fn close(&self, fd: FD) -> KResult<()> { + let file = { let mut inner = self.inner.lock(); - let file = inner.files.remove(&fd).ok_or(EBADF)?; - inner.release_fd(fd); - file + let file = inner.files.find_mut(&fd).remove().ok_or(EBADF)?; + inner.fd_alloc.release_fd(file.fd); + file.file }; + + file.close().await; Ok(()) } - pub fn on_exec(&self) -> () { - let mut inner = self.inner.lock(); + pub async fn on_exec(&self) { + let files_to_close = { + let mut inner = self.inner.lock(); + let (files, fd_alloc) = inner.split_borrow(); - // TODO: This is not efficient. We should avoid cloning. 
- let fds_to_close = inner - .files - .iter() - .filter(|(_, ofile)| ofile.close_on_exec()) - .map(|(&fd, _)| fd) - .collect::>(); + files.pick(|ofile| { + if ofile.close_on_exec() { + fd_alloc.release_fd(ofile.fd); + true + } else { + false + } + }) + }; - inner.files.retain(|_, ofile| !ofile.close_on_exec()); - fds_to_close.into_iter().for_each(|fd| inner.release_fd(fd)); + for open_file in files_to_close.into_iter() { + open_file.file.close().await; + } } -} -impl FileArray { pub fn dup(&self, old_fd: FD) -> KResult { let mut inner = self.inner.lock(); - let old_file = inner.files.get(&old_fd).ok_or(EBADF)?; + let (files, fd_alloc) = inner.split_borrow(); + + let old_file = files.get_fd(old_fd).ok_or(EBADF)?; - let new_file_data = old_file.file.clone(); + let new_file_data = old_file.file.dup(); let new_file_flags = old_file.flags; - let new_fd = inner.next_fd(); + let new_fd = fd_alloc.next_fd(files); inner.do_insert(new_fd, new_file_flags, new_file_data); Ok(new_fd) } - pub fn dup_to(&self, old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { - let fdflags = flags.as_fd_flags(); - + /// Duplicates the file to a new file descriptor, returning the old file + /// description to be dropped. 
+ fn dup_to_no_close(&self, old_fd: FD, new_fd: FD, fd_flags: FDFlags) -> KResult> { let mut inner = self.inner.lock(); - let old_file = inner.files.get(&old_fd).ok_or(EBADF)?; + let (files, fd_alloc) = inner.split_borrow(); - let new_file_data = old_file.file.clone(); + let old_file = files.get_fd(old_fd).ok_or(EBADF)?; + let new_file_data = old_file.file.dup(); - match inner.files.entry(new_fd) { - Entry::Vacant(_) => {} - Entry::Occupied(entry) => { - let new_file = entry.into_mut(); - let mut file_swap = new_file_data; + match files.entry(&new_fd) { + Entry::Vacant(_) => { + assert_eq!(new_fd, fd_alloc.allocate_fd(new_fd, files)); + inner.do_insert(new_fd, fd_flags, new_file_data); - new_file.flags = fdflags; - core::mem::swap(&mut file_swap, &mut new_file.file); + Ok(None) + } + Entry::Occupied(mut entry) => { + let mut file = entry.remove().unwrap(); + file.flags = fd_flags; + let old_file = core::mem::replace(&mut file.file, new_file_data); - drop(inner); - return Ok(new_fd); + entry.insert(file); + + Ok(Some(old_file)) } } + } - assert_eq!(new_fd, inner.allocate_fd(new_fd)); - inner.do_insert(new_fd, fdflags, new_file_data); + pub async fn dup_to(&self, old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { + if let Some(old_file) = self.dup_to_no_close(old_fd, new_fd, flags.as_fd_flags())? 
{ + old_file.close().await; + } Ok(new_fd) } @@ -160,9 +266,10 @@ impl FileArray { /// `(read_fd, write_fd)` pub fn pipe(&self, flags: OpenFlags) -> KResult<(FD, FD)> { let mut inner = self.inner.lock(); + let (files, fd_alloc) = inner.split_borrow(); - let read_fd = inner.next_fd(); - let write_fd = inner.next_fd(); + let read_fd = fd_alloc.next_fd(files); + let write_fd = fd_alloc.next_fd(files); let fdflag = flags.as_fd_flags(); @@ -179,23 +286,20 @@ impl FileArray { let fdflag = flags.as_fd_flags(); let inode = dentry.get_inode()?; - let filemode = inode.mode.load(Ordering::Relaxed); + let file_format = inode.mode.load().format(); - if flags.directory() { - if !s_isdir(filemode) { - return Err(ENOTDIR); - } - } else { - if s_isdir(filemode) && flags.write() { - return Err(EISDIR); - } + match (flags.directory(), file_format, flags.write()) { + (true, Mode::DIR, _) => {} + (true, _, _) => return Err(ENOTDIR), + (false, Mode::DIR, true) => return Err(EISDIR), + _ => {} } - if flags.truncate() && flags.write() && s_isreg(filemode) { + if flags.truncate() && flags.write() && file_format.is_reg() { inode.truncate(0)?; } - let file = if s_ischr(filemode) { + let file = if file_format.is_chr() { let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?; device.open(flags)? 
} else { @@ -203,7 +307,8 @@ impl FileArray { }; let mut inner = self.inner.lock(); - let fd = inner.next_fd(); + let (files, fd_alloc) = inner.split_borrow(); + let fd = fd_alloc.next_fd(files); inner.do_insert(fd, fdflag, file); Ok(fd) @@ -211,43 +316,59 @@ impl FileArray { pub fn fcntl(&self, fd: FD, cmd: u32, arg: usize) -> KResult { let mut inner = self.inner.lock(); - let ofile = inner.files.get_mut(&fd).ok_or(EBADF)?; + let (files, fd_alloc) = inner.split_borrow(); + + let mut cursor = files.find_mut(&fd); - match cmd { + let ret = match cmd { F_DUPFD | F_DUPFD_CLOEXEC => { + let ofile = cursor.get().ok_or(EBADF)?; + let cloexec = cmd == F_DUPFD_CLOEXEC || ofile.flags.close_on_exec(); let flags = cloexec .then_some(FDFlags::FD_CLOEXEC) .unwrap_or(FDFlags::empty()); - let new_file_data = ofile.file.clone(); - let new_fd = inner.allocate_fd(FD(arg as u32)); + let new_file_data = ofile.file.dup(); + let new_fd = fd_alloc.allocate_fd(FD(arg as u32), files); inner.do_insert(new_fd, flags, new_file_data); - Ok(new_fd.0 as usize) + new_fd.0 as usize } - F_GETFD => Ok(ofile.flags.bits() as usize), + F_GETFD => cursor.get().ok_or(EBADF)?.flags.bits() as usize, F_SETFD => { + let mut ofile = cursor.remove().ok_or(EBADF)?; ofile.flags = FDFlags::from_bits_truncate(arg as u32); - Ok(0) + cursor.insert(ofile); + 0 } - F_GETFL => Ok(ofile.file.get_flags().bits() as usize), + F_GETFL => cursor.get().ok_or(EBADF)?.file.get_flags().bits() as usize, F_SETFL => { - ofile + cursor + .get() + .ok_or(EBADF)? .file .set_flags(OpenFlags::from_bits_retain(arg as u32)); - Ok(0) + 0 } _ => unimplemented!("fcntl: cmd={}", cmd), - } + }; + + Ok(ret) } /// Only used for init process. 
pub fn open_console(&self) { let mut inner = self.inner.lock(); - let (stdin, stdout, stderr) = (inner.next_fd(), inner.next_fd(), inner.next_fd()); + let (files, fd_alloc) = inner.split_borrow(); + + let (stdin, stdout, stderr) = ( + fd_alloc.next_fd(files), + fd_alloc.next_fd(files), + fd_alloc.next_fd(files), + ); let console_terminal = get_console().expect("No console terminal"); inner.do_insert( @@ -269,53 +390,25 @@ impl FileArray { } impl FileArrayInner { - fn get(&mut self, fd: FD) -> Option> { - self.files.get(&fd).map(|f| f.file.clone()) - } - - fn find_available(&mut self, from: FD) -> FD { - self.files - .range(&from..) - .fold_while(from, |current, (&key, _)| { - if current == key { - Continue(FD(current.0 + 1)) - } else { - Done(current) - } - }) - .into_inner() - } - - /// Allocate a new file descriptor starting from `from`. - /// - /// Returned file descriptor should be used immediately. - /// - fn allocate_fd(&mut self, from: FD) -> FD { - let from = FD::max(from, self.fd_min_avail); - - if from == self.fd_min_avail { - let next_min_avail = self.find_available(FD(from.0 + 1)); - let allocated = self.fd_min_avail; - self.fd_min_avail = next_min_avail; - allocated - } else { - self.find_available(from) - } + fn get(&mut self, fd: FD) -> Option { + self.files.get_fd(fd).map(|open| open.file.clone()) } - fn release_fd(&mut self, fd: FD) { - if fd < self.fd_min_avail { - self.fd_min_avail = fd; + /// Insert a file description to the file array. + fn do_insert(&mut self, fd: FD, flags: FDFlags, file: File) { + match self.files.entry(&fd) { + Entry::Occupied(_) => { + panic!("File descriptor {fd:?} already exists in the file array."); + } + Entry::Vacant(insert_cursor) => { + insert_cursor.insert(OpenFile::new(fd, flags, file)); + } } } - fn next_fd(&mut self) -> FD { - self.allocate_fd(self.fd_min_avail) - } - - /// Insert a file description to the file array. 
- fn do_insert(&mut self, fd: FD, flags: FDFlags, file: Arc) { - assert!(self.files.insert(fd, OpenFile { flags, file }).is_none()); + fn split_borrow(&mut self) -> (&mut RBTree, &mut FDAllocator) { + let Self { files, fd_alloc } = self; + (files, fd_alloc) } } @@ -343,3 +436,39 @@ impl SyscallRetVal for FD { Some(self.0 as usize) } } + +trait FilesExt { + fn get_fd(&self, fd: FD) -> Option<&OpenFile>; + + fn pick

(&mut self, pred: P) -> Self + where + P: FnMut(&OpenFile) -> bool; +} + +impl FilesExt for RBTree { + fn get_fd(&self, fd: FD) -> Option<&OpenFile> { + self.find(&fd).get() + } + + fn pick

(&mut self, mut pred: P) -> Self + where + P: FnMut(&OpenFile) -> bool, + { + let mut picked = RBTree::new(OpenFileAdapter::new()); + + // TODO: might be better if we start picking from somewhere else + // or using a different approach. + let mut cursor = self.front_mut(); + while let Some(open_file) = cursor.get() { + if !pred(open_file) { + cursor.move_next(); + continue; + } + + picked.insert(cursor.remove().unwrap()); + cursor.move_next(); + } + + picked + } +} diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs index 3eb6c8dc..e47df25d 100644 --- a/src/kernel/vfs/inode.rs +++ b/src/kernel/vfs/inode.rs @@ -1,10 +1,12 @@ -use super::{dentry::Dentry, s_isblk, s_ischr, vfs::Vfs, DevId}; +use super::{dentry::Dentry, vfs::Vfs, DevId}; use crate::io::Stream; use crate::kernel::constants::{ EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, - STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR, S_IFMT, + STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFBLK, S_IFCHR, + S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, }; use crate::kernel::mem::PageCache; +use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; use crate::kernel::task::block_on; use crate::kernel::timer::Instant; use crate::{io::Buffer, prelude::*}; @@ -32,8 +34,11 @@ pub type AtomicUid = AtomicU32; #[allow(dead_code)] pub type Gid = u32; pub type AtomicGid = AtomicU32; -pub type Mode = u32; -pub type AtomicMode = AtomicU32; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Mode(u32); + +pub struct AtomicMode(AtomicU32); #[derive(Debug)] pub struct InodeData { @@ -97,7 +102,7 @@ pub struct RenameData<'a, 'b> { #[allow(unused_variables)] pub trait Inode: Send + Sync + InodeInner + Any { fn is_dir(&self) -> bool { - self.mode.load(Ordering::SeqCst) & S_IFDIR != 0 + self.mode.load().is_dir() } fn lookup(&self, dentry: &Arc) -> KResult>> { @@ -181,7 +186,7 @@ pub trait Inode: Send + Sync + InodeInner + 
Any { let vfs = self.vfs.upgrade().expect("Vfs is dropped"); let size = self.size.load(Ordering::Relaxed); - let mode = self.mode.load(Ordering::Relaxed); + let mode = self.mode.load(); if mask & STATX_NLINK != 0 { stat.stx_nlink = self.nlink.load(Ordering::Acquire) as _; @@ -213,13 +218,13 @@ pub trait Inode: Send + Sync + InodeInner + Any { stat.stx_mode = 0; if mask & STATX_MODE != 0 { - stat.stx_mode |= (mode & !S_IFMT) as u16; + stat.stx_mode |= mode.non_format_bits() as u16; stat.stx_mask |= STATX_MODE; } if mask & STATX_TYPE != 0 { - stat.stx_mode |= (mode & S_IFMT) as u16; - if s_isblk(mode) || s_ischr(mode) { + stat.stx_mode |= mode.format_bits() as u16; + if mode.is_blk() || mode.is_chr() { let devid = self.devid(); stat.stx_rdev_major = (devid? >> 8) & 0xff; stat.stx_rdev_minor = devid? & 0xff; @@ -354,3 +359,136 @@ macro_rules! define_struct_inode { } pub(crate) use define_struct_inode; + +impl Mode { + pub const REG: Self = Self(S_IFREG); + pub const DIR: Self = Self(S_IFDIR); + pub const LNK: Self = Self(S_IFLNK); + pub const BLK: Self = Self(S_IFBLK); + pub const CHR: Self = Self(S_IFCHR); + + pub const fn new(bits: u32) -> Self { + Self(bits) + } + + pub const fn is_blk(&self) -> bool { + (self.0 & S_IFMT) == S_IFBLK + } + + pub const fn is_chr(&self) -> bool { + (self.0 & S_IFMT) == S_IFCHR + } + + pub const fn is_reg(&self) -> bool { + (self.0 & S_IFMT) == S_IFREG + } + + pub const fn is_dir(&self) -> bool { + (self.0 & S_IFMT) == S_IFDIR + } + + pub const fn is_lnk(&self) -> bool { + (self.0 & S_IFMT) == S_IFLNK + } + + pub const fn bits(&self) -> u32 { + self.0 + } + + pub const fn format_bits(&self) -> u32 { + self.0 & S_IFMT + } + + pub const fn format(&self) -> Self { + Self::new(self.format_bits()) + } + + pub const fn non_format_bits(&self) -> u32 { + self.0 & !S_IFMT + } + + pub const fn non_format(&self) -> Self { + Self::new(self.non_format_bits()) + } + + pub const fn perm(self, perm: u32) -> Self { + Self::new((self.0 & !0o777) | (perm 
& 0o777)) + } + + pub const fn set_perm(&mut self, perm: u32) { + *self = self.perm(perm); + } + + pub const fn mask_perm(&mut self, perm_mask: u32) { + let perm_mask = perm_mask & 0o777; + let self_perm = self.non_format_bits() & 0o777; + + *self = self.perm(self_perm & perm_mask); + } +} + +impl AtomicMode { + pub const fn new(bits: u32) -> Self { + Self(AtomicU32::new(bits)) + } + + pub const fn from(mode: Mode) -> Self { + Self::new(mode.0) + } + + pub fn load(&self) -> Mode { + Mode(self.0.load(Ordering::Relaxed)) + } + + pub fn store(&self, mode: Mode) { + self.0.store(mode.0, Ordering::Relaxed); + } +} + +impl core::fmt::Debug for AtomicMode { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("AtomicMode") + .field("bits", &self.load().0) + .finish() + } +} + +impl core::fmt::Debug for Mode { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let format_name = match self.format() { + Mode::REG => "REG", + Mode::DIR => "DIR", + Mode::LNK => "LNK", + Mode::BLK => "BLK", + Mode::CHR => "CHR", + _ => "UNK", + }; + + match self.non_format_bits() & !0o777 { + 0 => write!( + f, + "Mode({format_name}, {perm:#o})", + perm = self.non_format_bits() + )?, + rem => write!( + f, + "Mode({format_name}, {perm:#o}, rem={rem:#x})", + perm = self.non_format_bits() & 0o777 + )?, + } + + Ok(()) + } +} + +impl FromSyscallArg for Mode { + fn from_arg(value: usize) -> Self { + Mode::new(value as u32) + } +} + +impl SyscallRetVal for Mode { + fn into_retval(self) -> Option { + Some(self.bits() as usize) + } +} diff --git a/src/kernel/vfs/mod.rs b/src/kernel/vfs/mod.rs index efd68aa7..f62cb9b9 100644 --- a/src/kernel/vfs/mod.rs +++ b/src/kernel/vfs/mod.rs @@ -1,4 +1,3 @@ -use crate::kernel::constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG}; use crate::prelude::*; use alloc::sync::Arc; use dentry::Dentry; @@ -6,33 +5,15 @@ use eonix_sync::LazyLock; use inode::Mode; pub mod dentry; -pub mod file; +mod file; 
pub mod filearray; pub mod inode; pub mod mount; pub mod vfs; -pub type DevId = u32; - -pub fn s_isreg(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFREG -} +pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile}; -pub fn s_isdir(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFDIR -} - -pub fn s_ischr(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFCHR -} - -pub fn s_isblk(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFBLK -} - -pub fn s_islnk(mode: Mode) -> bool { - (mode & S_IFMT) == S_IFLNK -} +pub type DevId = u32; pub struct FsContext { pub fsroot: Arc, @@ -44,7 +25,7 @@ static GLOBAL_FS_CONTEXT: LazyLock> = LazyLock::new(|| { Arc::new(FsContext { fsroot: Dentry::root().clone(), cwd: Spin::new(Dentry::root().clone()), - umask: Spin::new(0o022), + umask: Spin::new(Mode::new(0o022)), }) }); diff --git a/src/lib.rs b/src/lib.rs index fe4796de..e75f8653 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,6 +41,7 @@ use kernel::{ task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, + inode::Mode, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, FsContext, }, @@ -214,7 +215,7 @@ async fn init_process(early_kstack: PRange) { let fs_context = FsContext::global(); let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap(); - mnt_dir.mkdir(0o755).unwrap(); + mnt_dir.mkdir(Mode::new(0o755)).unwrap(); do_mount( &mnt_dir, From 34a625296871b821968eeba64914cc134d9f6e5e Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 17 Aug 2025 00:22:24 +0800 Subject: [PATCH 27/29] feat: unwinding and printing stack backtrace Use unwinding crate to unwind the stack and print stack trace. Slightly adjust the linker script and move eh_frame into rodata section. Due to limited kernel image size, there might be some problems on x86_64 platforms. Further fixes needed but won't be fixed for now.
Signed-off-by: greatbridf (cherry picked from commit 6bb54d9eae13b76768f011c44222b25b785b83e0) Signed-off-by: greatbridf --- .cargo/config.toml | 1 + Cargo.lock | 16 ++++++++++ Cargo.toml | 6 ++-- crates/eonix_hal/src/arch/loongarch64/link.x | 4 +-- .../eonix_hal/src/arch/loongarch64/memory.x | 2 -- crates/eonix_hal/src/arch/riscv64/link.x | 5 ++-- crates/eonix_hal/src/arch/riscv64/memory.x | 2 -- crates/eonix_hal/src/link.x.in | 23 +++++++++------ src/lib.rs | 8 +++++ src/panic.rs | 29 +++++++++++++++++++ 10 files changed, 76 insertions(+), 20 deletions(-) create mode 100644 src/panic.rs diff --git a/.cargo/config.toml b/.cargo/config.toml index 76b69dd5..9c7ba798 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,6 +1,7 @@ [build] target = "riscv64gc-unknown-none-elf" target-dir = 'build' +rustflags = ["-C", "force-unwind-tables"] [unstable] build-std-features = ['compiler-builtins-mem'] diff --git a/Cargo.lock b/Cargo.lock index f4ed3bd8..1eb868dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -153,6 +153,7 @@ dependencies = [ "posix_types", "slab_allocator", "stalloc", + "unwinding", "virtio-drivers", "xmas-elf", ] @@ -265,6 +266,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784a4df722dc6267a04af36895398f59d21d07dce47232adf31ec0ff2fa45e67" +[[package]] +name = "gimli" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93563d740bc9ef04104f9ed6f86f1e3275c2cdafb95664e26584b9ca807a8ffe" + [[package]] name = "intrusive-collections" version = "0.9.7" @@ -453,6 +460,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unwinding" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60612c845ef41699f39dc8c5391f252942c0a88b7d15da672eff0d14101bbd6d" +dependencies = [ + "gimli", +] + 
[[package]] name = "virtio-drivers" version = "0.11.0" diff --git a/Cargo.toml b/Cargo.toml index 4bc8bbe8..ab042dc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,9 @@ stalloc = { version = "0.6.1", default-features = false, features = [ [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } +[target.'cfg(target_arch = "riscv64")'.dependencies] +unwinding = { version = "0.2.8", default-features = false, features = ["unwinder", "fde-static", "personality", "panic"] } + [features] default = [] trace_pci = [] @@ -50,9 +53,6 @@ smp = [] [profile.release] debug = true -[profile.dev] -panic = "abort" - [profile.dev.package.eonix_preempt] opt-level = "s" diff --git a/crates/eonix_hal/src/arch/loongarch64/link.x b/crates/eonix_hal/src/arch/loongarch64/link.x index 11ef5192..f673ad5a 100644 --- a/crates/eonix_hal/src/arch/loongarch64/link.x +++ b/crates/eonix_hal/src/arch/loongarch64/link.x @@ -91,6 +91,6 @@ SECTIONS { } > VDSO AT> RAM VDSO_PADDR = LOADADDR(.vdso); - __kernel_end = ABSOLUTE(LOADADDR(.vdso) + SIZEOF(.vdso)); + __kernel_end = __edata; } -INSERT BEFORE .bss; +INSERT BEFORE .data.after; diff --git a/crates/eonix_hal/src/arch/loongarch64/memory.x b/crates/eonix_hal/src/arch/loongarch64/memory.x index f210b9b5..2a70f81b 100644 --- a/crates/eonix_hal/src/arch/loongarch64/memory.x +++ b/crates/eonix_hal/src/arch/loongarch64/memory.x @@ -12,12 +12,10 @@ REGION_ALIAS("REGION_TEXT", KIMAGE); REGION_ALIAS("REGION_RODATA", KIMAGE); REGION_ALIAS("REGION_DATA", KIMAGE); REGION_ALIAS("REGION_BSS", KBSS); -REGION_ALIAS("REGION_EHFRAME", KIMAGE); REGION_ALIAS("LINK_REGION_TEXT", RAM); REGION_ALIAS("LINK_REGION_RODATA", RAM); REGION_ALIAS("LINK_REGION_DATA", RAM); REGION_ALIAS("LINK_REGION_BSS", RAM); -REGION_ALIAS("LINK_REGION_EHFRAME", RAM); _stext = ORIGIN(REGION_TEXT) + LOADADDR(.text) - ORIGIN(RAM); diff --git a/crates/eonix_hal/src/arch/riscv64/link.x 
b/crates/eonix_hal/src/arch/riscv64/link.x index 3465a0ae..e348e1be 100644 --- a/crates/eonix_hal/src/arch/riscv64/link.x +++ b/crates/eonix_hal/src/arch/riscv64/link.x @@ -43,7 +43,6 @@ SECTIONS { KIMAGE_PAGES = (__edata - _stext + 0x1000 - 1) / 0x1000; KIMAGE_32K_COUNT = (KIMAGE_PAGES + 8 - 1) / 8; - __kernel_end = .; BSS_LENGTH = ABSOLUTE(__ebss - __sbss); } @@ -89,4 +88,6 @@ SECTIONS { VDSO_PADDR = LOADADDR(.vdso); } -INSERT AFTER .data; +INSERT BEFORE .data.after; + +__kernel_end = __edata; diff --git a/crates/eonix_hal/src/arch/riscv64/memory.x b/crates/eonix_hal/src/arch/riscv64/memory.x index 9c5ca2ee..0dc7c4ff 100644 --- a/crates/eonix_hal/src/arch/riscv64/memory.x +++ b/crates/eonix_hal/src/arch/riscv64/memory.x @@ -12,12 +12,10 @@ REGION_ALIAS("REGION_TEXT", KIMAGE); REGION_ALIAS("REGION_RODATA", KIMAGE); REGION_ALIAS("REGION_DATA", KIMAGE); REGION_ALIAS("REGION_BSS", KBSS); -REGION_ALIAS("REGION_EHFRAME", KIMAGE); REGION_ALIAS("LINK_REGION_TEXT", RAM); REGION_ALIAS("LINK_REGION_RODATA", RAM); REGION_ALIAS("LINK_REGION_DATA", RAM); REGION_ALIAS("LINK_REGION_BSS", RAM); -REGION_ALIAS("LINK_REGION_EHFRAME", RAM); _stext = ORIGIN(REGION_TEXT) + LOADADDR(.text) - ORIGIN(RAM); diff --git a/crates/eonix_hal/src/link.x.in b/crates/eonix_hal/src/link.x.in index b4ea6e0a..81c269c2 100644 --- a/crates/eonix_hal/src/link.x.in +++ b/crates/eonix_hal/src/link.x.in @@ -18,6 +18,15 @@ SECTIONS { __srodata = .; *(.rodata .rodata.*); + + . 
= ALIGN(8); + + PROVIDE(__eh_frame = .); + PROVIDE(__executable_start = __stext); + + KEEP(*(.eh_frame_hdr)); + KEEP(*(.eh_frame)); + KEEP(*(.eh_frame.*)); } > REGION_RODATA AT> LINK_REGION_RODATA @@ -32,6 +41,11 @@ SECTIONS { } > REGION_DATA AT> LINK_REGION_DATA + .data.after : + { + __data_after = .; + } > REGION_DATA AT> LINK_REGION_DATA + __edata = .; .bss (NOLOAD) : ALIGN(16) @@ -45,16 +59,7 @@ SECTIONS { __ebss = .; - .eh_frame : ALIGN(16) - { - __seh_frame = .; - - KEEP(*(.eh_frame .eh_frame*)); - - } > REGION_EHFRAME AT> LINK_REGION_EHFRAME - . = ALIGN(0x1000); - __eeh_frame = .; } SECTIONS { diff --git a/src/lib.rs b/src/lib.rs index e75f8653..80d24c28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,9 @@ extern crate alloc; +#[cfg(any(target_arch = "riscv64", target_arch = "x86_64"))] +extern crate unwinding; + mod driver; mod fs; mod hash; @@ -16,6 +19,8 @@ mod io; mod kernel; mod kernel_init; mod net; +#[cfg(any(target_arch = "riscv64", target_arch = "x86_64"))] +mod panic; mod path; mod prelude; mod rcu; @@ -53,6 +58,9 @@ use prelude::*; #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] fn do_panic() -> ! 
{ + #[cfg(target_arch = "riscv64")] + panic::stack_trace(); + shutdown(); } diff --git a/src/panic.rs b/src/panic.rs new file mode 100644 index 00000000..3c9c5f34 --- /dev/null +++ b/src/panic.rs @@ -0,0 +1,29 @@ +use core::ffi::c_void; + +use eonix_log::println_fatal; +use unwinding::abi::{ + UnwindContext, UnwindReasonCode, _Unwind_Backtrace, _Unwind_GetIP, _Unwind_GetRegionStart, +}; + +pub fn stack_trace() { + struct CallbackData { + counter: usize, + } + + extern "C" fn callback(unwind_ctx: &UnwindContext<'_>, arg: *mut c_void) -> UnwindReasonCode { + let data = unsafe { &mut *(arg as *mut CallbackData) }; + data.counter += 1; + + println_fatal!( + "{:4}: {:#018x} - at function {:#018x}", + data.counter, + _Unwind_GetIP(unwind_ctx), + _Unwind_GetRegionStart(unwind_ctx), + ); + + UnwindReasonCode::NO_REASON + } + + let mut data = CallbackData { counter: 0 }; + _Unwind_Backtrace(callback, &raw mut data as *mut c_void); +} From 3fb4966118201eeb064743b7fca94224397fdb9f Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 17 Aug 2025 00:43:13 +0800 Subject: [PATCH 28/29] task: fix infinite sleep in stackful tasks A stackful task might be woken up before it is actually put to sleep after its future returns Poll::Pending. When that happens, the task sleeps forever, since it is on neither the wait list nor the ready queue. The solution is to record the wakeup in the stackful task's waker and to check that flag before putting the task to sleep with wait_for_wakeups(). Also, implement Drop for RCUPointer by using call_rcu to drop the underlying data. We must mark T: Send + Sync + 'static in order to send the arc to the runtime...
Signed-off-by: greatbridf --- src/kernel/task.rs | 42 +++++++++++++++++++++++++++++++++++++++--- src/rcu.rs | 27 ++++++++++++++++++--------- 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 13e2ec93..2ef58069 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -10,7 +10,6 @@ mod signal; mod thread; pub use clone::{do_clone, CloneArgs, CloneFlags}; -use eonix_runtime::task::Task; pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; pub use kernel_stack::KernelStack; pub use loader::ProgramLoader; @@ -84,10 +83,14 @@ where interrupt::{default_fault_handler, default_irq_handler}, timer::{should_reschedule, timer_interrupt}, }; + use alloc::sync::Arc; + use alloc::task::Wake; use core::cell::UnsafeCell; use core::future::Future; use core::pin::Pin; use core::ptr::NonNull; + use core::sync::atomic::AtomicBool; + use core::sync::atomic::Ordering; use core::task::Context; use core::task::Poll; use core::task::Waker; @@ -97,6 +100,7 @@ where use eonix_hal::trap::TrapContext; use eonix_preempt::assert_preempt_enabled; use eonix_runtime::executor::Stack; + use eonix_runtime::task::Task; use thread::wait_for_wakeups; let stack = KernelStack::new(); @@ -105,18 +109,46 @@ where where F: Future, { - let waker = Waker::from(Task::current().clone()); + struct WakeSaver { + task: Arc, + woken: AtomicBool, + } + + impl Wake for WakeSaver { + fn wake_by_ref(self: &Arc) { + // SAFETY: If we read true below in the loop, we must have been + // woken up and acquired our waker's work by the runtime. 
+ self.woken.store(true, Ordering::Relaxed); + self.task.wake_by_ref(); + } + + fn wake(self: Arc) { + self.wake_by_ref(); + } + } + + let wake_saver = Arc::new(WakeSaver { + task: Task::current().clone(), + woken: AtomicBool::new(false), + }); + let waker = Waker::from(wake_saver.clone()); let mut cx = Context::from_waker(&waker); let output = loop { match future.as_mut().poll(&mut cx) { Poll::Ready(output) => break output, Poll::Pending => { + assert_preempt_enabled!("Blocking in stackful futures is not allowed."); + if Task::current().is_ready() { continue; } - assert_preempt_enabled!("Blocking in stackful futures is not allowed."); + // SAFETY: The runtime must have ensured that we can see the + // work done by the waker. + if wake_saver.woken.swap(false, Ordering::Relaxed) { + continue; + } unsafe { #[cfg(target_arch = "riscv64")] @@ -129,6 +161,10 @@ where } }; + drop(cx); + drop(waker); + drop(wake_saver); + unsafe { output_ptr.write(Some(output)); } diff --git a/src/rcu.rs b/src/rcu.rs index 32ff7657..c1645d33 100644 --- a/src/rcu.rs +++ b/src/rcu.rs @@ -194,9 +194,15 @@ impl<'lt, T: RCUNode> Iterator for RCUIterator<'lt, T> { } } -pub struct RCUPointer(AtomicPtr); - -impl core::fmt::Debug for RCUPointer { +pub struct RCUPointer(AtomicPtr) +where + T: Send + Sync + 'static; + +impl core::fmt::Debug for RCUPointer +where + T: core::fmt::Debug, + T: Send + Sync + 'static, +{ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match NonNull::new(self.0.load(Ordering::Acquire)) { Some(pointer) => { @@ -209,7 +215,10 @@ impl core::fmt::Debug for RCUPointer { } } -impl RCUPointer { +impl RCUPointer +where + T: Send + Sync + 'static, +{ pub const fn empty() -> Self { Self(AtomicPtr::new(core::ptr::null_mut())) } @@ -266,16 +275,16 @@ impl RCUPointer { } } -impl Drop for RCUPointer { +impl Drop for RCUPointer +where + T: Send + Sync + 'static, +{ fn drop(&mut self) { // SAFETY: We call `rcu_sync()` to ensure that all readers are done. 
if let Some(arc) = unsafe { self.swap(None) } { // We only wait if there are other references. if Arc::strong_count(&arc) == 1 { - call_rcu(move || { - let _ = arc; - todo!(); - }); + call_rcu(move || drop(arc)); } } } From 8c656b5898113f9a5bf0dcc678e4119d0f2f9a6c Mon Sep 17 00:00:00 2001 From: greatbridf Date: Mon, 25 Aug 2025 22:38:24 +0800 Subject: [PATCH 29/29] configure: check and use ARCH given in env The current implementation ignores the ARCH value given in the environment and always uses the default arch. Fix this so that the supplied ARCH is honored. Signed-off-by: greatbridf --- configure | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configure b/configure index 87033ea6..1b8efeee 100755 --- a/configure +++ b/configure @@ -11,7 +11,7 @@ event() { printf "$1... " } -ARCH=${ARCH:-x86_64} +ARCH=${ARCH:-"$DEFAULT_ARCH"} # Define toolchain and QEMU/GDB settings for per architecture event "target architecture" @@ -40,7 +40,7 @@ esac if [ "$QEMU" = "" ]; then event "checking default qemu" - QEMU="qemu-system-$DEFAULT_ARCH" + QEMU="qemu-system-$ARCH" if $QEMU --version > /dev/null 2>&1; then QEMU="qemu-system-\$(ARCH)" break @@ -65,7 +65,7 @@ check_gdb_arch() { local item="$1" if $item --init-eval-command 'set arch' \ --init-eval-command 'q' 2>&1 \ - | grep "$DEFAULT_ARCH" >/dev/null 2>&1; then + | grep "$ARCH" >/dev/null 2>&1; then return 0 else return 1 @@ -74,7 +74,7 @@ check_gdb_arch() { if [ "$GDB" = "" ]; then event "checking default gdb" - if check_gdb_arch "$DEFAULT_ARCH-elf-gdb"; then + if check_gdb_arch "$ARCH-elf-gdb"; then GDB="\$(ARCH)-elf-gdb" break fi @@ -126,7 +126,7 @@ else fi cp Makefile.src "$OUT" -sed -i '' -e "s|##DEFAULT_ARCH##|$DEFAULT_ARCH|" "$OUT" > /dev/null 2>&1 +sed -i '' -e "s|##DEFAULT_ARCH##|$ARCH|" "$OUT" > /dev/null 2>&1 sed -i '' -e "s|##GDB##|$GDB|" "$OUT" > /dev/null 2>&1 sed -i '' -e "s|##QEMU##|$QEMU|" "$OUT" > /dev/null 2>&1 sed -i '' -e "s|##FDISK##|$FDISK|" "$OUT" > /dev/null 2>&1