diff --git a/.cargo/config.toml b/.cargo/config.toml index 76b69dd5..9c7ba798 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,6 +1,7 @@ [build] target = "riscv64gc-unknown-none-elf" target-dir = 'build' +rustflags = ["-C", "force-unwind-tables"] [unstable] build-std-features = ['compiler-builtins-mem'] diff --git a/Cargo.lock b/Cargo.lock index 2caa0bad..59242bbc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,6 +19,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c330e503236d0b06386ae6cc42a513ef1ccc23c52b603c1b52f018564faf44" +[[package]] +name = "another_ext4" +version = "0.1.0" +source = "git+https://github.com/SMS-Derfflinger/another_ext4?branch=main#ed6d91718db721eb4a744483c289cc44a6f34bf4" +dependencies = [ + "bitflags", + "log", +] + [[package]] name = "atomic_unique_refcell" version = "0.1.0" @@ -134,6 +143,7 @@ version = "0.1.0" dependencies = [ "acpi", "align_ext", + "another_ext4", "atomic_unique_refcell", "bitflags", "buddy_allocator", @@ -145,13 +155,14 @@ dependencies = [ "eonix_preempt", "eonix_runtime", "eonix_sync", - "ext4_rs", - "intrusive-collections", + "intrusive-collections 0.9.8", "intrusive_list", "itertools", "pointers", "posix_types", "slab_allocator", + "stalloc", + "unwinding", "virtio-drivers", "xmas-elf", ] @@ -212,7 +223,7 @@ dependencies = [ "eonix_percpu", "eonix_preempt", "eonix_sync", - "intrusive-collections", + "intrusive-collections 0.9.7", "pointers", ] @@ -245,17 +256,7 @@ dependencies = [ "eonix_preempt", "eonix_spin", "eonix_sync_base", - "intrusive-collections", -] - -[[package]] -name = "ext4_rs" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1a97344bde15b0ace15e265dab27228d4bdc37a0bfa8548c5645d7cfa6a144" -dependencies = [ - "bitflags", - "log", + "intrusive-collections 0.9.7", ] [[package]] @@ -264,6 +265,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784a4df722dc6267a04af36895398f59d21d07dce47232adf31ec0ff2fa45e67" +[[package]] +name = "gimli" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93563d740bc9ef04104f9ed6f86f1e3275c2cdafb95664e26584b9ca807a8ffe" + [[package]] name = "intrusive-collections" version = "0.9.7" @@ -273,6 +280,14 @@ dependencies = [ "memoffset", ] +[[package]] +name = "intrusive-collections" +version = "0.9.8" +source = "git+https://github.com/greatbridf/intrusive-rs#0e2d88bffc9df606566fba2d61d1217182b06975" +dependencies = [ + "memoffset", +] + [[package]] name = "intrusive_list" version = "0.1.0" @@ -401,11 +416,17 @@ dependencies = [ "intrusive_list", ] +[[package]] +name = "stalloc" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a37f0ead4094eeb54c6893316aa139e48b252f1c07511e5124fa1f9414df5b6c" + [[package]] name = "syn" -version = "2.0.103" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", @@ -438,6 +459,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unwinding" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"60612c845ef41699f39dc8c5391f252942c0a88b7d15da672eff0d14101bbd6d" +dependencies = [ + "gimli", +] + [[package]] name = "virtio-drivers" version = "0.11.0" diff --git a/Cargo.toml b/Cargo.toml index 15df5f15..e70d8c65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,21 +25,32 @@ posix_types = { path = "./crates/posix_types" } slab_allocator = { path = "./crates/slab_allocator" } bitflags = "2.6.0" -intrusive-collections = "0.9.7" +intrusive-collections = { version = "0.9.8", git = "https://github.com/greatbridf/intrusive-rs" } itertools = { version = "0.13.0", default-features = false } acpi = "5.2.0" align_ext = "0.1.0" xmas-elf = "0.10.0" -ext4_rs = "1.3.2" +another_ext4 = { git = "https://github.com/SMS-Derfflinger/another_ext4", branch = "main" } +stalloc = { version = "0.6.1", default-features = false, features = [ + "allocator-api", +] } [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } +[target.'cfg(target_arch = "riscv64")'.dependencies] +unwinding = { version = "0.2.8", default-features = false, features = [ + "unwinder", + "fde-static", + "personality", + "panic", +] } + [features] default = [] trace_pci = [] trace_syscall = [] -trace_scheduler = [] +trace_scheduler = ["eonix_runtime/trace_scheduler"] log_trace = ["trace_pci", "trace_syscall", "trace_scheduler"] log_debug = [] smp = [] @@ -47,9 +58,6 @@ smp = [] [profile.release] debug = true -[profile.dev] -panic = "abort" - [profile.dev.package.eonix_preempt] opt-level = "s" diff --git a/Makefile.src b/Makefile.src index 2701ecd6..ab13f5b8 100644 --- a/Makefile.src +++ b/Makefile.src @@ -22,7 +22,15 @@ KERNEL_CARGO_MANIFESTS += $(shell find src macros crates -name Cargo.toml -type KERNEL_DEPS := $(KERNEL_SOURCES) $(KERNEL_CARGO_MANIFESTS) QEMU_ARGS ?= -no-reboot -CARGO_FLAGS := --profile $(PROFILE) --features $(FEATURES)$(if $(SMP),$(COMMA)smp,) +CARGO_FLAGS := --profile $(PROFILE) + +ifneq ($(SMP),) +CARGO_FLAGS += --features smp +endif + +ifneq ($(FEATURES),) +CARGO_FLAGS += --features $(FEATURES) +endif ifeq ($(HOST),darwin) QEMU_ACCEL ?= -accel tcg diff --git a/configure b/configure index 87033ea6..1b8efeee 100755 --- a/configure +++ b/configure @@ -11,7 +11,7 @@ event() { printf "$1... 
" } -ARCH=${ARCH:-x86_64} +ARCH=${ARCH:-"$DEFAULT_ARCH"} # Define toolchain and QEMU/GDB settings for per architecture event "target architecture" @@ -40,7 +40,7 @@ esac if [ "$QEMU" = "" ]; then event "checking default qemu" - QEMU="qemu-system-$DEFAULT_ARCH" + QEMU="qemu-system-$ARCH" if $QEMU --version > /dev/null 2>&1; then QEMU="qemu-system-\$(ARCH)" break @@ -65,7 +65,7 @@ check_gdb_arch() { local item="$1" if $item --init-eval-command 'set arch' \ --init-eval-command 'q' 2>&1 \ - | grep "$DEFAULT_ARCH" >/dev/null 2>&1; then + | grep "$ARCH" >/dev/null 2>&1; then return 0 else return 1 @@ -74,7 +74,7 @@ check_gdb_arch() { if [ "$GDB" = "" ]; then event "checking default gdb" - if check_gdb_arch "$DEFAULT_ARCH-elf-gdb"; then + if check_gdb_arch "$ARCH-elf-gdb"; then GDB="\$(ARCH)-elf-gdb" break fi @@ -126,7 +126,7 @@ else fi cp Makefile.src "$OUT" -sed -i '' -e "s|##DEFAULT_ARCH##|$DEFAULT_ARCH|" "$OUT" > /dev/null 2>&1 +sed -i '' -e "s|##DEFAULT_ARCH##|$ARCH|" "$OUT" > /dev/null 2>&1 sed -i '' -e "s|##GDB##|$GDB|" "$OUT" > /dev/null 2>&1 sed -i '' -e "s|##QEMU##|$QEMU|" "$OUT" > /dev/null 2>&1 sed -i '' -e "s|##FDISK##|$FDISK|" "$OUT" > /dev/null 2>&1 diff --git a/crates/eonix_hal/eonix_hal_traits/src/trap.rs b/crates/eonix_hal/eonix_hal_traits/src/trap.rs index d698dbaa..e51a9eb4 100644 --- a/crates/eonix_hal/eonix_hal_traits/src/trap.rs +++ b/crates/eonix_hal/eonix_hal_traits/src/trap.rs @@ -66,6 +66,7 @@ where { Syscall { no: usize, args: [usize; 6] }, Fault(Fault), + Breakpoint, Irq { callback: FIrq }, Timer { callback: FTimer }, } diff --git a/crates/eonix_hal/src/arch/loongarch64/link.x b/crates/eonix_hal/src/arch/loongarch64/link.x index 11ef5192..f673ad5a 100644 --- a/crates/eonix_hal/src/arch/loongarch64/link.x +++ b/crates/eonix_hal/src/arch/loongarch64/link.x @@ -91,6 +91,6 @@ SECTIONS { } > VDSO AT> RAM VDSO_PADDR = LOADADDR(.vdso); - __kernel_end = ABSOLUTE(LOADADDR(.vdso) + SIZEOF(.vdso)); + __kernel_end = __edata; } -INSERT BEFORE .bss; +INSERT BEFORE .data.after; diff --git a/crates/eonix_hal/src/arch/loongarch64/memory.x b/crates/eonix_hal/src/arch/loongarch64/memory.x index f210b9b5..2a70f81b 100644 --- a/crates/eonix_hal/src/arch/loongarch64/memory.x +++ b/crates/eonix_hal/src/arch/loongarch64/memory.x @@ -12,12 +12,10 @@ REGION_ALIAS("REGION_TEXT", KIMAGE); REGION_ALIAS("REGION_RODATA", KIMAGE); REGION_ALIAS("REGION_DATA", KIMAGE); REGION_ALIAS("REGION_BSS", KBSS); -REGION_ALIAS("REGION_EHFRAME", KIMAGE); REGION_ALIAS("LINK_REGION_TEXT", RAM); REGION_ALIAS("LINK_REGION_RODATA", RAM); REGION_ALIAS("LINK_REGION_DATA", RAM); REGION_ALIAS("LINK_REGION_BSS", RAM); -REGION_ALIAS("LINK_REGION_EHFRAME", RAM); _stext = ORIGIN(REGION_TEXT) + LOADADDR(.text) - ORIGIN(RAM); diff --git a/crates/eonix_hal/src/arch/loongarch64/mm.rs b/crates/eonix_hal/src/arch/loongarch64/mm.rs index 91a2aae5..d5b00a6b 100644 --- a/crates/eonix_hal/src/arch/loongarch64/mm.rs +++ b/crates/eonix_hal/src/arch/loongarch64/mm.rs @@ -87,6 +87,8 @@ impl PagingMode for PagingMode48 { pub type ArchPagingMode = PagingMode48; +unsafe impl Send for RawPageTable48<'_> {} + impl<'a> RawPageTable<'a> for RawPageTable48<'a> { type Entry = PTE64; diff --git a/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs b/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs index f529bf61..4a6c4754 100644 --- a/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs +++ b/crates/eonix_hal/src/arch/loongarch64/trap/mod.rs @@ -278,11 +278,18 @@ impl TrapReturn for TrapContext { to_ctx.set_interrupt_enabled(false); unsafe { + let mut 
old_trap_ctx: usize; + let mut old_task_ctx: usize; + asm!( + "csrrd {old_trap_ctx}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", + "csrrd {old_task_ctx}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", "csrwr {captured_trap_context}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", "csrwr {capturer_task_context}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", captured_trap_context = inout(reg) &raw mut *self => _, capturer_task_context = inout(reg) &raw mut capturer_ctx => _, + old_trap_ctx = out(reg) old_trap_ctx, + old_task_ctx = out(reg) old_task_ctx, CSR_CAPTURED_TRAP_CONTEXT_ADDR = const CSR_CAPTURED_TRAP_CONTEXT_ADDR, CSR_CAPTURER_TASK_CONTEXT_ADDR = const CSR_CAPTURER_TASK_CONTEXT_ADDR, options(nomem, nostack, preserves_flags), @@ -291,8 +298,10 @@ impl TrapReturn for TrapContext { TaskContext::switch(&mut capturer_ctx, &mut to_ctx); asm!( - "csrwr $zero, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", - "csrwr $zero, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", + "csrwr {old_trap_ctx}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}", + "csrwr {old_task_ctx}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}", + old_trap_ctx = inout(reg) old_trap_ctx, + old_task_ctx = inout(reg) old_task_ctx, CSR_CAPTURED_TRAP_CONTEXT_ADDR = const CSR_CAPTURED_TRAP_CONTEXT_ADDR, CSR_CAPTURER_TASK_CONTEXT_ADDR = const CSR_CAPTURER_TASK_CONTEXT_ADDR, options(nomem, nostack, preserves_flags), diff --git a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs index 83e28cfa..56bf59b6 100644 --- a/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs @@ -1,4 +1,4 @@ -use crate::processor::CPU; +use crate::{arch::trap::CSR_KERNEL_TP, processor::CPU}; use core::{arch::asm, mem::offset_of}; use eonix_hal_traits::{ fault::{Fault, PageFaultErrorCode}, @@ -173,6 +173,7 @@ impl RawTrapContext for TrapContext { | Exception::MemoryAccessAddressError | Exception::PagePrivilegeIllegal, ) => TrapType::Fault(Fault::BadAccess), + Trap::Exception(Exception::Breakpoint) => TrapType::Breakpoint, Trap::Exception(Exception::InstructionNotExist) => TrapType::Fault(Fault::InvalidOp), Trap::Exception(Exception::Syscall) => TrapType::Syscall { no: self.syscall_no(), @@ -226,7 +227,17 @@ impl RawTrapContext for TrapContext { fn set_user_mode(&mut self, user: bool) { match user { true => self.prmd |= 0x3, - false => self.prmd &= !0x3, + false => { + unsafe { + asm!( + "csrrd {tp}, {CSR_KERNEL_TP}", + tp = out(reg) self.regs.tp, + CSR_KERNEL_TP = const CSR_KERNEL_TP, + options(nomem, nostack, preserves_flags), + ) + } + self.prmd &= !0x3; + } } } diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 7c6a6ae0..0f1dff63 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -3,7 +3,6 @@ use super::{ console::write_str, cpu::{CPUID, CPU_COUNT}, time::set_next_timer, - trap::TRAP_SCRATCH, }; use crate::{ arch::{ @@ -234,13 +233,6 @@ fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) { } percpu_area.register(cpu.cpuid()); - - unsafe { - // SAFETY: Interrupts are disabled. 
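Both ports now keep the kernel thread pointer in the trap frame itself instead of in a per-CPU `TRAP_SCRATCH`: loongarch64 reads it back from `CSR_KERNEL_TP` in `set_user_mode(false)`, and riscv64 (later in this patch) copies the live `tp` register. A minimal riscv64-flavored sketch of the idea; `capture_kernel_tp` and `set_supervisor_return` are illustrative names, not the kernel's API:

```rust
// Sketch only: snapshot the live kernel `tp` so a trap frame that returns to
// supervisor mode restores the correct per-CPU base. Compiles for riscv64 targets.
#[cfg(target_arch = "riscv64")]
fn capture_kernel_tp() -> u64 {
    let tp: u64;
    unsafe {
        // While running in the kernel, `tp` holds the per-CPU area pointer.
        core::arch::asm!("mv {}, tp", out(reg) tp, options(nomem, nostack, preserves_flags));
    }
    tp
}

#[cfg(target_arch = "riscv64")]
fn set_supervisor_return(saved_tp: &mut u64) {
    // The rewritten trap entry reloads `tp` from the saved frame unconditionally,
    // so a frame that targets supervisor mode must carry the current value.
    *saved_tp = capture_kernel_tp();
}
```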
- TRAP_SCRATCH - .as_mut() - .set_kernel_tp(PercpuArea::get_for(cpu.cpuid()).unwrap().cast()); - } } fn get_ap_start_addr() -> usize { diff --git a/crates/eonix_hal/src/arch/riscv64/cpu.rs b/crates/eonix_hal/src/arch/riscv64/cpu.rs index 8d62e741..9c843eaf 100644 --- a/crates/eonix_hal/src/arch/riscv64/cpu.rs +++ b/crates/eonix_hal/src/arch/riscv64/cpu.rs @@ -1,9 +1,13 @@ use super::{ interrupt::InterruptControl, - trap::{setup_trap, TRAP_SCRATCH}, + trap::{setup_trap, TrapContext}, }; use crate::arch::fdt::{FdtExt, FDT}; -use core::{arch::asm, pin::Pin, ptr::NonNull, sync::atomic::AtomicUsize}; +use core::{ + arch::asm, cell::UnsafeCell, mem::MaybeUninit, pin::Pin, ptr::NonNull, + sync::atomic::AtomicUsize, +}; +use eonix_hal_traits::trap::RawTrapContext; use eonix_preempt::PreemptGuard; use eonix_sync_base::LazyLock; use riscv::register::{ @@ -17,6 +21,9 @@ pub static CPU_COUNT: AtomicUsize = AtomicUsize::new(0); #[eonix_percpu::define_percpu] pub static CPUID: usize = 0; +#[eonix_percpu::define_percpu] +static DEFAULT_TRAP_CONTEXT: MaybeUninit = MaybeUninit::uninit(); + #[eonix_percpu::define_percpu] static LOCAL_CPU: LazyLock = LazyLock::new(|| CPU::new(CPUID.get())); @@ -56,14 +63,10 @@ impl CPU { interrupt.init(); sstatus::set_sum(); - sscratch::write(TRAP_SCRATCH.as_ptr() as usize); + sscratch::write(DEFAULT_TRAP_CONTEXT.as_ptr() as usize); } - pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) { - TRAP_SCRATCH - .as_mut() - .set_trap_context(NonNull::new(sp as *mut _).unwrap()); - } + pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) {} pub fn set_tls32(self: Pin<&mut Self>, _user_tls: &UserTLS) { // nothing diff --git a/crates/eonix_hal/src/arch/riscv64/link.x b/crates/eonix_hal/src/arch/riscv64/link.x index 3465a0ae..e348e1be 100644 --- a/crates/eonix_hal/src/arch/riscv64/link.x +++ b/crates/eonix_hal/src/arch/riscv64/link.x @@ -43,7 +43,6 @@ SECTIONS { KIMAGE_PAGES = (__edata - _stext + 0x1000 - 1) / 0x1000; KIMAGE_32K_COUNT = (KIMAGE_PAGES + 8 - 1) / 8; - __kernel_end = .; BSS_LENGTH = ABSOLUTE(__ebss - __sbss); } @@ -89,4 +88,6 @@ SECTIONS { VDSO_PADDR = LOADADDR(.vdso); } -INSERT AFTER .data; +INSERT BEFORE .data.after; + +__kernel_end = __edata; diff --git a/crates/eonix_hal/src/arch/riscv64/memory.x b/crates/eonix_hal/src/arch/riscv64/memory.x index 9c5ca2ee..0dc7c4ff 100644 --- a/crates/eonix_hal/src/arch/riscv64/memory.x +++ b/crates/eonix_hal/src/arch/riscv64/memory.x @@ -12,12 +12,10 @@ REGION_ALIAS("REGION_TEXT", KIMAGE); REGION_ALIAS("REGION_RODATA", KIMAGE); REGION_ALIAS("REGION_DATA", KIMAGE); REGION_ALIAS("REGION_BSS", KBSS); -REGION_ALIAS("REGION_EHFRAME", KIMAGE); REGION_ALIAS("LINK_REGION_TEXT", RAM); REGION_ALIAS("LINK_REGION_RODATA", RAM); REGION_ALIAS("LINK_REGION_DATA", RAM); REGION_ALIAS("LINK_REGION_BSS", RAM); -REGION_ALIAS("LINK_REGION_EHFRAME", RAM); _stext = ORIGIN(REGION_TEXT) + LOADADDR(.text) - ORIGIN(RAM); diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 74ebc349..46dd9437 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -88,6 +88,8 @@ impl PagingMode for PagingModeSv48 { pub type ArchPagingMode = PagingModeSv48; +unsafe impl Send for RawPageTableSv48<'_> {} + impl<'a> RawPageTable<'a> for RawPageTableSv48<'a> { type Entry = PTE64; diff --git a/crates/eonix_hal/src/arch/riscv64/trap/captured.rs b/crates/eonix_hal/src/arch/riscv64/trap/captured.rs new file mode 100644 index 00000000..d4c00e9f --- /dev/null +++ 
b/crates/eonix_hal/src/arch/riscv64/trap/captured.rs @@ -0,0 +1,177 @@ +use crate::{arch::trap::Registers, context::TaskContext, trap::TrapContext}; +use core::{arch::naked_asm, mem::MaybeUninit}; +use eonix_hal_traits::context::RawTaskContext; + +static mut DIRTY_TASK_CONTEXT: MaybeUninit = MaybeUninit::uninit(); + +// If captured trap context is present, we use it directly. +// We need to restore the kernel tp from that TrapContext but sp is +// fine since we will use TaskContext::switch. +#[unsafe(naked)] +pub(super) unsafe extern "C" fn _captured_trap_entry() -> ! { + naked_asm!( + "csrrw t0, sscratch, t0", + "sd tp, {tp}(t0)", + "ld tp, {ra}(t0)", // Load kernel tp from trap_ctx.ra + "sd ra, {ra}(t0)", + "ld ra, {sp}(t0)", // Load capturer task context from trap_ctx.sp + "sd sp, {sp}(t0)", + "sd gp, {gp}(t0)", + "sd a0, {a0}(t0)", + "sd a1, {a1}(t0)", + "sd a2, {a2}(t0)", + "sd a3, {a3}(t0)", + "sd a4, {a4}(t0)", + "sd t1, {t1}(t0)", + "sd a5, {a5}(t0)", + "sd a6, {a6}(t0)", + "sd a7, {a7}(t0)", + "sd t3, {t3}(t0)", + "sd t4, {t4}(t0)", + "sd t5, {t5}(t0)", + "sd t2, {t2}(t0)", + "sd t6, {t6}(t0)", + "sd s0, {s0}(t0)", + "sd s1, {s1}(t0)", + "sd s2, {s2}(t0)", + "sd s3, {s3}(t0)", + "sd s4, {s4}(t0)", + "sd s5, {s5}(t0)", + "sd s6, {s6}(t0)", + "sd s7, {s7}(t0)", + "sd s8, {s8}(t0)", + "sd s9, {s9}(t0)", + "sd s10, {s10}(t0)", + "sd s11, {s11}(t0)", + "csrr t2, sstatus", + "csrr t3, sepc", + "csrr t4, scause", + "csrr t5, stval", + "csrrw t6, sscratch, t0", + "sd t6, {t0}(t0)", + "sd t2, {sstatus}(t0)", + "sd t3, {sepc}(t0)", + "sd t4, {scause}(t0)", + "sd t5, {stval}(t0)", + "la a0, {dirty_task_context}", + "mv a1, ra", + "j {task_context_switch}", + ra = const Registers::OFFSET_RA, + sp = const Registers::OFFSET_SP, + gp = const Registers::OFFSET_GP, + tp = const Registers::OFFSET_TP, + t1 = const Registers::OFFSET_T1, + t2 = const Registers::OFFSET_T2, + t0 = const Registers::OFFSET_T0, + a0 = const Registers::OFFSET_A0, + a1 = const Registers::OFFSET_A1, + a2 = const Registers::OFFSET_A2, + a3 = const Registers::OFFSET_A3, + a4 = const Registers::OFFSET_A4, + a5 = const Registers::OFFSET_A5, + a6 = const Registers::OFFSET_A6, + a7 = const Registers::OFFSET_A7, + t3 = const Registers::OFFSET_T3, + t4 = const Registers::OFFSET_T4, + t5 = const Registers::OFFSET_T5, + t6 = const Registers::OFFSET_T6, + s0 = const Registers::OFFSET_S0, + s1 = const Registers::OFFSET_S1, + s2 = const Registers::OFFSET_S2, + s3 = const Registers::OFFSET_S3, + s4 = const Registers::OFFSET_S4, + s5 = const Registers::OFFSET_S5, + s6 = const Registers::OFFSET_S6, + s7 = const Registers::OFFSET_S7, + s8 = const Registers::OFFSET_S8, + s9 = const Registers::OFFSET_S9, + s10 = const Registers::OFFSET_S10, + s11 = const Registers::OFFSET_S11, + sstatus = const TrapContext::OFFSET_SSTATUS, + sepc = const TrapContext::OFFSET_SEPC, + scause = const TrapContext::OFFSET_SCAUSE, + stval = const TrapContext::OFFSET_STVAL, + dirty_task_context = sym DIRTY_TASK_CONTEXT, + task_context_switch = sym TaskContext::switch, + ); +} + +#[unsafe(naked)] +pub(super) unsafe extern "C" fn _captured_trap_return(ctx: &mut TrapContext) -> ! 
{ + naked_asm!( + "csrr t0, sscratch", + "ld t1, {sstatus}(t0)", + "ld t2, {sepc}(t0)", + "csrw sstatus, t1", + "csrw sepc, t2", + "mv t4, tp", + "mv t5, sp", + "ld tp, {tp}(t0)", + "ld ra, {ra}(t0)", + "ld sp, {sp}(t0)", + "sd t4, {ra}(t0)", // Store kernel tp to trap_ctx.ra + "sd t5, {sp}(t0)", // Store capturer task context to trap_ctx.sp + "ld gp, {gp}(t0)", + "ld a0, {a0}(t0)", + "ld a1, {a1}(t0)", + "ld a2, {a2}(t0)", + "ld a3, {a3}(t0)", + "ld a4, {a4}(t0)", + "ld t1, {t1}(t0)", + "ld a5, {a5}(t0)", + "ld a6, {a6}(t0)", + "ld a7, {a7}(t0)", + "ld t3, {t3}(t0)", + "ld t4, {t4}(t0)", + "ld t5, {t5}(t0)", + "ld t2, {t2}(t0)", + "ld t6, {t6}(t0)", + "ld s0, {s0}(t0)", + "ld s1, {s1}(t0)", + "ld s2, {s2}(t0)", + "ld s3, {s3}(t0)", + "ld s4, {s4}(t0)", + "ld s5, {s5}(t0)", + "ld s6, {s6}(t0)", + "ld s7, {s7}(t0)", + "ld s8, {s8}(t0)", + "ld s9, {s9}(t0)", + "ld s10, {s10}(t0)", + "ld s11, {s11}(t0)", + "ld t0, {t0}(t0)", + "sret", + ra = const Registers::OFFSET_RA, + sp = const Registers::OFFSET_SP, + gp = const Registers::OFFSET_GP, + tp = const Registers::OFFSET_TP, + t1 = const Registers::OFFSET_T1, + t2 = const Registers::OFFSET_T2, + t0 = const Registers::OFFSET_T0, + a0 = const Registers::OFFSET_A0, + a1 = const Registers::OFFSET_A1, + a2 = const Registers::OFFSET_A2, + a3 = const Registers::OFFSET_A3, + a4 = const Registers::OFFSET_A4, + a5 = const Registers::OFFSET_A5, + a6 = const Registers::OFFSET_A6, + a7 = const Registers::OFFSET_A7, + t3 = const Registers::OFFSET_T3, + t4 = const Registers::OFFSET_T4, + t5 = const Registers::OFFSET_T5, + t6 = const Registers::OFFSET_T6, + s0 = const Registers::OFFSET_S0, + s1 = const Registers::OFFSET_S1, + s2 = const Registers::OFFSET_S2, + s3 = const Registers::OFFSET_S3, + s4 = const Registers::OFFSET_S4, + s5 = const Registers::OFFSET_S5, + s6 = const Registers::OFFSET_S6, + s7 = const Registers::OFFSET_S7, + s8 = const Registers::OFFSET_S8, + s9 = const Registers::OFFSET_S9, + s10 = const Registers::OFFSET_S10, + s11 = const Registers::OFFSET_S11, + sstatus = const TrapContext::OFFSET_SSTATUS, + sepc = const TrapContext::OFFSET_SEPC, + ); +} diff --git a/crates/eonix_hal/src/arch/riscv64/trap/default.rs b/crates/eonix_hal/src/arch/riscv64/trap/default.rs new file mode 100644 index 00000000..4025b719 --- /dev/null +++ b/crates/eonix_hal/src/arch/riscv64/trap/default.rs @@ -0,0 +1,134 @@ +use super::Registers; +use crate::trap::TrapContext; +use core::arch::naked_asm; + +unsafe extern "C" { + fn _default_trap_handler(trap_context: &mut TrapContext); +} + +#[unsafe(naked)] +pub(super) unsafe extern "C" fn _default_trap_entry() -> ! 
{ + naked_asm!( + "csrrw t0, sscratch, t0", + "sd tp, {tp}(t0)", + "sd ra, {ra}(t0)", + "sd sp, {sp}(t0)", + "sd gp, {gp}(t0)", + "sd a0, {a0}(t0)", + "sd a1, {a1}(t0)", + "sd a2, {a2}(t0)", + "sd a3, {a3}(t0)", + "sd a4, {a4}(t0)", + "sd t1, {t1}(t0)", + "sd a5, {a5}(t0)", + "sd a6, {a6}(t0)", + "sd a7, {a7}(t0)", + "sd t3, {t3}(t0)", + "sd t4, {t4}(t0)", + "sd t5, {t5}(t0)", + "sd t2, {t2}(t0)", + "sd t6, {t6}(t0)", + "sd s0, {s0}(t0)", + "sd s1, {s1}(t0)", + "sd s2, {s2}(t0)", + "sd s3, {s3}(t0)", + "sd s4, {s4}(t0)", + "sd s5, {s5}(t0)", + "sd s6, {s6}(t0)", + "sd s7, {s7}(t0)", + "sd s8, {s8}(t0)", + "sd s9, {s9}(t0)", + "sd s10, {s10}(t0)", + "sd s11, {s11}(t0)", + "mv a0, t0", + "csrrw t0, sscratch, t0", + "sd t0, {t0}(a0)", + "csrr t0, sepc", + "csrr t1, scause", + "csrr t2, sstatus", + "csrr t3, stval", + "sd t0, {sepc}(a0)", + "sd t1, {scause}(a0)", + "sd t2, {sstatus}(a0)", + "sd t3, {stval}(a0)", + + "la t0, {default_trap_handler}", + "jalr t0", + + "csrr t0, sscratch", + "ld t1, {sepc}(t0)", + "ld t2, {sstatus}(t0)", + "ld tp, {tp}(t0)", + "ld ra, {ra}(t0)", + "ld sp, {sp}(t0)", + "ld gp, {gp}(t0)", + "ld a0, {a0}(t0)", + "ld a1, {a1}(t0)", + "ld a2, {a2}(t0)", + "ld a3, {a3}(t0)", + "ld a4, {a4}(t0)", + + "csrw sepc, t1", + "csrw sstatus, t2", + + "ld t1, {t1}(t0)", + "ld a5, {a5}(t0)", + "ld a6, {a6}(t0)", + "ld a7, {a7}(t0)", + "ld t3, {t3}(t0)", + "ld t4, {t4}(t0)", + "ld t5, {t5}(t0)", + "ld t2, {t2}(t0)", + "ld t6, {t6}(t0)", + "ld s0, {s0}(t0)", + "ld s1, {s1}(t0)", + "ld s2, {s2}(t0)", + "ld s3, {s3}(t0)", + "ld s4, {s4}(t0)", + "ld s5, {s5}(t0)", + "ld s6, {s6}(t0)", + "ld s7, {s7}(t0)", + "ld s8, {s8}(t0)", + "ld s9, {s9}(t0)", + "ld s10, {s10}(t0)", + "ld s11, {s11}(t0)", + "ld t0, {t0}(t0)", + "sret", + tp = const Registers::OFFSET_TP, + ra = const Registers::OFFSET_RA, + sp = const Registers::OFFSET_SP, + gp = const Registers::OFFSET_GP, + t0 = const Registers::OFFSET_T0, + t1 = const Registers::OFFSET_T1, + t2 = const Registers::OFFSET_T2, + t3 = const Registers::OFFSET_T3, + t4 = const Registers::OFFSET_T4, + t5 = const Registers::OFFSET_T5, + t6 = const Registers::OFFSET_T6, + a0 = const Registers::OFFSET_A0, + a1 = const Registers::OFFSET_A1, + a2 = const Registers::OFFSET_A2, + a3 = const Registers::OFFSET_A3, + a4 = const Registers::OFFSET_A4, + a5 = const Registers::OFFSET_A5, + a6 = const Registers::OFFSET_A6, + a7 = const Registers::OFFSET_A7, + s0 = const Registers::OFFSET_S0, + s1 = const Registers::OFFSET_S1, + s2 = const Registers::OFFSET_S2, + s3 = const Registers::OFFSET_S3, + s4 = const Registers::OFFSET_S4, + s5 = const Registers::OFFSET_S5, + s6 = const Registers::OFFSET_S6, + s7 = const Registers::OFFSET_S7, + s8 = const Registers::OFFSET_S8, + s9 = const Registers::OFFSET_S9, + s10 = const Registers::OFFSET_S10, + s11 = const Registers::OFFSET_S11, + sepc = const TrapContext::OFFSET_SEPC, + scause = const TrapContext::OFFSET_SCAUSE, + sstatus = const TrapContext::OFFSET_SSTATUS, + stval = const TrapContext::OFFSET_STVAL, + default_trap_handler = sym _default_trap_handler, + ); +} diff --git a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs index 2d359759..28689111 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/mod.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/mod.rs @@ -1,18 +1,22 @@ +mod captured; +mod default; mod trap_context; use super::config::platform::virt::*; use super::context::TaskContext; +use captured::{_captured_trap_entry, _captured_trap_return}; use core::arch::{global_asm, 
naked_asm}; use core::mem::{offset_of, size_of}; use core::num::NonZero; use core::ptr::NonNull; +use default::_default_trap_entry; use eonix_hal_traits::{ context::RawTaskContext, trap::{IrqState as IrqStateTrait, TrapReturn}, }; use riscv::register::sstatus::{self, Sstatus}; use riscv::register::stvec::TrapMode; -use riscv::register::{scause, sepc, stval}; +use riscv::register::{scause, sepc, sscratch, stval}; use riscv::{ asm::sfence_vma_all, register::stvec::{self, Stvec}, @@ -21,281 +25,36 @@ use sbi::SbiError; pub use trap_context::*; -#[repr(C)] -pub struct TrapScratch { - t1: u64, - t2: u64, - kernel_tp: Option>, - trap_context: Option>, - handler: unsafe extern "C" fn(), - capturer_context: TaskContext, -} - -#[eonix_percpu::define_percpu] -pub(crate) static TRAP_SCRATCH: TrapScratch = TrapScratch { - t1: 0, - t2: 0, - kernel_tp: None, - trap_context: None, - handler: default_trap_handler, - capturer_context: TaskContext::new(), -}; - -static mut DIRTY_TASK_CONTEXT: TaskContext = TaskContext::new(); - -#[unsafe(naked)] -unsafe extern "C" fn _raw_trap_entry() -> ! { - naked_asm!( - "csrrw t0, sscratch, t0", // Swap t0 and sscratch - "sd t1, 0(t0)", - "sd t2, 8(t0)", - "csrr t1, sstatus", - "andi t1, t1, 0x100", - "beqz t1, 2f", - // else SPP = 1, supervisor mode - "addi t1, sp, -{trap_context_size}", - "mv t2, tp", - "sd ra, {ra}(t1)", - "sd sp, {sp}(t1)", - "mv sp, t1", - "j 4f", - // SPP = 0, user mode - "2:", - "ld t1, 24(t0)", // Load captured TrapContext address - "mv t2, tp", - "ld tp, 16(t0)", // Restore kernel tp - // t0: &mut TrapScratch, t1: &mut TrapContext, t2: tp before trap - "3:", - "sd ra, {ra}(t1)", - "sd sp, {sp}(t1)", - "4:", - "sd gp, {gp}(t1)", - "sd t2, {tp}(t1)", - "ld ra, 0(t0)", - "ld t2, 8(t0)", - "sd ra, {t1}(t1)", // Save t1 - "sd t2, {t2}(t1)", // Save t2 - "ld ra, 32(t0)", // Load handler address - "csrrw t2, sscratch, t0", // Swap t0 and sscratch - "sd t2, {t0}(t1)", - "sd a0, {a0}(t1)", - "sd a1, {a1}(t1)", - "sd a2, {a2}(t1)", - "sd a3, {a3}(t1)", - "sd a4, {a4}(t1)", - "sd a5, {a5}(t1)", - "sd a6, {a6}(t1)", - "sd a7, {a7}(t1)", - "sd t3, {t3}(t1)", - "sd t4, {t4}(t1)", - "sd t5, {t5}(t1)", - "sd t6, {t6}(t1)", - "sd s0, {s0}(t1)", - "sd s1, {s1}(t1)", - "sd s2, {s2}(t1)", - "sd s3, {s3}(t1)", - "sd s4, {s4}(t1)", - "sd s5, {s5}(t1)", - "sd s6, {s6}(t1)", - "sd s7, {s7}(t1)", - "sd s8, {s8}(t1)", - "sd s9, {s9}(t1)", - "sd s10, {s10}(t1)", - "sd s11, {s11}(t1)", - "csrr t2, sstatus", - "csrr t3, sepc", - "csrr t4, scause", - "sd t2, {sstatus}(t1)", - "sd t3, {sepc}(t1)", - "sd t4, {scause}(t1)", - "ret", - trap_context_size = const size_of::(), - ra = const Registers::OFFSET_RA, - sp = const Registers::OFFSET_SP, - gp = const Registers::OFFSET_GP, - tp = const Registers::OFFSET_TP, - t1 = const Registers::OFFSET_T1, - t2 = const Registers::OFFSET_T2, - t0 = const Registers::OFFSET_T0, - a0 = const Registers::OFFSET_A0, - a1 = const Registers::OFFSET_A1, - a2 = const Registers::OFFSET_A2, - a3 = const Registers::OFFSET_A3, - a4 = const Registers::OFFSET_A4, - a5 = const Registers::OFFSET_A5, - a6 = const Registers::OFFSET_A6, - a7 = const Registers::OFFSET_A7, - t3 = const Registers::OFFSET_T3, - t4 = const Registers::OFFSET_T4, - t5 = const Registers::OFFSET_T5, - t6 = const Registers::OFFSET_T6, - s0 = const Registers::OFFSET_S0, - s1 = const Registers::OFFSET_S1, - s2 = const Registers::OFFSET_S2, - s3 = const Registers::OFFSET_S3, - s4 = const Registers::OFFSET_S4, - s5 = const Registers::OFFSET_S5, - s6 = const Registers::OFFSET_S6, - s7 = 
const Registers::OFFSET_S7, - s8 = const Registers::OFFSET_S8, - s9 = const Registers::OFFSET_S9, - s10 = const Registers::OFFSET_S10, - s11 = const Registers::OFFSET_S11, - sstatus = const TrapContext::OFFSET_SSTATUS, - sepc = const TrapContext::OFFSET_SEPC, - scause = const TrapContext::OFFSET_SCAUSE, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn _raw_trap_return(ctx: &mut TrapContext) -> ! { - naked_asm!( - "ld ra, {ra}(a0)", - "ld sp, {sp}(a0)", - "ld gp, {gp}(a0)", - "ld tp, {tp}(a0)", - "ld t1, {t1}(a0)", - "ld t2, {t2}(a0)", - "ld t0, {t0}(a0)", - "ld a1, {a1}(a0)", - "ld a2, {a2}(a0)", - "ld a3, {a3}(a0)", - "ld a4, {a4}(a0)", - "ld a5, {a5}(a0)", - "ld a6, {a6}(a0)", - "ld a7, {a7}(a0)", - "ld t3, {t3}(a0)", - "ld t4, {sepc}(a0)", // Load sepc from TrapContext - "ld t5, {sstatus}(a0)", // Load sstatus from TrapContext - "ld s0, {s0}(a0)", - "ld s1, {s1}(a0)", - "ld s2, {s2}(a0)", - "ld s3, {s3}(a0)", - "ld s4, {s4}(a0)", - "ld s5, {s5}(a0)", - "ld s6, {s6}(a0)", - "ld s7, {s7}(a0)", - "ld s8, {s8}(a0)", - "ld s9, {s9}(a0)", - "ld s10, {s10}(a0)", - "ld s11, {s11}(a0)", - "csrw sepc, t4", // Restore sepc - "csrw sstatus, t5", // Restore sstatus - "ld t4, {t4}(a0)", - "ld t5, {t5}(a0)", - "ld t6, {t6}(a0)", - "ld a0, {a0}(a0)", - "sret", - ra = const Registers::OFFSET_RA, - sp = const Registers::OFFSET_SP, - gp = const Registers::OFFSET_GP, - tp = const Registers::OFFSET_TP, - t1 = const Registers::OFFSET_T1, - t2 = const Registers::OFFSET_T2, - t0 = const Registers::OFFSET_T0, - a0 = const Registers::OFFSET_A0, - a1 = const Registers::OFFSET_A1, - a2 = const Registers::OFFSET_A2, - a3 = const Registers::OFFSET_A3, - a4 = const Registers::OFFSET_A4, - a5 = const Registers::OFFSET_A5, - a6 = const Registers::OFFSET_A6, - a7 = const Registers::OFFSET_A7, - t3 = const Registers::OFFSET_T3, - t4 = const Registers::OFFSET_T4, - t5 = const Registers::OFFSET_T5, - t6 = const Registers::OFFSET_T6, - s0 = const Registers::OFFSET_S0, - s1 = const Registers::OFFSET_S1, - s2 = const Registers::OFFSET_S2, - s3 = const Registers::OFFSET_S3, - s4 = const Registers::OFFSET_S4, - s5 = const Registers::OFFSET_S5, - s6 = const Registers::OFFSET_S6, - s7 = const Registers::OFFSET_S7, - s8 = const Registers::OFFSET_S8, - s9 = const Registers::OFFSET_S9, - s10 = const Registers::OFFSET_S10, - s11 = const Registers::OFFSET_S11, - sstatus = const TrapContext::OFFSET_SSTATUS, - sepc = const TrapContext::OFFSET_SEPC, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn default_trap_handler() { - unsafe extern "C" { - fn _default_trap_handler(trap_context: &mut TrapContext); - } - - naked_asm!( - "andi sp, sp, -16", // Align stack pointer to 16 bytes - "addi sp, sp, -16", - "mv a0, t1", // TrapContext pointer in t1 - "sd a0, 0(sp)", // Save TrapContext pointer - "", - "call {default_handler}", - "", - "ld a0, 0(sp)", // Restore TrapContext pointer - "j {trap_return}", - default_handler = sym _default_trap_handler, - trap_return = sym _raw_trap_return, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn captured_trap_handler() { - naked_asm!( - "la a0, {dirty_task_context}", - "addi a1, t0, {capturer_context_offset}", - "j {switch}", - dirty_task_context = sym DIRTY_TASK_CONTEXT, - capturer_context_offset = const offset_of!(TrapScratch, capturer_context), - switch = sym TaskContext::switch, - ); -} - -#[unsafe(naked)] -unsafe extern "C" fn captured_trap_return(trap_context: usize) -> ! 
{ - naked_asm!( - "mv a0, sp", - "j {raw_trap_return}", - raw_trap_return = sym _raw_trap_return, - ); -} - -impl TrapScratch { - pub fn set_trap_context(&mut self, ctx: NonNull) { - self.trap_context = Some(ctx); - } - - pub fn clear_trap_context(&mut self) { - self.trap_context = None; - } - - pub fn set_kernel_tp(&mut self, tp: NonNull) { - self.kernel_tp = Some(NonZero::new(tp.addr().get() as u64).unwrap()); - } -} - impl TrapReturn for TrapContext { type TaskContext = TaskContext; unsafe fn trap_return(&mut self) { let irq_states = disable_irqs_save(); - let old_handler = - core::mem::replace(&mut TRAP_SCRATCH.as_mut().handler, captured_trap_handler); + let old_stvec = stvec::read(); + stvec::write({ + let mut stvec_val = Stvec::from_bits(0); + stvec_val.set_address(_captured_trap_entry as usize); + stvec_val.set_trap_mode(TrapMode::Direct); + stvec_val + }); + + let old_trap_ctx = sscratch::read(); + sscratch::write(&raw mut *self as usize); + + let mut from_ctx = TaskContext::new(); let mut to_ctx = TaskContext::new(); - to_ctx.set_program_counter(captured_trap_return as usize); - to_ctx.set_stack_pointer(&raw mut *self as usize); + to_ctx.set_program_counter(_captured_trap_return as usize); + to_ctx.set_stack_pointer(&raw mut from_ctx as usize); to_ctx.set_interrupt_enabled(false); unsafe { - TaskContext::switch(&mut TRAP_SCRATCH.as_mut().capturer_context, &mut to_ctx); + TaskContext::switch(&mut from_ctx, &mut to_ctx); } - TRAP_SCRATCH.as_mut().handler = old_handler; + sscratch::write(old_trap_ctx); + stvec::write(old_stvec); + irq_states.restore(); } } @@ -311,7 +70,7 @@ fn setup_trap_handler(trap_entry_addr: usize) { } pub fn setup_trap() { - setup_trap_handler(_raw_trap_entry as usize); + setup_trap_handler(_default_trap_entry as usize); } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs index 776fab2a..ab4ca9a2 100644 --- a/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs +++ b/crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs @@ -1,5 +1,5 @@ use crate::{arch::time::set_next_timer, processor::CPU}; -use core::arch::asm; +use core::{arch::asm, mem::offset_of}; use eonix_hal_traits::{ fault::{Fault, PageFaultErrorCode}, trap::{RawTrapContext, TrapType}, @@ -18,24 +18,23 @@ use riscv::{ #[repr(C)] #[derive(Default, Clone, Copy)] pub struct Registers { + tp: u64, ra: u64, sp: u64, gp: u64, - tp: u64, - t1: u64, - t2: u64, - t0: u64, a0: u64, a1: u64, a2: u64, a3: u64, a4: u64, + t1: u64, a5: u64, a6: u64, a7: u64, t3: u64, t4: u64, t5: u64, + t2: u64, t6: u64, s0: u64, s1: u64, @@ -49,10 +48,11 @@ pub struct Registers { s9: u64, s10: u64, s11: u64, + t0: u64, } /// Saved CPU context when a trap (interrupt or exception) occurs on RISC-V 64. 
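The rewritten `trap_return` above no longer goes through a per-CPU `TRAP_SCRATCH`: it temporarily points `stvec` at `_captured_trap_entry`, publishes the `TrapContext` being resumed through `sscratch`, switches into `_captured_trap_return`, and then puts both CSRs back. A small sketch of that publish/restore discipline using the same `riscv` crate calls that appear in the hunk; the generic `T` stands in for the kernel's `TrapContext`, and `with_trap_context` is an invented helper:

```rust
// Sketch, riscv64 only: hand a trap context pointer to the entry stub through
// sscratch and remember the previous value so it can be restored afterwards.
#[cfg(target_arch = "riscv64")]
unsafe fn with_trap_context<T>(ctx: *mut T, body: impl FnOnce()) {
    use riscv::register::sscratch;

    unsafe {
        let old = sscratch::read();      // whatever the default trap path was using
        sscratch::write(ctx as usize);   // the captured entry stub picks this up
        body();                          // e.g. TaskContext::switch into _captured_trap_return
        sscratch::write(old);            // back to the per-CPU default trap context
    }
}
```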
-#[repr(C)] +#[repr(C, align(16))] #[derive(Clone, Copy)] pub struct TrapContext { regs: Registers, @@ -60,46 +60,48 @@ pub struct TrapContext { sstatus: Sstatus, sepc: usize, scause: Scause, + stval: usize, } impl Registers { - pub const OFFSET_RA: usize = 0 * 8; - pub const OFFSET_SP: usize = 1 * 8; - pub const OFFSET_GP: usize = 2 * 8; - pub const OFFSET_TP: usize = 3 * 8; - pub const OFFSET_T1: usize = 4 * 8; - pub const OFFSET_T2: usize = 5 * 8; - pub const OFFSET_T0: usize = 6 * 8; - pub const OFFSET_A0: usize = 7 * 8; - pub const OFFSET_A1: usize = 8 * 8; - pub const OFFSET_A2: usize = 9 * 8; - pub const OFFSET_A3: usize = 10 * 8; - pub const OFFSET_A4: usize = 11 * 8; - pub const OFFSET_A5: usize = 12 * 8; - pub const OFFSET_A6: usize = 13 * 8; - pub const OFFSET_A7: usize = 14 * 8; - pub const OFFSET_T3: usize = 15 * 8; - pub const OFFSET_T4: usize = 16 * 8; - pub const OFFSET_T5: usize = 17 * 8; - pub const OFFSET_T6: usize = 18 * 8; - pub const OFFSET_S0: usize = 19 * 8; - pub const OFFSET_S1: usize = 20 * 8; - pub const OFFSET_S2: usize = 21 * 8; - pub const OFFSET_S3: usize = 22 * 8; - pub const OFFSET_S4: usize = 23 * 8; - pub const OFFSET_S5: usize = 24 * 8; - pub const OFFSET_S6: usize = 25 * 8; - pub const OFFSET_S7: usize = 26 * 8; - pub const OFFSET_S8: usize = 27 * 8; - pub const OFFSET_S9: usize = 28 * 8; - pub const OFFSET_S10: usize = 29 * 8; - pub const OFFSET_S11: usize = 30 * 8; + pub const OFFSET_TP: usize = offset_of!(Registers, tp); + pub const OFFSET_SP: usize = offset_of!(Registers, sp); + pub const OFFSET_RA: usize = offset_of!(Registers, ra); + pub const OFFSET_GP: usize = offset_of!(Registers, gp); + pub const OFFSET_T1: usize = offset_of!(Registers, t1); + pub const OFFSET_T2: usize = offset_of!(Registers, t2); + pub const OFFSET_T0: usize = offset_of!(Registers, t0); + pub const OFFSET_A0: usize = offset_of!(Registers, a0); + pub const OFFSET_A1: usize = offset_of!(Registers, a1); + pub const OFFSET_A2: usize = offset_of!(Registers, a2); + pub const OFFSET_A3: usize = offset_of!(Registers, a3); + pub const OFFSET_A4: usize = offset_of!(Registers, a4); + pub const OFFSET_A5: usize = offset_of!(Registers, a5); + pub const OFFSET_A6: usize = offset_of!(Registers, a6); + pub const OFFSET_A7: usize = offset_of!(Registers, a7); + pub const OFFSET_T3: usize = offset_of!(Registers, t3); + pub const OFFSET_T4: usize = offset_of!(Registers, t4); + pub const OFFSET_T5: usize = offset_of!(Registers, t5); + pub const OFFSET_T6: usize = offset_of!(Registers, t6); + pub const OFFSET_S0: usize = offset_of!(Registers, s0); + pub const OFFSET_S1: usize = offset_of!(Registers, s1); + pub const OFFSET_S2: usize = offset_of!(Registers, s2); + pub const OFFSET_S3: usize = offset_of!(Registers, s3); + pub const OFFSET_S4: usize = offset_of!(Registers, s4); + pub const OFFSET_S5: usize = offset_of!(Registers, s5); + pub const OFFSET_S6: usize = offset_of!(Registers, s6); + pub const OFFSET_S7: usize = offset_of!(Registers, s7); + pub const OFFSET_S8: usize = offset_of!(Registers, s8); + pub const OFFSET_S9: usize = offset_of!(Registers, s9); + pub const OFFSET_S10: usize = offset_of!(Registers, s10); + pub const OFFSET_S11: usize = offset_of!(Registers, s11); } impl TrapContext { - pub const OFFSET_SSTATUS: usize = 31 * 8; - pub const OFFSET_SEPC: usize = 32 * 8; - pub const OFFSET_SCAUSE: usize = 33 * 8; + pub const OFFSET_SSTATUS: usize = offset_of!(TrapContext, sstatus); + pub const OFFSET_SEPC: usize = offset_of!(TrapContext, sepc); + pub const OFFSET_SCAUSE: usize = 
offset_of!(TrapContext, scause); + pub const OFFSET_STVAL: usize = offset_of!(TrapContext, stval); fn syscall_no(&self) -> usize { self.regs.a7 as usize @@ -131,6 +133,7 @@ impl RawTrapContext for TrapContext { sstatus, sepc: 0, scause: Scause::from_bits(0), + stval: 0, } } @@ -163,6 +166,7 @@ impl RawTrapContext for TrapContext { } Trap::Exception(e) => { match Exception::from_number(e).unwrap() { + Exception::Breakpoint => TrapType::Breakpoint, Exception::InstructionMisaligned | Exception::LoadMisaligned | Exception::InstructionFault @@ -176,16 +180,10 @@ impl RawTrapContext for TrapContext { }, exception @ (Exception::InstructionPageFault | Exception::LoadPageFault - | Exception::StorePageFault) => { - #[inline(always)] - fn get_page_fault_address() -> VAddr { - VAddr::from(stval::read()) - } - TrapType::Fault(Fault::PageFault { - error_code: self.get_page_fault_error_code(exception), - address: get_page_fault_address(), - }) - } + | Exception::StorePageFault) => TrapType::Fault(Fault::PageFault { + error_code: self.get_page_fault_error_code(exception), + address: VAddr::from(self.stval), + }), // breakpoint and supervisor env call _ => TrapType::Fault(Fault::Unknown(e)), } @@ -224,7 +222,15 @@ impl RawTrapContext for TrapContext { fn set_user_mode(&mut self, user: bool) { match user { true => self.sstatus.set_spp(SPP::User), - false => self.sstatus.set_spp(SPP::Supervisor), + false => { + unsafe { + core::arch::asm!( + "mv {}, tp", + out(reg) self.regs.tp, + ); + }; + self.sstatus.set_spp(SPP::Supervisor); + } } } diff --git a/crates/eonix_hal/src/link.x.in b/crates/eonix_hal/src/link.x.in index b4ea6e0a..81c269c2 100644 --- a/crates/eonix_hal/src/link.x.in +++ b/crates/eonix_hal/src/link.x.in @@ -18,6 +18,15 @@ SECTIONS { __srodata = .; *(.rodata .rodata.*); + + . = ALIGN(8); + + PROVIDE(__eh_frame = .); + PROVIDE(__executable_start = __stext); + + KEEP(*(.eh_frame_hdr)); + KEEP(*(.eh_frame)); + KEEP(*(.eh_frame.*)); } > REGION_RODATA AT> LINK_REGION_RODATA @@ -32,6 +41,11 @@ SECTIONS { } > REGION_DATA AT> LINK_REGION_DATA + .data.after : + { + __data_after = .; + } > REGION_DATA AT> LINK_REGION_DATA + __edata = .; .bss (NOLOAD) : ALIGN(16) @@ -45,16 +59,7 @@ SECTIONS { __ebss = .; - .eh_frame : ALIGN(16) - { - __seh_frame = .; - - KEEP(*(.eh_frame .eh_frame*)); - - } > REGION_EHFRAME AT> LINK_REGION_EHFRAME - . = ALIGN(0x1000); - __eeh_frame = .; } SECTIONS { diff --git a/crates/eonix_mm/src/page_table/page_table.rs b/crates/eonix_mm/src/page_table/page_table.rs index 24253dc9..8318049f 100644 --- a/crates/eonix_mm/src/page_table/page_table.rs +++ b/crates/eonix_mm/src/page_table/page_table.rs @@ -11,7 +11,7 @@ use crate::{ }; use core::{marker::PhantomData, ptr::NonNull}; -pub trait RawPageTable<'a>: 'a { +pub trait RawPageTable<'a>: Send + 'a { type Entry: PTE + 'a; /// Return the entry at the given index. 
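The `Send` supertrait added to `RawPageTable` further down is what forces the new `unsafe impl Send for RawPageTableSv48<'_>` and `RawPageTable48<'_>` impls earlier in this patch: a raw-pointer-backed table is not `Send` automatically, so each implementor has to assert it. A self-contained illustration of the pattern; the types here are made up for the example:

```rust
use core::marker::PhantomData;

// A supertrait bound like `Send` propagates to every implementor.
trait RawTable<'a>: Send + 'a {
    fn root(&self) -> *mut u64;
}

struct Table<'a> {
    root: *mut u64,                    // raw pointers are !Send by default
    _life: PhantomData<&'a mut u64>,
}

// SAFETY (assumption of this sketch): the frames behind `root` are not tied to
// one CPU or thread, so moving the handle across threads is sound.
unsafe impl Send for Table<'_> {}

impl<'a> RawTable<'a> for Table<'a> {
    fn root(&self) -> *mut u64 {
        self.root
    }
}
```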
diff --git a/crates/eonix_runtime/src/executor.rs b/crates/eonix_runtime/src/executor.rs index 12eb9556..3b858a47 100644 --- a/crates/eonix_runtime/src/executor.rs +++ b/crates/eonix_runtime/src/executor.rs @@ -1,125 +1,94 @@ -mod builder; -mod execute_status; +// mod builder; mod output_handle; mod stack; -use crate::{ - run::{Contexted, Run, RunState}, - scheduler::Scheduler, - task::Task, +use alloc::{ + boxed::Box, + sync::{Arc, Weak}, }; -use alloc::sync::Weak; use core::{ + marker::PhantomData, pin::Pin, - sync::atomic::{compiler_fence, fence, AtomicBool, Ordering}, - task::Waker, + task::{Context, Poll}, }; use eonix_sync::Spin; -pub use builder::ExecutorBuilder; -pub use execute_status::ExecuteStatus; pub use output_handle::OutputHandle; pub use stack::Stack; -/// An `Executor` executes a `Run` object in a separate thread of execution -/// where we have a dedicated stack and context. -pub trait Executor: Send { - fn progress(&self) -> ExecuteStatus; +/// An `Executor` executes a Future object in a separate thread of execution. +/// +/// When the Future is finished, the `Executor` will call the `OutputHandle` to commit the output. +/// Then the `Executor` will release the resources associated with the Future. +pub struct Executor(Option>>); + +trait TypeErasedExecutor: Send { + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()>; } -struct RealExecutor +struct RealExecutor<'a, F> where - R: Run + Send + Contexted + 'static, - R::Output: Send, + F: Future + Send + 'a, + F::Output: Send + 'a, { - _stack: S, - runnable: R, - output_handle: Weak>>, - finished: AtomicBool, + future: F, + output_handle: Weak>>, + _phantom: PhantomData<&'a ()>, } -impl RealExecutor +impl TypeErasedExecutor for RealExecutor<'_, F> where - R: Run + Send + Contexted + 'static, - R::Output: Send, + F: Future + Send, + F::Output: Send, { - extern "C" fn execute(self: Pin<&Self>) -> ! { - // We get here with preempt count == 1. - eonix_preempt::enable(); - - { - let waker = Waker::from(Task::current().clone()); + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + if self.output_handle.as_ptr().is_null() { + return Poll::Ready(()); + } - let output_data = loop { - // TODO!!!!!!: CHANGE THIS. - let runnable_pointer = &raw const self.get_ref().runnable; + let future = unsafe { + // SAFETY: We don't move the future. + self.as_mut().map_unchecked_mut(|me| &mut me.future) + }; - // SAFETY: We don't move the runnable object and we MIGHT not be using the - // part that is used in `pinned_run` in the runnable...? - let mut pinned_runnable = - unsafe { Pin::new_unchecked(&mut *(runnable_pointer as *mut R)) }; + future.poll(cx).map(|output| { + if let Some(output_handle) = self.output_handle.upgrade() { + output_handle.lock().commit_output(output); - match pinned_runnable.as_mut().run(&waker) { - RunState::Finished(output) => break output, - RunState::Running => Task::park(), + unsafe { + // SAFETY: `output_handle` is Unpin. + self.get_unchecked_mut().output_handle = Weak::new(); } - }; - - if let Some(output_handle) = self.output_handle.upgrade() { - output_handle.lock().commit_output(output_data); } - } - - // SAFETY: We are on the same CPU as the task. - self.finished.store(true, Ordering::Relaxed); - - unsafe { - // SAFETY: `preempt::count()` == 1. 
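The executor above becomes a type-erased, heap-pinned future: the concrete output type disappears behind `dyn TypeErasedExecutor` because the result is committed into a shared output handle rather than returned. A compact illustration of that erasure trick using plain `std` types; the kernel is `no_std` with `alloc` and its own `Spin` lock, and its `OutputHandle` is richer than this slot:

```rust
use std::{
    future::Future,
    pin::Pin,
    sync::{Arc, Mutex},
};

// Erase a future's output type by committing the result into a shared slot;
// what remains can be stored uniformly as `Pin<Box<dyn Future<Output = ()> + Send>>`.
fn erase<F>(fut: F) -> (Pin<Box<dyn Future<Output = ()> + Send>>, Arc<Mutex<Option<F::Output>>>)
where
    F: Future + Send + 'static,
    F::Output: Send + 'static,
{
    let slot = Arc::new(Mutex::new(None));
    let out = Arc::clone(&slot);
    let erased: Pin<Box<dyn Future<Output = ()> + Send>> = Box::pin(async move {
        let value = fut.await;
        *out.lock().unwrap() = Some(value); // commit, then the erased future is Ready(())
    });
    (erased, slot)
}
```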
- eonix_preempt::disable(); - Scheduler::goto_scheduler_noreturn() - } + }) } } -impl Executor for RealExecutor -where - S: Send, - R: Run + Contexted + Send, - R::Output: Send, -{ - fn progress(&self) -> ExecuteStatus { - // TODO!!!: If the task comes from another cpu, we need to sync. - // - // The other cpu should see the changes of kernel stack of the target thread - // made in this cpu. - // - // Can we find a better way other than `fence`s? - // - // An alternative way is to use an atomic variable to store the cpu id of - // the current task. Then we can use acquire release swap to ensure that the - // other cpu sees the changes. - fence(Ordering::SeqCst); - compiler_fence(Ordering::SeqCst); - - // TODO!!!: We should load the context only if the previous task is - // different from the current task. - - self.runnable.load_running_context(); - - unsafe { - // SAFETY: We are in the scheduler context and we are not preempted. - Scheduler::go_from_scheduler(&Task::current().execution_context); - } - - self.runnable.restore_running_context(); - - compiler_fence(Ordering::SeqCst); - fence(Ordering::SeqCst); +impl Executor { + pub fn new(future: F) -> (Self, Arc>>) + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + let output_handle = OutputHandle::new(); + + ( + Executor(Some(Box::pin(RealExecutor { + future, + output_handle: Arc::downgrade(&output_handle), + _phantom: PhantomData, + }))), + output_handle, + ) + } - if self.finished.load(Ordering::Acquire) { - ExecuteStatus::Finished + pub fn poll(&mut self, cx: &mut Context<'_>) -> Poll<()> { + if let Some(executor) = self.0.as_mut() { + executor.as_mut().poll(cx).map(|_| { + self.0.take(); + }) } else { - ExecuteStatus::Executing + Poll::Ready(()) } } } diff --git a/crates/eonix_runtime/src/executor/builder.rs b/crates/eonix_runtime/src/executor/builder.rs index eb073dc8..2729270b 100644 --- a/crates/eonix_runtime/src/executor/builder.rs +++ b/crates/eonix_runtime/src/executor/builder.rs @@ -1,8 +1,5 @@ use super::{Executor, OutputHandle, RealExecutor, Stack}; -use crate::{ - context::ExecutionContext, - run::{Contexted, Run}, -}; +use crate::context::ExecutionContext; use alloc::{boxed::Box, sync::Arc}; use core::{pin::Pin, sync::atomic::AtomicBool}; use eonix_sync::Spin; @@ -15,7 +12,6 @@ pub struct ExecutorBuilder { impl ExecutorBuilder where S: Stack, - R: Run + Contexted + Send + 'static, R::Output: Send, { pub fn new() -> Self { diff --git a/crates/eonix_runtime/src/executor/execute_status.rs b/crates/eonix_runtime/src/executor/execute_status.rs deleted file mode 100644 index 9c95aa6f..00000000 --- a/crates/eonix_runtime/src/executor/execute_status.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub enum ExecuteStatus { - Executing, - Finished, -} diff --git a/crates/eonix_runtime/src/lib.rs b/crates/eonix_runtime/src/lib.rs index 1df43fa4..458e01d9 100644 --- a/crates/eonix_runtime/src/lib.rs +++ b/crates/eonix_runtime/src/lib.rs @@ -3,7 +3,6 @@ pub mod context; pub mod executor; mod ready_queue; -pub mod run; pub mod scheduler; pub mod task; diff --git a/crates/eonix_runtime/src/run.rs b/crates/eonix_runtime/src/run.rs deleted file mode 100644 index 368b567b..00000000 --- a/crates/eonix_runtime/src/run.rs +++ /dev/null @@ -1,34 +0,0 @@ -mod future_run; - -use core::{pin::Pin, task::Waker}; -pub use future_run::FutureRun; - -pub enum RunState { - Running, - Finished(Output), -} - -pub trait Contexted { - /// # Safety - /// This function should be called in a preemption disabled context. 
- fn load_running_context(&self) {} - - /// # Safety - /// This function should be called in a preemption disabled context. - fn restore_running_context(&self) {} -} - -pub trait Run { - type Output; - - fn run(self: Pin<&mut Self>, waker: &Waker) -> RunState; - - fn join(mut self: Pin<&mut Self>, waker: &Waker) -> Self::Output { - loop { - match self.as_mut().run(waker) { - RunState::Running => continue, - RunState::Finished(output) => break output, - } - } - } -} diff --git a/crates/eonix_runtime/src/run/future_run.rs b/crates/eonix_runtime/src/run/future_run.rs deleted file mode 100644 index 813f8d2f..00000000 --- a/crates/eonix_runtime/src/run/future_run.rs +++ /dev/null @@ -1,34 +0,0 @@ -use super::{Contexted, Run, RunState}; -use core::{ - pin::Pin, - task::{Context, Poll, Waker}, -}; - -pub struct FutureRun(F); - -impl FutureRun -where - F: Future, -{ - pub const fn new(future: F) -> Self { - Self(future) - } -} - -impl Contexted for FutureRun where F: Future {} -impl Run for FutureRun -where - F: Future + 'static, -{ - type Output = F::Output; - - fn run(self: Pin<&mut Self>, waker: &Waker) -> RunState { - let mut future = unsafe { self.map_unchecked_mut(|me| &mut me.0) }; - let mut context = Context::from_waker(waker); - - match future.as_mut().poll(&mut context) { - Poll::Ready(output) => RunState::Finished(output), - Poll::Pending => RunState::Running, - } - } -} diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs index 9031d3a1..3f72fbf4 100644 --- a/crates/eonix_runtime/src/scheduler.rs +++ b/crates/eonix_runtime/src/scheduler.rs @@ -1,20 +1,16 @@ use crate::{ - context::ExecutionContext, - executor::{ExecuteStatus, OutputHandle, Stack}, - ready_queue::{cpu_rq, local_rq}, - run::{Contexted, Run}, - task::{Task, TaskAdapter, TaskHandle}, + executor::OutputHandle, + ready_queue::{local_rq, ReadyQueue}, + task::{Task, TaskAdapter, TaskHandle, TaskState}, }; -use alloc::sync::Arc; +use alloc::{sync::Arc, task::Wake}; use core::{ - mem::forget, + ops::{Deref, DerefMut}, ptr::NonNull, - sync::atomic::{compiler_fence, Ordering}, - task::Waker, + task::{Context, Poll, Waker}, }; use eonix_hal::processor::halt; use eonix_log::println_trace; -use eonix_preempt::assert_preempt_count_eq; use eonix_sync::{LazyLock, Spin, SpinIrq as _}; use intrusive_collections::RBTree; use pointers::BorrowedArc; @@ -22,13 +18,12 @@ use pointers::BorrowedArc; #[eonix_percpu::define_percpu] static CURRENT_TASK: Option> = None; -#[eonix_percpu::define_percpu] -static LOCAL_SCHEDULER_CONTEXT: ExecutionContext = ExecutionContext::new(); - static TASKS: LazyLock>> = LazyLock::new(|| Spin::new(RBTree::new(TaskAdapter::new()))); -pub struct Scheduler; +pub static RUNTIME: Runtime = Runtime(); + +pub struct Runtime(); pub struct JoinHandle(Arc>>) where @@ -68,209 +63,164 @@ where } } -impl Scheduler { - /// `Scheduler` might be used in various places. Do not hold it for a long time. - /// - /// # Safety - /// The locked returned by this function should be locked with `lock_irq` to prevent from - /// rescheduling during access to the scheduler. Disabling preemption will do the same. - /// - /// Drop the lock before calling `schedule`. 
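`JoinHandle` below wraps the same `OutputHandle` the executor commits into, which is what lets a spawner await the spawned task's result. A hedged sketch of that shape with `std` primitives; the kernel's `OutputHandle` and `JoinHandle` internals are not shown in full in this hunk, so the names and details here are illustrative:

```rust
use std::{
    future::Future,
    pin::Pin,
    sync::{Arc, Mutex},
    task::{Context, Poll, Waker},
};

struct Slot<T> {
    value: Option<T>,
    waker: Option<Waker>,
}

// Committing a value wakes whoever is awaiting the join handle.
fn commit<T>(slot: &Mutex<Slot<T>>, value: T) {
    let mut slot = slot.lock().unwrap();
    slot.value = Some(value);
    if let Some(waker) = slot.waker.take() {
        waker.wake();
    }
}

struct Join<T>(Arc<Mutex<Slot<T>>>);

impl<T> Future for Join<T> {
    type Output = T;

    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<T> {
        let mut slot = self.0.lock().unwrap();
        match slot.value.take() {
            Some(value) => Poll::Ready(value),
            None => {
                slot.waker = Some(cx.waker().clone());
                Poll::Pending
            }
        }
    }
}
```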
- pub fn get() -> &'static Self { - static GLOBAL_SCHEDULER: Scheduler = Scheduler; - &GLOBAL_SCHEDULER - } - - pub fn init_local_scheduler() +impl Runtime { + pub fn spawn(&self, future: F) -> JoinHandle where - S: Stack, + F: Future + Send + 'static, + F::Output: Send + 'static, { - let stack = S::new(); - - unsafe { - eonix_preempt::disable(); - // SAFETY: Preemption is disabled. - let context: &mut ExecutionContext = LOCAL_SCHEDULER_CONTEXT.as_mut(); - context.set_ip(local_scheduler as _); - context.set_sp(stack.get_bottom().addr().get() as usize); - context.set_interrupt(true); - eonix_preempt::enable(); - } - - // We don't need to keep the stack around. - forget(stack); - } + let TaskHandle { + task, + output_handle, + } = Task::new(future); - /// # Safety - /// This function must not be called inside of the scheulder context. - /// - /// The caller must ensure that `preempt::count` == 1. - pub unsafe fn go_from_scheduler(to: &ExecutionContext) { - // SAFETY: Preemption is disabled. - unsafe { LOCAL_SCHEDULER_CONTEXT.as_ref() }.switch_to(to); - } + self.add_task(task.clone()); + task.wake_by_ref(); - /// # Safety - /// This function must not be called inside of the scheulder context. - /// - /// The caller must ensure that `preempt::count` == 1. - pub unsafe fn goto_scheduler(from: &ExecutionContext) { - // SAFETY: Preemption is disabled. - from.switch_to(unsafe { LOCAL_SCHEDULER_CONTEXT.as_ref() }); + JoinHandle(output_handle) } - /// # Safety - /// This function must not be called inside of the scheulder context. - /// - /// The caller must ensure that `preempt::count` == 1. - pub unsafe fn goto_scheduler_noreturn() -> ! { - // SAFETY: Preemption is disabled. - unsafe { LOCAL_SCHEDULER_CONTEXT.as_ref().switch_noreturn() } + fn add_task(&self, task: Arc) { + TASKS.lock_irq().insert(task); } - fn add_task(task: Arc) { - TASKS.lock().insert(task); + fn remove_task(&self, task: &impl Deref>) { + unsafe { + TASKS + .lock_irq() + .cursor_mut_from_ptr(Arc::as_ptr(task)) + .remove(); + } } - fn remove_task(task: &Task) { - unsafe { TASKS.lock().cursor_mut_from_ptr(task as *const _).remove() }; + fn current(&self) -> Option> { + CURRENT_TASK + .get() + .map(|ptr| unsafe { BorrowedArc::from_raw(ptr) }) } - fn select_cpu_for_task(&self, task: &Task) -> usize { - task.cpu.load(Ordering::Relaxed) as _ - } + fn remove_and_enqueue_current(&self, rq: &mut impl DerefMut) { + let Some(current) = CURRENT_TASK + .swap(None) + .map(|cur| unsafe { Arc::from_raw(cur.as_ptr()) }) + else { + return; + }; + + match current.state.update(|state| match state { + TaskState::READY_RUNNING => Some(TaskState::READY), + TaskState::RUNNING => Some(TaskState::BLOCKED), + _ => { + unreachable!("Current task should be at least in RUNNING state, but got {state:?}") + } + }) { + Ok(TaskState::READY_RUNNING) => { + println_trace!( + "trace_scheduler", + "Re-enqueueing task {:?} (CPU{})", + current.id, + eonix_hal::processor::CPU::local().cpuid(), + ); - pub fn activate(&self, task: &Arc) { - // Only one cpu can be activating the task at a time. - // TODO: Add some checks. - - if task.on_rq.swap(true, Ordering::Acquire) { - // Lock the rq and check whether the task is on the rq again. - let cpuid = task.cpu.load(Ordering::Acquire); - let mut rq = cpu_rq(cpuid as _).lock_irq(); - - if !task.on_rq.load(Ordering::Acquire) { - // Task has just got off the rq. Put it back. - rq.put(task.clone()); - } else { - // Task is already on the rq. Do nothing. 
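`Runtime::spawn` above drops the old `activate()` run-queue bookkeeping in favor of `task.wake_by_ref()`: the task itself is the waker, via `alloc::task::Wake`, so wake-ups from anywhere in the kernel funnel through one path. A minimal sketch of an `Arc`-based task acting as its own waker; the ready-queue push is elided, and `MyTask`/`waker_of` are illustrative names:

```rust
extern crate alloc;

use alloc::{sync::Arc, task::Wake};
use core::task::Waker;

struct MyTask {
    // id, state, intrusive links, executor ... elided for the sketch
}

impl Wake for MyTask {
    fn wake(self: Arc<Self>) {
        self.wake_by_ref();
    }

    fn wake_by_ref(self: &Arc<Self>) {
        // A real implementation would mark the task READY and push it onto a run queue.
    }
}

fn waker_of(task: &Arc<MyTask>) -> Waker {
    // `Waker: From<Arc<W>>` holds for any `W: Wake + Send + Sync + 'static`.
    Waker::from(task.clone())
}
```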
- return; + rq.put(current); + } + Ok(_) => { + println_trace!( + "trace_scheduler", + "Current task {:?} (CPU{}) is blocked, not re-enqueueing", + current.id, + eonix_hal::processor::CPU::local().cpuid(), + ); } - } else { - // Task not on some rq. Select one and put it here. - let cpu = self.select_cpu_for_task(&task); - let mut rq = cpu_rq(cpu).lock_irq(); - task.cpu.store(cpu as _, Ordering::Release); - rq.put(task.clone()); + _ => unreachable!(), } } - pub fn spawn(&self, runnable: R) -> JoinHandle - where - S: Stack + 'static, - R: Run + Contexted + Send + 'static, - R::Output: Send + 'static, - { - let TaskHandle { - task, - output_handle, - } = Task::new::(runnable); + pub fn block_till_woken(set_waker: impl FnOnce(&Waker)) -> impl Future { + struct BlockTillWoken { + set_waker: Option, + slept: bool, + } - Self::add_task(task.clone()); - self.activate(&task); + impl Future for BlockTillWoken { + type Output = (); - JoinHandle(output_handle) - } + fn poll(self: core::pin::Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + if self.slept { + Poll::Ready(()) + } else { + let (set_waker, slept) = unsafe { + let me = self.get_unchecked_mut(); + (me.set_waker.take().unwrap(), &mut me.slept) + }; + + set_waker(cx.waker()); + *slept = true; + Poll::Pending + } + } + } - /// Go to idle task. Call this with `preempt_count == 1`. - /// The preempt count will be decremented by this function. - /// - /// # Safety - /// We might never return from here. - /// Drop all variables that take ownership of some resource before calling this function. - pub fn schedule() { - assert_preempt_count_eq!(1, "Scheduler::schedule"); - - // Make sure all works are done before scheduling. - compiler_fence(Ordering::SeqCst); - - // TODO!!!!!: Use of reference here needs further consideration. - // - // Since we might never return to here, we can't take ownership of `current()`. - // Is it safe to believe that `current()` will never change across calls? - unsafe { - // SAFETY: Preemption is disabled. - Scheduler::goto_scheduler(&Task::current().execution_context); + BlockTillWoken { + set_waker: Some(set_waker), + slept: false, } - eonix_preempt::enable(); } -} -extern "C" fn local_scheduler() -> ! { - loop { - assert_preempt_count_eq!(1, "Scheduler::idle_task"); - let mut rq = local_rq().lock_irq(); + /// Enter the runtime with an "init" future and run till its completion. + /// + /// The "init" future has the highest priority and when it completes, + /// the runtime will exit immediately and yield its output. + pub fn enter(&self) { + loop { + let mut rq = local_rq().lock_irq(); - let previous_task = CURRENT_TASK - .get() - .map(|ptr| unsafe { Arc::from_raw(ptr.as_ptr()) }); - let next_task = rq.get(); + self.remove_and_enqueue_current(&mut rq); - match (previous_task, next_task) { - (None, None) => { - // Nothing to do, halt the cpu and rerun the loop. 
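`remove_and_enqueue_current` above distinguishes `READY_RUNNING` (woken again while still on the CPU, so re-enqueue) from plain `RUNNING` (park as `BLOCKED`). A sketch of how a closure-driven `update` over a bit-flag state can be built from a compare-exchange loop; the real `TaskState` lives in task_state.rs and may differ:

```rust
use std::sync::atomic::{AtomicU32, Ordering};

const READY: u32 = 1 << 0;
const RUNNING: u32 = 1 << 1;
const READY_RUNNING: u32 = READY | RUNNING;

// Apply `f` atomically: retry until the compare-exchange succeeds, or bail out
// with the observed value if `f` declines the transition.
fn update(state: &AtomicU32, f: impl Fn(u32) -> Option<u32>) -> Result<u32, u32> {
    let mut current = state.load(Ordering::Acquire);
    loop {
        let Some(next) = f(current) else {
            return Err(current);
        };
        match state.compare_exchange_weak(current, next, Ordering::AcqRel, Ordering::Acquire) {
            Ok(previous) => return Ok(previous),
            Err(observed) => current = observed,
        }
    }
}

fn main() {
    let state = AtomicU32::new(READY_RUNNING);
    // A task woken while it was still running goes back to READY instead of blocking.
    let prev = update(&state, |s| (s == READY_RUNNING).then_some(READY));
    assert_eq!(prev, Ok(READY_RUNNING));
    assert_eq!(state.load(Ordering::Relaxed), READY);
}
```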
+ let Some(next) = rq.get() else { drop(rq); halt(); continue; + }; + + println_trace!( + "trace_scheduler", + "Switching to task {:?} (CPU{})", + next.id, + eonix_hal::processor::CPU::local().cpuid(), + ); + + let old_state = next.state.swap(TaskState::RUNNING); + assert_eq!( + old_state, + TaskState::READY, + "Next task should be in READY state" + ); + + unsafe { + CURRENT_TASK.set(Some(NonNull::new_unchecked(Arc::into_raw(next) as *mut _))); } - (None, Some(next)) => { - CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); - } - (Some(previous), None) => { - if previous.state.is_running() { - // Previous thread is `Running`, return to the current running thread. - println_trace!( - "trace_scheduler", - "Returning to task id({}) without doing context switch", - previous.id - ); - CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _)); - } else { - // Nothing to do, halt the cpu and rerun the loop. - CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _)); - drop(rq); - halt(); - continue; - } - } - (Some(previous), Some(next)) => { + + drop(rq); + + // TODO: MAYBE we can move the release of finished tasks to some worker thread. + if Task::current().poll().is_ready() { + let old_state = Task::current().state.swap(TaskState::DEAD); + assert!( + old_state & TaskState::RUNNING != 0, + "Current task should be at least in RUNNING state" + ); + println_trace!( "trace_scheduler", - "Switching from task id({}) to task id({})", - previous.id, - next.id + "Task {:?} finished execution, removing...", + Task::current().id, ); - debug_assert_ne!(previous.id, next.id, "Switching to the same task"); + self.remove_task(&Task::current()); - if previous.state.is_running() || !previous.state.try_park() { - rq.put(previous); - } else { - previous.on_rq.store(false, Ordering::Release); - } - - CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _)); + CURRENT_TASK.set(None); } } - - drop(rq); - // TODO: We can move the release of finished tasks to some worker thread. 
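// Editor's summary of the task lifecycle implemented above, using the state
// constants defined in task_state.rs later in this diff (BLOCKED = 0,
// READY = 1, RUNNING = 2, READY_RUNNING = READY | RUNNING, DEAD = 1 << 31):
//
//   Task::new()                    -> BLOCKED
//   wake_by_ref()                  -> READY          (put on a ready queue)
//   Runtime::enter() picks it up   -> RUNNING        (becomes CURRENT_TASK)
//   woken while still running      -> READY_RUNNING  (re-enqueued at the next switch)
//   switched out without a wake    -> BLOCKED        (waits for the next wake)
//   its future returns Ready       -> DEAD           (removed from the task list)
//
// Because READY and RUNNING are independent bits, a compile-time check like
// the following holds inside eonix_runtime (where `TaskState` is visible):
const _: () = assert!(TaskState::READY_RUNNING == (TaskState::READY | TaskState::RUNNING));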
- if let ExecuteStatus::Finished = Task::current().run() { - let current = CURRENT_TASK - .swap(None) - .map(|ptr| unsafe { Arc::from_raw(ptr.as_ptr()) }) - .expect("Current task should be present"); - Scheduler::remove_task(¤t); - } } } diff --git a/crates/eonix_runtime/src/task.rs b/crates/eonix_runtime/src/task.rs index 66746ba2..8a1d6ef4 100644 --- a/crates/eonix_runtime/src/task.rs +++ b/crates/eonix_runtime/src/task.rs @@ -2,25 +2,22 @@ mod adapter; mod task_state; use crate::{ - context::ExecutionContext, - executor::{ExecuteStatus, Executor, ExecutorBuilder, OutputHandle, Stack}, - run::{Contexted, Run}, - scheduler::Scheduler, + executor::{Executor, OutputHandle}, + ready_queue::{cpu_rq, ReadyQueue}, }; -use alloc::{boxed::Box, sync::Arc, task::Wake}; +use alloc::{sync::Arc, task::Wake}; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ - pin::{pin, Pin}, - sync::atomic::{AtomicBool, AtomicU32, Ordering}, + ops::DerefMut, + sync::atomic::{AtomicU32, Ordering}, task::{Context, Poll, Waker}, }; use eonix_hal::processor::CPU; -use eonix_preempt::assert_preempt_enabled; -use eonix_sync::Spin; -use intrusive_collections::RBTreeAtomicLink; -use task_state::TaskState; +use eonix_sync::{Spin, SpinIrq}; +use intrusive_collections::{LinkedListAtomicLink, RBTreeAtomicLink}; -pub use adapter::TaskAdapter; +pub use adapter::{TaskAdapter, TaskRqAdapter}; +pub(crate) use task_state::TaskState; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct TaskId(u32); @@ -33,160 +30,72 @@ where pub(crate) output_handle: Arc>>, } -/// A `Task` represents a schedulable unit. -/// -/// Initial: state = Running, unparked = false -/// -/// Task::park() => swap state <- Parking, assert prev == Running -/// => swap unparked <- false -/// -> true => store state <- Running => return -/// -> false => goto scheduler => get rq lock => load state -/// -> Running => enqueue -/// -> Parking => cmpxchg Parking -> Parked -/// -> Running => enqueue -/// -> Parking => on_rq <- false -/// -> Parked => ??? -/// -/// Task::unpark() => swap unparked <- true -/// -> true => return -/// -> false => swap state <- Running -/// -> Running => return -/// -> Parking | Parked => Scheduler::activate pub struct Task { /// Unique identifier of the task. pub id: TaskId, - /// Whether the task is on some run queue (a.k.a ready). - pub(crate) on_rq: AtomicBool, - /// Whether someone has called `unpark` on this task. - pub(crate) unparked: AtomicBool, /// The last cpu that the task was executed on. /// If `on_rq` is `false`, we can't assume that this task is still on the cpu. pub(crate) cpu: AtomicU32, /// Task state. pub(crate) state: TaskState, - /// Task execution context. - pub(crate) execution_context: ExecutionContext, /// Executor object. - executor: AtomicUniqueRefCell>>>, + executor: AtomicUniqueRefCell, /// Link in the global task list. link_task_list: RBTreeAtomicLink, + /// Link in the ready queue. 
+ link_ready_queue: LinkedListAtomicLink, } impl Task { - pub fn new(runnable: R) -> TaskHandle + pub fn new(future: F) -> TaskHandle where - S: Stack + 'static, - R: Run + Contexted + Send + 'static, - R::Output: Send + 'static, + F: Future + Send + 'static, + F::Output: Send + 'static, { static ID: AtomicU32 = AtomicU32::new(0); - let (executor, execution_context, output) = ExecutorBuilder::new() - .stack(S::new()) - .runnable(runnable) - .build(); + let (executor, output_handle) = Executor::new(future); let task = Arc::new(Self { id: TaskId(ID.fetch_add(1, Ordering::Relaxed)), - on_rq: AtomicBool::new(false), - unparked: AtomicBool::new(false), cpu: AtomicU32::new(CPU::local().cpuid() as u32), - state: TaskState::new(TaskState::RUNNING), - executor: AtomicUniqueRefCell::new(Some(executor)), - execution_context, + state: TaskState::new(TaskState::BLOCKED), + executor: AtomicUniqueRefCell::new(executor), link_task_list: RBTreeAtomicLink::new(), + link_ready_queue: LinkedListAtomicLink::new(), }); TaskHandle { task, - output_handle: output, + output_handle, } } - pub fn run(&self) -> ExecuteStatus { + pub fn poll(self: &Arc) -> Poll<()> { let mut executor_borrow = self.executor.borrow(); + let waker = Waker::from(self.clone()); + let mut cx = Context::from_waker(&waker); - let executor = executor_borrow - .as_ref() - .expect("Executor should be present") - .as_ref() - .get_ref(); - - if let ExecuteStatus::Finished = executor.progress() { - executor_borrow.take(); - ExecuteStatus::Finished - } else { - ExecuteStatus::Executing - } + executor_borrow.poll(&mut cx) } - pub fn unpark(self: &Arc) { - if self.unparked.swap(true, Ordering::Release) { - return; - } - - eonix_preempt::disable(); + /// Get the stabilized lock for the task's run queue. + pub fn rq(&self) -> impl DerefMut + 'static { + loop { + let cpu = self.cpu.load(Ordering::Relaxed); + let rq = cpu_rq(cpu as usize).lock_irq(); - match self.state.swap(TaskState::RUNNING) { - TaskState::RUNNING => {} - TaskState::PARKED | TaskState::PARKING => { - // We are waking up from sleep or someone else is parking this task. - // Try to wake it up. - Scheduler::get().activate(self); + // We stabilize the task cpu with the cpu rq here for now. + if cpu != self.cpu.load(Ordering::Acquire) { + continue; } - _ => unreachable!(), - } - eonix_preempt::enable(); - } - - pub fn park() { - eonix_preempt::disable(); - Self::park_preempt_disabled(); - } - - /// Park the current task with `preempt::count() == 1`. - pub fn park_preempt_disabled() { - let task = Task::current(); - - let old_state = task.state.swap(TaskState::PARKING); - assert_eq!( - old_state, - TaskState::RUNNING, - "Parking a task that is not running." - ); - - if task.unparked.swap(false, Ordering::AcqRel) { - // Someone has called `unpark` on this task previously. - task.state.swap(TaskState::RUNNING); - } else { - unsafe { - // SAFETY: Preemption is disabled. 
- Scheduler::goto_scheduler(&Task::current().execution_context) - }; - assert!(task.unparked.swap(false, Ordering::Acquire)); + return rq; } - - eonix_preempt::enable(); } - pub fn block_on(future: F) -> F::Output - where - F: Future, - { - assert_preempt_enabled!("block_on() must be called with preemption enabled"); - - let waker = Waker::from(Task::current().clone()); - let mut context = Context::from_waker(&waker); - let mut future = pin!(future); - - loop { - if let Poll::Ready(output) = future.as_mut().poll(&mut context) { - break output; - } - - Task::park(); - } + pub fn is_ready(&self) -> bool { + self.state.is_ready() } } @@ -196,6 +105,18 @@ impl Wake for Task { } fn wake_by_ref(self: &Arc) { - self.unpark(); + let Ok(old) = self.state.update(|state| match state { + TaskState::BLOCKED => Some(TaskState::READY), + TaskState::RUNNING => Some(TaskState::READY | TaskState::RUNNING), + TaskState::READY | TaskState::READY_RUNNING => None, + state => unreachable!("Waking a {state:?} task"), + }) else { + return; + }; + + if old == TaskState::BLOCKED { + // If the task was blocked, we need to put it back to the ready queue. + self.rq().put(self.clone()); + } } } diff --git a/crates/eonix_runtime/src/task/adapter.rs b/crates/eonix_runtime/src/task/adapter.rs index de1d0bad..3b5d1583 100644 --- a/crates/eonix_runtime/src/task/adapter.rs +++ b/crates/eonix_runtime/src/task/adapter.rs @@ -1,8 +1,9 @@ use super::{Task, TaskId}; use alloc::sync::Arc; -use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTreeAtomicLink}; +use intrusive_collections::{intrusive_adapter, KeyAdapter, LinkedListAtomicLink, RBTreeAtomicLink}; intrusive_adapter!(pub TaskAdapter = Arc: Task { link_task_list: RBTreeAtomicLink }); +intrusive_adapter!(pub TaskRqAdapter = Arc: Task { link_ready_queue: LinkedListAtomicLink }); impl<'a> KeyAdapter<'a> for TaskAdapter { type Key = TaskId; diff --git a/crates/eonix_runtime/src/task/task_state.rs b/crates/eonix_runtime/src/task/task_state.rs index b22ad889..473310d7 100644 --- a/crates/eonix_runtime/src/task/task_state.rs +++ b/crates/eonix_runtime/src/task/task_state.rs @@ -4,32 +4,26 @@ use core::sync::atomic::{AtomicU32, Ordering}; pub struct TaskState(AtomicU32); impl TaskState { - pub const RUNNING: u32 = 0; - pub const PARKING: u32 = 1; - pub const PARKED: u32 = 2; + pub const BLOCKED: u32 = 0; + pub const READY: u32 = 1; + pub const RUNNING: u32 = 2; + pub const READY_RUNNING: u32 = TaskState::READY | TaskState::RUNNING; + pub const DEAD: u32 = 1 << 31; pub(crate) const fn new(state: u32) -> Self { Self(AtomicU32::new(state)) } pub(crate) fn swap(&self, state: u32) -> u32 { - self.0.swap(state, Ordering::AcqRel) + self.0.swap(state, Ordering::SeqCst) } - pub(crate) fn try_park(&self) -> bool { - match self.0.compare_exchange( - TaskState::PARKING, - TaskState::PARKED, - Ordering::AcqRel, - Ordering::Acquire, - ) { - Ok(_) => true, - Err(TaskState::RUNNING) => false, - Err(_) => unreachable!("Invalid task state while trying to park."), - } + pub(crate) fn update(&self, func: impl FnMut(u32) -> Option) -> Result { + self.0 + .fetch_update(Ordering::SeqCst, Ordering::SeqCst, func) } - pub(crate) fn is_running(&self) -> bool { - self.0.load(Ordering::Acquire) == Self::RUNNING + pub(crate) fn is_ready(&self) -> bool { + self.0.load(Ordering::SeqCst) & Self::READY == Self::READY } } diff --git a/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs b/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs index bac02301..52a5db49 100644 --- 
a/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs +++ b/crates/eonix_sync/eonix_sync_base/src/locked/proof.rs @@ -25,6 +25,9 @@ where _phantom: PhantomData<&'pos ()>, } +unsafe impl Send for Proof<'_, T> {} +unsafe impl Send for ProofMut<'_, T> {} + /// A trait for types that can be converted to a proof of mutable access. /// /// This is used to prove that a mutable reference is valid for the lifetime `'pos` diff --git a/crates/posix_types/src/getdent.rs b/crates/posix_types/src/getdent.rs new file mode 100644 index 00000000..922121f6 --- /dev/null +++ b/crates/posix_types/src/getdent.rs @@ -0,0 +1,28 @@ +#[derive(Copy, Clone, Debug)] +#[repr(C, packed)] +pub struct UserDirent64 { + /// Inode number + pub d_ino: u64, + /// Implementation defined. We ignore it + pub d_off: u64, + /// Length of this record + pub d_reclen: u16, + /// File type. Set to 0 + pub d_type: u8, + /// Filename with a padding '\0' + pub d_name: [u8; 0], +} + +/// File type is at offset `d_reclen - 1`. Set it to 0 +#[derive(Copy, Clone, Debug)] +#[repr(C, packed)] +pub struct UserDirent { + /// Inode number + pub d_ino: u32, + /// Implementation defined. We ignore it + pub d_off: u32, + /// Length of this record + pub d_reclen: u16, + /// Filename with a padding '\0' + pub d_name: [u8; 0], +} diff --git a/crates/posix_types/src/lib.rs b/crates/posix_types/src/lib.rs index dfe8d089..49d2ac5f 100644 --- a/crates/posix_types/src/lib.rs +++ b/crates/posix_types/src/lib.rs @@ -2,6 +2,7 @@ pub mod constants; pub mod ctypes; +pub mod getdent; pub mod namei; pub mod open; pub mod poll; diff --git a/crates/posix_types/src/result.rs b/crates/posix_types/src/result.rs index fb251472..a10ff0ad 100644 --- a/crates/posix_types/src/result.rs +++ b/crates/posix_types/src/result.rs @@ -13,3 +13,13 @@ impl From for u32 { } } } + +impl core::fmt::Debug for PosixError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::EFAULT => write!(f, "EFAULT"), + Self::EXDEV => write!(f, "EXDEV"), + Self::EINVAL => write!(f, "EINVAL"), + } + } +} diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 022160a2..722fa5da 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -18,6 +18,11 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { let args = item.sig.inputs.iter(); let ty_ret = item.sig.output; + assert!( + item.sig.asyncness.is_some(), + "Syscall must be async function" + ); + let args_mapped = item .sig .inputs @@ -100,36 +105,50 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { }; #[link_section = #syscall_fn_section] - fn #helper_fn ( - thd: &crate::kernel::task::Thread, + fn #helper_fn <'thd, 'alloc>( + thd: &'thd crate::kernel::task::Thread, + thd_alloc: crate::kernel::task::ThreadAlloc<'alloc>, args: [usize; 6] - ) -> Option { + ) -> core::pin::Pin> + Send + 'thd, + crate::kernel::task::ThreadAlloc<'alloc> + >> { use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; + use alloc::boxed::Box; #(#args_mapped)* - eonix_log::println_trace!( - "trace_syscall", - "tid{}: {}({}) => {{", - thd.tid, - #syscall_name_str, - format_args!(#trace_format_string, #trace_format_args), - ); - - let retval = #real_fn(thd, #(#args_call),*).into_retval(); - - eonix_log::println_trace!( - "trace_syscall", - "}} => {:x?}", - retval, - ); - - retval + unsafe { + core::pin::Pin::new_unchecked( + Box::new_in( + async move { + eonix_log::println_trace!( + "trace_syscall", + "tid{}: {}({}) => {{", + thd.tid, + #syscall_name_str, + 
format_args!(#trace_format_string, #trace_format_args), + ); + + let retval = #real_fn(thd, #(#args_call),*).await.into_retval(); + + eonix_log::println_trace!( + "trace_syscall", + "}} => {:x?}", + retval, + ); + + retval + }, + thd_alloc + ) + ) + } } #(#attrs)* #[link_section = #syscall_fn_section] - #vis fn #real_fn( + #vis async fn #real_fn( thread: &crate::kernel::task::Thread, #(#args),* ) #ty_ret #body diff --git a/src/driver/ahci/mod.rs b/src/driver/ahci/mod.rs index e988c9c3..c3b1cfa0 100644 --- a/src/driver/ahci/mod.rs +++ b/src/driver/ahci/mod.rs @@ -6,6 +6,7 @@ use crate::{ constants::{EINVAL, EIO}, interrupt::register_irq_handler, pcie::{self, Header, PCIDevice, PCIDriver, PciError}, + task::block_on, }, prelude::*, }; @@ -13,7 +14,6 @@ use alloc::{format, sync::Arc}; use control::AdapterControl; use defs::*; use eonix_mm::address::{AddrOps as _, PAddr}; -use eonix_runtime::task::Task; use eonix_sync::SpinIrq as _; use port::AdapterPort; @@ -133,7 +133,7 @@ impl Device<'static> { port, )?; - Task::block_on(port.partprobe())?; + block_on(port.partprobe())?; Ok(()) })() { diff --git a/src/driver/ahci/port.rs b/src/driver/ahci/port.rs index 27333d5d..f558f6e1 100644 --- a/src/driver/ahci/port.rs +++ b/src/driver/ahci/port.rs @@ -9,11 +9,11 @@ use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; use crate::kernel::constants::{EINVAL, EIO}; use crate::kernel::mem::paging::Page; use crate::kernel::mem::AsMemoryBlock as _; +use crate::kernel::task::block_on; use crate::prelude::*; use alloc::collections::vec_deque::VecDeque; use core::pin::pin; use eonix_mm::address::{Addr as _, PAddr}; -use eonix_runtime::task::Task; use eonix_sync::{SpinIrq as _, WaitList}; /// An `AdapterPort` is an HBA device in AHCI mode. @@ -156,7 +156,7 @@ impl AdapterPort<'_> { wait.as_mut().add_to_wait_list(); drop(free_list); - Task::block_on(wait); + block_on(wait); } } @@ -222,7 +222,7 @@ impl AdapterPort<'_> { self.stats.inc_cmd_sent(); - if let Err(_) = Task::block_on(slot.wait_finish()) { + if let Err(_) = block_on(slot.wait_finish()) { self.stats.inc_cmd_error(); return Err(EIO); }; diff --git a/src/driver/serial.rs b/src/driver/serial.rs index d7fabbbd..d69965f4 100644 --- a/src/driver/serial.rs +++ b/src/driver/serial.rs @@ -3,14 +3,14 @@ mod io; use crate::{ kernel::{ block::make_device, console::set_console, constants::EIO, interrupt::register_irq_handler, - task::KernelStack, CharDevice, CharDeviceType, Terminal, TerminalDevice, + CharDevice, CharDeviceType, Terminal, TerminalDevice, }, prelude::*, }; use alloc::{collections::vec_deque::VecDeque, format, sync::Arc}; use bitflags::bitflags; use core::pin::pin; -use eonix_runtime::{run::FutureRun, scheduler::Scheduler}; +use eonix_runtime::scheduler::RUNTIME; use eonix_sync::{SpinIrq as _, WaitList}; use io::SerialIO; @@ -161,7 +161,7 @@ impl Serial { })?; } - Scheduler::get().spawn::(FutureRun::new(Self::worker(port.clone()))); + RUNTIME.spawn(Self::worker(port.clone())); let _ = set_console(terminal.clone()); eonix_log::set_console(terminal.clone()); diff --git a/src/driver/virtio/loongarch64.rs b/src/driver/virtio/loongarch64.rs index bcd7e713..996683bd 100644 --- a/src/driver/virtio/loongarch64.rs +++ b/src/driver/virtio/loongarch64.rs @@ -3,13 +3,13 @@ use crate::kernel::{ block::{make_device, BlockDevice}, constants::EIO, pcie::{self, PCIDevice, PCIDriver, PciError, SegmentGroup}, + task::block_on, }; use alloc::sync::Arc; use core::sync::atomic::{AtomicUsize, Ordering}; use eonix_hal::{fence::memory_barrier, 
mm::ArchPhysAccess}; use eonix_log::println_warn; use eonix_mm::address::PhysAccess; -use eonix_runtime::task::Task; use eonix_sync::Spin; use virtio_drivers::{ device::blk::VirtIOBlk, @@ -134,7 +134,7 @@ impl PCIDriver for VirtIODriver { Arc::new(Spin::new(virtio_block)), )?; - Task::block_on(block_device.partprobe()).map_err(|err| { + block_on(block_device.partprobe()).map_err(|err| { println_warn!( "Failed to probe partitions for VirtIO Block device: {}", err diff --git a/src/driver/virtio/riscv64.rs b/src/driver/virtio/riscv64.rs index 66f150c3..9bdbf6ce 100644 --- a/src/driver/virtio/riscv64.rs +++ b/src/driver/virtio/riscv64.rs @@ -1,23 +1,17 @@ use super::virtio_blk::HAL; use crate::kernel::{ block::{make_device, BlockDevice}, - mem::{AsMemoryBlock, MemoryBlock, Page}, + task::block_on, }; use alloc::{sync::Arc, vec::Vec}; -use core::num::NonZero; use eonix_hal::arch_exported::fdt::FDT; use eonix_hal::mm::ArchPhysAccess; use eonix_log::{println_info, println_warn}; -use eonix_mm::{ - address::{Addr, PAddr, PhysAccess}, - paging::PFN, -}; -use eonix_runtime::task::Task; +use eonix_mm::address::{PAddr, PhysAccess}; use eonix_sync::Spin; use virtio_drivers::{ device::blk::VirtIOBlk, transport::{mmio::MmioTransport, Transport}, - Hal, }; pub fn init() { @@ -55,7 +49,7 @@ pub fn init() { ) .expect("Failed to register VirtIO Block device"); - Task::block_on(block_device.partprobe()) + block_on(block_device.partprobe()) .expect("Failed to probe partitions for VirtIO Block device"); disk_id += 1; diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs index e8723cac..37e4fe77 100644 --- a/src/driver/virtio/virtio_blk.rs +++ b/src/driver/virtio/virtio_blk.rs @@ -84,7 +84,23 @@ where fn submit(&self, req: BlockDeviceRequest) -> KResult<()> { match req { - BlockDeviceRequest::Write { .. } => todo!(), + BlockDeviceRequest::Write { + sector, + count, + buffer, + } => { + let mut dev = self.lock(); + for ((start, len), buffer_page) in + Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter()) + { + let buffer = unsafe { + // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us. 
+ &buffer_page.as_memblk().as_bytes()[..len as usize * 512] + }; + + dev.write_blocks(start, buffer).map_err(|_| EIO)?; + } + } BlockDeviceRequest::Read { sector, count, diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index a2138a09..76ca4a34 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,17 +1,21 @@ use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; -use crate::kernel::mem::{PageCache, PageCacheBackend}; +use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; +use crate::kernel::task::block_on; +use crate::kernel::timer::Ticks; +use crate::kernel::vfs::inode::{AtomicMode, Mode}; use crate::{ - io::{Buffer, ByteBuffer}, + io::{Buffer, ByteBuffer, Stream}, kernel::{ block::BlockDevice, - constants::EIO, + constants::{EEXIST, EINVAL, EIO, ENOSYS}, timer::Instant, vfs::{ - dentry::Dentry, - inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData}, + dentry::{dcache, Dentry}, + inode::{ + define_struct_inode, AtomicNlink, Ino, Inode, InodeData, RenameData, WriteOffset, + }, mount::{register_filesystem, Mount, MountCreator}, - s_isdir, s_isreg, vfs::Vfs, DevId, FsContext, }, @@ -24,10 +28,10 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; -use eonix_runtime::task::Task; +use another_ext4::{ + Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, +}; use eonix_sync::RwLock; -use ext4_rs::{BlockDevice as Ext4BlockDeviceTrait, Ext4Error}; -use ext4_rs::{Errno, Ext4}; pub struct Ext4BlockDevice { device: Arc, @@ -40,20 +44,25 @@ impl Ext4BlockDevice { } impl Ext4BlockDeviceTrait for Ext4BlockDevice { - fn read_offset(&self, offset: usize) -> Vec { - let mut buffer = vec![0u8; 4096]; + fn read_block(&self, block_id: PBlockId) -> Block { + let mut buffer = [0u8; 4096]; let mut byte_buffer = ByteBuffer::new(buffer.as_mut_slice()); let _ = self .device - .read_some(offset, &mut byte_buffer) + .read_some((block_id as usize) * 4096, &mut byte_buffer) .expect("Failed to read from block device"); - buffer + Block { + id: block_id, + data: buffer, + } } - fn write_offset(&self, _offset: usize, _data: &[u8]) { - todo!() + fn write_block(&self, block: &another_ext4::Block) { + let _ = self + .device + .write_some((block.id as usize) * 4096, &block.data); } } @@ -74,7 +83,7 @@ impl Vfs for Ext4Fs { } fn is_read_only(&self) -> bool { - true + false } } @@ -83,33 +92,67 @@ impl Ext4Fs { icache.get(&ino).cloned().map(Ext4Inode::into_inner) } + fn modify_inode_stat(&self, ino: u32, size: Option, mtime: u32) { + let _ = self + .inner + .setattr(ino, None, None, None, size, None, Some(mtime), None, None); + } + + fn create_inode_stat(&self, parent: u32, child: u32, mtime: u32) { + let _ = self.inner.setattr( + parent, + None, + None, + None, + None, + None, + Some(mtime), + None, + None, + ); + let _ = self + .inner + .setattr(child, None, None, None, None, None, Some(mtime), None, None); + } + + fn chmod_stat(&self, ino: u32, new_mode: u16, ctime: u32) { + let _ = self.inner.setattr( + ino, + Some(InodeMode::from_bits_retain(new_mode.try_into().unwrap())), + None, + None, + None, + None, + None, + Some(ctime), + None, + ); + } + fn get_or_insert( &self, icache: &mut BTreeMap, - mut idata: InodeData, + idata: InodeData, ) -> Arc { match icache.entry(idata.ino) { Entry::Occupied(occupied) => occupied.get().clone().into_inner(), - Entry::Vacant(vacant) => { - let mode = *idata.mode.get_mut(); - if s_isreg(mode) { - vacant - .insert(Ext4Inode::File(FileInode::new(idata))) - .clone() - .into_inner() - } else if 
s_isdir(mode) { + Entry::Vacant(vacant) => match idata.mode.load().format() { + Mode::REG => vacant + .insert(Ext4Inode::File(FileInode::with_idata(idata))) + .clone() + .into_inner(), + Mode::DIR => vacant + .insert(Ext4Inode::Dir(Arc::new(DirInode { idata }))) + .clone() + .into_inner(), + mode => { + println_warn!("ext4: Unsupported inode type: {:#o}", mode.format_bits()); vacant - .insert(Ext4Inode::Dir(Arc::new(DirInode { idata }))) - .clone() - .into_inner() - } else { - println_warn!("ext4: Unsupported inode type: {mode:#o}"); - vacant - .insert(Ext4Inode::File(FileInode::new(idata))) + .insert(Ext4Inode::File(FileInode::with_idata(idata))) .clone() .into_inner() } - } + }, } } } @@ -117,7 +160,7 @@ impl Ext4Fs { impl Ext4Fs { pub fn create(device: Arc) -> KResult<(Arc, Arc)> { let ext4_device = Ext4BlockDevice::new(device.clone()); - let ext4 = Ext4::open(Arc::new(ext4_device)); + let ext4 = Ext4::load(Arc::new(ext4_device)).unwrap(); let ext4fs = Arc::new(Self { inner: ext4, @@ -126,29 +169,29 @@ impl Ext4Fs { }); let root_inode = { - let mut icache = Task::block_on(ext4fs.icache.write()); - let root_inode = ext4fs.inner.get_inode_ref(2); + let mut icache = block_on(ext4fs.icache.write()); + let root_inode = ext4fs.inner.read_root_inode(); ext4fs.get_or_insert( &mut icache, InodeData { - ino: root_inode.inode_num as Ino, + ino: root_inode.id as Ino, size: AtomicU64::new(root_inode.inode.size()), - nlink: AtomicNlink::new(root_inode.inode.links_count() as _), + nlink: AtomicNlink::new(root_inode.inode.link_count() as _), uid: AtomicU32::new(root_inode.inode.uid() as _), gid: AtomicU32::new(root_inode.inode.gid() as _), - mode: AtomicU32::new(root_inode.inode.mode() as _), + mode: AtomicMode::new(root_inode.inode.mode().bits() as _), atime: Spin::new(Instant::new( root_inode.inode.atime() as _, - root_inode.inode.i_atime_extra() as _, + root_inode.inode.atime_extra() as _, )), ctime: Spin::new(Instant::new( root_inode.inode.ctime() as _, - root_inode.inode.i_ctime_extra() as _, + root_inode.inode.ctime_extra() as _, )), mtime: Spin::new(Instant::new( root_inode.inode.mtime() as _, - root_inode.inode.i_mtime_extra() as _, + root_inode.inode.mtime_extra() as _, )), rwsem: RwLock::new(()), vfs: Arc::downgrade(&ext4fs) as _, @@ -177,6 +220,7 @@ impl Ext4Inode { define_struct_inode! { struct FileInode { + last_sync: AtomicU64, page_cache: PageCache, } } @@ -186,23 +230,49 @@ define_struct_inode! 
{
 }
 impl FileInode {
-    fn new(idata: InodeData) -> Arc {
+    fn with_idata(idata: InodeData) -> Arc {
         let inode = Arc::new_cyclic(|weak_self: &Weak| Self {
             idata,
+            last_sync: AtomicU64::new(0),
             page_cache: PageCache::new(weak_self.clone()),
         });
         inode
     }
+
+    pub fn new(ino: Ino, vfs: Weak, mode: Mode) -> Arc {
+        Arc::new_cyclic(|weak_self: &Weak| Self {
+            idata: {
+                let inode_data = InodeData::new(ino, vfs);
+                inode_data.mode.store(Mode::REG.perm(mode.bits()));
+                inode_data.nlink.store(1, Ordering::Relaxed);
+                inode_data
+            },
+            last_sync: AtomicU64::new(0),
+            page_cache: PageCache::new(weak_self.clone()),
+        })
+    }
+
+    fn sync_if_needed(&self) {
+        let now = Ticks::now().in_secs();
+        let last = self.last_sync.load(Ordering::Relaxed);
+
+        // TODO: This is a temporary implementation; consider replacing it with a
+        // better writeback strategy (e.g. an LRU-based one) in the future.
+        if now - last > 10 {
+            self.last_sync.store(now, Ordering::Relaxed);
+            let _ = block_on(self.page_cache.fsync());
+        }
+    }
 }
 impl PageCacheBackend for FileInode {
-    fn read_page(&self, page: &mut crate::kernel::mem::CachePage, offset: usize) -> KResult {
+    fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult {
         self.read_direct(page, offset)
     }
-    fn write_page(&self, page: &crate::kernel::mem::CachePage, offset: usize) -> KResult {
-        todo!()
+    fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult {
+        self.write_direct(page, offset)
     }
     fn size(&self) -> usize {
@@ -216,7 +286,7 @@ impl Inode for FileInode {
     }
     fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult {
-        Task::block_on(self.page_cache.read(buffer, offset))
+        block_on(self.page_cache.read(buffer, offset))
     }
     fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult {
@@ -224,13 +294,135 @@
         let ext4fs = vfs.as_any().downcast_ref::().unwrap();
         let mut temp_buf = vec![0u8; buffer.total()];
-        match ext4fs.inner.read_at(self.ino as u32, offset, &mut temp_buf) {
+        match ext4fs.inner.read(self.ino as u32, offset, &mut temp_buf) {
             Ok(bytes_read) => {
                 let _ = buffer.fill(&temp_buf[..bytes_read])?;
                 Ok(buffer.wrote())
             }
-            Err(e) => Err(e.error() as u32),
+            Err(e) => Err(e.code() as u32),
+        }
+    }
+
+    fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult {
+        let _lock = block_on(self.rwsem.write());
+
+        let mut store_new_end = None;
+        let offset = match offset {
+            WriteOffset::Position(offset) => offset,
+            // TODO: some additional handling is still needed for this case.
+            WriteOffset::End(end) => {
+                store_new_end = Some(end);
+                self.size.load(Ordering::Relaxed) as usize
+            }
+        };
+
+        let total_written = block_on(self.page_cache.write(stream, offset))?;
+        let cursor_end = offset + total_written;
+        if let Some(store_end) = store_new_end {
+            *store_end = cursor_end;
+        }
+
+        let mtime = Instant::now();
+        *self.mtime.lock() = mtime;
+        self.size.store(cursor_end as u64, Ordering::Relaxed);
+
+        self.sync_if_needed();
+
+        Ok(total_written)
+    }
+
+    fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult {
+        //let _lock = Task::block_on(self.rwsem.write());
+
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::().unwrap();
+
+        let mut temp_buf = vec![0u8; 4096];
+        let mut total_written = 0;
+
+        while let Some(data) = stream.poll_data(&mut temp_buf)? {
+            let written = ext4fs
+                .inner
+                .write(self.ino as u32, offset + total_written, data)
+                .unwrap();
+            total_written += written;
+            if written < data.len() {
+                break;
+            }
         }
+
+        ext4fs.modify_inode_stat(
+            self.ino as u32,
+            Some(self.size() as u64),
+            self.mtime.lock().since_epoch().as_secs() as u32,
+        );
+
+        Ok(total_written)
+    }
+
+    fn chmod(&self, mode: Mode) -> KResult<()> {
+        let _lock = block_on(self.rwsem.write());
+
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::().unwrap();
+        let old_mode = self.mode.load();
+        let new_mode = old_mode.perm(mode.bits());
+
+        let now = Instant::now();
+        ext4fs.chmod_stat(
+            self.ino as u32,
+            new_mode.bits() as u16,
+            now.since_epoch().as_secs() as u32,
+        );
+
+        // SAFETY: `rwsem` has done the synchronization
+        self.mode.store(new_mode);
+        *self.ctime.lock() = now;
+
+        Ok(())
+    }
+
+    // TODO: implement real truncation; this currently succeeds without touching the file.
+    fn truncate(&self, _length: usize) -> KResult<()> {
+        Ok(())
+    }
+}
+
+impl DirInode {
+    fn new(ino: Ino, vfs: Weak, mode: Mode) -> Arc {
+        Arc::new_cyclic(|_| DirInode {
+            idata: {
+                let inode_data = InodeData::new(ino, vfs);
+                inode_data.mode.store(Mode::DIR.perm(mode.bits()));
+                inode_data.nlink.store(2, Ordering::Relaxed);
+                inode_data.size.store(4096, Ordering::Relaxed);
+                inode_data
+            },
+        })
+    }
+
+    fn update_time(&self, time: Instant) {
+        *self.ctime.lock() = time;
+        *self.mtime.lock() = time;
+    }
+
+    fn update_child_time(&self, child: &dyn Inode, time: Instant) {
+        self.update_time(time);
+        *child.ctime.lock() = time;
+        *child.mtime.lock() = time;
+    }
+
+    fn link_file(&self) {
+        self.size.fetch_add(1, Ordering::Relaxed);
+    }
+
+    fn link_dir(&self) {
+        self.nlink.fetch_add(1, Ordering::Relaxed);
+        self.size.fetch_add(1, Ordering::Relaxed);
+    }
+
+    fn unlink_dir(&self) {
+        self.nlink.fetch_sub(1, Ordering::Relaxed);
     }
 }
@@ -241,36 +433,46 @@ impl Inode for DirInode {
         let name = dentry.get_name();
         let name = String::from_utf8_lossy(&name);
-        let lookup_result = ext4fs.inner.fuse_lookup(self.ino, &name);
+        let lookup_result = ext4fs.inner.lookup(self.ino as u32, &name);
-        const EXT4_ERROR_ENOENT: Ext4Error = Ext4Error::new(Errno::ENOENT);
+        // TODO: a "not found" error from `lookup` should be mapped back to `Ok(None)`
+        // instead of being returned to the caller (see the commented-out arm below).
+        //const EXT4_ERROR_ENOENT: Ext4Error_ = Ext4Error_::new(ErrCode::ENOENT);
         let attr = match lookup_result {
-            Ok(attr) => attr,
-            Err(EXT4_ERROR_ENOENT) => return Ok(None),
-            Err(error) => return Err(error.error() as u32),
+            Ok(inode_id) => ext4fs.inner.getattr(inode_id).unwrap(),
+            //Err(EXT4_ERROR_ENOENT) => return Ok(None),
+            Err(error) => return Err(error.code() as u32),
         };
         // Fast path: if the inode is already in the cache, return it.
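// Illustrative sketch (not from this patch): driving the `FileInode::write`
// implementation above for an append-style write. The `*store_end = cursor_end`
// assignment suggests `WriteOffset::End` carries an `&mut usize` that receives
// the resulting end-of-file position (as an O_APPEND write needs); the helper
// below and its exact types are hypothetical.
fn append_to_file(file: &FileInode, stream: &mut dyn Stream) -> KResult<usize> {
    let mut new_end = 0usize;
    // The inode uses its current size as the write offset and reports the
    // final cursor position back through `new_end`.
    let written = file.write(stream, WriteOffset::End(&mut new_end))?;
    Ok(written)
}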
- if let Some(inode) = ext4fs.try_get(&Task::block_on(ext4fs.icache.read()), attr.ino as u64) - { + if let Some(inode) = ext4fs.try_get(&block_on(ext4fs.icache.read()), attr.ino as u64) { return Ok(Some(inode)); } - let extra_perm = attr.perm.bits() as u32 & 0o7000; - let perm = attr.perm.bits() as u32 & 0o0700; - let real_perm = extra_perm | perm | perm >> 3 | perm >> 6; + let file_type_bits = match attr.ftype { + FileType::RegularFile => InodeMode::FILE.bits(), + FileType::Directory => InodeMode::DIRECTORY.bits(), + FileType::CharacterDev => InodeMode::CHARDEV.bits(), + FileType::BlockDev => InodeMode::BLOCKDEV.bits(), + FileType::Fifo => InodeMode::FIFO.bits(), + FileType::Socket => InodeMode::SOCKET.bits(), + FileType::SymLink => InodeMode::SOFTLINK.bits(), + FileType::Unknown => 0, + }; + + let perm_bits = attr.perm.bits() & InodeMode::PERM_MASK.bits(); + let mode = file_type_bits | perm_bits; // Create a new inode based on the attributes. - let mut icache = Task::block_on(ext4fs.icache.write()); + let mut icache = block_on(ext4fs.icache.write()); let inode = ext4fs.get_or_insert( &mut icache, InodeData { ino: attr.ino as Ino, size: AtomicU64::new(attr.size), - nlink: AtomicNlink::new(attr.nlink as _), + nlink: AtomicNlink::new(attr.links as _), uid: AtomicU32::new(attr.uid), gid: AtomicU32::new(attr.gid), - mode: AtomicU32::new(attr.kind.bits() as u32 | real_perm), + mode: AtomicMode::new(mode as _), atime: Spin::new(Instant::new(attr.atime as _, 0)), ctime: Spin::new(Instant::new(attr.ctime as _, 0)), mtime: Spin::new(Instant::new(attr.mtime as _, 0)), @@ -292,22 +494,206 @@ impl Inode for DirInode { let entries = ext4fs .inner - .fuse_readdir(self.ino as u64, 0, offset as i64) - .map_err(|err| err.error() as u32)?; - let mut current_offset = 0; + .listdir(self.ino as u32) + .map_err(|err| err.code() as u32)?; - for entry in entries { - let name_len = entry.name_len as usize; - let name = &entry.name[..name_len]; + let entries_to_process = if offset < entries.len() { + &entries[offset..] 
+ } else { + &entries[0..0] + }; + let mut current_offset = 0; + for entry in entries_to_process { + let name_string = entry.name(); + let name = name_string.as_bytes(); + let inode = entry.inode() as Ino; - if callback(name, entry.inode as Ino)?.is_break() { + if callback(name, inode)?.is_break() { break; } - current_offset += 1; } Ok(current_offset) } + + fn creat(&self, at: &Arc, mode: Mode) -> KResult<()> { + let _lock = block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let name = at.get_name(); + let name = String::from_utf8_lossy(&name); + + let new_ino = ext4fs + .inner + .create( + self.ino as u32, + &name, + InodeMode::from_bits_retain(Mode::REG.perm(mode.bits()).bits() as u16), + ) + .unwrap(); + + let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode); + let now = Instant::now(); + self.update_child_time(file.as_ref(), now); + self.link_file(); + + ext4fs.create_inode_stat(self.ino as u32, new_ino, now.since_epoch().as_secs() as u32); + + at.save_reg(file) + } + + fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { + let _lock = block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let name = at.get_name(); + let name = String::from_utf8_lossy(&name); + + let new_ino = ext4fs + .inner + .mkdir( + self.ino as u32, + &name, + InodeMode::from(Mode::DIR.perm(mode.bits())), + ) + .unwrap(); + + let new_dir = DirInode::new(new_ino as u64, self.vfs.clone(), mode); + let now = Instant::now(); + self.update_child_time(new_dir.as_ref(), now); + self.link_dir(); + + ext4fs.create_inode_stat(self.ino as u32, new_ino, now.since_epoch().as_secs() as u32); + + at.save_dir(new_dir) + } + + fn unlink(&self, at: &Arc) -> KResult<()> { + let _dir_lock = block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let file = at.get_inode()?; + + let name = at.get_name(); + let name = String::from_utf8_lossy(&name); + let _file_lock = block_on(file.rwsem.write()); + + if file.is_dir() { + let _ = ext4fs.inner.rmdir(self.ino as u32, &name); + self.unlink_dir(); + } else { + let _ = ext4fs.inner.unlink(self.ino as u32, &name); + } + let now = Instant::now(); + self.update_time(now); + ext4fs.modify_inode_stat(self.ino as u32, None, now.since_epoch().as_secs() as u32); + + dcache::d_remove(at); + + Ok(()) + } + + fn chmod(&self, mode: Mode) -> KResult<()> { + let _lock = block_on(self.rwsem.write()); + + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + let old_mode = self.mode.load(); + let new_mode = old_mode.perm(mode.bits()); + + let now = Instant::now(); + ext4fs.chmod_stat( + self.ino as u32, + new_mode.non_format_bits() as _, + now.since_epoch().as_secs() as u32, + ); + + // SAFETY: `rwsem` has done the synchronization + self.mode.store(new_mode); + *self.ctime.lock() = now; + + Ok(()) + } + + fn rename(&self, rename_data: RenameData) -> KResult<()> { + let RenameData { + old_dentry, + new_dentry, + new_parent, + is_exchange, + no_replace, + .. 
+ } = rename_data; + + if is_exchange { + println_warn!("Ext4Fs does not support exchange rename for now"); + return Err(ENOSYS); + } + + // TODO: may need another lock + let _lock = block_on(self.rwsem.write()); + let vfs = self.vfs.upgrade().ok_or(EIO)?; + let ext4fs = vfs.as_any().downcast_ref::().unwrap(); + + let old_file = old_dentry.get_inode()?; + let new_file = new_dentry.get_inode(); + if no_replace && new_file.is_ok() { + return Err(EEXIST); + } + + let name = old_dentry.name(); + let name = core::str::from_utf8(&*name).map_err(|_| EINVAL)?; + let new_name = new_dentry.name(); + let new_name = core::str::from_utf8(&*new_name).map_err(|_| EINVAL)?; + + ext4fs + .inner + .rename(self.ino as u32, name, new_parent.ino as u32, new_name) + .map_err(|err| err.code() as u32)?; + + // TODO: may need more operations + let now = Instant::now(); + *old_file.ctime.lock() = now; + *self.mtime.lock() = now; + + let same_parent = Arc::as_ptr(&new_parent) == &raw const *self; + if !same_parent { + *new_parent.mtime.lock() = now; + if old_file.is_dir() { + self.nlink.fetch_sub(1, Ordering::Relaxed); + new_parent.nlink.fetch_add(1, Ordering::Relaxed); + } + } + + if let Ok(replaced_file) = new_dentry.get_inode() { + if !no_replace { + *replaced_file.ctime.lock() = now; + replaced_file.nlink.fetch_sub(1, Ordering::Relaxed); + } + } + + block_on(dcache::d_exchange(old_dentry, new_dentry)); + + Ok(()) + } +} + +impl From for InodeMode { + fn from(value: Mode) -> Self { + InodeMode::from_bits_retain(value.bits() as u16) + } +} + +impl From for Mode { + fn from(value: InodeMode) -> Self { + Mode::new(value.bits() as u32) + } } struct Ext4MountCreator; diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 852d8673..12eabdd5 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -3,13 +3,13 @@ mod file; use crate::io::Stream; use crate::kernel::constants::EIO; -use crate::kernel::mem::AsMemoryBlock; -use crate::kernel::vfs::inode::WriteOffset; +use crate::kernel::mem::{AsMemoryBlock, CachePageStream}; +use crate::kernel::task::block_on; +use crate::kernel::vfs::inode::{Mode, WriteOffset}; use crate::{ io::{Buffer, ByteBuffer, UninitBuffer}, kernel::{ block::{make_device, BlockDevice, BlockDeviceRequest}, - constants::{S_IFDIR, S_IFREG}, mem::{ paging::Page, {CachePage, PageCache, PageCacheBackend}, @@ -32,7 +32,6 @@ use alloc::{ }; use core::{ops::ControlFlow, sync::atomic::Ordering}; use dir::Dirs as _; -use eonix_runtime::task::Task; use eonix_sync::RwLock; use file::ClusterRead; @@ -253,7 +252,7 @@ impl FileInode { // Safety: We are initializing the inode inode.nlink.store(1, Ordering::Relaxed); - inode.mode.store(S_IFREG | 0o777, Ordering::Relaxed); + inode.mode.store(Mode::REG.perm(0o777)); inode.size.store(size as u64, Ordering::Relaxed); inode @@ -266,13 +265,13 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - Task::block_on(self.page_cache.read(buffer, offset)) + block_on(self.page_cache.read(buffer, offset)) } fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { let vfs = self.vfs.upgrade().ok_or(EIO)?; let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = Task::block_on(vfs.fat.read()); + let fat = block_on(vfs.fat.read()); if self.size.load(Ordering::Relaxed) as usize == 0 { return Ok(0); @@ -308,11 +307,11 @@ impl Inode for FileInode { Ok(buffer.wrote()) } - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { 
todo!() } - fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write_direct(&self, _stream: &mut dyn Stream, _offset: usize) -> KResult { todo!() } } @@ -322,7 +321,7 @@ impl PageCacheBackend for FileInode { self.read_direct(page, offset) } - fn write_page(&self, page: &CachePage, offset: usize) -> KResult { + fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { todo!() } @@ -343,7 +342,7 @@ impl DirInode { // Safety: We are initializing the inode inode.nlink.store(2, Ordering::Relaxed); - inode.mode.store(S_IFDIR | 0o777, Ordering::Relaxed); + inode.mode.store(Mode::DIR.perm(0o777)); inode.size.store(size as u64, Ordering::Relaxed); inode @@ -354,7 +353,7 @@ impl Inode for DirInode { fn lookup(&self, dentry: &Arc) -> KResult>> { let vfs = self.vfs.upgrade().ok_or(EIO)?; let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = Task::block_on(vfs.fat.read()); + let fat = block_on(vfs.fat.read()); let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) .read(vfs, 0) @@ -385,7 +384,7 @@ impl Inode for DirInode { ) -> KResult { let vfs = self.vfs.upgrade().ok_or(EIO)?; let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = Task::block_on(vfs.fat.read()); + let fat = block_on(vfs.fat.read()); let cluster_iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) .read(vfs, offset) diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 85c0ecbb..2ed24613 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,9 +1,10 @@ use crate::kernel::constants::{EACCES, ENOTDIR}; +use crate::kernel::task::block_on; use crate::kernel::timer::Instant; +use crate::kernel::vfs::inode::{AtomicMode, Mode}; use crate::{ io::Buffer, kernel::{ - constants::{S_IFDIR, S_IFREG}, mem::paging::PageBuffer, vfs::{ dentry::Dentry, @@ -17,7 +18,6 @@ use crate::{ }; use alloc::sync::{Arc, Weak}; use core::{ops::ControlFlow, sync::atomic::Ordering}; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, AsProofMut as _, LazyLock, Locked}; use itertools::Itertools; @@ -69,12 +69,12 @@ define_struct_inode! 
{ impl FileInode { pub fn new(ino: Ino, vfs: Weak, file: Box) -> Arc { - let mut mode = S_IFREG; + let mut mode = Mode::REG; if file.can_read() { - mode |= 0o444; + mode.set_perm(0o444); } if file.can_write() { - mode |= 0o200; + mode.set_perm(0o222); } let mut inode = Self { @@ -82,7 +82,7 @@ impl FileInode { file, }; - inode.idata.mode.store(mode, Ordering::Relaxed); + inode.idata.mode.store(mode); inode.idata.nlink.store(1, Ordering::Relaxed); *inode.ctime.get_mut() = Instant::now(); *inode.mtime.get_mut() = Instant::now(); @@ -123,7 +123,7 @@ impl DirInode { pub fn new(ino: Ino, vfs: Weak) -> Arc { Self::new_locked(ino, vfs, |inode, rwsem| unsafe { addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem)); - addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | 0o755).into()); + addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::DIR.perm(0o755))); addr_of_mut_field!(&mut *inode, nlink).write(1.into()); addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -134,7 +134,7 @@ impl DirInode { impl Inode for DirInode { fn lookup(&self, dentry: &Arc) -> KResult>> { - let lock = Task::block_on(self.rwsem.read()); + let lock = block_on(self.rwsem.read()); Ok(self .entries .access(lock.prove()) @@ -147,7 +147,7 @@ impl Inode for DirInode { offset: usize, callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, ) -> KResult { - let lock = Task::block_on(self.rwsem.read()); + let lock = block_on(self.rwsem.read()); self.entries .access(lock.prove()) .iter() @@ -234,7 +234,7 @@ pub fn creat( let inode = FileInode::new(ino, Arc::downgrade(&fs), file); { - let lock = Task::block_on(parent.idata.rwsem.write()); + let lock = block_on(parent.idata.rwsem.write()); parent .entries .access_mut(lock.prove_mut()) @@ -258,7 +258,7 @@ pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult { parent .entries - .access_mut(Task::block_on(inode.rwsem.write()).prove_mut()) + .access_mut(block_on(inode.rwsem.write()).prove_mut()) .push((Arc::from(name), ProcFsNode::Dir(inode.clone()))); Ok(ProcFsNode::Dir(inode)) diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs index 334e2781..7a5bd52b 100644 --- a/src/fs/tmpfs.rs +++ b/src/fs/tmpfs.rs @@ -1,17 +1,16 @@ use crate::io::Stream; use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR}; -use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend}; +use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; +use crate::kernel::task::block_on; use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::InodeData; use crate::kernel::vfs::inode::RenameData; +use crate::kernel::vfs::inode::{AtomicMode, InodeData}; use crate::{ io::Buffer, - kernel::constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFREG}, kernel::vfs::{ dentry::{dcache, Dentry}, inode::{define_struct_inode, AtomicIno, Ino, Inode, Mode, WriteOffset}, mount::{register_filesystem, Mount, MountCreator, MS_RDONLY}, - s_isblk, s_ischr, vfs::Vfs, DevId, }, @@ -21,7 +20,6 @@ use alloc::sync::{Arc, Weak}; use core::fmt::Debug; use core::{ops::ControlFlow, sync::atomic::Ordering}; use eonix_mm::paging::PAGE_SIZE; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, ProofMut}; use itertools::Itertools; @@ -46,7 +44,7 @@ impl NodeInode { Self::new_locked(ino, vfs, |inode, _| unsafe { addr_of_mut_field!(inode, devid).write(devid); - addr_of_mut_field!(&mut *inode, mode).write(mode.into()); + 
addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(mode)); addr_of_mut_field!(&mut *inode, nlink).write(1.into()); addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -74,7 +72,8 @@ impl DirectoryInode { .write(Locked::new(vec![(Arc::from(b".".as_slice()), ino)], rwsem)); addr_of_mut_field!(&mut *inode, size).write(1.into()); - addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | (mode & 0o777)).into()); + addr_of_mut_field!(&mut *inode, mode) + .write(AtomicMode::from(Mode::DIR.perm(mode.non_format_bits()))); addr_of_mut_field!(&mut *inode, nlink).write(1.into()); // link from `.` to itself addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -108,7 +107,7 @@ impl DirectoryInode { _file_lock: ProofMut<()>, ) -> KResult<()> { // SAFETY: `file_lock` has done the synchronization - if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { + if file.mode.load().is_dir() { return Err(EISDIR); } @@ -138,7 +137,7 @@ impl Inode for DirectoryInode { offset: usize, callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, ) -> KResult { - let lock = Task::block_on(self.rwsem.read()); + let lock = block_on(self.rwsem.read()); self.entries .access(lock.prove()) .iter() @@ -153,7 +152,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let file = FileInode::new(ino, self.vfs.clone(), 0, mode); @@ -163,22 +162,17 @@ impl Inode for DirectoryInode { } fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> { - if !s_ischr(mode) && !s_isblk(mode) { + if !mode.is_chr() && !mode.is_blk() { return Err(EINVAL); } let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); - let file = NodeInode::new( - ino, - self.vfs.clone(), - mode & (0o777 | S_IFBLK | S_IFCHR), - dev, - ); + let file = NodeInode::new(ino, self.vfs.clone(), mode, dev); self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); at.save_reg(file) @@ -188,7 +182,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let file = SymlinkInode::new(ino, self.vfs.clone(), target.into()); @@ -201,7 +195,7 @@ impl Inode for DirectoryInode { let vfs = acquire(&self.vfs)?; let vfs = astmp(&vfs); - let rwsem = Task::block_on(self.rwsem.write()); + let rwsem = block_on(self.rwsem.write()); let ino = vfs.assign_ino(); let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode); @@ -213,11 +207,11 @@ impl Inode for DirectoryInode { fn unlink(&self, at: &Arc) -> KResult<()> { let _vfs = acquire(&self.vfs)?; - let dir_lock = Task::block_on(self.rwsem.write()); + let dir_lock = block_on(self.rwsem.write()); let file = at.get_inode()?; let filename = at.get_name(); - let file_lock = Task::block_on(file.rwsem.write()); + let file_lock = block_on(file.rwsem.write()); let entries = self.entries.access_mut(dir_lock.prove_mut()); @@ -240,12 +234,11 @@ impl Inode for DirectoryInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _vfs = acquire(&self.vfs)?; - let _lock = Task::block_on(self.rwsem.write()); + let _lock = 
block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(Ordering::Relaxed); - self.mode - .store((old & !0o777) | (mode & 0o777), Ordering::Relaxed); + let old = self.mode.load(); + self.mode.store(old.perm(mode.non_format_bits())); *self.ctime.lock() = Instant::now(); Ok(()) @@ -271,7 +264,7 @@ impl Inode for DirectoryInode { .downcast_ref::() .expect("vfs must be a TmpFs"); - let _rename_lock = Task::block_on(vfs.rename_lock.lock()); + let _rename_lock = block_on(vfs.rename_lock.lock()); let old_file = old_dentry.get_inode()?; let new_file = new_dentry.get_inode(); @@ -284,7 +277,7 @@ impl Inode for DirectoryInode { if same_parent { // Same directory rename // Remove from old location and add to new location - let parent_lock = Task::block_on(self.rwsem.write()); + let parent_lock = block_on(self.rwsem.write()); let entries = self.entries.access_mut(parent_lock.prove_mut()); fn rename_old( @@ -328,15 +321,13 @@ impl Inode for DirectoryInode { if let Some(new_idx) = new_entry_idx { // Replace existing file (i.e. rename the old and unlink the new) let new_file = new_file.unwrap(); - let _new_file_lock = Task::block_on(new_file.rwsem.write()); + let _new_file_lock = block_on(new_file.rwsem.write()); // SAFETY: `new_file_lock` has done the synchronization - if new_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { - return Err(EISDIR); - } else { - if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 { - return Err(ENOTDIR); - } + match (new_file.mode.load(), old_file.mode.load()) { + (Mode::DIR, _) => return Err(EISDIR), + (_, Mode::DIR) => return Err(ENOTDIR), + _ => {} } entries.remove(new_idx); @@ -364,8 +355,8 @@ impl Inode for DirectoryInode { .downcast_ref::() .expect("new parent must be a DirectoryInode"); - let old_parent_lock = Task::block_on(self.rwsem.write()); - let new_parent_lock = Task::block_on(new_parent_inode.rwsem.write()); + let old_parent_lock = block_on(self.rwsem.write()); + let new_parent_lock = block_on(new_parent_inode.rwsem.write()); let old_ino = old_file.ino; let new_ino = new_file.as_ref().ok().map(|f| f.ino); @@ -391,12 +382,12 @@ impl Inode for DirectoryInode { if has_new { // Replace existing file (i.e. 
move the old and unlink the new) let new_file = new_file.unwrap(); - let new_file_lock = Task::block_on(new_file.rwsem.write()); + let new_file_lock = block_on(new_file.rwsem.write()); - if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 - && new_file.mode.load(Ordering::Relaxed) & S_IFDIR == 0 - { - return Err(ENOTDIR); + match (old_file.mode.load(), new_file.mode.load()) { + (Mode::DIR, Mode::DIR) => {} + (Mode::DIR, _) => return Err(ENOTDIR), + (_, _) => {} } // Unlink the old file that was replaced @@ -424,7 +415,7 @@ impl Inode for DirectoryInode { *old_file.ctime.lock() = now; } - Task::block_on(dcache::d_exchange(old_dentry, new_dentry)); + block_on(dcache::d_exchange(old_dentry, new_dentry)); Ok(()) } @@ -442,7 +433,7 @@ impl SymlinkInode { let len = target.len(); addr_of_mut_field!(inode, target).write(target); - addr_of_mut_field!(&mut *inode, mode).write((S_IFLNK | 0o777).into()); + addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::LNK.perm(0o777))); addr_of_mut_field!(&mut *inode, size).write((len as u64).into()); addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); @@ -482,9 +473,7 @@ impl FileInode { pages: PageCache::new(weak_self.clone()), }); - inode - .mode - .store(S_IFREG | (mode & 0o777), Ordering::Relaxed); + inode.mode.store(Mode::REG.perm(mode.non_format_bits())); inode.nlink.store(1, Ordering::Relaxed); inode.size.store(size as u64, Ordering::Relaxed); inode @@ -496,7 +485,7 @@ impl PageCacheBackend for FileInode { Ok(PAGE_SIZE) } - fn write_page(&self, _page: &CachePage, _offset: usize) -> KResult { + fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { Ok(PAGE_SIZE) } @@ -511,13 +500,13 @@ impl Inode for FileInode { } fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let lock = Task::block_on(self.rwsem.write()); - Task::block_on(self.pages.read(buffer, offset)) + let _lock = block_on(self.rwsem.write()); + block_on(self.pages.read(buffer, offset)) } fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { // TODO: We don't need that strong guarantee, find some way to avoid locks - let lock = Task::block_on(self.rwsem.write()); + let _lock = block_on(self.rwsem.write()); let mut store_new_end = None; let offset = match offset { @@ -530,7 +519,7 @@ impl Inode for FileInode { } }; - let wrote = Task::block_on(self.pages.write(stream, offset))?; + let wrote = block_on(self.pages.write(stream, offset))?; let cursor_end = offset + wrote; if let Some(store_end) = store_new_end { @@ -545,8 +534,8 @@ impl Inode for FileInode { } fn truncate(&self, length: usize) -> KResult<()> { - let lock = Task::block_on(self.rwsem.write()); - Task::block_on(self.pages.resize(length))?; + let _lock = block_on(self.rwsem.write()); + block_on(self.pages.resize(length))?; self.size.store(length as u64, Ordering::Relaxed); *self.mtime.lock() = Instant::now(); Ok(()) @@ -554,12 +543,11 @@ impl Inode for FileInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _vfs = acquire(&self.vfs)?; - let _lock = Task::block_on(self.rwsem.write()); + let _lock = block_on(self.rwsem.write()); // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(Ordering::Relaxed); - self.mode - .store((old & !0o777) | (mode & 0o777), Ordering::Relaxed); + let old = self.mode.load(); + self.mode.store(old.perm(mode.non_format_bits())); *self.ctime.lock() = Instant::now(); Ok(()) @@ -600,7 +588,7 @@ impl TmpFs { }); let 
weak = Arc::downgrade(&tmpfs); - let root_dir = DirectoryInode::new(0, weak, 0o755); + let root_dir = DirectoryInode::new(0, weak, Mode::new(0o755)); Ok((tmpfs, root_dir)) } diff --git a/src/io.rs b/src/io.rs index f1eae9b9..85675dea 100644 --- a/src/io.rs +++ b/src/io.rs @@ -30,7 +30,7 @@ impl FillResult { } } -pub trait Buffer { +pub trait Buffer: Send { fn total(&self) -> usize; fn wrote(&self) -> usize; @@ -49,7 +49,7 @@ pub trait Buffer { } } -pub trait Stream { +pub trait Stream: Send { fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult>; fn ignore(&mut self, len: usize) -> KResult>; } @@ -131,6 +131,8 @@ pub struct UninitBuffer<'lt, T: Copy + Sized> { buffer: ByteBuffer<'lt>, } +unsafe impl<'lt, T: Copy> Send for UninitBuffer<'lt, T> {} + impl<'lt, T: Copy + Sized> UninitBuffer<'lt, T> { pub fn new() -> Self { let mut data = Box::new(MaybeUninit::uninit()); diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 4a10e4c7..349e3656 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -48,21 +48,6 @@ enum BlockDeviceType { }, } -#[derive(Debug, Clone)] -pub enum FileSystemType { - Ext4, - Fat32, -} - -impl FileSystemType { - pub fn as_str(&self) -> &'static str { - match self { - FileSystemType::Ext4 => "ext4", - FileSystemType::Fat32 => "fat32", - } - } -} - pub struct BlockDevice { /// Unique device identifier, major and minor numbers devid: DevId, @@ -285,6 +270,103 @@ impl BlockDevice { Ok(FillResult::Partial(nfilled)) } } + + /// Write some data to the block device, may involve some copy and fragmentation + /// + /// # Arguments + /// `offset` - offset in bytes + /// `data` - data to write + /// + pub fn write_some(&self, offset: usize, data: &[u8]) -> KResult { + let mut sector_start = offset as u64 / 512; + let mut first_sector_offset = offset as u64 % 512; + let mut remaining_data = data; + let mut nwritten = 0; + + while !remaining_data.is_empty() { + let pages: &[Page]; + let page: Option; + let page_vec: Option>; + + // Calculate sectors needed for this write + let write_end = first_sector_offset + remaining_data.len() as u64; + let sector_count = ((write_end + 511) / 512).min(self.queue().max_request_pages()); + + match sector_count { + count if count <= 8 => { + let _page = Page::alloc(); + page = Some(_page); + pages = core::slice::from_ref(page.as_ref().unwrap()); + } + count if count <= 16 => { + let _pages = Page::alloc_order(1); + page = Some(_pages); + pages = core::slice::from_ref(page.as_ref().unwrap()); + } + count => { + let npages = (count + 15) / 16; + let mut _page_vec = Vec::with_capacity(npages as usize); + for _ in 0..npages { + _page_vec.push(Page::alloc_order(1)); + } + page_vec = Some(_page_vec); + pages = page_vec.as_ref().unwrap().as_slice(); + } + } + + if first_sector_offset != 0 || remaining_data.len() < (sector_count * 512) as usize { + let read_req = BlockDeviceRequest::Read { + sector: sector_start, + count: sector_count, + buffer: pages, + }; + self.commit_request(read_req)?; + } + + let mut data_offset = 0; + let mut page_offset = first_sector_offset as usize; + + for page in pages.iter() { + // SAFETY: We own the page and can modify it + let page_data = unsafe { + let memblk = page.as_memblk(); + core::slice::from_raw_parts_mut(memblk.addr().get() as *mut u8, memblk.len()) + }; + + let copy_len = + (remaining_data.len() - data_offset).min(page_data.len() - page_offset); + + if copy_len == 0 { + break; + } + + page_data[page_offset..page_offset + copy_len] + .copy_from_slice(&remaining_data[data_offset..data_offset + 
copy_len]); + + data_offset += copy_len; + page_offset = 0; // Only first page has offset + + if data_offset >= remaining_data.len() { + break; + } + } + + let write_req = BlockDeviceRequest::Write { + sector: sector_start, + count: sector_count, + buffer: pages, + }; + self.commit_request(write_req)?; + + let bytes_written = data_offset; + nwritten += bytes_written; + remaining_data = &remaining_data[bytes_written..]; + sector_start += sector_count; + first_sector_offset = 0; + } + + Ok(nwritten) + } } pub enum BlockDeviceRequest<'lt> { diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs index cd23fc14..aff3271e 100644 --- a/src/kernel/chardev.rs +++ b/src/kernel/chardev.rs @@ -2,12 +2,9 @@ use super::{ block::make_device, console::get_console, constants::{EEXIST, EIO}, - task::{ProcessList, Thread}, + task::{block_on, ProcessList, Thread}, terminal::Terminal, - vfs::{ - file::{File, FileType, TerminalFile}, - DevId, - }, + vfs::{DevId, File, FileType, TerminalFile}, }; use crate::{ io::{Buffer, Stream, StreamRead}, @@ -18,7 +15,6 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; -use eonix_runtime::task::Task; use eonix_sync::AsProof as _; use posix_types::open::OpenFlags; @@ -43,7 +39,7 @@ static CHAR_DEVICES: Spin>> = Spin::new(BTreeMap impl CharDevice { pub fn read(&self, buffer: &mut dyn Buffer) -> KResult { match &self.device { - CharDeviceType::Terminal(terminal) => Task::block_on(terminal.read(buffer)), + CharDeviceType::Terminal(terminal) => block_on(terminal.read(buffer)), CharDeviceType::Virtual(device) => device.read(buffer), } } @@ -72,16 +68,16 @@ impl CharDevice { } } - pub fn open(self: &Arc, flags: OpenFlags) -> KResult> { + pub fn open(self: &Arc, flags: OpenFlags) -> KResult { Ok(match &self.device { CharDeviceType::Terminal(terminal) => { - let procs = Task::block_on(ProcessList::get().read()); + let procs = block_on(ProcessList::get().read()); let current = Thread::current(); let session = current.process.session(procs.prove()); // We only set the control terminal if the process is the session leader. if session.sid == Thread::current().process.pid { // Silently fail if we can't set the control terminal. 
- dont_check!(Task::block_on(session.set_control_terminal( + dont_check!(block_on(session.set_control_terminal( &terminal, false, procs.prove() @@ -123,7 +119,7 @@ struct ConsoleDevice; impl VirtualCharDevice for ConsoleDevice { fn read(&self, buffer: &mut dyn Buffer) -> KResult { let console_terminal = get_console().ok_or(EIO)?; - Task::block_on(console_terminal.read(buffer)) + block_on(console_terminal.read(buffer)) } fn write(&self, stream: &mut dyn Stream) -> KResult { diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs index 1a84d534..742727cb 100644 --- a/src/kernel/interrupt.rs +++ b/src/kernel/interrupt.rs @@ -1,5 +1,6 @@ use super::mem::handle_kernel_page_fault; -use super::timer::{should_reschedule, timer_interrupt}; +use super::task::block_on; +use super::timer::timer_interrupt; use crate::kernel::constants::EINVAL; use crate::prelude::*; use alloc::sync::Arc; @@ -7,7 +8,6 @@ use eonix_hal::traits::fault::Fault; use eonix_hal::traits::trap::{RawTrapContext, TrapType}; use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; -use eonix_runtime::scheduler::Scheduler; use eonix_sync::SpinIrq as _; static IRQ_HANDLERS: Spin<[Vec>; 16]> = @@ -37,7 +37,7 @@ pub fn default_fault_handler(fault_type: Fault, trap_ctx: &mut TrapContext) { } => { let fault_pc = VAddr::from(trap_ctx.get_program_counter()); - if let Some(new_pc) = handle_kernel_page_fault(fault_pc, vaddr, error_code) { + if let Some(new_pc) = block_on(handle_kernel_page_fault(fault_pc, vaddr, error_code)) { trap_ctx.set_program_counter(new_pc.addr()); } } @@ -49,17 +49,10 @@ pub fn default_fault_handler(fault_type: Fault, trap_ctx: &mut TrapContext) { pub fn interrupt_handler(trap_ctx: &mut TrapContext) { match trap_ctx.trap_type() { TrapType::Syscall { no, .. } => unreachable!("Syscall {} in kernel space.", no), + TrapType::Breakpoint => unreachable!("Breakpoint in kernel space."), TrapType::Fault(fault) => default_fault_handler(fault, trap_ctx), TrapType::Irq { callback } => callback(default_irq_handler), - TrapType::Timer { callback } => { - callback(timer_interrupt); - - if eonix_preempt::count() == 0 && should_reschedule() { - // To make scheduler satisfied. 
- eonix_preempt::disable(); - Scheduler::schedule(); - } - } + TrapType::Timer { callback } => callback(timer_interrupt), } } diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs index ce705cff..efd06824 100644 --- a/src/kernel/mem.rs +++ b/src/kernel/mem.rs @@ -12,5 +12,5 @@ pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess}; pub(self) use mm_area::MMArea; pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission}; pub use page_alloc::{GlobalPageAlloc, RawPage}; -pub use page_cache::{CachePage, PageCache, PageCacheBackend}; +pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackend}; pub use paging::{Page, PageBuffer}; diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index 956ae7e4..731c5303 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -9,7 +9,6 @@ use core::cmp; use eonix_mm::address::{AddrOps as _, VAddr, VRange}; use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; use eonix_mm::paging::{PAGE_SIZE, PFN}; -use eonix_runtime::task::Task; #[derive(Debug)] pub struct MMArea { @@ -19,6 +18,9 @@ pub struct MMArea { pub is_shared: bool, } +unsafe impl Send for MMArea {} +unsafe impl Sync for MMArea {} + impl Clone for MMArea { fn clone(&self) -> Self { Self { @@ -200,7 +202,7 @@ impl MMArea { Ok(()) } - pub fn handle(&self, pte: &mut impl PTE, offset: usize, write: bool) -> KResult<()> { + pub async fn handle(&self, pte: &mut impl PTE, offset: usize, write: bool) -> KResult<()> { let mut attr = pte.get_attr().as_page_attr().expect("Not a page attribute"); let mut pfn = pte.get_pfn(); @@ -209,7 +211,7 @@ impl MMArea { } if attr.contains(PageAttribute::MAPPED) { - Task::block_on(self.handle_mmap(&mut pfn, &mut attr, offset, write))?; + self.handle_mmap(&mut pfn, &mut attr, offset, write).await?; } attr.insert(PageAttribute::ACCESSED); diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index 1d142546..ad1e45c2 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -23,7 +23,6 @@ use eonix_mm::{ page_table::{PageTable, RawAttribute, PTE}, paging::PAGE_SIZE, }; -use eonix_runtime::task::Task; use eonix_sync::{LazyLock, Mutex}; pub use mapping::{FileMapping, Mapping}; @@ -488,7 +487,7 @@ impl MMList { Ok(()) } - pub fn map_vdso(&self) -> KResult<()> { + pub async fn map_vdso(&self) -> KResult<()> { unsafe extern "C" { fn VDSO_PADDR(); } @@ -507,7 +506,7 @@ impl MMList { const VDSO_SIZE: usize = 0x1000; let inner = self.inner.borrow(); - let inner = Task::block_on(inner.lock()); + let inner = inner.lock().await; let mut pte_iter = inner .page_table @@ -529,7 +528,7 @@ impl MMList { Ok(()) } - pub fn mmap_hint( + pub async fn mmap_hint( &self, hint: VAddr, len: usize, @@ -538,7 +537,7 @@ impl MMList { is_shared: bool, ) -> KResult { let inner = self.inner.borrow(); - let mut inner = Task::block_on(inner.lock()); + let mut inner = inner.lock().await; if hint == VAddr::NULL { let at = inner.find_available(hint, len).ok_or(ENOMEM)?; @@ -557,7 +556,7 @@ impl MMList { } } - pub fn mmap_fixed( + pub async fn mmap_fixed( &self, at: VAddr, len: usize, @@ -565,14 +564,17 @@ impl MMList { permission: Permission, is_shared: bool, ) -> KResult { - Task::block_on(self.inner.borrow().lock()) + self.inner + .borrow() + .lock() + .await .mmap(at, len, mapping.clone(), permission, is_shared) .map(|_| at) } - pub fn set_break(&self, pos: Option) -> VAddr { + pub async fn set_break(&self, pos: Option) -> VAddr { let inner = self.inner.borrow(); - let mut inner = 
Task::block_on(inner.lock()); + let mut inner = inner.lock().await; // SAFETY: `set_break` is only called in syscalls, where program break should be valid. assert!(inner.break_start.is_some() && inner.break_pos.is_some()); @@ -629,9 +631,9 @@ impl MMList { } /// This should be called only **once** for every thread. - pub fn register_break(&self, start: VAddr) { + pub async fn register_break(&self, start: VAddr) { let inner = self.inner.borrow(); - let mut inner = Task::block_on(inner.lock()); + let mut inner = inner.lock().await; assert!(inner.break_start.is_none() && inner.break_pos.is_none()); inner.break_start = Some(start.into()); @@ -640,7 +642,7 @@ impl MMList { /// Access the memory area with the given function. /// The function will be called with the offset of the area and the slice of the area. - pub fn access_mut(&self, start: VAddr, len: usize, func: F) -> KResult<()> + pub async fn access_mut(&self, start: VAddr, len: usize, func: F) -> KResult<()> where F: Fn(usize, &mut [u8]), { @@ -651,7 +653,7 @@ impl MMList { } let inner = self.inner.borrow(); - let inner = Task::block_on(inner.lock()); + let inner = inner.lock().await; let mut offset = 0; let mut remaining = len; @@ -676,7 +678,7 @@ impl MMList { let page_end = page_start + 0x1000; // Prepare for the worst case that we might write to the page... - area.handle(pte, page_start - area_start, true)?; + area.handle(pte, page_start - area_start, true).await?; let start_offset; if page_start < current { diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs index bb62b388..6f14583d 100644 --- a/src/kernel/mem/mm_list/page_fault.rs +++ b/src/kernel/mem/mm_list/page_fault.rs @@ -4,7 +4,6 @@ use eonix_hal::mm::flush_tlb; use eonix_hal::traits::fault::PageFaultErrorCode; use eonix_mm::address::{Addr as _, AddrOps as _, VRange}; use eonix_mm::paging::PAGE_SIZE; -use eonix_runtime::task::Task; use posix_types::signal::Signal; #[repr(C)] @@ -95,6 +94,7 @@ impl MMList { addr.floor() - area.range().start(), error.contains(PageFaultErrorCode::Write), ) + .await .map_err(|_| Signal::SIGBUS)?; flush_tlb(addr.floor().addr()); @@ -129,7 +129,7 @@ fn kernel_page_fault_die(vaddr: VAddr, pc: VAddr) -> ! 
{ ) } -pub fn handle_kernel_page_fault( +pub async fn handle_kernel_page_fault( fault_pc: VAddr, addr: VAddr, error: PageFaultErrorCode, @@ -149,7 +149,7 @@ pub fn handle_kernel_page_fault( let mms = &Thread::current().process.mm_list; let inner = mms.inner.borrow(); - let inner = Task::block_on(inner.lock()); + let inner = inner.lock().await; let area = match inner.areas.get(&VRange::from(addr)) { Some(area) => area, @@ -164,11 +164,14 @@ pub fn handle_kernel_page_fault( .next() .expect("If we can find the mapped area, we should be able to find the PTE"); - if let Err(_) = area.handle( - pte, - addr.floor() - area.range().start(), - error.contains(PageFaultErrorCode::Write), - ) { + if let Err(_) = area + .handle( + pte, + addr.floor() - area.range().start(), + error.contains(PageFaultErrorCode::Write), + ) + .await + { return Some(try_page_fault_fix(fault_pc, addr)); } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 4b420255..54d4d590 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -6,7 +6,6 @@ use core::{ sync::atomic::{AtomicU32, AtomicUsize, Ordering}, }; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::paging::PAGE_SIZE; use eonix_mm::{ address::{PAddr, PhysAccess as _}, paging::{RawPage as RawPageTrait, PFN}, diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 863e538e..3ccf3255 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -26,6 +26,8 @@ unsafe impl Sync for PageCache {} #[derive(Clone, Copy)] pub struct CachePage(RawPagePtr); +unsafe impl Send for CachePage {} + impl Buffer for CachePage { fn total(&self) -> usize { PAGE_SIZE @@ -125,27 +127,32 @@ impl PageCache { pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult { let mut pages = self.pages.lock().await; + let size = self.backend.upgrade().unwrap().size(); loop { + if offset >= size { + break; + } let page_id = offset >> PAGE_SIZE_BITS; let page = pages.get(&page_id); match page { Some(page) => { let inner_offset = offset % PAGE_SIZE; + let available_in_file = size.saturating_sub(offset); // TODO: still cause unnecessary IO if valid_size < PAGESIZE // and fill result is Done - if page.valid_size() == 0 - || buffer - .fill(&page.valid_data()[inner_offset..])? 
- .should_stop() + let page_data = &page.valid_data()[inner_offset..]; + let read_size = page_data.len().min(available_in_file); + + if read_size == 0 + || buffer.fill(&page_data[..read_size])?.should_stop() || buffer.available() == 0 { break; } - - offset += PAGE_SIZE - inner_offset; + offset += read_size; } None => { let mut new_page = CachePage::new(); @@ -217,7 +224,7 @@ impl PageCache { self.backend .upgrade() .unwrap() - .write_page(page, page_id << PAGE_SIZE_BITS)?; + .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)?; page.clear_dirty(); } } @@ -293,6 +300,51 @@ impl PageCache { } } +pub struct CachePageStream { + page: CachePage, + cur: usize, +} + +impl CachePageStream { + pub fn new(page: CachePage) -> Self { + Self { page, cur: 0 } + } + + pub fn remaining(&self) -> usize { + self.page.valid_size().saturating_sub(self.cur) + } + + pub fn is_drained(&self) -> bool { + self.cur >= self.page.valid_size() + } +} + +impl Stream for CachePageStream { + fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult> { + if self.cur >= self.page.valid_size() { + return Ok(None); + } + + let page_data = &self.page.all()[self.cur..self.page.valid_size()]; + let to_read = buf.len().min(page_data.len()); + + buf[..to_read].copy_from_slice(&page_data[..to_read]); + self.cur += to_read; + + Ok(Some(&mut buf[..to_read])) + } + + fn ignore(&mut self, len: usize) -> KResult> { + if self.cur >= self.page.valid_size() { + return Ok(None); + } + + let to_ignore = len.min(self.page.valid_size() - self.cur); + self.cur += to_ignore; + Ok(Some(to_ignore)) + } +} + // with this trait, "page cache" and "block cache" are unified, // for fs, offset is file offset (floor algin to PAGE_SIZE) // for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE) @@ -300,7 +352,7 @@ impl PageCache { pub trait PageCacheBackend { fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult; - fn write_page(&self, page: &CachePage, offset: usize) -> KResult; + fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult; fn size(&self) -> usize; } diff --git a/src/kernel/syscall.rs b/src/kernel/syscall.rs index 0276ebdf..78ddcd1c 100644 --- a/src/kernel/syscall.rs +++ b/src/kernel/syscall.rs @@ -1,5 +1,10 @@ +use super::task::ThreadAlloc; use crate::kernel::task::Thread; +use alloc::boxed::Box; +use core::{future::Future, marker::PhantomData, ops::Deref, pin::Pin}; +use eonix_mm::address::{Addr, VAddr}; use eonix_sync::LazyLock; +use posix_types::ctypes::PtrT; pub mod file_rw; pub mod mm; @@ -12,15 +17,33 @@ const MAX_SYSCALL_NO: usize = 512; #[derive(Debug, Clone, Copy)] pub struct SyscallNoReturn; +#[derive(Clone, Copy)] +pub struct User(VAddr, PhantomData); + +#[derive(Clone, Copy)] +pub struct UserMut(VAddr, PhantomData); + #[repr(C)] pub(self) struct RawSyscallHandler { no: usize, - handler: fn(&Thread, [usize; 6]) -> Option, + handler: for<'thd, 'alloc> fn( + &'thd Thread, + ThreadAlloc<'alloc>, + [usize; 6], + ) -> Pin< + Box> + Send + 'thd, ThreadAlloc<'alloc>>, + >, name: &'static str, } pub struct SyscallHandler { - pub handler: fn(&Thread, [usize; 6]) -> Option, + pub handler: for<'thd, 'alloc> fn( + &'thd Thread, + ThreadAlloc<'alloc>, + [usize; 6], + ) -> Pin< + Box> + Send + 'thd, ThreadAlloc<'alloc>>, + >, pub name: &'static str, } @@ -80,6 +103,18 @@ impl SyscallRetVal for SyscallNoReturn { } } +impl SyscallRetVal for User { + fn into_retval(self) -> Option { + Some(self.0.addr()) + } +} + +impl SyscallRetVal for UserMut { + fn into_retval(self) -> Option 
{ + Some(self.0.addr()) + } +} + #[cfg(not(target_arch = "x86_64"))] impl SyscallRetVal for u64 { fn into_retval(self) -> Option { @@ -112,15 +147,135 @@ impl FromSyscallArg for usize { } } -impl FromSyscallArg for *const T { - fn from_arg(value: usize) -> *const T { - value as *const T +impl FromSyscallArg for PtrT { + fn from_arg(value: usize) -> Self { + PtrT::new(value).expect("Invalid user pointer value") + } +} + +impl FromSyscallArg for User { + fn from_arg(value: usize) -> User { + User(VAddr::from(value), PhantomData) + } +} + +impl FromSyscallArg for UserMut { + fn from_arg(value: usize) -> UserMut { + UserMut(VAddr::from(value), PhantomData) + } +} + +impl User { + pub const fn new(addr: VAddr) -> Self { + Self(addr, PhantomData) + } + + pub const fn with_addr(addr: usize) -> Self { + Self::new(VAddr::from(addr)) + } + + pub const fn null() -> Self { + Self(VAddr::NULL, PhantomData) + } + + pub fn is_null(&self) -> bool { + self.0.addr() == 0 + } + + pub const fn cast(self) -> User { + User(self.0, PhantomData) + } + + pub fn offset(self, off: isize) -> Self { + Self( + VAddr::from( + self.0 + .addr() + .checked_add_signed(off) + .expect("offset overflow"), + ), + PhantomData, + ) + } + + pub const unsafe fn as_mut(self) -> UserMut { + UserMut(self.0, PhantomData) } } -impl FromSyscallArg for *mut T { - fn from_arg(value: usize) -> *mut T { - value as *mut T +impl UserMut { + pub const fn new(addr: VAddr) -> Self { + Self(addr, PhantomData) + } + + pub const fn with_addr(addr: usize) -> Self { + Self::new(VAddr::from(addr)) + } + + pub const fn null() -> Self { + Self(VAddr::NULL, PhantomData) + } + + pub fn is_null(&self) -> bool { + self.0.addr() == 0 + } + + pub const fn cast(self) -> UserMut { + UserMut(self.0, PhantomData) + } + + pub fn offset(self, off: isize) -> Self { + Self( + VAddr::from( + self.0 + .addr() + .checked_add_signed(off) + .expect("offset overflow"), + ), + PhantomData, + ) + } + + pub const fn as_const(self) -> User { + User(self.0, PhantomData) + } + + pub const fn vaddr(&self) -> VAddr { + self.0 + } +} + +impl Deref for User { + type Target = VAddr; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Deref for UserMut { + type Target = VAddr; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl core::fmt::Debug for User { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.0 { + VAddr::NULL => write!(f, "User(NULL)"), + _ => write!(f, "User({:#018x?})", self.0.addr()), + } + } +} + +impl core::fmt::Debug for UserMut { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.0 { + VAddr::NULL => write!(f, "UserMut(NULL)"), + _ => write!(f, "UserMut({:#018x?})", self.0.addr()), + } } } diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 5683b27e..1a48b255 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,30 +1,25 @@ -use core::time::Duration; - -use super::FromSyscallArg; +use super::{FromSyscallArg, User}; use crate::io::IntoStream; use crate::kernel::constants::{ - EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, S_IFBLK, S_IFCHR, + EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, }; +use crate::kernel::syscall::UserMut; use crate::kernel::task::Thread; use crate::kernel::timer::sleep; use crate::kernel::vfs::filearray::FD; +use crate::kernel::vfs::inode::Mode; +use crate::kernel::vfs::{PollEvent, SeekOption}; use crate::{ io::{Buffer, 
BufferFill}, kernel::{ - user::{ - dataflow::{CheckedUserPointer, UserBuffer, UserString}, - UserPointer, UserPointerMut, - }, - vfs::{ - dentry::Dentry, - file::{PollEvent, SeekOption}, - }, + user::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}, + vfs::dentry::Dentry, }, path::Path, prelude::*, }; use alloc::sync::Arc; -use eonix_runtime::task::Task; +use core::time::Duration; use posix_types::ctypes::{Long, PtrT}; use posix_types::namei::RenameFlags; use posix_types::open::{AtFlags, OpenFlags}; @@ -49,7 +44,7 @@ impl FromSyscallArg for AtFlags { fn dentry_from( thread: &Thread, dirfd: FD, - pathname: *const u8, + pathname: User, follow_symlink: bool, ) -> KResult> { let path = UserString::new(pathname)?; @@ -74,83 +69,95 @@ fn dentry_from( } #[eonix_macros::define_syscall(SYS_READ)] -fn read(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { +async fn read(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - Task::block_on(thread.files.get(fd).ok_or(EBADF)?.read(&mut buffer, None)) + thread + .files + .get(fd) + .ok_or(EBADF)? + .read(&mut buffer, None) + .await } #[eonix_macros::define_syscall(SYS_PREAD64)] -fn pread64(fd: FD, buffer: *mut u8, bufsize: usize, offset: usize) -> KResult { +async fn pread64(fd: FD, buffer: UserMut, bufsize: usize, offset: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - Task::block_on( - thread - .files - .get(fd) - .ok_or(EBADF)? - .read(&mut buffer, Some(offset)), - ) + thread + .files + .get(fd) + .ok_or(EBADF)? + .read(&mut buffer, Some(offset)) + .await } #[eonix_macros::define_syscall(SYS_WRITE)] -fn write(fd: FD, buffer: *const u8, count: usize) -> KResult { +async fn write(fd: FD, buffer: User, count: usize) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); - Task::block_on(thread.files.get(fd).ok_or(EBADF)?.write(&mut stream, None)) + thread + .files + .get(fd) + .ok_or(EBADF)? + .write(&mut stream, None) + .await } #[eonix_macros::define_syscall(SYS_PWRITE64)] -fn pwrite64(fd: FD, buffer: *const u8, count: usize, offset: usize) -> KResult { +async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); - Task::block_on( - thread - .files - .get(fd) - .ok_or(EBADF)? - .write(&mut stream, Some(offset)), - ) + thread + .files + .get(fd) + .ok_or(EBADF)? 
+ .write(&mut stream, Some(offset)) + .await } #[eonix_macros::define_syscall(SYS_OPENAT)] -fn openat(dirfd: FD, pathname: *const u8, flags: OpenFlags, mode: u32) -> KResult { +async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mut mode: Mode) -> KResult { let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink())?; + + let umask = *thread.fs_context.umask.lock(); + mode.mask_perm(!umask.non_format_bits()); + thread.files.open(&dentry, flags, mode) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_OPEN)] -fn open(path: *const u8, flags: OpenFlags, mode: u32) -> KResult { - sys_openat(thread, FD::AT_FDCWD, path, flags, mode) +async fn open(path: User, flags: OpenFlags, mode: u32) -> KResult { + sys_openat(thread, FD::AT_FDCWD, path, flags, mode).await } #[eonix_macros::define_syscall(SYS_CLOSE)] -fn close(fd: FD) -> KResult<()> { - thread.files.close(fd) +async fn close(fd: FD) -> KResult<()> { + thread.files.close(fd).await } #[eonix_macros::define_syscall(SYS_DUP)] -fn dup(fd: FD) -> KResult { +async fn dup(fd: FD) -> KResult { thread.files.dup(fd) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_DUP2)] -fn dup2(old_fd: FD, new_fd: FD) -> KResult { +async fn dup2(old_fd: FD, new_fd: FD) -> KResult { thread.files.dup_to(old_fd, new_fd, OpenFlags::empty()) } #[eonix_macros::define_syscall(SYS_DUP3)] -fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { - thread.files.dup_to(old_fd, new_fd, flags) +async fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { + thread.files.dup_to(old_fd, new_fd, flags).await } #[eonix_macros::define_syscall(SYS_PIPE2)] -fn pipe2(pipe_fd: *mut [FD; 2], flags: OpenFlags) -> KResult<()> { - let mut buffer = UserBuffer::new(pipe_fd as *mut u8, core::mem::size_of::<[FD; 2]>())?; +async fn pipe2(pipe_fd: UserMut<[FD; 2]>, flags: OpenFlags) -> KResult<()> { + let mut buffer = UserBuffer::new(pipe_fd.cast(), core::mem::size_of::<[FD; 2]>())?; let (read_fd, write_fd) = thread.files.pipe(flags)?; buffer.copy(&[read_fd, write_fd])?.ok_or(EFAULT) @@ -158,13 +165,13 @@ fn pipe2(pipe_fd: *mut [FD; 2], flags: OpenFlags) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_PIPE)] -fn pipe(pipe_fd: *mut [FD; 2]) -> KResult<()> { - sys_pipe2(thread, pipe_fd, OpenFlags::empty()) +async fn pipe(pipe_fd: UserMut<[FD; 2]>) -> KResult<()> { + sys_pipe2(thread, pipe_fd, OpenFlags::empty()).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETDENTS)] -fn getdents(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { +async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread.files.get(fd).ok_or(EBADF)?.getdents(&mut buffer)?; @@ -172,10 +179,15 @@ fn getdents(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_GETDENTS64)] -fn getdents64(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { +async fn getdents64(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - thread.files.get(fd).ok_or(EBADF)?.getdents64(&mut buffer)?; + thread + .files + .get(fd) + .ok_or(EBADF)? 
+ .getdents64(&mut buffer) + .await?; Ok(buffer.wrote()) } @@ -184,7 +196,12 @@ fn getdents64(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult { eonix_macros::define_syscall(SYS_NEWFSTATAT) )] #[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTATAT64))] -fn newfstatat(dirfd: FD, pathname: *const u8, statbuf: *mut Stat, flags: AtFlags) -> KResult<()> { +async fn newfstatat( + dirfd: FD, + pathname: User, + statbuf: UserMut, + flags: AtFlags, +) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -207,23 +224,17 @@ fn newfstatat(dirfd: FD, pathname: *const u8, statbuf: *mut Stat, flags: AtFlags eonix_macros::define_syscall(SYS_NEWFSTAT) )] #[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTAT64))] -fn newfstat(fd: FD, statbuf: *mut Stat) -> KResult<()> { - sys_newfstatat( - thread, - fd, - core::ptr::null(), - statbuf, - AtFlags::AT_EMPTY_PATH, - ) +async fn newfstat(fd: FD, statbuf: UserMut) -> KResult<()> { + sys_newfstatat(thread, fd, User::null(), statbuf, AtFlags::AT_EMPTY_PATH).await } #[eonix_macros::define_syscall(SYS_STATX)] -fn statx( +async fn statx( dirfd: FD, - pathname: *const u8, + pathname: User, flags: AtFlags, mask: u32, - buffer: *mut StatX, + buffer: UserMut, ) -> KResult<()> { if !flags.statx_default_sync() { unimplemented!("statx with no default sync flags: {:?}", flags); @@ -246,9 +257,9 @@ fn statx( } #[eonix_macros::define_syscall(SYS_MKDIRAT)] -fn mkdirat(dirfd: FD, pathname: *const u8, mode: u32) -> KResult<()> { +async fn mkdirat(dirfd: FD, pathname: User, mut mode: Mode) -> KResult<()> { let umask = *thread.fs_context.umask.lock(); - let mode = mode & !umask & 0o777; + mode.mask_perm(!umask.non_format_bits()); let dentry = dentry_from(thread, dirfd, pathname, true)?; dentry.mkdir(mode) @@ -256,19 +267,19 @@ fn mkdirat(dirfd: FD, pathname: *const u8, mode: u32) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKDIR)] -fn mkdir(pathname: *const u8, mode: u32) -> KResult<()> { - sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode) +async fn mkdir(pathname: User, mode: u32) -> KResult<()> { + sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode).await } #[eonix_macros::define_syscall(SYS_FTRUNCATE64)] -fn truncate64(fd: FD, length: usize) -> KResult<()> { +async fn truncate64(fd: FD, length: usize) -> KResult<()> { let file = thread.files.get(fd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.truncate(length) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_TRUNCATE)] -fn truncate(pathname: *const u8, length: usize) -> KResult<()> { +async fn truncate(pathname: User, length: usize) -> KResult<()> { let path = UserString::new(pathname)?; let path = Path::new(path.as_cstr().to_bytes())?; @@ -278,18 +289,18 @@ fn truncate(pathname: *const u8, length: usize) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_UNLINKAT)] -fn unlinkat(dirfd: FD, pathname: *const u8) -> KResult<()> { +async fn unlinkat(dirfd: FD, pathname: User) -> KResult<()> { dentry_from(thread, dirfd, pathname, false)?.unlink() } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_UNLINK)] -fn unlink(pathname: *const u8) -> KResult<()> { +async fn unlink(pathname: User) -> KResult<()> { sys_unlinkat(thread, FD::AT_FDCWD, pathname) } #[eonix_macros::define_syscall(SYS_SYMLINKAT)] -fn symlinkat(target: *const u8, dirfd: FD, linkpath: *const u8) -> KResult<()> { +async fn symlinkat(target: User, dirfd: FD, 
linkpath: User) -> KResult<()> { let target = UserString::new(target)?; let dentry = dentry_from(thread, dirfd, linkpath, false)?; @@ -298,28 +309,37 @@ fn symlinkat(target: *const u8, dirfd: FD, linkpath: *const u8) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SYMLINK)] -fn symlink(target: *const u8, linkpath: *const u8) -> KResult<()> { +async fn symlink(target: User, linkpath: User) -> KResult<()> { sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath) } #[eonix_macros::define_syscall(SYS_MKNODAT)] -fn mknodat(dirfd: FD, pathname: *const u8, mode: u32, dev: u32) -> KResult<()> { +async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: u32) -> KResult<()> { + if !mode.is_blk() && !mode.is_chr() { + return Err(EINVAL); + } + let dentry = dentry_from(thread, dirfd, pathname, true)?; let umask = *thread.fs_context.umask.lock(); - let mode = mode & ((!umask & 0o777) | (S_IFBLK | S_IFCHR)); + mode.mask_perm(!umask.non_format_bits()); dentry.mknod(mode, dev) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKNOD)] -fn mknod(pathname: *const u8, mode: u32, dev: u32) -> KResult<()> { - sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev) +async fn mknod(pathname: User, mode: u32, dev: u32) -> KResult<()> { + sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev).await } #[eonix_macros::define_syscall(SYS_READLINKAT)] -fn readlinkat(dirfd: FD, pathname: *const u8, buffer: *mut u8, bufsize: usize) -> KResult { +async fn readlinkat( + dirfd: FD, + pathname: User, + buffer: UserMut, + bufsize: usize, +) -> KResult { let dentry = dentry_from(thread, dirfd, pathname, false)?; let mut buffer = UserBuffer::new(buffer, bufsize)?; @@ -328,34 +348,40 @@ fn readlinkat(dirfd: FD, pathname: *const u8, buffer: *mut u8, bufsize: usize) - #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_READLINK)] -fn readlink(pathname: *const u8, buffer: *mut u8, bufsize: usize) -> KResult { - sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize) +async fn readlink(pathname: User, buffer: UserMut, bufsize: usize) -> KResult { + sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize).await } -fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { +async fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; Ok(match whence { - SEEK_SET => file.seek(SeekOption::Set(offset as usize))?, - SEEK_CUR => file.seek(SeekOption::Current(offset as isize))?, - SEEK_END => file.seek(SeekOption::End(offset as isize))?, + SEEK_SET => file.seek(SeekOption::Set(offset as usize)).await?, + SEEK_CUR => file.seek(SeekOption::Current(offset as isize)).await?, + SEEK_END => file.seek(SeekOption::End(offset as isize)).await?, _ => return Err(EINVAL), } as u64) } #[cfg(not(target_arch = "x86_64"))] #[eonix_macros::define_syscall(SYS_LSEEK)] -fn lseek(fd: FD, offset: u64, whence: u32) -> KResult { - do_lseek(thread, fd, offset, whence) +async fn lseek(fd: FD, offset: u64, whence: u32) -> KResult { + do_lseek(thread, fd, offset, whence).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_LLSEEK)] -fn llseek(fd: FD, offset_high: u32, offset_low: u32, result: *mut u64, whence: u32) -> KResult<()> { - let mut result = UserBuffer::new(result as *mut u8, core::mem::size_of::())?; +fn llseek( + fd: FD, + offset_high: u32, + offset_low: u32, + result: UserMut, + whence: u32, +) -> KResult<()> { + let mut result = UserBuffer::new(result.cast(), 
core::mem::size_of::())?; let offset = ((offset_high as u64) << 32) | (offset_low as u64); - let new_offset = do_lseek(thread, fd, offset, whence)?; + let new_offset = do_lseek(thread, fd, offset, whence).await?; result.copy(&new_offset)?.ok_or(EFAULT) } @@ -368,7 +394,7 @@ struct IoVec { } #[eonix_macros::define_syscall(SYS_READV)] -fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { +async fn readv(fd: FD, iov_user: User, iovcnt: u32) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; let mut iov_user = UserPointer::new(iov_user)?; @@ -383,14 +409,16 @@ fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { Ok(IoVec { len: Long::ZERO, .. }) => None, - Ok(IoVec { base, len }) => Some(UserBuffer::new(base.addr() as *mut u8, len.get())), + Ok(IoVec { base, len }) => { + Some(UserBuffer::new(UserMut::with_addr(base.addr()), len.get())) + } }) .collect::>>()?; let mut tot = 0usize; for mut buffer in iov_buffers.into_iter() { // TODO!!!: `readv` - let nread = Task::block_on(file.read(&mut buffer, None))?; + let nread = file.read(&mut buffer, None).await?; tot += nread; if nread != buffer.total() { @@ -402,7 +430,7 @@ fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_WRITEV)] -fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { +async fn writev(fd: FD, iov_user: User, iovcnt: u32) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; let mut iov_user = UserPointer::new(iov_user)?; @@ -418,7 +446,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { len: Long::ZERO, .. }) => None, Ok(IoVec { base, len }) => Some( - CheckedUserPointer::new(base.addr() as *mut u8, len.get()) + CheckedUserPointer::new(User::with_addr(base.addr()), len.get()) .map(|ptr| ptr.into_stream()), ), }) @@ -426,7 +454,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { let mut tot = 0usize; for mut stream in iov_streams.into_iter() { - let nread = Task::block_on(file.write(&mut stream, None))?; + let nread = file.write(&mut stream, None).await?; tot += nread; if nread == 0 || !stream.is_drained() { @@ -438,7 +466,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_FACCESSAT)] -fn faccessat(dirfd: FD, pathname: *const u8, _mode: u32, flags: AtFlags) -> KResult<()> { +async fn faccessat(dirfd: FD, pathname: User, _mode: u32, flags: AtFlags) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -464,12 +492,12 @@ fn faccessat(dirfd: FD, pathname: *const u8, _mode: u32, flags: AtFlags) -> KRes #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_ACCESS)] -fn access(pathname: *const u8, mode: u32) -> KResult<()> { - sys_faccessat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()) +async fn access(pathname: User, mode: u32) -> KResult<()> { + sys_faccessat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()).await } #[eonix_macros::define_syscall(SYS_SENDFILE64)] -fn sendfile64(out_fd: FD, in_fd: FD, offset: *mut u8, count: usize) -> KResult { +async fn sendfile64(out_fd: FD, in_fd: FD, offset: UserMut, count: usize) -> KResult { let in_file = thread.files.get(in_fd).ok_or(EBADF)?; let out_file = thread.files.get(out_fd).ok_or(EBADF)?; @@ -477,18 +505,18 @@ fn sendfile64(out_fd: FD, in_fd: FD, offset: *mut u8, count: usize) -> KResult KResult { +async fn ioctl(fd: FD, request: usize, arg3: usize) -> KResult { 
let file = thread.files.get(fd).ok_or(EBADF)?; - file.ioctl(request, arg3) + file.ioctl(request, arg3).await } #[eonix_macros::define_syscall(SYS_FCNTL64)] -fn fcntl64(fd: FD, cmd: u32, arg: usize) -> KResult { +async fn fcntl64(fd: FD, cmd: u32, arg: usize) -> KResult { thread.files.fcntl(fd, cmd, arg) } @@ -500,7 +528,12 @@ struct UserPollFd { revents: u16, } -fn do_poll(thread: &Thread, fds: *mut UserPollFd, nfds: u32, _timeout: u32) -> KResult { +async fn do_poll( + thread: &Thread, + fds: UserMut, + nfds: u32, + _timeout: u32, +) -> KResult { match nfds { 0 => Ok(0), 2.. => unimplemented!("Poll with {} fds", nfds), @@ -513,7 +546,10 @@ fn do_poll(thread: &Thread, fds: *mut UserPollFd, nfds: u32, _timeout: u32) -> K let mut fd = fds.read()?; let file = thread.files.get(fd.fd).ok_or(EBADF)?; - fd.revents = Task::block_on(file.poll(PollEvent::from_bits_retain(fd.events)))?.bits(); + fd.revents = file + .poll(PollEvent::from_bits_retain(fd.events)) + .await? + .bits(); fds.write(fd)?; Ok(1) @@ -522,24 +558,24 @@ fn do_poll(thread: &Thread, fds: *mut UserPollFd, nfds: u32, _timeout: u32) -> K } #[eonix_macros::define_syscall(SYS_PPOLL)] -fn ppoll( - fds: *mut UserPollFd, +async fn ppoll( + fds: UserMut, nfds: u32, - _timeout_ptr: *const TimeSpec, - _sigmask: *const SigSet, + _timeout_ptr: User, + _sigmask: User, ) -> KResult { // TODO: Implement ppoll with signal mask and timeout - do_poll(thread, fds, nfds, 0) + do_poll(thread, fds, nfds, 0).await } #[eonix_macros::define_syscall(SYS_PSELECT6)] -fn pselect6( +async fn pselect6( nfds: u32, - _readfds: *mut FDSet, - _writefds: *mut FDSet, - _exceptfds: *mut FDSet, - timeout: *mut TimeSpec, - _sigmask: *const (), + _readfds: UserMut, + _writefds: UserMut, + _exceptfds: UserMut, + timeout: UserMut, + _sigmask: User<()>, ) -> KResult { // According to [pthread6(2)](https://linux.die.net/man/2/pselect6): // Some code calls select() with all three sets empty, nfds zero, and @@ -550,11 +586,11 @@ fn pselect6( } let timeout = UserPointerMut::new(timeout)?; - + // Read here to check for invalid pointers. 
let _timeout_value = timeout.read()?; - Task::block_on(sleep(Duration::from_millis(10))); + sleep(Duration::from_millis(10)).await; timeout.write(TimeSpec { tv_sec: 0, @@ -566,12 +602,18 @@ fn pselect6( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_POLL)] -fn poll(fds: *mut UserPollFd, nfds: u32, timeout: u32) -> KResult { - do_poll(thread, fds, nfds, timeout) +async fn poll(fds: UserMut, nfds: u32, timeout: u32) -> KResult { + do_poll(thread, fds, nfds, timeout).await } #[eonix_macros::define_syscall(SYS_FCHOWNAT)] -fn fchownat(dirfd: FD, pathname: *const u8, uid: u32, gid: u32, flags: AtFlags) -> KResult<()> { +async fn fchownat( + dirfd: FD, + pathname: User, + uid: u32, + gid: u32, + flags: AtFlags, +) -> KResult<()> { let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow())?; if !dentry.is_valid() { return Err(ENOENT); @@ -581,7 +623,7 @@ fn fchownat(dirfd: FD, pathname: *const u8, uid: u32, gid: u32, flags: AtFlags) } #[eonix_macros::define_syscall(SYS_FCHMODAT)] -fn fchmodat(dirfd: FD, pathname: *const u8, mode: u32, flags: AtFlags) -> KResult<()> { +async fn fchmodat(dirfd: FD, pathname: User, mode: Mode, flags: AtFlags) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -597,15 +639,15 @@ fn fchmodat(dirfd: FD, pathname: *const u8, mode: u32, flags: AtFlags) -> KResul } #[eonix_macros::define_syscall(SYS_FCHMOD)] -fn chmod(pathname: *const u8, mode: u32) -> KResult<()> { - sys_fchmodat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()) +async fn chmod(pathname: User, mode: Mode) -> KResult<()> { + sys_fchmodat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty()).await } #[eonix_macros::define_syscall(SYS_UTIMENSAT)] -fn utimensat( +async fn utimensat( dirfd: FD, - pathname: *const u8, - times: *const TimeSpec, + pathname: User, + times: User, flags: AtFlags, ) -> KResult<()> { let dentry = if flags.at_empty_path() { @@ -632,11 +674,11 @@ fn utimensat( } #[eonix_macros::define_syscall(SYS_RENAMEAT2)] -fn renameat2( +async fn renameat2( old_dirfd: FD, - old_pathname: *const u8, + old_pathname: User, new_dirfd: FD, - new_pathname: *const u8, + new_pathname: User, flags: u32, ) -> KResult<()> { let flags = RenameFlags::from_bits(flags).ok_or(EINVAL)?; @@ -654,7 +696,7 @@ fn renameat2( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_RENAME)] -fn rename(old_pathname: *const u8, new_pathname: *const u8) -> KResult<()> { +async fn rename(old_pathname: User, new_pathname: User) -> KResult<()> { sys_renameat2( thread, FD::AT_FDCWD, @@ -663,6 +705,7 @@ fn rename(old_pathname: *const u8, new_pathname: *const u8) -> KResult<()> { new_pathname, 0, ) + .await } pub fn keep_alive() {} diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index b639650d..c6300ac7 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -1,9 +1,10 @@ use super::FromSyscallArg; use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; -use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT, ENOMEM}; +use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; use crate::kernel::mem::FileMapping; use crate::kernel::task::Thread; use crate::kernel::vfs::filearray::FD; +use crate::kernel::vfs::inode::Mode; use crate::{ kernel::{ constants::{UserMmapFlags, UserMmapProtocol}, @@ -14,7 +15,6 @@ use crate::{ use align_ext::AlignExt; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr}; use eonix_mm::paging::PAGE_SIZE; 
-use eonix_runtime::task::Task; use posix_types::syscall_no::*; impl FromSyscallArg for UserMmapProtocol { @@ -40,7 +40,7 @@ fn check_impl(condition: bool, err: u32) -> KResult<()> { } } -fn do_mmap2( +async fn do_mmap2( thread: &Thread, addr: usize, len: usize, @@ -67,10 +67,10 @@ fn do_mmap2( Mapping::Anonymous } else { // The mode is unimportant here, since we are checking prot in mm_area. - let shared_area = Task::block_on(SHM_MANAGER.lock()).create_shared_area( + let shared_area = SHM_MANAGER.lock().await.create_shared_area( len, thread.process.pid, - 0x777, + Mode::REG.perm(0o777), ); Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) } @@ -94,10 +94,14 @@ fn do_mmap2( // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether // `addr` is above user reachable memory. let addr = if flags.contains(UserMmapFlags::MAP_FIXED) { - Task::block_on(mm_list.unmap(addr, len)); - mm_list.mmap_fixed(addr, len, mapping, permission, is_shared) + mm_list.unmap(addr, len).await?; + mm_list + .mmap_fixed(addr, len, mapping, permission, is_shared) + .await } else { - mm_list.mmap_hint(addr, len, mapping, permission, is_shared) + mm_list + .mmap_hint(addr, len, mapping, permission, is_shared) + .await }; addr.map(|addr| addr.addr()) @@ -105,7 +109,7 @@ fn do_mmap2( #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] #[eonix_macros::define_syscall(SYS_MMAP)] -fn mmap( +async fn mmap( addr: usize, len: usize, prot: UserMmapProtocol, @@ -113,12 +117,12 @@ fn mmap( fd: FD, offset: usize, ) -> KResult { - do_mmap2(thread, addr, len, prot, flags, fd, offset) + do_mmap2(thread, addr, len, prot, flags, fd, offset).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MMAP2)] -fn mmap2( +async fn mmap2( addr: usize, len: usize, prot: UserMmapProtocol, @@ -126,33 +130,33 @@ fn mmap2( fd: FD, pgoffset: usize, ) -> KResult { - do_mmap2(thread, addr, len, prot, flags, fd, pgoffset) + do_mmap2(thread, addr, len, prot, flags, fd, pgoffset).await } #[eonix_macros::define_syscall(SYS_MUNMAP)] -fn munmap(addr: usize, len: usize) -> KResult { +async fn munmap(addr: usize, len: usize) -> KResult<()> { let addr = VAddr::from(addr); if !addr.is_page_aligned() || len == 0 { return Err(EINVAL); } let len = len.align_up(PAGE_SIZE); - Task::block_on(thread.process.mm_list.unmap(addr, len)).map(|_| 0) + thread.process.mm_list.unmap(addr, len).await } #[eonix_macros::define_syscall(SYS_BRK)] -fn brk(addr: usize) -> KResult { +async fn brk(addr: usize) -> KResult { let vaddr = if addr == 0 { None } else { Some(VAddr::from(addr)) }; - Ok(thread.process.mm_list.set_break(vaddr).addr()) + Ok(thread.process.mm_list.set_break(vaddr).await.addr()) } #[eonix_macros::define_syscall(SYS_MADVISE)] -fn madvise(_addr: usize, _len: usize, _advice: u32) -> KResult<()> { +async fn madvise(_addr: usize, _len: usize, _advice: u32) -> KResult<()> { Ok(()) } #[eonix_macros::define_syscall(SYS_MPROTECT)] -fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { +async fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { let addr = VAddr::from(addr); if !addr.is_page_aligned() || len == 0 { return Err(EINVAL); @@ -160,25 +164,29 @@ fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> { let len = len.align_up(PAGE_SIZE); - Task::block_on(thread.process.mm_list.protect( - addr, - len, - Permission { - read: prot.contains(UserMmapProtocol::PROT_READ), - write: prot.contains(UserMmapProtocol::PROT_WRITE), - execute: 
prot.contains(UserMmapProtocol::PROT_EXEC), - }, - )) + thread + .process + .mm_list + .protect( + addr, + len, + Permission { + read: prot.contains(UserMmapProtocol::PROT_READ), + write: prot.contains(UserMmapProtocol::PROT_WRITE), + execute: prot.contains(UserMmapProtocol::PROT_EXEC), + }, + ) + .await } #[eonix_macros::define_syscall(SYS_SHMGET)] -fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { +async fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { let size = size.align_up(PAGE_SIZE); - let mut shm_manager = Task::block_on(SHM_MANAGER.lock()); + let mut shm_manager = SHM_MANAGER.lock().await; let shmid = gen_shm_id(key)?; - let mode = shmflg & 0o777; + let mode = Mode::REG.perm(shmflg); let shmflg = ShmFlags::from_bits_truncate(shmflg); if key == IPC_PRIVATE { @@ -201,16 +209,17 @@ fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { return Ok(shmid); } - return Err(ENOENT); + Err(ENOENT) } #[eonix_macros::define_syscall(SYS_SHMAT)] -fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { +async fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { let mm_list = &thread.process.mm_list; - let shm_manager = Task::block_on(SHM_MANAGER.lock()); + let shm_manager = SHM_MANAGER.lock().await; let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?; - let mode = shmflg & 0o777; + // Why is this not used? + let _mode = shmflg & 0o777; let shmflg = ShmFlags::from_bits_truncate(shmflg); let mut permission = Permission { @@ -239,9 +248,13 @@ fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { return Err(EINVAL); } let addr = VAddr::from(addr.align_down(PAGE_SIZE)); - mm_list.mmap_fixed(addr, size, mapping, permission, true) + mm_list + .mmap_fixed(addr, size, mapping, permission, true) + .await } else { - mm_list.mmap_hint(VAddr::NULL, size, mapping, permission, true) + mm_list + .mmap_hint(VAddr::NULL, size, mapping, permission, true) + .await }?; thread.process.shm_areas.lock().insert(addr, size); @@ -250,22 +263,29 @@ fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_SHMDT)] -fn shmdt(addr: usize) -> KResult { +async fn shmdt(addr: usize) -> KResult<()> { let addr = VAddr::from(addr); - let mut shm_areas = thread.process.shm_areas.lock(); - let size = *shm_areas.get(&addr).ok_or(EINVAL)?; - shm_areas.remove(&addr); - drop(shm_areas); - return Task::block_on(thread.process.mm_list.unmap(addr, size)).map(|_| 0); + + let size = { + let mut shm_areas = thread.process.shm_areas.lock(); + let size = *shm_areas.get(&addr).ok_or(EINVAL)?; + shm_areas.remove(&addr); + + size + }; + + thread.process.mm_list.unmap(addr, size).await } #[eonix_macros::define_syscall(SYS_SHMCTL)] -fn shmctl(shmid: u32, op: i32, shmid_ds: usize) -> KResult { +async fn shmctl(_shmid: u32, _op: i32, _shmid_ds: usize) -> KResult { + // TODO Ok(0) } #[eonix_macros::define_syscall(SYS_MEMBARRIER)] -fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> { +async fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> { + // TODO Ok(()) } diff --git a/src/kernel/syscall/net.rs b/src/kernel/syscall/net.rs index 82ec9152..41ac58e6 100644 --- a/src/kernel/syscall/net.rs +++ b/src/kernel/syscall/net.rs @@ -3,7 +3,7 @@ use crate::prelude::*; use posix_types::syscall_no::*; #[eonix_macros::define_syscall(SYS_SOCKET)] -fn socket(_domain: u32, _socket_type: u32, _protocol: u32) -> KResult { +async fn socket(_domain: u32, _socket_type: u32, _protocol: u32) -> KResult { Err(EINVAL) } diff --git a/src/kernel/syscall/procops.rs 
b/src/kernel/syscall/procops.rs index c21aade5..7dd573cc 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -7,27 +7,27 @@ use crate::kernel::constants::{ ENOSYS, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, }; use crate::kernel::mem::PageBuffer; +use crate::kernel::syscall::{User, UserMut}; use crate::kernel::task::{ do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, }; use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; -use crate::kernel::user::dataflow::UserString; +use crate::kernel::user::UserString; use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::vfs::inode::Mode; use crate::kernel::vfs::{self, dentry::Dentry}; use crate::path::Path; -use crate::{kernel::user::dataflow::UserBuffer, prelude::*}; +use crate::{kernel::user::UserBuffer, prelude::*}; use alloc::borrow::ToOwned; use alloc::ffi::CString; use bitflags::bitflags; -use core::ptr::NonNull; use core::time::Duration; use eonix_hal::processor::UserTLS; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; -use eonix_mm::address::{Addr as _, VAddr}; -use eonix_runtime::task::Task; +use eonix_mm::address::Addr as _; use eonix_sync::AsProof as _; use posix_types::ctypes::PtrT; use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; @@ -50,7 +50,7 @@ bitflags! { } #[eonix_macros::define_syscall(SYS_NANOSLEEP)] -fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { +async fn nanosleep(req: User<(u32, u32)>, rem: UserMut<(u32, u32)>) -> KResult { let req = UserPointer::new(req)?.read()?; let rem = if rem.is_null() { None @@ -59,7 +59,7 @@ fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - Task::block_on(sleep(duration)); + sleep(duration).await; if let Some(rem) = rem { rem.write((0, 0))?; @@ -69,11 +69,11 @@ fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult { } #[eonix_macros::define_syscall(SYS_CLOCK_NANOSLEEP)] -fn clock_nanosleep( +async fn clock_nanosleep( clock_id: u32, - flags: u32, - req: *const (u32, u32), - rem: *mut (u32, u32), + _flags: u32, + req: User<(u32, u32)>, + rem: UserMut<(u32, u32)>, ) -> KResult { if clock_id != CLOCK_REALTIME && clock_id != CLOCK_REALTIME_COARSE @@ -90,7 +90,7 @@ fn clock_nanosleep( }; let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64); - Task::block_on(sleep(duration)); + sleep(duration).await; if let Some(rem) = rem { rem.write((0, 0))?; @@ -100,16 +100,14 @@ fn clock_nanosleep( } #[eonix_macros::define_syscall(SYS_UMASK)] -fn umask(mask: u32) -> KResult { +async fn umask(mask: Mode) -> KResult { let mut umask = thread.fs_context.umask.lock(); - let old = *umask; - *umask = mask & 0o777; - Ok(old) + Ok(core::mem::replace(&mut *umask, mask.non_format())) } #[eonix_macros::define_syscall(SYS_GETCWD)] -fn getcwd(buffer: *mut u8, bufsize: usize) -> KResult { +async fn getcwd(buffer: UserMut, bufsize: usize) -> KResult { let mut user_buffer = UserBuffer::new(buffer, bufsize)?; let mut buffer = PageBuffer::new(); @@ -122,7 +120,7 @@ fn getcwd(buffer: *mut u8, bufsize: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_CHDIR)] -fn chdir(path: *const u8) -> KResult<()> { +async fn chdir(path: User) -> KResult<()> { let path = UserString::new(path)?; let path = 
Path::new(path.as_cstr().to_bytes())?; @@ -140,7 +138,7 @@ fn chdir(path: *const u8) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_UMOUNT)] -fn umount(source: *const u8) -> KResult<()> { +async fn umount(source: User) -> KResult<()> { let source = UserString::new(source)?; if source.as_cstr().to_str().unwrap() == "./mnt" { return Ok(()); @@ -149,7 +147,7 @@ fn umount(source: *const u8) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_MOUNT)] -fn mount(source: *const u8, target: *const u8, fstype: *const u8, flags: usize) -> KResult<()> { +async fn mount(source: User, target: User, fstype: User, flags: usize) -> KResult<()> { let source = UserString::new(source)?; if source.as_cstr().to_str().unwrap() == "/dev/vda2" { return Ok(()); @@ -185,7 +183,7 @@ fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult> break; } - let user_string = UserString::new(ptr.addr() as *const u8)?; + let user_string = UserString::new(User::with_addr(ptr.addr()))?; strings.push(user_string.as_cstr().to_owned()); ptr_strings = ptr_strings.offset(1)?; } @@ -194,7 +192,7 @@ fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult> } #[eonix_macros::define_syscall(SYS_EXECVE)] -fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult { +async fn execve(exec: User, argv: User, envp: User) -> KResult { let exec = UserString::new(exec)?; let exec = exec.as_cstr().to_owned(); @@ -208,11 +206,12 @@ fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult KResult KResult SyscallNoReturn { +async fn exit(status: u32) -> SyscallNoReturn { + let mut procs = ProcessList::get().write().await; + unsafe { - let mut procs = Task::block_on(ProcessList::get().write()); - Task::block_on(procs.do_exit(&thread, WaitType::Exited(status), false)); + procs + .do_exit(&thread, WaitType::Exited(status), false) + .await; } SyscallNoReturn } #[eonix_macros::define_syscall(SYS_EXIT_GROUP)] -fn exit_group(status: u32) -> SyscallNoReturn { +async fn exit_group(status: u32) -> SyscallNoReturn { + let mut procs = ProcessList::get().write().await; + unsafe { - let mut procs = Task::block_on(ProcessList::get().write()); - Task::block_on(procs.do_exit(&thread, WaitType::Exited(status), true)); + procs.do_exit(&thread, WaitType::Exited(status), true).await; } SyscallNoReturn } enum WaitInfo { - SigInfo(NonNull), - Status(NonNull), + SigInfo(UserMut), + Status(UserMut), None, } -fn do_waitid( +async fn do_waitid( thread: &Thread, wait_id: WaitId, info: WaitInfo, options: u32, - rusage: *mut RUsage, + rusage: UserMut, ) -> KResult { if !rusage.is_null() { unimplemented!("waitid with rusage pointer"); @@ -278,12 +281,15 @@ fn do_waitid( Some(options) => options, }; - let Some(wait_object) = Task::block_on(thread.process.wait( - wait_id, - options.contains(UserWaitOptions::WNOHANG), - options.contains(UserWaitOptions::WUNTRACED), - options.contains(UserWaitOptions::WCONTINUED), - ))? + let Some(wait_object) = thread + .process + .wait( + wait_id, + options.contains(UserWaitOptions::WNOHANG), + options.contains(UserWaitOptions::WUNTRACED), + options.contains(UserWaitOptions::WCONTINUED), + ) + .await? 
    else {
        return Ok(0);
    };
@@ -299,11 +305,11 @@ fn do_waitid(
             siginfo.si_status = status;
             siginfo.si_code = code;
-            UserPointerMut::new(siginfo_ptr.as_ptr())?.write(siginfo)?;
+            UserPointerMut::new(siginfo_ptr)?.write(siginfo)?;
             Ok(0)
         }
         WaitInfo::Status(status_ptr) => {
-            UserPointerMut::new(status_ptr.as_ptr())?.write(wait_object.code.to_wstatus())?;
+            UserPointerMut::new(status_ptr)?.write(wait_object.code.to_wstatus())?;
             Ok(wait_object.pid)
         }
         WaitInfo::None => Ok(wait_object.pid),
@@ -311,18 +317,16 @@
 }
 
 #[eonix_macros::define_syscall(SYS_WAITID)]
-fn waitid(
+async fn waitid(
     id_type: u32,
     id: u32,
-    info: *mut SigInfo,
+    info: UserMut<SigInfo>,
     options: u32,
-    rusage: *mut RUsage,
+    rusage: UserMut<RUsage>,
 ) -> KResult<u32> {
     let wait_id = WaitId::from_type_and_id(id_type, id)?;
 
-    if let Some(info) = NonNull::new(info) {
-        do_waitid(thread, wait_id, WaitInfo::SigInfo(info), options, rusage)
-    } else {
+    if info.is_null() {
         /*
          * According to POSIX.1-2008, an application calling waitid() must
          * ensure that infop points to a siginfo_t structure (i.e., that it
          * is a non-null pointer).
          * ...
          */
         unimplemented!("waitid with null info pointer");
     }
+
+    do_waitid(thread, wait_id, WaitInfo::SigInfo(info), options, rusage).await
 }
 
 #[eonix_macros::define_syscall(SYS_WAIT4)]
-fn wait4(wait_id: i32, arg1: *mut u32, options: u32, rusage: *mut RUsage) -> KResult<u32> {
-    let waitinfo = if let Some(status) = NonNull::new(arg1) {
-        WaitInfo::Status(status)
-    } else {
+async fn wait4(
+    wait_id: i32,
+    arg1: UserMut<u32>,
+    options: u32,
+    rusage: UserMut<RUsage>,
+) -> KResult<u32> {
+    let waitinfo = if arg1.is_null() {
         WaitInfo::None
+    } else {
+        WaitInfo::Status(arg1)
     };
 
     let wait_id = WaitId::from_id(wait_id, thread);
 
-    do_waitid(thread, wait_id, waitinfo, options, rusage)
+    do_waitid(thread, wait_id, waitinfo, options, rusage).await
 }
 
 #[cfg(target_arch = "x86_64")]
 #[eonix_macros::define_syscall(SYS_WAITPID)]
-fn waitpid(waitpid: i32, arg1: *mut u32, options: u32) -> KResult<u32> {
-    sys_wait4(thread, waitpid, arg1, options, core::ptr::null_mut())
+async fn waitpid(waitpid: i32, arg1: UserMut<u32>, options: u32) -> KResult<u32> {
+    sys_wait4(thread, waitpid, arg1, options, UserMut::null()).await
 }
 
 #[eonix_macros::define_syscall(SYS_SETSID)]
-fn setsid() -> KResult<u32> {
-    thread.process.setsid()
+async fn setsid() -> KResult<u32> {
+    thread.process.setsid().await
 }
 
 #[eonix_macros::define_syscall(SYS_SETPGID)]
-fn setpgid(pid: u32, pgid: i32) -> KResult<()> {
+async fn setpgid(pid: u32, pgid: i32) -> KResult<()> {
     let pid = if pid == 0 { thread.process.pid } else { pid };
 
     let pgid = match pgid {
@@ -369,15 +380,15 @@
         _ => return Err(EINVAL),
     };
 
-    thread.process.setpgid(pid, pgid)
+    thread.process.setpgid(pid, pgid).await
 }
 
 #[eonix_macros::define_syscall(SYS_GETSID)]
-fn getsid(pid: u32) -> KResult<u32> {
+async fn getsid(pid: u32) -> KResult<u32> {
     if pid == 0 {
         Ok(thread.process.session_rcu().sid)
     } else {
-        let procs = Task::block_on(ProcessList::get().read());
+        let procs = ProcessList::get().read().await;
         procs
             .try_find_process(pid)
             .map(|proc| proc.session(procs.prove()).sid)
@@ -386,11 +397,11 @@
 }
 
 #[eonix_macros::define_syscall(SYS_GETPGID)]
-fn getpgid(pid: u32) -> KResult<u32> {
+async fn getpgid(pid: u32) -> KResult<u32> {
     if pid == 0 {
         Ok(thread.process.pgroup_rcu().pgid)
     } else {
-        let procs = Task::block_on(ProcessList::get().read());
+        let procs = ProcessList::get().read().await;
         procs
             .try_find_process(pid)
             .map(|proc| proc.pgroup(procs.prove()).pgid)
@@ -399,12 +410,12
@@ fn getpgid(pid: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_GETPID)] -fn getpid() -> KResult { +async fn getpid() -> KResult { Ok(thread.process.pid) } #[eonix_macros::define_syscall(SYS_GETPPID)] -fn getppid() -> KResult { +async fn getppid() -> KResult { Ok(thread.process.parent_rcu().map_or(0, |x| x.pid)) } @@ -420,78 +431,61 @@ fn do_getuid(_thread: &Thread) -> KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETUID32)] -fn getuid32() -> KResult { +async fn getuid32() -> KResult { do_getuid(thread) } #[eonix_macros::define_syscall(SYS_GETUID)] -fn getuid() -> KResult { +async fn getuid() -> KResult { do_getuid(thread) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETEUID32)] -fn geteuid32() -> KResult { +async fn geteuid32() -> KResult { do_geteuid(thread) } #[eonix_macros::define_syscall(SYS_GETEUID)] -fn geteuid() -> KResult { +async fn geteuid() -> KResult { do_geteuid(thread) } #[eonix_macros::define_syscall(SYS_GETEGID)] -fn getegid() -> KResult { +async fn getegid() -> KResult { // All users are root for now. Ok(0) } #[eonix_macros::define_syscall(SYS_GETGID)] -fn getgid() -> KResult { - sys_getegid(thread) +async fn getgid() -> KResult { + sys_getegid(thread).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_GETGID32)] -fn getgid32() -> KResult { - sys_getegid(thread) -} - -#[eonix_macros::define_syscall(SYS_GETRANDOM)] -fn getrandom(buf: *mut u8, buflen: usize, _flags: u32) -> isize { - if buf.is_null() || buflen == 0 { - return -14; - } - - static mut SEED: u64 = 1; - unsafe { - for i in 0..buflen { - SEED = SEED.wrapping_mul(1103515245).wrapping_add(12345); - *buf.add(i) = (SEED >> 8) as u8; - } - } - - buflen as isize +async fn getgid32() -> KResult { + sys_getegid(thread).await } #[eonix_macros::define_syscall(SYS_SCHED_YIELD)] -fn sched_yield() -> KResult<()> { - Task::block_on(yield_now()); +async fn sched_yield() -> KResult<()> { + yield_now().await; Ok(()) } #[eonix_macros::define_syscall(SYS_SYNC)] -fn sync() -> KResult<()> { +async fn sync() -> KResult<()> { Ok(()) } #[eonix_macros::define_syscall(SYS_FSYNC)] -fn fsync() -> KResult<()> { +async fn fsync() -> KResult<()> { Ok(()) } #[eonix_macros::define_syscall(SYS_GETTID)] -fn gettid() -> KResult { +async fn gettid() -> KResult { Ok(thread.tid) } @@ -531,7 +525,7 @@ pub fn parse_user_tls(arch_tls: usize) -> KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SET_THREAD_AREA)] -fn set_thread_area(arch_tls: usize) -> KResult<()> { +async fn set_thread_area(arch_tls: usize) -> KResult<()> { thread.set_user_tls(parse_user_tls(arch_tls)?)?; // SAFETY: Preemption is disabled on calling `load_thread_area32()`. 
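For reference, the conversion pattern applied throughout the procops.rs hunks above, shown as a minimal, hypothetical handler: handlers become `async fn`, lock acquisition is `.await`ed directly instead of being driven through `Task::block_on`, and user memory is reached through the checked `User`/`UserMut` wrappers rather than raw pointers. The `SYS_EXAMPLE` number and the handler body are illustrative only, and the wrapper type parameters are assumed from the generic definitions introduced by this patch.

    #[eonix_macros::define_syscall(SYS_EXAMPLE)]
    async fn example(pid: u32, out: UserMut<u32>) -> KResult<u32> {
        // Previously: let procs = Task::block_on(ProcessList::get().read());
        let procs = ProcessList::get().read().await;
        let process = procs.try_find_process(pid).ok_or(ESRCH)?;
        // Previously a raw `*mut u32`; now a checked user-space handle.
        UserPointerMut::new(out)?.write(process.pid)?;
        Ok(process.pid)
    }
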
@@ -545,16 +539,16 @@ fn set_thread_area(arch_tls: usize) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_SET_TID_ADDRESS)] -fn set_tid_address(tidptr: usize) -> KResult { +async fn set_tid_address(tidptr: UserMut) -> KResult { thread.clear_child_tid(Some(tidptr)); Ok(thread.tid) } #[eonix_macros::define_syscall(SYS_PRCTL)] -fn prctl(option: u32, arg2: usize) -> KResult<()> { +async fn prctl(option: u32, arg2: PtrT) -> KResult<()> { match option { PR_SET_NAME => { - let name = UserPointer::new(arg2 as *mut [u8; 16])?.read()?; + let name = UserPointer::<[u8; 16]>::new(User::with_addr(arg2.addr()))?.read()?; let len = name.iter().position(|&c| c == 0).unwrap_or(15); thread.set_name(name[..len].into()); Ok(()) @@ -563,7 +557,7 @@ fn prctl(option: u32, arg2: usize) -> KResult<()> { let name = thread.get_name(); let len = name.len().min(15); let name: [u8; 16] = core::array::from_fn(|i| if i < len { name[i] } else { 0 }); - UserPointerMut::new(arg2 as *mut [u8; 16])?.write(name)?; + UserPointerMut::<[u8; 16]>::new(UserMut::with_addr(arg2.addr()))?.write(name)?; Ok(()) } _ => Err(EINVAL), @@ -571,8 +565,8 @@ fn prctl(option: u32, arg2: usize) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_KILL)] -fn kill(pid: i32, sig: u32) -> KResult<()> { - let procs = Task::block_on(ProcessList::get().read()); +async fn kill(pid: i32, sig: u32) -> KResult<()> { + let procs = ProcessList::get().read().await; match pid { // Send signal to every process for which the calling process has // permission to send signals. @@ -598,8 +592,10 @@ fn kill(pid: i32, sig: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_TKILL)] -fn tkill(tid: u32, sig: u32) -> KResult<()> { - Task::block_on(ProcessList::get().read()) +async fn tkill(tid: u32, sig: u32) -> KResult<()> { + ProcessList::get() + .read() + .await .try_find_thread(tid) .ok_or(ESRCH)? .raise(Signal::try_from_raw(sig)?); @@ -607,8 +603,8 @@ fn tkill(tid: u32, sig: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_TGKILL)] -fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { - let procs = Task::block_on(ProcessList::get().read()); +async fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { + let procs = ProcessList::get().read().await; let thread_to_kill = procs.try_find_thread(tid).ok_or(ESRCH)?; if thread_to_kill.process.pid != tgid { @@ -620,10 +616,10 @@ fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_RT_SIGPROCMASK)] -fn rt_sigprocmask( +async fn rt_sigprocmask( how: u32, - set: *mut SigSet, - oldset: *mut SigSet, + set: UserMut, + oldset: UserMut, sigsetsize: usize, ) -> KResult<()> { if sigsetsize != size_of::() { @@ -636,7 +632,7 @@ fn rt_sigprocmask( } let new_mask = if !set.is_null() { - UserPointer::new(set)?.read()? + UserPointer::new(set.as_const())?.read()? 
} else { return Ok(()); }; @@ -658,27 +654,21 @@ struct TimeSpec32 { tv_nsec: i32, } -impl TimeSpec32 { - fn to_duration(&self) -> Duration { - Duration::new(self.tv_sec as u64, self.tv_nsec as u32) - } -} - #[eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT_TIME32)] -fn rt_sigtimedwait_time32( - _uthese: *const SigSet, - _uinfo: *mut SigInfo, - _uts: *const TimeSpec32, +async fn rt_sigtimedwait_time32( + _uthese: User, + _uinfo: UserMut, + _uts: User, ) -> KResult { // TODO Ok(0) } #[eonix_macros::define_syscall(SYS_RT_SIGACTION)] -fn rt_sigaction( +async fn rt_sigaction( signum: u32, - act: *const SigAction, - oldact: *mut SigAction, + act: User, + oldact: UserMut, sigsetsize: usize, ) -> KResult<()> { let signal = Signal::try_from_raw(signum)?; @@ -707,11 +697,11 @@ fn rt_sigaction( } #[eonix_macros::define_syscall(SYS_PRLIMIT64)] -fn prlimit64( +async fn prlimit64( pid: u32, resource: u32, - new_limit: *const RLimit, - old_limit: *mut RLimit, + new_limit: User, + old_limit: UserMut, ) -> KResult<()> { if pid != 0 { return Err(ENOSYS); @@ -743,13 +733,13 @@ fn prlimit64( } #[eonix_macros::define_syscall(SYS_GETRLIMIT)] -fn getrlimit(resource: u32, rlimit: *mut RLimit) -> KResult<()> { - sys_prlimit64(thread, 0, resource, core::ptr::null(), rlimit) +async fn getrlimit(resource: u32, rlimit: UserMut) -> KResult<()> { + sys_prlimit64(thread, 0, resource, User::null(), rlimit).await } #[eonix_macros::define_syscall(SYS_SETRLIMIT)] -fn setrlimit(resource: u32, rlimit: *const RLimit) -> KResult<()> { - sys_prlimit64(thread, 0, resource, rlimit, core::ptr::null_mut()) +async fn setrlimit(resource: u32, rlimit: User) -> KResult<()> { + sys_prlimit64(thread, 0, resource, rlimit, UserMut::null()).await } #[repr(C)] @@ -774,7 +764,7 @@ struct RUsage { } #[eonix_macros::define_syscall(SYS_GETRUSAGE)] -fn getrusage(who: u32, rusage: *mut RUsage) -> KResult<()> { +async fn getrusage(who: u32, rusage: UserMut) -> KResult<()> { if who != 0 { return Err(ENOSYS); } @@ -804,52 +794,52 @@ fn getrusage(who: u32, rusage: *mut RUsage) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_VFORK)] -fn vfork() -> KResult { +async fn vfork() -> KResult { let clone_args = CloneArgs::for_vfork(); - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_FORK)] -fn fork() -> KResult { +async fn fork() -> KResult { let clone_args = CloneArgs::for_fork(); - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } // Some old platforms including x86_32, riscv and arm have the last two arguments // swapped, so we need to define two versions of `clone` syscall. 
#[cfg(not(target_arch = "loongarch64"))] #[eonix_macros::define_syscall(SYS_CLONE)] -fn clone( +async fn clone( clone_flags: usize, new_sp: usize, - parent_tidptr: usize, + parent_tidptr: UserMut, tls: usize, - child_tidptr: usize, + child_tidptr: UserMut, ) -> KResult { let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?; - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } #[cfg(target_arch = "loongarch64")] #[eonix_macros::define_syscall(SYS_CLONE)] -fn clone( +async fn clone( clone_flags: usize, new_sp: usize, - parent_tidptr: usize, - child_tidptr: usize, + parent_tidptr: UserMut, + child_tidptr: UserMut, tls: usize, ) -> KResult { let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?; - do_clone(thread, clone_args) + do_clone(thread, clone_args).await } #[eonix_macros::define_syscall(SYS_FUTEX)] -fn futex( +async fn futex( uaddr: usize, op: u32, val: u32, @@ -867,11 +857,11 @@ fn futex( match futex_op { FutexOp::FUTEX_WAIT => { - Task::block_on(futex_wait(uaddr, pid, val as u32, None))?; + futex_wait(uaddr, pid, val as u32, None).await?; return Ok(0); } FutexOp::FUTEX_WAKE => { - return Task::block_on(futex_wake(uaddr, pid, val as u32)); + return futex_wake(uaddr, pid, val as u32).await; } FutexOp::FUTEX_REQUEUE => { todo!() @@ -883,60 +873,56 @@ fn futex( } #[eonix_macros::define_syscall(SYS_SET_ROBUST_LIST)] -fn set_robust_list(head: usize, len: usize) -> KResult<()> { +async fn set_robust_list(head: User, len: usize) -> KResult<()> { if len != size_of::() { return Err(EINVAL); } - thread.set_robust_list(Some(VAddr::from(head))); + thread.set_robust_list(Some(head)); Ok(()) } #[eonix_macros::define_syscall(SYS_RT_SIGRETURN)] -fn rt_sigreturn() -> KResult { - thread - .signal_list - .restore( - &mut thread.trap_ctx.borrow(), - &mut thread.fpu_state.borrow(), - false, - ) - .inspect_err(|err| { - println_warn!( - "`rt_sigreturn` failed in thread {} with error {err}!", - thread.tid - ); - Task::block_on(thread.force_kill(Signal::SIGSEGV)); - })?; +async fn rt_sigreturn() -> KResult { + if let Err(err) = thread.signal_list.restore( + &mut thread.trap_ctx.borrow(), + &mut thread.fpu_state.borrow(), + false, + ) { + println_warn!( + "`rt_sigreturn` failed in thread {} with error {err}!", + thread.tid + ); + thread.force_kill(Signal::SIGSEGV).await; + return Err(err); + } Ok(SyscallNoReturn) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SIGRETURN)] -fn sigreturn() -> KResult { - thread - .signal_list - .restore( - &mut thread.trap_ctx.borrow(), - &mut thread.fpu_state.borrow(), - true, - ) - .inspect_err(|err| { - println_warn!( - "`sigreturn` failed in thread {} with error {err}!", - thread.tid - ); - Task::block_on(thread.force_kill(Signal::SIGSEGV)); - })?; +async fn sigreturn() -> KResult { + if let Err(err) = thread.signal_list.restore( + &mut thread.trap_ctx.borrow(), + &mut thread.fpu_state.borrow(), + true, + ) { + println_warn!( + "`sigreturn` failed in thread {} with error {err}!", + thread.tid + ); + thread.force_kill(Signal::SIGSEGV).await; + return Err(err); + } Ok(SyscallNoReturn) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_ARCH_PRCTL)] -fn arch_prctl(option: u32, addr: u32) -> KResult { - sys_arch_prctl(thread, option, addr) +async fn arch_prctl(option: u32, addr: u32) -> KResult { + sys_arch_prctl(thread, option, addr).await } pub fn keep_alive() {} diff --git a/src/kernel/syscall/sysinfo.rs b/src/kernel/syscall/sysinfo.rs index 
5092c8a6..69316b2a 100644 --- a/src/kernel/syscall/sysinfo.rs +++ b/src/kernel/syscall/sysinfo.rs @@ -2,6 +2,7 @@ use crate::{ io::Buffer as _, kernel::{ constants::{CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINTR, EINVAL}, + syscall::UserMut, task::Thread, timer::{Instant, Ticks}, user::{UserBuffer, UserPointerMut}, @@ -30,7 +31,7 @@ fn copy_cstr_to_array(cstr: &[u8], array: &mut [u8]) { } #[eonix_macros::define_syscall(SYS_NEWUNAME)] -fn newuname(buffer: *mut NewUTSName) -> KResult<()> { +async fn newuname(buffer: UserMut) -> KResult<()> { let buffer = UserPointerMut::new(buffer)?; let mut uname = NewUTSName { sysname: [0; 65], @@ -62,7 +63,7 @@ fn newuname(buffer: *mut NewUTSName) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_GETTIMEOFDAY)] -fn gettimeofday(timeval: *mut TimeVal, timezone: *mut ()) -> KResult<()> { +async fn gettimeofday(timeval: UserMut, timezone: UserMut<()>) -> KResult<()> { if !timezone.is_null() { return Err(EINVAL); } @@ -81,7 +82,7 @@ fn gettimeofday(timeval: *mut TimeVal, timezone: *mut ()) -> KResult<()> { Ok(()) } -fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> { +fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: UserMut) -> KResult<()> { let timespec = UserPointerMut::new(timespec)?; match clock_id { @@ -106,13 +107,13 @@ fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: *mut TimeSpec) #[cfg(not(target_arch = "x86_64"))] #[eonix_macros::define_syscall(SYS_CLOCK_GETTIME)] -fn clock_gettime(clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> { +async fn clock_gettime(clock_id: u32, timespec: UserMut) -> KResult<()> { do_clock_gettime64(thread, clock_id, timespec) } #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_CLOCK_GETTIME64)] -fn clock_gettime64(clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> { +async fn clock_gettime64(clock_id: u32, timespec: UserMut) -> KResult<()> { do_clock_gettime64(thread, clock_id, timespec) } @@ -135,7 +136,7 @@ struct Sysinfo { } #[eonix_macros::define_syscall(SYS_SYSINFO)] -fn sysinfo(info: *mut Sysinfo) -> KResult<()> { +async fn sysinfo(info: UserMut) -> KResult<()> { let info = UserPointerMut::new(info)?; info.write(Sysinfo { uptime: Ticks::since_boot().as_secs() as u32, @@ -164,7 +165,7 @@ struct TMS { } #[eonix_macros::define_syscall(SYS_TIMES)] -fn times(tms: *mut TMS) -> KResult<()> { +async fn times(tms: UserMut) -> KResult<()> { let tms = UserPointerMut::new(tms)?; tms.write(TMS { tms_utime: 0, @@ -175,7 +176,7 @@ fn times(tms: *mut TMS) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_GETRANDOM)] -fn get_random(buf: *mut u8, len: usize, flags: u32) -> KResult { +async fn get_random(buf: UserMut, len: usize, flags: u32) -> KResult { if flags != 0 { return Err(EINVAL); } diff --git a/src/kernel/task.rs b/src/kernel/task.rs index e8d36e51..2ef58069 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -18,4 +18,212 @@ pub use process_group::ProcessGroup; pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; -pub use thread::{new_thread_runnable, yield_now, Thread, ThreadBuilder}; +pub use thread::{yield_now, Thread, ThreadAlloc, ThreadBuilder}; + +fn do_block_on(mut future: core::pin::Pin<&mut F>) -> F::Output +where + F: core::future::Future, +{ + let waker = core::task::Waker::noop(); + let mut cx = core::task::Context::from_waker(&waker); + + loop { + match future.as_mut().poll(&mut cx) { + core::task::Poll::Ready(output) => return output, + 
core::task::Poll::Pending => {} + } + } +} + +/// Constantly poll the given future until it is ready, blocking the current thread. +/// +/// # Warning +/// This function will block the current thread and should not be used in async +/// contexts as it might cause infinite blocking or deadlocks. The following is +/// a bad example: +/// +/// ```ignore +/// block_on(async { +/// // This will block the current thread forever. +/// loop { +/// println_debug!("This will never end!"); +/// } +/// }); +/// +/// // The code below will never be reached. +/// println_debug!("You'll never see this message!"); +/// ``` +/// +/// Use [`stackful`] instead to run async (or computational) code in a separate +/// stackful (and preemptive) context or `RUNTIME.spawn` to run async code in +/// the runtime's executor. +pub fn block_on(future: F) -> F::Output +where + F: core::future::Future, +{ + do_block_on(core::pin::pin!(future)) +} + +/// Run the given future in a stackful context, allowing it to be preempted by +/// timer interrupts. +/// +/// ```ignore +/// RUNTIME.spawn(stackful(async { +/// // Some simulated computation heavy task. +/// loop { +/// println_debug!("Hello from stackful future!"); +/// } +/// })); +/// ``` +pub async fn stackful(mut future: F) -> F::Output +where + F: core::future::Future, +{ + use crate::kernel::{ + interrupt::{default_fault_handler, default_irq_handler}, + timer::{should_reschedule, timer_interrupt}, + }; + use alloc::sync::Arc; + use alloc::task::Wake; + use core::cell::UnsafeCell; + use core::future::Future; + use core::pin::Pin; + use core::ptr::NonNull; + use core::sync::atomic::AtomicBool; + use core::sync::atomic::Ordering; + use core::task::Context; + use core::task::Poll; + use core::task::Waker; + use eonix_hal::traits::trap::RawTrapContext; + use eonix_hal::traits::trap::TrapReturn; + use eonix_hal::traits::trap::TrapType; + use eonix_hal::trap::TrapContext; + use eonix_preempt::assert_preempt_enabled; + use eonix_runtime::executor::Stack; + use eonix_runtime::task::Task; + use thread::wait_for_wakeups; + + let stack = KernelStack::new(); + + fn execute(mut future: Pin<&mut F>, output_ptr: NonNull>) -> ! + where + F: Future, + { + struct WakeSaver { + task: Arc, + woken: AtomicBool, + } + + impl Wake for WakeSaver { + fn wake_by_ref(self: &Arc) { + // SAFETY: If we read true below in the loop, we must have been + // woken up and acquired our waker's work by the runtime. + self.woken.store(true, Ordering::Relaxed); + self.task.wake_by_ref(); + } + + fn wake(self: Arc) { + self.wake_by_ref(); + } + } + + let wake_saver = Arc::new(WakeSaver { + task: Task::current().clone(), + woken: AtomicBool::new(false), + }); + let waker = Waker::from(wake_saver.clone()); + let mut cx = Context::from_waker(&waker); + + let output = loop { + match future.as_mut().poll(&mut cx) { + Poll::Ready(output) => break output, + Poll::Pending => { + assert_preempt_enabled!("Blocking in stackful futures is not allowed."); + + if Task::current().is_ready() { + continue; + } + + // SAFETY: The runtime must have ensured that we can see the + // work done by the waker. 
+ if wake_saver.woken.swap(false, Ordering::Relaxed) { + continue; + } + + unsafe { + #[cfg(target_arch = "riscv64")] + core::arch::asm!("ebreak"); + + #[cfg(target_arch = "loongarch64")] + core::arch::asm!("break 1"); + } + } + } + }; + + drop(cx); + drop(waker); + drop(wake_saver); + + unsafe { + output_ptr.write(Some(output)); + } + + unsafe { + #[cfg(target_arch = "riscv64")] + core::arch::asm!("ebreak"); + + #[cfg(target_arch = "loongarch64")] + core::arch::asm!("break 1"); + } + + unreachable!() + } + + let sp = stack.get_bottom(); + let mut output = UnsafeCell::new(None); + + let mut trap_ctx = TrapContext::new(); + + trap_ctx.set_user_mode(false); + trap_ctx.set_interrupt_enabled(true); + let _ = trap_ctx.set_user_call_frame( + execute:: as usize, + Some(sp.addr().get()), + None, + &[(&raw mut future) as usize, output.get() as usize], + |_, _| Ok::<(), u32>(()), + ); + + loop { + unsafe { + trap_ctx.trap_return(); + } + + match trap_ctx.trap_type() { + TrapType::Syscall { .. } => {} + TrapType::Fault(fault) => default_fault_handler(fault, &mut trap_ctx), + TrapType::Irq { callback } => callback(default_irq_handler), + TrapType::Timer { callback } => { + callback(timer_interrupt); + + if eonix_preempt::count() == 0 && should_reschedule() { + yield_now().await; + } + } + TrapType::Breakpoint => { + if let Some(output) = output.get_mut().take() { + break output; + } else { + wait_for_wakeups().await; + } + + #[cfg(target_arch = "riscv64")] + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 2); + + #[cfg(target_arch = "loongarch64")] + trap_ctx.set_program_counter(trap_ctx.get_program_counter() + 4); + } + } + } +} diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index c8efe5e8..e0d578c1 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,10 +1,7 @@ use crate::{ kernel::{ - syscall::procops::parse_user_tls, - task::{ - alloc_pid, new_thread_runnable, KernelStack, ProcessBuilder, ProcessList, Thread, - ThreadBuilder, - }, + syscall::{procops::parse_user_tls, UserMut}, + task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}, user::UserPointerMut, }, KResult, @@ -12,7 +9,7 @@ use crate::{ use bitflags::bitflags; use core::num::NonZero; use eonix_hal::processor::UserTLS; -use eonix_runtime::{scheduler::Scheduler, task::Task}; +use eonix_runtime::scheduler::RUNTIME; use eonix_sync::AsProof; use posix_types::signal::Signal; @@ -51,9 +48,9 @@ pub struct CloneArgs { pub flags: CloneFlags, pub sp: Option>, // Stack pointer for the new thread. pub exit_signal: Option, // Signal to send to the parent on exit. - pub set_tid_ptr: Option, // Pointer to set child TID in user space. - pub clear_tid_ptr: Option, // Pointer to clear child TID in user space. - pub parent_tid_ptr: Option, // Pointer to parent TID in user space. + pub set_tid_ptr: Option>, // Pointer to set child TID in user space. + pub clear_tid_ptr: Option>, // Pointer to clear child TID in user space. + pub parent_tid_ptr: Option>, // Pointer to parent TID in user space. pub tls: Option, // Pointer to TLS information. 
} @@ -63,8 +60,8 @@ impl CloneArgs { pub fn for_clone( flags: usize, sp: usize, - child_tid_ptr: usize, - parent_tid_ptr: usize, + child_tid_ptr: UserMut, + parent_tid_ptr: UserMut, tls: usize, ) -> KResult { let clone_flags = CloneFlags::from_bits_truncate(flags & !Self::MASK); @@ -133,8 +130,8 @@ impl CloneArgs { } } -pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { - let mut procs = Task::block_on(ProcessList::get().write()); +pub async fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { + let mut procs = ProcessList::get().write().await; let thread_builder = ThreadBuilder::new().clone_from(&thread, &clone_args)?; let current_process = thread.process.clone(); @@ -154,6 +151,7 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { let (new_thread, _) = ProcessBuilder::new() .clone_from(current_process, &clone_args) + .await .pid(new_pid) .pgroup(current_pgroup) .session(current_session) @@ -163,10 +161,10 @@ pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult { }; if let Some(parent_tid_ptr) = clone_args.parent_tid_ptr { - UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)? + UserPointerMut::new(parent_tid_ptr)?.write(new_pid)? } - Scheduler::get().spawn::(new_thread_runnable(new_thread)); + RUNTIME.spawn(new_thread.run()); Ok(new_pid) } diff --git a/src/kernel/task/futex.rs b/src/kernel/task/futex.rs index af42a396..a04d7091 100644 --- a/src/kernel/task/futex.rs +++ b/src/kernel/task/futex.rs @@ -9,6 +9,7 @@ use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicL use crate::{ kernel::{ constants::{EAGAIN, EINVAL}, + syscall::User, user::UserPointer, }, prelude::KResult, @@ -174,7 +175,7 @@ pub async fn futex_wait( let (_, futex_bucket_ref) = FUTEX_TABLE.get_bucket(&futex_key); let mut futex_bucket = futex_bucket_ref.lock().await; - let val = UserPointer::new(uaddr as *const u32)?.read()?; + let val = UserPointer::new(User::::with_addr(uaddr))?.read()?; if val != expected_val { return Err(EAGAIN); @@ -238,20 +239,20 @@ async fn futex_requeue( pid: Option, wake_count: u32, requeue_uaddr: usize, - requeue_count: u32, + _requeue_count: u32, ) -> KResult { let futex_key = FutexKey::new(uaddr, pid); let futex_requeue_key = FutexKey::new(requeue_uaddr, pid); - let (bucket_idx0, bucket_ref0) = FUTEX_TABLE.get_bucket(&futex_key); - let (bucket_idx1, bucket_ref1) = FUTEX_TABLE.get_bucket(&futex_requeue_key); + let (bucket_idx0, _bucket_ref0) = FUTEX_TABLE.get_bucket(&futex_key); + let (bucket_idx1, _bucket_ref1) = FUTEX_TABLE.get_bucket(&futex_requeue_key); if bucket_idx0 == bucket_idx1 { // If the keys are the same, we can just wake up the waiters. return futex_wake(uaddr, pid, wake_count).await; } - let (futex_bucket, futex_requeue_bucket) = + let (_futex_bucket, _futex_requeue_bucket) = double_lock_bucket(futex_key, futex_requeue_key).await; todo!() @@ -299,7 +300,7 @@ impl RobustListHead { futex_wake(futex_addr, None, usize::MAX as u32).await?; // Move to the next entry in the robust list. 
- let robust_list = UserPointer::new(entry_ptr as *const RobustList)?.read()?; + let robust_list = UserPointer::new(User::::with_addr(entry_ptr))?.read()?; entry_ptr = robust_list.next; diff --git a/src/kernel/task/loader/elf.rs b/src/kernel/task/loader/elf.rs index 073026a9..859e0010 100644 --- a/src/kernel/task/loader/elf.rs +++ b/src/kernel/task/loader/elf.rs @@ -215,20 +215,20 @@ impl Elf { }) } - fn load(&self, args: Vec, envs: Vec) -> KResult { + async fn load(&self, args: Vec, envs: Vec) -> KResult { let mm_list = MMList::new(); // Load Segments - let (elf_base, data_segment_end) = self.load_segments(&mm_list)?; + let (elf_base, data_segment_end) = self.load_segments(&mm_list).await?; // Load ldso (if any) - let ldso_load_info = self.load_ldso(&mm_list)?; + let ldso_load_info = self.load_ldso(&mm_list).await?; // Load vdso - self.load_vdso(&mm_list)?; + self.load_vdso(&mm_list).await?; // Heap - mm_list.register_break(data_segment_end + 0x10000); + mm_list.register_break(data_segment_end + 0x10000).await; let aux_vec = self.init_aux_vec( elf_base, @@ -238,7 +238,9 @@ impl Elf { )?; // Map stack - let sp = self.create_and_init_stack(&mm_list, args, envs, aux_vec)?; + let sp = self + .create_and_init_stack(&mm_list, args, envs, aux_vec) + .await?; let entry_ip = if let Some(ldso_load_info) = ldso_load_info { // Normal shared object(DYN) @@ -258,26 +260,30 @@ impl Elf { }) } - fn create_and_init_stack( + async fn create_and_init_stack( &self, mm_list: &MMList, args: Vec, envs: Vec, aux_vec: AuxVec, ) -> KResult { - mm_list.mmap_fixed( - VAddr::from(E::STACK_BASE_ADDR - INIT_STACK_SIZE), - INIT_STACK_SIZE, - Mapping::Anonymous, - Permission { - read: true, - write: true, - execute: false, - }, - false, - )?; + mm_list + .mmap_fixed( + VAddr::from(E::STACK_BASE_ADDR - INIT_STACK_SIZE), + INIT_STACK_SIZE, + Mapping::Anonymous, + Permission { + read: true, + write: true, + execute: false, + }, + false, + ) + .await?; - StackInitializer::new(&mm_list, E::STACK_BASE_ADDR, args, envs, aux_vec).init() + StackInitializer::new(&mm_list, E::STACK_BASE_ADDR, args, envs, aux_vec) + .init() + .await } fn init_aux_vec(&self, elf_base: VAddr, ldso_base: Option) -> KResult> { @@ -309,7 +315,7 @@ impl Elf { Ok(aux_vec) } - fn load_segments(&self, mm_list: &MMList) -> KResult<(VAddr, VAddr)> { + async fn load_segments(&self, mm_list: &MMList) -> KResult<(VAddr, VAddr)> { let base: VAddr = if self.is_shared_object() { E::DYN_BASE_ADDR } else { 0 }.into(); let mut segments_end = VAddr::NULL; @@ -318,7 +324,7 @@ impl Elf { let type_ = program_header.type_().map_err(|_| ENOEXEC)?; if type_ == program::Type::Load { - let segment_end = self.load_segment(program_header, mm_list, base)?; + let segment_end = self.load_segment(program_header, mm_list, base).await?; if segment_end > segments_end { segments_end = segment_end; @@ -329,7 +335,7 @@ impl Elf { Ok((base, segments_end)) } - fn load_segment( + async fn load_segment( &self, program_header: &E::Ph, mm_list: &MMList, @@ -353,33 +359,37 @@ impl Elf { if file_len != 0 { let real_file_length = load_vaddr_end - vmap_start; - mm_list.mmap_fixed( - vmap_start, - file_len, - Mapping::File(FileMapping::new( - self.file.get_inode()?, - file_offset, - real_file_length, - )), - permission, - false, - )?; + mm_list + .mmap_fixed( + vmap_start, + file_len, + Mapping::File(FileMapping::new( + self.file.get_inode()?, + file_offset, + real_file_length, + )), + permission, + false, + ) + .await?; } if vmem_len > file_len { - mm_list.mmap_fixed( - vmap_start + file_len, - 
vmem_len - file_len, - Mapping::Anonymous, - permission, - false, - )?; + mm_list + .mmap_fixed( + vmap_start + file_len, + vmem_len - file_len, + Mapping::Anonymous, + permission, + false, + ) + .await?; } Ok(vmap_start + vmem_len) } - fn load_ldso(&self, mm_list: &MMList) -> KResult> { + async fn load_ldso(&self, mm_list: &MMList) -> KResult> { let ldso_path = self.ldso_path()?; if let Some(ldso_path) = ldso_path { @@ -393,7 +403,7 @@ impl Elf { let type_ = program_header.type_().map_err(|_| ENOEXEC)?; if type_ == program::Type::Load { - ldso_elf.load_segment(program_header, mm_list, base)?; + ldso_elf.load_segment(program_header, mm_list, base).await?; } } @@ -406,8 +416,8 @@ impl Elf { Ok(None) } - fn load_vdso(&self, mm_list: &MMList) -> KResult<()> { - mm_list.map_vdso() + async fn load_vdso(&self, mm_list: &MMList) -> KResult<()> { + mm_list.map_vdso().await } fn ldso_path(&self) -> KResult> { @@ -449,10 +459,10 @@ impl ELF { } } - pub fn load(&self, args: Vec, envs: Vec) -> KResult { + pub async fn load(&self, args: Vec, envs: Vec) -> KResult { match &self { - ELF::Elf32(elf32) => elf32.load(args, envs), - ELF::Elf64(elf64) => elf64.load(args, envs), + ELF::Elf32(elf32) => elf32.load(args, envs).await, + ELF::Elf64(elf64) => elf64.load(args, envs).await, } } } @@ -483,21 +493,21 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { } // return sp after stack init - fn init(mut self) -> KResult { - let env_pointers = self.push_envs()?; - let arg_pointers = self.push_args()?; + async fn init(mut self) -> KResult { + let env_pointers = self.push_envs().await?; + let arg_pointers = self.push_args().await?; self.stack_alignment(); - self.push_aux_vec()?; - self.push_pointers(env_pointers)?; - self.push_pointers(arg_pointers)?; - self.push_argc(T::from_usize(self.args.len()))?; + self.push_aux_vec().await?; + self.push_pointers(env_pointers).await?; + self.push_pointers(arg_pointers).await?; + self.push_argc(T::from_usize(self.args.len())).await?; assert_eq!(self.sp.align_down(16), self.sp); Ok(VAddr::from(self.sp)) } - fn push_envs(&mut self) -> KResult> { + async fn push_envs(&mut self) -> KResult> { let mut addrs = Vec::with_capacity(self.envs.len()); for string in self.envs.iter().rev() { let len = string.as_bytes_with_nul().len(); @@ -505,14 +515,15 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.mm_list .access_mut(VAddr::from(self.sp), len, |offset, data| { data.copy_from_slice(&string.as_bytes_with_nul()[offset..offset + data.len()]) - })?; + }) + .await?; addrs.push(T::from_usize(self.sp)); } addrs.reverse(); Ok(addrs) } - fn push_args(&mut self) -> KResult> { + async fn push_args(&mut self) -> KResult> { let mut addrs = Vec::with_capacity(self.args.len()); for string in self.args.iter().rev() { let len = string.as_bytes_with_nul().len(); @@ -520,7 +531,8 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.mm_list .access_mut(VAddr::from(self.sp), len, |offset, data| { data.copy_from_slice(&string.as_bytes_with_nul()[offset..offset + data.len()]) - })?; + }) + .await?; addrs.push(T::from_usize(self.sp)); } addrs.reverse(); @@ -538,27 +550,29 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.sp = align_sp + all_size; } - fn push_pointers(&mut self, mut pointers: Vec) -> KResult<()> { + async fn push_pointers(&mut self, mut pointers: Vec) -> KResult<()> { pointers.push(T::from_usize(0)); self.sp -= pointers.len() * size_of::(); - self.mm_list.access_mut( - VAddr::from(self.sp), - pointers.len() * size_of::(), - 
|offset, data| { - data.copy_from_slice(unsafe { - core::slice::from_raw_parts( - pointers.as_ptr().byte_add(offset) as *const u8, - data.len(), - ) - }) - }, - )?; + self.mm_list + .access_mut( + VAddr::from(self.sp), + pointers.len() * size_of::(), + |offset, data| { + data.copy_from_slice(unsafe { + core::slice::from_raw_parts( + pointers.as_ptr().byte_add(offset) as *const u8, + data.len(), + ) + }) + }, + ) + .await?; Ok(()) } - fn push_argc(&mut self, val: T) -> KResult<()> { + async fn push_argc(&mut self, val: T) -> KResult<()> { self.sp -= size_of::(); self.mm_list @@ -566,12 +580,13 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { data.copy_from_slice(unsafe { core::slice::from_raw_parts(&val as *const _ as *const u8, data.len()) }) - })?; + }) + .await?; Ok(()) } - fn push_aux_vec(&mut self) -> KResult<()> { + async fn push_aux_vec(&mut self) -> KResult<()> { let mut longs: Vec = vec![]; // Write Auxiliary vectors @@ -593,18 +608,20 @@ impl<'a, T: ElfAddr + Clone + Copy> StackInitializer<'a, T> { self.sp -= longs.len() * size_of::(); - self.mm_list.access_mut( - VAddr::from(self.sp), - longs.len() * size_of::(), - |offset, data| { - data.copy_from_slice(unsafe { - core::slice::from_raw_parts( - longs.as_ptr().byte_add(offset) as *const u8, - data.len(), - ) - }) - }, - )?; + self.mm_list + .access_mut( + VAddr::from(self.sp), + longs.len() * size_of::(), + |offset, data| { + data.copy_from_slice(unsafe { + core::slice::from_raw_parts( + longs.as_ptr().byte_add(offset) as *const u8, + data.len(), + ) + }) + }, + ) + .await?; Ok(()) } diff --git a/src/kernel/task/loader/mod.rs b/src/kernel/task/loader/mod.rs index fc9374be..4e3f4db1 100644 --- a/src/kernel/task/loader/mod.rs +++ b/src/kernel/task/loader/mod.rs @@ -106,9 +106,9 @@ impl ProgramLoader { }) } - pub fn load(self) -> KResult { + pub async fn load(self) -> KResult { match self.object { - Object::ELF(elf) => elf.load(self.args, self.envs), + Object::ELF(elf) => elf.load(self.args, self.envs).await, } } } diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 53499a06..421e4b8b 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -4,10 +4,11 @@ use super::{ }; use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; use crate::kernel::task::{CloneArgs, CloneFlags}; +use crate::rcu::call_rcu; use crate::{ kernel::mem::MMList, prelude::*, - rcu::{rcu_sync, RCUPointer, RCUReadGuard}, + rcu::{RCUPointer, RCUReadGuard}, sync::CondVar, }; use alloc::{ @@ -16,7 +17,6 @@ use alloc::{ }; use core::sync::atomic::{AtomicU32, Ordering}; use eonix_mm::address::VAddr; -use eonix_runtime::task::Task; use eonix_sync::{ AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, SpinGuard, UnlockableGuard as _, UnlockedGuard as _, @@ -108,6 +108,7 @@ pub struct DrainExited<'waitlist> { wait_procs: SpinGuard<'waitlist, VecDeque>, } +#[derive(Debug, Clone, Copy)] pub enum WaitId { Any, Pid(u32), @@ -120,23 +121,17 @@ impl WaitId { P_ALL => Ok(WaitId::Any), P_PID => Ok(WaitId::Pid(id)), P_PGID => Ok(WaitId::Pgid(id)), - P_PIDFD => { - panic!("PDIFD type is unsupported") - } + P_PIDFD => panic!("P_PIDFD type is not supported"), _ => Err(EINVAL), } } pub fn from_id(id: i32, thread: &Thread) -> Self { - if id < -1 { - WaitId::Pgid((-id).cast_unsigned()) - } else if id == -1 { - WaitId::Any - } else if id == 0 { - let procs = Task::block_on(ProcessList::get().read()); - WaitId::Pgid(thread.process.pgroup(procs.prove()).pgid) - } else { - 
WaitId::Pid(id.cast_unsigned()) + match id { + ..-1 => WaitId::Pgid((-id).cast_unsigned()), + -1 => WaitId::Any, + 0 => WaitId::Pgid(thread.process.pgroup_rcu().pgid), + _ => WaitId::Pid(id.cast_unsigned()), } } } @@ -205,11 +200,11 @@ impl ProcessBuilder { } } - pub fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { + pub async fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { let mm_list = if clone_args.flags.contains(CloneFlags::CLONE_VM) { - Task::block_on(process.mm_list.new_shared()) + process.mm_list.new_shared().await } else { - Task::block_on(process.mm_list.new_cloned()) + process.mm_list.new_cloned().await }; if let Some(exit_signal) = clone_args.exit_signal { @@ -350,8 +345,18 @@ impl Process { trace_continue: bool, ) -> KResult> { let wait_object = { - let mut waits = self.wait_list.entry(wait_id, trace_stop, trace_continue); + let mut unlocked_waits = None; + loop { + let mut waits = match unlocked_waits { + Some(wait) => wait.await?, + None => { + self.wait_list + .entry(wait_id, trace_stop, trace_continue) + .await + } + }; + if let Some(object) = waits.get() { break object; } @@ -369,7 +374,7 @@ impl Process { return Ok(None); } - waits = waits.wait(no_block).await?; + unlocked_waits = Some(waits.wait(no_block)); } }; @@ -377,7 +382,7 @@ impl Process { Ok(Some(wait_object)) } else { let mut procs = ProcessList::get().write().await; - procs.remove_process(wait_object.pid); + procs.remove_process(wait_object.pid).await; assert!(self .inner .access_mut(procs.prove_mut()) @@ -390,8 +395,8 @@ impl Process { } /// Create a new session for the process. - pub fn setsid(self: &Arc) -> KResult { - let mut process_list = Task::block_on(ProcessList::get().write()); + pub async fn setsid(self: &Arc) -> KResult { + let mut process_list = ProcessList::get().write().await; // If there exists a session that has the same sid as our pid, we can't create a new // session. The standard says that we should create a new process group and be the // only process in the new process group and session. @@ -404,12 +409,14 @@ impl Process { .session(session.clone()) .build(&mut process_list); - { - let _old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap(); - let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); - old_pgroup.remove_member(self.pid, process_list.prove_mut()); - Task::block_on(rcu_sync()); - } + let old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap(); + let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); + old_pgroup.remove_member(self.pid, process_list.prove_mut()); + + call_rcu(move || { + drop(old_session); + drop(old_pgroup); + }); Ok(pgroup.pgid) } @@ -455,10 +462,9 @@ impl Process { }; pgroup.remove_member(self.pid, procs.prove_mut()); - { - let _old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap(); - Task::block_on(rcu_sync()); - } + + let old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap(); + call_rcu(move || drop(old_pgroup)); Ok(()) } @@ -467,8 +473,8 @@ impl Process { /// /// This function should be called on the process that issued the syscall in order to do /// permission checks. - pub fn setpgid(self: &Arc, pid: u32, pgid: u32) -> KResult<()> { - let mut procs = Task::block_on(ProcessList::get().write()); + pub async fn setpgid(self: &Arc, pid: u32, pgid: u32) -> KResult<()> { + let mut procs = ProcessList::get().write().await; // We may set pgid of either the calling process or a child process. 
if pid == self.pid { self.do_setpgid(pgid, &mut procs) @@ -572,9 +578,9 @@ impl WaitList { /// # Safety /// Locks `ProcessList` and `WaitList` at the same time. When `wait` is called, /// releases the lock on `ProcessList` and `WaitList` and waits on `cv_wait_procs`. - pub fn entry(&self, wait_id: WaitId, want_stop: bool, want_continue: bool) -> Entry { + pub async fn entry(&self, wait_id: WaitId, want_stop: bool, want_continue: bool) -> Entry { Entry { - process_list: Task::block_on(ProcessList::get().read()), + process_list: ProcessList::get().read().await, wait_procs: self.wait_procs.lock(), cv: &self.cv_wait_procs, want_stop, @@ -603,9 +609,8 @@ impl Entry<'_, '_, '_> { WaitId::Any => true, WaitId::Pid(pid) => item.pid == pid, WaitId::Pgid(pgid) => { - let procs = Task::block_on(ProcessList::get().read()); - if let Some(process) = procs.try_find_process(item.pid) { - return process.pgroup(procs.prove()).pgid == pgid; + if let Some(process) = self.process_list.try_find_process(item.pid) { + return process.pgroup(self.process_list.prove()).pgid == pgid; } false } @@ -619,7 +624,7 @@ impl Entry<'_, '_, '_> { } } - pub fn wait(self, no_block: bool) -> impl core::future::Future> { + pub fn wait(self, no_block: bool) -> impl core::future::Future> + Send { let wait_procs = self.wait_procs.unlock(); async move { diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs index 2832dae5..af073e84 100644 --- a/src/kernel/task/process_list.rs +++ b/src/kernel/task/process_list.rs @@ -9,7 +9,7 @@ use alloc::{ collections::btree_map::BTreeMap, sync::{Arc, Weak}, }; -use eonix_runtime::task::Task; +use eonix_mm::address::Addr; use eonix_sync::{AsProof as _, AsProofMut as _, RwLock}; pub struct ProcessList { @@ -54,7 +54,7 @@ impl ProcessList { self.threads.insert(thread.tid, thread.clone()); } - pub fn remove_process(&mut self, pid: u32) { + pub async fn remove_process(&mut self, pid: u32) { // Thread group leader has the same tid as the pid. if let Some(thread) = self.threads.remove(&pid) { self.processes.remove(&pid); @@ -64,7 +64,7 @@ impl ProcessList { let pgroup = unsafe { thread.process.pgroup.swap(None) }.unwrap(); let _parent = unsafe { thread.process.parent.swap(None) }.unwrap(); pgroup.remove_member(pid, self.prove_mut()); - Task::block_on(rcu_sync()); + rcu_sync().await; if Arc::strong_count(&pgroup) == 1 { self.pgroups.remove(&pgroup.pgid); @@ -135,11 +135,9 @@ impl ProcessList { } if let Some(clear_ctid) = thread.get_clear_ctid() { - let _ = UserPointerMut::new(clear_ctid as *mut u32) - .unwrap() - .write(0u32); + let _ = UserPointerMut::new(clear_ctid).unwrap().write(0u32); - let _ = futex_wake(clear_ctid, None, 1).await; + let _ = futex_wake(clear_ctid.addr(), None, 1).await; } if let Some(robust_list) = thread.get_robust_list() { @@ -150,14 +148,13 @@ impl ProcessList { if thread.tid == process.pid { assert_eq!(thread.tid, process.pid); - thread.files.close_all(); + thread.files.close_all().await; // If we are the session leader, we should drop the control terminal. 
if process.session(self.prove()).sid == process.pid { - if let Some(terminal) = - Task::block_on(process.session(self.prove()).drop_control_terminal()) + if let Some(terminal) = process.session(self.prove()).drop_control_terminal().await { - terminal.drop_session(); + terminal.drop_session().await; } } diff --git a/src/kernel/task/session.rs b/src/kernel/task/session.rs index 261a60c0..a7b57afd 100644 --- a/src/kernel/task/session.rs +++ b/src/kernel/task/session.rs @@ -87,14 +87,14 @@ impl Session { ) -> KResult<()> { let mut job_control = self.job_control.write().await; if let Some(_) = job_control.control_terminal.as_ref() { - if let Some(session) = terminal.session().as_ref() { + if let Some(session) = terminal.session().await.as_ref() { if session.sid == self.sid { return Ok(()); } } return Err(EPERM); } - terminal.set_session(self, forced)?; + terminal.set_session(self, forced).await?; job_control.control_terminal = Some(terminal.clone()); job_control.foreground = Arc::downgrade(&Thread::current().process.pgroup(procs)); Ok(()) diff --git a/src/kernel/task/signal.rs b/src/kernel/task/signal.rs index 5cff2fe6..d9970cad 100644 --- a/src/kernel/task/signal.rs +++ b/src/kernel/task/signal.rs @@ -9,7 +9,7 @@ use core::{cmp::Reverse, task::Waker}; use eonix_hal::fpu::FpuState; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; -use eonix_runtime::task::Task; +use eonix_runtime::scheduler::Runtime; use eonix_sync::AsProof as _; use intrusive_collections::UnsafeRef; use posix_types::signal::{SigSet, Signal}; @@ -226,15 +226,12 @@ impl SignalList { // `SIGSTOP` can only be waken up by `SIGCONT` or `SIGKILL`. // SAFETY: Preempt disabled above. - { + Runtime::block_till_woken(|waker| { let mut inner = self.inner.lock(); - let waker = Waker::from(Task::current().clone()); - - let old_waker = inner.stop_waker.replace(waker); + let old_waker = inner.stop_waker.replace(waker.clone()); assert!(old_waker.is_none(), "We should not have a waker here"); - } - - Task::park_preempt_disabled(); + }) + .await; if let Some(parent) = thread.process.parent.load() { parent.notify( @@ -296,15 +293,15 @@ impl SignalList { let old_fpu_state_vaddr = old_trap_ctx_vaddr + size_of::(); let old_mask_vaddr = old_fpu_state_vaddr + size_of::(); - *trap_ctx = UserPointer::::new_vaddr(old_trap_ctx_vaddr)?.read()?; + *trap_ctx = UserPointer::::with_addr(old_trap_ctx_vaddr)?.read()?; // Make sure that at least we won't crash the kernel. 
if !trap_ctx.is_user_mode() || !trap_ctx.is_interrupt_enabled() { return Err(EFAULT)?; } - *fpu_state = UserPointer::::new_vaddr(old_fpu_state_vaddr)?.read()?; - self.inner.lock().mask = UserPointer::::new_vaddr(old_mask_vaddr)?.read()?; + *fpu_state = UserPointer::::with_addr(old_fpu_state_vaddr)?.read()?; + self.inner.lock().mask = UserPointer::::with_addr(old_mask_vaddr)?.read()?; Ok(()) } diff --git a/src/kernel/task/signal/signal_action.rs b/src/kernel/task/signal/signal_action.rs index 98682547..708f9802 100644 --- a/src/kernel/task/signal/signal_action.rs +++ b/src/kernel/task/signal/signal_action.rs @@ -3,6 +3,7 @@ use crate::{ io::BufferFill as _, kernel::{ constants::{EFAULT, EINVAL}, + syscall::UserMut, user::UserBuffer, }, }; @@ -152,7 +153,7 @@ impl SignalAction { let saved_data_addr = (current_sp - SAVED_DATA_SIZE).floor_to(16); let mut saved_data_buffer = - UserBuffer::new(saved_data_addr.addr() as *mut u8, SAVED_DATA_SIZE)?; + UserBuffer::new(UserMut::new(saved_data_addr), SAVED_DATA_SIZE)?; saved_data_buffer.copy(trap_ctx)?.ok_or(EFAULT)?; saved_data_buffer.copy(fpu_state)?.ok_or(EFAULT)?; @@ -200,7 +201,7 @@ impl SignalAction { Some(return_address), &[Long::new_val(signal.into_raw() as _).get()], |vaddr, data| -> Result<(), u32> { - let mut buffer = UserBuffer::new(vaddr.addr() as *mut u8, data.len())?; + let mut buffer = UserBuffer::new(UserMut::new(vaddr), data.len())?; for ch in data.iter() { buffer.copy(&ch)?.ok_or(EFAULT)?; } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index cccbb918..11348e51 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -1,11 +1,11 @@ use super::{ signal::{RaiseResult, SignalList}, - Process, ProcessList, WaitType, + stackful, Process, ProcessList, WaitType, }; use crate::{ kernel::{ interrupt::default_irq_handler, - syscall::{syscall_handlers, SyscallHandler}, + syscall::{syscall_handlers, SyscallHandler, User, UserMut}, task::{clone::CloneArgs, futex::RobustListHead, CloneFlags}, timer::{should_reschedule, timer_interrupt}, user::{UserPointer, UserPointerMut}, @@ -13,14 +13,14 @@ use crate::{ }, prelude::*, }; -use alloc::sync::Arc; +use alloc::{alloc::Allocator, sync::Arc}; use atomic_unique_refcell::AtomicUniqueRefCell; use core::{ - future::Future, + future::{poll_fn, Future}, pin::Pin, ptr::NonNull, sync::atomic::{AtomicBool, Ordering}, - task::{Context, Poll, Waker}, + task::{Context, Poll}, }; use eonix_hal::{ fpu::FpuState, @@ -28,23 +28,21 @@ use eonix_hal::{ traits::{ fault::Fault, fpu::RawFpuState as _, - trap::{IrqState as _, RawTrapContext, TrapReturn, TrapType}, + trap::{RawTrapContext, TrapReturn, TrapType}, }, - trap::{disable_irqs_save, TrapContext}, + trap::TrapContext, }; use eonix_mm::address::{Addr as _, VAddr}; -use eonix_runtime::run::{Contexted, Run, RunState}; use eonix_sync::AsProofMut as _; use pointers::BorrowedArc; use posix_types::signal::Signal; +use stalloc::UnsafeStalloc; #[eonix_percpu::define_percpu] static CURRENT_THREAD: Option> = None; -pub struct ThreadRunnable { - thread: Arc, - future: F, -} +#[derive(Clone, Copy)] +pub struct ThreadAlloc<'a>(pub &'a UnsafeStalloc<1023, 32>); pub struct ThreadBuilder { tid: Option, @@ -54,8 +52,8 @@ pub struct ThreadBuilder { fs_context: Option>, signal_list: Option, tls: Option, - set_child_tid: Option, - clear_child_tid: Option, + set_child_tid: Option>, + clear_child_tid: Option>, trap_ctx: Option, fpu_state: Option, @@ -71,11 +69,11 @@ struct ThreadInner { /// User pointer /// Store child thread's tid when child thread 
returns to user space. - set_child_tid: Option, + set_child_tid: Option>, - clear_child_tid: Option, + clear_child_tid: Option>, - robust_list_address: Option, + robust_list_address: Option>, } pub struct Thread { @@ -147,12 +145,12 @@ impl ThreadBuilder { self } - pub fn set_child_tid(mut self, set_child_tid: Option) -> Self { + pub fn set_child_tid(mut self, set_child_tid: Option>) -> Self { self.set_child_tid = set_child_tid; self } - pub fn clear_child_tid(mut self, clear_child_tid: Option) -> Self { + pub fn clear_child_tid(mut self, clear_child_tid: Option>) -> Self { self.clear_child_tid = clear_child_tid; self } @@ -291,13 +289,13 @@ impl Thread { Ok(()) } - pub fn set_robust_list(&self, robust_list_address: Option) { + pub fn set_robust_list(&self, robust_list_address: Option>) { self.inner.lock().robust_list_address = robust_list_address; } pub fn get_robust_list(&self) -> Option { let addr = self.inner.lock().robust_list_address?; - let user_pointer = UserPointer::new(addr.addr() as *const RobustListHead).ok()?; + let user_pointer = UserPointer::new(addr).ok()?; user_pointer.read().ok() } @@ -310,25 +308,30 @@ impl Thread { self.inner.lock().name.clone() } - pub fn clear_child_tid(&self, clear_child_tid: Option) { + pub fn clear_child_tid(&self, clear_child_tid: Option>) { self.inner.lock().clear_child_tid = clear_child_tid; } - pub fn get_set_ctid(&self) -> Option { + pub fn get_set_ctid(&self) -> Option> { self.inner.lock().set_child_tid } - pub fn get_clear_ctid(&self) -> Option { + pub fn get_clear_ctid(&self) -> Option> { self.inner.lock().clear_child_tid } - pub fn handle_syscall(&self, no: usize, args: [usize; 6]) -> Option { + pub async fn handle_syscall( + &self, + thd_alloc: ThreadAlloc<'_>, + no: usize, + args: [usize; 6], + ) -> Option { match syscall_handlers().get(no) { Some(Some(SyscallHandler { handler, name: _name, .. - })) => handler(self, args), + })) => handler(self, thd_alloc, args).await, _ => { println_warn!("Syscall {no}({no:#x}) isn't implemented."); self.raise(Signal::SIGSYS); @@ -353,12 +356,18 @@ impl Thread { async fn real_run(&self) { if let Some(set_ctid) = self.get_set_ctid() { - UserPointerMut::new(set_ctid as *mut u32) + UserPointerMut::new(set_ctid) .expect("set_child_tid pointer is invalid") .write(self.tid) .expect("set_child_tid write failed"); } + let stack_alloc = unsafe { + // SAFETY: The allocator will only be used within the context of this thread. 
+ UnsafeStalloc::new() + }; + let thd_alloc = ThreadAlloc(&stack_alloc); + while !self.is_dead() { if self.signal_list.has_pending_signal() { self.signal_list @@ -397,6 +406,7 @@ impl Thread { self.signal_list.raise(Signal::SIGILL); } TrapType::Fault(Fault::Unknown(_)) => unimplemented!("Unhandled fault"), + TrapType::Breakpoint => unimplemented!("Breakpoint in user space"), TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => { callback(timer_interrupt); @@ -406,7 +416,7 @@ impl Thread { } } TrapType::Syscall { no, args } => { - if let Some(retval) = self.handle_syscall(no, args) { + if let Some(retval) = self.handle_syscall(thd_alloc, no, args).await { let mut trap_ctx = self.trap_ctx.borrow(); trap_ctx.set_user_return_value(retval); @@ -421,28 +431,52 @@ impl Thread { } } - pub async fn run(self: Arc) { - struct ContextedRun<'a, F: Future>(F, &'a Thread); + async fn contexted(&self, future: F) -> F::Output + where + F: Future, + { + let mut future = core::pin::pin!(future); - impl Future for ContextedRun<'_, F> { - type Output = F::Output; + core::future::poll_fn(|cx| { + self.process.mm_list.activate(); - fn poll(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { - let irq_state = disable_irqs_save(); - let (future, _) = unsafe { - // SAFETY: We construct a pinned future and `&Thread` is `Unpin`. - let me = self.as_mut().get_unchecked_mut(); - (Pin::new_unchecked(&mut me.0), me.1) - }; + CURRENT_THREAD.set(NonNull::new(&raw const *self as *mut _)); + + unsafe { + eonix_preempt::disable(); - let retval = future.poll(ctx); + // SAFETY: Preemption is disabled. + self.load_thread_area32(); - irq_state.restore(); - retval + eonix_preempt::enable(); } - } - ContextedRun(self.real_run(), &self).await + let result = future.as_mut().poll(cx); + + self.process.mm_list.deactivate(); + + CURRENT_THREAD.set(None); + + result + }) + .await + } + + pub fn run(self: Arc) -> impl Future + Send + 'static { + async move { self.contexted(stackful(self.real_run())).await } + } +} + +unsafe impl Allocator for ThreadAlloc<'_> { + fn allocate( + &self, + layout: core::alloc::Layout, + ) -> Result, alloc::alloc::AllocError> { + self.0.allocate(layout) + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: core::alloc::Layout) { + self.0.deallocate(ptr, layout); } } @@ -468,56 +502,13 @@ pub async fn yield_now() { Yield { yielded: false }.await; } -pub fn new_thread_runnable( - thread: Arc, -) -> ThreadRunnable + Send + 'static> { - ThreadRunnable { - thread: thread.clone(), - future: thread.run(), - } -} - -impl Contexted for ThreadRunnable { - fn load_running_context(&self) { - self.thread.process.mm_list.activate(); - - let raw_ptr: *const Thread = &raw const *self.thread; - CURRENT_THREAD.set(NonNull::new(raw_ptr as *mut _)); - - unsafe { - // SAFETY: Preemption is disabled. 
- self.thread.load_thread_area32(); - } - - unsafe { - let trap_ctx_ptr: *const TrapContext = &raw const *self.thread.trap_ctx.borrow(); - // SAFETY: - CPU::local() - .as_mut() - .load_interrupt_stack(trap_ctx_ptr as u64); - } - } - - fn restore_running_context(&self) { - self.thread.process.mm_list.deactivate(); - - CURRENT_THREAD.set(None); - } -} - -impl Run for ThreadRunnable { - type Output = F::Output; - - fn run(mut self: Pin<&mut Self>, waker: &Waker) -> RunState { - let mut ctx = Context::from_waker(waker); - - match unsafe { - self.as_mut() - .map_unchecked_mut(|me| &mut me.future) - .poll(&mut ctx) - } { - Poll::Ready(output) => RunState::Finished(output), - Poll::Pending => RunState::Running, +pub fn wait_for_wakeups() -> impl Future { + let mut waited = false; + poll_fn(move |_| match waited { + true => Poll::Ready(()), + false => { + waited = true; + Poll::Pending } - } + }) } diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 31c08ea2..86024338 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -10,7 +10,6 @@ use alloc::{ }; use bitflags::bitflags; use eonix_log::ConsoleWrite; -use eonix_runtime::task::Task; use eonix_sync::{AsProof as _, Mutex}; use posix_types::signal::Signal; @@ -447,18 +446,18 @@ impl Terminal { } } - fn signal(&self, inner: &mut TerminalInner, signal: Signal) { + async fn signal(&self, inner: &mut TerminalInner, signal: Signal) { if let Some(session) = inner.session.upgrade() { - Task::block_on(session.raise_foreground(signal)); + session.raise_foreground(signal).await; } if !inner.termio.noflsh() { self.clear_read_buffer(inner); } } - fn echo_and_signal(&self, inner: &mut TerminalInner, ch: u8, signal: Signal) { + async fn echo_and_signal(&self, inner: &mut TerminalInner, ch: u8, signal: Signal) { self.echo_char(inner, ch); - self.signal(inner, signal); + self.signal(inner, signal).await; } fn do_commit_char(&self, inner: &mut TerminalInner, ch: u8) { @@ -482,13 +481,13 @@ impl Terminal { match ch { 0xff => {} ch if ch == inner.termio.vintr() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGINT) + return self.echo_and_signal(&mut inner, ch, Signal::SIGINT).await } ch if ch == inner.termio.vquit() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGQUIT) + return self.echo_and_signal(&mut inner, ch, Signal::SIGQUIT).await } ch if ch == inner.termio.vsusp() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGTSTP) + return self.echo_and_signal(&mut inner, ch, Signal::SIGTSTP).await } _ => {} } @@ -623,12 +622,12 @@ impl Terminal { ptr.write(window_size) } TerminalIORequest::GetTermios(ptr) => { - let termios = Task::block_on(self.inner.lock()).termio.get_user(); + let termios = self.inner.lock().await.termio.get_user(); ptr.write(termios) } TerminalIORequest::SetTermios(ptr) => { let user_termios = ptr.read()?; - let mut inner = Task::block_on(self.inner.lock()); + let mut inner = self.inner.lock().await; // TODO: We ignore unknown bits for now. inner.termio.iflag = TermioIFlags::from_bits_truncate(user_termios.iflag as u16); @@ -644,13 +643,13 @@ impl Terminal { } /// Assign the `session` to this terminal. Drop the previous session if `forced` is true. 
- pub fn set_session(&self, session: &Arc, forced: bool) -> KResult<()> { - let mut inner = Task::block_on(self.inner.lock()); + pub async fn set_session(&self, session: &Arc, forced: bool) -> KResult<()> { + let mut inner = self.inner.lock().await; if let Some(session) = inner.session.upgrade() { if !forced { Err(EPERM) } else { - Task::block_on(session.drop_control_terminal()); + session.drop_control_terminal().await; inner.session = Arc::downgrade(&session); Ok(()) } @@ -661,12 +660,12 @@ impl Terminal { } } - pub fn drop_session(&self) { - Task::block_on(self.inner.lock()).session = Weak::new(); + pub async fn drop_session(&self) { + self.inner.lock().await.session = Weak::new(); } - pub fn session(&self) -> Option> { - Task::block_on(self.inner.lock()).session.upgrade() + pub async fn session(&self) -> Option> { + self.inner.lock().await.session.upgrade() } } diff --git a/src/kernel/user.rs b/src/kernel/user.rs index b3701507..5e410c81 100644 --- a/src/kernel/user.rs +++ b/src/kernel/user.rs @@ -1,7 +1,3 @@ -pub mod dataflow; +mod dataflow; -#[allow(unused_imports)] -pub use dataflow::{UserBuffer, UserString}; - -pub type UserPointer<'a, T> = dataflow::UserPointer<'a, T, true>; -pub type UserPointerMut<'a, T> = dataflow::UserPointer<'a, T, false>; +pub use dataflow::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}; diff --git a/src/kernel/user/dataflow.rs b/src/kernel/user/dataflow.rs index 17dbd4c9..02e7d791 100644 --- a/src/kernel/user/dataflow.rs +++ b/src/kernel/user/dataflow.rs @@ -1,17 +1,20 @@ +use crate::{ + io::{Buffer, FillResult}, + prelude::*, +}; use crate::{ io::{IntoStream, Stream}, - kernel::constants::{EFAULT, EINVAL}, + kernel::{ + constants::{EFAULT, EINVAL}, + syscall::{User, UserMut}, + }, }; use core::{arch::asm, ffi::CStr, marker::PhantomData}; +use eonix_mm::address::Addr; use eonix_preempt::assert_preempt_enabled; -use crate::{ - io::{Buffer, FillResult}, - prelude::*, -}; - pub struct CheckedUserPointer<'a> { - ptr: *const u8, + ptr: User, len: usize, _phantom: PhantomData<&'a ()>, } @@ -27,7 +30,12 @@ pub struct UserString<'a> { len: usize, } -pub struct UserPointer<'a, T: Copy, const CONST: bool> { +pub struct UserPointer<'a, T: Copy> { + pointer: CheckedUserPointer<'a>, + _phantom: PhantomData, +} + +pub struct UserPointerMut<'a, T: Copy> { pointer: CheckedUserPointer<'a>, _phantom: PhantomData, } @@ -37,9 +45,9 @@ pub struct UserStream<'a> { cur: usize, } -impl UserPointer<'_, T, CONST> { - pub fn new(ptr: *const T) -> KResult { - let pointer = CheckedUserPointer::new(ptr as *const u8, core::mem::size_of::())?; +impl UserPointer<'_, T> { + pub fn new(ptr: User) -> KResult { + let pointer = CheckedUserPointer::new(ptr.cast(), core::mem::size_of::())?; Ok(Self { pointer, @@ -47,8 +55,8 @@ impl UserPointer<'_, T, CONST> { }) } - pub fn new_vaddr(vaddr: usize) -> KResult { - Self::new(vaddr as *mut T) + pub fn with_addr(vaddr: usize) -> KResult { + Self::new(User::with_addr(vaddr)) } /// # Might Sleep @@ -60,22 +68,48 @@ impl UserPointer<'_, T, CONST> { } pub fn offset(&self, offset: isize) -> KResult { - let new_vaddr = self.pointer.ptr as isize + offset * size_of::() as isize; - Self::new_vaddr(new_vaddr as usize) + let new_ptr = self.pointer.ptr.offset(offset * size_of::() as isize); + Self::new(new_ptr.cast()) } } -impl<'a, T: Copy> UserPointer<'a, T, false> { +impl<'a, T: Copy> UserPointerMut<'a, T> { + pub fn new(ptr: UserMut) -> KResult { + let pointer = CheckedUserPointer::new(ptr.cast().as_const(), core::mem::size_of::())?; + 
+ Ok(Self { + pointer, + _phantom: PhantomData, + }) + } + + pub fn with_addr(vaddr: usize) -> KResult { + Self::new(UserMut::with_addr(vaddr)) + } + + /// # Might Sleep + pub fn read(&self) -> KResult { + let mut value = core::mem::MaybeUninit::::uninit(); + self.pointer + .read(value.as_mut_ptr() as *mut (), core::mem::size_of::())?; + Ok(unsafe { value.assume_init() }) + } + + pub fn offset(&self, offset: isize) -> KResult { + let new_ptr = self.pointer.ptr.offset(offset * size_of::() as isize); + Self::new(unsafe { new_ptr.cast().as_mut() }) + } + pub fn write(&self, value: T) -> KResult<()> { self.pointer - .write(&value as *const T as *mut (), core::mem::size_of::()) + .write(&raw const value as *mut (), core::mem::size_of::()) } } impl CheckedUserPointer<'_> { - pub fn new(ptr: *const u8, len: usize) -> KResult { + pub fn new(ptr: User, len: usize) -> KResult { const USER_MAX_ADDR: usize = 0x7ff_fff_fff_fff; - let end = (ptr as usize).checked_add(len); + let end = ptr.addr().checked_add(len); if ptr.is_null() || end.ok_or(EFAULT)? > USER_MAX_ADDR { Err(EFAULT) } else { @@ -89,19 +123,10 @@ impl CheckedUserPointer<'_> { pub fn forward(&mut self, offset: usize) { assert!(offset <= self.len); - self.ptr = self.ptr.wrapping_offset(offset as isize); + self.ptr = self.ptr.offset(offset as isize); self.len -= offset; } - pub fn get_const(&self) -> *const T { - self.ptr as *const T - } - - pub fn as_slice(&self) -> &[u8] { - // SAFETY: the pointer's validity is checked in `new` - unsafe { core::slice::from_raw_parts(self.ptr, self.len) } - } - /// # Might Sleep pub fn read(&self, buffer: *mut (), total: usize) -> KResult<()> { assert_preempt_enabled!("UserPointer::read"); @@ -126,7 +151,7 @@ impl CheckedUserPointer<'_> { ".quad 0x3", // type: load ".popsection", inout("rcx") total => error_bytes, - inout("rsi") self.ptr => _, + inout("rsi") self.ptr.addr() => _, inout("rdi") buffer => _, ); @@ -148,7 +173,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x3", // type: load ".popsection", inout("a0") total => error_bytes, - inout("a1") self.ptr => _, + inout("a1") self.ptr.addr() => _, inout("a2") buffer => _, out("t0") _, ); @@ -171,7 +196,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x3", // type: load ".popsection", inout("$a0") total => error_bytes, - inout("$a1") self.ptr => _, + inout("$a1") self.ptr.addr() => _, inout("$a2") buffer => _, out("$t0") _, ); @@ -210,7 +235,7 @@ impl CheckedUserPointer<'_> { ".popsection", inout("rcx") total => error_bytes, inout("rsi") data => _, - inout("rdi") self.ptr => _, + inout("rdi") self.ptr.addr() => _, ); #[cfg(target_arch = "riscv64")] @@ -232,7 +257,7 @@ impl CheckedUserPointer<'_> { ".popsection", inout("a0") total => error_bytes, inout("a1") data => _, - inout("a2") self.ptr => _, + inout("a2") self.ptr.addr() => _, out("t0") _, ); @@ -255,7 +280,7 @@ impl CheckedUserPointer<'_> { ".popsection", inout("$a0") total => error_bytes, inout("$a1") data => _, - inout("$a2") self.ptr => _, + inout("$a2") self.ptr.addr() => _, out("$t0") _, ); }; @@ -293,7 +318,7 @@ impl CheckedUserPointer<'_> { ".popsection", in("rax") 0, inout("rcx") self.len => error_bytes, - inout("rdi") self.ptr => _, + inout("rdi") self.ptr.addr() => _, options(att_syntax) ); @@ -313,7 +338,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x1", // type: store ".popsection", inout("a0") self.len => error_bytes, - inout("a1") self.ptr => _, + inout("a1") self.ptr.addr() => _, ); #[cfg(target_arch = "loongarch64")] @@ -332,7 +357,7 @@ impl CheckedUserPointer<'_> { ".8byte 0x1", // type: store 
".popsection", inout("$a0") self.len => error_bytes, - inout("$a1") self.ptr => _, + inout("$a1") self.ptr.addr() => _, ); }; @@ -345,8 +370,8 @@ impl CheckedUserPointer<'_> { } impl UserBuffer<'_> { - pub fn new(ptr: *mut u8, size: usize) -> KResult { - let ptr = CheckedUserPointer::new(ptr, size)?; + pub fn new(ptr: UserMut, size: usize) -> KResult { + let ptr = CheckedUserPointer::new(ptr.as_const(), size)?; Ok(Self { ptr, size, cur: 0 }) } @@ -388,7 +413,7 @@ impl<'lt> Buffer for UserBuffer<'lt> { impl<'lt> UserString<'lt> { /// # Might Sleep - pub fn new(ptr: *const u8) -> KResult { + pub fn new(ptr: User) -> KResult { assert_preempt_enabled!("UserString::new"); const MAX_LEN: usize = 4096; @@ -416,7 +441,7 @@ impl<'lt> UserString<'lt> { ".popsection", out("al") _, inout("rcx") MAX_LEN => result, - ptr = inout(reg) ptr.ptr => _, + ptr = inout(reg) ptr.ptr.addr() => _, options(att_syntax), ); @@ -439,7 +464,7 @@ impl<'lt> UserString<'lt> { ".popsection", out("t0") _, inout("a0") MAX_LEN => result, - inout("a1") ptr.ptr => _, + inout("a1") ptr.ptr.addr() => _, ); #[cfg(target_arch = "loongarch64")] @@ -461,7 +486,7 @@ impl<'lt> UserString<'lt> { ".popsection", out("$t0") _, inout("$a0") MAX_LEN => result, - inout("$a1") ptr.ptr => _, + inout("$a1") ptr.ptr.addr() => _, ); }; @@ -478,7 +503,7 @@ impl<'lt> UserString<'lt> { pub fn as_cstr(&self) -> &'lt CStr { unsafe { CStr::from_bytes_with_nul_unchecked(core::slice::from_raw_parts( - self.ptr.get_const(), + self.ptr.ptr.addr() as *const u8, self.len + 1, )) } diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs index bd516bf2..8bcd9f8a 100644 --- a/src/kernel/vfs/dentry.rs +++ b/src/kernel/vfs/dentry.rs @@ -2,7 +2,7 @@ pub mod dcache; use super::{ inode::{Ino, Inode, Mode, RenameData, WriteOffset}, - s_isblk, s_ischr, s_isdir, s_isreg, DevId, FsContext, + DevId, FsContext, }; use crate::{ hash::KernelHasher, @@ -250,7 +250,7 @@ impl Dentry { } let parent = self.parent().get_inode()?; - parent.creat(self, mode as u32) + parent.creat(self, mode) } } } @@ -409,14 +409,14 @@ impl Dentry { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load(Ordering::Relaxed) { - mode if s_isdir(mode) => Err(EISDIR), - mode if s_isreg(mode) => inode.read(buffer, offset), - mode if s_isblk(mode) => { + match inode.mode.load().format() { + Mode::DIR => Err(EISDIR), + Mode::REG => inode.read(buffer, offset), + Mode::BLK => { let device = BlockDevice::get(inode.devid()?)?; Ok(device.read_some(offset, buffer)?.allow_partial()) } - mode if s_ischr(mode) => { + Mode::CHR => { let device = CharDevice::get(inode.devid()?).ok_or(EPERM)?; device.read(buffer) } @@ -427,11 +427,11 @@ impl Dentry { pub fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load(Ordering::Relaxed) { - mode if s_isdir(mode) => Err(EISDIR), - mode if s_isreg(mode) => inode.write(stream, offset), - mode if s_isblk(mode) => Err(EINVAL), // TODO - mode if s_ischr(mode) => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), + match inode.mode.load().format() { + Mode::DIR => Err(EISDIR), + Mode::REG => inode.write(stream, offset), + Mode::BLK => Err(EINVAL), // TODO + Mode::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), _ => Err(EINVAL), } } diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs index 
9dfdbddc..188a1cfc 100644 --- a/src/kernel/vfs/dentry/dcache.rs +++ b/src/kernel/vfs/dentry/dcache.rs @@ -1,14 +1,14 @@ use super::{Dentry, Inode}; use crate::kernel::constants::ENOENT; +use crate::kernel::task::block_on; +use crate::kernel::vfs::inode::Mode; use crate::rcu::RCUPointer; use crate::{ - kernel::vfs::{s_isdir, s_islnk}, prelude::*, rcu::{RCUIterator, RCUList}, }; use alloc::sync::Arc; use core::sync::atomic::Ordering; -use eonix_runtime::task::Task; use eonix_sync::Mutex; const DCACHE_HASH_BITS: u32 = 8; @@ -42,7 +42,7 @@ pub fn d_find_fast(dentry: &Dentry) -> Option> { /// /// Silently fail without any side effects pub fn d_try_revalidate(dentry: &Arc) { - let _lock = Task::block_on(D_EXCHANGE_LOCK.lock()); + let _lock = block_on(D_EXCHANGE_LOCK.lock()); (|| -> KResult<()> { let parent = dentry.parent().get_inode()?; @@ -57,9 +57,9 @@ pub fn d_try_revalidate(dentry: &Arc) { /// /// Dentry flags will be determined by the inode's mode. pub fn d_save(dentry: &Arc, inode: Arc) -> KResult<()> { - match inode.mode.load(Ordering::Acquire) { - mode if s_isdir(mode) => dentry.save_dir(inode), - mode if s_islnk(mode) => dentry.save_symlink(inode), + match inode.mode.load().format() { + Mode::DIR => dentry.save_dir(inode), + Mode::LNK => dentry.save_symlink(inode), _ => dentry.save_reg(inode), } } diff --git a/src/kernel/vfs/file.rs b/src/kernel/vfs/file.rs deleted file mode 100644 index 49cb1d44..00000000 --- a/src/kernel/vfs/file.rs +++ /dev/null @@ -1,636 +0,0 @@ -use super::{ - dentry::Dentry, - inode::{Mode, WriteOffset}, - s_isblk, s_isreg, -}; -use crate::{ - io::{Buffer, BufferFill, ByteBuffer, Chunks, IntoStream}, - kernel::{ - constants::{TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, - mem::{paging::Page, AsMemoryBlock as _}, - task::Thread, - terminal::{Terminal, TerminalIORequest}, - user::{UserPointer, UserPointerMut}, - vfs::inode::Inode, - CharDevice, - }, - prelude::*, - sync::CondVar, -}; -use crate::{ - io::{Stream, StreamRead}, - kernel::constants::{ - EBADF, EFAULT, EINTR, EINVAL, ENOTDIR, ENOTTY, EOVERFLOW, EPIPE, ESPIPE, S_IFMT, - }, -}; -use alloc::{collections::vec_deque::VecDeque, sync::Arc}; -use bitflags::bitflags; -use core::{ - ops::{ControlFlow, Deref}, - sync::atomic::{AtomicU32, Ordering}, -}; -use eonix_runtime::task::Task; -use eonix_sync::Mutex; -use posix_types::{open::OpenFlags, signal::Signal, stat::StatX}; - -pub struct InodeFile { - read: bool, - write: bool, - append: bool, - /// Only a few modes those won't possibly change are cached here to speed up file operations. - /// Specifically, `S_IFMT` masked bits. - mode: Mode, - cursor: Mutex, - dentry: Arc, -} - -pub struct PipeInner { - buffer: VecDeque, - read_closed: bool, - write_closed: bool, -} - -pub struct Pipe { - inner: Mutex, - cv_read: CondVar, - cv_write: CondVar, -} - -pub struct PipeReadEnd { - pipe: Arc, -} - -pub struct PipeWriteEnd { - pipe: Arc, -} - -pub struct TerminalFile { - terminal: Arc, -} - -// TODO: We should use `File` as the base type, instead of `Arc` -// If we need shared states, like for `InodeFile`, the files themselves should -// have their own shared semantics. All `File` variants will just keep the -// `Clone` semantics. -// -// e.g. The `CharDevice` itself is stateless. 
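The TODO above is what the remainder of this patch addresses: the monolithic `file.rs` below is deleted and split into `file/mod.rs`, `inode_file.rs`, `pipe.rs` and `terminal_file.rs`, and `Arc<File>` is replaced by a clonable `File` handle that carries an explicit open count so the last holder can run async, type-specific close work. A rough sketch of that ownership model, simplified from the new `file/mod.rs` (per-type cleanup and the underflow panic are elided; this is not the full implementation):

```rust
use std::sync::{
    atomic::{AtomicI32, Ordering},
    Arc,
};

struct FileData {
    open_count: AtomicI32,
    // flags and the FileType variant live here in the real code
}

#[derive(Clone)]
struct File(Arc<FileData>);

impl File {
    /// Entries stored in a FileArray must be created with `dup()`...
    fn dup(&self) -> Self {
        // one more "open" reference, tracked separately from the Arc count
        self.0.open_count.fetch_add(1, Ordering::Relaxed);
        Self(self.0.clone())
    }

    /// ...and released with `close()`, never just dropped.
    async fn close(self) {
        // only the last open reference performs the async cleanup,
        // e.g. waking the peer end of a pipe
        if self.0.open_count.fetch_sub(1, Ordering::Relaxed) == 1 {
            // per-FileType cleanup elided
        }
    }
}
```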
-pub enum FileType { - Inode(InodeFile), - PipeRead(PipeReadEnd), - PipeWrite(PipeWriteEnd), - TTY(TerminalFile), - CharDev(Arc), -} - -pub struct File { - flags: AtomicU32, - file_type: FileType, -} - -impl File { - pub fn get_inode(&self) -> KResult>> { - match &self.file_type { - FileType::Inode(inode_file) => Ok(Some(inode_file.dentry.get_inode()?)), - _ => Ok(None), - } - } -} - -pub enum SeekOption { - Set(usize), - Current(isize), - End(isize), -} - -bitflags! { - pub struct PollEvent: u16 { - const Readable = 0x0001; - const Writable = 0x0002; - } -} - -impl Drop for PipeReadEnd { - fn drop(&mut self) { - self.pipe.close_read(); - } -} - -impl Drop for PipeWriteEnd { - fn drop(&mut self) { - self.pipe.close_write(); - } -} - -fn send_sigpipe_to_current() { - let current = Thread::current(); - current.raise(Signal::SIGPIPE); -} - -impl Pipe { - const PIPE_SIZE: usize = 4096; - - /// # Return - /// `(read_end, write_end)` - pub fn new(flags: OpenFlags) -> (Arc, Arc) { - let pipe = Arc::new(Self { - inner: Mutex::new(PipeInner { - buffer: VecDeque::with_capacity(Self::PIPE_SIZE), - read_closed: false, - write_closed: false, - }), - cv_read: CondVar::new(), - cv_write: CondVar::new(), - }); - - let read_flags = flags.difference(OpenFlags::O_WRONLY | OpenFlags::O_RDWR); - let mut write_flags = read_flags; - write_flags.insert(OpenFlags::O_WRONLY); - - ( - Arc::new(File { - flags: AtomicU32::new(read_flags.bits()), - file_type: FileType::PipeRead(PipeReadEnd { pipe: pipe.clone() }), - }), - Arc::new(File { - flags: AtomicU32::new(write_flags.bits()), - file_type: FileType::PipeWrite(PipeWriteEnd { pipe }), - }), - ) - } - - fn close_read(&self) { - let mut inner = Task::block_on(self.inner.lock()); - if inner.read_closed { - return; - } - - inner.read_closed = true; - self.cv_write.notify_all(); - } - - fn close_write(&self) { - let mut inner = Task::block_on(self.inner.lock()); - if inner.write_closed { - return; - } - - inner.write_closed = true; - self.cv_read.notify_all(); - } - - async fn poll(&self, event: PollEvent) -> KResult { - if !event.contains(PollEvent::Readable) { - unimplemented!("Poll event not supported."); - } - - let mut inner = self.inner.lock().await; - while inner.buffer.is_empty() && !inner.write_closed { - inner = self.cv_read.wait(inner).await; - } - - if Thread::current().signal_list.has_pending_signal() { - return Err(EINTR); - } - - let mut retval = PollEvent::empty(); - if inner.write_closed { - retval |= PollEvent::Writable; - } - - if !inner.buffer.is_empty() { - retval |= PollEvent::Readable; - } - - Ok(retval) - } - - async fn read(&self, buffer: &mut dyn Buffer) -> KResult { - let mut inner = self.inner.lock().await; - - while !inner.write_closed && inner.buffer.is_empty() { - inner = self.cv_read.wait(inner).await; - if Thread::current().signal_list.has_pending_signal() { - return Err(EINTR); - } - } - - let (data1, data2) = inner.buffer.as_slices(); - let nread = buffer.fill(data1)?.allow_partial() + buffer.fill(data2)?.allow_partial(); - inner.buffer.drain(..nread); - - self.cv_write.notify_all(); - Ok(nread) - } - - async fn write_atomic(&self, data: &[u8]) -> KResult { - let mut inner = self.inner.lock().await; - - if inner.read_closed { - send_sigpipe_to_current(); - return Err(EPIPE); - } - - while inner.buffer.len() + data.len() > Self::PIPE_SIZE { - inner = self.cv_write.wait(inner).await; - if Thread::current().signal_list.has_pending_signal() { - return Err(EINTR); - } - - if inner.read_closed { - send_sigpipe_to_current(); - return 
Err(EPIPE); - } - } - - inner.buffer.extend(data); - - self.cv_read.notify_all(); - return Ok(data.len()); - } - - async fn write(&self, stream: &mut dyn Stream) -> KResult { - let mut buffer = [0; Self::PIPE_SIZE]; - let mut total = 0; - while let Some(data) = stream.poll_data(&mut buffer)? { - let nwrote = self.write_atomic(data).await?; - total += nwrote; - if nwrote != data.len() { - break; - } - } - Ok(total) - } -} - -#[derive(Copy, Clone, Debug)] -#[repr(C, packed)] -struct UserDirent64 { - /// Inode number - d_ino: u64, - /// Implementation defined. We ignore it - d_off: u64, - /// Length of this record - d_reclen: u16, - /// File type. Set to 0 - d_type: u8, - /// Filename with a padding '\0' - d_name: [u8; 0], -} - -/// File type is at offset `d_reclen - 1`. Set it to 0 -#[derive(Copy, Clone, Debug)] -#[repr(C, packed)] -struct UserDirent { - /// Inode number - d_ino: u32, - /// Implementation defined. We ignore it - d_off: u32, - /// Length of this record - d_reclen: u16, - /// Filename with a padding '\0' - d_name: [u8; 0], -} - -impl InodeFile { - pub fn new(dentry: Arc, flags: OpenFlags) -> Arc { - // SAFETY: `dentry` used to create `InodeFile` is valid. - // SAFETY: `mode` should never change with respect to the `S_IFMT` fields. - let cached_mode = dentry - .get_inode() - .expect("`dentry` is invalid") - .mode - .load(Ordering::Relaxed) - & S_IFMT; - - let (read, write, append) = flags.as_rwa(); - - Arc::new(File { - flags: AtomicU32::new(flags.bits()), - file_type: FileType::Inode(InodeFile { - dentry, - read, - write, - append, - mode: cached_mode, - cursor: Mutex::new(0), - }), - }) - } - - fn seek(&self, option: SeekOption) -> KResult { - let mut cursor = Task::block_on(self.cursor.lock()); - - let new_cursor = match option { - SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, - SeekOption::Set(n) => n, - SeekOption::End(off) => { - let inode = self.dentry.get_inode()?; - let size = inode.size.load(Ordering::Relaxed) as usize; - size.checked_add_signed(off).ok_or(EOVERFLOW)? - } - }; - - *cursor = new_cursor; - Ok(new_cursor) - } - - fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { - if !self.write { - return Err(EBADF); - } - - let mut cursor = Task::block_on(self.cursor.lock()); - - if self.append { - let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; - - Ok(nwrote) - } else { - let nwrote = if let Some(offset) = offset { - self.dentry.write(stream, WriteOffset::Position(offset))? 
- } else { - let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?; - *cursor += nwrote; - nwrote - }; - - Ok(nwrote) - } - } - - fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { - if !self.read { - return Err(EBADF); - } - - let nread = if let Some(offset) = offset { - let nread = self.dentry.read(buffer, offset)?; - nread - } else { - let mut cursor = Task::block_on(self.cursor.lock()); - - let nread = self.dentry.read(buffer, *cursor)?; - - *cursor += nread; - nread - }; - - Ok(nread) - } - - fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = Task::block_on(self.cursor.lock()); - - let nread = self.dentry.readdir(*cursor, |filename, ino| { - // Filename length + 1 for padding '\0' - let real_record_len = core::mem::size_of::() + filename.len() + 1; - - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } - - let record = UserDirent64 { - d_ino: ino, - d_off: 0, - d_reclen: real_record_len as u16, - d_type: 0, - d_name: [0; 0], - }; - - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0])?.ok_or(EFAULT)?; - - Ok(ControlFlow::Continue(())) - })?; - - *cursor += nread; - Ok(()) - } - - fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - let mut cursor = Task::block_on(self.cursor.lock()); - - let nread = self.dentry.readdir(*cursor, |filename, ino| { - // + 1 for filename length padding '\0', + 1 for d_type. - let real_record_len = core::mem::size_of::() + filename.len() + 2; - - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } - - let record = UserDirent { - d_ino: ino as u32, - d_off: 0, - d_reclen: real_record_len as u16, - d_name: [0; 0], - }; - - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0, 0])?.ok_or(EFAULT)?; - - Ok(ControlFlow::Continue(())) - })?; - - *cursor += nread; - Ok(()) - } -} - -impl TerminalFile { - pub fn new(tty: Arc, flags: OpenFlags) -> Arc { - Arc::new(File { - flags: AtomicU32::new(flags.bits()), - file_type: FileType::TTY(TerminalFile { terminal: tty }), - }) - } - - async fn read(&self, buffer: &mut dyn Buffer) -> KResult { - self.terminal.read(buffer).await - } - - fn write(&self, stream: &mut dyn Stream) -> KResult { - stream.read_till_end(&mut [0; 128], |data| { - self.terminal.write(data); - Ok(()) - }) - } - - async fn poll(&self, event: PollEvent) -> KResult { - if !event.contains(PollEvent::Readable) { - unimplemented!("Poll event not supported.") - } - - self.terminal.poll_in().await.map(|_| PollEvent::Readable) - } - - fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { - Task::block_on(self.terminal.ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::new_vaddr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::new_vaddr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::new_vaddr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::new_vaddr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::new_vaddr(arg3)?), - _ => return Err(EINVAL), - })) - } -} - -impl FileType { - pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { - match self { - FileType::Inode(inode) => inode.read(buffer, offset), - FileType::PipeRead(pipe) => pipe.pipe.read(buffer).await, - FileType::TTY(tty) => tty.read(buffer).await, - FileType::CharDev(device) => device.read(buffer), - _ => Err(EBADF), 
- } - } - - // TODO - // /// Read from the file into the given buffers. - // /// - // /// Reads are atomic, not intermingled with other reads or writes. - // pub fn readv<'r, 'i, I: Iterator>( - // &'r self, - // buffers: I, - // ) -> KResult { - // match self { - // File::Inode(inode) => inode.readv(buffers), - // File::PipeRead(pipe) => pipe.pipe.readv(buffers), - // _ => Err(EBADF), - // } - // } - - pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { - match self { - FileType::Inode(inode) => inode.write(stream, offset), - FileType::PipeWrite(pipe) => pipe.pipe.write(stream).await, - FileType::TTY(tty) => tty.write(stream), - FileType::CharDev(device) => device.write(stream), - _ => Err(EBADF), - } - } - - pub fn seek(&self, option: SeekOption) -> KResult { - match self { - FileType::Inode(inode) => inode.seek(option), - _ => Err(ESPIPE), - } - } - - pub fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { - match self { - FileType::Inode(inode) => inode.getdents(buffer), - _ => Err(ENOTDIR), - } - } - - pub fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { - match self { - FileType::Inode(inode) => inode.getdents64(buffer), - _ => Err(ENOTDIR), - } - } - - pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { - let buffer_page = Page::alloc(); - // SAFETY: We are the only owner of the page. - let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() }; - - match self { - FileType::Inode(file) if s_isblk(file.mode) || s_isreg(file.mode) => (), - _ => return Err(EINVAL), - } - - let mut nsent = 0; - for (cur, len) in Chunks::new(0, count, buffer.len()) { - if Thread::current().signal_list.has_pending_signal() { - return if cur == 0 { Err(EINTR) } else { Ok(cur) }; - } - let nread = self - .read(&mut ByteBuffer::new(&mut buffer[..len]), None) - .await?; - if nread == 0 { - break; - } - - let nwrote = dest_file - .write(&mut buffer[..nread].into_stream(), None) - .await?; - nsent += nwrote; - - if nwrote != len { - break; - } - } - - Ok(nsent) - } - - pub fn ioctl(&self, request: usize, arg3: usize) -> KResult { - match self { - FileType::TTY(tty) => tty.ioctl(request, arg3).map(|_| 0), - _ => Err(ENOTTY), - } - } - - pub async fn poll(&self, event: PollEvent) -> KResult { - match self { - FileType::Inode(_) => Ok(event), - FileType::TTY(tty) => tty.poll(event).await, - FileType::PipeRead(PipeReadEnd { pipe }) - | FileType::PipeWrite(PipeWriteEnd { pipe }) => pipe.poll(event).await, - _ => unimplemented!("Poll event not supported."), - } - } - - pub fn statx(&self, buffer: &mut StatX, mask: u32) -> KResult<()> { - match self { - FileType::Inode(inode) => inode.dentry.statx(buffer, mask), - _ => Err(EBADF), - } - } - - pub fn as_path(&self) -> Option<&Arc> { - match self { - FileType::Inode(inode_file) => Some(&inode_file.dentry), - _ => None, - } - } -} - -impl File { - pub fn new(flags: OpenFlags, file_type: FileType) -> Arc { - Arc::new(Self { - flags: AtomicU32::new(flags.bits()), - file_type, - }) - } - - pub fn get_flags(&self) -> OpenFlags { - OpenFlags::from_bits_retain(self.flags.load(Ordering::Relaxed)) - } - - pub fn set_flags(&self, flags: OpenFlags) { - let flags = flags.difference( - OpenFlags::O_WRONLY - | OpenFlags::O_RDWR - | OpenFlags::O_CREAT - | OpenFlags::O_TRUNC - | OpenFlags::O_EXCL, - // | OpenFlags::O_NOCTTY, - ); - - self.flags.store(flags.bits(), Ordering::Relaxed); - } -} - -impl Deref for File { - type Target = FileType; - - fn deref(&self) -> &Self::Target { - &self.file_type - } -} diff 
--git a/src/kernel/vfs/file/inode_file.rs b/src/kernel/vfs/file/inode_file.rs new file mode 100644 index 00000000..6386ba92 --- /dev/null +++ b/src/kernel/vfs/file/inode_file.rs @@ -0,0 +1,223 @@ +use super::{File, FileType, SeekOption}; +use crate::{ + io::{Buffer, BufferFill, Stream}, + kernel::{ + constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE}, + vfs::{ + dentry::Dentry, + inode::{Inode, Mode, WriteOffset}, + }, + }, + prelude::KResult, +}; +use alloc::sync::Arc; +use core::{ops::ControlFlow, sync::atomic::Ordering}; +use eonix_sync::Mutex; +use posix_types::{ + getdent::{UserDirent, UserDirent64}, + open::OpenFlags, + stat::StatX, +}; + +pub struct InodeFile { + pub r: bool, + pub w: bool, + pub a: bool, + /// Only a few modes those won't possibly change are cached here to speed up file operations. + /// Specifically, `S_IFMT` masked bits. + pub mode: Mode, + cursor: Mutex, + dentry: Arc, +} + +impl InodeFile { + pub fn new(dentry: Arc, flags: OpenFlags) -> File { + // SAFETY: `dentry` used to create `InodeFile` is valid. + // SAFETY: `mode` should never change with respect to the `S_IFMT` fields. + let cached_mode = dentry + .get_inode() + .expect("`dentry` is invalid") + .mode + .load() + .format(); + + let (r, w, a) = flags.as_rwa(); + + File::new( + flags, + FileType::Inode(InodeFile { + dentry, + r, + w, + a, + mode: cached_mode, + cursor: Mutex::new(0), + }), + ) + } + + pub fn sendfile_check(&self) -> KResult<()> { + match self.mode { + Mode::REG | Mode::BLK => Ok(()), + _ => Err(EBADF), + } + } + + pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { + if !self.w { + return Err(EBADF); + } + + let mut cursor = self.cursor.lock().await; + + if self.a { + let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?; + + Ok(nwrote) + } else { + let nwrote = if let Some(offset) = offset { + self.dentry.write(stream, WriteOffset::Position(offset))? + } else { + let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?; + *cursor += nwrote; + nwrote + }; + + Ok(nwrote) + } + } + + pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { + if !self.r { + return Err(EBADF); + } + + let nread = if let Some(offset) = offset { + let nread = self.dentry.read(buffer, offset)?; + nread + } else { + let mut cursor = self.cursor.lock().await; + + let nread = self.dentry.read(buffer, *cursor)?; + + *cursor += nread; + nread + }; + + Ok(nread) + } +} + +impl File { + pub fn get_inode(&self) -> KResult>> { + if let FileType::Inode(inode_file) = &**self { + Ok(Some(inode_file.dentry.get_inode()?)) + } else { + Ok(None) + } + } + + pub async fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let FileType::Inode(inode_file) = &**self else { + return Err(ENOTDIR); + }; + + let mut cursor = inode_file.cursor.lock().await; + + let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { + // + 1 for filename length padding '\0', + 1 for d_type. 
+ let real_record_len = core::mem::size_of::() + filename.len() + 2; + + if buffer.available() < real_record_len { + return Ok(ControlFlow::Break(())); + } + + let record = UserDirent { + d_ino: ino as u32, + d_off: 0, + d_reclen: real_record_len as u16, + d_name: [0; 0], + }; + + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0, 0])?.ok_or(EFAULT)?; + + Ok(ControlFlow::Continue(())) + })?; + + *cursor += nread; + Ok(()) + } + + pub async fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> { + let FileType::Inode(inode_file) = &**self else { + return Err(ENOTDIR); + }; + + let mut cursor = inode_file.cursor.lock().await; + + let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { + // Filename length + 1 for padding '\0' + let real_record_len = core::mem::size_of::() + filename.len() + 1; + + if buffer.available() < real_record_len { + return Ok(ControlFlow::Break(())); + } + + let record = UserDirent64 { + d_ino: ino, + d_off: 0, + d_reclen: real_record_len as u16, + d_type: 0, + d_name: [0; 0], + }; + + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0])?.ok_or(EFAULT)?; + + Ok(ControlFlow::Continue(())) + })?; + + *cursor += nread; + Ok(()) + } + + pub async fn seek(&self, option: SeekOption) -> KResult { + let FileType::Inode(inode_file) = &**self else { + return Err(ESPIPE); + }; + + let mut cursor = inode_file.cursor.lock().await; + + let new_cursor = match option { + SeekOption::Current(off) => cursor.checked_add_signed(off).ok_or(EOVERFLOW)?, + SeekOption::Set(n) => n, + SeekOption::End(off) => { + let inode = inode_file.dentry.get_inode()?; + let size = inode.size.load(Ordering::Relaxed) as usize; + size.checked_add_signed(off).ok_or(EOVERFLOW)? + } + }; + + *cursor = new_cursor; + Ok(new_cursor) + } + + pub fn statx(&self, buffer: &mut StatX, mask: u32) -> KResult<()> { + if let FileType::Inode(inode) = &**self { + inode.dentry.statx(buffer, mask) + } else { + Err(EBADF) + } + } + + pub fn as_path(&self) -> Option<&Arc> { + if let FileType::Inode(inode_file) = &**self { + Some(&inode_file.dentry) + } else { + None + } + } +} diff --git a/src/kernel/vfs/file/mod.rs b/src/kernel/vfs/file/mod.rs new file mode 100644 index 00000000..bb1c66ec --- /dev/null +++ b/src/kernel/vfs/file/mod.rs @@ -0,0 +1,232 @@ +mod inode_file; +mod pipe; +mod terminal_file; + +use crate::{ + io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}, + kernel::{ + constants::{EBADF, EINTR, EINVAL, ENOTTY}, + mem::{AsMemoryBlock, Page}, + task::Thread, + CharDevice, + }, + prelude::KResult, +}; +use alloc::sync::Arc; +use bitflags::bitflags; +use core::{ + ops::Deref, + sync::atomic::{AtomicI32, AtomicU32, Ordering}, +}; +use pipe::{PipeReadEnd, PipeWriteEnd}; +use posix_types::open::OpenFlags; + +pub use inode_file::InodeFile; +pub use pipe::Pipe; +pub use terminal_file::TerminalFile; + +pub enum FileType { + Inode(InodeFile), + PipeRead(PipeReadEnd), + PipeWrite(PipeWriteEnd), + Terminal(TerminalFile), + CharDev(Arc), +} + +struct FileData { + flags: AtomicU32, + open_count: AtomicI32, + file_type: FileType, +} + +#[derive(Clone)] +pub struct File(Arc); + +pub enum SeekOption { + Set(usize), + Current(isize), + End(isize), +} + +bitflags! 
{ + pub struct PollEvent: u16 { + const Readable = 0x0001; + const Writable = 0x0002; + } +} + +impl FileType { + pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option) -> KResult { + match self { + FileType::Inode(inode) => inode.read(buffer, offset).await, + FileType::PipeRead(pipe) => pipe.read(buffer).await, + FileType::Terminal(tty) => tty.read(buffer).await, + FileType::CharDev(device) => device.read(buffer), + _ => Err(EBADF), + } + } + + // TODO + // /// Read from the file into the given buffers. + // /// + // /// Reads are atomic, not intermingled with other reads or writes. + // pub fn readv<'r, 'i, I: Iterator>( + // &'r self, + // buffers: I, + // ) -> KResult { + // match self { + // File::Inode(inode) => inode.readv(buffers), + // File::PipeRead(pipe) => pipe.pipe.readv(buffers), + // _ => Err(EBADF), + // } + // } + + pub async fn write(&self, stream: &mut dyn Stream, offset: Option) -> KResult { + match self { + FileType::Inode(inode) => inode.write(stream, offset).await, + FileType::PipeWrite(pipe) => pipe.write(stream).await, + FileType::Terminal(tty) => tty.write(stream), + FileType::CharDev(device) => device.write(stream), + _ => Err(EBADF), + } + } + + fn sendfile_check(&self) -> KResult<()> { + match self { + FileType::Inode(file) => file.sendfile_check(), + _ => Err(EINVAL), + } + } + + pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { + let buffer_page = Page::alloc(); + // SAFETY: We are the only owner of the page. + let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() }; + + self.sendfile_check()?; + + let mut nsent = 0; + for (cur, len) in Chunks::new(0, count, buffer.len()) { + if Thread::current().signal_list.has_pending_signal() { + return if cur == 0 { Err(EINTR) } else { Ok(cur) }; + } + let nread = self + .read(&mut ByteBuffer::new(&mut buffer[..len]), None) + .await?; + if nread == 0 { + break; + } + + let nwrote = dest_file + .write(&mut buffer[..nread].into_stream(), None) + .await?; + nsent += nwrote; + + if nwrote != len { + break; + } + } + + Ok(nsent) + } + + pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult { + match self { + FileType::Terminal(tty) => tty.ioctl(request, arg3).await.map(|_| 0), + _ => Err(ENOTTY), + } + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + match self { + FileType::Inode(_) => Ok(event), + FileType::Terminal(tty) => tty.poll(event).await, + FileType::PipeRead(pipe) => pipe.poll(event).await, + FileType::PipeWrite(pipe) => pipe.poll(event).await, + _ => unimplemented!("Poll event not supported."), + } + } +} + +impl File { + pub fn new(flags: OpenFlags, file_type: FileType) -> Self { + Self(Arc::new(FileData { + flags: AtomicU32::new(flags.bits()), + open_count: AtomicI32::new(1), + file_type, + })) + } + + pub fn get_flags(&self) -> OpenFlags { + OpenFlags::from_bits_retain(self.0.flags.load(Ordering::Relaxed)) + } + + pub fn set_flags(&self, flags: OpenFlags) { + let flags = flags.difference( + OpenFlags::O_WRONLY + | OpenFlags::O_RDWR + | OpenFlags::O_CREAT + | OpenFlags::O_TRUNC + | OpenFlags::O_EXCL, + // | OpenFlags::O_NOCTTY, + ); + + self.0.flags.store(flags.bits(), Ordering::Relaxed); + } + + /// Duplicate the file descriptor in order to store it in some [FileArray]. + /// + /// The [`File`]s stored in [FileArray]s hold an "open count", which is used + /// to track how many references to the file are currently open. + /// + /// # Panics + /// The [`File`]s stored in [FileArray]s MUST be retrieved by calling this + /// method. 
Otherwise, when the last reference to the file is dropped, + /// something bad will happen. ;) + /// + /// [FileArray]: crate::kernel::vfs::filearray::FileArray + pub fn dup(&self) -> Self { + self.0.open_count.fetch_add(1, Ordering::Relaxed); + Self(self.0.clone()) + } + + /// Close the file descriptor, decrementing the open count. + pub async fn close(self) { + // Due to rust async drop limits, we have to do this manually... + // + // Users of files can clone and drop it freely, but references held by + // file arrays must be dropped by calling this function (in order to + // await for the async close operation of the inner FileType). + match self.0.open_count.fetch_sub(1, Ordering::Relaxed) { + ..1 => panic!("File open count underflow."), + 1 => {} + _ => return, + } + + match &self.0.file_type { + FileType::PipeRead(pipe) => pipe.close().await, + FileType::PipeWrite(pipe) => pipe.close().await, + _ => {} + } + } +} + +impl Drop for FileData { + fn drop(&mut self) { + // If you're "lucky" enough to see this, it means that you've violated + // the file reference counting rules. Check File::close() for details. ;) + assert_eq!( + self.open_count.load(Ordering::Relaxed), + 0, + "File dropped with open count 0, check the comments for details." + ); + } +} + +impl Deref for File { + type Target = FileType; + + fn deref(&self) -> &Self::Target { + &self.0.file_type + } +} diff --git a/src/kernel/vfs/file/pipe.rs b/src/kernel/vfs/file/pipe.rs new file mode 100644 index 00000000..910f04fa --- /dev/null +++ b/src/kernel/vfs/file/pipe.rs @@ -0,0 +1,211 @@ +use super::{File, FileType, PollEvent}; +use crate::{ + io::{Buffer, Stream}, + kernel::{ + constants::{EINTR, EPIPE}, + task::Thread, + }, + prelude::KResult, + sync::CondVar, +}; +use alloc::{collections::vec_deque::VecDeque, sync::Arc}; +use eonix_sync::Mutex; +use posix_types::{open::OpenFlags, signal::Signal}; + +struct PipeInner { + buffer: VecDeque, + read_closed: bool, + write_closed: bool, +} + +pub struct Pipe { + inner: Mutex, + cv_read: CondVar, + cv_write: CondVar, +} + +pub struct PipeReadEnd { + pipe: Arc, +} + +pub struct PipeWriteEnd { + pipe: Arc, +} + +fn send_sigpipe_to_current() { + let current = Thread::current(); + current.raise(Signal::SIGPIPE); +} + +impl Pipe { + const PIPE_SIZE: usize = 4096; + + /// # Return + /// `(read_end, write_end)` + pub fn new(flags: OpenFlags) -> (File, File) { + let pipe = Arc::new(Self { + inner: Mutex::new(PipeInner { + buffer: VecDeque::with_capacity(Self::PIPE_SIZE), + read_closed: false, + write_closed: false, + }), + cv_read: CondVar::new(), + cv_write: CondVar::new(), + }); + + let read_flags = flags.difference(OpenFlags::O_WRONLY | OpenFlags::O_RDWR); + let mut write_flags = read_flags; + write_flags.insert(OpenFlags::O_WRONLY); + + let read_pipe = pipe.clone(); + let write_pipe = pipe; + + ( + File::new( + read_flags, + FileType::PipeRead(PipeReadEnd { pipe: read_pipe }), + ), + File::new( + write_flags, + FileType::PipeWrite(PipeWriteEnd { pipe: write_pipe }), + ), + ) + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + if !event.contains(PollEvent::Readable) { + unimplemented!("Poll event not supported."); + } + + let mut inner = self.inner.lock().await; + while inner.buffer.is_empty() && !inner.write_closed { + inner = self.cv_read.wait(inner).await; + } + + if Thread::current().signal_list.has_pending_signal() { + return Err(EINTR); + } + + let mut retval = PollEvent::empty(); + if inner.write_closed { + retval |= PollEvent::Writable; + } + + if 
!inner.buffer.is_empty() { + retval |= PollEvent::Readable; + } + + Ok(retval) + } + + pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { + let mut inner = self.inner.lock().await; + + while !inner.write_closed && inner.buffer.is_empty() { + inner = self.cv_read.wait(inner).await; + if Thread::current().signal_list.has_pending_signal() { + return Err(EINTR); + } + } + + let (data1, data2) = inner.buffer.as_slices(); + let nread = buffer.fill(data1)?.allow_partial() + buffer.fill(data2)?.allow_partial(); + inner.buffer.drain(..nread); + + self.cv_write.notify_all(); + Ok(nread) + } + + async fn write_atomic(&self, data: &[u8]) -> KResult { + let mut inner = self.inner.lock().await; + + if inner.read_closed { + send_sigpipe_to_current(); + return Err(EPIPE); + } + + while inner.buffer.len() + data.len() > Self::PIPE_SIZE { + inner = self.cv_write.wait(inner).await; + if Thread::current().signal_list.has_pending_signal() { + return Err(EINTR); + } + + if inner.read_closed { + send_sigpipe_to_current(); + return Err(EPIPE); + } + } + + inner.buffer.extend(data); + + self.cv_read.notify_all(); + return Ok(data.len()); + } + + pub async fn write(&self, stream: &mut dyn Stream) -> KResult { + let mut buffer = [0; Self::PIPE_SIZE]; + let mut total = 0; + while let Some(data) = stream.poll_data(&mut buffer)? { + let nwrote = self.write_atomic(data).await?; + total += nwrote; + if nwrote != data.len() { + break; + } + } + Ok(total) + } +} + +impl PipeReadEnd { + pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { + self.pipe.read(buffer).await + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + self.pipe.poll(event).await + } + + pub async fn close(&self) { + let mut inner = self.pipe.inner.lock().await; + if inner.read_closed { + return; + } + + inner.read_closed = true; + self.pipe.cv_write.notify_all(); + } +} + +impl PipeWriteEnd { + pub async fn write(&self, stream: &mut dyn Stream) -> KResult { + self.pipe.write(stream).await + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + self.pipe.poll(event).await + } + + pub async fn close(&self) { + let mut inner = self.pipe.inner.lock().await; + if inner.write_closed { + return; + } + + inner.write_closed = true; + self.pipe.cv_read.notify_all(); + } +} + +impl Drop for Pipe { + fn drop(&mut self) { + debug_assert!( + self.inner.get_mut().read_closed, + "Pipe read end should be closed before dropping (check File::close())." + ); + + debug_assert!( + self.inner.get_mut().write_closed, + "Pipe write end should be closed before dropping (check File::close())." 
+ ); + } +} diff --git a/src/kernel/vfs/file/terminal_file.rs b/src/kernel/vfs/file/terminal_file.rs new file mode 100644 index 00000000..f318c5b2 --- /dev/null +++ b/src/kernel/vfs/file/terminal_file.rs @@ -0,0 +1,55 @@ +use super::{File, FileType, PollEvent}; +use crate::{ + io::{Buffer, Stream, StreamRead}, + kernel::{ + constants::{EINVAL, TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, + terminal::TerminalIORequest, + user::{UserPointer, UserPointerMut}, + Terminal, + }, + prelude::KResult, +}; +use alloc::sync::Arc; +use posix_types::open::OpenFlags; + +pub struct TerminalFile { + terminal: Arc, +} + +impl TerminalFile { + pub fn new(tty: Arc, flags: OpenFlags) -> File { + File::new(flags, FileType::Terminal(TerminalFile { terminal: tty })) + } + + pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { + self.terminal.read(buffer).await + } + + pub fn write(&self, stream: &mut dyn Stream) -> KResult { + stream.read_till_end(&mut [0; 128], |data| { + self.terminal.write(data); + Ok(()) + }) + } + + pub async fn poll(&self, event: PollEvent) -> KResult { + if !event.contains(PollEvent::Readable) { + unimplemented!("Poll event not supported.") + } + + self.terminal.poll_in().await.map(|_| PollEvent::Readable) + } + + pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { + self.terminal + .ioctl(match request as u32 { + TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), + TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), + TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), + TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), + TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), + _ => return Err(EINVAL), + }) + .await + } +} diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index f8b06a12..b457a425 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -1,7 +1,7 @@ use super::{ - file::{File, InodeFile, TerminalFile}, + file::{File, InodeFile, Pipe}, inode::Mode, - s_ischr, Spin, + Spin, TerminalFile, }; use crate::kernel::{ constants::{ @@ -10,38 +10,51 @@ use crate::kernel::{ syscall::{FromSyscallArg, SyscallRetVal}, }; use crate::{ - kernel::{ - console::get_console, - constants::ENXIO, - vfs::{dentry::Dentry, file::Pipe, s_isdir, s_isreg}, - CharDevice, - }, + kernel::{console::get_console, constants::ENXIO, vfs::dentry::Dentry, CharDevice}, prelude::*, }; -use alloc::{ - collections::btree_map::{BTreeMap, Entry}, - sync::Arc, +use alloc::sync::Arc; +use intrusive_collections::{ + intrusive_adapter, rbtree::Entry, Bound, KeyAdapter, RBTree, RBTreeAtomicLink, }; -use core::sync::atomic::Ordering; use itertools::{ FoldWhile::{Continue, Done}, Itertools, }; use posix_types::open::{FDFlags, OpenFlags}; -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct FD(u32); #[derive(Clone)] struct OpenFile { + fd: FD, flags: FDFlags, - file: Arc, + file: File, + + link: RBTreeAtomicLink, +} + +intrusive_adapter!( + OpenFileAdapter = Box: OpenFile { link: RBTreeAtomicLink } +); + +impl<'a> KeyAdapter<'a> for OpenFileAdapter { + type Key = FD; + + fn get_key(&self, value: &'a OpenFile) -> Self::Key { + value.fd + } } #[derive(Clone)] +struct FDAllocator { + min_avail: FD, +} + struct FileArrayInner { - files: BTreeMap, - fd_min_avail: FD, + files: RBTree, + fd_alloc: FDAllocator, } pub struct FileArray { @@ -49,109 
+62,202 @@ pub struct FileArray { } impl OpenFile { + fn new(fd: FD, flags: FDFlags, file: File) -> Box { + Box::new(Self { + fd, + flags, + file, + link: RBTreeAtomicLink::new(), + }) + } + pub fn close_on_exec(&self) -> bool { self.flags.contains(FDFlags::FD_CLOEXEC) } } +impl FDAllocator { + const fn new() -> Self { + Self { min_avail: FD(0) } + } + + fn reinit(&mut self) { + self.min_avail = FD(0); + } + + fn find_available(&mut self, from: FD, files: &RBTree) -> FD { + files + .range(Bound::Included(&from), Bound::Unbounded) + .fold_while(from, |current, OpenFile { fd, .. }| { + if current == *fd { + Continue(FD(current.0 + 1)) + } else { + Done(current) + } + }) + .into_inner() + } + + /// Allocate a new file descriptor starting from `from`. + /// + /// Returned file descriptor should be used immediately. + /// + fn allocate_fd(&mut self, from: FD, files: &RBTree) -> FD { + let from = FD::max(from, self.min_avail); + + if from == self.min_avail { + let next_min_avail = self.find_available(FD(from.0 + 1), files); + let allocated = self.min_avail; + self.min_avail = next_min_avail; + allocated + } else { + self.find_available(from, files) + } + } + + fn release_fd(&mut self, fd: FD) { + if fd < self.min_avail { + self.min_avail = fd; + } + } + + fn next_fd(&mut self, files: &RBTree) -> FD { + self.allocate_fd(self.min_avail, files) + } +} + impl FileArray { pub fn new() -> Arc { Arc::new(FileArray { inner: Spin::new(FileArrayInner { - files: BTreeMap::new(), - fd_min_avail: FD(0), + files: RBTree::new(OpenFileAdapter::new()), + fd_alloc: FDAllocator::new(), }), }) } - #[allow(dead_code)] pub fn new_shared(other: &Arc) -> Arc { other.clone() } pub fn new_cloned(other: &Self) -> Arc { Arc::new(Self { - inner: Spin::new(other.inner.lock().clone()), + inner: Spin::new({ + let (new_files, new_fd_alloc) = { + let mut new_files = RBTree::new(OpenFileAdapter::new()); + let other_inner = other.inner.lock(); + + for file in other_inner.files.iter() { + let new_file = OpenFile::new(file.fd, file.flags, file.file.dup()); + new_files.insert(new_file); + } + (new_files, other_inner.fd_alloc.clone()) + }; + + FileArrayInner { + files: new_files, + fd_alloc: new_fd_alloc, + } + }), }) } /// Acquires the file array lock. - pub fn get(&self, fd: FD) -> Option> { + pub fn get(&self, fd: FD) -> Option { self.inner.lock().get(fd) } - pub fn close_all(&self) { - let _old_files = { + pub async fn close_all(&self) { + let old_files = { let mut inner = self.inner.lock(); - inner.fd_min_avail = FD(0); - core::mem::take(&mut inner.files) + inner.fd_alloc.reinit(); + inner.files.take() }; + + for file in old_files.into_iter() { + file.file.close().await; + } } - pub fn close(&self, fd: FD) -> KResult<()> { - let _file = { + pub async fn close(&self, fd: FD) -> KResult<()> { + let file = { let mut inner = self.inner.lock(); - let file = inner.files.remove(&fd).ok_or(EBADF)?; - inner.release_fd(fd); - file + let file = inner.files.find_mut(&fd).remove().ok_or(EBADF)?; + inner.fd_alloc.release_fd(file.fd); + file.file }; + + file.close().await; Ok(()) } - pub fn on_exec(&self) -> () { - let mut inner = self.inner.lock(); + pub async fn on_exec(&self) { + let files_to_close = { + let mut inner = self.inner.lock(); + let (files, fd_alloc) = inner.split_borrow(); - // TODO: This is not efficient. We should avoid cloning. 
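One consequence of the open-count scheme shows up in the replacement code just below: `FileArray::inner` is a `Spin` lock, while `File::close()` is async and may sleep, so the new `on_exec` (and `close_all`) first detach the affected entries under the lock and only then await the closes. The relevant shape, annotated (this mirrors the new code that follows, it is not an alternative implementation):

```rust
// detach CLOEXEC entries while the spin lock is held...
let files_to_close = {
    let mut inner = self.inner.lock(); // Spin lock: no awaiting in this block
    let (files, fd_alloc) = inner.split_borrow();
    files.pick(|ofile| {
        let cloexec = ofile.close_on_exec();
        if cloexec {
            fd_alloc.release_fd(ofile.fd); // descriptor becomes reusable immediately
        }
        cloexec
    })
};
// ...then run the async closes with the lock already released
for open_file in files_to_close {
    open_file.file.close().await;
}
```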
- let fds_to_close = inner - .files - .iter() - .filter(|(_, ofile)| ofile.close_on_exec()) - .map(|(&fd, _)| fd) - .collect::>(); + files.pick(|ofile| { + if ofile.close_on_exec() { + fd_alloc.release_fd(ofile.fd); + true + } else { + false + } + }) + }; - inner.files.retain(|_, ofile| !ofile.close_on_exec()); - fds_to_close.into_iter().for_each(|fd| inner.release_fd(fd)); + for open_file in files_to_close.into_iter() { + open_file.file.close().await; + } } -} -impl FileArray { pub fn dup(&self, old_fd: FD) -> KResult { let mut inner = self.inner.lock(); - let old_file = inner.files.get(&old_fd).ok_or(EBADF)?; + let (files, fd_alloc) = inner.split_borrow(); + + let old_file = files.get_fd(old_fd).ok_or(EBADF)?; - let new_file_data = old_file.file.clone(); + let new_file_data = old_file.file.dup(); let new_file_flags = old_file.flags; - let new_fd = inner.next_fd(); + let new_fd = fd_alloc.next_fd(files); inner.do_insert(new_fd, new_file_flags, new_file_data); Ok(new_fd) } - pub fn dup_to(&self, old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { - let fdflags = flags.as_fd_flags(); - + /// Duplicates the file to a new file descriptor, returning the old file + /// description to be dropped. + fn dup_to_no_close(&self, old_fd: FD, new_fd: FD, fd_flags: FDFlags) -> KResult> { let mut inner = self.inner.lock(); - let old_file = inner.files.get(&old_fd).ok_or(EBADF)?; + let (files, fd_alloc) = inner.split_borrow(); + + let old_file = files.get_fd(old_fd).ok_or(EBADF)?; + let new_file_data = old_file.file.dup(); - let new_file_data = old_file.file.clone(); + match files.entry(&new_fd) { + Entry::Vacant(_) => { + assert_eq!(new_fd, fd_alloc.allocate_fd(new_fd, files)); + inner.do_insert(new_fd, fd_flags, new_file_data); - match inner.files.entry(new_fd) { - Entry::Vacant(_) => {} - Entry::Occupied(entry) => { - let new_file = entry.into_mut(); - let mut file_swap = new_file_data; + Ok(None) + } + Entry::Occupied(mut entry) => { + let mut file = entry.remove().unwrap(); + file.flags = fd_flags; + let old_file = core::mem::replace(&mut file.file, new_file_data); - new_file.flags = fdflags; - core::mem::swap(&mut file_swap, &mut new_file.file); + entry.insert(file); - drop(inner); - return Ok(new_fd); + Ok(Some(old_file)) } } + } - assert_eq!(new_fd, inner.allocate_fd(new_fd)); - inner.do_insert(new_fd, fdflags, new_file_data); + pub async fn dup_to(&self, old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { + if let Some(old_file) = self.dup_to_no_close(old_fd, new_fd, flags.as_fd_flags())? 
{ + old_file.close().await; + } Ok(new_fd) } @@ -160,9 +266,10 @@ impl FileArray { /// `(read_fd, write_fd)` pub fn pipe(&self, flags: OpenFlags) -> KResult<(FD, FD)> { let mut inner = self.inner.lock(); + let (files, fd_alloc) = inner.split_borrow(); - let read_fd = inner.next_fd(); - let write_fd = inner.next_fd(); + let read_fd = fd_alloc.next_fd(files); + let write_fd = fd_alloc.next_fd(files); let fdflag = flags.as_fd_flags(); @@ -179,23 +286,20 @@ impl FileArray { let fdflag = flags.as_fd_flags(); let inode = dentry.get_inode()?; - let filemode = inode.mode.load(Ordering::Relaxed); + let file_format = inode.mode.load().format(); - if flags.directory() { - if !s_isdir(filemode) { - return Err(ENOTDIR); - } - } else { - if s_isdir(filemode) && flags.write() { - return Err(EISDIR); - } + match (flags.directory(), file_format, flags.write()) { + (true, Mode::DIR, _) => {} + (true, _, _) => return Err(ENOTDIR), + (false, Mode::DIR, true) => return Err(EISDIR), + _ => {} } - if flags.truncate() && flags.write() && s_isreg(filemode) { + if flags.truncate() && flags.write() && file_format.is_reg() { inode.truncate(0)?; } - let file = if s_ischr(filemode) { + let file = if file_format.is_chr() { let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?; device.open(flags)? } else { @@ -203,7 +307,8 @@ impl FileArray { }; let mut inner = self.inner.lock(); - let fd = inner.next_fd(); + let (files, fd_alloc) = inner.split_borrow(); + let fd = fd_alloc.next_fd(files); inner.do_insert(fd, fdflag, file); Ok(fd) @@ -211,43 +316,59 @@ impl FileArray { pub fn fcntl(&self, fd: FD, cmd: u32, arg: usize) -> KResult { let mut inner = self.inner.lock(); - let ofile = inner.files.get_mut(&fd).ok_or(EBADF)?; + let (files, fd_alloc) = inner.split_borrow(); + + let mut cursor = files.find_mut(&fd); - match cmd { + let ret = match cmd { F_DUPFD | F_DUPFD_CLOEXEC => { + let ofile = cursor.get().ok_or(EBADF)?; + let cloexec = cmd == F_DUPFD_CLOEXEC || ofile.flags.close_on_exec(); let flags = cloexec .then_some(FDFlags::FD_CLOEXEC) .unwrap_or(FDFlags::empty()); - let new_file_data = ofile.file.clone(); - let new_fd = inner.allocate_fd(FD(arg as u32)); + let new_file_data = ofile.file.dup(); + let new_fd = fd_alloc.allocate_fd(FD(arg as u32), files); inner.do_insert(new_fd, flags, new_file_data); - Ok(new_fd.0 as usize) + new_fd.0 as usize } - F_GETFD => Ok(ofile.flags.bits() as usize), + F_GETFD => cursor.get().ok_or(EBADF)?.flags.bits() as usize, F_SETFD => { + let mut ofile = cursor.remove().ok_or(EBADF)?; ofile.flags = FDFlags::from_bits_truncate(arg as u32); - Ok(0) + cursor.insert(ofile); + 0 } - F_GETFL => Ok(ofile.file.get_flags().bits() as usize), + F_GETFL => cursor.get().ok_or(EBADF)?.file.get_flags().bits() as usize, F_SETFL => { - ofile + cursor + .get() + .ok_or(EBADF)? .file .set_flags(OpenFlags::from_bits_retain(arg as u32)); - Ok(0) + 0 } _ => unimplemented!("fcntl: cmd={}", cmd), - } + }; + + Ok(ret) } /// Only used for init process. 
pub fn open_console(&self) { let mut inner = self.inner.lock(); - let (stdin, stdout, stderr) = (inner.next_fd(), inner.next_fd(), inner.next_fd()); + let (files, fd_alloc) = inner.split_borrow(); + + let (stdin, stdout, stderr) = ( + fd_alloc.next_fd(files), + fd_alloc.next_fd(files), + fd_alloc.next_fd(files), + ); let console_terminal = get_console().expect("No console terminal"); inner.do_insert( @@ -269,53 +390,25 @@ impl FileArray { } impl FileArrayInner { - fn get(&mut self, fd: FD) -> Option> { - self.files.get(&fd).map(|f| f.file.clone()) - } - - fn find_available(&mut self, from: FD) -> FD { - self.files - .range(&from..) - .fold_while(from, |current, (&key, _)| { - if current == key { - Continue(FD(current.0 + 1)) - } else { - Done(current) - } - }) - .into_inner() - } - - /// Allocate a new file descriptor starting from `from`. - /// - /// Returned file descriptor should be used immediately. - /// - fn allocate_fd(&mut self, from: FD) -> FD { - let from = FD::max(from, self.fd_min_avail); - - if from == self.fd_min_avail { - let next_min_avail = self.find_available(FD(from.0 + 1)); - let allocated = self.fd_min_avail; - self.fd_min_avail = next_min_avail; - allocated - } else { - self.find_available(from) - } + fn get(&mut self, fd: FD) -> Option { + self.files.get_fd(fd).map(|open| open.file.clone()) } - fn release_fd(&mut self, fd: FD) { - if fd < self.fd_min_avail { - self.fd_min_avail = fd; + /// Insert a file description to the file array. + fn do_insert(&mut self, fd: FD, flags: FDFlags, file: File) { + match self.files.entry(&fd) { + Entry::Occupied(_) => { + panic!("File descriptor {fd:?} already exists in the file array."); + } + Entry::Vacant(insert_cursor) => { + insert_cursor.insert(OpenFile::new(fd, flags, file)); + } } } - fn next_fd(&mut self) -> FD { - self.allocate_fd(self.fd_min_avail) - } - - /// Insert a file description to the file array. - fn do_insert(&mut self, fd: FD, flags: FDFlags, file: Arc) { - assert!(self.files.insert(fd, OpenFile { flags, file }).is_none()); + fn split_borrow(&mut self) -> (&mut RBTree, &mut FDAllocator) { + let Self { files, fd_alloc } = self; + (files, fd_alloc) } } @@ -323,6 +416,15 @@ impl FD { pub const AT_FDCWD: FD = FD(-100i32 as u32); } +impl core::fmt::Debug for FD { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + &Self::AT_FDCWD => f.write_str("FD(AT_FDCWD)"), + FD(no) => f.debug_tuple("FD").field(&no).finish(), + } + } +} + impl FromSyscallArg for FD { fn from_arg(value: usize) -> Self { Self(value as u32) @@ -334,3 +436,39 @@ impl SyscallRetVal for FD { Some(self.0 as usize) } } + +trait FilesExt { + fn get_fd(&self, fd: FD) -> Option<&OpenFile>; + + fn pick
<P>
(&mut self, pred: P) -> Self + where + P: FnMut(&OpenFile) -> bool; +} + +impl FilesExt for RBTree { + fn get_fd(&self, fd: FD) -> Option<&OpenFile> { + self.find(&fd).get() + } + + fn pick
<P>
(&mut self, mut pred: P) -> Self + where + P: FnMut(&OpenFile) -> bool, + { + let mut picked = RBTree::new(OpenFileAdapter::new()); + + // TODO: might be better if we start picking from somewhere else + // or using a different approach. + let mut cursor = self.front_mut(); + while let Some(open_file) = cursor.get() { + if !pred(open_file) { + cursor.move_next(); + continue; + } + + picked.insert(cursor.remove().unwrap()); + cursor.move_next(); + } + + picked + } +} diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs index 2b52043d..52529f84 100644 --- a/src/kernel/vfs/inode.rs +++ b/src/kernel/vfs/inode.rs @@ -1,10 +1,13 @@ -use super::{dentry::Dentry, s_isblk, s_ischr, vfs::Vfs, DevId}; +use super::{dentry::Dentry, vfs::Vfs, DevId}; use crate::io::Stream; use crate::kernel::constants::{ EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, - STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR, S_IFMT, + STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFBLK, S_IFCHR, + S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, }; use crate::kernel::mem::PageCache; +use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; +use crate::kernel::task::block_on; use crate::kernel::timer::Instant; use crate::{io::Buffer, prelude::*}; use alloc::sync::{Arc, Weak}; @@ -14,7 +17,6 @@ use core::{ ptr::addr_of_mut, sync::atomic::{AtomicU32, AtomicU64, Ordering}, }; -use eonix_runtime::task::Task; use eonix_sync::RwLock; use posix_types::stat::StatX; @@ -32,8 +34,11 @@ pub type AtomicUid = AtomicU32; #[allow(dead_code)] pub type Gid = u32; pub type AtomicGid = AtomicU32; -pub type Mode = u32; -pub type AtomicMode = AtomicU32; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Mode(u32); + +pub struct AtomicMode(AtomicU32); #[derive(Debug)] pub struct InodeData { @@ -97,7 +102,7 @@ pub struct RenameData<'a, 'b> { #[allow(unused_variables)] pub trait Inode: Send + Sync + InodeInner + Any { fn is_dir(&self) -> bool { - self.mode.load(Ordering::SeqCst) & S_IFDIR != 0 + self.mode.load().is_dir() } fn lookup(&self, dentry: &Arc) -> KResult>> { @@ -136,7 +141,7 @@ pub trait Inode: Send + Sync + InodeInner + Any { Err(if self.is_dir() { EISDIR } else { EINVAL }) } - fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { Err(if self.is_dir() { EISDIR } else { EINVAL }) } @@ -181,7 +186,7 @@ pub trait Inode: Send + Sync + InodeInner + Any { let vfs = self.vfs.upgrade().expect("Vfs is dropped"); let size = self.size.load(Ordering::Relaxed); - let mode = self.mode.load(Ordering::Relaxed); + let mode = self.mode.load(); if mask & STATX_NLINK != 0 { stat.stx_nlink = self.nlink.load(Ordering::Acquire) as _; @@ -213,13 +218,13 @@ pub trait Inode: Send + Sync + InodeInner + Any { stat.stx_mode = 0; if mask & STATX_MODE != 0 { - stat.stx_mode |= (mode & !S_IFMT) as u16; + stat.stx_mode |= mode.non_format_bits() as u16; stat.stx_mask |= STATX_MODE; } if mask & STATX_TYPE != 0 { - stat.stx_mode |= (mode & S_IFMT) as u16; - if s_isblk(mode) || s_ischr(mode) { + stat.stx_mode |= mode.format_bits() as u16; + if mode.is_blk() || mode.is_chr() { let devid = self.devid(); stat.stx_rdev_major = (devid? >> 8) & 0xff; stat.stx_rdev_minor = devid? & 0xff; @@ -280,7 +285,7 @@ pub trait Inode: Send + Sync + InodeInner + Any { f( uninit_mut.as_mut_ptr(), // SAFETY: `idata` is initialized and we will never move the lock. 
-                &Task::block_on(unsafe { idata.assume_init_ref() }.rwsem.read()),
+                &block_on(unsafe { idata.assume_init_ref() }.rwsem.read()),
             );
 
         // Safety: `uninit` is initialized
@@ -354,3 +359,136 @@ macro_rules! define_struct_inode {
 }
 
 pub(crate) use define_struct_inode;
+
+impl Mode {
+    pub const REG: Self = Self(S_IFREG);
+    pub const DIR: Self = Self(S_IFDIR);
+    pub const LNK: Self = Self(S_IFLNK);
+    pub const BLK: Self = Self(S_IFBLK);
+    pub const CHR: Self = Self(S_IFCHR);
+
+    pub const fn new(bits: u32) -> Self {
+        Self(bits)
+    }
+
+    pub const fn is_blk(&self) -> bool {
+        (self.0 & S_IFMT) == S_IFBLK
+    }
+
+    pub const fn is_chr(&self) -> bool {
+        (self.0 & S_IFMT) == S_IFCHR
+    }
+
+    pub const fn is_reg(&self) -> bool {
+        (self.0 & S_IFMT) == S_IFREG
+    }
+
+    pub const fn is_dir(&self) -> bool {
+        (self.0 & S_IFMT) == S_IFDIR
+    }
+
+    pub const fn is_lnk(&self) -> bool {
+        (self.0 & S_IFMT) == S_IFLNK
+    }
+
+    pub const fn bits(&self) -> u32 {
+        self.0
+    }
+
+    pub const fn format_bits(&self) -> u32 {
+        self.0 & S_IFMT
+    }
+
+    pub const fn format(&self) -> Self {
+        Self::new(self.format_bits())
+    }
+
+    pub const fn non_format_bits(&self) -> u32 {
+        self.0 & !S_IFMT
+    }
+
+    pub const fn non_format(&self) -> Self {
+        Self::new(self.non_format_bits())
+    }
+
+    pub const fn perm(self, perm: u32) -> Self {
+        Self::new((self.0 & !0o777) | (perm & 0o777))
+    }
+
+    pub const fn set_perm(&mut self, perm: u32) {
+        *self = self.perm(perm);
+    }
+
+    pub const fn mask_perm(&mut self, perm_mask: u32) {
+        let perm_mask = perm_mask & 0o777;
+        let self_perm = self.non_format_bits() & 0o777;
+
+        *self = self.perm(self_perm & perm_mask);
+    }
+}
+
+impl AtomicMode {
+    pub const fn new(bits: u32) -> Self {
+        Self(AtomicU32::new(bits))
+    }
+
+    pub const fn from(mode: Mode) -> Self {
+        Self::new(mode.0)
+    }
+
+    pub fn load(&self) -> Mode {
+        Mode(self.0.load(Ordering::Relaxed))
+    }
+
+    pub fn store(&self, mode: Mode) {
+        self.0.store(mode.0, Ordering::Relaxed);
+    }
+}
+
+impl core::fmt::Debug for AtomicMode {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("AtomicMode")
+            .field("bits", &self.load().0)
+            .finish()
+    }
+}
+
+impl core::fmt::Debug for Mode {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let format_name = match self.format() {
+            Mode::REG => "REG",
+            Mode::DIR => "DIR",
+            Mode::LNK => "LNK",
+            Mode::BLK => "BLK",
+            Mode::CHR => "CHR",
+            _ => "UNK",
+        };
+
+        match self.non_format_bits() & !0o777 {
+            0 => write!(
+                f,
+                "Mode({format_name}, {perm:#o})",
+                perm = self.non_format_bits()
+            )?,
+            rem => write!(
+                f,
+                "Mode({format_name}, {perm:#o}, rem={rem:#x})",
+                perm = self.non_format_bits() & 0o777
+            )?,
+        }
+
+        Ok(())
+    }
+}
+
+impl FromSyscallArg for Mode {
+    fn from_arg(value: usize) -> Self {
+        Mode::new(value as u32)
+    }
+}
+
+impl SyscallRetVal for Mode {
+    fn into_retval(self) -> Option<usize> {
+        Some(self.bits() as usize)
+    }
+}
diff --git a/src/kernel/vfs/mod.rs b/src/kernel/vfs/mod.rs
index efd68aa7..f62cb9b9 100644
--- a/src/kernel/vfs/mod.rs
+++ b/src/kernel/vfs/mod.rs
@@ -1,4 +1,3 @@
-use crate::kernel::constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG};
 use crate::prelude::*;
 use alloc::sync::Arc;
 use dentry::Dentry;
@@ -6,33 +5,15 @@ use eonix_sync::LazyLock;
 use inode::Mode;
 
 pub mod dentry;
-pub mod file;
+mod file;
 pub mod filearray;
 pub mod inode;
 pub mod mount;
 pub mod vfs;
 
-pub type DevId = u32;
-
-pub fn s_isreg(mode: Mode) -> bool {
-    (mode & S_IFMT) == S_IFREG
-}
+pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile};
-
-pub fn s_isdir(mode: Mode) -> bool {
-    (mode & S_IFMT) == S_IFDIR
-}
-
-pub fn s_ischr(mode: Mode) -> bool {
-    (mode & S_IFMT) == S_IFCHR
-}
-
-pub fn s_isblk(mode: Mode) -> bool {
-    (mode & S_IFMT) == S_IFBLK
-}
-
-pub fn s_islnk(mode: Mode) -> bool {
-    (mode & S_IFMT) == S_IFLNK
-}
+pub type DevId = u32;
 
 pub struct FsContext {
     pub fsroot: Arc<Dentry>,
@@ -44,7 +25,7 @@ static GLOBAL_FS_CONTEXT: LazyLock<Arc<FsContext>> = LazyLock::new(|| {
     Arc::new(FsContext {
         fsroot: Dentry::root().clone(),
         cwd: Spin::new(Dentry::root().clone()),
-        umask: Spin::new(0o022),
+        umask: Spin::new(Mode::new(0o022)),
     })
 });
diff --git a/src/lib.rs b/src/lib.rs
index 6fd82c40..80d24c28 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
 #![no_std]
 #![no_main]
+#![feature(allocator_api)]
 #![feature(c_size_t)]
 #![feature(concat_idents)]
 #![feature(arbitrary_self_types)]
@@ -8,6 +9,9 @@
 
 extern crate alloc;
 
+#[cfg(any(target_arch = "riscv64", target_arch = "x86_64"))]
+extern crate unwinding;
+
 mod driver;
 mod fs;
 mod hash;
@@ -15,6 +19,8 @@ mod io;
 mod kernel;
 mod kernel_init;
 mod net;
+#[cfg(any(target_arch = "riscv64", target_arch = "x86_64"))]
+mod panic;
 mod path;
 mod prelude;
 mod rcu;
@@ -24,21 +30,23 @@ use crate::kernel::task::alloc_pid;
 use alloc::{ffi::CString, sync::Arc};
 use core::{
     hint::spin_loop,
-    sync::atomic::{AtomicBool, Ordering},
+    sync::atomic::{AtomicBool, AtomicUsize, Ordering},
 };
 use eonix_hal::{
-    arch_exported::bootstrap::shutdown, processor::CPU, traits::trap::IrqState,
+    arch_exported::bootstrap::shutdown,
+    context::TaskContext,
+    processor::{halt, CPU, CPU_COUNT},
+    traits::{context::RawTaskContext, trap::IrqState},
     trap::disable_irqs_save,
 };
 use eonix_mm::address::PRange;
-use eonix_runtime::{run::FutureRun, scheduler::Scheduler, task::Task};
+use eonix_runtime::{executor::Stack, scheduler::RUNTIME};
 use kernel::{
     mem::GlobalPageAlloc,
-    task::{
-        new_thread_runnable, KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder,
-    },
+    task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder},
     vfs::{
         dentry::Dentry,
+        inode::Mode,
         mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY},
         FsContext,
     },
@@ -50,6 +58,9 @@ use prelude::*;
 
 #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 fn do_panic() -> ! {
+    #[cfg(target_arch = "riscv64")]
+    panic::stack_trace();
+
     shutdown();
 }
@@ -80,6 +91,25 @@ fn panic(info: &core::panic::PanicInfo) -> ! {
 }
 
 static BSP_OK: AtomicBool = AtomicBool::new(false);
+static CPU_SHUTTING_DOWN: AtomicUsize = AtomicUsize::new(0);
+
+fn shutdown_system() -> ! {
+    let cpu_count = CPU_COUNT.load(Ordering::Relaxed);
+
+    if CPU_SHUTTING_DOWN.fetch_add(1, Ordering::AcqRel) + 1 == cpu_count {
+        println_info!("All CPUs are shutting down. Gracefully powering off...");
+        shutdown();
+    } else {
+        println_info!(
+            "CPU {} is shutting down. Waiting for other CPUs...",
+            CPU::local().cpuid()
+        );
+
+        loop {
+            halt();
+        }
+    }
+}
 
 #[eonix_hal::main]
 fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! {
@@ -90,21 +120,26 @@ fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! {
         driver::sbi_console::init_console();
     }
 
-    // To satisfy the `Scheduler` "preempt count == 0" assertion.
-    eonix_preempt::disable();
+    BSP_OK.store(true, Ordering::Release);
 
-    // We need root dentry to be present in constructor of `FsContext`.
-    // So call `init_vfs` first, then `init_multitasking`.
-    Scheduler::init_local_scheduler::();
+    RUNTIME.spawn(init_process(data.get_early_stack()));
 
-    Scheduler::get().spawn::(FutureRun::new(init_process(data.get_early_stack())));
+    drop(data);
 
-    BSP_OK.store(true, Ordering::Release);
+    let mut ctx = TaskContext::new();
+    let stack_bottom = {
+        let stack = KernelStack::new();
+        let bottom = stack.get_bottom().addr().get();
+        core::mem::forget(stack);
+
+        bottom
+    };
+    ctx.set_interrupt_enabled(true);
+    ctx.set_program_counter(standard_main as usize);
+    ctx.set_stack_pointer(stack_bottom);
 
-    drop(data);
     unsafe {
-        // SAFETY: `preempt::count()` == 1.
-        Scheduler::goto_scheduler_noreturn()
+        TaskContext::switch_to_noreturn(&mut ctx);
     }
 }
@@ -115,18 +150,30 @@ fn kernel_ap_main(_stack_range: PRange) -> ! {
         spin_loop();
     }
 
-    Scheduler::init_local_scheduler::();
     println_debug!("AP{} started", CPU::local().cpuid());
 
-    eonix_preempt::disable();
+    let mut ctx = TaskContext::new();
+    let stack_bottom = {
+        let stack = KernelStack::new();
+        let bottom = stack.get_bottom().addr().get();
+        core::mem::forget(stack);
+
+        bottom
+    };
+    ctx.set_interrupt_enabled(true);
+    ctx.set_program_counter(standard_main as usize);
+    ctx.set_stack_pointer(stack_bottom);
 
-    // TODO!!!!!: Free the stack after having switched to idle task.
     unsafe {
-        // SAFETY: `preempt::count()` == 1.
-        Scheduler::goto_scheduler_noreturn()
+        TaskContext::switch_to_noreturn(&mut ctx);
     }
 }
 
+fn standard_main() -> ! {
+    RUNTIME.enter();
+    shutdown_system();
+}
+
 async fn init_process(early_kstack: PRange) {
     unsafe {
         let irq_ctx = disable_irqs_save();
@@ -176,7 +223,7 @@ async fn init_process(early_kstack: PRange) {
     let fs_context = FsContext::global();
 
     let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
-    mnt_dir.mkdir(0o755).unwrap();
+    mnt_dir.mkdir(Mode::new(0o755)).unwrap();
 
     do_mount(
         &mnt_dir,
@@ -216,6 +263,7 @@ async fn init_process(early_kstack: PRange) {
         ProgramLoader::parse(fs_context, init_name, init.clone(), argv, envp)
             .expect("Failed to parse init program")
             .load()
+            .await
             .expect("Failed to load init program")
     };
 
@@ -223,7 +271,7 @@ async fn init_process(early_kstack: PRange) {
         .name(Arc::from(&b"busybox"[..]))
         .entry(load_info.entry_ip, load_info.sp);
 
-    let mut process_list = Task::block_on(ProcessList::get().write());
+    let mut process_list = ProcessList::get().write().await;
     let (thread, process) = ProcessBuilder::new()
         .pid(alloc_pid())
         .mm_list(load_info.mm_list)
@@ -235,5 +283,5 @@ async fn init_process(early_kstack: PRange) {
     // TODO!!!: Remove this.
     thread.files.open_console();
 
-    Scheduler::get().spawn::(new_thread_runnable(thread));
+    RUNTIME.spawn(thread.run());
 }
diff --git a/src/panic.rs b/src/panic.rs
new file mode 100644
index 00000000..3c9c5f34
--- /dev/null
+++ b/src/panic.rs
@@ -0,0 +1,29 @@
+use core::ffi::c_void;
+
+use eonix_log::println_fatal;
+use unwinding::abi::{
+    UnwindContext, UnwindReasonCode, _Unwind_Backtrace, _Unwind_GetIP, _Unwind_GetRegionStart,
+};
+
+pub fn stack_trace() {
+    struct CallbackData {
+        counter: usize,
+    }
+
+    extern "C" fn callback(unwind_ctx: &UnwindContext<'_>, arg: *mut c_void) -> UnwindReasonCode {
+        let data = unsafe { &mut *(arg as *mut CallbackData) };
+        data.counter += 1;
+
+        println_fatal!(
+            "{:4}: {:#018x} - at function {:#018x}",
+            data.counter,
+            _Unwind_GetIP(unwind_ctx),
+            _Unwind_GetRegionStart(unwind_ctx),
+        );
+
+        UnwindReasonCode::NO_REASON
+    }
+
+    let mut data = CallbackData { counter: 0 };
+    _Unwind_Backtrace(callback, &raw mut data as *mut c_void);
+}
diff --git a/src/rcu.rs b/src/rcu.rs
index f018d3f3..c1645d33 100644
--- a/src/rcu.rs
+++ b/src/rcu.rs
@@ -1,11 +1,11 @@
-use crate::prelude::*;
+use crate::{kernel::task::block_on, prelude::*};
 use alloc::sync::Arc;
 use core::{
     ops::Deref,
     ptr::NonNull,
     sync::atomic::{AtomicPtr, Ordering},
 };
-use eonix_runtime::task::Task;
+use eonix_runtime::scheduler::RUNTIME;
 use eonix_sync::{Mutex, RwLock, RwLockReadGuard};
 use pointers::BorrowedArc;
@@ -21,7 +21,7 @@ impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> {
     fn lock(value: BorrowedArc<'data, T>) -> Self {
         Self {
             value,
-            _guard: Task::block_on(GLOBAL_RCU_SEM.read()),
+            _guard: block_on(GLOBAL_RCU_SEM.read()),
             _phantom: PhantomData,
         }
     }
@@ -48,6 +48,14 @@ pub async fn rcu_sync() {
     let _ = GLOBAL_RCU_SEM.write().await;
 }
 
+pub fn call_rcu(func: impl FnOnce() + Send + 'static) {
+    RUNTIME.spawn(async move {
+        // Wait for all readers to finish.
+        rcu_sync().await;
+        func();
+    });
+}
+
 pub trait RCUNode {
     fn rcu_prev(&self) -> &AtomicPtr;
     fn rcu_next(&self) -> &AtomicPtr;
@@ -154,7 +162,7 @@ impl> RCUList {
     }
 
     pub fn iter(&self) -> RCUIterator {
-        let _lck = Task::block_on(self.reader_lock.read());
+        let _lck = block_on(self.reader_lock.read());
 
         RCUIterator {
             // SAFETY: We have a read lock, so the node is still alive.
@@ -186,9 +194,15 @@ impl<'lt, T: RCUNode> Iterator for RCUIterator<'lt, T> {
     }
 }
 
-pub struct RCUPointer<T>(AtomicPtr<T>);
+pub struct RCUPointer<T>(AtomicPtr<T>)
+where
+    T: Send + Sync + 'static;
 
-impl<T: core::fmt::Debug> core::fmt::Debug for RCUPointer<T> {
+impl<T> core::fmt::Debug for RCUPointer<T>
+where
+    T: core::fmt::Debug,
+    T: Send + Sync + 'static,
+{
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         match NonNull::new(self.0.load(Ordering::Acquire)) {
             Some(pointer) => {
@@ -201,7 +215,10 @@ impl core::fmt::Debug for RCUPointer {
     }
 }
 
-impl<T> RCUPointer<T> {
+impl<T> RCUPointer<T>
+where
+    T: Send + Sync + 'static,
+{
     pub const fn empty() -> Self {
         Self(AtomicPtr::new(core::ptr::null_mut()))
     }
@@ -258,13 +275,16 @@ impl RCUPointer {
     }
 }
 
-impl<T> Drop for RCUPointer<T> {
+impl<T> Drop for RCUPointer<T>
+where
+    T: Send + Sync + 'static,
+{
     fn drop(&mut self) {
         // SAFETY: We call `rcu_sync()` to ensure that all readers are done.
         if let Some(arc) = unsafe { self.swap(None) } {
             // We only wait if there are other references.
             if Arc::strong_count(&arc) == 1 {
-                Task::block_on(rcu_sync());
+                call_rcu(move || drop(arc));
             }
         }
     }
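
A minimal usage sketch of the `Mode` wrapper introduced in src/kernel/vfs/inode.rs above. The example function is hypothetical and assumes the conventional S_IFMT/S_IFREG bit layout from crate::kernel::constants and that the FromSyscallArg trait is in scope; it is not part of the patch.

fn mode_examples() {
    // A regular file with 0o644 permissions.
    let mode = Mode::REG.perm(0o644);
    assert!(mode.is_reg() && !mode.is_dir());
    assert_eq!(mode.non_format_bits(), 0o644);

    // Applying the global 0o022 umask, as the FsContext default does,
    // keeps only the permission bits that are not masked out.
    let mut masked = Mode::REG.perm(0o666);
    masked.mask_perm(!0o022);
    assert_eq!(masked.non_format_bits(), 0o644);

    // Syscall boundary: Mode converts from and back to usize via
    // FromSyscallArg / SyscallRetVal.
    let from_user = Mode::from_arg(0o100644);
    assert!(from_user.is_reg());
}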
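
A sketch of how the new call_rcu helper in src/rcu.rs can defer reclamation until the current readers have finished; the Config type and retire function are made up for illustration and are not part of the patch.

use alloc::sync::Arc;

// Made-up example of RCU-protected data.
struct Config {
    threshold: usize,
}

fn retire(old: Arc<Config>) {
    // Instead of blocking the writer on rcu_sync(), queue the drop to run
    // after every reader that might still see `old` has left its read-side
    // critical section.
    call_rcu(move || drop(old));
}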