From 5c86b788e452e7a5a1e3a961252351779b7dc00f Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 21:23:30 +0000 Subject: [PATCH 01/20] trycopy: reimplement sparse_mmap's trycopy with inline asm Use inline asm instead of global asm or a C implementation, which allows the compiler to inline the code. Use a custom exception-table-based approach for recovery instead of setjmp/longjmp on Unix (slow) or SEH exceptions on Windows (prevents inlining). Besides improving performance across the board, this allows us to use a single per-arch implementation of the primitives across all OSes. This should make it easier to add new primitives over time, if desired. --- Cargo.lock | 17 +- Cargo.toml | 2 +- .../src/_jobs/check_clippy.rs | 11 +- support/sparse_mmap/Cargo.toml | 11 +- support/sparse_mmap/benches/perf.rs | 33 - support/sparse_mmap/build.rs | 45 - support/sparse_mmap/src/lib.rs | 609 +----------- support/sparse_mmap/src/trycopy.c | 277 ------ .../sparse_mmap/src/trycopy_windows_arm64.rs | 200 ---- .../sparse_mmap/src/trycopy_windows_x64.rs | 192 ---- support/sparse_mmap/src/unix.rs | 2 +- support/sparse_mmap/src/windows.rs | 5 +- support/trycopy/Cargo.toml | 32 + support/trycopy/benches/perf.rs | 60 ++ support/trycopy/src/aarch64.rs | 357 +++++++ support/trycopy/src/lib.rs | 909 ++++++++++++++++++ support/trycopy/src/memcpy.rs | 74 ++ support/trycopy/src/x86_64.rs | 399 ++++++++ vm/devices/user_driver/Cargo.toml | 4 +- vm/devices/user_driver/src/vfio.rs | 10 +- vm/vmcore/guestmem/Cargo.toml | 1 + vm/vmcore/guestmem/src/lib.rs | 20 +- 22 files changed, 1875 insertions(+), 1395 deletions(-) delete mode 100644 support/sparse_mmap/benches/perf.rs delete mode 100644 support/sparse_mmap/build.rs delete mode 100644 support/sparse_mmap/src/trycopy.c delete mode 100644 support/sparse_mmap/src/trycopy_windows_arm64.rs delete mode 100644 support/sparse_mmap/src/trycopy_windows_x64.rs create mode 100644 support/trycopy/Cargo.toml create mode 100644 support/trycopy/benches/perf.rs create mode 100644 support/trycopy/src/aarch64.rs create mode 100644 support/trycopy/src/lib.rs create mode 100644 support/trycopy/src/memcpy.rs create mode 100644 support/trycopy/src/x86_64.rs diff --git a/Cargo.lock b/Cargo.lock index 828e453740..57572d3123 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2741,6 +2741,7 @@ dependencies = [ "pal_event", "sparse_mmap", "thiserror 2.0.16", + "trycopy", "zerocopy 0.8.25", ] @@ -6877,13 +6878,12 @@ dependencies = [ name = "sparse_mmap" version = "0.0.0" dependencies = [ - "cc", - "criterion", "getrandom 0.3.3", "libc", "pal", "parking_lot", "thiserror 2.0.16", + "trycopy", "windows-sys 0.61.0", "zerocopy 0.8.25", ] @@ -7675,6 +7675,17 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "trycopy" +version = "0.0.0" +dependencies = [ + "criterion", + "libc", + "thiserror 2.0.16", + "windows-sys 0.61.0", + "zerocopy 0.8.25", +] + [[package]] name = "typed-path" version = "0.11.0" @@ -8234,8 +8245,8 @@ dependencies = [ "parking_lot", "pci_core", "safeatomic", - "sparse_mmap", "tracing", + "trycopy", "uevent", "vfio-bindings", "vfio_sys", diff --git a/Cargo.toml b/Cargo.toml index 733417e6ae..cad8ab940d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -153,6 +153,7 @@ test_with_tracing = { path = "support/test_with_tracing" } test_with_tracing_macro = { path = "support/test_with_tracing/test_with_tracing_macro" } tracelimit = { path = 
"support/tracelimit" } tracing_helpers = { path = "support/tracing_helpers" } +trycopy = { path = "support/trycopy" } ucs2 = { path = "support/ucs2" } uevent = { path = "support/uevent" } unix_socket = { path = "support/unix_socket" } @@ -418,7 +419,6 @@ bitvec = { version = "1.1", default-features = false } blocking = "1.2" caps = "0.5" cargo_toml = "0.22" -cc = "1.0" cfg-if = "1" clap = "4.2" crc32fast = { version = "1.3.2", default-features = false } diff --git a/flowey/flowey_lib_hvlite/src/_jobs/check_clippy.rs b/flowey/flowey_lib_hvlite/src/_jobs/check_clippy.rs index a14687e07d..5f1d4fa04e 100644 --- a/flowey/flowey_lib_hvlite/src/_jobs/check_clippy.rs +++ b/flowey/flowey_lib_hvlite/src/_jobs/check_clippy.rs @@ -187,15 +187,6 @@ impl SimpleFlowNode for Node { } }); - let extra_env = if matches!( - target.operating_system, - target_lexicon::OperatingSystem::Darwin(_) - ) { - Some(vec![("SPARSE_MMAP_NO_BUILD".into(), "1".into())]) - } else { - None - }; - // HACK: the following behavior has been cargo-culted from our old // CI, and at some point, we should actually improve the testing // story on windows, so that we can run with FeatureSet::All in CI. @@ -217,7 +208,7 @@ impl SimpleFlowNode for Node { profile: profile.clone(), features: features.clone(), target, - extra_env, + extra_env: None, exclude, keep_going: true, all_targets: true, diff --git a/support/sparse_mmap/Cargo.toml b/support/sparse_mmap/Cargo.toml index 5e9f5d8071..ee22f47913 100644 --- a/support/sparse_mmap/Cargo.toml +++ b/support/sparse_mmap/Cargo.toml @@ -6,11 +6,9 @@ name = "sparse_mmap" edition.workspace = true rust-version.workspace = true -[build-dependencies] -cc.workspace = true - [dependencies] pal.workspace = true +trycopy.workspace = true thiserror.workspace = true zerocopy.workspace = true @@ -31,12 +29,5 @@ windows-sys = { workspace = true, features = [ "Win32_System_Threading", ]} -[[bench]] -name = "perf" -harness = false - -[dev-dependencies] -criterion.workspace = true - [lints] workspace = true diff --git a/support/sparse_mmap/benches/perf.rs b/support/sparse_mmap/benches/perf.rs deleted file mode 100644 index a2ad6f3b6b..0000000000 --- a/support/sparse_mmap/benches/perf.rs +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -//! Performance tests. - -// UNSAFETY: testing unsafe interfaces -#![expect(unsafe_code)] -#![expect(missing_docs)] - -use sparse_mmap::initialize_try_copy; -use std::hint::black_box; - -criterion::criterion_main!(benches); - -criterion::criterion_group!(benches, bench_access); - -fn bench_access(c: &mut criterion::Criterion) { - initialize_try_copy(); - c.bench_function("try-read-8", |b| { - // SAFETY: passing a valid src. - b.iter(|| unsafe { - let n = 0u8; - sparse_mmap::try_read_volatile(&n).unwrap(); - }); - }) - .bench_function("read-8", |b| { - // SAFETY: passing a valid src. - b.iter(|| unsafe { - let n = 0u8; - std::ptr::read_volatile(black_box(&n)); - }) - }); -} diff --git a/support/sparse_mmap/build.rs b/support/sparse_mmap/build.rs deleted file mode 100644 index 881b1ca646..0000000000 --- a/support/sparse_mmap/build.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -#![expect(missing_docs)] - -fn main() { - if std::env::var_os("CARGO_CFG_WINDOWS").is_some() { - // Implemented in inline asm. 
- return; - } - if std::env::var_os("SPARSE_MMAP_NO_BUILD").is_some() { - return; - } - let mut build = cc::Build::new(); - build.file("src/trycopy.c").warnings_into_errors(true); - - for (a, b) in std::env::vars() { - eprintln!("note: {}={}", a, b); - } - if std::env::var_os("CARGO_CFG_WINDOWS").is_some() - && std::env::var("CARGO_CFG_TARGET_ENV").unwrap() == "gnu" - { - if get_tool_var("CC").is_none() { - // clang is required for SEH support. - build.compiler("clang"); - } - // ms-extensions is required for SEH support. - build.flag("-fms-extensions"); - } - - build.compile("trycopy"); - println!("cargo:rerun-if-changed=src/trycopy.c"); -} - -fn get_tool_var(name: &str) -> Option { - let target = std::env::var("TARGET").unwrap().replace('-', "_"); - let var = format!("{}_{}", name, target); - println!("cargo:rerun-if-env-changed={}", var); - std::env::var(var) - .or_else(|_| { - println!("cargo:rerun-if-env-changed={}", name); - std::env::var(name) - }) - .ok() -} diff --git a/support/sparse_mmap/src/lib.rs b/support/sparse_mmap/src/lib.rs index 1350689979..fb93a89246 100644 --- a/support/sparse_mmap/src/lib.rs +++ b/support/sparse_mmap/src/lib.rs @@ -9,8 +9,6 @@ #![expect(clippy::undocumented_unsafe_blocks, clippy::missing_safety_doc)] pub mod alloc; -mod trycopy_windows_arm64; -mod trycopy_windows_x64; pub mod unix; pub mod windows; @@ -33,419 +31,12 @@ use zerocopy::Immutable; use zerocopy::IntoBytes; use zerocopy::KnownLayout; -/// Must be called before using try_copy on Unix platforms. -pub fn initialize_try_copy() { - #[cfg(unix)] - { - static INIT: std::sync::Once = std::sync::Once::new(); - INIT.call_once(|| unsafe { - let err = install_signal_handlers(); - if err != 0 { - panic!( - "could not install signal handlers: {}", - std::io::Error::from_raw_os_error(err) - ) - } - }); - } -} - -unsafe extern "C" { - #[cfg(unix)] - fn install_signal_handlers() -> i32; - - fn try_memmove( - dest: *mut u8, - src: *const u8, - length: usize, - failure: *mut AccessFailure, - ) -> i32; - fn try_memset(dest: *mut u8, c: i32, length: usize, failure: *mut AccessFailure) -> i32; - fn try_cmpxchg8( - dest: *mut u8, - expected: &mut u8, - desired: u8, - failure: *mut AccessFailure, - ) -> i32; - fn try_cmpxchg16( - dest: *mut u16, - expected: &mut u16, - desired: u16, - failure: *mut AccessFailure, - ) -> i32; - fn try_cmpxchg32( - dest: *mut u32, - expected: &mut u32, - desired: u32, - failure: *mut AccessFailure, - ) -> i32; - fn try_cmpxchg64( - dest: *mut u64, - expected: &mut u64, - desired: u64, - failure: *mut AccessFailure, - ) -> i32; - fn try_read8(dest: *mut u8, src: *const u8, failure: *mut AccessFailure) -> i32; - fn try_read16(dest: *mut u16, src: *const u16, failure: *mut AccessFailure) -> i32; - fn try_read32(dest: *mut u32, src: *const u32, failure: *mut AccessFailure) -> i32; - fn try_read64(dest: *mut u64, src: *const u64, failure: *mut AccessFailure) -> i32; - fn try_write8(dest: *mut u8, value: u8, failure: *mut AccessFailure) -> i32; - fn try_write16(dest: *mut u16, value: u16, failure: *mut AccessFailure) -> i32; - fn try_write32(dest: *mut u32, value: u32, failure: *mut AccessFailure) -> i32; - fn try_write64(dest: *mut u64, value: u64, failure: *mut AccessFailure) -> i32; -} - -#[repr(C)] -struct AccessFailure { - address: *mut u8, - #[cfg(unix)] - si_signo: i32, - #[cfg(unix)] - si_code: i32, -} - -#[derive(Debug, Error)] -#[error("failed to {} memory", if self.is_write { "write" } else { "read" })] -pub struct MemoryError { - offset: usize, - is_write: bool, - #[source] - 
source: OsAccessError, -} - -#[derive(Debug, Error)] -enum OsAccessError { - #[cfg(windows)] - #[error("access violation")] - AccessViolation, - #[cfg(unix)] - #[error("SIGSEGV (si_code = {0:x})")] - Sigsegv(u32), - #[cfg(unix)] - #[error("SIGBUS (si_code = {0:x})")] - Sigbus(u32), -} - -impl MemoryError { - fn new(src: Option<*const u8>, dest: *mut u8, len: usize, failure: &AccessFailure) -> Self { - let (offset, is_write) = if failure.address.is_null() { - // In the case of a general protection fault (#GP) the provided address is zero. - (0, src.is_none()) - } else if (dest..dest.wrapping_add(len)).contains(&failure.address) { - (failure.address as usize - dest as usize, true) - } else if let Some(src) = src { - if (src..src.wrapping_add(len)).contains(&failure.address.cast_const()) { - (failure.address as usize - src as usize, false) - } else { - panic!( - "invalid failure address: {:p} src: {:p} dest: {:p} len: {:#x}", - failure.address, src, dest, len - ); - } - } else { - panic!( - "invalid failure address: {:p} src: None dest: {:p} len: {:#x}", - failure.address, dest, len - ); - }; - #[cfg(windows)] - let source = OsAccessError::AccessViolation; - #[cfg(unix)] - let source = match failure.si_signo { - libc::SIGSEGV => OsAccessError::Sigsegv(failure.si_code as u32), - libc::SIGBUS => OsAccessError::Sigbus(failure.si_code as u32), - _ => { - panic!( - "unexpected signal: {} src: {:?} dest: {:p} len: {:#x}", - failure.si_signo, src, dest, len - ); - } - }; - Self { - offset, - is_write, - source, - } - } - - /// Returns the byte offset into the buffer at which the access violation - /// occurred. - pub fn offset(&self) -> usize { - self.offset - } -} - -/// Copies `count` elements from `src` to `dest`. `src` and `dest` may overlap. -/// Fails on access violation/SIGSEGV. Note that on case of failure, some of the -/// bytes (even partial elements) may already have been copied. -/// -/// This also fails if initialize_try_copy has not been called. -/// -/// # Safety -/// -/// This routine is safe to use if the memory pointed to by `src` or `dest` is -/// being concurrently mutated. -/// -/// WARNING: This routine should only be used when you know that `src` and -/// `dest` are valid, reserved addresses but you do not know if they are mapped -/// with the appropriate protection. For example, this routine is useful if -/// `dest` is a sparse mapping where some pages are mapped with -/// PAGE_NOACCESS/PROT_NONE, and some are mapped with PAGE_READWRITE/PROT_WRITE. -pub unsafe fn try_copy(src: *const T, dest: *mut T, count: usize) -> Result<(), MemoryError> { - let mut failure = MaybeUninit::uninit(); - let len = count * size_of::(); - // SAFETY: guaranteed by caller. - let ret = unsafe { - try_memmove( - dest.cast::(), - src.cast::(), - len, - failure.as_mut_ptr(), - ) - }; - match ret { - 0 => Ok(()), - _ => Err(MemoryError::new( - Some(src.cast()), - dest.cast(), - len, - // SAFETY: failure is initialized in the failure path. - unsafe { failure.assume_init_ref() }, - )), - } -} - -/// Writes `count` bytes of the value `val` to `dest`. Fails on access -/// violation/SIGSEGV. Note that on case of failure, some of the bytes (even -/// partial elements) may already have been written. -/// -/// This also fails if initialize_try_copy has not been called. -/// -/// # Safety -/// -/// This routine is safe to use if the memory pointed to by `dest` is being -/// concurrently mutated. 
-/// -/// WARNING: This routine should only be used when you know that `dest` is -/// valid, reserved addresses but you do not know if they are mapped with the -/// appropriate protection. For example, this routine is useful if `dest` is a -/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and -/// some are mapped with PAGE_READWRITE/PROT_WRITE. -pub unsafe fn try_write_bytes(dest: *mut T, val: u8, count: usize) -> Result<(), MemoryError> { - let mut failure = MaybeUninit::uninit(); - let len = count * size_of::(); - // SAFETY: guaranteed by caller. - let ret = unsafe { try_memset(dest.cast::(), val.into(), len, failure.as_mut_ptr()) }; - match ret { - 0 => Ok(()), - _ => Err(MemoryError::new( - None, - dest.cast(), - len, - // SAFETY: failure is initialized in the failure path. - unsafe { failure.assume_init_ref() }, - )), - } -} - -/// Atomically swaps the value at `dest` with `new` when `*dest` is `current`, -/// using a sequentially-consistent memory ordering. -/// -/// Returns `Ok(Ok(new))` if the swap was successful, `Ok(Err(*dest))` if the -/// swap failed, or `Err(MemoryError::AccessViolation)` if the swap could not be -/// attempted due to an access violation. -/// -/// Fails at compile time if the size is not 1, 2, 4, or 8 bytes, or if the type -/// is under-aligned. -/// -/// # Safety -/// -/// This routine is safe to use if the memory pointed to by `dest` is being -/// concurrently mutated. -/// -/// WARNING: This routine should only be used when you know that `dest` is -/// valid, reserved addresses but you do not know if they are mapped with the -/// appropriate protection. For example, this routine is useful if `dest` is a -/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and -/// some are mapped with PAGE_READWRITE/PROT_WRITE. -pub unsafe fn try_compare_exchange( - dest: *mut T, - mut current: T, - new: T, -) -> Result, MemoryError> { - const { - assert!(matches!(size_of::(), 1 | 2 | 4 | 8)); - assert!(align_of::() >= size_of::()); - }; - let mut failure = MaybeUninit::uninit(); - // SAFETY: guaranteed by caller - let ret = unsafe { - match size_of::() { - 1 => try_cmpxchg8( - dest.cast(), - std::mem::transmute::<&mut T, &mut u8>(&mut current), - std::mem::transmute_copy::(&new), - failure.as_mut_ptr(), - ), - 2 => try_cmpxchg16( - dest.cast(), - std::mem::transmute::<&mut T, &mut u16>(&mut current), - std::mem::transmute_copy::(&new), - failure.as_mut_ptr(), - ), - 4 => try_cmpxchg32( - dest.cast(), - std::mem::transmute::<&mut T, &mut u32>(&mut current), - std::mem::transmute_copy::(&new), - failure.as_mut_ptr(), - ), - 8 => try_cmpxchg64( - dest.cast(), - std::mem::transmute::<&mut T, &mut u64>(&mut current), - std::mem::transmute_copy::(&new), - failure.as_mut_ptr(), - ), - _ => unreachable!(), - } - }; - match ret { - n if n > 0 => Ok(Ok(new)), - 0 => Ok(Err(current)), - _ => Err(MemoryError::new( - None, - dest.cast(), - size_of::(), - // SAFETY: failure is initialized in the failure path. - unsafe { failure.assume_init_ref() }, - )), - } -} - -/// Reads the value at `src` using one or more read instructions. -/// -/// If `T` is 1, 2, 4, or 8 bytes in size, then exactly one read instruction is -/// used. -/// -/// Returns `Ok(T)` if the read was successful, or `Err(MemoryError)` if the -/// read was unsuccessful. -/// -/// # Safety -/// -/// This routine is safe to use if the memory pointed to by `src` is being -/// concurrently mutated. 
-/// -/// WARNING: This routine should only be used when you know that `src` is -/// valid, reserved addresses but you do not know if they are mapped with the -/// appropriate protection. For example, this routine is useful if `src` is a -/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and -/// some are mapped with PAGE_READWRITE/PROT_WRITE. -pub unsafe fn try_read_volatile( - src: *const T, -) -> Result { - let mut dest = MaybeUninit::::uninit(); - let mut failure = MaybeUninit::uninit(); - // SAFETY: guaranteed by caller - let ret = unsafe { - match size_of::() { - 1 => try_read8(dest.as_mut_ptr().cast(), src.cast(), failure.as_mut_ptr()), - 2 => try_read16(dest.as_mut_ptr().cast(), src.cast(), failure.as_mut_ptr()), - 4 => try_read32(dest.as_mut_ptr().cast(), src.cast(), failure.as_mut_ptr()), - 8 => try_read64(dest.as_mut_ptr().cast(), src.cast(), failure.as_mut_ptr()), - _ => try_memmove( - dest.as_mut_ptr().cast(), - src.cast::(), - size_of::(), - failure.as_mut_ptr(), - ), - } - }; - match ret { - 0 => { - // SAFETY: dest was fully initialized by try_read. - Ok(unsafe { dest.assume_init() }) - } - _ => Err(MemoryError::new( - Some(src.cast()), - dest.as_mut_ptr().cast(), - size_of::(), - // SAFETY: failure is initialized in the failure path. - unsafe { failure.assume_init_ref() }, - )), - } -} - -/// Writes `value` at `dest` using one or more write instructions. -/// -/// If `T` is 1, 2, 4, or 8 bytes in size, then exactly one write instruction is -/// used. -/// -/// Returns `Ok(())` if the write was successful, or `Err(MemoryError)` if the -/// write was unsuccessful. -/// -/// # Safety -/// -/// This routine is safe to use if the memory pointed to by `dest` is being -/// concurrently mutated. -/// -/// WARNING: This routine should only be used when you know that `dest` is -/// valid, reserved addresses but you do not know if they are mapped with the -/// appropriate protection. For example, this routine is useful if `dest` is a -/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and -/// some are mapped with PAGE_READWRITE/PROT_WRITE. -pub unsafe fn try_write_volatile( - dest: *mut T, - value: &T, -) -> Result<(), MemoryError> { - let mut failure = MaybeUninit::uninit(); - // SAFETY: guaranteed by caller - let ret = unsafe { - match size_of::() { - 1 => try_write8( - dest.cast(), - std::mem::transmute_copy(value), - failure.as_mut_ptr(), - ), - 2 => try_write16( - dest.cast(), - std::mem::transmute_copy(value), - failure.as_mut_ptr(), - ), - 4 => try_write32( - dest.cast(), - std::mem::transmute_copy(value), - failure.as_mut_ptr(), - ), - 8 => try_write64( - dest.cast(), - std::mem::transmute_copy(value), - failure.as_mut_ptr(), - ), - _ => try_memmove( - dest.cast(), - std::ptr::from_ref(value).cast(), - size_of::(), - failure.as_mut_ptr(), - ), - } - }; - match ret { - 0 => Ok(()), - _ => Err(MemoryError::new( - None, - dest.cast(), - size_of::(), - // SAFETY: failure is initialized in the failure path. - unsafe { failure.assume_init_ref() }, - )), - } -} - #[derive(Debug, Error)] pub enum SparseMappingError { #[error("out of bounds")] OutOfBounds, #[error(transparent)] - Memory(MemoryError), + Memory(trycopy::MemoryError), } impl SparseMapping { @@ -472,7 +63,7 @@ impl SparseMapping { self.check(offset, size_of::())?; // SAFETY: the bounds have been checked above. 
- unsafe { try_read_volatile(self.as_ptr().byte_add(offset).cast()) } + unsafe { trycopy::try_read_volatile(self.as_ptr().byte_add(offset).cast()) } .map_err(SparseMappingError::Memory) } @@ -488,7 +79,7 @@ impl SparseMapping { self.check(offset, size_of::())?; // SAFETY: the bounds have been checked above. - unsafe { try_write_volatile(self.as_ptr().byte_add(offset).cast(), value) } + unsafe { trycopy::try_write_volatile(self.as_ptr().byte_add(offset).cast(), value) } .map_err(SparseMappingError::Memory) } @@ -500,7 +91,7 @@ impl SparseMapping { // SAFETY: the bounds have been checked above. unsafe { let dest = self.as_ptr().cast::().add(offset); - try_copy(data.as_ptr(), dest, data.len()).map_err(SparseMappingError::Memory) + trycopy::try_copy(data.as_ptr(), dest, data.len()).map_err(SparseMappingError::Memory) } } @@ -512,7 +103,8 @@ impl SparseMapping { // SAFETY: the bounds have been checked above. unsafe { let src = (self.as_ptr() as *const u8).add(offset); - try_copy(src, data.as_mut_ptr(), data.len()).map_err(SparseMappingError::Memory) + trycopy::try_copy(src, data.as_mut_ptr(), data.len()) + .map_err(SparseMappingError::Memory) } } @@ -545,7 +137,7 @@ impl SparseMapping { // SAFETY: the bounds have been checked above. unsafe { let dest = self.as_ptr().cast::().add(offset); - try_write_bytes(dest, val, len).map_err(SparseMappingError::Memory) + trycopy::try_write_bytes(dest, val, len).map_err(SparseMappingError::Memory) } } @@ -571,152 +163,6 @@ impl SparseMapping { mod tests { use super::*; - #[derive(Copy, Clone, Debug)] - enum Primitive { - Read, - Write, - CompareAndSwap, - } - - #[repr(u32)] - #[derive(Copy, Clone, Debug, Eq, PartialEq)] - enum Size { - Bit8 = 8, - Bit16 = 16, - Bit32 = 32, - Bit64 = 64, - } - - fn test_unsafe_primitive(primitive: Primitive, size: Size) { - // NOTE: this test provides a very basic validation of - // the compare-and-swap operation, mostly to check that - // the failures address in returned correctly. See other tests - // for more. 
- let mut dest = !0u64; - let dest_addr = std::ptr::from_mut(&mut dest).cast::<()>(); - let src = 0x5555_5555_5555_5555u64; - let src_addr = std::ptr::from_ref(&src).cast::<()>(); - let bad_addr_mut = 0x100 as *mut (); // Within 0..0x1000 - let bad_addr = bad_addr_mut.cast_const(); - let nonsense_addr = !0u64 as *mut (); - let expected = if size != Size::Bit64 { - dest.wrapping_shl(size as u32) | src.wrapping_shr(64 - (size as u32)) - } else { - src - }; - let mut af = AccessFailure { - address: nonsense_addr.cast(), - #[cfg(unix)] - si_signo: 0, - #[cfg(unix)] - si_code: 0, - }; - let af_addr = &mut af as *mut _; - - let res = unsafe { - match size { - Size::Bit8 => match primitive { - Primitive::Read => try_read8(dest_addr.cast(), src_addr.cast(), af_addr), - Primitive::Write => try_write8(dest_addr.cast(), src as u8, af_addr), - Primitive::CompareAndSwap => { - 1 - try_cmpxchg8(dest_addr.cast(), &mut (dest as u8), src as u8, af_addr) - } - }, - Size::Bit16 => match primitive { - Primitive::Read => try_read16(dest_addr.cast(), src_addr.cast(), af_addr), - Primitive::Write => try_write16(dest_addr.cast(), src as u16, af_addr), - Primitive::CompareAndSwap => { - 1 - try_cmpxchg16(dest_addr.cast(), &mut (dest as u16), src as u16, af_addr) - } - }, - Size::Bit32 => match primitive { - Primitive::Read => try_read32(dest_addr.cast(), src_addr.cast(), af_addr), - Primitive::Write => try_write32(dest_addr.cast(), src as u32, af_addr), - Primitive::CompareAndSwap => { - 1 - try_cmpxchg32(dest_addr.cast(), &mut (dest as u32), src as u32, af_addr) - } - }, - Size::Bit64 => match primitive { - Primitive::Read => try_read64(dest_addr.cast(), src_addr.cast(), af_addr), - Primitive::Write => try_write64(dest_addr.cast(), src, af_addr), - Primitive::CompareAndSwap => { - 1 - try_cmpxchg64(dest_addr.cast(), &mut { dest }, src, af_addr) - } - }, - } - }; - assert_eq!( - dest, expected, - "Expected value must match the result for {primitive:?} and {size:?}" - ); - assert_eq!( - res, 0, - "Success should be returned for {primitive:?} and {size:?}" - ); - assert_eq!( - af.address, - nonsense_addr.cast(), - "Fault address must not be set for {primitive:?} and {size:?}" - ); - - let res = unsafe { - match size { - Size::Bit8 => match primitive { - Primitive::Read => try_read8(dest_addr.cast(), bad_addr.cast(), af_addr), - Primitive::Write => try_write8(bad_addr_mut.cast(), src as u8, af_addr), - Primitive::CompareAndSwap => { - try_cmpxchg8(bad_addr_mut.cast(), &mut (dest as u8), src as u8, af_addr) - } - }, - Size::Bit16 => match primitive { - Primitive::Read => try_read16(dest_addr.cast(), bad_addr.cast(), af_addr), - Primitive::Write => try_write16(bad_addr_mut.cast(), src as u16, af_addr), - Primitive::CompareAndSwap => { - try_cmpxchg16(bad_addr_mut.cast(), &mut (dest as u16), src as u16, af_addr) - } - }, - Size::Bit32 => match primitive { - Primitive::Read => try_read32(dest_addr.cast(), bad_addr.cast(), af_addr), - Primitive::Write => try_write32(bad_addr_mut.cast(), src as u32, af_addr), - Primitive::CompareAndSwap => { - try_cmpxchg32(bad_addr_mut.cast(), &mut (dest as u32), src as u32, af_addr) - } - }, - Size::Bit64 => match primitive { - Primitive::Read => try_read64(dest_addr.cast(), bad_addr.cast(), af_addr), - Primitive::Write => try_write64(bad_addr_mut.cast(), src, af_addr), - Primitive::CompareAndSwap => { - try_cmpxchg64(bad_addr_mut.cast(), &mut { dest }, src, af_addr) - } - }, - } - }; - assert_eq!( - dest, expected, - "Fault preserved source and destination for {primitive:?} and 
{size:?}" - ); - assert_eq!( - res, -1, - "Error code must be returned for {primitive:?} and {size:?}" - ); - assert_eq!( - af.address, - bad_addr_mut.cast(), - "Fault address must be set for {primitive:?} and {size:?}" - ); - } - - #[test] - fn test_unsafe_primitives() { - initialize_try_copy(); - - for primitive in [Primitive::Read, Primitive::Write, Primitive::CompareAndSwap] { - for size in [Size::Bit8, Size::Bit16, Size::Bit32, Size::Bit64] { - test_unsafe_primitive(primitive, size); - } - } - } - static BUF: [u8; 65536] = [0xcc; 65536]; fn test_with(range_size: usize) { @@ -751,47 +197,6 @@ mod tests { test_with(0x40000000 + SparseMapping::page_size()); } - #[test] - fn test_try_copy() { - initialize_try_copy(); - - let mapping = SparseMapping::new(2 * 1024 * 1024).unwrap(); - let page_size = SparseMapping::page_size(); - mapping.alloc(page_size, page_size).unwrap(); - let base = mapping.as_ptr().cast::(); - unsafe { - try_copy(BUF.as_ptr(), base, 100).unwrap_err(); - try_copy(BUF.as_ptr(), base.add(page_size), 100).unwrap(); - try_copy(BUF.as_ptr(), base.add(page_size), page_size + 1).unwrap_err(); - } - } - - #[test] - fn test_cmpxchg() { - initialize_try_copy(); - - let page_size = SparseMapping::page_size(); - let mapping = SparseMapping::new(page_size * 2).unwrap(); - mapping.alloc(0, page_size).unwrap(); - let base = mapping.as_ptr().cast::(); - unsafe { - assert_eq!(try_compare_exchange(base.add(8), 0, 1).unwrap().unwrap(), 1); - assert_eq!( - try_compare_exchange(base.add(8), 0, 2) - .unwrap() - .unwrap_err(), - 1 - ); - assert_eq!( - try_compare_exchange(base.cast::().add(1), 1, 2) - .unwrap() - .unwrap(), - 2 - ); - try_compare_exchange(base.add(page_size), 0, 2).unwrap_err(); - } - } - #[test] fn test_overlapping_mappings() { #![expect(clippy::identity_op)] diff --git a/support/sparse_mmap/src/trycopy.c b/support/sparse_mmap/src/trycopy.c deleted file mode 100644 index 4d607fa5f3..0000000000 --- a/support/sparse_mmap/src/trycopy.c +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -#include -#include -#include -#include - -#if defined(_WIN32) -#include -#else -#include -#include -#include -#include -#include -#include -#include -#include -#endif - -struct access_failure { - void *address; -#if !defined(_WIN32) - int signal; - int si_code; -#endif -}; - -#if defined(_WIN32) - -/// Exception filter for try_memmove. -static int exception_filter(EXCEPTION_POINTERS *exc, struct access_failure *failure) -{ - if (exc->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) - { - *failure = (struct access_failure) { .address = (void *)exc->ExceptionRecord->ExceptionInformation[1] }; - return EXCEPTION_EXECUTE_HANDLER; - } - else - { - return EXCEPTION_CONTINUE_SEARCH; - } -} - -/// Simple wrapper around memmove. -/// -/// This is necessary for LLVM to compile the SEH __try/__except block in -/// try_memmove correctly. Presumably this is because of this limitation in -/// clang 12, at least, combined with memmove being a special function. -/// -/// https://clang.llvm.org/docs/MSVCCompatibility.html -/// -/// Asynchronous Exceptions (SEH): Partial. -/// -/// Structured exceptions (__try / __except / __finally) mostly work on x86 -/// and x64. LLVM does not model asynchronous exceptions, so it is currently -/// impossible to catch an asynchronous exception generated in the same -/// frame as the catching __try. 
-static void memmove_wrapper(void* dest, void* src, uintptr_t length) -{ - memmove(dest, src, length); -} - -// On Windows, just use Structured Exception Handling (SEH) to -// attempt the memmove. -int try_memmove(void* dest, void* src, uintptr_t length, struct access_failure* failure) -{ - __try - { - memmove_wrapper(dest, src, length); - return 0; - } - __except(exception_filter(GetExceptionInformation(), failure)) - { - return -1; - } -} - -static void memset_wrapper(void* dest, int c, uintptr_t length) -{ - memset(dest, c, length); -} - -int try_memset(void* dest, int c, uintptr_t length, struct access_failure* failure) -{ - __try - { - memset_wrapper(dest, c, length); - return 0; - } - __except(exception_filter(GetExceptionInformation(), failure)) - { - return -1; - } -} - -// Make sure to include the wrapper function for SEH handling to work with LLVM. -#define TRY_READ(name, type) \ - static type name ## _wrapper(const volatile type* src) \ - { \ - return *src; \ - } \ - int name(const volatile type* src, type* dest, struct access_failure* failure) \ - { \ - __try { \ - *dest = name ## _wrapper(src); \ - return 0; \ - } __except(exception_filter(GetExceptionInformation(), failure)) { \ - return -1; \ - } \ - } - -#define TRY_WRITE(name, type) \ - static void name ## _wrapper(volatile type* dest, type value) \ - { \ - *dest = value; \ - } \ - int name(volatile type* dest, type value, struct access_failure* failure) \ - { \ - __try { \ - name ## _wrapper(dest, value); \ - return 0; \ - } __except(exception_filter(GetExceptionInformation(), failure)) { \ - return -1; \ - } \ - } - -#define TRY_CMPXCHG(name, intrinsic, type) \ - bool name ## _wrapper(type *dest, type *expected, type desired) \ - { \ - type old = intrinsic((void*)dest, desired, *expected); \ - if (old == *expected) { \ - return true; \ - } else { \ - *expected = old; \ - return false; \ - } \ - } \ - int name(type *dest, type *expected, type desired, struct access_failure *failure) \ - { \ - __try { \ - return name ## _wrapper(dest, expected, desired); \ - } __except(exception_filter(GetExceptionInformation(), failure)) { \ - return -1; \ - } \ - } - -#define TRY_WORD(size, intrinsic) \ - TRY_READ(try_read ## size, int ## size ## _t) \ - TRY_WRITE(try_write ## size, int ## size ## _t) \ - TRY_CMPXCHG(try_cmpxchg ## size, intrinsic, int ## size ## _t) - -TRY_WORD(8, _InterlockedCompareExchange8) -TRY_WORD(16, _InterlockedCompareExchange16) -TRY_WORD(32, _InterlockedCompareExchange) -TRY_WORD(64, _InterlockedCompareExchange64) - -#else - -// Keep track of this thread's jump point to return failure -// if memmove touches an invalid page. -__thread struct access_failure * volatile signal_access_failure; -__thread sigjmp_buf signal_jmp_buf; - -static void handle_signal(int sig, siginfo_t *info, __attribute__((unused)) void *ucontext) -{ - // Only handle the signal if we're in the middle of a memmove, with the - // jump point set on this thread. - if (signal_access_failure) - { - *signal_access_failure = (struct access_failure) { .address = info->si_addr, .signal = sig, .si_code = info->si_code }; - signal_access_failure = NULL; - - // siglongjmp out of the signal handler. - siglongjmp(signal_jmp_buf, 1); - } - else - { - // Restore the default handler and continue to crash the process. - struct sigaction act = { .sa_handler = SIG_DFL }; - sigemptyset(&act.sa_mask); - sigaction(sig, &act, NULL); - } -} - -int install_signal_handlers() -{ - // Install signal handler for SIGSEGV. 
- // - // SA_NODEFER is required due to siglongjmp. - struct sigaction act = { .sa_sigaction = &handle_signal, .sa_flags = SA_NODEFER | SA_SIGINFO }; - - // Don't block any signals. - if (sigemptyset(&act.sa_mask) == -1) - { - return errno; - } - - static const int signals[] = { SIGSEGV, SIGBUS }; - - for (size_t i = 0; i < sizeof(signals) / sizeof(signals[0]); i++) - { - int sig = signals[i]; - - // Connect the signal handler. - if (sigaction(sig, &act, NULL) == -1) - { - return errno; - } - } - - return 0; -} - -// On UNIX, hook SIGSEGV across the memmove to determine if the -// copy succeeded or failed. -#define TRY_OP(failure, op) \ - if (signal_access_failure) \ - { \ - abort(); \ - } \ - \ - signal_access_failure = (failure); \ - \ - if (sigsetjmp(signal_jmp_buf, 0) == 0) { \ - op; \ - signal_access_failure = NULL; \ - } else { \ - return -1; \ - } - -int try_memmove(void *dest, void *src, uintptr_t length, struct access_failure *failure) -{ - TRY_OP(failure, memmove(dest, src, length)); - return 0; -} - -int try_memset(void *dest, int c, uintptr_t length, struct access_failure *failure) -{ - TRY_OP(failure, memset(dest, c, length)); - return 0; -} - -#define TRY_READ(name, type) \ - int name(type *dest, const volatile type *src, struct access_failure *failure) \ - { \ - TRY_OP(failure, *dest = *src); \ - return 0; \ - } - -#define TRY_WRITE(name, type) \ - int name(volatile type *dest, type value, struct access_failure *failure) \ - { \ - TRY_OP(failure, *dest = value); \ - return 0; \ - } - -#define TRY_CMPXCHG(name, type) \ - int name(type *dest, type *expected, type desired, struct access_failure *failure) \ - { \ - bool success; \ - TRY_OP(failure, success = __atomic_compare_exchange_n(dest, expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); \ - return success; \ - } \ - -#define TRY_WORD(size) \ - TRY_READ(try_read ## size, int ## size ## _t) \ - TRY_WRITE(try_write ## size, int ## size ## _t) \ - TRY_CMPXCHG(try_cmpxchg ## size, int ## size ## _t) - -TRY_WORD(8) -TRY_WORD(16) -TRY_WORD(32) -TRY_WORD(64) - -#endif diff --git a/support/sparse_mmap/src/trycopy_windows_arm64.rs b/support/sparse_mmap/src/trycopy_windows_arm64.rs deleted file mode 100644 index 3117b43231..0000000000 --- a/support/sparse_mmap/src/trycopy_windows_arm64.rs +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -//! Rust inline asm implementation of the try_* functions for Windows ARM64. -//! -//! This uses manually implemented SEH handlers, avoiding the need to use a C -//! compiler. - -// xtask-fmt allow-target-arch sys-crate -#![cfg(all(windows, target_arch = "aarch64"))] - -use crate::AccessFailure; -use crate::sys::EXCEPTION_CONTINUE_SEARCH; -use crate::sys::EXCEPTION_EXECUTE_HANDLER; -use windows_sys::Win32::Foundation::EXCEPTION_ACCESS_VIOLATION; - -/// The exception filter that runs when there is an access violation in one of -/// the functions defined below. -unsafe extern "C" fn exception_filter( - pointers: &mut windows_sys::Win32::System::Diagnostics::Debug::EXCEPTION_POINTERS, - frame: *mut (), -) -> i32 { - // SAFETY: the caller provides a valid pointer to an exception record. - unsafe { - if (*pointers.ExceptionRecord).ExceptionCode != EXCEPTION_ACCESS_VIOLATION { - return EXCEPTION_CONTINUE_SEARCH; - } - } - - let af_addr; - let address; - // SAFETY: the caller provides a valid pointer to an exception record. - unsafe { - // Number `8` comes from the stack frame allocation code in `seh_proc!` below. 
- af_addr = frame.cast_const().cast::().sub(8).cast::().read(); - address = (*pointers.ExceptionRecord).ExceptionInformation[1] as *mut u8; - } - - // SAFETY: the address of the access failure structure is put onto the stack - // before executing the code that may fault. - unsafe { (af_addr as *mut AccessFailure).write(AccessFailure { address }) } - EXCEPTION_EXECUTE_HANDLER -} - -/// Defines a function with a __try/__except block. `$head` runs before the try, -/// `$body` runs inside the try, and `$tail` runs after the try. -/// -/// If code faults while running the instructions in `$body`, then the exception -/// filter will fill out the [`AccessFailure`] pointed to by `$failure_reg`, and -/// the function will return -1. -macro_rules! seh_proc { - ($func:path, $failure_reg:expr, [$($head:expr),* $(,)?], [$($body:expr),* $(,)?], [$($tail:expr),* $(,)?]) => { - std::arch::global_asm! { - ".pushsection .text", - ".globl {func}", - ".arch armv8.1-a", - ".p2align 2", - ".def {func}; .scl 2; .type 32; .endef", - ".seh_proc {func}", - "{func}:", - "sub sp, sp, #48", - ".seh_stackalloc 48", - "stp fp, lr, [sp, #16]", - ".seh_save_fplr 16", - "add fp, sp, #16", - ".seh_add_fp 16", - ".seh_endprologue", - // Save the pointer to the access failure data on the stack - // for the exception filter. - concat!("str ", $failure_reg, ", [fp, #-8]"), - $($head,)* - "1:", - $($body,)* - "2:", - $($tail,)* - "3:", - ".seh_startepilogue", - "ldp fp, lr, [sp, #16]", - ".seh_save_fplr 16", - "add sp, sp, #48", - ".seh_stackalloc 48", - ".seh_endepilogue", - "ret", - "4:", - "mov w0, #-1", // return -1 on failure - "b 3b", - ".seh_handler __C_specific_handler, @except", - ".seh_handlerdata", - ".long 1", // one handler entry - ".long (1b)@IMGREL", // start address of __try block - ".long (2b)@IMGREL", // end address of __try block - ".long ({filter})@IMGREL", // exception filter - ".long (4b)@IMGREL", // exception handler - ".text", - ".seh_endproc", - ".popsection", - func = sym $func, - filter = sym exception_filter, - } - }; -} - -seh_proc!( - super::try_memmove, - "x3", - [], - ["bl memcpy", "mov w0, wzr"], - [] // mov is in body since there must be at least one instruction after a call -); -seh_proc!( - super::try_memset, - "x3", - [], - ["bl memset", "mov w0, wzr"], - [] // mov is in body since there must be at least one instruction after a call -); -seh_proc!( - super::try_cmpxchg8, - "x3", - ["ldrb w8, [x1]", "mov w9, w8"], - ["casalb w8, w2, [x0]"], - ["strb w8, [x1]", "cmp w8, w9", "cset w0, eq"] -); -seh_proc!( - super::try_cmpxchg16, - "x3", - ["ldrh w8, [x1]", "mov w9, w8"], - ["casalh w8, w2, [x0]"], - ["strh w8, [x1]", "cmp w8, w9", "cset w0, eq"] -); -seh_proc!( - super::try_cmpxchg32, - "x3", - ["ldr w8, [x1]", "mov w9, w8"], - ["casal w8, w2, [x0]"], - ["str w8, [x1]", "cmp w8, w9", "cset w0, eq"] -); -seh_proc!( - super::try_cmpxchg64, - "x3", - ["ldr x8, [x1]", "mov x9, x8"], - ["casal x8, x2, [x0]"], - ["str x8, [x1]", "cmp x8, x9", "cset w0, eq"] -); -seh_proc!( - super::try_read8, - "x2", - [], - ["ldrb w8, [x1]"], - ["strb w8, [x0]", "mov w0, wzr"] -); -seh_proc!( - super::try_read16, - "x2", - [], - ["ldrh w8, [x1]"], - ["strh w8, [x0]", "mov w0, wzr"] -); -seh_proc!( - super::try_read32, - "x2", - [], - ["ldr w8, [x1]"], - ["str w8, [x0]", "mov w0, wzr"] -); -seh_proc!( - super::try_read64, - "x2", - [], - ["ldr x8, [x1]"], - ["str x8, [x0]", "mov w0, wzr"] -); -seh_proc!( - super::try_write8, - "x2", - [], - ["strb w1, [x0]"], - ["mov w0, wzr"] -); -seh_proc!( - super::try_write16, - 
"x2", - [], - ["strh w1, [x0]"], - ["mov w0, wzr"] -); -seh_proc!( - super::try_write32, - "x2", - [], - ["str w1, [x0]"], - ["mov w0, wzr"] -); -seh_proc!( - super::try_write64, - "x2", - [], - ["str x1, [x0]"], - ["mov w0, wzr"] -); diff --git a/support/sparse_mmap/src/trycopy_windows_x64.rs b/support/sparse_mmap/src/trycopy_windows_x64.rs deleted file mode 100644 index efabd69acf..0000000000 --- a/support/sparse_mmap/src/trycopy_windows_x64.rs +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -//! Rust inline asm implementation of the try_* functions for Windows x86_64. -//! -//! This uses manually implemented SEH handlers, avoiding the need to use a C -//! compiler. - -// xtask-fmt allow-target-arch sys-crate -#![cfg(all(windows, target_arch = "x86_64"))] - -use crate::AccessFailure; -use crate::sys::EXCEPTION_CONTINUE_SEARCH; -use crate::sys::EXCEPTION_EXECUTE_HANDLER; -use windows_sys::Win32::Foundation::EXCEPTION_ACCESS_VIOLATION; - -/// The exception filter that runs when there is an access violation in one of -/// the functions defined below. -unsafe extern "C" fn exception_filter( - pointers: &mut windows_sys::Win32::System::Diagnostics::Debug::EXCEPTION_POINTERS, - _frame: *mut (), -) -> i32 { - // SAFETY: the caller provides a valid pointer to an exception record. - unsafe { - if (*pointers.ExceptionRecord).ExceptionCode != EXCEPTION_ACCESS_VIOLATION { - return EXCEPTION_CONTINUE_SEARCH; - } - } - - let rdi; - let address; - // SAFETY: the caller provides a valid pointer to an exception record. - unsafe { - rdi = (*pointers.ContextRecord).Rdi; - address = (*pointers.ExceptionRecord).ExceptionInformation[1] as *mut u8; - } - - // SAFETY: the address of the access failure structure is put in the rdi - // register before executing the code that may fault. - unsafe { (rdi as *mut AccessFailure).write(AccessFailure { address }) } - EXCEPTION_EXECUTE_HANDLER -} - -/// Defines a function with a __try/__except block. `$head` runs before the try, -/// `$body` runs inside the try, and `$tail` runs after the try. -/// -/// If code faults while running the instructions in `$body`, then the exception -/// filter will fill out the [`AccessFailure`] pointed to by `$failure_reg`, and -/// the function will return -1. -macro_rules! seh_proc { - ($func:path, $failure_reg:expr, [$($head:expr),* $(,)?], [$($body:expr),* $(,)?], [$($tail:expr),* $(,)?]) => { - std::arch::global_asm! 
{ - ".pushsection .text", - ".globl {func}", - ".p2align 4", - ".def {func}; .scl 2; .type 32; .endef", - ".seh_proc {func}", - "{func}:", - "push %rdi", - ".seh_pushreg rdi", - "sub $32, %rsp", // space for home params - ".seh_stackalloc 32", - ".seh_endprologue", - // save the failure register to callee-save rdi so that it's available in exception_filter - concat!("mov ", $failure_reg, ", %rdi"), - $($head,)* - "1:", - $($body,)* - "2:", - $($tail,)* - "3:", - "add $32, %rsp", - "pop %rdi", - "ret", - "4:", - "mov $-1, %eax", // return -1 on failure - "jmp 3b", - ".seh_handler __C_specific_handler, @except", - ".seh_handlerdata", - ".long 1", // one handler entry - ".long (1b)@IMGREL", // start address of __try block - ".long (2b)@IMGREL", // end address of __try block - ".long ({filter})@IMGREL", // exception filter - ".long (4b)@IMGREL", // exception handler - ".text", - ".seh_endproc", - ".popsection", - func = sym $func, - filter = sym exception_filter, - options(att_syntax), // required for IMGREL - } - }; -} - -seh_proc!( - super::try_memmove, - "%r9", - [], - ["call memcpy", "xorl %eax, %eax"], - [] // xor is in body since there must be at least one instruction after a call -); -seh_proc!( - super::try_memset, - "%r9", - [], - ["call memset", "xorl %eax, %eax"], - [] // xor is in body since there must be at least one instruction after a call -); -seh_proc!( - super::try_cmpxchg8, - "%r9", - ["movb (%rdx), %al"], - ["cmpxchg %r8b, (%rcx)"], - ["movb %al, (%rdx)", "setz %al", "movzx %al, %eax"] -); -seh_proc!( - super::try_cmpxchg16, - "%r9", - ["movw (%rdx), %ax",], - ["cmpxchg %r8w, (%rcx)"], - ["movw %ax, (%rdx)", "setz %al", "movzx %al, %eax"] -); -seh_proc!( - super::try_cmpxchg32, - "%r9", - ["movl (%rdx), %eax",], - ["cmpxchg %r8d, (%rcx)"], - ["movl %eax, (%rdx)", "setz %al", "movzx %al, %eax"] -); -seh_proc!( - super::try_cmpxchg64, - "%r9", - ["movq (%rdx), %rax",], - ["cmpxchg %r8, (%rcx)"], - ["movq %rax, (%rdx)", "setz %al", "movzx %al, %eax"] -); -seh_proc!( - super::try_read8, - "%r8", - [], - ["movb (%rdx), %al"], - ["movb %al, (%rcx)", "xorl %eax, %eax"] -); -seh_proc!( - super::try_read16, - "%r8", - [], - ["movw (%rdx), %ax"], - ["movw %ax, (%rcx)", "xorl %eax, %eax"] -); -seh_proc!( - super::try_read32, - "%r8", - [], - ["movl (%rdx), %eax"], - ["movl %eax, (%rcx)", "xorl %eax, %eax"] -); -seh_proc!( - super::try_read64, - "%r8", - [], - ["movq (%rdx), %rax"], - ["movq %rax, (%rcx)", "xorl %eax, %eax"] -); -seh_proc!( - super::try_write8, - "%r8", - [], - ["movb %dl, (%rcx)"], - ["xorl %eax, %eax"] -); -seh_proc!( - super::try_write16, - "%r8", - [], - ["movw %dx, (%rcx)"], - ["xorl %eax, %eax"] -); -seh_proc!( - super::try_write32, - "%r8", - [], - ["movl %edx, (%rcx)"], - ["xorl %eax, %eax"] -); -seh_proc!( - super::try_write64, - "%r8", - [], - ["movq %rdx, (%rcx)"], - ["xorl %eax, %eax"] -); diff --git a/support/sparse_mmap/src/unix.rs b/support/sparse_mmap/src/unix.rs index 1cd5e84fc9..556359b599 100644 --- a/support/sparse_mmap/src/unix.rs +++ b/support/sparse_mmap/src/unix.rs @@ -95,7 +95,7 @@ impl SparseMapping { /// The range will be aligned to the largest system page size that's smaller /// or equal to `len`. pub fn new(len: usize) -> Result { - super::initialize_try_copy(); + trycopy::initialize_try_copy(); // Length of 0 return an OS error, so we need to handle it explicitly. 
if len == 0 { diff --git a/support/sparse_mmap/src/windows.rs b/support/sparse_mmap/src/windows.rs index a529e5af58..f90858689a 100644 --- a/support/sparse_mmap/src/windows.rs +++ b/support/sparse_mmap/src/windows.rs @@ -43,10 +43,6 @@ pub(crate) fn page_size() -> usize { PAGE_SIZE } -pub(crate) const EXCEPTION_EXECUTE_HANDLER: i32 = 1; -pub(crate) const EXCEPTION_CONTINUE_SEARCH: i32 = 0; -pub(crate) const _EXCEPTION_CONTINUE_EXECUTION: i32 = -1; - const MEM_REPLACE_PLACEHOLDER: u32 = 0x4000; const MEM_RESERVE_PLACEHOLDER: u32 = 0x40000; @@ -292,6 +288,7 @@ impl MappingList { impl SparseMapping { /// Reserves a sparse mapping range with the given size. pub fn new(len: usize) -> Result { + trycopy::initialize_try_copy(); Self::new_inner(None, None, len) } diff --git a/support/trycopy/Cargo.toml b/support/trycopy/Cargo.toml new file mode 100644 index 0000000000..86104b9789 --- /dev/null +++ b/support/trycopy/Cargo.toml @@ -0,0 +1,32 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +[package] +name = "trycopy" +edition.workspace = true +rust-version.workspace = true + +[dependencies] + +thiserror.workspace = true +zerocopy.workspace = true + +[target.'cfg(unix)'.dependencies] +libc.workspace = true + +[target.'cfg(windows)'.dependencies] +windows-sys = { workspace = true, features = [ + "Win32_Foundation", + "Win32_System_Diagnostics_Debug", + "Win32_System_Kernel", +]} + +[[bench]] +name = "perf" +harness = false + +[dev-dependencies] +criterion.workspace = true + +[lints] +workspace = true diff --git a/support/trycopy/benches/perf.rs b/support/trycopy/benches/perf.rs new file mode 100644 index 0000000000..1fed22b7e9 --- /dev/null +++ b/support/trycopy/benches/perf.rs @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Performance tests. + +// UNSAFETY: testing unsafe interfaces +#![expect(unsafe_code)] +#![expect(missing_docs)] + +use std::hint::black_box; +use trycopy::initialize_try_copy; + +criterion::criterion_main!(benches); + +criterion::criterion_group!(benches, bench_access); + +fn bench_access(c: &mut criterion::Criterion) { + initialize_try_copy(); + c.bench_function("try-read-8", |b| { + // SAFETY: passing a valid src. + b.iter(|| unsafe { + let n = 0u8; + trycopy::try_read_volatile(&n).unwrap(); + }); + }) + .bench_function("read-8", |b| { + // SAFETY: passing a valid src. + b.iter(|| unsafe { + let n = 0u8; + std::ptr::read_volatile(black_box(&n)); + }) + }) + .bench_function("try-copy-1", try_copy_n::<1>) + .bench_function("try-copy-4", try_copy_n::<4>) + .bench_function("try-copy-8", try_copy_n::<8>) + .bench_function("try-copy-32", try_copy_n::<32>) + .bench_function("try-copy-256", try_copy_n::<256>) + .bench_function("try-copy-4096", try_copy_n::<4096>) + .bench_function("try-set-1", try_set_n::<1>) + .bench_function("try-set-32", try_set_n::<32>) + .bench_function("try-set-256", try_set_n::<256>) + .bench_function("try-set-4096", try_set_n::<4096>); +} + +fn try_copy_n(b: &mut criterion::Bencher<'_>) { + let src = [0u8; N]; + let mut dest = [0u8; N]; + // SAFETY: passing valid src and dest. + b.iter(|| unsafe { + trycopy::try_copy(black_box(src.as_ptr()), black_box(dest.as_mut_ptr()), N).unwrap(); + }) +} + +fn try_set_n(b: &mut criterion::Bencher<'_>) { + let mut dest = [0u8; N]; + // SAFETY: passing valid dest. 
+ b.iter(|| unsafe { + trycopy::try_write_bytes(black_box(dest.as_mut_ptr()), 0u8, N).unwrap(); + }) +} diff --git a/support/trycopy/src/aarch64.rs b/support/trycopy/src/aarch64.rs new file mode 100644 index 0000000000..556dd91135 --- /dev/null +++ b/support/trycopy/src/aarch64.rs @@ -0,0 +1,357 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// xtask-fmt allow-target-arch sys-crate +#![cfg(target_arch = "aarch64")] + +use super::Context; +use super::Fault; +use super::recovery_descriptor; + +pub(super) fn get_context_ip(ctx: &Context) -> usize { + #[cfg(target_os = "linux")] + { + ctx.pc as _ + } + #[cfg(target_os = "macos")] + { + ctx.__ss.__pc as _ + } + #[cfg(windows)] + { + ctx.Pc as _ + } +} + +pub(super) fn set_context_ip_and_result(ctx: &mut Context, ip: usize, result: Option) { + #[cfg(target_os = "linux")] + { + ctx.pc = ip as _; + if let Some(result) = result { + ctx.regs[0] = result as _; + } + } + #[cfg(target_os = "macos")] + { + ctx.__ss.__pc = ip as _; + if let Some(result) = result { + ctx.__ss.__x[0] = result as _; + } + } + #[cfg(windows)] + { + ctx.Pc = ip as _; + if let Some(result) = result { + // SAFETY: the union is always valid. + unsafe { ctx.Anonymous.X[0] = result as _ }; + } + } +} + +/// # Safety +/// `dest` must be an address that's reserved and can be written to without +/// violating Rust's aliasing rules. `src` must be an address that's reserved. +unsafe fn try_copy_forward(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> { + fn copy1(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + " + 1: + ldrb {s1:w}, [{src}], #1 + subs {len}, {len}, #1 + strb {s1:w}, [{dest}], #1 + bne 1b + 2:", + recovery_descriptor!("1b", "2b", "{bail}"), + dest = inout(reg) dest => _, + src = inout(reg) src => _, + len = inout(reg) length => _, + s1 = out(reg) _, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + fn copy8(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + " + 1: + ldr {s1:x}, [{src}], #8 + subs {len}, {len}, #8 + str {s1:x}, [{dest}], #8 + bne 1b + 2:", + recovery_descriptor!("1b", "2b", "{bail}"), + dest = inout(reg) dest => _, + src = inout(reg) src => _, + len = inout(reg) length => _, + s1 = out(reg) _, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + fn copy32(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + " + 1: + ldr {s1:q}, [{src}], #16 + ldr {s2:q}, [{src}], #16 + subs {len}, {len}, #32 + str {s1:q}, [{dest}], #16 + str {s2:q}, [{dest}], #16 + bne 1b + 2:", + recovery_descriptor!("1b", "2b", "{bail}"), + dest = inout(reg) dest => _, + src = inout(reg) src => _, + len = inout(reg) length => _, + s1 = out(vreg) _, + s2 = out(vreg) _, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + crate::memcpy::try_copy_forward_with(dest, src, length, copy1, copy8, copy32) +} + +/// # Safety +/// `dest` must be an address that's reserved and can be written to without +/// violating Rust's aliasing rules. `src` must be an address that's reserved. +unsafe fn try_copy_backward(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! 
{ + " + cbz {len}, 2f + sub {dest}, {dest}, #1 + sub {src}, {src}, #1 + 1: + ldrb {s1:w}, [{src}, {len}] + strb {s1:w}, [{dest}, {len}] + subs {len}, {len}, #1 + bne 1b + 2: + ", + recovery_descriptor!("1b", "2b", "{bail}"), + dest = inout(reg) dest => _, + src = inout(reg) src => _, + len = inout(reg) length => _, + s1 = out(reg) _, + bail = label { return Err(Fault) }, + options(nostack), + } + Ok(()) + } +} + +/// # Safety +/// `dest` must be an address that's reserved and can be written to without +/// violating Rust's aliasing rules. `src` must be an address that's reserved. +pub(crate) unsafe fn try_memmove( + dest: *mut u8, + src: *const u8, + length: usize, +) -> Result<(), Fault> { + if (dest as usize).wrapping_sub(src as usize) >= length { + // SAFETY: caller ensured. + unsafe { try_copy_forward(dest, src, length) } + } else { + crate::cold_path(); + // SAFETY: caller ensured. + unsafe { try_copy_backward(dest, src, length) } + } +} + +/// # Safety +/// `dest` must be an address that's reserved and can be written to without +/// violating Rust's aliasing rules. +pub(crate) unsafe fn try_memset(dest: *mut u8, c: u8, length: usize) -> Result<(), Fault> { + fn set1(dest: *mut u8, c: u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + " + 1: + strb {c:w}, [{dest}], #1 + subs {len}, {len}, #1 + bne 1b + 2:", + recovery_descriptor!("1b", "2b", "{bail}"), + dest = inout(reg) dest => _, + c = in(reg) c, + len = inout(reg) length => _, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + fn set8(dest: *mut u8, c: u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + " + 1: + str {c:x}, [{dest}], #8 + subs {len}, {len}, #8 + bne 1b + 2:", + recovery_descriptor!("1b", "2b", "{bail}"), + dest = inout(reg) dest => _, + c = in(reg) c as u64 * 0x0101010101010101, + len = inout(reg) length => _, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + fn set32_zero(dest: *mut u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + " + 1: + str {zero:q}, [{dest}], #16 + str {zero:q}, [{dest}], #16 + subs {len}, {len}, #32 + bne 1b + 2:", + recovery_descriptor!("1b", "2b", "{bail}"), + dest = inout(reg) dest => _, + zero = in(vreg) 0, + len = inout(reg) length => _, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + crate::memcpy::try_memset_with(dest, c, length, set1, set8, set32_zero) +} + +macro_rules! try_read { + ($vis:vis $func:ident, $ty:ty, $asm:expr) => { + /// # Safety + /// `src` must be an address that's reserved. + $vis unsafe fn $func(src: *const $ty) -> Result<$ty, Fault> { + // SAFETY: caller ensured. + unsafe { + let out: u64; + let result: i32; + core::arch::asm!( + "1:", + $asm, + "mov w0, wzr", + "2:", + recovery_descriptor!("1b", "2b", "."), + out = out(reg) out, + src = in(reg) src, + lateout("x0") result, + options(nostack, readonly), + ); + if result == 0 { + Ok(out as $ty) + } else { + Err(Fault) + } + } + } + }; +} + +try_read!(pub(crate) try_read8, u8, "ldrb {out:w}, [{src}]"); +try_read!(pub(crate) try_read16, u16, "ldrh {out:w}, [{src}]"); +try_read!(pub(crate) try_read32, u32, "ldr {out:w}, [{src}]"); +try_read!(pub(crate) try_read64, u64, "ldr {out}, [{src}]"); + +macro_rules! 
try_write { + ($vis:vis $func:ident, $ty:ty, $asm:expr) => { + /// # Safety + /// `dest` must be an address that's reserved and can be written to without + /// violating Rust's aliasing rules. + $vis unsafe fn $func(dest: *mut $ty, val: $ty) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm!( + "1:", + $asm, + "2:", + recovery_descriptor!("1b", "2b", "{bail}"), + dest = in(reg) dest, + val = in(reg) val as u64, + bail = label { return Err(Fault) }, + options(nostack), + ) + } + Ok(()) + } + }; +} + +try_write!(pub(crate) try_write8, u8, "strb {val:w}, [{dest}]"); +try_write!(pub(crate) try_write16, u16, "strh {val:w}, [{dest}]"); +try_write!(pub(crate) try_write32, u32, "str {val:w}, [{dest}]"); +try_write!(pub(crate) try_write64, u64, "str {val}, [{dest}]"); + +macro_rules! try_cmpxchg { + ($vis:vis $func:ident, $ty:ty, $asm:expr) => { + /// # Safety + /// `dest` must be an address that's reserved and can be written to without + /// violating Rust's aliasing rules. + $vis unsafe fn $func( + dest: *mut $ty, + expected: &mut $ty, + desired: $ty, + ) -> Result { + let actual; + let result: i32; + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + "1:", + $asm, + "mov w0, wzr", + "2:", + recovery_descriptor!("1b", "2b", "."), + dest = in(reg) dest, + desired = in(reg) desired, + expected = inout(reg) *expected => actual, + lateout("x0") result, + options(nostack), + } + }; + if result == 0 { + if *expected == actual { + Ok(true) + } else { + *expected = actual; + Ok(false) + } + } else { + Err(Fault) + } + } + } +} + +try_cmpxchg!(pub(crate) try_cmpxchg8, u8, "casalb {expected:w}, {desired:w}, [{dest}]"); +try_cmpxchg!(pub(crate) try_cmpxchg16, u16, "casalh {expected:w}, {desired:w}, [{dest}]"); +try_cmpxchg!(pub(crate) try_cmpxchg32, u32, "casal {expected:w}, {desired:w}, [{dest}]"); +try_cmpxchg!(pub(crate) try_cmpxchg64, u64, "casal {expected}, {desired}, [{dest}]"); diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs new file mode 100644 index 0000000000..c237db25d6 --- /dev/null +++ b/support/trycopy/src/lib.rs @@ -0,0 +1,909 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Safe memory operations across trust boundaries with fault recovery. +//! +//! This crate provides memory access primitives (copy, read, write, +//! compare-exchange) that can safely handle access violations without +//! panicking. It's designed for scenarios where you need to access memory that +//! may not be properly mapped or protected, such as guest VM memory in a VMM. +//! +//! # Use Case +//! +//! In virtualization contexts like OpenVMM and OpenHCL, the VMM must access +//! guest memory that may not be fully mapped or may have varying protection +//! levels. Directly accessing such memory can lead to access violations (e.g., +//! SIGSEGV on Unix or access violations on Windows), which would typically +//! crash the VMM. This crate provides safe abstractions to perform these memory +//! operations while gracefully handling faults. +//! +//! # How It Works +//! +//! The implementation uses a combination of: +//! +//! 1. **Architecture-specific assembly**: Each memory operation is implemented +//! using inline assembly (for x86_64 and aarch64) with precise control over +//! which instructions might fault. +//! +//! 2. **Recovery descriptors**: Each faulting code region is annotated with a +//! `RecoveryDescriptor` stored in a special linker section. These +//! descriptors map instruction pointer ranges to recovery code. 
+//! +//! 3. **Signal/exception handlers**: Global handlers for SIGSEGV/SIGBUS (Unix) +//! or vectored exception handlers (Windows) intercept access violations. +//! When a fault occurs, the handler searches the recovery descriptor table +//! for a matching instruction pointer range and, if found, redirects +//! execution to the recovery code. +//! +//! 4. **Thread-local fault tracking**: The faulting address and fault details +//! are stored in thread-local storage, allowing the caller to determine +//! exactly where and why the access failed. +//! +//! # Initialization +//! +//! Before using any operations, you must call [`initialize_try_copy`] once. +//! This installs the necessary signal/exception handlers. Calling it multiple +//! times is safe (only the first call has an effect). +//! +//! # Example +//! +//! ```rust +//! trycopy::initialize_try_copy(); +//! +//! // Attempt to read from potentially unmapped guest memory +//! let guest_ptr = 0x1000 as *const u64; +//! match unsafe { trycopy::try_read_volatile(guest_ptr) } { +//! Ok(value) => println!("Read value: {:#x}", value), +//! Err(e) => println!("Access failed at offset {}: {}", e.offset(), e), +//! } +//! ``` +//! +//! # Safety Guarantees +//! +//! These operations are safe to use even when: +//! - The memory is being concurrently modified +//! - The memory may not be mapped at all +//! - The memory has incorrect protection attributes +//! +//! However, callers must still ensure: +//! - Pointers are properly aligned for their type (for atomic operations) +//! - The address space is valid and reserved (even if not committed/mapped) +//! - Concurrent access doesn't violate Rust's aliasing rules in safe code +//! +//! # Performance +//! +//! The inline assembly can be inlined by the compiler, ensuring overhead in the +//! success case is comparable to an ordinary relaxed `AtomicU` memory access +//! or `memcpy` call. The fault case is expensive (signal handling, table +//! lookup), but that's expected since faults are exceptional conditions. + +// UNSAFETY: all kinds of assembly, signal handling. +#![expect(unsafe_code)] + +mod aarch64; +mod memcpy; +mod x86_64; + +// xtask-fmt allow-target-arch sys-crate +#[cfg(target_arch = "aarch64")] +use aarch64::*; +// xtask-fmt allow-target-arch sys-crate +#[cfg(target_arch = "x86_64")] +use x86_64::*; + +use std::mem::MaybeUninit; +use thiserror::Error; +use zerocopy::FromBytes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; + +/// Must be called before using [`try_copy`] or other `try_` functions with a +/// memory buffer that could fault. +pub fn initialize_try_copy() { + static INIT: std::sync::Once = std::sync::Once::new(); + INIT.call_once(|| { + // SAFETY: calling just once, as required. + unsafe { + install_signal_handlers(); + } + }); +} + +/// Copies `count` elements from `src` to `dest`. `src` and `dest` may overlap. +/// Fails on access violation/SIGSEGV. Note that on case of failure, some of the +/// bytes (even partial elements) may already have been copied. +/// +/// This also fails if initialize_try_copy has not been called. +/// +/// # Safety +/// +/// This routine is safe to use if the memory pointed to by `src` or `dest` is +/// being concurrently mutated. +/// +/// WARNING: This routine should only be used when you know that `src` and +/// `dest` are valid, reserved addresses but you do not know if they are mapped +/// with the appropriate protection. 
For example, this routine is useful if +/// `dest` is a sparse mapping where some pages are mapped with +/// PAGE_NOACCESS/PROT_NONE, and some are mapped with PAGE_READWRITE/PROT_WRITE. +pub unsafe fn try_copy(src: *const T, dest: *mut T, count: usize) -> Result<(), MemoryError> { + let len = count * size_of::(); + // SAFETY: guaranteed by caller. + let ret = unsafe { try_memmove(dest.cast::(), src.cast::(), len) }; + match ret { + Ok(()) => Ok(()), + Err(Fault) => { + cold_path(); + Err(MemoryError::from_last_failure( + Some(src.cast()), + dest.cast(), + len, + )) + } + } +} + +/// Writes `count` bytes of the value `val` to `dest`. Fails on access +/// violation/SIGSEGV. Note that on case of failure, some of the bytes (even +/// partial elements) may already have been written. +/// +/// This also fails if initialize_try_copy has not been called. +/// +/// # Safety +/// +/// This routine is safe to use if the memory pointed to by `dest` is being +/// concurrently mutated. +/// +/// WARNING: This routine should only be used when you know that `dest` is +/// valid, reserved addresses but you do not know if they are mapped with the +/// appropriate protection. For example, this routine is useful if `dest` is a +/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and +/// some are mapped with PAGE_READWRITE/PROT_WRITE. +pub unsafe fn try_write_bytes(dest: *mut T, val: u8, count: usize) -> Result<(), MemoryError> { + let len = count * size_of::(); + // SAFETY: guaranteed by caller. + let ret = unsafe { try_memset(dest.cast::(), val, len) }; + match ret { + Ok(()) => Ok(()), + Err(Fault) => { + cold_path(); + Err(MemoryError::from_last_failure(None, dest.cast(), len)) + } + } +} + +/// Atomically swaps the value at `dest` with `new` when `*dest` is `current`, +/// using a sequentially-consistent memory ordering. +/// +/// Returns `Ok(Ok(new))` if the swap was successful, `Ok(Err(*dest))` if the +/// swap failed, or `Err(MemoryError::AccessViolation)` if the swap could not be +/// attempted due to an access violation. +/// +/// Fails at compile time if the size is not 1, 2, 4, or 8 bytes, or if the type +/// is under-aligned. +/// +/// # Safety +/// +/// This routine is safe to use if the memory pointed to by `dest` is being +/// concurrently mutated. +/// +/// WARNING: This routine should only be used when you know that `dest` is +/// valid, reserved addresses but you do not know if they are mapped with the +/// appropriate protection. For example, this routine is useful if `dest` is a +/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and +/// some are mapped with PAGE_READWRITE/PROT_WRITE. 
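+///
+/// # Example
+///
+/// A minimal sketch of the call pattern, using ordinary local memory so the
+/// fault path is not exercised (the names below are illustrative only):
+///
+/// ```no_run
+/// trycopy::initialize_try_copy();
+///
+/// let mut slot = 0u32;
+/// let dest = std::ptr::from_mut(&mut slot);
+/// // SAFETY: `dest` points to valid, aligned memory owned by this scope.
+/// match unsafe { trycopy::try_compare_exchange(dest, 0u32, 1u32) } {
+///     Ok(Ok(new)) => assert_eq!(new, 1), // swapped 0 -> 1
+///     Ok(Err(actual)) => println!("value was {actual}, not 0"),
+///     Err(err) => println!("fault at offset {}: {err}", err.offset()),
+/// }
+/// ```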
+pub unsafe fn try_compare_exchange( + dest: *mut T, + mut current: T, + new: T, +) -> Result, MemoryError> { + const { + assert!(matches!(size_of::(), 1 | 2 | 4 | 8)); + assert!(align_of::() >= size_of::()); + }; + // SAFETY: guaranteed by caller + let ret = unsafe { + match size_of::() { + 1 => try_cmpxchg8( + dest.cast(), + std::mem::transmute::<&mut T, &mut u8>(&mut current), + std::mem::transmute_copy::(&new), + ), + 2 => try_cmpxchg16( + dest.cast(), + std::mem::transmute::<&mut T, &mut u16>(&mut current), + std::mem::transmute_copy::(&new), + ), + 4 => try_cmpxchg32( + dest.cast(), + std::mem::transmute::<&mut T, &mut u32>(&mut current), + std::mem::transmute_copy::(&new), + ), + 8 => try_cmpxchg64( + dest.cast(), + std::mem::transmute::<&mut T, &mut u64>(&mut current), + std::mem::transmute_copy::(&new), + ), + _ => unreachable!(), + } + }; + match ret { + Ok(true) => Ok(Ok(new)), + Ok(false) => Ok(Err(current)), + Err(Fault) => { + cold_path(); + Err(MemoryError::from_last_failure( + None, + dest.cast(), + size_of::(), + )) + } + } +} + +/// Reads the value at `src` using one or more read instructions. +/// +/// If `T` is 1, 2, 4, or 8 bytes in size, then exactly one read instruction is +/// used. +/// +/// Returns `Ok(T)` if the read was successful, or `Err(MemoryError)` if the +/// read was unsuccessful. +/// +/// # Safety +/// +/// This routine is safe to use if the memory pointed to by `src` is being +/// concurrently mutated. +/// +/// WARNING: This routine should only be used when you know that `src` is +/// valid, reserved addresses but you do not know if they are mapped with the +/// appropriate protection. For example, this routine is useful if `src` is a +/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and +/// some are mapped with PAGE_READWRITE/PROT_WRITE. +pub unsafe fn try_read_volatile( + src: *const T, +) -> Result { + let mut dest = MaybeUninit::::uninit(); + // SAFETY: guaranteed by caller + let ret = unsafe { + match size_of::() { + 1 => try_read8(src.cast()).map(|v| { + dest.write(std::mem::transmute_copy(&v)); + }), + 2 => try_read16(src.cast()).map(|v| { + dest.write(std::mem::transmute_copy(&v)); + }), + 4 => try_read32(src.cast()).map(|v| { + dest.write(std::mem::transmute_copy(&v)); + }), + 8 => try_read64(src.cast()).map(|v| { + dest.write(std::mem::transmute_copy(&v)); + }), + _ => try_memmove(dest.as_mut_ptr().cast(), src.cast::(), size_of::()), + } + }; + match ret { + Ok(()) => { + // SAFETY: dest was fully initialized by try_read. + Ok(unsafe { dest.assume_init() }) + } + Err(Fault) => { + cold_path(); + Err(MemoryError::from_last_failure( + Some(src.cast()), + dest.as_mut_ptr().cast(), + size_of::(), + )) + } + } +} + +/// Writes `value` at `dest` using one or more write instructions. +/// +/// If `T` is 1, 2, 4, or 8 bytes in size, then exactly one write instruction is +/// used. +/// +/// Returns `Ok(())` if the write was successful, or `Err(MemoryError)` if the +/// write was unsuccessful. +/// +/// # Safety +/// +/// This routine is safe to use if the memory pointed to by `dest` is being +/// concurrently mutated. +/// +/// WARNING: This routine should only be used when you know that `dest` is +/// valid, reserved addresses but you do not know if they are mapped with the +/// appropriate protection. For example, this routine is useful if `dest` is a +/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and +/// some are mapped with PAGE_READWRITE/PROT_WRITE. 
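+///
+/// # Example
+///
+/// A minimal sketch against ordinary local memory, so the fault path is not
+/// exercised (illustrative only):
+///
+/// ```no_run
+/// trycopy::initialize_try_copy();
+///
+/// let mut word = 0u64;
+/// let dest = std::ptr::from_mut(&mut word);
+/// // SAFETY: `dest` points to valid, aligned memory owned by this scope.
+/// unsafe { trycopy::try_write_volatile(dest, &0x1234_5678_u64).unwrap() };
+/// assert_eq!(word, 0x1234_5678);
+/// ```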
+pub unsafe fn try_write_volatile( + dest: *mut T, + value: &T, +) -> Result<(), MemoryError> { + // SAFETY: guaranteed by caller + let ret = unsafe { + match size_of::() { + 1 => try_write8(dest.cast(), std::mem::transmute_copy(value)), + 2 => try_write16(dest.cast(), std::mem::transmute_copy(value)), + 4 => try_write32(dest.cast(), std::mem::transmute_copy(value)), + 8 => try_write64(dest.cast(), std::mem::transmute_copy(value)), + _ => try_memmove( + dest.cast(), + std::ptr::from_ref(value).cast(), + size_of::(), + ), + } + }; + match ret { + Ok(()) => Ok(()), + Err(Fault) => { + cold_path(); + Err(MemoryError::from_last_failure( + None, + dest.cast(), + size_of::(), + )) + } + } +} + +/// Error returned when a memory access fails. +#[derive(Debug, Error)] +#[error("failed to {} memory", if self.is_write { "write" } else { "read" })] +pub struct MemoryError { + offset: usize, + is_write: bool, + #[source] + source: OsAccessError, +} + +#[derive(Debug, Error)] +enum OsAccessError { + #[cfg(windows)] + #[error("access violation")] + AccessViolation, + #[cfg(unix)] + #[error("SIGSEGV (si_code = {0:x})")] + Sigsegv(u32), + #[cfg(unix)] + #[error("SIGBUS (si_code = {0:x})")] + Sigbus(u32), +} + +impl MemoryError { + fn from_last_failure(src: Option<*const u8>, dest: *mut u8, len: usize) -> Self { + let failure = LAST_ACCESS_FAILURE.get(); + let (offset, is_write) = if failure.address.is_null() { + // In the case of a general protection fault (#GP) the provided address is zero. + (0, src.is_none()) + } else if (dest..dest.wrapping_add(len)).contains(&failure.address) { + (failure.address as usize - dest as usize, true) + } else if let Some(src) = src { + if (src..src.wrapping_add(len)).contains(&failure.address.cast_const()) { + (failure.address as usize - src as usize, false) + } else { + panic!( + "invalid failure address: {:p} src: {:p} dest: {:p} len: {:#x}", + failure.address, src, dest, len + ); + } + } else { + panic!( + "invalid failure address: {:p} src: None dest: {:p} len: {:#x}", + failure.address, dest, len + ); + }; + #[cfg(windows)] + let source = OsAccessError::AccessViolation; + #[cfg(unix)] + let source = match failure.si_signo { + libc::SIGSEGV => OsAccessError::Sigsegv(failure.si_code as u32), + libc::SIGBUS => OsAccessError::Sigbus(failure.si_code as u32), + _ => { + panic!( + "unexpected signal: {} src: {:?} dest: {:p} len: {:#x}", + failure.si_signo, src, dest, len + ); + } + }; + Self { + offset, + is_write, + source, + } + } + + /// Returns the byte offset into the buffer at which the access violation + /// occurred. + pub fn offset(&self) -> usize { + self.offset + } +} + +#[derive(Debug)] +struct Fault; + +#[derive(Copy, Clone)] +struct AccessFailure { + address: *mut u8, + #[cfg(unix)] + si_signo: i32, + #[cfg(unix)] + si_code: i32, +} + +thread_local! { + static LAST_ACCESS_FAILURE: std::cell::Cell = const { + std::cell::Cell::new(AccessFailure { + address: std::ptr::null_mut(), + #[cfg(unix)] + si_signo: 0, + #[cfg(unix)] + si_code: 0, + }) + }; +} + +// FUTURE: replace with [`core::hint::cold_path`] when stabilized. +#[cold] +fn cold_path() {} + +#[cfg(target_os = "linux")] +type Context = libc::mcontext_t; +#[cfg(target_os = "macos")] +type Context = libc::__darwin_mcontext64; +#[cfg(windows)] +type Context = windows_sys::Win32::System::Diagnostics::Debug::CONTEXT; + +/// # Safety +/// This function installs global signal handlers. It must only be called once. 
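+///
+/// The installed handlers delegate to `recover`: if the faulting instruction
+/// pointer lies within a registered recovery region, the fault details are
+/// recorded and execution is redirected into the annotated recovery path;
+/// otherwise the signal is treated as fatal and the process aborts.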
+#[cfg(unix)] +unsafe fn install_signal_handlers() { + fn handle_signal(sig: i32, info: &libc::siginfo_t, ucontext: &mut libc::ucontext_t) { + let failure = AccessFailure { + // SAFETY: si_addr is always valid for SIGSEGV and SIGBUS. + address: unsafe { info.si_addr().cast() }, + si_signo: sig, + si_code: info.si_code, + }; + + #[cfg(target_os = "linux")] + let ctx = &mut ucontext.uc_mcontext; + + #[cfg(target_os = "macos")] + // SAFETY: mcontext is always valid. + let ctx = unsafe { &mut *ucontext.uc_mcontext }; + + let recovered = recover(ctx, failure); + if !recovered { + std::process::abort(); + } + } + + // SAFETY: installing signal handlers as documented. + unsafe { + let act = libc::sigaction { + sa_sigaction: handle_signal as usize, + sa_flags: libc::SA_SIGINFO, + ..core::mem::zeroed() + }; + for signal in [libc::SIGSEGV, libc::SIGBUS] { + libc::sigaction(signal, &act, std::ptr::null_mut()); + } + } +} + +/// # Safety +/// This function installs global exception handlers. It must only be called once. +#[cfg(windows)] +unsafe fn install_signal_handlers() { + use windows_sys::Win32::Foundation::EXCEPTION_ACCESS_VIOLATION; + use windows_sys::Win32::System::Diagnostics::Debug::AddVectoredExceptionHandler; + use windows_sys::Win32::System::Diagnostics::Debug::EXCEPTION_CONTINUE_EXECUTION; + use windows_sys::Win32::System::Diagnostics::Debug::EXCEPTION_CONTINUE_SEARCH; + use windows_sys::Win32::System::Diagnostics::Debug::EXCEPTION_POINTERS; + + extern "system" fn exception_handler(pointers_ptr: *mut EXCEPTION_POINTERS) -> i32 { + let (pointers, record, context); + // SAFETY: pointers and its fields are always valid. + unsafe { + pointers = &*pointers_ptr; + record = &*pointers.ExceptionRecord; + context = &mut *pointers.ContextRecord; + } + if record.ExceptionCode != EXCEPTION_ACCESS_VIOLATION { + return EXCEPTION_CONTINUE_SEARCH; + } + + let failure = AccessFailure { + address: record.ExceptionInformation[1] as *mut u8, + }; + let recovered = recover(context, failure); + if recovered { + EXCEPTION_CONTINUE_EXECUTION + } else { + EXCEPTION_CONTINUE_SEARCH + } + } + + // SAFETY: installing exception handler as documented. + let handle = unsafe { AddVectoredExceptionHandler(1, Some(exception_handler)) }; + if handle.is_null() { + panic!("could not install vectored exception handler"); + } +} + +#[repr(C)] +struct RecoveryDescriptor { + /// Start of the faulting code region (relative to the address of this + /// field). + start: i32, + /// End of the faulting code region (relative to the address of this field). + end: i32, + /// Recovery address (relative to the address of this field). If zero, + /// then the instruction pointer will be set to `end` and the result + /// register will be set to -1. + recover: i32, +} + +fn recover(context: &mut Context, failure: AccessFailure) -> bool { + // SAFETY: linker-defined symbols. + #[cfg(target_os = "linux")] + unsafe extern "C" { + #[link_name = "__start_try_copy"] + static START_TRY_COPY: [RecoveryDescriptor; 0]; + #[link_name = "__stop_try_copy"] + static STOP_TRY_COPY: [RecoveryDescriptor; 0]; + } + + // SAFETY: linker-defined symbols. + #[cfg(target_os = "macos")] + unsafe extern "C" { + #[link_name = "\x01section$start$__DATA$__try_copy"] + static START_TRY_COPY: [RecoveryDescriptor; 0]; + #[link_name = "\x01section$end$__DATA$__try_copy"] + static STOP_TRY_COPY: [RecoveryDescriptor; 0]; + } + + // SAFETY: creating symbols in the specified section in order to locate the + // recovery descriptors. 
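+    // The zero-length arrays below are only start/end markers; the
+    // descriptors themselves are emitted by `recovery_descriptor!` into the
+    // `@b` variant of this section name (see `recovery_section!`), so that
+    // they land between the two markers in the final image.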
+ #[cfg(windows)] + #[unsafe(link_section = ".rdata.trycopy@a")] + static START_TRY_COPY: [RecoveryDescriptor; 0] = []; + #[cfg(windows)] + #[unsafe(link_section = ".rdata.trycopy@c")] + static STOP_TRY_COPY: [RecoveryDescriptor; 0] = []; + + // SAFETY: accessing the trycopy section as defined above. + let table = unsafe { + std::slice::from_raw_parts( + START_TRY_COPY.as_ptr(), + STOP_TRY_COPY + .as_ptr() + .offset_from_unsigned(START_TRY_COPY.as_ptr()), + ) + }; + + let ip = get_context_ip(context); + + // Search for a matching recovery descriptor. + for r in table { + let reloc = |addr: &i32| -> usize { + core::ptr::from_ref(addr) + .addr() + .wrapping_add_signed(*addr as isize) + }; + let end = reloc(&r.end); + if ip >= reloc(&r.start) && ip < end { + // Write the recovery info. + // + // Note that this is not generally guaranteed to be async signal safe, + // but in this case we know the thread is running in a recovery region, + // so it is fine. + LAST_ACCESS_FAILURE.set(failure); + + // Adjust the instruction pointer to the recovery address and write + // the failure code. + let (ip, result) = if r.recover == 0 { + (end, Some(-1)) + } else { + (reloc(&r.recover), None) + }; + + set_context_ip_and_result(context, ip, result); + return true; + } + } + false +} + +#[cfg(target_os = "linux")] +macro_rules! recovery_section { + () => { + "try_copy,\"a\"" + }; +} + +#[cfg(target_os = "windows")] +macro_rules! recovery_section { + () => { + ".rdata.trycopy@b,\"dr\"" + }; +} + +#[cfg(target_os = "macos")] +macro_rules! recovery_section { + () => { + "__DATA,__try_copy,regular,no_dead_strip" + }; +} + +/// Used within an asm block. Inserts a [`RecoveryDescriptor`] into the binary. +/// The first and second parameters are labels marking the start and end of the +/// code region to recover from--any access faults within that region will be +/// recovered by jumping to the recovery label given as the third parameter. +/// +/// If the third parameter is the special value ".", then instead of jumping to +/// the recovery label, the instruction pointer will be set to the end of the +/// code region, and the result register (rcx on x86_64, x0 on aarch64) will be +/// set to -1 to indicate failure. +/// +/// FUTURE: remove this extra result register behavior once Rust supports +/// `label` with inline asm blocks that have outputs. +macro_rules! 
recovery_descriptor {
+    ($start:tt, $stop:tt, $recover:tt) => {
+        concat!(
+            ".pushsection ",
+            crate::recovery_section!(),
+            "\n",
+            ".align 4\n",
+            ".long ",
+            $start,
+            " - .\n",
+            ".long ",
+            $stop,
+            " - .\n",
+            ".long ",
+            $recover,
+            " - .\n",
+            ".popsection"
+        )
+    };
+}
+
+use recovery_descriptor;
+use recovery_section;
+
+#[cfg(test)]
+mod tests {
+    #![expect(clippy::undocumented_unsafe_blocks)]
+
+    use crate::AccessFailure;
+    use crate::LAST_ACCESS_FAILURE;
+    use crate::initialize_try_copy;
+    use crate::try_cmpxchg8;
+    use crate::try_cmpxchg16;
+    use crate::try_cmpxchg32;
+    use crate::try_cmpxchg64;
+    use crate::try_compare_exchange;
+    use crate::try_memmove;
+    use crate::try_memset;
+    use crate::try_read8;
+    use crate::try_read16;
+    use crate::try_read32;
+    use crate::try_read64;
+    use crate::try_write8;
+    use crate::try_write16;
+    use crate::try_write32;
+    use crate::try_write64;
+
+    #[derive(Copy, Clone, Debug)]
+    enum Primitive {
+        Read,
+        Write,
+        CompareAndSwap,
+    }
+
+    #[repr(u32)]
+    #[derive(Copy, Clone, Debug, Eq, PartialEq)]
+    enum Size {
+        Bit8 = 8,
+        Bit16 = 16,
+        Bit32 = 32,
+        Bit64 = 64,
+    }
+
+    fn test_unsafe_primitive(primitive: Primitive, size: Size) {
+        // NOTE: this test provides a very basic validation of the read,
+        // write, and compare-and-swap primitives, mostly to check that the
+        // failure address is returned correctly. See other tests for more.
+        let mut dest = !0u64;
+        let dest_addr = std::ptr::from_mut(&mut dest);
+        let src = 0x5555_5555_5555_5555u64;
+        let src_addr = std::ptr::from_ref(&src).cast::<()>();
+        let bad_addr_mut = 0x100 as *mut (); // Within 0..0x1000
+        let bad_addr = bad_addr_mut.cast_const();
+        let nonsense_addr = !0u64 as *mut ();
+        let expected = if size != Size::Bit64 {
+            dest.wrapping_shl(size as u32) | src.wrapping_shr(64 - (size as u32))
+        } else {
+            src
+        };
+        LAST_ACCESS_FAILURE.set(AccessFailure {
+            address: nonsense_addr.cast(),
+            #[cfg(unix)]
+            si_signo: 0,
+            #[cfg(unix)]
+            si_code: 0,
+        });
+
+        let res = unsafe {
+            match size {
+                Size::Bit8 => match primitive {
+                    Primitive::Read => try_read8(src_addr.cast()).map(|v| {
+                        dest_addr.cast::<u8>().write(v);
+                        true
+                    }),
+                    Primitive::Write => try_write8(dest_addr.cast(), src as u8).map(|()| true),
+                    Primitive::CompareAndSwap => {
+                        try_cmpxchg8(dest_addr.cast(), &mut (dest as u8), src as u8)
+                    }
+                },
+                Size::Bit16 => match primitive {
+                    Primitive::Read => try_read16(src_addr.cast()).map(|v| {
+                        dest_addr.cast::<u16>().write(v);
+                        true
+                    }),
+                    Primitive::Write => try_write16(dest_addr.cast(), src as u16).map(|()| true),
+                    Primitive::CompareAndSwap => {
+                        try_cmpxchg16(dest_addr.cast(), &mut (dest as u16), src as u16)
+                    }
+                },
+                Size::Bit32 => match primitive {
+                    Primitive::Read => try_read32(src_addr.cast()).map(|v| {
+                        dest_addr.cast::<u32>().write(v);
+                        true
+                    }),
+                    Primitive::Write => try_write32(dest_addr.cast(), src as u32).map(|()| true),
+                    Primitive::CompareAndSwap => {
+                        try_cmpxchg32(dest_addr.cast(), &mut (dest as u32), src as u32)
+                    }
+                },
+                Size::Bit64 => match primitive {
+                    Primitive::Read => try_read64(src_addr.cast()).map(|v| {
+                        dest_addr.write(v);
+                        true
+                    }),
+                    Primitive::Write => try_write64(dest_addr.cast(), src).map(|()| true),
+                    Primitive::CompareAndSwap => {
+                        try_cmpxchg64(dest_addr.cast(), &mut { dest }, src)
+                    }
+                },
+            }
+        };
+        assert!(
+            res.unwrap(),
+            "Success should be returned for {primitive:?} and {size:?}"
+        );
+        assert_eq!(
+            dest, expected,
+            "Expected value must match the result for {primitive:?} and {size:?}"
+        );
+        assert_eq!(
+            LAST_ACCESS_FAILURE.get().address,
+            nonsense_addr.cast(),
+            "Fault address must not be set for {primitive:?} and {size:?}"
+        );
+
+        let res = unsafe {
+            match size {
+                Size::Bit8 => match primitive {
+                    Primitive::Read => try_read8(bad_addr.cast()).map(drop),
+                    Primitive::Write => try_write8(bad_addr_mut.cast(), src as u8),
+                    Primitive::CompareAndSwap => {
+                        try_cmpxchg8(bad_addr_mut.cast(), &mut (dest as u8), src as u8).map(drop)
+                    }
+                },
+                Size::Bit16 => match primitive {
+                    Primitive::Read => try_read16(bad_addr.cast()).map(drop),
+                    Primitive::Write => try_write16(bad_addr_mut.cast(), src as u16),
+                    Primitive::CompareAndSwap => {
+                        try_cmpxchg16(bad_addr_mut.cast(), &mut (dest as u16), src as u16).map(drop)
+                    }
+                },
+                Size::Bit32 => match primitive {
+                    Primitive::Read => try_read32(bad_addr.cast()).map(drop),
+                    Primitive::Write => try_write32(bad_addr_mut.cast(), src as u32),
+                    Primitive::CompareAndSwap => {
+                        try_cmpxchg32(bad_addr_mut.cast(), &mut (dest as u32), src as u32).map(drop)
+                    }
+                },
+                Size::Bit64 => match primitive {
+                    Primitive::Read => try_read64(bad_addr.cast()).map(drop),
+                    Primitive::Write => try_write64(bad_addr_mut.cast(), src),
+                    Primitive::CompareAndSwap => {
+                        try_cmpxchg64(bad_addr_mut.cast(), &mut { dest }, src).map(drop)
+                    }
+                },
+            }
+        };
+        res.unwrap_err();
+        assert_eq!(
+            dest, expected,
+            "Fault must preserve source and destination for {primitive:?} and {size:?}"
+        );
+        let af = LAST_ACCESS_FAILURE.get();
+        assert_eq!(
+            af.address,
+            bad_addr_mut.cast(),
+            "Fault address must be set for {primitive:?} and {size:?}"
+        );
+    }
+
+    #[test]
+    fn test_unsafe_primitives() {
+        initialize_try_copy();
+
+        for primitive in [Primitive::Read, Primitive::Write, Primitive::CompareAndSwap] {
+            for size in [Size::Bit8, Size::Bit16, Size::Bit32, Size::Bit64] {
+                test_unsafe_primitive(primitive, size);
+            }
+        }
+    }
+
+    #[test]
+    fn test_try_memmove_overlapping() {
+        initialize_try_copy();
+
+        let data = (0..256).map(|i| i as u8).collect::<Vec<_>>();
+
+        // Reverse overlap
+        {
+            let mut buf = data.clone();
+            unsafe { try_memmove(buf.as_mut_ptr(), buf.as_mut_ptr().add(1), 255).unwrap() };
+            assert_eq!(&buf[0..255], &data[1..256]);
+        }
+
+        // Forward overlap
+        {
+            let mut buf = data.clone();
+            unsafe { try_memmove(buf.as_mut_ptr().add(1), buf.as_mut_ptr(), 255).unwrap() };
+            assert_eq!(&buf[1..256], &data[0..255]);
+        }
+    }
+
+    #[test]
+    fn test_try_memset() {
+        initialize_try_copy();
+
+        let mut buf = [0u8; 256];
+        unsafe { try_memset(buf.as_mut_ptr(), 0x5a, buf.len()).unwrap() };
+        assert_eq!(&buf, &[0x5a; 256]);
+
+        unsafe { try_memset(0x100 as *mut u8, 0x5a, 100).unwrap_err() };
+    }
+
+    #[test]
+    fn test_cmpxchg() {
+        initialize_try_copy();
+
+        let mut mapping = vec![0u64; 256];
+        let base = mapping.as_mut_ptr().cast::<u8>();
+        unsafe {
+            assert_eq!(try_compare_exchange(base.add(8), 0, 1).unwrap().unwrap(), 1);
+            assert_eq!(
+                try_compare_exchange(base.add(8), 0, 2)
+                    .unwrap()
+                    .unwrap_err(),
+                1
+            );
+            assert_eq!(
+                try_compare_exchange(base.cast::<u64>().add(1), 1, 2)
+                    .unwrap()
+                    .unwrap(),
+                2
+            );
+            try_compare_exchange(0x1000 as *mut u8, 0, 2).unwrap_err();
+        }
+    }
+}
diff --git a/support/trycopy/src/memcpy.rs b/support/trycopy/src/memcpy.rs
new file mode 100644
index 0000000000..e9101bd714
--- /dev/null
+++ b/support/trycopy/src/memcpy.rs
@@ -0,0 +1,74 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Arch-independent functions for copying and setting memory in chunks, using
+//! arch-specific low-level functions for the actual copying and setting.
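+//!
+//! For example, a 108-byte forward copy is performed as three 32-byte chunks
+//! (96 bytes), one 8-byte chunk, and four single-byte copies. The same
+//! chunking applies when setting memory, except that the 32-byte path is only
+//! used when filling with zero.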
+ +use super::Fault; + +/// Copy memory from `src` to `dest` in chunks in the forward direction, using +/// the provided functions for copying 1, 8, and 32 bytes at a time. +/// +/// `copy1`, `copy8`, and `copy32` can assume that `length` is non-zero and a +/// multiple of 1, 8, and 32 respectively. +pub(crate) fn try_copy_forward_with( + mut dest: *mut u8, + mut src: *const u8, + mut length: usize, + copy1: impl Fn(*mut u8, *const u8, usize) -> Result<(), Fault>, + copy8: impl Fn(*mut u8, *const u8, usize) -> Result<(), Fault>, + copy32: impl Fn(*mut u8, *const u8, usize) -> Result<(), Fault>, +) -> Result<(), Fault> { + if length >= 32 { + let this = length & !31; + copy32(dest, src, this)?; + dest = dest.wrapping_add(this); + src = src.wrapping_add(this); + length &= 31; + } + if length >= 8 { + let this = length & !7; + copy8(dest, src, this)?; + dest = dest.wrapping_add(this); + src = src.wrapping_add(this); + length &= 7; + } + if length > 0 { + copy1(dest, src, length) + } else { + Ok(()) + } +} + +/// Set memory at `dest` to byte `c` in chunks, using the provided functions +/// for setting 1, 8, and 32 bytes at a time. +/// +/// `set1`, `set8`, and `set32_zero` can assume that `length` is non-zero and a +/// multiple of 1, 8, and 32 respectively. Note that `set32_zero` is used for +/// setting 32-byte chunks to zero only. +pub(crate) fn try_memset_with( + mut dest: *mut u8, + c: u8, + mut length: usize, + set1: impl Fn(*mut u8, u8, usize) -> Result<(), Fault>, + set8: impl Fn(*mut u8, u8, usize) -> Result<(), Fault>, + set32_zero: impl Fn(*mut u8, usize) -> Result<(), Fault>, +) -> Result<(), Fault> { + if c == 0 && length >= 32 { + let this = length & !31; + set32_zero(dest, this)?; + dest = dest.wrapping_add(this); + length &= 31; + } + if length >= 8 { + let this = length & !7; + set8(dest, c, this)?; + dest = dest.wrapping_add(this); + length &= 7; + } + if length >= 1 { + set1(dest, c, length) + } else { + Ok(()) + } +} diff --git a/support/trycopy/src/x86_64.rs b/support/trycopy/src/x86_64.rs new file mode 100644 index 0000000000..a7e2dacc6f --- /dev/null +++ b/support/trycopy/src/x86_64.rs @@ -0,0 +1,399 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// xtask-fmt allow-target-arch sys-crate +#![cfg(target_arch = "x86_64")] + +use super::Context; +use super::Fault; +use super::recovery_descriptor; + +pub(super) fn get_context_ip(ctx: &Context) -> usize { + #[cfg(target_os = "linux")] + { + ctx.gregs[libc::REG_RIP as usize] as _ + } + #[cfg(target_os = "windows")] + { + ctx.Rip as _ + } +} + +pub(super) fn set_context_ip_and_result(ctx: &mut Context, ip: usize, result: Option) { + #[cfg(target_os = "linux")] + { + ctx.gregs[libc::REG_RIP as usize] = ip as _; + if let Some(result) = result { + ctx.gregs[libc::REG_RCX as usize] = result as _; + } + } + #[cfg(target_os = "windows")] + { + ctx.Rip = ip as _; + if let Some(result) = result { + ctx.Rcx = result as _; + } + } +} + +/// # Safety +/// `dest` must be an address that's reserved and can be written to without +/// violating Rust's aliasing rules. `src` must be an address that's reserved. +unsafe fn try_copy_forward(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> { + fn copy1(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! 
{
+                "
+                2:
+                mov {s1}, byte ptr [{src} + {i}]
+                mov byte ptr [{dest} + {i}], {s1}
+                inc {i}
+                cmp {i}, {len}
+                jne 2b
+                3:
+                ",
+                recovery_descriptor!("2b", "3b", "{bail}"),
+                s1 = out(reg_byte) _,
+                i = inout(reg) 0u64 => _,
+                src = in(reg) src,
+                dest = in(reg) dest,
+                len = in(reg) length,
+                bail = label { return Err(Fault) },
+                options(nostack),
+            }
+        }
+        Ok(())
+    }
+
+    fn copy8(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> {
+        // SAFETY: caller ensured.
+        unsafe {
+            core::arch::asm! {
+                "
+                2:
+                mov {s1}, qword ptr [{src} + {i}]
+                mov qword ptr [{dest} + {i}], {s1}
+                add {i}, 8
+                cmp {i}, {len}
+                jne 2b
+                3:
+                ",
+                recovery_descriptor!("2b", "3b", "{bail}"),
+                s1 = out(reg) _,
+                i = inout(reg) 0u64 => _,
+                src = in(reg) src,
+                dest = in(reg) dest,
+                len = in(reg) length,
+                bail = label { return Err(Fault) },
+                options(nostack),
+            }
+        }
+        Ok(())
+    }
+
+    fn copy32(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> {
+        // SAFETY: caller ensured.
+        unsafe {
+            core::arch::asm! {
+                "
+                2:
+                movdqu {s1}, xmmword ptr [{src} + {i}]
+                movdqu {s2}, xmmword ptr [{src} + {i} + 16]
+                movdqu xmmword ptr [{dest} + {i}], {s1}
+                movdqu xmmword ptr [{dest} + {i} + 16], {s2}
+                add {i}, 32
+                cmp {i}, {len}
+                jne 2b
+                3:
+                ",
+                recovery_descriptor!("2b", "3b", "{bail}"),
+                s1 = out(xmm_reg) _,
+                s2 = out(xmm_reg) _,
+                i = inout(reg) 0u64 => _,
+                src = in(reg) src,
+                dest = in(reg) dest,
+                len = in(reg) length,
+                bail = label { return Err(Fault) },
+                options(nostack),
+            }
+        }
+        Ok(())
+    }
+
+    fn copy_movsb(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> {
+        // SAFETY: caller ensured.
+        unsafe {
+            core::arch::asm! {
+                "2:",
+                "rep movsb",
+                "3:",
+                recovery_descriptor!("2b", "3b", "{bail}"),
+                in("rdi") dest,
+                in("rsi") src,
+                in("rcx") length,
+                bail = label { return Err(Fault) },
+                options(nostack),
+            }
+        }
+        Ok(())
+    }
+
+    if length >= 1024 {
+        return copy_movsb(dest, src, length);
+    }
+    crate::memcpy::try_copy_forward_with(dest, src, length, copy1, copy8, copy32)
+}
+
+/// # Safety
+/// `dest` must be an address that's reserved and can be written to without
+/// violating Rust's aliasing rules. `src` must be an address that's reserved.
+unsafe fn try_copy_backward(dest: *mut u8, src: *const u8, length: usize) -> Result<(), Fault> {
+    // Note, `rep movsb` with the direction flag set is slow, but this path
+    // should be rare.
+    // SAFETY: caller ensured.
+    unsafe {
+        core::arch::asm! {
+            "2:",
+            "std",
+            "rep movsb",
+            "3:",
+            "cld",
+            recovery_descriptor!("2b", "3b", "{bail}"),
+            in("rdi") dest.add(length - 1),
+            in("rsi") src.add(length - 1),
+            in("rcx") length,
+            bail = label { return Err(Fault) },
+            options(nostack),
+        }
+    }
+    Ok(())
+}
+
+/// # Safety
+/// `dest` must be an address that's reserved and can be written to without
+/// violating Rust's aliasing rules. `src` must be an address that's reserved.
+pub(crate) unsafe fn try_memmove(
+    dest: *mut u8,
+    src: *const u8,
+    length: usize,
+) -> Result<(), Fault> {
+    // A forward copy is safe unless `dest` lies within `src..src + length`,
+    // in which case copying forward would overwrite source bytes before they
+    // are read.
+    if (dest as usize).wrapping_sub(src as usize) >= length {
+        // SAFETY: caller ensured.
+        unsafe { try_copy_forward(dest, src, length) }
+    } else {
+        crate::cold_path();
+        // SAFETY: caller ensured.
+        unsafe { try_copy_backward(dest, src, length) }
+    }
+}
+
+/// # Safety
+/// `dest` must be an address that's reserved and can be written to without
+/// violating Rust's aliasing rules.
+pub(crate) unsafe fn try_memset(dest: *mut u8, c: u8, length: usize) -> Result<(), Fault> { + fn set_stosb(dest: *mut u8, c: u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + "2:", + "rep stosb", + "3:", + recovery_descriptor!("2b", "3b", "{bail}"), + in("rdi") dest, + in("al") c, + in("rcx") length, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + fn set1(dest: *mut u8, c: u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + " + 2: + mov byte ptr [{dest} + {i}], {c} + inc {i} + cmp {i}, {len} + jne 2b + 3: + ", + recovery_descriptor!("2b", "3b", "{bail}"), + c = in(reg_byte) c, + i = inout(reg) 0u64 => _, + dest = in(reg) dest, + len = in(reg) length, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + fn set8(dest: *mut u8, c: u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + " + 2: + mov qword ptr [{dest} + {i}], {c} + add {i}, 8 + cmp {i}, {len} + jne 2b + 3: + ", + recovery_descriptor!("2b", "3b", "{bail}"), + c = in(reg) c as u64 * 0x0101010101010101, + i = inout(reg) 0u64 => _, + dest = in(reg) dest, + len = in(reg) length, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + fn set32_zero(dest: *mut u8, length: usize) -> Result<(), Fault> { + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + " + 2: + movdqu xmmword ptr [{dest} + {i}], {c} + movdqu xmmword ptr [{dest} + {i} + 16], {c} + add {i}, 32 + cmp {i}, {len} + jne 2b + 3: + ", + recovery_descriptor!("2b", "3b", "{bail}"), + c = in(xmm_reg) 0, + i = inout(reg) 0u64 => _, + dest = in(reg) dest, + len = in(reg) length, + bail = label { return Err(Fault) }, + options(nostack), + } + } + Ok(()) + } + + if length >= 1024 { + return set_stosb(dest, c, length); + } + crate::memcpy::try_memset_with(dest, c, length, set1, set8, set32_zero) +} + +macro_rules! try_read { + ($vis:vis $func:ident, $ty:ty, $asm:expr) => { + /// # Safety + /// `src` must be an address that's reserved. + $vis unsafe fn $func(src: *const $ty) -> Result<$ty, Fault> { + // SAFETY: caller ensured. + unsafe { + let out: u64; + let result: i32; + core::arch::asm!( + "2:", + $asm, + "xor ecx, ecx", + "3:", + recovery_descriptor!("2b", "3b", "."), + out = out(reg) out, + src = in(reg) src, + lateout("rcx") result, + options(nostack, readonly), + ); + if result == 0 { + Ok(out as $ty) + } else { + Err(Fault) + } + } + } + }; +} + +try_read!(pub(crate) try_read8, u8, "movzx {out:e}, byte ptr [{src}]"); +try_read!(pub(crate) try_read16, u16, "movzx {out:e}, word ptr [{src}]"); +try_read!(pub(crate) try_read32, u32, "mov {out:e}, dword ptr [{src}]"); +try_read!(pub(crate) try_read64, u64, "mov {out}, qword ptr [{src}]"); + +macro_rules! try_write { + ($vis:vis $func:ident, $ty:ty, $reg_kind:tt, $asm:expr) => { + /// # Safety + /// `dest` must be an address that's reserved and can be written to + /// without violating Rust's aliasing rules. + $vis unsafe fn $func(dest: *mut $ty, val: $ty) -> Result<(), Fault> { + // SAFETY: caller ensured. 
+ unsafe { + core::arch::asm!( + "2:", + $asm, + "3:", + recovery_descriptor!("2b", "3b", "{bail}"), + dest = in(reg) dest, + val = in(reg) val as u64, + bail = label { return Err(Fault) }, + options(nostack, preserves_flags), + ) + } + Ok(()) + } + }; +} + +try_write!(pub(crate) try_write8, u8, reg_byte, "mov byte ptr [{dest}], {val:l}"); +try_write!(pub(crate) try_write16, u16, reg, "mov word ptr [{dest}], {val:x}"); +try_write!(pub(crate) try_write32, u32, reg, "mov dword ptr [{dest}], {val:e}"); +try_write!(pub(crate) try_write64, u64, reg, "mov qword ptr [{dest}], {val}"); + +macro_rules! try_cmpxchg { + ($vis:vis $func:ident, $ty:ty, $ax:tt, $reg_kind:tt, $asm:expr) => { + /// # Safety + /// `dest` must be an address that's reserved and can be written to + /// without violating Rust's aliasing rules. + $vis unsafe fn $func( + dest: *mut $ty, + expected: &mut $ty, + desired: $ty, + ) -> Result { + let actual; + let result: i32; + // SAFETY: caller ensured. + unsafe { + core::arch::asm! { + "2:", + $asm, + "setz cl", + "movzx ecx, cl", + "3:", + recovery_descriptor!("2b", "3b", "."), + dest = in(reg) dest, + desired = in($reg_kind) desired, + inout($ax) *expected => actual, + lateout("rcx") result, + options(nostack), + } + }; + if result > 0 { + Ok(true) + } else if result == 0 { + *expected = actual; + Ok(false) + } else { + Err(Fault) + } + } + }; +} + +try_cmpxchg!(pub(crate) try_cmpxchg8, u8, "al", reg_byte, "cmpxchg byte ptr [{dest}], {desired}"); +try_cmpxchg!(pub(crate) try_cmpxchg16, u16, "ax", reg, "cmpxchg word ptr [{dest}], {desired:x}"); +try_cmpxchg!(pub(crate) try_cmpxchg32, u32, "eax", reg, "cmpxchg dword ptr [{dest}], {desired:e}"); +try_cmpxchg!(pub(crate) try_cmpxchg64, u64, "rax", reg, "cmpxchg qword ptr [{dest}], {desired}"); diff --git a/vm/devices/user_driver/Cargo.toml b/vm/devices/user_driver/Cargo.toml index 6b7d2c014d..08b24efeaa 100644 --- a/vm/devices/user_driver/Cargo.toml +++ b/vm/devices/user_driver/Cargo.toml @@ -8,7 +8,7 @@ rust-version.workspace = true [features] default = ["vfio"] -vfio = ["vfio_sys", "sparse_mmap"] +vfio = ["vfio_sys"] mmio_simulate_fallback = [] [dependencies] @@ -21,6 +21,7 @@ safeatomic.workspace = true uevent.workspace = true event-listener.workspace = true guestmem.workspace = true +trycopy.workspace = true vmcore.workspace = true anyhow.workspace = true @@ -34,7 +35,6 @@ futures.workspace = true futures-concurrency.workspace = true libc.workspace = true pal_event.workspace = true -sparse_mmap = { workspace = true, optional = true } vfio_sys = { workspace = true, optional = true } [lints] diff --git a/vm/devices/user_driver/src/vfio.rs b/vm/devices/user_driver/src/vfio.rs index 03819f282b..2e9322bb5b 100644 --- a/vm/devices/user_driver/src/vfio.rs +++ b/vm/devices/user_driver/src/vfio.rs @@ -192,7 +192,7 @@ impl VfioDevice { } let info = self.device.region_info(n.into())?; let mapping = self.device.map(info.offset, info.size as usize, true)?; - sparse_mmap::initialize_try_copy(); + trycopy::initialize_try_copy(); Ok(MappedRegionWithFallback { device: self.device.clone(), mapping, @@ -432,18 +432,18 @@ impl MappedRegionWithFallback { fn read_from_mapping( &self, offset: usize, - ) -> Result { + ) -> Result { // SAFETY: the offset is validated to be in bounds and aligned. 
- unsafe { sparse_mmap::try_read_volatile(self.mapping::(offset)) } + unsafe { trycopy::try_read_volatile(self.mapping::(offset)) } } fn write_to_mapping( &self, offset: usize, data: T, - ) -> Result<(), sparse_mmap::MemoryError> { + ) -> Result<(), trycopy::MemoryError> { // SAFETY: the offset is validated to be in bounds and aligned. - unsafe { sparse_mmap::try_write_volatile(self.mapping::(offset), &data) } + unsafe { trycopy::try_write_volatile(self.mapping::(offset), &data) } } fn read_from_file(&self, offset: usize, buf: &mut [u8]) { diff --git a/vm/vmcore/guestmem/Cargo.toml b/vm/vmcore/guestmem/Cargo.toml index e13ea9abef..bd981be0f5 100644 --- a/vm/vmcore/guestmem/Cargo.toml +++ b/vm/vmcore/guestmem/Cargo.toml @@ -14,6 +14,7 @@ inspect.workspace = true pal_event.workspace = true sparse_mmap.workspace = true minircu = { workspace = true, optional = true } +trycopy.workspace = true thiserror.workspace = true zerocopy.workspace = true diff --git a/vm/vmcore/guestmem/src/lib.rs b/vm/vmcore/guestmem/src/lib.rs index bfc2c2cdd5..525c0645ce 100644 --- a/vm/vmcore/guestmem/src/lib.rs +++ b/vm/vmcore/guestmem/src/lib.rs @@ -1235,7 +1235,7 @@ impl GuestMemory { // Skip this on miri even when there is a mapping, since the mapping may // never be accessed by the code under test. if imp.mapping().is_some() && !cfg!(miri) { - sparse_mmap::initialize_try_copy(); + trycopy::initialize_try_copy(); } Self::new_inner(debug_name.into(), imp, false) } @@ -1277,8 +1277,8 @@ impl GuestMemory { region_size: u64, mut imps: Vec>, ) -> Result { - // Install signal handlers on unix. - sparse_mmap::initialize_try_copy(); + // Install signal handlers. + trycopy::initialize_try_copy(); if !region_size.is_power_of_two() { return Err(MultiRegionError::NotPowerOfTwo(region_size)); @@ -1532,7 +1532,7 @@ impl GuestMemory { gpa: u64, len: usize, mut param: P, - mut f: impl FnMut(&mut P, *mut u8) -> Result, + mut f: impl FnMut(&mut P, *mut u8) -> Result, fallback: impl FnOnce(&mut P) -> Result, ) -> Result { let op = || { @@ -1595,7 +1595,7 @@ impl GuestMemory { // SAFETY: dest..dest+len is guaranteed to point to a reserved VA // range, and src..src+len is guaranteed by the caller to be a valid // buffer for reads. - unsafe { sparse_mmap::try_copy(src, dest, len) } + unsafe { trycopy::try_copy(src, dest, len) } }, |()| { // SAFETY: src..src+len is guaranteed by the caller to point to a valid @@ -1649,7 +1649,7 @@ impl GuestMemory { (), |(), dest| { // SAFETY: dest..dest+len is guaranteed to point to a reserved VA range. - unsafe { sparse_mmap::try_write_bytes(dest, val, len) } + unsafe { trycopy::try_write_bytes(dest, val, len) } }, |()| self.inner.imp.fill_fallback(gpa, val, len), ) @@ -1677,7 +1677,7 @@ impl GuestMemory { // SAFETY: src..src+len is guaranteed to point to a reserved VA // range, and dest..dest+len is guaranteed by the caller to be a // valid buffer for writes. - unsafe { sparse_mmap::try_copy(src, dest, len) } + unsafe { trycopy::try_copy(src, dest, len) } }, |()| { // SAFETY: dest..dest+len is guaranteed by the caller to point to a @@ -1738,7 +1738,7 @@ impl GuestMemory { |(), dest| { // SAFETY: dest..dest+len is guaranteed to point to // a reserved VA range. - unsafe { sparse_mmap::try_write_volatile(dest.cast(), b) } + unsafe { trycopy::try_write_volatile(dest.cast(), b) } }, |()| { // SAFETY: b is a valid buffer for reads. @@ -1777,7 +1777,7 @@ impl GuestMemory { |(), dest| { // SAFETY: dest..dest+len is guaranteed by the caller to be a valid // buffer for writes. 
- unsafe { sparse_mmap::try_compare_exchange(dest.cast(), current, new) } + unsafe { trycopy::try_compare_exchange(dest.cast(), current, new) } }, |()| { let mut current = current; @@ -1829,7 +1829,7 @@ impl GuestMemory { |(), src| { // SAFETY: src..src+len is guaranteed to point to a reserved VA // range. - unsafe { sparse_mmap::try_read_volatile(src.cast::()) } + unsafe { trycopy::try_read_volatile(src.cast::()) } }, |()| { let mut obj = std::mem::MaybeUninit::::zeroed(); From 7c42c3dfecf4df76f6db7db7863e875e16765a28 Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 21:36:59 +0000 Subject: [PATCH 02/20] tweaks --- support/trycopy/Cargo.toml | 1 - support/trycopy/src/lib.rs | 2 +- support/trycopy/src/memcpy.rs | 16 +++++++--------- support/trycopy/src/x86_64.rs | 10 +++++----- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/support/trycopy/Cargo.toml b/support/trycopy/Cargo.toml index 86104b9789..27c999f9c0 100644 --- a/support/trycopy/Cargo.toml +++ b/support/trycopy/Cargo.toml @@ -7,7 +7,6 @@ edition.workspace = true rust-version.workspace = true [dependencies] - thiserror.workspace = true zerocopy.workspace = true diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index c237db25d6..10da120aad 100644 --- a/support/trycopy/src/lib.rs +++ b/support/trycopy/src/lib.rs @@ -47,7 +47,7 @@ //! //! # Example //! -//! ```rust +//! ```no_run //! trycopy::initialize_try_copy(); //! //! // Attempt to read from potentially unmapped guest memory diff --git a/support/trycopy/src/memcpy.rs b/support/trycopy/src/memcpy.rs index e9101bd714..a52b969485 100644 --- a/support/trycopy/src/memcpy.rs +++ b/support/trycopy/src/memcpy.rs @@ -8,7 +8,7 @@ use super::Fault; /// Copy memory from `src` to `dest` in chunks in the forward direction, using /// the provided functions for copying 1, 8, and 32 bytes at a time. -/// +/// /// `copy1`, `copy8`, and `copy32` can assume that `length` is non-zero and a /// multiple of 1, 8, and 32 respectively. pub(crate) fn try_copy_forward_with( @@ -33,16 +33,15 @@ pub(crate) fn try_copy_forward_with( src = src.wrapping_add(this); length &= 7; } - if length > 0 { - copy1(dest, src, length) - } else { - Ok(()) + if length >= 1 { + copy1(dest, src, length)?; } + Ok(()) } /// Set memory at `dest` to byte `c` in chunks, using the provided functions /// for setting 1, 8, and 32 bytes at a time. -/// +/// /// `set1`, `set8`, and `set32_zero` can assume that `length` is non-zero and a /// multiple of 1, 8, and 32 respectively. Note that `set32_zero` is used for /// setting 32-byte chunks to zero only. @@ -67,8 +66,7 @@ pub(crate) fn try_memset_with( length &= 7; } if length >= 1 { - set1(dest, c, length) - } else { - Ok(()) + set1(dest, c, length)?; } + Ok(()) } diff --git a/support/trycopy/src/x86_64.rs b/support/trycopy/src/x86_64.rs index a7e2dacc6f..e6cddb1a9c 100644 --- a/support/trycopy/src/x86_64.rs +++ b/support/trycopy/src/x86_64.rs @@ -325,7 +325,7 @@ try_read!(pub(crate) try_read32, u32, "mov {out:e}, dword ptr [{src}]"); try_read!(pub(crate) try_read64, u64, "mov {out}, qword ptr [{src}]"); macro_rules! try_write { - ($vis:vis $func:ident, $ty:ty, $reg_kind:tt, $asm:expr) => { + ($vis:vis $func:ident, $ty:ty, $asm:expr) => { /// # Safety /// `dest` must be an address that's reserved and can be written to /// without violating Rust's aliasing rules. @@ -348,10 +348,10 @@ macro_rules! 
try_write { }; } -try_write!(pub(crate) try_write8, u8, reg_byte, "mov byte ptr [{dest}], {val:l}"); -try_write!(pub(crate) try_write16, u16, reg, "mov word ptr [{dest}], {val:x}"); -try_write!(pub(crate) try_write32, u32, reg, "mov dword ptr [{dest}], {val:e}"); -try_write!(pub(crate) try_write64, u64, reg, "mov qword ptr [{dest}], {val}"); +try_write!(pub(crate) try_write8, u8, "mov byte ptr [{dest}], {val:l}"); +try_write!(pub(crate) try_write16, u16, "mov word ptr [{dest}], {val:x}"); +try_write!(pub(crate) try_write32, u32, "mov dword ptr [{dest}], {val:e}"); +try_write!(pub(crate) try_write64, u64, "mov qword ptr [{dest}], {val}"); macro_rules! try_cmpxchg { ($vis:vis $func:ident, $ty:ty, $ax:tt, $reg_kind:tt, $asm:expr) => { From 776c5f041f33f539c5cfb35a97efba9f9778b7da Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 21:39:25 +0000 Subject: [PATCH 03/20] tweaks --- support/trycopy/src/x86_64.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/support/trycopy/src/x86_64.rs b/support/trycopy/src/x86_64.rs index e6cddb1a9c..09998cee1d 100644 --- a/support/trycopy/src/x86_64.rs +++ b/support/trycopy/src/x86_64.rs @@ -364,20 +364,19 @@ macro_rules! try_cmpxchg { desired: $ty, ) -> Result { let actual; - let result: i32; + let result: i8; // SAFETY: caller ensured. unsafe { core::arch::asm! { "2:", $asm, "setz cl", - "movzx ecx, cl", "3:", recovery_descriptor!("2b", "3b", "."), dest = in(reg) dest, desired = in($reg_kind) desired, inout($ax) *expected => actual, - lateout("rcx") result, + lateout("cl") result, options(nostack), } }; From 139e730ba6c88a7a0989cf0972984782ad6a49fb Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 21:41:08 +0000 Subject: [PATCH 04/20] feedback --- vm/devices/user_driver/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vm/devices/user_driver/Cargo.toml b/vm/devices/user_driver/Cargo.toml index 08b24efeaa..df4d865029 100644 --- a/vm/devices/user_driver/Cargo.toml +++ b/vm/devices/user_driver/Cargo.toml @@ -8,7 +8,7 @@ rust-version.workspace = true [features] default = ["vfio"] -vfio = ["vfio_sys"] +vfio = ["vfio_sys", "trycopy"] mmio_simulate_fallback = [] [dependencies] @@ -21,7 +21,7 @@ safeatomic.workspace = true uevent.workspace = true event-listener.workspace = true guestmem.workspace = true -trycopy.workspace = true +trycopy = { workspace = true, optional = true } vmcore.workspace = true anyhow.workspace = true From 397bc091e7773674d3891223684958991b586e74 Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 22:23:47 +0000 Subject: [PATCH 05/20] comments,no-zerocopy --- Cargo.lock | 1 - support/trycopy/Cargo.toml | 1 - support/trycopy/src/lib.rs | 116 +++++++++++++++++-------------------- 3 files changed, 52 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 57572d3123..55c9e975c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7683,7 +7683,6 @@ dependencies = [ "libc", "thiserror 2.0.16", "windows-sys 0.61.0", - "zerocopy 0.8.25", ] [[package]] diff --git a/support/trycopy/Cargo.toml b/support/trycopy/Cargo.toml index 27c999f9c0..d5fb119155 100644 --- a/support/trycopy/Cargo.toml +++ b/support/trycopy/Cargo.toml @@ -8,7 +8,6 @@ rust-version.workspace = true [dependencies] thiserror.workspace = true -zerocopy.workspace = true [target.'cfg(unix)'.dependencies] libc.workspace = true diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index 10da120aad..616614e9e9 100644 --- a/support/trycopy/src/lib.rs +++ 
b/support/trycopy/src/lib.rs @@ -93,10 +93,6 @@ use x86_64::*; use std::mem::MaybeUninit; use thiserror::Error; -use zerocopy::FromBytes; -use zerocopy::Immutable; -use zerocopy::IntoBytes; -use zerocopy::KnownLayout; /// Must be called before using [`try_copy`] or other `try_` functions with a /// memory buffer that could fault. @@ -110,22 +106,24 @@ pub fn initialize_try_copy() { }); } -/// Copies `count` elements from `src` to `dest`. `src` and `dest` may overlap. -/// Fails on access violation/SIGSEGV. Note that on case of failure, some of the -/// bytes (even partial elements) may already have been copied. +/// Copies `count` elements from `src` to `dest`, returning an error if an +/// access violation occurs. The source and destination may overlap. +/// +/// No guarantees are made about the access width used to perform the copy or +/// the order in which the accesses are made. In the case of failure, some of +/// the bytes (even partial elements) may already have been copied. /// -/// This also fails if initialize_try_copy has not been called. +/// If [`initialize_try_copy`] has not been called and a fault occurs, the +/// process will be terminated according to the platform's default behavior. /// /// # Safety -/// -/// This routine is safe to use if the memory pointed to by `src` or `dest` is -/// being concurrently mutated. -/// -/// WARNING: This routine should only be used when you know that `src` and -/// `dest` are valid, reserved addresses but you do not know if they are mapped -/// with the appropriate protection. For example, this routine is useful if -/// `dest` is a sparse mapping where some pages are mapped with -/// PAGE_NOACCESS/PROT_NONE, and some are mapped with PAGE_READWRITE/PROT_WRITE. +/// `src` and `dest` must point to reserved addresses, which may or may not +/// actually be backed. `dest` cannot point to memory that would violate Rust's +/// aliasing rules. +/// +/// Note that this creates a bitwise copy of the data, even if `T` is not +/// `Copy`. The caller must ensure that subsequent uses of `src` or `dest` do +/// not cause undefined behavior. pub unsafe fn try_copy(src: *const T, dest: *mut T, count: usize) -> Result<(), MemoryError> { let len = count * size_of::(); // SAFETY: guaranteed by caller. @@ -143,22 +141,22 @@ pub unsafe fn try_copy(src: *const T, dest: *mut T, count: usize) -> Result<( } } -/// Writes `count` bytes of the value `val` to `dest`. Fails on access -/// violation/SIGSEGV. Note that on case of failure, some of the bytes (even -/// partial elements) may already have been written. -/// -/// This also fails if initialize_try_copy has not been called. -/// +/// Sets `count * size_of::()` bytes of memory at `dest` to the byte value +/// `val`, returning an error if an access violation occurs. +/// +/// No guarantees are made about the access width used to perform the set or the +/// order in which the accesses are made. In the case of failure, some of the +/// bytes may already have been set. +/// +/// If [`initialize_try_copy`] has not been called and a fault occurs, the +/// process will be terminated according to the platform's default behavior. +/// /// # Safety -/// -/// This routine is safe to use if the memory pointed to by `dest` is being -/// concurrently mutated. -/// -/// WARNING: This routine should only be used when you know that `dest` is -/// valid, reserved addresses but you do not know if they are mapped with the -/// appropriate protection. 
For example, this routine is useful if `dest` is a -/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and -/// some are mapped with PAGE_READWRITE/PROT_WRITE. +/// `dest` must point to reserved addresses, which may or may not be backed. +/// `dest` cannot point to memory that would violate Rust's aliasing rules. +/// +/// Note that if the written bytes are not a valid representation of `T`, +/// subsequent uses of the memory may be undefined behavior. pub unsafe fn try_write_bytes(dest: *mut T, val: u8, count: usize) -> Result<(), MemoryError> { let len = count * size_of::(); // SAFETY: guaranteed by caller. @@ -183,22 +181,17 @@ pub unsafe fn try_write_bytes(dest: *mut T, val: u8, count: usize) -> Result< /// is under-aligned. /// /// # Safety -/// -/// This routine is safe to use if the memory pointed to by `dest` is being -/// concurrently mutated. -/// -/// WARNING: This routine should only be used when you know that `dest` is -/// valid, reserved addresses but you do not know if they are mapped with the -/// appropriate protection. For example, this routine is useful if `dest` is a -/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and -/// some are mapped with PAGE_READWRITE/PROT_WRITE. -pub unsafe fn try_compare_exchange( +/// `dest` must point to a reserved address, which may or may not be backed. +/// `dest` cannot point to memory that would violate Rust's aliasing rules. +pub unsafe fn try_compare_exchange( dest: *mut T, mut current: T, new: T, ) -> Result, MemoryError> { const { assert!(matches!(size_of::(), 1 | 2 | 4 | 8)); + // This `T` must be at least as aligned as the primitive type's natural + // alignment (which is its size). assert!(align_of::() >= size_of::()); }; // SAFETY: guaranteed by caller @@ -241,7 +234,8 @@ pub unsafe fn try_compare_exchange( +/// `src` must point to a reserved address, which may or may not be backed. +/// +/// Note that this creates a bitwise copy of the data, even if `T` is not +/// `Copy`. The caller must ensure that subsequent uses of the returned value +/// do not cause undefined behavior. +pub unsafe fn try_read_volatile( src: *const T, ) -> Result { let mut dest = MaybeUninit::::uninit(); @@ -297,7 +287,8 @@ pub unsafe fn try_read_volatile( } } -/// Writes `value` at `dest` using one or more write instructions. +/// Writes `value` at `dest` using one or more write instructions, failing if an +/// access violation occurs. /// /// If `T` is 1, 2, 4, or 8 bytes in size, then exactly one write instruction is /// used. @@ -306,16 +297,13 @@ pub unsafe fn try_read_volatile( /// write was unsuccessful. /// /// # Safety -/// -/// This routine is safe to use if the memory pointed to by `dest` is being -/// concurrently mutated. -/// -/// WARNING: This routine should only be used when you know that `dest` is -/// valid, reserved addresses but you do not know if they are mapped with the -/// appropriate protection. For example, this routine is useful if `dest` is a -/// sparse mapping where some pages are mapped with PAGE_NOACCESS/PROT_NONE, and -/// some are mapped with PAGE_READWRITE/PROT_WRITE. -pub unsafe fn try_write_volatile( +/// `dest` must point to a reserved address, which may or may not be backed. +/// `dest` cannot point to memory that would violate Rust's aliasing rules. +/// +/// Note that this creates a bitwise copy of the data, even if `T` is not +/// `Copy`. The caller must ensure that subsequent uses of `dest` do not +/// cause undefined behavior. 
+pub unsafe fn try_write_volatile( dest: *mut T, value: &T, ) -> Result<(), MemoryError> { From e88216179fefff92bb7fd46c3f100f2b64bb9418 Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 22:31:23 +0000 Subject: [PATCH 06/20] comments --- support/trycopy/src/lib.rs | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index 616614e9e9..8f94a5dca5 100644 --- a/support/trycopy/src/lib.rs +++ b/support/trycopy/src/lib.rs @@ -108,10 +108,10 @@ pub fn initialize_try_copy() { /// Copies `count` elements from `src` to `dest`, returning an error if an /// access violation occurs. The source and destination may overlap. -/// +/// /// No guarantees are made about the access width used to perform the copy or /// the order in which the accesses are made. In the case of failure, some of -/// the bytes (even partial elements) may already have been copied. +/// the bytes (even partial elements) may already have been copied. /// /// If [`initialize_try_copy`] has not been called and a fault occurs, the /// process will be terminated according to the platform's default behavior. @@ -120,7 +120,7 @@ pub fn initialize_try_copy() { /// `src` and `dest` must point to reserved addresses, which may or may not /// actually be backed. `dest` cannot point to memory that would violate Rust's /// aliasing rules. -/// +/// /// Note that this creates a bitwise copy of the data, even if `T` is not /// `Copy`. The caller must ensure that subsequent uses of `src` or `dest` do /// not cause undefined behavior. @@ -143,18 +143,18 @@ pub unsafe fn try_copy(src: *const T, dest: *mut T, count: usize) -> Result<( /// Sets `count * size_of::()` bytes of memory at `dest` to the byte value /// `val`, returning an error if an access violation occurs. -/// +/// /// No guarantees are made about the access width used to perform the set or the /// order in which the accesses are made. In the case of failure, some of the /// bytes may already have been set. -/// +/// /// If [`initialize_try_copy`] has not been called and a fault occurs, the /// process will be terminated according to the platform's default behavior. -/// +/// /// # Safety /// `dest` must point to reserved addresses, which may or may not be backed. /// `dest` cannot point to memory that would violate Rust's aliasing rules. -/// +/// /// Note that if the written bytes are not a valid representation of `T`, /// subsequent uses of the memory may be undefined behavior. pub unsafe fn try_write_bytes(dest: *mut T, val: u8, count: usize) -> Result<(), MemoryError> { @@ -245,13 +245,11 @@ pub unsafe fn try_compare_exchange( /// /// # Safety /// `src` must point to a reserved address, which may or may not be backed. -/// +/// /// Note that this creates a bitwise copy of the data, even if `T` is not /// `Copy`. The caller must ensure that subsequent uses of the returned value /// do not cause undefined behavior. -pub unsafe fn try_read_volatile( - src: *const T, -) -> Result { +pub unsafe fn try_read_volatile(src: *const T) -> Result { let mut dest = MaybeUninit::::uninit(); // SAFETY: guaranteed by caller let ret = unsafe { @@ -299,14 +297,11 @@ pub unsafe fn try_read_volatile( /// # Safety /// `dest` must point to a reserved address, which may or may not be backed. /// `dest` cannot point to memory that would violate Rust's aliasing rules. -/// +/// /// Note that this creates a bitwise copy of the data, even if `T` is not /// `Copy`. 
The caller must ensure that subsequent uses of `dest` do not /// cause undefined behavior. -pub unsafe fn try_write_volatile( - dest: *mut T, - value: &T, -) -> Result<(), MemoryError> { +pub unsafe fn try_write_volatile(dest: *mut T, value: &T) -> Result<(), MemoryError> { // SAFETY: guaranteed by caller let ret = unsafe { match size_of::() { @@ -361,7 +356,11 @@ impl MemoryError { fn from_last_failure(src: Option<*const u8>, dest: *mut u8, len: usize) -> Self { let failure = LAST_ACCESS_FAILURE.get(); let (offset, is_write) = if failure.address.is_null() { - // In the case of a general protection fault (#GP) the provided address is zero. + // In the case of a general protection fault (#GP) the provided + // address is zero. + // + // TODO: get the failure offset from the routine that actually + // faulted rather than relying on the kernel. (0, src.is_none()) } else if (dest..dest.wrapping_add(len)).contains(&failure.address) { (failure.address as usize - dest as usize, true) @@ -475,6 +474,8 @@ unsafe fn install_signal_handlers() { ..core::mem::zeroed() }; for signal in [libc::SIGSEGV, libc::SIGBUS] { + // TODO: chain to previous handler. Doing so safely and correctly + // might require running this code before `main`, via a constructor. libc::sigaction(signal, &act, std::ptr::null_mut()); } } From 08ad63565425512f7ea2fd456a4ab195882cb714 Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 22:55:05 +0000 Subject: [PATCH 07/20] fix windows --- support/sparse_mmap/src/windows.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/support/sparse_mmap/src/windows.rs b/support/sparse_mmap/src/windows.rs index f90858689a..27a59366b0 100644 --- a/support/sparse_mmap/src/windows.rs +++ b/support/sparse_mmap/src/windows.rs @@ -686,13 +686,12 @@ pub fn alloc_shared_memory(size: usize) -> io::Result { mod tests { use super::SparseMapping; use super::alloc_shared_memory; - use crate::initialize_try_copy; use crate::try_copy; use windows_sys::Win32::System::Memory::PAGE_READWRITE; #[test] fn test_shared_mem_split() { - initialize_try_copy(); + trycopy::initialize_try_copy(); let shmem = alloc_shared_memory(0x100000).unwrap(); let sparse = SparseMapping::new(0x100000).unwrap(); From 1ef015411f931a27a35208f899b33cfa4e1eadd7 Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 23:06:23 +0000 Subject: [PATCH 08/20] fix aarch64 --- support/trycopy/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index 8f94a5dca5..9e8ca0f9c2 100644 --- a/support/trycopy/src/lib.rs +++ b/support/trycopy/src/lib.rs @@ -644,7 +644,7 @@ macro_rules! recovery_descriptor { ".pushsection ", crate::recovery_section!(), "\n", - ".align 4\n", + ".balign 4\n", ".long ", $start, " - .\n", From f7e7981ab5323dada58a6b30fa142d64523b34a5 Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 23:22:55 +0000 Subject: [PATCH 09/20] retain recovery descriptors --- support/trycopy/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index 9e8ca0f9c2..40bf611583 100644 --- a/support/trycopy/src/lib.rs +++ b/support/trycopy/src/lib.rs @@ -608,7 +608,7 @@ fn recover(context: &mut Context, failure: AccessFailure) -> bool { #[cfg(target_os = "linux")] macro_rules! 
recovery_section { () => { - "try_copy,\"a\"" + "try_copy,\"aR\"" }; } From c769936faf1a8ac96a27761e1f3c1f7bbb24f7da Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 23:33:53 +0000 Subject: [PATCH 10/20] clarify --- support/trycopy/src/lib.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index 40bf611583..9f5c23a4a8 100644 --- a/support/trycopy/src/lib.rs +++ b/support/trycopy/src/lib.rs @@ -547,9 +547,9 @@ fn recover(context: &mut Context, failure: AccessFailure) -> bool { // SAFETY: linker-defined symbols. #[cfg(target_os = "macos")] unsafe extern "C" { - #[link_name = "\x01section$start$__DATA$__try_copy"] + #[link_name = "\x01section$start$__TEXT$__try_copy"] static START_TRY_COPY: [RecoveryDescriptor; 0]; - #[link_name = "\x01section$end$__DATA$__try_copy"] + #[link_name = "\x01section$end$__TEXT$__try_copy"] static STOP_TRY_COPY: [RecoveryDescriptor; 0]; } @@ -608,6 +608,7 @@ fn recover(context: &mut Context, failure: AccessFailure) -> bool { #[cfg(target_os = "linux")] macro_rules! recovery_section { () => { + // a = allocate, R = retain: don't discard on linking. "try_copy,\"aR\"" }; } @@ -615,6 +616,7 @@ macro_rules! recovery_section { #[cfg(target_os = "windows")] macro_rules! recovery_section { () => { + // d = data, r = read-only ".rdata.trycopy@b,\"dr\"" }; } @@ -622,7 +624,9 @@ macro_rules! recovery_section { #[cfg(target_os = "macos")] macro_rules! recovery_section { () => { - "__DATA,__try_copy,regular,no_dead_strip" + // __TEXT = read-only segment, regular = regular section, no_dead_strip + // = don't discard on linking. + "__TEXT,__try_copy,regular,no_dead_strip" }; } From 5b18edbf6a15f325867da73c95b3c1e6e37f0401 Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 28 Oct 2025 23:51:02 +0000 Subject: [PATCH 11/20] windows --- support/sparse_mmap/src/windows.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/support/sparse_mmap/src/windows.rs b/support/sparse_mmap/src/windows.rs index 27a59366b0..19f21f3875 100644 --- a/support/sparse_mmap/src/windows.rs +++ b/support/sparse_mmap/src/windows.rs @@ -686,8 +686,8 @@ pub fn alloc_shared_memory(size: usize) -> io::Result { mod tests { use super::SparseMapping; use super::alloc_shared_memory; - use crate::try_copy; use windows_sys::Win32::System::Memory::PAGE_READWRITE; + use trycopy::try_copy; #[test] fn test_shared_mem_split() { From c0cf57ada29ff15bfa30bc6a6aafe41d97df9d0f Mon Sep 17 00:00:00 2001 From: John Starks Date: Wed, 29 Oct 2025 19:05:29 +0000 Subject: [PATCH 12/20] different windows approach --- support/sparse_mmap/src/windows.rs | 2 +- support/trycopy/Cargo.toml | 2 + support/trycopy/src/lib.rs | 95 +++++++++++++++++++++++++----- 3 files changed, 82 insertions(+), 17 deletions(-) diff --git a/support/sparse_mmap/src/windows.rs b/support/sparse_mmap/src/windows.rs index 19f21f3875..702b212e5a 100644 --- a/support/sparse_mmap/src/windows.rs +++ b/support/sparse_mmap/src/windows.rs @@ -686,8 +686,8 @@ pub fn alloc_shared_memory(size: usize) -> io::Result { mod tests { use super::SparseMapping; use super::alloc_shared_memory; - use windows_sys::Win32::System::Memory::PAGE_READWRITE; use trycopy::try_copy; + use windows_sys::Win32::System::Memory::PAGE_READWRITE; #[test] fn test_shared_mem_split() { diff --git a/support/trycopy/Cargo.toml b/support/trycopy/Cargo.toml index d5fb119155..e3db0a61cd 100644 --- a/support/trycopy/Cargo.toml +++ b/support/trycopy/Cargo.toml @@ -17,6 +17,8 @@ windows-sys = 
{ workspace = true, features = [ "Win32_Foundation", "Win32_System_Diagnostics_Debug", "Win32_System_Kernel", + "Win32_System_SystemInformation", + "Win32_System_SystemServices", ]} [[bench]] diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index 9f5c23a4a8..ecc20476de 100644 --- a/support/trycopy/src/lib.rs +++ b/support/trycopy/src/lib.rs @@ -534,8 +534,12 @@ struct RecoveryDescriptor { recover: i32, } -fn recover(context: &mut Context, failure: AccessFailure) -> bool { - // SAFETY: linker-defined symbols. +/// Returns the recovery descriptor table, found by linker-defined symbols +/// marking the start and end of the section. +#[cfg(unix)] +fn recovery_table() -> &'static [RecoveryDescriptor] { + // SAFETY: the linker automatically defines these symbols when the section + // is non-empty. #[cfg(target_os = "linux")] unsafe extern "C" { #[link_name = "__start_try_copy"] @@ -544,38 +548,97 @@ fn recover(context: &mut Context, failure: AccessFailure) -> bool { static STOP_TRY_COPY: [RecoveryDescriptor; 0]; } - // SAFETY: linker-defined symbols. + // SAFETY: the linker automatically defines these symbols when the section + // is non-empty. #[cfg(target_os = "macos")] unsafe extern "C" { + // The linker on macOS uses a special naming scheme for section symbols. #[link_name = "\x01section$start$__TEXT$__try_copy"] static START_TRY_COPY: [RecoveryDescriptor; 0]; #[link_name = "\x01section$end$__TEXT$__try_copy"] static STOP_TRY_COPY: [RecoveryDescriptor; 0]; } - // SAFETY: creating symbols in the specified section in order to locate the - // recovery descriptors. - #[cfg(windows)] - #[unsafe(link_section = ".rdata.trycopy@a")] - static START_TRY_COPY: [RecoveryDescriptor; 0] = []; - #[cfg(windows)] - #[unsafe(link_section = ".rdata.trycopy@c")] - static STOP_TRY_COPY: [RecoveryDescriptor; 0] = []; - // SAFETY: accessing the trycopy section as defined above. - let table = unsafe { + unsafe { std::slice::from_raw_parts( START_TRY_COPY.as_ptr(), STOP_TRY_COPY .as_ptr() .offset_from_unsigned(START_TRY_COPY.as_ptr()), ) - }; + } +} + +/// Returns the recovery descriptor table, found by finding the .section via the +/// PE headers. +/// +/// The more typical way to do this on Windows is to use the grouping feature of +/// the linker to create symbols marking the start and end of the section, via +/// something like `.trycopy$a` and `.trycopy$z`, with the elements in between +/// in `.trycopy$b`. +/// +/// However, Rust/LLVM inline asm (but not global asm) seems to drop the '$', +/// so this doesn't work. So, we use a different technique. +#[cfg(windows)] +fn recovery_table() -> &'static [RecoveryDescriptor] { + /// Find a PE section by name. + fn find_section(name: [u8; 8]) -> Option<(*const u8, usize)> { + use windows_sys::Win32::System::Diagnostics::Debug::IMAGE_NT_HEADERS64; + use windows_sys::Win32::System::Diagnostics::Debug::IMAGE_SECTION_HEADER; + use windows_sys::Win32::System::SystemServices::IMAGE_DOS_HEADER; + + unsafe extern "C" { + safe static __ImageBase: IMAGE_DOS_HEADER; + } + let dos_header = &__ImageBase; + let base_ptr = &raw const __ImageBase; + // SAFETY: the current module must have valid PE headers. + let pe = unsafe { + &*base_ptr + .byte_add(dos_header.e_lfanew as usize) + .cast::() + }; + let number_of_sections: usize = pe.FileHeader.NumberOfSections.into(); + + // SAFETY: the section table is laid out in memory according to the PE format. 
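The Unix branch above relies on a standard linker feature: once a section with a C-identifier name is non-empty, the linker synthesizes `__start_<name>`/`__stop_<name>` symbols that bound it (macOS uses its `section$start`/`section$end` spelling instead). A self-contained, Linux-only sketch of that pattern, where `Entry`, the `demo_tbl` section, `ENTRY_A`, and `demo_table` are all hypothetical names and not part of this patch; whether the section survives `--gc-sections` still depends on linker flags and retention attributes:

    #[repr(C)]
    struct Entry {
        id: u32,
    }

    // A #[used] static guarantees the section is non-empty, so the linker emits
    // the bounding symbols (the same reason the series later adds ENSURE_EXISTS).
    #[unsafe(link_section = "demo_tbl")]
    #[used]
    static ENTRY_A: Entry = Entry { id: 1 };

    // SAFETY: the linker defines these symbols once the section exists.
    unsafe extern "C" {
        #[link_name = "__start_demo_tbl"]
        static START_DEMO: [Entry; 0];
        #[link_name = "__stop_demo_tbl"]
        static STOP_DEMO: [Entry; 0];
    }

    fn demo_table() -> &'static [Entry] {
        // SAFETY: the symbols bound the demo_tbl section, which holds only
        // `Entry` values placed there by this crate.
        unsafe {
            std::slice::from_raw_parts(
                START_DEMO.as_ptr(),
                STOP_DEMO.as_ptr().offset_from_unsigned(START_DEMO.as_ptr()),
            )
        }
    }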
+ let sections = unsafe { + let base = (&raw const pe.OptionalHeader) + .byte_add(pe.FileHeader.SizeOfOptionalHeader.into()) + .cast::(); + std::slice::from_raw_parts(base, number_of_sections) + }; + + sections.iter().find_map(|section| { + (section.Name == name).then_some({ + // SAFETY: section data is valid according to the PE format. + unsafe { + ( + base_ptr.byte_add(section.VirtualAddress as usize).cast(), + section.Misc.VirtualSize as usize, + ) + } + }) + }) + } + + let (start, len) = find_section(*b".trycopy").expect("could not find .trycopy section"); + assert_eq!(len % size_of::(), 0); + // SAFETY: this section is made up solely of RecoveryDescriptor entries. + unsafe { + std::slice::from_raw_parts( + start.cast::(), + len / size_of::(), + ) + } +} + +fn recover(context: &mut Context, failure: AccessFailure) -> bool { let ip = get_context_ip(context); // Search for a matching recovery descriptor. - for r in table { + for r in recovery_table() { let reloc = |addr: &i32| -> usize { core::ptr::from_ref(addr) .addr() @@ -617,7 +680,7 @@ macro_rules! recovery_section { macro_rules! recovery_section { () => { // d = data, r = read-only - ".rdata.trycopy@b,\"dr\"" + ".trycopy,\"dr\"" }; } From 0505f9119290510ed360fa40c8e4d6f759449299 Mon Sep 17 00:00:00 2001 From: John Starks Date: Wed, 29 Oct 2025 19:08:43 +0000 Subject: [PATCH 13/20] feedback --- support/trycopy/src/aarch64.rs | 6 +++--- support/trycopy/src/x86_64.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/support/trycopy/src/aarch64.rs b/support/trycopy/src/aarch64.rs index 556dd91135..1a21f5c981 100644 --- a/support/trycopy/src/aarch64.rs +++ b/support/trycopy/src/aarch64.rs @@ -279,7 +279,7 @@ macro_rules! try_read { try_read!(pub(crate) try_read8, u8, "ldrb {out:w}, [{src}]"); try_read!(pub(crate) try_read16, u16, "ldrh {out:w}, [{src}]"); try_read!(pub(crate) try_read32, u32, "ldr {out:w}, [{src}]"); -try_read!(pub(crate) try_read64, u64, "ldr {out}, [{src}]"); +try_read!(pub(crate) try_read64, u64, "ldr {out:x}, [{src}]"); macro_rules! try_write { ($vis:vis $func:ident, $ty:ty, $asm:expr) => { @@ -308,7 +308,7 @@ macro_rules! try_write { try_write!(pub(crate) try_write8, u8, "strb {val:w}, [{dest}]"); try_write!(pub(crate) try_write16, u16, "strh {val:w}, [{dest}]"); try_write!(pub(crate) try_write32, u32, "str {val:w}, [{dest}]"); -try_write!(pub(crate) try_write64, u64, "str {val}, [{dest}]"); +try_write!(pub(crate) try_write64, u64, "str {val:x}, [{dest}]"); macro_rules! try_cmpxchg { ($vis:vis $func:ident, $ty:ty, $asm:expr) => { @@ -354,4 +354,4 @@ macro_rules! try_cmpxchg { try_cmpxchg!(pub(crate) try_cmpxchg8, u8, "casalb {expected:w}, {desired:w}, [{dest}]"); try_cmpxchg!(pub(crate) try_cmpxchg16, u16, "casalh {expected:w}, {desired:w}, [{dest}]"); try_cmpxchg!(pub(crate) try_cmpxchg32, u32, "casal {expected:w}, {desired:w}, [{dest}]"); -try_cmpxchg!(pub(crate) try_cmpxchg64, u64, "casal {expected}, {desired}, [{dest}]"); +try_cmpxchg!(pub(crate) try_cmpxchg64, u64, "casal {expected:x}, {desired:x}, [{dest}]"); diff --git a/support/trycopy/src/x86_64.rs b/support/trycopy/src/x86_64.rs index 09998cee1d..45b6806864 100644 --- a/support/trycopy/src/x86_64.rs +++ b/support/trycopy/src/x86_64.rs @@ -322,7 +322,7 @@ macro_rules! 
try_read { try_read!(pub(crate) try_read8, u8, "movzx {out:e}, byte ptr [{src}]"); try_read!(pub(crate) try_read16, u16, "movzx {out:e}, word ptr [{src}]"); try_read!(pub(crate) try_read32, u32, "mov {out:e}, dword ptr [{src}]"); -try_read!(pub(crate) try_read64, u64, "mov {out}, qword ptr [{src}]"); +try_read!(pub(crate) try_read64, u64, "mov {out:r}, qword ptr [{src}]"); macro_rules! try_write { ($vis:vis $func:ident, $ty:ty, $asm:expr) => { @@ -351,7 +351,7 @@ macro_rules! try_write { try_write!(pub(crate) try_write8, u8, "mov byte ptr [{dest}], {val:l}"); try_write!(pub(crate) try_write16, u16, "mov word ptr [{dest}], {val:x}"); try_write!(pub(crate) try_write32, u32, "mov dword ptr [{dest}], {val:e}"); -try_write!(pub(crate) try_write64, u64, "mov qword ptr [{dest}], {val}"); +try_write!(pub(crate) try_write64, u64, "mov qword ptr [{dest}], {val:r}"); macro_rules! try_cmpxchg { ($vis:vis $func:ident, $ty:ty, $ax:tt, $reg_kind:tt, $asm:expr) => { @@ -395,4 +395,4 @@ macro_rules! try_cmpxchg { try_cmpxchg!(pub(crate) try_cmpxchg8, u8, "al", reg_byte, "cmpxchg byte ptr [{dest}], {desired}"); try_cmpxchg!(pub(crate) try_cmpxchg16, u16, "ax", reg, "cmpxchg word ptr [{dest}], {desired:x}"); try_cmpxchg!(pub(crate) try_cmpxchg32, u32, "eax", reg, "cmpxchg dword ptr [{dest}], {desired:e}"); -try_cmpxchg!(pub(crate) try_cmpxchg64, u64, "rax", reg, "cmpxchg qword ptr [{dest}], {desired}"); +try_cmpxchg!(pub(crate) try_cmpxchg64, u64, "rax", reg, "cmpxchg qword ptr [{dest}], {desired:r}"); From 4dee3284767b4a0913752b295c4eda8151b3a7c0 Mon Sep 17 00:00:00 2001 From: John Starks Date: Thu, 30 Oct 2025 21:53:17 +0000 Subject: [PATCH 14/20] handle the no descriptor case --- support/trycopy/src/lib.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index ecc20476de..a5cc1eb49b 100644 --- a/support/trycopy/src/lib.rs +++ b/support/trycopy/src/lib.rs @@ -559,6 +559,13 @@ fn recovery_table() -> &'static [RecoveryDescriptor] { static STOP_TRY_COPY: [RecoveryDescriptor; 0]; } + // Ensure the section exists even if there no recovery descriptors get + // generated. + #[cfg_attr(target_os = "linux", unsafe(link_section = "try_copy"))] + #[cfg_attr(target_os = "macos", link_section = "__TEXT,__try_copy,regular")] + #[used] + static ENSURE_EXISTS: [RecoveryDescriptor; 0] = []; + // SAFETY: accessing the trycopy section as defined above. unsafe { std::slice::from_raw_parts( @@ -623,7 +630,10 @@ fn recovery_table() -> &'static [RecoveryDescriptor] { }) } - let (start, len) = find_section(*b".trycopy").expect("could not find .trycopy section"); + let Some((start, len)) = find_section(*b".trycopy") else { + // No recovery descriptors. + return &[] + }; assert_eq!(len % size_of::(), 0); // SAFETY: this section is made up solely of RecoveryDescriptor entries. unsafe { From ea0d080b2cbd4aa0978ae37b774f91ffd5a45022 Mon Sep 17 00:00:00 2001 From: John Starks Date: Thu, 30 Oct 2025 22:55:06 +0000 Subject: [PATCH 15/20] build fixes --- support/trycopy/src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index a5cc1eb49b..0ea4f91f5e 100644 --- a/support/trycopy/src/lib.rs +++ b/support/trycopy/src/lib.rs @@ -562,7 +562,10 @@ fn recovery_table() -> &'static [RecoveryDescriptor] { // Ensure the section exists even if there no recovery descriptors get // generated. 
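The asm changes above pin the operand width with explicit template modifiers (`:e`/`:r` on x86_64, `:w`/`:x` on aarch64) so each instruction spells out the register size it actually uses. A standalone illustration of the x86_64 modifier, written as a hypothetical helper with no fault recovery attached:

    #[cfg(target_arch = "x86_64")]
    unsafe fn read_u64_raw(src: *const u64) -> u64 {
        let out: u64;
        // SAFETY: the caller guarantees `src` is valid for an aligned 8-byte read.
        unsafe {
            core::arch::asm!(
                // `:r` spells the full 64-bit register (e.g. rax), just as `:e`
                // would spell the 32-bit form (eax); this matches the width the
                // `mov` actually performs.
                "mov {out:r}, qword ptr [{src}]",
                src = in(reg) src,
                out = lateout(reg) out,
                options(nostack, readonly),
            );
        }
        out
    }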
#[cfg_attr(target_os = "linux", unsafe(link_section = "try_copy"))] - #[cfg_attr(target_os = "macos", link_section = "__TEXT,__try_copy,regular")] + #[cfg_attr( + target_os = "macos", + unsafe(link_section = "__TEXT,__try_copy,regular") + )] #[used] static ENSURE_EXISTS: [RecoveryDescriptor; 0] = []; @@ -632,7 +635,7 @@ fn recovery_table() -> &'static [RecoveryDescriptor] { let Some((start, len)) = find_section(*b".trycopy") else { // No recovery descriptors. - return &[] + return &[]; }; assert_eq!(len % size_of::(), 0); // SAFETY: this section is made up solely of RecoveryDescriptor entries. From 5f4d1baf69465719fea417c1647c956206e61ac6 Mon Sep 17 00:00:00 2001 From: John Starks Date: Fri, 31 Oct 2025 18:20:19 +0000 Subject: [PATCH 16/20] restore direction flag --- support/trycopy/src/x86_64.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/support/trycopy/src/x86_64.rs b/support/trycopy/src/x86_64.rs index 45b6806864..8de0df01be 100644 --- a/support/trycopy/src/x86_64.rs +++ b/support/trycopy/src/x86_64.rs @@ -20,12 +20,15 @@ pub(super) fn get_context_ip(ctx: &Context) -> usize { } pub(super) fn set_context_ip_and_result(ctx: &mut Context, ip: usize, result: Option) { + // This function also clears the direction flag to restore the ABI expectation. + const DIRECTION_FLAG_MASK: u64 = 0x400; #[cfg(target_os = "linux")] { ctx.gregs[libc::REG_RIP as usize] = ip as _; if let Some(result) = result { ctx.gregs[libc::REG_RCX as usize] = result as _; } + ctx.gregs[libc::REG_EFL as usize] &= !DIRECTION_FLAG_MASK as libc::greg_t; } #[cfg(target_os = "windows")] { @@ -33,6 +36,7 @@ pub(super) fn set_context_ip_and_result(ctx: &mut Context, ip: usize, result: Op if let Some(result) = result { ctx.Rcx = result as _; } + ctx.EFlags &= !DIRECTION_FLAG_MASK; } } @@ -157,8 +161,8 @@ unsafe fn try_copy_backward(dest: *mut u8, src: *const u8, length: usize) -> Res "2:", "std", "rep movsb", + "cld", // note: `set_context_ip_and_result` will clear this in the failure case "3:", - "cld", recovery_descriptor!("2b", "3b", "{bail}"), in("rdi") dest.add(length - 1), in("rsi") src.add(length - 1), From e41921656f362d44c843d2e3018e1b8685088784 Mon Sep 17 00:00:00 2001 From: John Starks Date: Fri, 31 Oct 2025 18:24:15 +0000 Subject: [PATCH 17/20] fix back label --- support/trycopy/src/x86_64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/support/trycopy/src/x86_64.rs b/support/trycopy/src/x86_64.rs index 8de0df01be..31b69eb2bf 100644 --- a/support/trycopy/src/x86_64.rs +++ b/support/trycopy/src/x86_64.rs @@ -111,7 +111,7 @@ unsafe fn try_copy_forward(dest: *mut u8, src: *const u8, length: usize) -> Resu jne 2b 3: ", - recovery_descriptor!("2b", "3f", "{bail}"), + recovery_descriptor!("2b", "3b", "{bail}"), s1 = out(xmm_reg) _, s2 = out(xmm_reg) _, i = inout(reg) 0u64 => _, From bddc9a3972b87cd365bed5874f9b58146c92e63a Mon Sep 17 00:00:00 2001 From: John Starks Date: Fri, 31 Oct 2025 18:29:51 +0000 Subject: [PATCH 18/20] more testing --- support/trycopy/src/lib.rs | 31 ++++++++++++++++++++++++++----- support/trycopy/src/x86_64.rs | 4 ++-- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/support/trycopy/src/lib.rs b/support/trycopy/src/lib.rs index 0ea4f91f5e..9622e091eb 100644 --- a/support/trycopy/src/lib.rs +++ b/support/trycopy/src/lib.rs @@ -920,6 +920,23 @@ mod tests { } } + #[test] + fn test_try_memmove_nonoverlapping() { + initialize_try_copy(); + let max = 8000; + let src = (0..max).map(|x| (x % 256) as u8).collect::>(); + let mut dest = vec![0u8; 
max]; + for i in 0..max { + let dest = &mut dest[max - i..]; + let src = &src[max - i..]; + dest.fill(0); + unsafe { + try_memmove(dest.as_mut_ptr(), src.as_ptr(), i).unwrap(); + }; + assert_eq!(dest, src); + } + } + #[test] fn test_try_memmove_overlapping() { initialize_try_copy(); @@ -945,11 +962,15 @@ mod tests { fn test_try_memset() { initialize_try_copy(); - let mut buf = [0u8; 256]; - unsafe { try_memset(buf.as_mut_ptr(), 0x5a, buf.len()).unwrap() }; - assert_eq!(&buf, &[0x5a; 256]); - - unsafe { try_memset(0x100 as *mut u8, 0x5a, 100).unwrap_err() }; + for c in [0, 0x5f] { + for n in [ + 0, 1, 15, 16, 31, 32, 63, 64, 127, 128, 255, 256, 528, 1942, 4097, + ] { + let mut buf = vec![0u8; n]; + unsafe { try_memset(buf.as_mut_ptr(), c, n).unwrap() }; + assert_eq!(buf, vec![c; n]); + } + } } #[test] diff --git a/support/trycopy/src/x86_64.rs b/support/trycopy/src/x86_64.rs index 31b69eb2bf..1caacbd634 100644 --- a/support/trycopy/src/x86_64.rs +++ b/support/trycopy/src/x86_64.rs @@ -20,7 +20,7 @@ pub(super) fn get_context_ip(ctx: &Context) -> usize { } pub(super) fn set_context_ip_and_result(ctx: &mut Context, ip: usize, result: Option) { - // This function also clears the direction flag to restore the ABI expectation. + // This function also clears the direction flag to restore the ABI expectation. const DIRECTION_FLAG_MASK: u64 = 0x400; #[cfg(target_os = "linux")] { @@ -28,7 +28,7 @@ pub(super) fn set_context_ip_and_result(ctx: &mut Context, ip: usize, result: Op if let Some(result) = result { ctx.gregs[libc::REG_RCX as usize] = result as _; } - ctx.gregs[libc::REG_EFL as usize] &= !DIRECTION_FLAG_MASK as libc::greg_t; + ctx.gregs[libc::REG_EFL as usize] &= !DIRECTION_FLAG_MASK as libc::greg_t; } #[cfg(target_os = "windows")] { From f35d9719079671cc27ba2eba60722f0b4d47474d Mon Sep 17 00:00:00 2001 From: John Starks Date: Fri, 31 Oct 2025 18:51:20 +0000 Subject: [PATCH 19/20] lock it down --- support/trycopy/src/x86_64.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/support/trycopy/src/x86_64.rs b/support/trycopy/src/x86_64.rs index 1caacbd634..120176f0db 100644 --- a/support/trycopy/src/x86_64.rs +++ b/support/trycopy/src/x86_64.rs @@ -396,7 +396,7 @@ macro_rules! 
try_cmpxchg { }; } -try_cmpxchg!(pub(crate) try_cmpxchg8, u8, "al", reg_byte, "cmpxchg byte ptr [{dest}], {desired}"); -try_cmpxchg!(pub(crate) try_cmpxchg16, u16, "ax", reg, "cmpxchg word ptr [{dest}], {desired:x}"); -try_cmpxchg!(pub(crate) try_cmpxchg32, u32, "eax", reg, "cmpxchg dword ptr [{dest}], {desired:e}"); -try_cmpxchg!(pub(crate) try_cmpxchg64, u64, "rax", reg, "cmpxchg qword ptr [{dest}], {desired:r}"); +try_cmpxchg!(pub(crate) try_cmpxchg8, u8, "al", reg_byte, "lock cmpxchg byte ptr [{dest}], {desired}"); +try_cmpxchg!(pub(crate) try_cmpxchg16, u16, "ax", reg, "lock cmpxchg word ptr [{dest}], {desired:x}"); +try_cmpxchg!(pub(crate) try_cmpxchg32, u32, "eax", reg, "lock cmpxchg dword ptr [{dest}], {desired:e}"); +try_cmpxchg!(pub(crate) try_cmpxchg64, u64, "rax", reg, "lock cmpxchg qword ptr [{dest}], {desired:r}"); From 4f68328683e5237cd951c4f61e17cc08266611e2 Mon Sep 17 00:00:00 2001 From: John Starks Date: Fri, 31 Oct 2025 19:19:27 +0000 Subject: [PATCH 20/20] fix --- support/trycopy/src/x86_64.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/support/trycopy/src/x86_64.rs b/support/trycopy/src/x86_64.rs index 120176f0db..13d244dca9 100644 --- a/support/trycopy/src/x86_64.rs +++ b/support/trycopy/src/x86_64.rs @@ -21,14 +21,14 @@ pub(super) fn get_context_ip(ctx: &Context) -> usize { pub(super) fn set_context_ip_and_result(ctx: &mut Context, ip: usize, result: Option) { // This function also clears the direction flag to restore the ABI expectation. - const DIRECTION_FLAG_MASK: u64 = 0x400; + const DIRECTION_FLAG_MASK: u32 = 0x400; #[cfg(target_os = "linux")] { ctx.gregs[libc::REG_RIP as usize] = ip as _; if let Some(result) = result { ctx.gregs[libc::REG_RCX as usize] = result as _; } - ctx.gregs[libc::REG_EFL as usize] &= !DIRECTION_FLAG_MASK as libc::greg_t; + ctx.gregs[libc::REG_EFL as usize] &= !(DIRECTION_FLAG_MASK as libc::greg_t); } #[cfg(target_os = "windows")] {
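The switch to `lock cmpxchg` matters because, on a multiprocessor, a plain `cmpxchg` read-modify-write is not atomic with respect to other CPUs. A rough sketch of the compare-exchange protocol those macros wrap, written as a hypothetical 32-bit helper without the recovery descriptor; on failure it returns the value observed at `dest`:

    #[cfg(target_arch = "x86_64")]
    unsafe fn cmpxchg_u32(dest: *mut u32, mut expected: u32, desired: u32) -> Result<(), u32> {
        let ok: u8;
        // SAFETY: the caller guarantees `dest` is valid, aligned, writable memory.
        unsafe {
            core::arch::asm!(
                // cmpxchg compares EAX with [dest]; on a match it stores `desired`
                // and sets ZF, otherwise it loads the observed value into EAX and
                // clears ZF. `lock` makes the read-modify-write atomic across CPUs.
                "lock cmpxchg dword ptr [{dest}], {desired:e}",
                "sete {ok}",
                dest = in(reg) dest,
                desired = in(reg) desired,
                ok = out(reg_byte) ok,
                inout("eax") expected,
                options(nostack),
            );
        }
        if ok != 0 { Ok(()) } else { Err(expected) }
    }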