diff --git a/Cargo.lock b/Cargo.lock index 9813848..b31bd4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -575,9 +575,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" [[package]] name = "hermit-abi" @@ -638,12 +638,12 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "indexmap" -version = "2.9.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown 0.16.0", ] [[package]] @@ -726,9 +726,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.172" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "libloading" @@ -824,9 +824,9 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memflow" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df612ab27a15bc64554a6bc93cf80493b9bff753834aebea044089ffdf6295b6" +checksum = "0b5a164dd29bb697a512c389215acf4899a3e671a72844acb04d1ddac6ba8d7f" dependencies = [ "abi_stable", "bitflags 1.3.2", @@ -858,9 +858,9 @@ dependencies = [ [[package]] name = "memflow-derive" -version = "0.2.0" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d766f6681f968c92eb0359fc4bc99039ebe2568df4bb884c7cb7b16023e94d32" +checksum = "d894dc2b0bbce37b81280e1b36da4db3e524e4df5a57d5899b4bd943f489b506" dependencies = [ "darling", "proc-macro-crate", @@ -882,6 +882,7 @@ dependencies = [ "mach2", "memflow", "procfs", + "sysinfo", "windows", ] @@ -926,6 +927,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -935,6 +945,25 @@ dependencies = [ "autocfg", ] +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags 2.9.0", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + [[package]] name = "object" version = "0.36.7" @@ -1012,12 +1041,11 @@ checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" [[package]] name = "proc-macro-crate" -version = "2.0.2" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b00f26d3400549137f92511a46ac1cd8ce37cb5598a96d382381458b992a5d24" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_datetime", - "toml_edit", + "toml_edit 0.23.7", ] [[package]] @@ -1223,18 +1251,28 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -1302,6 +1340,20 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sysinfo" +version = "0.37.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16607d5caffd1c07ce073528f9ed972d88db15dd44023fa57142963be3feb11f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows", +] + [[package]] name = "tarc" version = "0.1.6" @@ -1345,8 +1397,8 @@ checksum = "185d8ab0dfbb35cf1399a6344d8484209c088f75f8f68230da55d48d95d43e3d" dependencies = [ "serde", "serde_spanned", - "toml_datetime", - "toml_edit", + "toml_datetime 0.6.3", + "toml_edit 0.20.2", ] [[package]] @@ -1358,6 +1410,15 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + [[package]] name = "toml_edit" version = "0.20.2" @@ -1367,8 +1428,29 @@ dependencies = [ "indexmap", "serde", "serde_spanned", - "toml_datetime", - "winnow", + "toml_datetime 0.6.3", + "winnow 0.5.40", +] + +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap", + "toml_datetime 0.7.3", + "toml_parser", + "winnow 0.7.13", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +dependencies = [ + "winnow 0.7.13", ] [[package]] @@ -1841,6 +1923,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "winnow" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +dependencies = [ + "memchr", +] + [[package]] name = "x86_64" version = "0.14.13" diff --git a/Cargo.toml b/Cargo.toml index 837cb04..366b07d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "memflow-native" version = "0.2.6" -authors = ["Aurimas Blažulionis <0x60@pm.me>", "ko1N "] +authors = ["Aurimas Blažulionis <0x60@pm.me>", "ko1N ", "k1nd0ne "] edition = "2021" description = "System call based proxy-OS for memflow" documentation = "https://docs.rs/memflow-native" @@ -21,6 +21,7 @@ itertools = "0.14" libc = { version = "0.2" } log = "0.4" memflow = { version = "0.2", features = ["plugins", "goblin"] } +#memflow = { path = "../memflow/memflow" } # we keep procfs on version 0.15.x because it does not build properly with the backtrace on 0.16.x # tracking issue: https://github.com/eminence/procfs/pull/309 @@ -42,6 +43,7 @@ windows = { version = "0.61", features = [ "Win32_UI_Input", "Win32_UI_Input_KeyboardAndMouse", ] } +sysinfo = "0.37" [target.'cfg(target_os = "macos")'.dependencies] mac-sys-info = "0.1" diff --git a/src/linux/process.rs b/src/linux/process.rs index 591e6a4..836d830 100644 --- a/src/linux/process.rs +++ b/src/linux/process.rs @@ -77,6 +77,27 @@ impl LinuxProcess { MMapPath::Other(s) => s.as_str().into(), } } + + fn collect_envars(&self) -> Result> { + let path = format!("/proc/{}/environ", self.proc.pid()); + let data = std::fs::read(path) + .map_err(|_| Error(ErrorOrigin::OsLayer, ErrorKind::EnvarNotFound))?; + + let mut out = Vec::new(); + for entry in data.split(|b| *b == 0).filter(|entry| !entry.is_empty()) { + let entry = String::from_utf8_lossy(entry); + if let Some((name, value)) = entry.split_once('=') { + out.push(EnvVarInfo { + name: ReprCString::from(name), + value: ReprCString::from(value), + address: Address::NULL, + arch: self.info.proc_arch, + }); + } + } + + Ok(out) + } } cglue_impl_group!(LinuxProcess, ProcessInstance, {}); @@ -277,6 +298,40 @@ impl Process for LinuxProcess { .feed_into(out); } } + + fn envar_list_callback( + &mut self, + target_arch: Option<&ArchitectureIdent>, + mut callback: EnvVarCallback, + ) -> Result<()> { + if let Some(arch) = target_arch { + if *arch != self.info.proc_arch { + return Ok(()); + } + } + + for envar in self.collect_envars()? { + if !callback.call(envar) { + break; + } + } + + Ok(()) + } + + fn environment_block_address(&mut self, _architecture: ArchitectureIdent) -> Result
{ + // Linux does not expose a stable public env-block pointer through procfs. + Ok(Address::NULL) + } + + fn envar_list_from_address( + &mut self, + _env_block: Address, + architecture: ArchitectureIdent, + callback: EnvVarCallback, + ) -> Result<()> { + self.envar_list_callback(Some(&architecture), callback) + } } impl MemoryView for LinuxProcess { diff --git a/src/macos/mem.rs b/src/macos/mem.rs index 6ceb054..38fe051 100644 --- a/src/macos/mem.rs +++ b/src/macos/mem.rs @@ -15,7 +15,9 @@ fn get_task(pid: u32) -> Result { let mut task = MACH_PORT_NULL; let res = task_for_pid(mach_task_self(), pid as i32, &mut task as *mut mach_port_t); if res != KERN_SUCCESS { - log::error!("Could not get task: {res}"); + log::warn!( + "task_for_pid permission denied/unavailable for pid {pid} (kern_return={res})" + ); Err(Error(ErrorOrigin::OsLayer, ErrorKind::Unknown)) } else { Ok(task) @@ -36,6 +38,23 @@ impl ProcessVirtualMemory { pid: info.pid, }) } + + pub fn new_unavailable(pid: u32) -> Self { + Self { + port: MACH_PORT_NULL, + pid, + } + } + + pub(crate) fn ensure_port(&mut self) -> Result { + // Acquire task port lazily so callers that only need process metadata can still work + // when task_for_pid is restricted. + if self.port == MACH_PORT_NULL { + self.port = get_task(self.pid)?; + } + + Ok(self.port) + } } // Helper trait for `process_rw` to be generic. @@ -67,6 +86,8 @@ impl<'a> RWSlice for CSliceRef<'a, u8> { for off in iter { let size = core::cmp::min(size - off, u32::MAX as _); + let local = (local as usize + off) as *const c_void; + let remote = (remote as usize + off) as *const c_void; let ret = mach_vm_write(port, remote as _, local as _, size as u32); @@ -95,11 +116,12 @@ impl<'a> RWSlice for CSliceMut<'a, u8> { // mach_vm_read_list exists, however, it seems to allocate new buffers, meaning, we would // need to perform a second copy, and free those buffers immediately afterwards (1 syscall // per buffer!). Therefore, we are doing sequential read syscall per buffer. - let ret = mach_vm_read_overwrite(port, remote as _, size as _, local as _, &mut 0); + let mut out_size = 0; + let ret = mach_vm_read_overwrite(port, remote as _, size as _, local as _, &mut out_size); if ret != KERN_SUCCESS { return Err(Error(ErrorOrigin::OsLayer, ErrorKind::UnableToReadMemory)); } - Ok(size) + Ok(out_size as usize) } } @@ -113,9 +135,11 @@ impl ProcessVirtualMemory { mut out_fail, }: MemOps, CTup2>, ) -> Result<()> { + let port = self.ensure_port()?; + for CTup3(addr, meta_addr, buf) in inp { let written = - unsafe { T::do_rw(self.port, buf.as_ptr() as _, addr.to_umem() as _, buf.len()) } + unsafe { T::do_rw(port, buf.as_ptr() as _, addr.to_umem() as _, buf.len()) } .unwrap_or(0); let (succeed, fail) = buf.split_at(written as _); diff --git a/src/macos/mod.rs b/src/macos/mod.rs index 5f597ce..1802498 100644 --- a/src/macos/mod.rs +++ b/src/macos/mod.rs @@ -1,7 +1,7 @@ use memflow::os::process::*; use memflow::prelude::v1::*; -use libc::{c_int, sysctl, CTL_KERN, KERN_PROCARGS2}; +use libc::{c_int, size_t, sysctl, CTL_KERN, KERN_ARGMAX, KERN_PROCARGS2}; use libc::{sysconf, _SC_PAGESIZE}; use libproc::{ @@ -28,6 +28,132 @@ use mem::ProcessVirtualMemory; pub mod process; pub use process::MacProcess; +#[derive(Clone, Debug, Default)] +pub(super) struct ProcArgs { + pub exec_path: String, + pub argv: Vec, + pub environ: Vec<(String, String)>, +} + +fn argmax() -> usize { + let mut mib: [c_int; 2] = [CTL_KERN, KERN_ARGMAX]; + let mut value: c_int = 0; + let mut len = core::mem::size_of::() as size_t; + + let ret = unsafe { + sysctl( + mib.as_mut_ptr(), + mib.len() as _, + (&mut value as *mut c_int).cast(), + &mut len, + core::ptr::null_mut(), + 0, + ) + }; + + if ret == 0 && value > 0 { + value as usize + } else { + 4096 + } +} + +pub(super) fn read_procargs2(pid: Pid) -> Result> { + // Read raw `KERN_PROCARGS2` for the target pid. + // This is shared by process-info building and env/argv enumeration to keep parsing consistent. + let mut scratch = vec![0u8; argmax()]; + + let mut mib: [c_int; 3] = [CTL_KERN, KERN_PROCARGS2, pid as _]; + let mut len = scratch.len() as size_t; + + let ret = unsafe { + sysctl( + mib.as_mut_ptr(), + mib.len() as _, + scratch.as_mut_ptr().cast(), + &mut len, + core::ptr::null_mut(), + 0, + ) + }; + + if ret != 0 || len < 4 { + return Err(Error(ErrorOrigin::OsLayer, ErrorKind::Unknown)); + } + + scratch.truncate(len); + Ok(scratch) +} + +pub(super) fn parse_procargs2(data: &[u8]) -> Result { + if data.len() < 4 { + return Err(Error(ErrorOrigin::OsLayer, ErrorKind::Unknown)); + } + + let argc = u32::from_ne_bytes(data[..4].try_into().unwrap()) as usize; + let buf = &data[4..]; + + let mut idx = 0usize; + while idx < buf.len() && buf[idx] != 0 { + idx += 1; + } + if idx == buf.len() { + return Err(Error(ErrorOrigin::OsLayer, ErrorKind::Unknown)); + } + + let exec_path = String::from_utf8_lossy(&buf[..idx]).into_owned(); + + while idx < buf.len() && buf[idx] == 0 { + idx += 1; + } + + let mut argv = Vec::new(); + for _ in 0..argc { + if idx >= buf.len() { + break; + } + let start = idx; + while idx < buf.len() && buf[idx] != 0 { + idx += 1; + } + if idx > start { + argv.push(String::from_utf8_lossy(&buf[start..idx]).into_owned()); + } + if idx < buf.len() { + idx += 1; + } + } + + // Parse the trailing NUL-separated `KEY=VALUE` block. + // Some macOS processes only expose argc/argv here (no env entries), which is a valid outcome. + let mut environ = Vec::new(); + while idx < buf.len() { + let start = idx; + while idx < buf.len() && buf[idx] != 0 { + idx += 1; + } + + if idx == start { + break; + } + + let entry = String::from_utf8_lossy(&buf[start..idx]); + if let Some((name, value)) = entry.split_once('=') { + environ.push((name.to_string(), value.to_string())); + } + + if idx < buf.len() { + idx += 1; + } + } + + Ok(ProcArgs { + exec_path, + argv, + environ, + }) +} + fn get_arch() -> ArchitectureIdent { static ARCH: OnceLock = OnceLock::new(); @@ -45,7 +171,6 @@ fn get_arch() -> ArchitectureIdent { pub struct MacOs { info: OsInfo, - scratch: Box<[u8]>, //cached_modules: Vec, } @@ -59,7 +184,6 @@ impl Clone for MacOs { fn clone(&self) -> Self { Self { info: self.info.clone(), - scratch: self.scratch.clone(), //cached_modules: vec![], } } @@ -75,8 +199,6 @@ impl Default for MacOs { Self { info, - // TODO: call KERN_ARGMAX to figure out the actual value. - scratch: vec![0; 4096].into_boxed_slice(), //cached_modules: vec![], } } @@ -109,79 +231,37 @@ impl Os for MacOs { } fn process_info_by_pid(&mut self, pid: Pid) -> Result { - let us = std::process::id(); - + // We query BSD info with the target `pid`. + // Using the caller pid here breaks name-based filtering. let bsd_info = - lp::proc_pid::pidinfo::(us as _, pid as _).map_err(|e| { + lp::proc_pid::pidinfo::(pid as _, 0).map_err(|e| { error!("bsd_info: {e}"); Error(ErrorOrigin::OsLayer, ErrorKind::Unknown) })?; // We could use lp::proc_pid::pidpath for path, but we already get it from procargs2 let (path, command_line): (ReprCString, ReprCString) = { - let mut name: [c_int; 3] = [CTL_KERN, KERN_PROCARGS2, pid as _]; - let mut len = self.scratch.len() - 4; - let ret = unsafe { - sysctl( - name.as_mut_ptr(), - name.len() as _, - self.scratch.as_mut_ptr().cast(), - &mut len, - core::ptr::null_mut(), - 0, - ) - }; - - if ret != 0 { - len = 0; - } - - // We skip the first arg, because that is the executable path. - let mut num_args = u32::from_ne_bytes(self.scratch[..4].try_into().unwrap()) + 1; - - let buf = &mut self.scratch[4..(4 + len)]; - - let mut start_idx = 0; - let mut start_idx_stripped = 0; - let mut idx = 0; - - for (i, b) in buf.iter_mut().enumerate() { - if num_args == 0 { - break; + let fallback_path = bsd_info + .pbi_comm + .iter() + .copied() + .map(|b| b as u8) + .take_while(|b| *b != 0) + .collect::>(); + let fallback_path = String::from_utf8_lossy(&fallback_path).into_owned(); + + // Prefer parsed procargs for executable path + argv, but always keep a safe fallback. + match read_procargs2(pid).and_then(|d| parse_procargs2(&d)) { + Ok(parsed) => { + let path = if parsed.exec_path.is_empty() { + fallback_path.as_str() + } else { + parsed.exec_path.as_str() + }; + (path.into(), parsed.argv.join(" ").into()) } - - if *b == 0 { - *b = b' '; - num_args -= 1; - if start_idx == 0 { - start_idx = i + 1; - start_idx_stripped = i + 1; - } else if start_idx_stripped == i { - num_args += 1; - start_idx_stripped = i + 1; - } - } - - idx = i; + Err(_) => (fallback_path.into(), "".into()), } - - let path = if start_idx == 0 { - let b = bsd_info.pbi_comm.split(|v| *v == 0).next().unwrap_or(&[]); - unsafe { &*(b as *const [_] as *const [u8]) } - } else { - &buf[..(start_idx - 1)] - }; - - ( - std::str::from_utf8(path).unwrap_or_default().into(), - std::str::from_utf8(if start_idx_stripped <= idx { - &buf[start_idx_stripped..idx] - } else { - &[] - }) - .unwrap_or_default() - .into(), - ) }; let name = path.split(&['/', '\\'][..]).last().unwrap().into(); @@ -227,7 +307,7 @@ impl Os for MacOs { /// /// # Arguments /// * `callback` - where to pass each matching module to. This is an opaque callback. - fn module_address_list_callback(&mut self, mut callback: AddressCallback) -> Result<()> { + fn module_address_list_callback(&mut self, _callback: AddressCallback) -> Result<()> { // TODO: build this with OSKextCopyLoadedKextInfo. /*self.cached_modules = procfs::modules() .map_err(|_| Error(ErrorOrigin::OsLayer, ErrorKind::UnableToReadDir))? @@ -246,7 +326,7 @@ impl Os for MacOs { /// /// # Arguments /// * `address` - address where module's information resides in - fn module_by_address(&mut self, address: Address) -> Result { + fn module_by_address(&mut self, _address: Address) -> Result { /*self.cached_modules .get(address.to_umem() as usize) .map(|km| ModuleInfo { diff --git a/src/macos/process.rs b/src/macos/process.rs index 9626b7e..0b1e559 100644 --- a/src/macos/process.rs +++ b/src/macos/process.rs @@ -16,6 +16,7 @@ const PROC_PIDREGIONPATHINFO: i32 = 8; use core::mem::MaybeUninit; use itertools::Itertools; +use std::ffi::CStr; #[repr(C)] #[allow(non_camel_case_types)] @@ -116,12 +117,87 @@ impl Clone for MacProcess { } impl MacProcess { + /// Fallback module discovery based on `PROC_PIDREGIONPATHINFO`. + /// + /// This is used when dyld/task-port based module enumeration is unavailable. + /// We walk VM regions, keep file-backed entries with absolute paths, and merge + /// contiguous regions that resolve to the same path into a single module span. + /// The result is best-effort and may be less precise than dyld metadata. + fn update_cached_module_maps_from_regions(&mut self) -> Result<()> { + log::info!( + "Using region-path module fallback for pid {} (dyld/task-port path unavailable)", + self.info.pid + ); + self.cached_module_maps.clear(); + + let mut start = Address::NULL; + let end = Address::invalid(); + let mut prwpi: proc_regionwithpathinfo = unsafe { MaybeUninit::zeroed().assume_init() }; + + while start < end { + let size = core::mem::size_of::() as _; + let ret = unsafe { + proc_pidinfo( + self.info.pid as _, + PROC_PIDREGIONPATHINFO, + start.to_umem() as _, + &mut prwpi as *mut proc_regionwithpathinfo as *mut _, + size, + ) + }; + + if ret <= 0 { + break; + } + if ret < size { + return Err(Error(ErrorOrigin::OsLayer, ErrorKind::Unknown)); + } + + let region_start = Address::from(prwpi.prp_prinfo.pri_address); + let region_size = prwpi.prp_prinfo.pri_size as umem; + + if region_size == 0 { + break; + } + + let path = unsafe { + CStr::from_ptr(prwpi.prp_vip.vip_path.as_ptr().cast::()) + .to_string_lossy() + .into_owned() + }; + + // Only keep concrete filesystem mappings; anonymous/synthetic regions are ignored. + if !path.is_empty() && path.starts_with('/') { + if let Some(last) = self.cached_module_maps.last_mut() { + // Coalesce neighboring regions for the same file into one stable entry. + if last.2 == path && last.0 + last.1 == region_start { + last.1 += region_size; + } else { + self.cached_module_maps + .push((region_start, region_size, path)); + } + } else { + self.cached_module_maps + .push((region_start, region_size, path)); + } + } + + start = region_start + region_size; + } + + self.cached_module_maps.sort_by_key(|v| v.0); + + Ok(()) + } + pub fn try_new(info: ProcessInfo) -> Result { Ok(Self { - virt_mem: ProcessVirtualMemory::try_new(&info).map_err(|e| { - log::error!("Unable to get port"); - e - })?, + // Do not fail process construction just because task_for_pid is denied. + // This keeps name/pid process selection usable for metadata/envar paths. + virt_mem: ProcessVirtualMemory::try_new(&info).unwrap_or_else(|e| { + log::warn!("Unable to get task port for pid {}: {e:?}", info.pid); + ProcessVirtualMemory::new_unavailable(info.pid) + }), info, cached_maps: vec![], cached_module_maps: vec![], @@ -129,182 +205,206 @@ impl MacProcess { } pub fn update_cached_module_maps(&mut self) -> Result<()> { - let mut info: task_dyld_info = unsafe { MaybeUninit::zeroed().assume_init() }; - - self.cached_module_maps.clear(); + let dyld_result = (|| { + let mut info: task_dyld_info = unsafe { MaybeUninit::zeroed().assume_init() }; - let mut count = - (core::mem::size_of::() / core::mem::size_of::()) as _; - let ret = unsafe { - task_info( - self.virt_mem.port, - TASK_DYLD_INFO, - &mut info as *mut task_dyld_info as *mut _, - &mut count, - ) - }; - - if ret != KERN_SUCCESS { - return Err(Error(ErrorOrigin::OsLayer, ErrorKind::Unknown)); - } + self.cached_module_maps.clear(); - // 0 -> 32-bit fmt - // 1 -> 64-bit fmt - // We need to verify that the format is the same as our native pointer width (usize size), - // so that we don't misread nonsense. - if 4 * (1 + info.all_image_info_format) as usize != core::mem::size_of::() { - return Err(Error(ErrorOrigin::OsLayer, ErrorKind::NotSupported)); - } + let mut count = + (core::mem::size_of::() / core::mem::size_of::()) as _; + let port = self.virt_mem.ensure_port()?; + let ret = unsafe { + task_info( + port, + TASK_DYLD_INFO, + &mut info as *mut task_dyld_info as *mut _, + &mut count, + ) + }; - let infos = self.read::(info.all_image_info_addr.into())?; - - let mut left = infos.info_array_count as usize; - let mut info_buf = vec![dyld_image_info::default(); core::cmp::min(128, left)]; - - while left > 0 { - let pos = infos.info_array_count as usize - left; - - let size = core::cmp::min(left, info_buf.len()); - - self.read_into( - Address::from( - infos.dyld_image_info + pos * core::mem::size_of::(), - ), - &mut info_buf[..size], - )?; - left -= size; - - // And now, let's process the elements - for i in &info_buf[..size] { - // TODO: do the string reads concurrently - let name = self.read_char_string(i.image_file_path.into())?; - - let start = Address::from(i.image_load_address); - let mut end = start; - - // Now, we need to figure out the size of the image. To do this, iterate through - // proc_regioninfo and grab all entries with identical (non-zero) inode number. - let mut prwpi: proc_regionwithpathinfo = - unsafe { MaybeUninit::zeroed().assume_init() }; - let mut last_ino = 0; - - loop { - let size = core::mem::size_of::() as _; - let ret = unsafe { - proc_pidinfo( - self.info.pid as _, - PROC_PIDREGIONPATHINFO, - end.to_umem() as _, - &mut prwpi as *mut proc_regionwithpathinfo as *mut _, - size, - ) - }; - - if ret <= 0 { - break; - } - if ret < size { - panic!("Invalid size returned from proc_pidinfo ({ret} vs {size})"); - } + if ret != KERN_SUCCESS { + return Err(Error(ErrorOrigin::OsLayer, ErrorKind::Unknown)); + } - let ino = prwpi.prp_vip.vip_vi.vi_stat.vst_ino; + // 0 -> 32-bit fmt + // 1 -> 64-bit fmt + // We need to verify that the format is the same as our native pointer width (usize size), + // so that we don't misread nonsense. + if 4 * (1 + info.all_image_info_format) as usize != core::mem::size_of::() { + return Err(Error(ErrorOrigin::OsLayer, ErrorKind::NotSupported)); + } - // FIXME: if we get ino 0 at the start, this usually indicates that we are - // dealing with a submap - submaps are how dyld's are shared across processes, - // meaning, practically shared libraries from dyld cache will be reported to be - // of size 0. - if ino == 0 || (last_ino != 0 && ino != last_ino) { - break; - } + let infos = self.read::(info.all_image_info_addr.into())?; + + let mut left = infos.info_array_count as usize; + let mut info_buf = vec![dyld_image_info::default(); core::cmp::min(128, left)]; + + while left > 0 { + let pos = infos.info_array_count as usize - left; + + let size = core::cmp::min(left, info_buf.len()); + + self.read_into( + Address::from( + infos.dyld_image_info + pos * core::mem::size_of::(), + ), + &mut info_buf[..size], + )?; + left -= size; + + // And now, let's process the elements + for i in &info_buf[..size] { + // TODO: do the string reads concurrently + let name = self.read_utf8_lossy(i.image_file_path.into(), 4096)?; + + let start = Address::from(i.image_load_address); + let mut end = start; + + // Now, we need to figure out the size of the image. To do this, iterate through + // proc_regioninfo and grab all entries with identical (non-zero) inode number. + let mut prwpi: proc_regionwithpathinfo = + unsafe { MaybeUninit::zeroed().assume_init() }; + let mut last_ino = 0; + + loop { + let size = core::mem::size_of::() as _; + let ret = unsafe { + proc_pidinfo( + self.info.pid as _, + PROC_PIDREGIONPATHINFO, + end.to_umem() as _, + &mut prwpi as *mut proc_regionwithpathinfo as *mut _, + size, + ) + }; - let len = prwpi.prp_prinfo.pri_size as umem; - end = Address::from(prwpi.prp_prinfo.pri_address) + len; + if ret <= 0 { + break; + } + if ret < size { + return Err(Error(ErrorOrigin::OsLayer, ErrorKind::Unknown)); + } - last_ino = ino; - } + let ino = prwpi.prp_vip.vip_vi.vi_stat.vst_ino; - let mut mod_sz = (end - start) as umem; + // FIXME: if we get ino 0 at the start, this usually indicates that we are + // dealing with a submap - submaps are how dyld's are shared across processes, + // meaning, practically shared libraries from dyld cache will be reported to be + // of size 0. + if ino == 0 || (last_ino != 0 && ino != last_ino) { + break; + } - // FIXME: figure out a way without parsing the mach file... - let _ = (|| { - let header = self.read::(start)?; + let len = prwpi.prp_prinfo.pri_size as umem; + end = Address::from(prwpi.prp_prinfo.pri_address) + len; - if header.sizeofcmds as usize > size::mb(16) { - return Err(ErrorKind::Unknown.into()); + last_ino = ino; } - let cmdaddr = start - + core::mem::size_of::() - + if header.magic == 0xfeedfacf { - 4 - } else if header.magic == 0xfeedface { - 0 - } else { + let mut mod_sz = (end - start) as umem; + + // FIXME: figure out a way without parsing the mach file... + let _ = (|| { + let header = self.read::(start)?; + + if header.sizeofcmds as usize > size::mb(16) { return Err(ErrorKind::Unknown.into()); - }; + } - let mut cmds = vec![0; header.sizeofcmds as usize]; - - self.read_raw_into(cmdaddr, &mut cmds[..])?; - - let view = DataView::from(&cmds[..]); - - let mut cmdoff = 0; - - let mut base_addr = None; - let mut all_seg_sz = 0; - - for _ in 0..header.ncmds { - let hdr = view.read::(cmdoff); - - if let Some((addr, sz, seg)) = if hdr.ty == LC_SEGMENT { - let addr = view.read::(cmdoff + 24); - let sz = view.read::(cmdoff + 24 + 4); - Some(( - addr as umem, - sz as umem, - view.read::(cmdoff + 24 + 16), - )) - } else if hdr.ty == LC_SEGMENT_64 { - let addr = view.read::(cmdoff + 24); - let sz = view.read::(cmdoff + 24 + 8); - Some(( - addr as umem, - sz as umem, - view.read::(cmdoff + 24 + 32), - )) - } else { - None - } { - // Skip __PAGEZERO segment that has no sections - // TODO: should we also check for protection flags? - if seg.nsects != 0 { - if base_addr.is_none() { - base_addr = Some(addr); + let cmdaddr = start + + core::mem::size_of::() + + if header.magic == 0xfeedfacf { + 4 + } else if header.magic == 0xfeedface { + 0 + } else { + return Err(ErrorKind::Unknown.into()); + }; + + let mut cmds = vec![0; header.sizeofcmds as usize]; + + self.read_raw_into(cmdaddr, &mut cmds[..])?; + + let view = DataView::from(&cmds[..]); + + let mut cmdoff = 0; + + let mut base_addr = None; + let mut all_seg_sz = 0; + + for _ in 0..header.ncmds { + let hdr = view.read::(cmdoff); + + if let Some((addr, sz, seg)) = if hdr.ty == LC_SEGMENT { + let addr = view.read::(cmdoff + 24); + let sz = view.read::(cmdoff + 24 + 4); + Some(( + addr as umem, + sz as umem, + view.read::(cmdoff + 24 + 16), + )) + } else if hdr.ty == LC_SEGMENT_64 { + let addr = view.read::(cmdoff + 24); + let sz = view.read::(cmdoff + 24 + 8); + Some(( + addr as umem, + sz as umem, + view.read::(cmdoff + 24 + 32), + )) + } else { + None + } { + // Skip __PAGEZERO segment that has no sections + // TODO: should we also check for protection flags? + if seg.nsects != 0 { + if base_addr.is_none() { + base_addr = Some(addr); + } + all_seg_sz = + core::cmp::max(all_seg_sz, addr - base_addr.unwrap() + sz); } - all_seg_sz = - core::cmp::max(all_seg_sz, addr - base_addr.unwrap() + sz); } - } - cmdoff += hdr.sz as usize; - } + cmdoff += hdr.sz as usize; + } - mod_sz = core::cmp::max(all_seg_sz, mod_sz); + mod_sz = core::cmp::max(all_seg_sz, mod_sz); - Result::Ok(()) - })(); + Result::Ok(()) + })(); - self.cached_module_maps.push((start, mod_sz, name)); + self.cached_module_maps.push((start, mod_sz, name)); + } } + + self.cached_module_maps.sort_by_key(|v| v.0); + + Ok(()) + })(); + + if dyld_result.is_ok() { + return Ok(()); } - self.cached_module_maps.sort_by_key(|v| v.0); + let dyld_err = dyld_result.err().unwrap(); + log::warn!( + "Falling back to proc region module enumeration for pid {}", + self.info.pid + ); + self.update_cached_module_maps_from_regions()?; + + if self.cached_module_maps.is_empty() { + log::warn!( + "Region-path module fallback produced no modules for pid {}; returning original dyld error", + self.info.pid + ); + return Err(dyld_err); + } Ok(()) } - pub fn update_cached_maps(&mut self, mut start: Address, end: Address) { + pub fn update_cached_maps(&mut self, mut start: Address, end: Address) -> Result<()> { let mut pri: proc_regioninfo = unsafe { MaybeUninit::zeroed().assume_init() }; let mut last_pri: proc_regioninfo = unsafe { MaybeUninit::zeroed().assume_init() }; @@ -326,7 +426,7 @@ impl MacProcess { break; } if ret < size { - panic!("Invalid size returned from proc_pidinfo ({ret} vs {size})"); + return Err(Error(ErrorOrigin::OsLayer, ErrorKind::Unknown)); } start = Address::from(pri.pri_address); @@ -382,6 +482,7 @@ impl MacProcess { } self.cached_maps.sort_by_key(|v| v.0); + Ok(()) } } @@ -406,7 +507,7 @@ impl Process for MacProcess { self.cached_module_maps .iter() .enumerate() - .filter(|_| target_arch.is_none() || Some(&self.info().sys_arch) == target_arch) + .filter(|_| target_arch.is_none() || Some(&self.info().proc_arch) == target_arch) .take_while(|(i, _)| { callback.call(ModuleAddressInfo { address: Address::from(*i as u64), @@ -428,7 +529,7 @@ impl Process for MacProcess { address: Address, architecture: ArchitectureIdent, ) -> Result { - if architecture != self.info.sys_arch { + if architecture != self.info.proc_arch { return Err(Error(ErrorOrigin::OsLayer, ErrorKind::NotFound)); } @@ -453,7 +554,7 @@ impl Process for MacProcess { .unwrap_or(path) .into(), path: path.into(), - arch: self.info.sys_arch, + arch: self.info.proc_arch, } }) .ok_or(Error(ErrorOrigin::OsLayer, ErrorKind::NotFound)) @@ -514,7 +615,9 @@ impl Process for MacProcess { end: Address, out: MemoryRangeCallback, ) { - self.update_cached_maps(start, end); + if self.update_cached_maps(start, end).is_err() { + return; + } self.cached_maps .iter() @@ -530,7 +633,7 @@ impl Process for MacProcess { }) .map(|(s, sz, perms)| { if s + sz > end { - let diff = s - end; + let diff = s + sz - end; (s, sz - diff as umem, perms) } else { (s, sz, perms) @@ -546,6 +649,53 @@ impl Process for MacProcess { .map(<_>::into) .feed_into(out); } + + fn envar_list_callback( + &mut self, + target_arch: Option<&ArchitectureIdent>, + mut callback: EnvVarCallback, + ) -> Result<()> { + if let Some(arch) = target_arch { + if *arch != self.info.proc_arch { + return Ok(()); + } + } + + let parsed = + super::read_procargs2(self.info.pid).and_then(|data| super::parse_procargs2(&data))?; + + for (name, value) in parsed.environ { + let info = EnvVarInfo { + name: name.into(), + value: value.into(), + address: memflow::types::Address::from(0), + arch: self.info.proc_arch, + }; + + if !callback.call(info) { + break; + } + } + + Ok(()) + } + + fn environment_block_address(&mut self, _architecture: ArchitectureIdent) -> Result
{ + // macOS does not expose a stable public env-block pointer like Windows. + // Return a sentinel and enumerate via sysctl in `envar_list_from_address`. + Ok(Address::NULL) + } + + fn envar_list_from_address( + &mut self, + _env_block: Address, + architecture: ArchitectureIdent, + callback: EnvVarCallback, + ) -> Result<()> { + // Same rationale as Linux: we can’t use env_block directly, but we *can* + // enumerate via sysctl. + self.envar_list_callback(Some(&architecture), callback) + } } impl MemoryView for MacProcess { diff --git a/src/windows/process.rs b/src/windows/process.rs index a9b81e7..1dfb479 100644 --- a/src/windows/process.rs +++ b/src/windows/process.rs @@ -4,8 +4,8 @@ use memflow::prelude::v1::*; use memflow::types::gap_remover::GapRemover; use super::{conv_err, ProcessVirtualMemory}; +use sysinfo::{Pid, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System}; -use windows::Wdk::System::Threading::{NtQueryInformationProcess, ProcessBasicInformation}; use windows::Win32::Foundation::{HINSTANCE, HMODULE, STILL_ACTIVE}; use windows::Win32::System::Memory::{ @@ -19,10 +19,7 @@ use windows::Win32::System::ProcessStatus::{ LIST_MODULES_64BIT, }; -use windows::Win32::System::Threading::PROCESS_BASIC_INFORMATION; - -use core::mem::{size_of, size_of_val}; -use core::ptr; +use core::mem::size_of; #[derive(Clone)] pub struct WindowsProcess { @@ -41,6 +38,41 @@ impl WindowsProcess { cached_modules: vec![], }) } + + fn process_exists(&self) -> bool { + let pid = Pid::from_u32(self.info.pid); + let mut system = System::new_with_specifics( + RefreshKind::nothing().with_processes(ProcessRefreshKind::everything()), + ); + system.refresh_processes(ProcessesToUpdate::Some(&[pid]), true); + system.process(pid).is_some() + } + + fn collect_envars(&self) -> Result> { + let pid = Pid::from_u32(self.info.pid); + let mut system = System::new_with_specifics( + RefreshKind::nothing().with_processes(ProcessRefreshKind::everything()), + ); + system.refresh_processes(ProcessesToUpdate::Some(&[pid]), true); + let proc = system + .process(pid) + .ok_or(Error(ErrorOrigin::OsLayer, ErrorKind::EnvarNotFound))?; + + let mut out = Vec::new(); + for entry in proc.environ() { + let s = entry.to_string_lossy().into_owned(); + if let Some((name, value)) = s.split_once('=') { + out.push(EnvVarInfo { + name: ReprCString::from(name), + value: ReprCString::from(value), + address: Address::NULL, + arch: self.info.proc_arch, + }); + } + } + + Ok(out) + } } cglue_impl_group!(WindowsProcess, ProcessInstance, {}); @@ -49,27 +81,10 @@ cglue_impl_group!(WindowsProcess, IntoProcessInstance, {}); impl Process for WindowsProcess { /// Retrieves the state of the process fn state(&mut self) -> ProcessState { - let mut info = PROCESS_BASIC_INFORMATION::default(); - - if unsafe { - NtQueryInformationProcess( - **self.virt_mem.handle, - ProcessBasicInformation, - &mut info as *mut _ as _, - size_of_val(&info) as _, - ptr::null_mut(), - ) - } - .ok() - .is_err() - { - return ProcessState::Unknown; - } - - if info.ExitStatus == STILL_ACTIVE { + if self.process_exists() { ProcessState::Alive } else { - ProcessState::Dead(info.ExitStatus.0) + ProcessState::Dead(STILL_ACTIVE.0) } } @@ -203,23 +218,13 @@ impl Process for WindowsProcess { /// /// This will generally be for the initial executable that was run fn primary_module_address(&mut self) -> Result
{ - let mut info = PROCESS_BASIC_INFORMATION::default(); - - unsafe { - NtQueryInformationProcess( - **self.virt_mem.handle, - ProcessBasicInformation, - &mut info as *mut _ as _, - size_of_val(&info) as _, - ptr::null_mut(), - ) - } - .ok() - .map_err(conv_err)?; - - // 0x10 is the offset of the `ImageBaseAddress` field in the `PEB64` structure - self.read_addr64(Address::from(info.PebBaseAddress as umem + 0x10)) - .data_part() + let proc_arch = self.info.proc_arch; + self.module_list_arch(Some(&proc_arch)) + .map_err(|_| Error(ErrorOrigin::OsLayer, ErrorKind::NotFound))? + .into_iter() + .next() + .map(|m| m.base) + .ok_or(Error(ErrorOrigin::OsLayer, ErrorKind::NotFound)) } fn module_import_list_callback( @@ -246,6 +251,49 @@ impl Process for WindowsProcess { memflow::os::util::module_section_list_callback(&mut self.virt_mem, info, callback) } + fn envar_list_callback( + &mut self, + target_arch: Option<&ArchitectureIdent>, + mut callback: EnvVarCallback, + ) -> Result<()> { + if let Some(target_arch) = target_arch { + if *target_arch != self.info.proc_arch { + return Ok(()); + } + } + + for ev in self.collect_envars()? { + if !callback.call(ev) { + break; + } + } + + Ok(()) + } + + fn environment_block_address(&mut self, _architecture: ArchitectureIdent) -> Result
{ + Err(Error(ErrorOrigin::OsLayer, ErrorKind::NotSupported)) + } + + fn envar_list_from_address( + &mut self, + _env_block: Address, + architecture: ArchitectureIdent, + mut callback: EnvVarCallback, + ) -> Result<()> { + if architecture != self.info.proc_arch { + return Ok(()); + } + + for ev in self.collect_envars()? { + if !callback.call(ev) { + break; + } + } + + Ok(()) + } + /// Retrieves the process info fn info(&self) -> &ProcessInfo { &self.info