diff --git a/project/Cargo.lock b/project/Cargo.lock index 61ae73d92..0df32c726 100644 --- a/project/Cargo.lock +++ b/project/Cargo.lock @@ -1512,6 +1512,7 @@ dependencies = [ "etcd-client", "ipnetwork", "libcni", + "libcontainer", "libvault", "log", "quinn", diff --git a/project/common/BUCK b/project/common/BUCK index 5ecf187c8..789467f8a 100644 --- a/project/common/BUCK +++ b/project/common/BUCK @@ -32,6 +32,7 @@ rust_library( "//third-party/rust/crates/chrono/0.4.42:chrono", "//third-party/rust/crates/etcd-client/0.17.0:etcd-client", "//third-party/rust/crates/ipnetwork/0.17.0:ipnetwork", + "//third-party/rust/crates/libcontainer/0.5.7:libcontainer", "//third-party/rust/crates/log/0.4.29:log", "//third-party/rust/crates/quinn/0.11.9:quinn", "//third-party/rust/crates/serde/1.0.228:serde", diff --git a/project/common/Cargo.toml b/project/common/Cargo.toml index 7fb23d9ff..910ee61ad 100644 --- a/project/common/Cargo.toml +++ b/project/common/Cargo.toml @@ -24,4 +24,5 @@ bytes = { workspace = true } log = { workspace = true } libvault = { workspace = true } uuid = { workspace = true, features = ["v4", "serde"] } -etcd-client = { workspace = true } \ No newline at end of file +etcd-client = { workspace = true } +libcontainer = { workspace = true } \ No newline at end of file diff --git a/project/common/src/lib.rs b/project/common/src/lib.rs index 03e5e8375..dad6e6163 100644 --- a/project/common/src/lib.rs +++ b/project/common/src/lib.rs @@ -18,6 +18,7 @@ pub mod _private { pub mod lease; pub mod quic; +use libcontainer::oci_spec::runtime::Capability; pub use libvault::modules::pki::types::{IssueCertificateRequest, IssueCertificateResponse}; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -224,6 +225,74 @@ pub struct Resource { pub memory: Option, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +pub struct SecurityContext { + #[serde(rename = "runAsUser")] + pub run_as_user: Option, + + #[serde(rename = "runAsGroup")] + pub run_as_group: Option, + + #[serde(default)] + pub privileged: Option, + + #[serde(rename = "allowPrivilegeEscalation", default)] + pub allow_privilege_escalation: Option, + + pub capabilities: Option, +} + +/// The pattern should be like: "CAP_AUDIT_CONTROL" refers to +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +pub struct Capabilities { + #[serde(default)] + pub add: Vec, // List of capabilities to add + + #[serde(default)] + pub drop: Vec, // List of capabilities to drop +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +pub struct EnvVar { + pub name: String, + + #[serde(default)] + pub value: Option, + // #[serde(rename = "valueFrom", default)] + // pub value_from: Option, +} + +// #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +// pub struct EnvVarSource { +// #[serde(rename = "secretKeyRef", default)] +// pub secret_key_ref: Option, // Selects a key of a Secret + +// #[serde(rename = "configMapKeyRef", default)] +// pub config_map_key_ref: Option, // Selects a key of a ConfigMap + +// #[serde(rename = "fieldRef", default)] +// pub field_ref: Option, // Selects a field of the Pod/container +// } + +// // Placeholder structures for EnvVarSource fields +// pub struct SecretKeySelector { /* ... */ } +// pub struct ConfigMapKeySelector { /* ... */ } +// pub struct ObjectFieldSelector { /* ... */ } + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +pub struct VolumeMount { + pub name: String, + + #[serde(rename = "mountPath")] + pub mount_path: String, + + #[serde(rename = "readOnly", default)] + pub read_only: Option, + + #[serde(rename = "subPath", default)] + pub sub_path: Option, +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] pub struct ContainerSpec { pub name: String, @@ -246,6 +315,22 @@ pub struct ContainerSpec { #[serde(rename = "startupProbe", default)] pub startup_probe: Option, + + // Handle the security + #[serde(rename = "securityContext", default)] + pub security_context: Option, + + #[serde(default)] + pub env: Option>, + + #[serde(rename = "volumeMounts", default)] + pub volume_mounts: Option>, + + #[serde(default)] + pub command: Option>, + + #[serde(rename = "workingDir", default)] + pub working_dir: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] diff --git a/project/libscheduler/tests/xline_test.rs b/project/libscheduler/tests/xline_test.rs index fadd04385..b506e0ad0 100644 --- a/project/libscheduler/tests/xline_test.rs +++ b/project/libscheduler/tests/xline_test.rs @@ -150,6 +150,11 @@ fn create_test_pod(name: &str, cpu_limit: Option<&str>, memory_limit: Option<&st liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }], init_containers: vec![], tolerations: vec![], diff --git a/project/rkl/src/commands/compose/mod.rs b/project/rkl/src/commands/compose/mod.rs index 9e5ac1684..93ca48acb 100644 --- a/project/rkl/src/commands/compose/mod.rs +++ b/project/rkl/src/commands/compose/mod.rs @@ -233,6 +233,11 @@ impl ComposeManager { liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }; // handle the services volume name diff --git a/project/rkl/src/commands/container/mod.rs b/project/rkl/src/commands/container/mod.rs index 0b7211b79..e993395e3 100644 --- a/project/rkl/src/commands/container/mod.rs +++ b/project/rkl/src/commands/container/mod.rs @@ -1,3 +1,4 @@ +use crate::oci; use crate::{ commands::{ Exec, ExecContainer, @@ -9,7 +10,7 @@ use crate::{ }, cri::cri_api::{ContainerConfig, CreateContainerResponse, Mount}, rootpath, - task::{add_cap_net_admin, add_cap_net_raw, get_cni}, + task::get_cni, }; use anyhow::{Ok, Result, anyhow}; use chrono::{DateTime, Local}; @@ -173,6 +174,11 @@ impl ContainerRunner { liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }, config: None, container_id: container_id.to_string(), @@ -309,11 +315,7 @@ impl ContainerRunner { // build the process path let mut process = ProcessBuilder::default().cwd(&config.working_dir).build()?; - let mut capabilities = process.capabilities().clone().unwrap(); - // add the CAP_NET_RAW - add_cap_net_raw(&mut capabilities); - add_cap_net_admin(&mut capabilities); - + let capabilities = oci::new_linux_capabilities_with_defaults(); process.set_capabilities(Some(capabilities)); process.set_terminal(Some(false)); process.set_args(Some(config.args.clone())); @@ -322,7 +324,7 @@ impl ContainerRunner { spec.set_process(Some(process)); - let mut mounts = convert_oci_mounts(&config.mounts)?; + let mut mounts = convert_cri_to_oci_mounts(&config.mounts)?; let existing_mounts = spec.mounts().clone().unwrap_or_default(); mounts.extend(existing_mounts); spec.set_mounts(Some(mounts)); @@ -394,7 +396,7 @@ impl ContainerRunner { } pub fn setup_container_network(&self) -> Result { - // single container status + // If this container is not from compose, then setup the config file again if self.determine_single_status() { setup_network_conf()?; } @@ -499,7 +501,7 @@ impl ContainerRunner { } } -fn convert_oci_mounts(mounts: &Vec) -> Result> { +fn convert_cri_to_oci_mounts(mounts: &Vec) -> Result> { let mut oci_mounts: Vec = vec![]; for mount in mounts { let oci_mount = MountBuilder::default() @@ -793,6 +795,11 @@ mod test { liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }; let runner = ContainerRunner::from_spec(spec.clone(), None).unwrap(); assert_eq!(runner.container_id, "demo1"); @@ -819,6 +826,11 @@ mod test { liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }, None, ) diff --git a/project/rkl/src/daemon/probe.rs b/project/rkl/src/daemon/probe.rs index eeda1d338..1a5292999 100644 --- a/project/rkl/src/daemon/probe.rs +++ b/project/rkl/src/daemon/probe.rs @@ -869,6 +869,11 @@ mod tests { ..Probe::default() }), startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }], init_containers: vec![], tolerations: vec![], diff --git a/project/rkl/src/lib.rs b/project/rkl/src/lib.rs index 29fc630c3..12e8dd350 100644 --- a/project/rkl/src/lib.rs +++ b/project/rkl/src/lib.rs @@ -3,6 +3,7 @@ pub mod commands; pub mod cri; pub mod daemon; pub mod network; +pub mod oci; mod rootpath; pub mod task; //mod status_access; diff --git a/project/rkl/src/main.rs b/project/rkl/src/main.rs index ef8963f5e..ee2ffdc24 100644 --- a/project/rkl/src/main.rs +++ b/project/rkl/src/main.rs @@ -9,6 +9,7 @@ mod cri; mod daemon; mod dns; mod network; +mod oci; mod quic; mod rootpath; mod task; diff --git a/project/rkl/src/oci/mod.rs b/project/rkl/src/oci/mod.rs new file mode 100644 index 000000000..2ed49ea0f --- /dev/null +++ b/project/rkl/src/oci/mod.rs @@ -0,0 +1,300 @@ +use lazy_static::lazy_static; +use libcontainer::oci_spec::runtime::{ + Capability, LinuxBuilder, LinuxCapabilities, LinuxNamespaceBuilder, LinuxNamespaceType, + ProcessBuilder, Spec, +}; + +use crate::cri::cri_api::ContainerConfig; +use anyhow::{Result, anyhow}; +use common::ContainerSpec; +use oci_spec::runtime::{LinuxNamespace, RootBuilder}; +use std::collections::HashSet; + +// Default supported capabilities (from docker's implementation) +lazy_static! { + pub static ref DEFAULT_CAPABILITIES: Vec = { + vec![ + Capability::Chown, + Capability::DacOverride, + Capability::Fsetid, + Capability::Fowner, + Capability::Mknod, + Capability::NetRaw, + Capability::Setgid, + Capability::Setuid, + Capability::Setfcap, + Capability::Setpcap, + Capability::NetBindService, + Capability::SysChroot, + Capability::Kill, + Capability::AuditWrite, + ] + }; +} + +pub struct OCISpecGenerator { + inner_spec: Spec, + container_config: ContainerConfig, + container_spec: ContainerSpec, + pause_pid: Option, +} + +impl OCISpecGenerator { + pub fn new(config: &ContainerConfig, spec: &ContainerSpec, pause_pid: Option) -> Self { + Self { + inner_spec: Spec::default(), + container_config: config.clone(), + container_spec: spec.clone(), + pause_pid, + } + } + + fn get_capabilities(&self) -> Result { + let mut capabilities = new_linux_capabilities_with_defaults(); + + // Handle the passed capabilities from users + if let Some(ctx) = &self.container_spec.security_context { + // FIXME: Handle the privileged Mode + // Note: Privileged mode typically requires additional configurations, such as: + // - Disable Seccomp filters. + // - Disable AppArmor/SELinux profiles. + // - Allow access to all /dev devices. + // give all the available capabilities + if let Some(p) = ctx.privileged + && p + { + set_all_capabilities(&mut capabilities); + } + + if let Some(caps) = &ctx.capabilities { + caps.add.iter().for_each(|cap| { + add_cap(*cap, &mut capabilities); + }); + + caps.drop.iter().for_each(|cap| { + drop_cap(cap, &mut capabilities); + }); + } + } + + Ok(capabilities) + } + + pub fn process_set(&mut self) -> Result<()> { + let mut process = ProcessBuilder::default().build()?; + + // Choose from container_spec(first) or container_config(from container's image) + let arg = if self.container_spec.args.is_empty() { + self.container_config.args.clone() + } else { + self.container_spec.args.clone() + }; + process.set_args(Some(arg)); + + let capabilities = self.get_capabilities()?; + process.set_capabilities(Some(capabilities)); + + self.inner_spec.set_process(Some(process)); + Ok(()) + } + + pub fn generate(mut self) -> Result { + let root = RootBuilder::default().readonly(false).build()?; + self.inner_spec.set_root(Some(root)); + + let namespaces = self + .create_container_namespaces() + .map_err(|e| anyhow!("failed to setup Linux namespace: {e}"))?; + + self.process_set() + .map_err(|e| anyhow!("failed to setup oci process: {e}"))?; + + let mut linux_builder = LinuxBuilder::default().namespaces(namespaces); + + if let Some(linux_config) = &self.container_config.linux + && let Some(resources) = &linux_config.resources + { + linux_builder = linux_builder.resources(&resources.clone()); + } + + let linux = linux_builder.build()?; + self.inner_spec.set_linux(Some(linux)); + + Ok(self.inner_spec) + } + + fn create_container_namespaces(&self) -> Result> { + let mut namespaces = Vec::new(); + + // Mount and Cgroup will always be specific + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Mount) + .build()?, + ); + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Cgroup) + .build()?, + ); + + if let Some(pid) = self.pause_pid { + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Pid) + .path(format!("/proc/{pid}/ns/pid")) + .build()?, + ); + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Network) + .path(format!("/proc/{pid}/ns/net")) + .build()?, + ); + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Ipc) + .path(format!("/proc/{pid}/ns/ipc")) + .build()?, + ); + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Uts) + .path(format!("/proc/{pid}/ns/uts")) + .build()?, + ); + } else { + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Pid) + .build()?, + ); + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Network) + .build()?, + ); + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Ipc) + .build()?, + ); + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::Uts) + .build()?, + ); + } + + Ok(namespaces) + } +} + +macro_rules! drop_capability_from_set { + ($caps:expr, $getter:ident, $setter:ident, $cap:expr) => { + let mut set = $caps.$getter().as_ref().cloned().unwrap_or_default(); + + set.remove($cap); + + $caps.$setter(Some(set)); + }; +} + +macro_rules! add_capability_to_set { + ($caps:expr, $getter:ident, $setter:ident, $cap:expr) => { + let mut set = $caps.$getter().as_ref().cloned().unwrap_or_default(); + + set.insert($cap.clone()); + + $caps.$setter(Some(set)); + }; +} + +fn get_default_set() -> Option> { + Some(DEFAULT_CAPABILITIES.iter().cloned().collect()) +} + +pub fn new_linux_capabilities_with_defaults() -> LinuxCapabilities { + let mut capabilities = LinuxCapabilities::default(); + + let default_set = get_default_set(); + capabilities.set_bounding(default_set.clone()); + capabilities.set_effective(default_set.clone()); + capabilities.set_inheritable(default_set.clone()); + capabilities.set_permitted(default_set.clone()); + capabilities.set_ambient(default_set); + + capabilities +} + +pub fn drop_cap(cap: &Capability, capabilities: &mut LinuxCapabilities) { + drop_capability_from_set!(capabilities, bounding, set_bounding, cap); + drop_capability_from_set!(capabilities, effective, set_effective, cap); + drop_capability_from_set!(capabilities, inheritable, set_inheritable, cap); + drop_capability_from_set!(capabilities, permitted, set_permitted, cap); + drop_capability_from_set!(capabilities, ambient, set_ambient, cap); +} + +pub fn add_cap(cap: Capability, capabilities: &mut LinuxCapabilities) { + add_capability_to_set!(capabilities, bounding, set_bounding, cap); + add_capability_to_set!(capabilities, effective, set_effective, cap); + add_capability_to_set!(capabilities, inheritable, set_inheritable, cap); + add_capability_to_set!(capabilities, permitted, set_permitted, cap); + add_capability_to_set!(capabilities, ambient, set_ambient, cap); +} + +pub fn set_all_capabilities(capabilities: &mut LinuxCapabilities) { + let all_caps = get_all_capabilities(); + capabilities.set_bounding(Some(all_caps.clone())); + capabilities.set_effective(Some(all_caps.clone())); + capabilities.set_inheritable(Some(all_caps.clone())); + capabilities.set_permitted(Some(all_caps.clone())); + capabilities.set_ambient(Some(all_caps)); +} +fn get_all_capabilities() -> HashSet { + [ + Capability::Chown, + Capability::DacOverride, + Capability::DacReadSearch, + Capability::Fowner, + Capability::Fsetid, + Capability::Kill, + Capability::Setgid, + Capability::Setuid, + Capability::Setpcap, + Capability::LinuxImmutable, + Capability::NetBindService, + Capability::NetBroadcast, + Capability::NetAdmin, + Capability::NetRaw, + Capability::IpcLock, + Capability::IpcOwner, + Capability::SysModule, + Capability::SysRawio, + Capability::SysChroot, + Capability::SysPtrace, + Capability::SysPacct, + Capability::SysAdmin, + Capability::SysBoot, + Capability::SysNice, + Capability::SysResource, + Capability::SysTime, + Capability::SysTtyConfig, + Capability::Mknod, + Capability::Lease, + Capability::AuditWrite, + Capability::AuditControl, + Capability::Setfcap, + Capability::MacOverride, + Capability::MacAdmin, + Capability::Syslog, + Capability::WakeAlarm, + Capability::BlockSuspend, + Capability::AuditRead, + Capability::Perfmon, + Capability::Bpf, + Capability::CheckpointRestore, + ] + .into_iter() + .collect::>() +} diff --git a/project/rkl/src/task.rs b/project/rkl/src/task.rs index 7f1322e60..36f16e65e 100644 --- a/project/rkl/src/task.rs +++ b/project/rkl/src/task.rs @@ -2,25 +2,22 @@ use crate::commands::container::config::ContainerConfigBuilder; use crate::commands::container::handle_image_typ; use crate::commands::{create, delete, kill, load_container, start}; use crate::cri::cri_api::{ - CreateContainerRequest, CreateContainerResponse, LinuxContainerConfig, LinuxContainerResources, - PodSandboxConfig, PodSandboxMetadata, PortMapping, Protocol, RemovePodSandboxRequest, - RemovePodSandboxResponse, RunPodSandboxRequest, RunPodSandboxResponse, StartContainerRequest, - StartContainerResponse, StopPodSandboxRequest, StopPodSandboxResponse, + ContainerConfig, CreateContainerRequest, CreateContainerResponse, LinuxContainerConfig, + LinuxContainerResources, PodSandboxConfig, PodSandboxMetadata, PortMapping, Protocol, + RemovePodSandboxRequest, RemovePodSandboxResponse, RunPodSandboxRequest, RunPodSandboxResponse, + StartContainerRequest, StartContainerResponse, StopPodSandboxRequest, StopPodSandboxResponse, }; +use crate::oci::{self, OCISpecGenerator}; use crate::rootpath; use anyhow::{Result, anyhow}; use common::{ContainerRes, ContainerSpec, PodTask}; use json::JsonValue; use libcni::rust_cni::cni::Libcni; use libcontainer::oci_spec::runtime::{ - Capability, LinuxBuilder, LinuxCapabilities, LinuxCpuBuilder, LinuxMemoryBuilder, - LinuxNamespaceBuilder, LinuxNamespaceType, LinuxResources, LinuxResourcesBuilder, - ProcessBuilder, Spec, + LinuxCpuBuilder, LinuxMemoryBuilder, LinuxResources, LinuxResourcesBuilder, }; use libcontainer::syscall::syscall::create_syscall; use liboci_cli::{Create, Delete, Kill, Start}; -use oci_spec::runtime::RootBuilder; -use std::fs; use std::fs::File; use std::io::{BufWriter, Read, Write}; use std::path::{Path, PathBuf}; @@ -130,14 +127,54 @@ impl TaskRunner { let config = request.config.unwrap_or_default(); let sandbox_id = config.metadata.unwrap_or_default().name.to_string(); - // get bundle path of pause container from labels - let bundle_path = self.task.metadata.labels.get("bundle").cloned().unwrap(); - // .unwrap_or(get_pause_bundle()?); + // 1. Get sandbox bundle path + let sandbox_spec = ContainerSpec { + name: "sandbox".to_string(), + // FIXME: SHOULD define a const variable image name + image: "pause:3.9".to_string(), + ports: vec![], + args: vec![], + resources: None, + liveness_probe: None, + readiness_probe: None, + startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, + }; + let (config_builder, bundle_path) = handle_image_typ(&sandbox_spec) + .map_err(|e| anyhow!("failed to get pause container's bundle_path: {e}"))?; + + // 2. build final oci specification config.json + let mut config = ContainerConfig::default(); + if let Some(mut config_b) = config_builder { + config = config_b + .container_spec(sandbox_spec.clone())? + .clone() + .build(); + } + let oci_spec = oci::OCISpecGenerator::new(&config, &sandbox_spec, None) + .generate() + .map_err(|e| anyhow!("failed to generate sandbox pause oci spec: {e}"))?; + + let config_path = format!("{bundle_path}/config.json"); + if !Path::new(&config_path).exists() { + let file = File::create(&config_path)?; + let mut writer = BufWriter::new(file); + serde_json::to_writer_pretty(&mut writer, &oci_spec)?; + writer.flush()?; + } + let bundle_dir = PathBuf::from(&bundle_path); if !bundle_dir.exists() { return Err(anyhow!("Bundle directory does not exist")); } + info!("Get sandbox {sandbox_id}'s bundle path: {bundle_path}"); + + // 3. Create container use cri let create_args = Create { bundle: bundle_dir.clone(), console_socket: None, @@ -154,6 +191,7 @@ impl TaskRunner { create(create_args, root_path.clone(), false) .map_err(|e| anyhow!("Failed to create container: {}", e))?; + // 4. Start container use cri let start_args = Start { container_id: sandbox_id.clone(), }; @@ -187,6 +225,8 @@ impl TaskRunner { .unwrap_or("") .to_string(); self.pause_pid = Some(pid_i32); + // let podip = runner.ip().unwrap().to_string(); + info!("podip:{podip}"); let response = RunPodSandboxResponse { pod_sandbox_id: sandbox_id, @@ -265,67 +305,11 @@ impl TaskRunner { let pause_pid = self .pause_pid .ok_or_else(|| anyhow!("Pause container PID is not set"))?; - // create OCI Spec - let mut spec = Spec::default(); - - let root = RootBuilder::default() - .readonly(false) - .build() - .unwrap_or_default(); - - spec.set_root(Some(root)); - - let namespaces = vec![ - LinuxNamespaceBuilder::default() - .typ(LinuxNamespaceType::Pid) - .path(format!("/proc/{pause_pid}/ns/pid")) - .build()?, - LinuxNamespaceBuilder::default() - .typ(LinuxNamespaceType::Network) - .path(format!("/proc/{pause_pid}/ns/net")) - .build()?, - LinuxNamespaceBuilder::default() - .typ(LinuxNamespaceType::Ipc) - .path(format!("/proc/{pause_pid}/ns/ipc")) - .build()?, - LinuxNamespaceBuilder::default() - .typ(LinuxNamespaceType::Uts) - .path(format!("/proc/{pause_pid}/ns/uts")) - .build()?, - LinuxNamespaceBuilder::default() - .typ(LinuxNamespaceType::Mount) - .build()?, - LinuxNamespaceBuilder::default() - .typ(LinuxNamespaceType::Cgroup) - .build()?, - ]; - - let mut linux = LinuxBuilder::default().namespaces(namespaces); - if let Some(x) = &config.linux - && let Some(r) = &x.resources - { - linux = linux.resources(r); - } - let linux = linux.build()?; - spec.set_linux(Some(linux)); - - let mut process = ProcessBuilder::default().build()?; - - // set args - let arg = if container_spec.args.is_empty() { - config.args.clone() - } else { - container_spec.args.clone() - }; - - process.set_args(Some(arg)); - let mut capabilities = process.capabilities().clone().unwrap(); - add_cap_net_raw(&mut capabilities); - process.set_capabilities(Some(capabilities)); - - spec.set_process(Some(process)); - // [image_specification] check if config's spec + let generator = OCISpecGenerator::new(config, container_spec, Some(pause_pid)); + let spec = generator.generate().map_err(|e| { + anyhow!("failed to build OCI Specification for container {container_id}: {e}") + })?; let bundle_path = if let Some(image_spec) = &config.image { image_spec.image.clone() @@ -343,16 +327,16 @@ impl TaskRunner { if !bundle_dir.exists() { return Err(anyhow!("Bundle directory does not exist")); } - // write into config.json + + // FIXME: If there is a config.json in bundle (which is unexpected in production), keep it + // Expected behavior: the container should own it's unique bundle path let config_path = format!("{bundle_path}/config.json"); - if Path::new(&config_path).exists() { - fs::remove_file(&config_path) - .map_err(|e| anyhow!("Failed to remove existing config.json: {}", e))?; + if !Path::new(&config_path).exists() { + let file = File::create(&config_path)?; + let mut writer = BufWriter::new(file); + serde_json::to_writer_pretty(&mut writer, &spec)?; + writer.flush()?; } - let file = File::create(&config_path)?; - let mut writer = BufWriter::new(file); - serde_json::to_writer_pretty(&mut writer, &spec)?; - writer.flush()?; let create_args = Create { bundle: bundle_path.clone().into(), @@ -364,7 +348,6 @@ impl TaskRunner { container_id: container_id.clone(), }; - // get root_path let root_path = rootpath::determine(None, &*create_syscall()) .map_err(|e| anyhow!("Failed to determine root path: {}", e))?; @@ -678,50 +661,6 @@ pub fn get_cni() -> Result { Ok(cni) } -pub fn add_cap_net_admin(capabilities: &mut LinuxCapabilities) { - let mut bounding = capabilities.bounding().clone().unwrap(); - bounding.insert(Capability::NetAdmin); - capabilities.set_bounding(Some(bounding)); - - let mut effective = capabilities.effective().clone().unwrap(); - effective.insert(Capability::NetAdmin); - capabilities.set_effective(Some(effective)); - - let mut inheritable = capabilities.inheritable().clone().unwrap(); - inheritable.insert(Capability::NetAdmin); - capabilities.set_inheritable(Some(inheritable)); - - let mut permitted = capabilities.permitted().clone().unwrap(); - permitted.insert(Capability::NetAdmin); - capabilities.set_permitted(Some(permitted)); - - let mut ambient = capabilities.ambient().clone().unwrap(); - ambient.insert(Capability::NetAdmin); - capabilities.set_ambient(Some(ambient)); -} - -pub fn add_cap_net_raw(capabilities: &mut LinuxCapabilities) { - let mut bounding = capabilities.bounding().clone().unwrap(); - bounding.insert(Capability::NetRaw); - capabilities.set_bounding(Some(bounding)); - - let mut effective = capabilities.effective().clone().unwrap(); - effective.insert(Capability::NetRaw); - capabilities.set_effective(Some(effective)); - - let mut inheritable = capabilities.inheritable().clone().unwrap(); - inheritable.insert(Capability::NetRaw); - capabilities.set_inheritable(Some(inheritable)); - - let mut permitted = capabilities.permitted().clone().unwrap(); - permitted.insert(Capability::NetRaw); - capabilities.set_permitted(Some(permitted)); - - let mut ambient = capabilities.ambient().clone().unwrap(); - ambient.insert(Capability::NetRaw); - capabilities.set_ambient(Some(ambient)); -} - #[cfg(test)] mod test { use super::*; diff --git a/project/rkl/tests/test_common.rs b/project/rkl/tests/test_common.rs index 31f98f4fd..8e98f7247 100644 --- a/project/rkl/tests/test_common.rs +++ b/project/rkl/tests/test_common.rs @@ -52,6 +52,11 @@ where liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }], init_containers: vec![], tolerations: vec![], diff --git a/project/rkl/tests/test_single_container.rs b/project/rkl/tests/test_single_container.rs index e382debdc..a10d5d074 100644 --- a/project/rkl/tests/test_single_container.rs +++ b/project/rkl/tests/test_single_container.rs @@ -31,6 +31,11 @@ where liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, } } diff --git a/project/rks/tests/test_deployment.rs b/project/rks/tests/test_deployment.rs index 1adc49f9d..7e5561b4a 100644 --- a/project/rks/tests/test_deployment.rs +++ b/project/rks/tests/test_deployment.rs @@ -397,6 +397,11 @@ async fn test_deployment_hash_collision() -> Result<()> { spec: PodSpec { node_name: None, containers: vec![ContainerSpec { + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, name: "blocker".to_string(), image: "./blocker-image".to_string(), ports: Vec::new(), @@ -705,6 +710,11 @@ fn create_test_deployment(name: &str, replicas: i32) -> Deployment { liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }], init_containers: Vec::new(), tolerations: Vec::new(), diff --git a/project/rks/tests/test_garbage_collector.rs b/project/rks/tests/test_garbage_collector.rs index 3a5c821a8..8052de154 100644 --- a/project/rks/tests/test_garbage_collector.rs +++ b/project/rks/tests/test_garbage_collector.rs @@ -63,6 +63,11 @@ fn pod_with_meta(name: &str, uid: Uuid, owners: Option>) -> }), liveness_probe: None, readiness_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, startup_probe: None, }; @@ -132,6 +137,11 @@ fn replicaset_with_meta(name: &str, uid: Uuid, replicas: i32) -> ReplicaSet { liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }], init_containers: vec![], tolerations: vec![], diff --git a/project/rks/tests/test_replicaset.rs b/project/rks/tests/test_replicaset.rs index de2ee60e7..e9b1eb9d1 100644 --- a/project/rks/tests/test_replicaset.rs +++ b/project/rks/tests/test_replicaset.rs @@ -95,6 +95,11 @@ fn make_test_replicaset(name: &str, replicas: i32) -> ReplicaSet { liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }], init_containers: Vec::new(), tolerations: Vec::new(), @@ -457,6 +462,11 @@ async fn test_replicaset_adopts_orphan_pods() -> Result<()> { liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }], init_containers: Vec::new(), tolerations: Vec::new(), diff --git a/project/rks/tests/test_scheduler.rs b/project/rks/tests/test_scheduler.rs index 2128cc21e..e1af2cb8c 100644 --- a/project/rks/tests/test_scheduler.rs +++ b/project/rks/tests/test_scheduler.rs @@ -122,6 +122,11 @@ fn create_test_pod(name: &str, cpu_limit: Option<&str>, memory_limit: Option<&st liveness_probe: None, readiness_probe: None, startup_probe: None, + security_context: None, + env: None, + volume_mounts: None, + command: None, + working_dir: None, }], init_containers: vec![], tolerations: vec![], diff --git a/third-party/rust/crates/system-configuration/0.6.1/BUCK b/third-party/rust/crates/system-configuration/0.6.1/BUCK index 04d2694f5..f53ef23c1 100644 --- a/third-party/rust/crates/system-configuration/0.6.1/BUCK +++ b/third-party/rust/crates/system-configuration/0.6.1/BUCK @@ -23,6 +23,7 @@ rust_library( crate = "system_configuration", crate_root = "vendor/src/lib.rs", edition = "2021", + compatible_with = ["prelude//os:macos"], rustc_flags = ["@$(location :system-configuration-manifest[env_flags])"], visibility = ["PUBLIC"], deps = [ diff --git a/third-party/rust/crates/windows-future/0.2.1/BUCK b/third-party/rust/crates/windows-future/0.2.1/BUCK index cc12a9af9..64035bc9b 100644 --- a/third-party/rust/crates/windows-future/0.2.1/BUCK +++ b/third-party/rust/crates/windows-future/0.2.1/BUCK @@ -23,6 +23,7 @@ rust_library( crate = "windows_future", crate_root = "vendor/src/lib.rs", edition = "2021", + compatible_with = ["prelude//os:windows"], rustc_flags = ["@$(location :windows-future-manifest[env_flags])"], visibility = ["PUBLIC"], deps = [ diff --git a/third-party/rust/crates/windows/0.52.0/BUCK b/third-party/rust/crates/windows/0.52.0/BUCK index 1b524d227..497700978 100644 --- a/third-party/rust/crates/windows/0.52.0/BUCK +++ b/third-party/rust/crates/windows/0.52.0/BUCK @@ -23,6 +23,7 @@ rust_library( crate = "windows", crate_root = "vendor/src/lib.rs", edition = "2021", + compatible_with = ["prelude//os:windows"], features = [ "Wdk", "Wdk_System", diff --git a/third-party/rust/crates/windows/0.61.3/BUCK b/third-party/rust/crates/windows/0.61.3/BUCK index fff6acf99..0850e078e 100644 --- a/third-party/rust/crates/windows/0.61.3/BUCK +++ b/third-party/rust/crates/windows/0.61.3/BUCK @@ -23,6 +23,7 @@ rust_library( crate = "windows", crate_root = "vendor/src/lib.rs", edition = "2021", + compatible_with = ["prelude//os:windows"], features = [ "Win32", "Win32_Foundation", diff --git a/third-party/rust/crates/winreg/0.50.0/BUCK b/third-party/rust/crates/winreg/0.50.0/BUCK index a6bff39e5..5865e1c4c 100644 --- a/third-party/rust/crates/winreg/0.50.0/BUCK +++ b/third-party/rust/crates/winreg/0.50.0/BUCK @@ -23,6 +23,7 @@ rust_library( crate = "winreg", crate_root = "vendor/src/lib.rs", edition = "2018", + compatible_with = ["prelude//os:windows"], rustc_flags = ["@$(location :winreg-manifest[env_flags])"], visibility = ["PUBLIC"], deps = [