-
Notifications
You must be signed in to change notification settings - Fork 1
nspawn: Stop overmounting /sys and /proc when a user namespace is used #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -440,6 +440,7 @@ int tmpfs_patch_options( | |
| int mount_sysfs(const char *dest, MountSettingsMask mount_settings) { | ||
| _cleanup_free_ char *top = NULL, *full = NULL;; | ||
| unsigned long extra_flags = 0; | ||
| bool is_mount_point; | ||
| int r; | ||
|
|
||
| top = path_join(dest, "/sys"); | ||
|
|
@@ -449,12 +450,9 @@ int mount_sysfs(const char *dest, MountSettingsMask mount_settings) { | |
| r = path_is_mount_point(top); | ||
| if (r < 0) | ||
| return log_error_errno(r, "Failed to determine if '%s' is a mountpoint: %m", top); | ||
| if (r == 0) { | ||
| /* If this is not a mount point yet, then mount a tmpfs there */ | ||
| r = mount_nofollow_verbose(LOG_ERR, "tmpfs", top, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, "mode=0555" TMPFS_LIMITS_SYS); | ||
| if (r < 0) | ||
| return r; | ||
| } else { | ||
| is_mount_point = r > 0; | ||
|
|
||
| if (is_mount_point) { | ||
| r = path_is_fs_type(top, SYSFS_MAGIC); | ||
| if (r < 0) | ||
| return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top); | ||
|
|
@@ -467,6 +465,21 @@ int mount_sysfs(const char *dest, MountSettingsMask mount_settings) { | |
| return 0; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: When […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: must-fix: When […] |
||
| } | ||
|
|
||
| /* When running in a user namespace, to enable mounting sysfs in nested containers, we cannot | ||
| * overmount it, so we mount it as is. While the user namespace won't be able to write to sysfs, we | ||
| * still have to mount it read-only as that's part of the container interface and various units | ||
| * conditionalize themselves based on whether /sys is mounted read-only or not. */ | ||
| if (!FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO)) | ||
| return mount_nofollow_verbose(LOG_ERR, "sysfs", top, "sysfs", | ||
| MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); | ||
|
|
||
| if (!is_mount_point) { | ||
| /* If this is not a mount point yet, then mount a tmpfs there */ | ||
| r = mount_nofollow_verbose(LOG_ERR, "tmpfs", top, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, "mode=0555" TMPFS_LIMITS_SYS); | ||
| if (r < 0) | ||
| return r; | ||
| } | ||
|
|
||
| full = path_join(top, "/full"); | ||
| if (!full) | ||
| return log_oom(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1642,6 +1642,19 @@ static int parse_argv(int argc, char *argv[]) { | |
| return 1; | ||
| } | ||
|
|
||
| static int container_in_userns(void) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: […] |
||
| int r; | ||
|
|
||
| if (arg_userns_mode != USER_NAMESPACE_NO) | ||
| return true; | ||
|
|
||
| r = namespace_is_init(NAMESPACE_USER); | ||
| if (r < 0 && !IN_SET(r, -EBADR, -ENOSYS)) | ||
| return log_error_errno(r, "Failed to check if in initial user namespace: %m"); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: When […] |
||
|
|
||
| return r == 0; | ||
| } | ||
|
|
||
| static int verify_arguments(void) { | ||
| int r; | ||
|
|
||
|
|
@@ -1654,6 +1667,15 @@ static int verify_arguments(void) { | |
|
|
||
| SET_FLAG(arg_mount_settings, MOUNT_USE_USERNS, arg_userns_mode != USER_NAMESPACE_NO); | ||
|
|
||
| /* When running in a user namespace the kernel will protect procfs/sysfs for us, so there's no need | ||
| * to mount them read-only or mask individual files. This applies both when we allocate a user | ||
| * namespace ourselves, and when nspawn is invoked from within an existing user namespace. */ | ||
| r = container_in_userns(); | ||
| if (r < 0) | ||
| return r; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: Clearing […] |
||
| if (r > 0) | ||
| arg_mount_settings &= ~MOUNT_APPLY_APIVFS_RO; | ||
|
|
||
| if (arg_private_network) | ||
| SET_FLAG(arg_mount_settings, MOUNT_APPLY_APIVFS_NETNS, arg_private_network); | ||
|
|
||
|
|
@@ -1735,9 +1757,6 @@ static int verify_arguments(void) { | |
| if (arg_userns_mode != USER_NAMESPACE_NO && (arg_mount_settings & MOUNT_APPLY_APIVFS_NETNS) && !arg_private_network) | ||
| return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid namespacing settings. Mounting sysfs with --private-users requires --private-network."); | ||
|
|
||
| if (arg_userns_mode != USER_NAMESPACE_NO && !(arg_mount_settings & MOUNT_APPLY_APIVFS_RO)) | ||
| return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot combine --private-users with read-write API VFS mounts."); | ||
|
|
||
| if (arg_expose_ports && !arg_private_network) | ||
| return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --port= without private networking."); | ||
|
|
||
|
|
@@ -2130,6 +2149,10 @@ static int setup_boot_id(void) { | |
| const char *to; | ||
| int r; | ||
|
|
||
| r = container_in_userns(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: must-fix: […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: suggestion: In both […]:
r = container_in_userns();
if (r < 0)
        return r;
if (r > 0)
        return 0;
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: must-fix: […]
r = container_in_userns();
if (r < 0)
        return r;
if (r > 0)
        return 0; |
||
| if (r != 0) | ||
| return r; | ||
|
|
||
| /* Generate a new randomized boot ID, so that each boot-up of the container gets a new one */ | ||
|
|
||
| r = tempfn_random_child("/run", "proc-sys-kernel-random-boot-id", &path); | ||
|
|
@@ -2539,6 +2562,10 @@ static int setup_kmsg(int fd_inner_socket) { | |
|
|
||
| assert(fd_inner_socket >= 0); | ||
|
|
||
| r = container_in_userns(); | ||
| if (r != 0) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude: must-fix: Same issue as […]:
r = container_in_userns();
if (r < 0)
        return r;
if (r > 0)
        return 0; |
||
| return r; | ||
|
|
||
| BLOCK_WITH_UMASK(0000); | ||
|
|
||
| /* We create the kmsg FIFO as a temporary file in /run, but immediately delete it after bind mounting it to | ||
|
|
@@ -5793,9 +5820,14 @@ static int run_container( | |
| (void) sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, pid); | ||
|
|
||
| /* Retrieve the kmsg fifo allocated by inner child */ | ||
| fd_kmsg_fifo = receive_one_fd(fd_inner_socket_pair[0], 0); | ||
| if (fd_kmsg_fifo < 0) | ||
| return log_error_errno(fd_kmsg_fifo, "Failed to receive kmsg fifo from inner child: %m"); | ||
| r = container_in_userns(); | ||
| if (r < 0) | ||
| return r; | ||
| if (r == 0) { | ||
| fd_kmsg_fifo = receive_one_fd(fd_inner_socket_pair[0], 0); | ||
| if (fd_kmsg_fifo < 0) | ||
| return log_error_errno(fd_kmsg_fifo, "Failed to receive kmsg fifo from inner child: %m"); | ||
| } | ||
|
|
||
| if (arg_expose_ports) { | ||
| r = expose_port_watch_rtnl(event, fd_inner_socket_pair[0], on_address_change, expose_args, &rtnl); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Claude: nit:
`bool is_mount_point` is declared at the top but not initialized until several lines later. It could be declared at the point of initialization for clarity: