Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mk/toolchain.mk
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ endif
CFLAGS += -std=gnu11 -D_GNU_SOURCE -Wall -Wextra -Wpedantic -Wshadow
CFLAGS += -Wno-unused-parameter
CFLAGS += -Iinclude -Isrc
LDFLAGS += -Wl,-z,noexecstack -Wl,-z,separate-code

# Build mode from Kconfig (fallback to BUILD= for unconfigured builds)
ifeq ($(CONFIG_BUILD_RELEASE),y)
Expand Down
39 changes: 38 additions & 1 deletion src/dispatch-exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ struct kbox_dispatch forward_mmap(const struct kbox_syscall_request *req,
/* W^X enforcement for mmap in trap/rewrite mode. */
if (request_uses_trap_signals(req)) {
int prot = (int) kbox_syscall_request_arg(req, 2);
int mmap_flags = (int) kbox_syscall_request_arg(req, 3);
long mmap_fd = to_dirfd_arg(kbox_syscall_request_arg(req, 4));
if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) {
if (ctx->verbose)
fprintf(stderr,
Expand All @@ -81,6 +83,16 @@ struct kbox_dispatch forward_mmap(const struct kbox_syscall_request *req,
prot, kbox_syscall_request_pid(req));
return kbox_dispatch_errno(EACCES);
}
if (mmap_fd != -1 && (mmap_flags & MAP_SHARED) != 0 &&
(prot & PROT_EXEC) != 0) {
if (ctx->verbose)
fprintf(stderr,
"kbox: mmap denied: shared executable file mapping "
"(prot=0x%x flags=0x%x fd=%ld pid=%u)\n",
prot, mmap_flags, mmap_fd,
kbox_syscall_request_pid(req));
return kbox_dispatch_errno(EACCES);
}
}

long fd = to_dirfd_arg(kbox_syscall_request_arg(req, 4));
Expand Down Expand Up @@ -165,6 +177,30 @@ struct kbox_dispatch forward_mprotect(const struct kbox_syscall_request *req,
return kbox_dispatch_errno(EACCES);
}

if ((prot & PROT_EXEC) != 0) {
int alias_rc = guest_range_has_shared_file_write_mapping(
kbox_syscall_request_pid(req), addr, len);
if (alias_rc < 0) {
if (ctx->verbose)
fprintf(stderr,
"kbox: mprotect denied: cannot inspect shared "
"mapping state at 0x%llx len=%llu (pid=%u)\n",
(unsigned long long) addr, (unsigned long long) len,
kbox_syscall_request_pid(req));
return kbox_dispatch_errno(EACCES);
}
if (alias_rc > 0) {
if (ctx->verbose)
fprintf(stderr,
"kbox: mprotect denied: executable promotion of "
"shared writable file mapping at 0x%llx len=%llu "
"(pid=%u)\n",
(unsigned long long) addr, (unsigned long long) len,
kbox_syscall_request_pid(req));
return kbox_dispatch_errno(EACCES);
}
}

/* Allow the mprotect to proceed via host kernel. If the page transitions
* to PROT_EXEC, JIT code on it will take the Tier 1 (RET_TRAP) slow path
* because it won't be in the BPF allow ranges. This is safe: un-rewritten
Expand Down Expand Up @@ -566,7 +602,8 @@ static struct kbox_dispatch trap_userspace_exec(
int launch_rc = kbox_loader_prepare_launch(&spec, &launch);
if (launch_rc < 0) {
const char msg[] = "kbox: trap exec: loader prepare failed\n";
(void) write(STDERR_FILENO, msg, sizeof(msg) - 1);
ssize_t n = write(STDERR_FILENO, msg, sizeof(msg) - 1);
(void) n;
_exit(127);
}
}
Expand Down
3 changes: 3 additions & 0 deletions src/dispatch-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ int translate_request_at_path(const struct kbox_syscall_request *req,
long *lkl_dirfd);
int should_continue_for_dirfd(long lkl_dirfd);
int guest_addr_is_writable(pid_t pid, uint64_t addr);
int guest_range_has_shared_file_write_mapping(pid_t pid,
uint64_t addr,
uint64_t len);
int dup_tracee_fd(pid_t pid, int tracee_fd);
void translate_proc_self(const char *path,
pid_t pid,
Expand Down
6 changes: 4 additions & 2 deletions src/dispatch-misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,10 @@ static struct kbox_dispatch dispatch_iov_transfer(
goto done;

if (is_write) {
if (mirror_host)
(void) write(STDOUT_FILENO, scratch, n);
if (mirror_host) {
ssize_t written = write(STDOUT_FILENO, scratch, n);
(void) written;
}
} else {
int wrc =
guest_mem_write(ctx, pid, base + seg_total, scratch, n);
Expand Down
67 changes: 59 additions & 8 deletions src/image.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <sys/resource.h>
#include <unistd.h>

#include "fd-table.h"
#include "kbox/compiler.h"
#include "kbox/elf.h"
#include "kbox/identity.h"
Expand Down Expand Up @@ -452,7 +453,10 @@ static int prepare_userspace_launch(const struct kbox_image_args *args,
* Fall back to zeros only if getrandom is unavailable.
*/
memset(launch_random, 0, sizeof(launch_random));
(void) getrandom(launch_random, sizeof(launch_random), 0);
{
ssize_t n = getrandom(launch_random, sizeof(launch_random), 0);
(void) n;
}

argv = build_loader_argv(command, args->extra_args, args->extra_argc);
if (!argv)
Expand Down Expand Up @@ -605,6 +609,36 @@ static void init_launch_ctx(struct kbox_supervisor_ctx *ctx,
ctx->web = web_ctx;
}

/* Install the exec-range seccomp filter and hand control to the guest.
 *
 * After the exec-range seccomp filter is installed, the success path must
 * branch directly into guest code. ASAN/UBSAN runtimes and stack-protector
 * epilogues may issue host syscalls from unregistered IPs, which the filter
 * rejects with EPERM; instrumentation is therefore disabled on this function.
 *
 * install_filter: callback that installs the filter for the given ranges;
 *                 a negative return value is treated as failure.
 * host_nrs, ranges, range_count: forwarded unchanged to install_filter.
 * transfer: state handed to kbox_loader_transfer_to_guest().
 *
 * Returns -1 on failure. errno is set to EINVAL when any pointer argument is
 * NULL or range_count is 0; errno is not modified here when install_filter
 * itself fails. On success the function does not return.
 */
__attribute__((no_stack_protector))
#if KBOX_HAS_ASAN
__attribute__((no_sanitize("address")))
#endif
__attribute__((no_sanitize("undefined"))) static int
install_exec_filter_and_transfer(
    int (*install_filter)(const struct kbox_host_nrs *h,
                          const struct kbox_syscall_trap_ip_range *trap_ranges,
                          size_t trap_range_count),
    const struct kbox_host_nrs *host_nrs,
    const struct kbox_syscall_trap_ip_range *ranges,
    size_t range_count,
    const struct kbox_loader_transfer_state *transfer)
{
    if (!install_filter || !host_nrs || !ranges || range_count == 0 ||
        !transfer) {
        errno = EINVAL;
        return -1;
    }
    if (install_filter(host_nrs, ranges, range_count) < 0)
        return -1;

    kbox_loader_transfer_to_guest(transfer);
    /* The transfer is noreturn. Say so explicitly so this non-void function
     * never appears to fall off its end, even if the callee's prototype is
     * seen without the noreturn attribute.
     */
    __builtin_unreachable();
}

static int run_trap_launch(const struct kbox_image_args *args,
const struct kbox_sysnrs *sysnrs,
struct kbox_loader_launch *launch,
Expand All @@ -619,6 +653,15 @@ static int run_trap_launch(const struct kbox_image_args *args,

if (!host_nrs || !launch)
return -1;
#if defined(__x86_64__) && KBOX_HAS_ASAN
(void) sysnrs;
(void) web_ctx;
fprintf(stderr,
"kbox: trap mode is unsupported in x86_64 ASAN builds; "
"use --syscall-mode=seccomp or BUILD=release\n");
errno = ENOTSUP;
return -1;
#endif
if (collect_trap_exec_ranges(launch, ranges, KBOX_LOADER_MAX_MAPPINGS,
&range_count) < 0) {
fprintf(stderr,
Expand Down Expand Up @@ -659,14 +702,15 @@ static int run_trap_launch(const struct kbox_image_args *args,
fprintf(stderr, "kbox: trap exec range[%zu]: %p-%p\n", ri,
(void *) ranges[ri].start, (void *) ranges[ri].end);
}
if (kbox_install_seccomp_trap_ranges(host_nrs, ranges, range_count) < 0) {
if (install_exec_filter_and_transfer(kbox_install_seccomp_trap_ranges,
host_nrs, ranges, range_count,
&launch->transfer) < 0) {
fprintf(stderr,
"kbox: trap launch failed: cannot install guest trap filter\n");
kbox_syscall_trap_runtime_uninstall(&runtime);
return -1;
}

kbox_loader_transfer_to_guest(&launch->transfer);
__builtin_unreachable();
}

static int run_rewrite_launch(const struct kbox_image_args *args,
Expand All @@ -685,6 +729,13 @@ static int run_rewrite_launch(const struct kbox_image_args *args,
if (!host_nrs || !launch)
return -1;
#if defined(__x86_64__)
#if KBOX_HAS_ASAN
fprintf(stderr,
"kbox: rewrite mode is unsupported in x86_64 ASAN builds; "
"use --syscall-mode=seccomp or BUILD=release\n");
errno = ENOTSUP;
return -1;
#endif
if (args->verbose) {
fprintf(
stderr,
Expand Down Expand Up @@ -735,17 +786,17 @@ static int run_rewrite_launch(const struct kbox_image_args *args,
return -1;
}

if (kbox_install_seccomp_rewrite_ranges(host_nrs, ranges, range_count) <
0) {
if (install_exec_filter_and_transfer(kbox_install_seccomp_rewrite_ranges,
host_nrs, ranges, range_count,
&launch->transfer) < 0) {
fprintf(
stderr,
"kbox: rewrite launch failed: cannot install guest trap filter\n");
kbox_syscall_trap_runtime_uninstall(&trap_runtime);
kbox_rewrite_runtime_reset(&rewrite_runtime);
return -1;
}

kbox_loader_transfer_to_guest(&launch->transfer);
__builtin_unreachable();
}

/* Public entry point. */
Expand Down
12 changes: 6 additions & 6 deletions src/loader-transfer.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@ int kbox_loader_prepare_transfer(const struct kbox_loader_handoff *handoff,
return 0;
}

/* Suppress ASAN: this function switches to the guest stack and jumps to
* guest code. ASAN's stack tracking doesn't know about the guest stack
* and would flag every subsequent stack access as a buffer overflow.
/* The transfer boundary must not run sanitizer/runtime callbacks or stack
* protector epilogues. It switches to the guest stack and branches into
* guest code after the exec-range seccomp filter is active.
*/
__attribute__((noreturn))
__attribute__((noreturn)) __attribute__((no_stack_protector))
#if KBOX_HAS_ASAN
__attribute__((no_sanitize("address")))
#endif
void kbox_loader_transfer_to_guest(
const struct kbox_loader_transfer_state *state)
__attribute__((no_sanitize("undefined"))) void
kbox_loader_transfer_to_guest(const struct kbox_loader_transfer_state *state)
{
if (!state)
__builtin_trap();
Expand Down
30 changes: 20 additions & 10 deletions src/net-slirp.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ static void *rx_reader_loop(void *arg)

/* Signal the eventfd to wake net_poll. */
uint64_t val = 1;
(void) write(rx_eventfd, &val, sizeof(val));
n = write(rx_eventfd, &val, sizeof(val));
(void) n;
}

out:
Expand Down Expand Up @@ -391,7 +392,8 @@ static void slirp_notify(void *opaque)
(void) opaque;
/* Wake the event loop so it picks up new SLIRP state. */
char c = 'W';
(void) write(wakeup_pipe[1], &c, 1);
ssize_t n = write(wakeup_pipe[1], &c, 1);
(void) n;
}

static const SlirpCb slirp_callbacks = {
Expand Down Expand Up @@ -659,7 +661,8 @@ static int net_tx(struct lkl_netdev *nd, struct iovec *iov, int cnt)

/* Wake the event loop to process the TX packet. */
char c = 'T';
(void) write(wakeup_pipe[1], &c, 1);
ssize_t n = write(wakeup_pipe[1], &c, 1);
(void) n;
return 0;
}

Expand Down Expand Up @@ -730,13 +733,15 @@ static int net_poll(struct lkl_netdev *nd)
unsigned tail = __atomic_load_n(&rx_tail, __ATOMIC_RELAXED);
if (head != tail) {
uint64_t val;
(void) read(rx_eventfd, &val, sizeof(val));
ssize_t n = read(rx_eventfd, &val, sizeof(val));
(void) n;
flags |= LKL_DEV_NET_POLL_RX;
} else {
struct pollfd pfd = {.fd = rx_eventfd, .events = POLLIN};
if (poll(&pfd, 1, 100) > 0) {
uint64_t val;
(void) read(rx_eventfd, &val, sizeof(val));
ssize_t n = read(rx_eventfd, &val, sizeof(val));
(void) n;
flags |= LKL_DEV_NET_POLL_RX;
}
}
Expand All @@ -751,7 +756,8 @@ static void net_poll_hup(struct lkl_netdev *nd)
(void) nd;
/* Write a zero-length frame to unblock the RX read. */
uint16_t zero = 0;
(void) write(rx_pipe[1], &zero, sizeof(zero));
ssize_t n = write(rx_pipe[1], &zero, sizeof(zero));
(void) n;
}

static void net_free(struct lkl_netdev *nd)
Expand Down Expand Up @@ -977,7 +983,8 @@ int kbox_net_add_device(void)
__atomic_store_n(&slirp_running, 0, __ATOMIC_RELAXED);
{
char c = 'Q';
(void) write(wakeup_pipe[1], &c, 1);
ssize_t n = write(wakeup_pipe[1], &c, 1);
(void) n;
}
pthread_join(slirp_thread, NULL);
err_eventfd:
Expand Down Expand Up @@ -1124,7 +1131,8 @@ void kbox_net_cleanup(void)

/* Wake the event loop and RX reader so they exit. */
char c = 'Q';
(void) write(wakeup_pipe[1], &c, 1);
ssize_t n = write(wakeup_pipe[1], &c, 1);
(void) n;
net_poll_hup(&slirp_netdev); /* wake rx_reader_thread via rx_pipe */

pthread_join(slirp_thread, NULL);
Expand Down Expand Up @@ -1203,7 +1211,8 @@ int kbox_net_register_socket(int lkl_fd, int supervisor_fd, int sock_type)

/* Wake the event loop. */
char c = 'R';
(void) write(wakeup_pipe[1], &c, 1);
ssize_t n = write(wakeup_pipe[1], &c, 1);
(void) n;
return 0;
}

Expand All @@ -1228,7 +1237,8 @@ void kbox_net_deregister_socket(int lkl_fd)

/* Wake the event loop so it closes the supervisor_fd promptly. */
char c = 'D';
(void) write(wakeup_pipe[1], &c, 1);
ssize_t n = write(wakeup_pipe[1], &c, 1);
(void) n;
}

#else /* !KBOX_HAS_SLIRP */
Expand Down
4 changes: 3 additions & 1 deletion src/procmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,13 @@ static void restore_default_and_reraise(int sig)

/* Report whether a sigaction entry dispatches to this file's fault_handler.
 *
 * sa: action to inspect; NULL yields 0.
 *
 * Returns non-zero when the installed handler is fault_handler, 0 otherwise.
 */
static int action_uses_fault_handler(const struct sigaction *sa)
{
    /* fault_handler has the three-argument SA_SIGINFO signature, so it does
     * not share a type with the one-argument sa_handler member. Compare the
     * addresses through uintptr_t instead of casting between incompatible
     * function pointer types.
     */
    const void *fault_handler_ptr = (const void *) (uintptr_t) &fault_handler;

    if (!sa)
        return 0;
    if ((sa->sa_flags & SA_SIGINFO) != 0)
        return sa->sa_sigaction == fault_handler;
    return (const void *) (uintptr_t) sa->sa_handler == fault_handler_ptr;
}

static void fault_handler(int sig, siginfo_t *info, void *ucontext)
Expand Down
Loading
Loading