Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion scripts/run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,37 @@ expect_output_count()
rm -f "$OUTPUT"
}

# guest_has_test NAME
# Succeeds when /opt/tests/NAME is an executable file inside the guest rootfs.
# The check runs in the guest via "$KBOX image"; its stderr is discarded so a
# missing test does not clutter the report.
guest_has_test()
{
    test_prog="$1"
    "$KBOX" image -S "$ROOTFS" -- \
        /bin/sh -c "test -x /opt/tests/${test_prog}" 2> /dev/null
}

# require_guest_test NAME
# Like guest_has_test, but a missing test program is a hard failure: a FAIL
# line is printed, the FAIL counter is bumped, and 1 is returned. Returns 0
# when the test program is present in the rootfs.
require_guest_test()
{
    test_prog="$1"

    if ! guest_has_test "$test_prog"; then
        printf " %-40s ${RED}FAIL${NC} (missing from rootfs)\n" "$test_prog"
        FAIL=$((FAIL + 1))
        return 1
    fi

    return 0
}

# Probe once whether --syscall-mode=rewrite works in this build.
#   available   -> /bin/true ran successfully under rewrite mode
#   unsupported -> the binary reported the known x86_64 ASAN limitation
#   broken      -> any other failure (reported as FAIL later)
# The probe output goes to a temp file so the diagnostic can be inspected.
rewrite_mode_state="broken"
rewrite_mode_probe=$(mktemp)
if run_with_timeout "$KBOX" image -S "$ROOTFS" --syscall-mode=rewrite -- /bin/true \
    > "$rewrite_mode_probe" 2>&1; then
    rewrite_mode_state="available"
else
    if grep -q "rewrite mode is unsupported in x86_64 ASAN builds" \
        "$rewrite_mode_probe"; then
        rewrite_mode_state="unsupported"
    fi
fi
rm -f "$rewrite_mode_probe"

echo "=== kbox integration tests ==="
echo " binary: ${KBOX}"
echo " rootfs: ${ROOTFS}"
Expand Down Expand Up @@ -300,7 +331,7 @@ echo ""
echo "--- Guest test programs ---"

for test_prog in dup-test clock-test signal-test path-escape-test errno-test; do
if "$KBOX" image -S "$ROOTFS" -- /bin/sh -c "test -x /opt/tests/${test_prog}" 2> /dev/null; then
if guest_has_test "$test_prog"; then
expect_success "$test_prog" \
"$KBOX" image -S "$ROOTFS" -- "/opt/tests/${test_prog}"
else
Expand All @@ -319,6 +350,31 @@ else
SKIP=$((SKIP + 1))
fi

echo ""
echo "--- Rewrite security ---"

# Dispatch on the result of the earlier rewrite-mode probe:
#   available   -> run both JIT W^X tests (a missing test binary is a FAIL)
#   unsupported -> count both tests as skipped
#   otherwise   -> rewrite mode itself is broken; record one FAIL
case "$rewrite_mode_state" in
    available)
        if require_guest_test "jit-spray-test"; then
            expect_output "jit-spray-test" "PASS: jit_spray_boundary" \
                "$KBOX" image -S "$ROOTFS" --syscall-mode=rewrite \
                -- "/opt/tests/jit-spray-test"
        fi

        if require_guest_test "jit-alias-test"; then
            expect_output "jit-alias-test" "PASS: jit_alias_blocked" \
                "$KBOX" image -S "$ROOTFS" --syscall-mode=rewrite \
                -- "/opt/tests/jit-alias-test"
        fi
        ;;
    unsupported)
        for t in jit-spray-test jit-alias-test; do
            printf " %-40s ${YELLOW}SKIP${NC} (x86_64 ASAN rewrite unsupported)\n" "$t"
            SKIP=$((SKIP + 1))
        done
        ;;
    *)
        printf " %-40s ${RED}FAIL${NC} (rewrite mode unavailable)\n" "rewrite-smoke"
        FAIL=$((FAIL + 1))
        ;;
esac

# ---- Networking (requires --net / SLIRP support) ----
echo ""
echo "--- Networking ---"
Expand Down
30 changes: 19 additions & 11 deletions src/dispatch-exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ struct kbox_dispatch forward_mmap(const struct kbox_syscall_request *req,
/* W^X enforcement for mprotect in trap/rewrite mode.
*
* Reject simultaneous PROT_WRITE|PROT_EXEC to prevent JIT spray attacks.
* On none->X transitions, scan the page for syscall/sysenter/SVC instructions
* and add them to the origin map for rewrite-mode caller validation.
* On writable->executable transitions in trap/rewrite mode, scan the promoted
* region and fail closed if it contains syscall instructions.
*
* In seccomp mode, this is a no-op: CONTINUE lets the host kernel handle it.
*/
Expand Down Expand Up @@ -199,17 +199,25 @@ struct kbox_dispatch forward_mprotect(const struct kbox_syscall_request *req,
kbox_syscall_request_pid(req));
return kbox_dispatch_errno(EACCES);
}
{
struct kbox_rewrite_runtime *runtime =
kbox_rewrite_runtime_active();

if (kbox_rewrite_runtime_promote_exec_region(runtime, addr, len) <
0) {
if (ctx->verbose)
fprintf(stderr,
"kbox: mprotect denied: scan-on-X failed at "
"0x%llx len=%llu (pid=%u)\n",
(unsigned long long) addr, (unsigned long long) len,
kbox_syscall_request_pid(req));
return kbox_dispatch_errno(EACCES);
}
}
}

/* Allow the mprotect to proceed via host kernel. If the page transitions
* to PROT_EXEC, JIT code on it will take the Tier 1 (RET_TRAP) slow path
* because it won't be in the BPF allow ranges. This is safe: un-rewritten
* syscall instructions in JIT pages are caught by the SIGSYS handler.
*
* Full scan-on-X (rewriting JIT pages at mprotect time) is a future
* optimization: it would promote JIT pages from Tier 1 (~3us) to Tier 2
* (~41ns) but requires synchronous instruction scanning while the page
* is still writable, which adds latency to the mprotect call.
/* Clean pages can proceed. Pages with runtime-emitted syscall sites are
* denied by scan-on-X above.
*/
return kbox_dispatch_continue();
}
Expand Down
178 changes: 178 additions & 0 deletions src/rewrite.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "io-util.h"
#include "kbox/x86-decode.h"
#include "procmem.h"
#include "rewrite.h"
#include "syscall-nr.h"
#include "syscall-trap.h"
Expand All @@ -41,6 +42,7 @@
#define AARCH64_VENEER_SIZE 16u /* LDR x16, +8; BR x16; .quad target */
#define AARCH64_VENEER_SEARCH_STEP (64u * 1024u)
#define AARCH64_VENEER_SEARCH_LIMIT ((uint64_t) 127 * 1024 * 1024)
#define KBOX_REWRITE_SCAN_CHUNK (64u * 1024u)

#ifndef MAP_FIXED_NOREPLACE
#define MAP_FIXED_NOREPLACE 0x100000
Expand Down Expand Up @@ -89,6 +91,11 @@ static inline void store_active_rewrite_runtime(
__atomic_store_n(&active_rewrite_runtime, runtime, __ATOMIC_RELEASE);
}

/* Public accessor for the currently active rewrite runtime pointer, as
 * returned by load_active_rewrite_runtime().
 */
struct kbox_rewrite_runtime *kbox_rewrite_runtime_active(void)
{
    struct kbox_rewrite_runtime *current = load_active_rewrite_runtime();

    return current;
}

static void write_le32(unsigned char out[4], uint32_t value);
static int rewrite_is_wrapper_site(const struct kbox_rewrite_origin_map *map,
uint64_t origin_addr);
Expand Down Expand Up @@ -2302,6 +2309,8 @@ static int alloc_aarch64_veneer_page(struct kbox_rewrite_runtime *runtime,
#if defined(__aarch64__)
uint64_t page_size;
uint64_t search_lo, search_hi, addr;
uint64_t hint;
int64_t delta;
void *region;

if (!runtime || !veneer_base_out)
Expand All @@ -2322,6 +2331,24 @@ static int alloc_aarch64_veneer_page(struct kbox_rewrite_runtime *runtime,
&search_hi))
search_hi = UINT64_MAX - page_size;

hint = (near_addr + page_size) & ~(page_size - 1);
region = mmap((void *) (uintptr_t) hint, (size_t) page_size,
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (region != MAP_FAILED) {
delta = (int64_t) (uintptr_t) region - (int64_t) near_addr;
if (delta > -AARCH64_B_RANGE && delta < AARCH64_B_RANGE) {
runtime->trampoline_regions[runtime->trampoline_region_count]
.mapping = region;
runtime->trampoline_regions[runtime->trampoline_region_count].size =
(size_t) page_size;
runtime->trampoline_region_count++;
*veneer_base_out = (uint64_t) (uintptr_t) region;
return 0;
}
// cppcheck-suppress nullPointerOutOfMemory
munmap(region, (size_t) page_size);
}

/* Search upward first (likely to succeed, past the mapping). */
for (addr = (near_addr + page_size) & ~(page_size - 1); addr <= search_hi;
addr += AARCH64_VENEER_SEARCH_STEP) {
Expand Down Expand Up @@ -3651,6 +3678,157 @@ static int alloc_x86_64_trampoline_region(
#endif
}

/* Scan the memory range [addr, addr + len) for syscall-entry instructions.
 *
 * The range is read through kbox_current_read() in chunks of
 * KBOX_REWRITE_SCAN_CHUNK bytes. The scan stops at the first hit: the hit is
 * appended to @array via collect_sites_array_cb() and the function returns,
 * so after a successful call @array holds at most one new site. Callers only
 * need "clean or not" to fail closed.
 *
 *   x86-64:  any byte pair 0f 05 (SYSCALL) or 0f 34 (SYSENTER) at any byte
 *            offset. Consecutive chunks overlap by one byte so a pair that
 *            straddles a chunk boundary is still seen.
 *   AArch64: any SVC instruction at a 4-byte stride from @addr, regardless
 *            of its 16-bit immediate. Linux takes the syscall number from x8
 *            and ignores the immediate, so matching only "SVC #0" would let
 *            "SVC #1" slip through the scan.
 *
 * Returns 0 on success (zero or one site recorded), -1 on failure with errno
 * set: EINVAL for bad arguments, ENOTSUP for an unknown architecture, the
 * negated kbox_current_read() result on a read failure, or whatever
 * malloc() / collect_sites_array_cb() left in errno.
 */
static int collect_exec_region_sites(enum kbox_rewrite_arch arch,
                                     uint64_t addr,
                                     size_t len,
                                     struct site_array *array)
{
    const size_t chunk_size = KBOX_REWRITE_SCAN_CHUNK;
    unsigned char *buf;
    size_t overlap;
    size_t offset = 0;
    int saved_errno;
    int rc = -1;

    if (!array || len == 0) {
        errno = EINVAL;
        return -1;
    }

    if (arch == KBOX_REWRITE_ARCH_X86_64) {
        overlap = 1;
    } else if (arch == KBOX_REWRITE_ARCH_AARCH64) {
        overlap = 0;
    } else {
        errno = ENOTSUP;
        return -1;
    }
    /* The AArch64 scan relies on chunks being a whole number of words. */
    if ((chunk_size & 3u) != 0) {
        errno = EINVAL;
        return -1;
    }

    buf = malloc(chunk_size + overlap);
    if (!buf)
        return -1;

    while (offset < len) {
        size_t to_read = len - offset;
        int read_rc;

        if (to_read > chunk_size)
            to_read = chunk_size;
        read_rc = kbox_current_read(addr + offset, buf, to_read);
        if (read_rc < 0) {
            errno = -read_rc;
            goto out;
        }

        if (arch == KBOX_REWRITE_ARCH_X86_64) {
            for (size_t i = 0; i + 1 < to_read; i++) {
                struct kbox_rewrite_site site;

                if (buf[i] != 0x0f ||
                    (buf[i + 1] != 0x05 && buf[i + 1] != 0x34)) {
                    continue;
                }
                memset(&site, 0, sizeof(site));
                site.file_offset = offset + i;
                site.vaddr = addr + offset + i;
                site.segment_vaddr = addr;
                site.segment_mem_size = len;
                site.width = 2;
                site.original[0] = buf[i];
                site.original[1] = buf[i + 1];
                site.site_class = KBOX_REWRITE_SITE_UNKNOWN;
                /* One hit is enough to reject the region; stop scanning. */
                rc = collect_sites_array_cb(&site, array) < 0 ? -1 : 0;
                goto out;
            }
        } else {
            for (size_t i = 0; i + 3 < to_read; i += 4) {
                struct kbox_rewrite_site site;
                uint32_t insn = (uint32_t) buf[i] |
                                ((uint32_t) buf[i + 1] << 8) |
                                ((uint32_t) buf[i + 2] << 16) |
                                ((uint32_t) buf[i + 3] << 24);

                /* SVC is 1101_0100_000 imm16 000_01; mask out imm16. */
                if ((insn & 0xffe0001fu) != 0xd4000001u)
                    continue;
                memset(&site, 0, sizeof(site));
                site.file_offset = offset + i;
                site.vaddr = addr + offset + i;
                site.segment_vaddr = addr;
                site.segment_mem_size = len;
                site.width = 4;
                memcpy(site.original, buf + i, 4);
                site.site_class = KBOX_REWRITE_SITE_UNKNOWN;
                /* One hit is enough to reject the region; stop scanning. */
                rc = collect_sites_array_cb(&site, array) < 0 ? -1 : 0;
                goto out;
            }
        }

        if (to_read == len - offset)
            break;
        /* Step back by the overlap so boundary-straddling pairs are seen. */
        offset += to_read - overlap;
    }

    rc = 0;

out:
    /* free() may clobber errno on some libcs; keep the reported cause. */
    saved_errno = errno;
    free(buf);
    errno = saved_errno;
    return rc;
}

int kbox_rewrite_runtime_promote_exec_region(
struct kbox_rewrite_runtime *runtime,
uint64_t addr,
uint64_t len)
{
struct site_array sites;
enum kbox_rewrite_arch arch = KBOX_REWRITE_ARCH_UNKNOWN;
int rc = -1;

if (len == 0)
return 0;
if (len > (uint64_t) SIZE_MAX) {
errno = EOVERFLOW;
return -1;
}
if (runtime && runtime->installed)
arch = runtime->arch;
else {
#if defined(__x86_64__)
arch = KBOX_REWRITE_ARCH_X86_64;
#elif defined(__aarch64__)
arch = KBOX_REWRITE_ARCH_AARCH64;
#endif
}
if (arch == KBOX_REWRITE_ARCH_UNKNOWN) {
errno = ENOTSUP;
return -1;
}
memset(&sites, 0, sizeof(sites));

if (collect_exec_region_sites(arch, addr, (size_t) len, &sites) < 0)
goto out;
if (runtime && runtime->ctx && runtime->ctx->verbose) {
fprintf(stderr,
"kbox: scan-on-X: addr=0x%llx len=%llu sites=%zu arch=%s\n",
(unsigned long long) addr, (unsigned long long) len,
sites.count, kbox_rewrite_arch_name(arch));
}
if (sites.count == 0)
rc = 0;
else
errno = EACCES;

out:
free_site_array(&sites);
return rc;
}

static int collect_launch_sites(struct runtime_site_array *array,
const struct kbox_loader_launch *launch,
const struct kbox_host_nrs *host_nrs)
Expand Down
5 changes: 5 additions & 0 deletions src/rewrite.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,13 @@ int kbox_rewrite_apply_memfd_phase1_path_candidates(
size_t *applied_count,
struct kbox_rewrite_report *report);
void kbox_rewrite_runtime_reset(struct kbox_rewrite_runtime *runtime);
struct kbox_rewrite_runtime *kbox_rewrite_runtime_active(void);
int kbox_rewrite_runtime_install(struct kbox_rewrite_runtime *runtime,
struct kbox_supervisor_ctx *ctx,
struct kbox_loader_launch *launch);
/* Scan-on-X check for a region about to be made executable.
 *
 * Scans [addr, addr + len) for syscall-entry instruction encodings. @runtime
 * may be NULL; the architecture then comes from the build target.
 *
 * Returns 0 when @len is 0 or the region is clean. Returns -1 on rejection
 * or error, with errno set to EACCES when a syscall site was found,
 * EOVERFLOW when @len does not fit in size_t, or ENOTSUP when the
 * architecture is unknown.
 */
int kbox_rewrite_runtime_promote_exec_region(
struct kbox_rewrite_runtime *runtime,
uint64_t addr,
uint64_t len);

#endif /* KBOX_REWRITE_H */
Loading
Loading