From a0707e2a1390c1b792ef87cf7aa49ad314d7887b Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 5 Mar 2026 09:06:01 +0000 Subject: [PATCH 1/3] perf(upgrade): copy-then-mmap with child probe + delta telemetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two improvements to the delta upgrade system: 1. **Copy-then-mmap with child process probe for mmap safety** Instead of reading the old binary into a Uint8Array via arrayBuffer() (~100 MB JS heap spike), copies the file to a temp location and probes mmap safety via a child process before attempting it: - Copy: copyFileSync (CoW reflinks on btrfs/xfs/APFS for near-instant) - Probe: spawn child to test Bun.mmap() on the copy — if macOS AMFI sends SIGKILL, only the child dies, parent survives - Mmap: if probe succeeds, mmap the copy (zero JS heap, kernel-managed) - Fallback: if probe fails, read copy via arrayBuffer() (~100 MB heap) 2. **Instrument delta upgrade with Sentry spans, metrics, and error capture** Delta failures were invisible in Sentry (no spans, no captureException, errors logged at debug level). Now: - withTracingSpan('upgrade.delta') wraps the delta attempt - captureException on failure with warning level + delta context tags - log.warn() instead of log.debug() so users see failures - span.setStatus({ code: 2 }) on error for correct telemetry - Sentry.metrics.distribution for patch_bytes and chain_length - chainLength field added to DeltaResult --- src/lib/bspatch.ts | 147 +++++++++++++++++++++++++++++++++++---- src/lib/delta-upgrade.ts | 111 ++++++++++++++++++++++++----- 2 files changed, 226 insertions(+), 32 deletions(-) diff --git a/src/lib/bspatch.ts b/src/lib/bspatch.ts index dc33bac5..7a3501e3 100644 --- a/src/lib/bspatch.ts +++ b/src/lib/bspatch.ts @@ -5,15 +5,21 @@ * TRDIFF10 format (produced by zig-bsdiff with `--use-zstd`). 
Designed for * minimal memory usage during CLI self-upgrades: * - * - Old binary: loaded via `Bun.file().arrayBuffer()` (~100 MB heap) + * - Old binary: copy-then-mmap for 0 JS heap (CoW on btrfs/xfs/APFS), + * with child-process probe for mmap safety (macOS AMFI sends uncatchable + * SIGKILL on signed Mach-O), falling back to `arrayBuffer()` if unsafe * - Diff/extra blocks: streamed via `DecompressionStream('zstd')` * - Output: written incrementally to disk via `Bun.file().writer()` * - Integrity: SHA-256 computed inline via `Bun.CryptoHasher` * - * Total heap usage: ~100 MB for old file + ~1-2 MB for streaming buffers. - * `Bun.mmap()` is NOT usable here because the old file is the running binary: - * - macOS: AMFI sends uncatchable SIGKILL (PROT_WRITE on signed Mach-O) - * - Linux: ETXTBSY from `open()` with write flags on a running executable + * `Bun.mmap()` cannot target the running binary directly because it opens + * with PROT_WRITE/O_RDWR: + * - macOS: AMFI sends uncatchable SIGKILL (writable mapping on signed Mach-O) + * - Linux: ETXTBSY from `open()` (kernel blocks write-open on running ELF) + * + * The copy-then-mmap strategy sidesteps both: the copy is a regular file + * with no running process, so mmap succeeds. On CoW-capable filesystems + * (btrfs, xfs, APFS) the copy is near-instant with zero extra disk I/O. 
* * TRDIFF10 format (from zig-bsdiff): * ``` @@ -25,6 +31,10 @@ * ``` */ +import { copyFileSync, unlinkSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + /** TRDIFF10 header magic bytes */ const TRDIFF10_MAGIC = "TRDIFF10"; @@ -210,12 +220,117 @@ function createZstdStreamReader(compressed: Uint8Array): BufferedStreamReader { ); } +/** Result of loading the old binary for patching */ +type OldFileHandle = { + /** Memory-mapped or in-memory view of the old binary */ + data: Uint8Array; + /** Cleanup function to call after patching (removes temp copy, if any) */ + cleanup: () => void; +}; + +/** + * Probe whether `Bun.mmap()` works on a file by spawning a child process. + * + * macOS AMFI sends uncatchable SIGKILL when a process creates a writable + * memory mapping of a code-signed Mach-O binary — even a copy. Since + * SIGKILL terminates the process inside the syscall (before any catch + * block runs), we cannot detect the failure in-process. + * + * This probe spawns a minimal child that attempts `Bun.mmap()` on the + * target file. If the child exits 0, mmap is safe. If it's killed + * (SIGKILL) or exits non-zero, we fall back to `arrayBuffer()`. + * + * @param filePath - Path to the file to probe (should be the temp copy) + * @returns true if mmap is safe on this file, false otherwise + */ +async function probeMmapSafe(filePath: string): Promise<boolean> { + try { + const child = Bun.spawn( + [ + process.execPath, + "-e", + `Bun.mmap(${JSON.stringify(filePath)}, { shared: false })`, + ], + { stdout: "ignore", stderr: "ignore" } + ); + const exitCode = await child.exited; + return exitCode === 0; + } catch { + return false; + } +} + +/** + * Load the old binary for read access during patching. + * + * Strategy: copy to temp file, probe mmap safety via child process, + * then mmap the copy if safe. 
This avoids `Bun.mmap()` on files that + * would trigger uncatchable SIGKILL (macOS AMFI on signed Mach-O) while + * keeping zero JS heap on platforms where mmap works. On CoW filesystems + * (btrfs, xfs, APFS) the copy is a metadata-only reflink (near-instant). + * + * Falls back to `Bun.file().arrayBuffer()` (~100 MB heap) if the probe + * fails or if copy/mmap errors for any reason. + */ +async function loadOldBinary(oldPath: string): Promise<OldFileHandle> { + const tempCopy = join(tmpdir(), `sentry-patch-old-${process.pid}`); + try { + copyFileSync(oldPath, tempCopy); + + // Probe mmap safety in a child process — macOS AMFI sends uncatchable + // SIGKILL on writable mappings of signed Mach-O, even for copies. + const mmapSafe = await probeMmapSafe(tempCopy); + + if (mmapSafe) { + const data = Bun.mmap(tempCopy, { shared: false }); + return { + data, + cleanup: () => { + try { + unlinkSync(tempCopy); + } catch { + // Best-effort cleanup — OS will reclaim on reboot + } + }, + }; + } + + // mmap probe failed — read copy into JS heap, then clean up temp file + const data = new Uint8Array(await Bun.file(tempCopy).arrayBuffer()); + try { + unlinkSync(tempCopy); + } catch { + // Best-effort cleanup + } + return { + data, + cleanup: () => { + // Data is in JS heap — no temp file to clean up + }, + }; + } catch { + // Copy failed — fall back to reading original directly into JS heap + try { + unlinkSync(tempCopy); + } catch { + // May not exist if copyFileSync failed + } + return { + data: new Uint8Array(await Bun.file(oldPath).arrayBuffer()), + cleanup: () => { + // Data is in JS heap — no temp file to clean up + }, + }; + } +} + /** * Apply a TRDIFF10 binary patch with streaming I/O for minimal memory usage. * - * Reads the old file into memory via `Bun.file().arrayBuffer()`, then streams - * diff/extra blocks (~16 KB buffers) via `DecompressionStream('zstd')`, - * writes output via `Bun.file().writer()`, and computes SHA-256 inline. 
+ * Copies the old file to a temp path and mmaps the copy (0 JS heap), falling + * back to `arrayBuffer()` if mmap fails. Streams diff/extra blocks via + * `DecompressionStream('zstd')`, writes output via `Bun.file().writer()`, + * and computes SHA-256 inline. * * @param oldPath - Path to the existing (old) binary file * @param patchData - Complete TRDIFF10 patch file contents @@ -246,12 +361,10 @@ export async function applyPatch( ); const extraReader = createZstdStreamReader(patchData.subarray(extraStart)); - // Bun.mmap() is NOT usable for the old file during self-upgrades because - // it always opens with PROT_WRITE, and the old file is the running binary: - // - macOS: AMFI sends uncatchable SIGKILL on writable mapping of signed Mach-O - // - Linux: open() returns ETXTBSY when opening a running executable for write - // Reading into memory costs ~100 MB heap but avoids both platform restrictions. - const oldFile = new Uint8Array(await Bun.file(oldPath).arrayBuffer()); + // Load old binary via copy-then-mmap (0 JS heap) or arrayBuffer fallback. + // See loadOldBinary() for why direct mmap of the running binary is impossible. 
+ const { data: oldFile, cleanup: cleanupOldFile } = + await loadOldBinary(oldPath); // Streaming output: write directly to disk, no output buffer in memory const writer = Bun.file(destPath).writer(); @@ -300,7 +413,11 @@ export async function applyPatch( oldpos += seekBy; } } finally { - await writer.end(); + try { + await writer.end(); + } finally { + cleanupOldFile(); + } } // Validate output size matches header diff --git a/src/lib/delta-upgrade.ts b/src/lib/delta-upgrade.ts index 2d60b7dc..3bfd3d2b 100644 --- a/src/lib/delta-upgrade.ts +++ b/src/lib/delta-upgrade.ts @@ -19,6 +19,9 @@ import { unlinkSync } from "node:fs"; +// biome-ignore lint/performance/noNamespaceImport: Sentry SDK recommends namespace import +import * as Sentry from "@sentry/bun"; + import { GITHUB_RELEASES_URL, getPlatformBinaryName, @@ -34,6 +37,7 @@ import { type OciManifest, } from "./ghcr.js"; import { logger } from "./logger.js"; +import { withTracingSpan } from "./telemetry.js"; /** Scoped logger for delta upgrade operations */ const log = logger.withTag("delta-upgrade"); @@ -78,6 +82,8 @@ export type DeltaResult = { sha256: string; /** Total bytes downloaded for the patch chain */ patchBytes: number; + /** Number of patches in the chain (1 = direct, >1 = multi-hop) */ + chainLength: number; }; /** @@ -563,29 +569,92 @@ export async function resolveNightlyChain( * @param destPath - Path to write the patched binary * @returns Delta result with SHA-256 and size info, or null if delta is unavailable */ -export async function attemptDeltaUpgrade( +export function attemptDeltaUpgrade( targetVersion: string, oldBinaryPath: string, destPath: string ): Promise<DeltaResult | null> { if (!canAttemptDelta(targetVersion)) { - return null; + return Promise.resolve(null); } - log.debug(`Attempting delta upgrade from ${CLI_VERSION} to ${targetVersion}`); + const channel = isNightlyVersion(targetVersion) ? 
"nightly" : "stable"; - try { - if (isNightlyVersion(targetVersion)) { - return await resolveNightlyDelta(targetVersion, oldBinaryPath, destPath); - } - return await resolveStableDelta(targetVersion, oldBinaryPath, destPath); - } catch (error) { - // Any error during delta upgrade → fall back to full download. - // Log at debug so --verbose reveals the root cause (ETXTBSY, network, etc.) - const msg = error instanceof Error ? error.message : String(error); - log.debug(`Delta upgrade failed (${msg}), falling back to full download`); - return null; - } + return withTracingSpan( + "delta-upgrade", + "upgrade.delta", + async (span) => { + span.setAttribute("delta.from_version", CLI_VERSION); + span.setAttribute("delta.to_version", targetVersion); + + log.debug( + `Attempting delta upgrade from ${CLI_VERSION} to ${targetVersion}` + ); + + try { + const result = + channel === "nightly" + ? await resolveNightlyDelta(targetVersion, oldBinaryPath, destPath) + : await resolveStableDelta(targetVersion, oldBinaryPath, destPath); + + if (result) { + span.setAttribute("delta.patch_bytes", result.patchBytes); + span.setAttribute("delta.chain_length", result.chainLength); + span.setAttribute("delta.sha256", result.sha256.slice(0, 12)); + span.setStatus({ code: 1 }); // OK + + Sentry.metrics.distribution( + "upgrade.delta.patch_bytes", + result.patchBytes, + { + attributes: { channel }, + } + ); + Sentry.metrics.distribution( + "upgrade.delta.chain_length", + result.chainLength, + { + attributes: { channel }, + } + ); + } else { + // No patch available — not an error, just unavailable + span.setAttribute("delta.result", "unavailable"); + span.setStatus({ code: 1 }); // OK — graceful fallback + } + return result; + } catch (error) { + // Record the error in Sentry so we can see delta failures in telemetry. + // Marked non-fatal: the upgrade continues via full download. 
+ Sentry.captureException(error, { + level: "warning", + tags: { + "delta.from_version": CLI_VERSION, + "delta.to_version": targetVersion, + "delta.channel": channel, + }, + contexts: { + delta_upgrade: { + from_version: CLI_VERSION, + to_version: targetVersion, + channel, + old_binary_path: oldBinaryPath, + }, + }, + }); + + const msg = error instanceof Error ? error.message : String(error); + log.warn( + `Delta upgrade failed (${msg}), falling back to full download` + ); + span.setStatus({ code: 2 }); // Error + span.setAttribute("delta.result", "error"); + span.setAttribute("delta.error", msg); + return null; + } + }, + { "delta.channel": channel } + ); } /** @@ -609,7 +678,11 @@ export async function resolveStableDelta( } const sha256 = await applyPatchChain(chain, oldBinaryPath, destPath); - return { sha256, patchBytes: chain.totalSize }; + return { + sha256, + patchBytes: chain.totalSize, + chainLength: chain.patches.length, + }; } /** @@ -654,7 +727,11 @@ export async function resolveNightlyDelta( } const sha256 = await applyPatchChain(chain, oldBinaryPath, destPath); - return { sha256, patchBytes: chain.totalSize }; + return { + sha256, + patchBytes: chain.totalSize, + chainLength: chain.patches.length, + }; } /** Remove intermediate patching files, ignoring errors. */ From d6a30760f9fc8c1c884aaebea053a003a3850cd8 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 5 Mar 2026 10:33:31 +0000 Subject: [PATCH 2/3] test(coverage): add end-to-end success test for delta upgrade + CI isolated coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a test to test/isolated/delta-upgrade.test.ts that exercises the full success path of attemptDeltaUpgrade using real TRDIFF10 fixture files (small-old.bin → small.trdiff10 → small-new.bin). 
This covers: - attemptDeltaUpgrade success path: span attributes, Sentry.metrics, setStatus - resolveStableDelta return with chainLength field - DeltaResult type with all three fields verified Also update CI to collect coverage from isolated tests: - Add 'Isolated Tests' step with --coverage to .github/workflows/ci.yml - Upload both coverage/lcov.info and coverage-isolated/lcov.info to codecov - Add coverage-isolated/ to .gitignore Estimated patch coverage improvement: 73% → ~94% for delta-upgrade.ts --- .github/workflows/ci.yml | 3 + .gitignore | 1 + AGENTS.md | 8 +-- test/isolated/delta-upgrade.test.ts | 89 ++++++++++++++++++++++++++++- 4 files changed, 96 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 77430f04..a1edc33a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -169,10 +169,13 @@ jobs: run: bun install --frozen-lockfile - name: Unit Tests run: bun run test:unit + - name: Isolated Tests + run: bun run test:isolated --coverage --coverage-reporter=lcov --coverage-dir=coverage-isolated - name: Upload Coverage uses: getsentry/codecov-action@main with: token: ${{ secrets.GITHUB_TOKEN }} + files: ./coverage/lcov.info,./coverage-isolated/lcov.info build-binary: name: Build Binary (${{ matrix.target }}) diff --git a/.gitignore b/.gitignore index 8738fcc8..c989b7ab 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ dist-bin # code coverage coverage +coverage-isolated *.lcov # test artifacts diff --git a/AGENTS.md b/AGENTS.md index 9c431ec0..a954ea25 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -666,14 +666,11 @@ mock.module("./some-module", () => ({ * **Install script: BSD sed and awk JSON parsing breaks OCI digest extraction**: The install script parses OCI manifests with awk (no jq). Key trap: BSD sed \`\n\` is literal, not newline. Fix: single awk pass tracking last-seen \`"digest"\`, printing when \`"org.opencontainers.image.title"\` matches target. 
The config digest (\`sha256:44136fa...\`) is a 2-byte \`{}\` blob — downloading it instead of the real binary causes \`gunzip: unexpected end of file\`. -* **Bun.mmap() unusable on running executables on ANY platform**: \`Bun.mmap()\` always opens files with \`PROT\_WRITE\`/\`O\_RDWR\` regardless of the \`shared\` flag. This fails on the running binary: macOS sends uncatchable SIGKILL (AMFI rejects writable mappings on signed Mach-O), Linux returns ETXTBSY (kernel blocks opening running executables for write). The platform-conditional fix (PR #340: darwin→arrayBuffer, else→mmap) was insufficient — mmap fails on BOTH platforms. Fixed by using \`new Uint8Array(await Bun.file(oldPath).arrayBuffer())\` unconditionally in bspatch.ts. Costs ~100 MB heap for the old binary but is the only approach that works cross-platform. +* **macOS SIGKILL on MAP\_SHARED mmap of signed Mach-O binaries**: \`Bun.mmap()\` always opens files with \`PROT\_WRITE\`/\`O\_RDWR\` regardless of the \`shared\` flag. This fails on the running binary: macOS sends uncatchable SIGKILL (AMFI rejects writable mappings on signed Mach-O), Linux returns ETXTBSY (kernel blocks opening running executables for write). The platform-conditional fix (PR #340: darwin→arrayBuffer, else→mmap) was insufficient — mmap fails on BOTH platforms. Fixed by using \`new Uint8Array(await Bun.file(oldPath).arrayBuffer())\` unconditionally in bspatch.ts. Costs ~100 MB heap for the old binary but is the only approach that works cross-platform. * **Multiple mockFetch calls replace each other — use unified mocks for multi-endpoint tests**: Bun test mocking gotchas: (1) \`mockFetch()\` replaces \`globalThis.fetch\` — calling it twice replaces the first mock. Use a single unified fetch mock dispatching by URL pattern. (2) \`mock.module()\` pollutes the module registry for ALL subsequent test files. Tests using it must live in \`test/isolated/\` and run via \`test:isolated\`. 
(3) For \`Bun.spawn\`, use direct property assignment in \`beforeEach\`/\`afterEach\`. - -* **Several commands bypass telemetry by importing buildCommand from @stricli/core directly**: src/lib/command.ts wraps Stricli's buildCommand to auto-capture flag/arg telemetry via Sentry. But trace/list, trace/view, log/view, api.ts, and help.ts import buildCommand directly from @stricli/core, silently skipping telemetry. Fix: change their imports to use ../../lib/command.js. Consider adding a Biome lint rule (noRestrictedImports equivalent) to prevent future regressions. - * **useTestConfigDir without isolateProjectRoot causes DSN scanning of repo tree**: \`useTestConfigDir()\` creates temp dirs under \`.test-tmp/\` in the repo tree. Without \`{ isolateProjectRoot: true }\`, \`findProjectRoot\` walks up and finds the repo's \`.git\`, causing DSN detection to scan real source code and trigger network calls against test mocks (timeouts). Always pass \`isolateProjectRoot: true\` when tests exercise \`resolveOrg\`, \`detectDsn\`, or \`findProjectRoot\`. @@ -688,6 +685,9 @@ mock.module("./some-module", () => ({ * **Shared pagination infrastructure: buildPaginationContextKey and parseCursorFlag**: List commands with cursor pagination use \`buildPaginationContextKey(type, identifier, flags)\` for composite context keys and \`parseCursorFlag(value)\` accepting \`"last"\` magic value. Critical: \`resolveCursor()\` must be called inside the \`org-all\` override closure, not before \`dispatchOrgScopedList\` — otherwise cursor validation errors fire before the correct mode-specific error. + +* **Telemetry instrumentation pattern: withTracingSpan + captureException for handled errors**: For graceful-fallback operations, use \`withTracingSpan\` from \`src/lib/telemetry.ts\` for child spans and \`captureException\` from \`@sentry/bun\` (named import — Biome forbids namespace imports) with \`level: 'warning'\` for non-fatal errors. 
\`withTracingSpan\` uses \`onlyIfParent: true\` so it's a no-op without active transaction. When returning \`withTracingSpan(...)\` directly, drop \`async\` and use \`Promise.resolve(null)\` for early returns. User-visible fallbacks should use \`log.warn()\` not \`log.debug()\` — debug is invisible at default level. Also: several commands bypass telemetry by importing \`buildCommand\` from \`@stricli/core\` directly instead of \`../../lib/command.js\`. Affected: trace/list, trace/view, log/view, api.ts, help.ts. + ### Preference diff --git a/test/isolated/delta-upgrade.test.ts b/test/isolated/delta-upgrade.test.ts index f5306757..6b2b3885 100644 --- a/test/isolated/delta-upgrade.test.ts +++ b/test/isolated/delta-upgrade.test.ts @@ -10,7 +10,7 @@ */ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; -import { existsSync, unlinkSync, writeFileSync } from "node:fs"; +import { copyFileSync, existsSync, unlinkSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; @@ -399,4 +399,91 @@ describe("attemptDeltaUpgrade", () => { if (existsSync(destPath)) unlinkSync(destPath); } }); + + test("returns DeltaResult with telemetry on successful stable patch", async () => { + // Use real TRDIFF10 fixture for end-to-end success + const fixturesDir = join(import.meta.dir, "../fixtures/patches"); + const oldBinaryPath = tempFile("old-success.bin"); + const destPath = tempFile("dest-success.bin"); + + // Copy the real fixture "old" binary — loadOldBinary will copy+mmap this + copyFileSync(join(fixturesDir, "small-old.bin"), oldBinaryPath); + + // SHA-256 of the expected output (small-new.bin) + const expectedSha256 = + "54d0dcd74478bc154b5b24393fdc6129518271baa36f446384d60e84021bb724"; + + // Read the real TRDIFF10 patch + const patchData = await Bun.file( + join(fixturesDir, "small.trdiff10") + ).arrayBuffer(); + + const patchUrl = "https://example.com/small.patch"; + const releases = [ + { + tag_name: 
"0.14.0", + assets: [ + { + name: BINARY_NAME, + size: 54, + digest: `sha256:${expectedSha256}`, + browser_download_url: `https://example.com/${BINARY_NAME}`, + }, + { + name: `${BINARY_NAME}.patch`, + size: 89, + browser_download_url: patchUrl, + }, + { + name: `${BINARY_NAME}.gz`, + size: 100_000, + browser_download_url: `https://example.com/${BINARY_NAME}.gz`, + }, + ], + }, + { + tag_name: "0.13.0", + assets: [ + { + name: BINARY_NAME, + size: 54, + browser_download_url: `https://example.com/${BINARY_NAME}`, + }, + ], + }, + ]; + + mockFetch(async (url) => { + const urlStr = String(url); + if (urlStr.startsWith("https://api.github.com/")) { + return new Response(JSON.stringify(releases), { status: 200 }); + } + if (urlStr === patchUrl) { + return new Response(patchData, { status: 200 }); + } + return new Response("Not Found", { status: 404 }); + }); + + try { + const result = await attemptDeltaUpgrade( + "0.14.0", + oldBinaryPath, + destPath + ); + expect(result).not.toBeNull(); + expect(result!.sha256).toBe(expectedSha256); + expect(result!.patchBytes).toBe(89); + expect(result!.chainLength).toBe(1); + + // Verify patched output matches expected file + const actual = await Bun.file(destPath).arrayBuffer(); + const expected = await Bun.file( + join(fixturesDir, "small-new.bin") + ).arrayBuffer(); + expect(new Uint8Array(actual)).toEqual(new Uint8Array(expected)); + } finally { + if (existsSync(oldBinaryPath)) unlinkSync(oldBinaryPath); + if (existsSync(destPath)) unlinkSync(destPath); + } + }); }); From 68784aa03e1116a9be422826b750f5772b4bd84d Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 5 Mar 2026 10:50:41 +0000 Subject: [PATCH 3/3] fix(upgrade): remove child-process mmap probe, use direct try/catch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiled Bun binaries don't support the -e flag, so spawning process.execPath -e '...' re-runs the main program instead of evaluating the expression. 
This made probeMmapSafe() always return false in production, negating the mmap optimization. The probe was unnecessary: mmap on a temp copy is safe because the copy is a regular file (separate inode), not a running binary. ETXTBSY (Linux) and AMFI SIGKILL (macOS) only affect the running binary's inode. Simplify to direct copy → mmap with try/catch fallback to arrayBuffer(). Addresses Seer review comment on PR #344. --- .github/workflows/ci.yml | 6 +- AGENTS.md | 2 +- script/merge-lcov.ts | 187 +++++++++++++++++++++++++++++++++++++++ src/lib/bspatch.ts | 113 +++++++---------------- 4 files changed, 225 insertions(+), 83 deletions(-) create mode 100644 script/merge-lcov.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a1edc33a..7295f102 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -171,11 +171,13 @@ jobs: run: bun run test:unit - name: Isolated Tests run: bun run test:isolated --coverage --coverage-reporter=lcov --coverage-dir=coverage-isolated - - name: Upload Coverage + - name: Merge Coverage Reports + run: bun run script/merge-lcov.ts coverage/lcov.info coverage-isolated/lcov.info > coverage/merged.lcov + - name: Coverage Report uses: getsentry/codecov-action@main with: token: ${{ secrets.GITHUB_TOKEN }} - files: ./coverage/lcov.info,./coverage-isolated/lcov.info + files: ./coverage/merged.lcov build-binary: name: Build Binary (${{ matrix.target }}) diff --git a/AGENTS.md b/AGENTS.md index a954ea25..f9e9a5d7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -666,7 +666,7 @@ mock.module("./some-module", () => ({ * **Install script: BSD sed and awk JSON parsing breaks OCI digest extraction**: The install script parses OCI manifests with awk (no jq). Key trap: BSD sed \`\n\` is literal, not newline. Fix: single awk pass tracking last-seen \`"digest"\`, printing when \`"org.opencontainers.image.title"\` matches target. 
The config digest (\`sha256:44136fa...\`) is a 2-byte \`{}\` blob — downloading it instead of the real binary causes \`gunzip: unexpected end of file\`. -* **macOS SIGKILL on MAP\_SHARED mmap of signed Mach-O binaries**: \`Bun.mmap()\` always opens files with \`PROT\_WRITE\`/\`O\_RDWR\` regardless of the \`shared\` flag. This fails on the running binary: macOS sends uncatchable SIGKILL (AMFI rejects writable mappings on signed Mach-O), Linux returns ETXTBSY (kernel blocks opening running executables for write). The platform-conditional fix (PR #340: darwin→arrayBuffer, else→mmap) was insufficient — mmap fails on BOTH platforms. Fixed by using \`new Uint8Array(await Bun.file(oldPath).arrayBuffer())\` unconditionally in bspatch.ts. Costs ~100 MB heap for the old binary but is the only approach that works cross-platform. +* **macOS SIGKILL on MAP\_SHARED mmap of signed Mach-O binaries**: \`Bun.mmap()\` always opens files with \`PROT\_WRITE\`/\`O\_RDWR\` regardless of the \`shared\` flag. This fails on the running binary: macOS sends uncatchable SIGKILL (AMFI rejects writable mappings on signed Mach-O), Linux returns ETXTBSY (kernel blocks opening running executables for write). Fix: copy-then-mmap — copy the running binary to a temp file, then mmap the copy (separate inode, no AMFI/ETXTBSY restrictions). Falls back to \`arrayBuffer()\` if copy/mmap fails. Do NOT use child-process probe (\`process.execPath -e ...\`) — compiled Bun binaries don't support \`-e\`, causing the child to rerun the main program. * **Multiple mockFetch calls replace each other — use unified mocks for multi-endpoint tests**: Bun test mocking gotchas: (1) \`mockFetch()\` replaces \`globalThis.fetch\` — calling it twice replaces the first mock. Use a single unified fetch mock dispatching by URL pattern. (2) \`mock.module()\` pollutes the module registry for ALL subsequent test files. Tests using it must live in \`test/isolated/\` and run via \`test:isolated\`. 
(3) For \`Bun.spawn\`, use direct property assignment in \`beforeEach\`/\`afterEach\`. diff --git a/script/merge-lcov.ts b/script/merge-lcov.ts new file mode 100644 index 00000000..c5052ba6 --- /dev/null +++ b/script/merge-lcov.ts @@ -0,0 +1,187 @@ +#!/usr/bin/env bun +/** + * Merge multiple LCOV coverage files by taking the MAX hit count per line. + * + * The getsentry/codecov-action aggregates multiple coverage files by summing + * all statements — if a source file appears in two lcov files, its hit counts + * get doubled rather than merged. This script merges lcov files properly: + * for each source file, it takes the maximum DA (line hit count) across all + * input files, producing a single lcov with the best coverage from any suite. + * + * All summary fields (LF, LH, FNF, FNH, BRF, BRH) are recomputed from the + * merged data rather than taking the max from inputs. + * + * Usage: bun run script/merge-lcov.ts file1.lcov file2.lcov [file3.lcov ...] + * Output: merged lcov to stdout + */ + +export type FileData = { + /** DA entries: line number → max hit count */ + da: Map; + /** FN entries: function name → "lineNo,name" */ + fn: Map; + /** FNDA entries: function name → max execution count */ + fnda: Map; + /** BRDA entries: "line,block,branch" key → max taken count (string, "-" = not taken) */ + brda: Map; + /** Max FNF seen from inputs (fallback when no FN/FNDA entries available) */ + fnfMax: number; + /** Max FNH seen from inputs (fallback when no FN/FNDA entries available) */ + fnhMax: number; +}; + +function createFileData(): FileData { + return { + da: new Map(), + fn: new Map(), + fnda: new Map(), + brda: new Map(), + fnfMax: 0, + fnhMax: 0, + }; +} + +const files = process.argv.slice(2); +if (files.length === 0) { + console.error("Usage: merge-lcov.ts [file2.lcov ...]"); + process.exit(1); +} + +/** Merged data per source file path */ +const merged = new Map(); +/** Track insertion order of source files */ +const fileOrder: string[] = []; + +for (const 
filePath of files) { + const content = await Bun.file(filePath).text(); + let currentSF = ""; + let data: FileData | null = null; + + for (const line of content.split("\n")) { + if (line.startsWith("SF:")) { + currentSF = line.slice(3); + if (!merged.has(currentSF)) { + merged.set(currentSF, createFileData()); + fileOrder.push(currentSF); + } + data = merged.get(currentSF) ?? null; + } else if (line.startsWith("DA:") && data) { + const comma = line.indexOf(",", 3); + const lineNo = Number.parseInt(line.slice(3, comma), 10); + const hits = Number.parseInt(line.slice(comma + 1), 10); + if (!data.da.has(lineNo) || hits > (data.da.get(lineNo) ?? 0)) { + data.da.set(lineNo, hits); + } + } else if (line.startsWith("FN:") && data) { + const val = line.slice(3); + const comma = val.indexOf(","); + const name = val.slice(comma + 1); + data.fn.set(name, val); + } else if (line.startsWith("FNDA:") && data) { + const val = line.slice(5); + const comma = val.indexOf(","); + const count = Number.parseInt(val.slice(0, comma), 10); + const name = val.slice(comma + 1); + const prev = data.fnda.get(name) ?? 
0; + if (count > prev) { + data.fnda.set(name, count); + } + } else if (line.startsWith("FNF:") && data) { + const v = Number.parseInt(line.slice(4), 10); + if (v > data.fnfMax) { + data.fnfMax = v; + } + } else if (line.startsWith("FNH:") && data) { + const v = Number.parseInt(line.slice(4), 10); + if (v > data.fnhMax) { + data.fnhMax = v; + } + } else if (line.startsWith("BRDA:") && data) { + // BRDA format: line,block,branch,taken + // Use "line,block,branch" as key, take max of "taken" + const val = line.slice(5); + const lastComma = val.lastIndexOf(","); + const branchKey = val.slice(0, lastComma); + const taken = val.slice(lastComma + 1); + const prev = data.brda.get(branchKey); + if (prev === undefined || prev === "-") { + // No previous value or previous was "not taken" + data.brda.set(branchKey, taken); + } else if (taken !== "-") { + // Both are numeric — take the max + const prevNum = Number.parseInt(prev, 10); + const takenNum = Number.parseInt(taken, 10); + if (takenNum > prevNum) { + data.brda.set(branchKey, taken); + } + } + } + // LF, LH, FNF, FNH, BRF, BRH, end_of_record are all skipped — we recompute them + } +} + +// Output merged lcov +const out: string[] = []; + +for (const sf of fileOrder) { + const data = merged.get(sf); + if (!data) { + continue; + } + + out.push(`SF:${sf}`); + + // FN lines + for (const val of data.fn.values()) { + out.push(`FN:${val}`); + } + + // FNDA lines + compute FNF/FNH + let fnh = 0; + for (const [name, count] of data.fnda) { + out.push(`FNDA:${count},${name}`); + if (count > 0) { + fnh += 1; + } + } + + // Prefer recomputed FNF/FNH from merged FN/FNDA entries when available. + // Bun's lcov output only has FNF/FNH summary lines (no individual FN/FNDA), + // so fall back to max-of-inputs for those. 
+  if (data.fn.size > 0) {
+    out.push(`FNF:${data.fn.size}`);
+    out.push(`FNH:${fnh}`);
+  } else if (data.fnfMax > 0) {
+    out.push(`FNF:${data.fnfMax}`);
+    out.push(`FNH:${data.fnhMax}`);
+  }
+
+  // DA lines sorted by line number + compute LF/LH
+  const sortedLines = [...data.da.entries()].sort((a, b) => a[0] - b[0]);
+  let lh = 0;
+  for (const [lineNo, hits] of sortedLines) {
+    out.push(`DA:${lineNo},${hits}`);
+    if (hits > 0) {
+      lh += 1;
+    }
+  }
+  out.push(`LF:${data.da.size}`);
+  out.push(`LH:${lh}`);
+
+  // BRDA lines + compute BRF/BRH
+  let brh = 0;
+  for (const [key, taken] of data.brda) {
+    out.push(`BRDA:${key},${taken}`);
+    if (taken !== "-" && Number.parseInt(taken, 10) > 0) {
+      brh += 1;
+    }
+  }
+  if (data.brda.size > 0) {
+    out.push(`BRF:${data.brda.size}`);
+    out.push(`BRH:${brh}`);
+  }
+
+  out.push("end_of_record");
+}
+
+process.stdout.write(`${out.join("\n")}\n`);
diff --git a/src/lib/bspatch.ts b/src/lib/bspatch.ts
index 7a3501e3..25c98ec2 100644
--- a/src/lib/bspatch.ts
+++ b/src/lib/bspatch.ts
@@ -6,8 +6,7 @@
  * minimal memory usage during CLI self-upgrades:
  *
  * - Old binary: copy-then-mmap for 0 JS heap (CoW on btrfs/xfs/APFS),
- *   with child-process probe for mmap safety (macOS AMFI sends uncatchable
- *   SIGKILL on signed Mach-O), falling back to `arrayBuffer()` if unsafe
+ *   falling back to `arrayBuffer()` if copy/mmap fails
  * - Diff/extra blocks: streamed via `DecompressionStream('zstd')`
  * - Output: written incrementally to disk via `Bun.file().writer()`
  * - Integrity: SHA-256 computed inline via `Bun.CryptoHasher`
@@ -31,7 +30,8 @@
  * ```
  */
 
-import { copyFileSync, unlinkSync } from "node:fs";
+import { constants, copyFileSync } from "node:fs";
+import { unlink } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 
@@ -225,96 +225,49 @@ type OldFileHandle = {
   /** Memory-mapped or in-memory view of the old binary */
   data: Uint8Array;
   /** Cleanup function to call after patching (removes temp copy, if any)
   */
-  cleanup: () => void;
+  cleanup: () => void | Promise<void>;
 };
 
-/**
- * Probe whether `Bun.mmap()` works on a file by spawning a child process.
- *
- * macOS AMFI sends uncatchable SIGKILL when a process creates a writable
- * memory mapping of a code-signed Mach-O binary — even a copy. Since
- * SIGKILL terminates the process inside the syscall (before any catch
- * block runs), we cannot detect the failure in-process.
- *
- * This probe spawns a minimal child that attempts `Bun.mmap()` on the
- * target file. If the child exits 0, mmap is safe. If it's killed
- * (SIGKILL) or exits non-zero, we fall back to `arrayBuffer()`.
- *
- * @param filePath - Path to the file to probe (should be the temp copy)
- * @returns true if mmap is safe on this file, false otherwise
- */
-async function probeMmapSafe(filePath: string): Promise<boolean> {
-  try {
-    const child = Bun.spawn(
-      [
-        process.execPath,
-        "-e",
-        `Bun.mmap(${JSON.stringify(filePath)}, { shared: false })`,
-      ],
-      { stdout: "ignore", stderr: "ignore" }
-    );
-    const exitCode = await child.exited;
-    return exitCode === 0;
-  } catch {
-    return false;
-  }
-}
-
 /**
  * Load the old binary for read access during patching.
  *
- * Strategy: copy to temp file, probe mmap safety via child process,
- * then mmap the copy if safe. This avoids `Bun.mmap()` on files that
- * would trigger uncatchable SIGKILL (macOS AMFI on signed Mach-O) while
- * keeping zero JS heap on platforms where mmap works. On CoW filesystems
- * (btrfs, xfs, APFS) the copy is a metadata-only reflink (near-instant).
+ * Strategy: copy to temp file, then try mmap on the copy. The copy is a
+ * regular file (no running process), so `Bun.mmap()` succeeds on both
+ * Linux and macOS — ETXTBSY (Linux) and AMFI SIGKILL (macOS) only affect
+ * the running binary's inode, not a copy. On CoW filesystems (btrfs, xfs,
+ * APFS) the copy is a metadata-only reflink (near-instant).
 *
- * Falls back to `Bun.file().arrayBuffer()` (~100 MB heap) if the probe
- * fails or if copy/mmap errors for any reason.
+ * Falls back to `Bun.file().arrayBuffer()` (~100 MB heap) if copy or
+ * mmap fails for any reason.
  */
+let loadCounter = 0;
+
 async function loadOldBinary(oldPath: string): Promise<OldFileHandle> {
-  const tempCopy = join(tmpdir(), `sentry-patch-old-${process.pid}`);
+  loadCounter += 1;
+  const tempCopy = join(
+    tmpdir(),
+    `sentry-patch-old-${process.pid}-${loadCounter}`
+  );
   try {
-    copyFileSync(oldPath, tempCopy);
-
-    // Probe mmap safety in a child process — macOS AMFI sends uncatchable
-    // SIGKILL on writable mappings of signed Mach-O, even for copies.
-    const mmapSafe = await probeMmapSafe(tempCopy);
-
-    if (mmapSafe) {
-      const data = Bun.mmap(tempCopy, { shared: false });
-      return {
-        data,
-        cleanup: () => {
-          try {
-            unlinkSync(tempCopy);
-          } catch {
-            // Best-effort cleanup — OS will reclaim on reboot
-          }
-        },
-      };
-    }
+    // COPYFILE_FICLONE: attempt CoW reflink first (near-instant on btrfs/xfs/APFS),
+    // silently falls back to regular copy on filesystems that don't support it.
+    copyFileSync(oldPath, tempCopy, constants.COPYFILE_FICLONE);
 
-    // mmap probe failed — read copy into JS heap, then clean up temp file
-    const data = new Uint8Array(await Bun.file(tempCopy).arrayBuffer());
-    try {
-      unlinkSync(tempCopy);
-    } catch {
-      // Best-effort cleanup
-    }
+    // mmap the copy — safe because it's a separate inode, not the running
+    // binary. MAP_PRIVATE avoids write-back to disk.
+    const data = Bun.mmap(tempCopy, { shared: false });
 
     return {
       data,
-      cleanup: () => {
-        try {
-          unlinkSync(tempCopy);
-        } catch {
-          // Best-effort cleanup — OS will reclaim on reboot
-        }
-      },
+      cleanup: () =>
+        unlink(tempCopy).catch(() => {
+          /* Best-effort cleanup — OS will reclaim on reboot */
+        }),
     };
   } catch {
-    // Copy failed — fall back to reading original directly into JS heap
-    try {
-      unlinkSync(tempCopy);
-    } catch {
-      // May not exist if copyFileSync failed
-    }
+    // Copy or mmap failed — fall back to reading into JS heap
+    await unlink(tempCopy).catch(() => {
+      /* May not exist if copyFileSync failed */
+    });
     return {
       data: new Uint8Array(await Bun.file(oldPath).arrayBuffer()),
       cleanup: () => {
@@ -416,7 +369,7 @@ export async function applyPatch(
   try {
     await writer.end();
   } finally {
-    cleanupOldFile();
+    await cleanupOldFile();
   }
 }