From 1a0f5e697449cceda7770c5bfbf42d331a6743f0 Mon Sep 17 00:00:00 2001 From: TerrifiedBug Date: Sat, 7 Mar 2026 18:39:33 +0000 Subject: [PATCH 1/2] fix: handle BigInt serialization in REST API v1 detail endpoints GET /api/v1/pipelines/:id and /api/v1/nodes/:id return 500 because NodePipelineStatus contains BigInt fields (eventsIn, eventsOut, etc.) that JSON.stringify cannot serialize. Adds a jsonResponse() helper using a custom replacer to convert BigInts to numbers. --- src/app/api/v1/_lib/api-handler.ts | 12 ++++++++++++ src/app/api/v1/nodes/[id]/route.ts | 4 ++-- src/app/api/v1/pipelines/[id]/route.ts | 4 ++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/app/api/v1/_lib/api-handler.ts b/src/app/api/v1/_lib/api-handler.ts index 724c8a27..fc14af1c 100644 --- a/src/app/api/v1/_lib/api-handler.ts +++ b/src/app/api/v1/_lib/api-handler.ts @@ -6,6 +6,18 @@ import { type ServiceAccountContext, } from "@/server/middleware/api-auth"; +/** BigInt-safe NextResponse.json() — converts BigInts to numbers before serialization. */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export function jsonResponse(data: any, init?: { status?: number }) { + const body = JSON.stringify(data, (_key, value) => + typeof value === "bigint" ? Number(value) : value, + ); + return new NextResponse(body, { + status: init?.status ?? 
200,
+    headers: { "Content-Type": "application/json" },
+  });
+}
+
 const TRPC_TO_HTTP: Record<string, number> = {
   NOT_FOUND: 404,
   BAD_REQUEST: 400,
diff --git a/src/app/api/v1/nodes/[id]/route.ts b/src/app/api/v1/nodes/[id]/route.ts
index fa4890d8..4861972e 100644
--- a/src/app/api/v1/nodes/[id]/route.ts
+++ b/src/app/api/v1/nodes/[id]/route.ts
@@ -1,6 +1,6 @@
 import { NextResponse } from "next/server";
 import { prisma } from "@/lib/prisma";
-import { apiRoute } from "../../_lib/api-handler";
+import { apiRoute, jsonResponse } from "../../_lib/api-handler";
 
 export const GET = apiRoute("nodes.read", async (_req, ctx, params) => {
   const id = params?.id;
@@ -41,5 +41,5 @@ export const GET = apiRoute("nodes.read", async (_req, ctx, params) => {
     return NextResponse.json({ error: "Node not found" }, { status: 404 });
   }
 
-  return NextResponse.json({ node });
+  return jsonResponse({ node });
 });
diff --git a/src/app/api/v1/pipelines/[id]/route.ts b/src/app/api/v1/pipelines/[id]/route.ts
index 314a35b2..1beba3a7 100644
--- a/src/app/api/v1/pipelines/[id]/route.ts
+++ b/src/app/api/v1/pipelines/[id]/route.ts
@@ -1,6 +1,6 @@
 import { NextResponse } from "next/server";
 import { prisma } from "@/lib/prisma";
-import { apiRoute } from "../../_lib/api-handler";
+import { apiRoute, jsonResponse } from "../../_lib/api-handler";
 
 export const GET = apiRoute("pipelines.read", async (_req, ctx, params) => {
   const id = params?.id;
@@ -58,5 +58,5 @@ export const GET = apiRoute("pipelines.read", async (_req, ctx, params) => {
     );
   }
 
-  return NextResponse.json({ pipeline });
+  return jsonResponse({ pipeline });
 });

From e18b43d0bffa10b53bb6b03b4423cd7eb791baa2 Mon Sep 17 00:00:00 2001
From: TerrifiedBug
Date: Sat, 7 Mar 2026 18:56:08 +0000
Subject: [PATCH 2/2] fix: use idle-based CPU formula in dashboard charts and
 alert evaluator

The fleet detail page correctly uses (total - idle) / total for CPU%,
but the dashboard charts and alert evaluator still used the old
cpuDelta / wallClockSeconds formula which 
gives per-core percentages (e.g. 787% on an 8-core machine). This aligns all CPU calculations to the same idle-based formula clamped to 0-100%. Affected code paths: - dashboard.chartMetrics: chart CPU time-series - dashboard.nodeCards: sparkline CPU values - alert-evaluator getCpuUsage: alert threshold checks --- src/server/routers/dashboard.ts | 31 ++++++++++++++++++++------ src/server/services/alert-evaluator.ts | 15 +++++-------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/server/routers/dashboard.ts b/src/server/routers/dashboard.ts index e4bc908e..a441b8fd 100644 --- a/src/server/routers/dashboard.ts +++ b/src/server/routers/dashboard.ts @@ -149,6 +149,7 @@ export const dashboardRouter = router({ memoryUsedBytes: true, memoryTotalBytes: true, cpuSecondsTotal: true, + cpuSecondsIdle: true, }, }) : []; @@ -227,11 +228,22 @@ export const dashboardRouter = router({ unhealthyPipelines, rates: { eventsIn: eventsInRate, eventsOut: eventsOutRate, bytesIn: bytesInRate, bytesOut: bytesOutRate, errors: errorsRate }, totals: { eventsIn: totalEventsIn, eventsOut: totalEventsOut, bytesIn: totalBytesIn, bytesOut: totalBytesOut, errors: totalErrors }, - sparkline: (metricsByNode.get(node.id) ?? []).map((m) => ({ - t: m.timestamp.getTime(), - mem: m.memoryTotalBytes ? Number(m.memoryUsedBytes) / Number(m.memoryTotalBytes) * 100 : 0, - cpu: Number(m.cpuSecondsTotal ?? 0), - })), + sparkline: (metricsByNode.get(node.id) ?? []).map((m, i, arr) => { + let cpu = 0; + if (i > 0) { + const prev = arr[i - 1]; + const totalDelta = m.cpuSecondsTotal - prev.cpuSecondsTotal; + const idleDelta = m.cpuSecondsIdle - prev.cpuSecondsIdle; + if (totalDelta > 0) { + cpu = Math.max(0, Math.min(100, ((totalDelta - idleDelta) / totalDelta) * 100)); + } + } + return { + t: m.timestamp.getTime(), + mem: m.memoryTotalBytes ? 
Number(m.memoryUsedBytes) / Number(m.memoryTotalBytes) * 100 : 0,
+            cpu,
+          };
+        }),
       };
     });
   }),
@@ -683,6 +695,7 @@ export const dashboardRouter = router({
           nodeId: true,
           timestamp: true,
           cpuSecondsTotal: true,
+          cpuSecondsIdle: true,
           memoryUsedBytes: true,
           memoryTotalBytes: true,
           diskReadBytes: true,
@@ -803,6 +816,7 @@ export const dashboardRouter = router({
         nodeId: string;
         timestamp: Date;
         cpuSecondsTotal: number;
+        cpuSecondsIdle: number;
         memoryUsedBytes: bigint;
         memoryTotalBytes: bigint;
         diskReadBytes: bigint;
@@ -826,8 +840,11 @@ export const dashboardRouter = router({
         const dtSec = (t - new Date(prev.timestamp).getTime()) / 1000;
         if (dtSec <= 0) continue;
 
-        const cpuDelta = curr.cpuSecondsTotal - prev.cpuSecondsTotal;
-        const cpuPct = Math.max(0, Math.min(100, (cpuDelta / dtSec) * 100));
+        const cpuTotalDelta = curr.cpuSecondsTotal - prev.cpuSecondsTotal;
+        const cpuIdleDelta = curr.cpuSecondsIdle - prev.cpuSecondsIdle;
+        const cpuPct = cpuTotalDelta > 0
+          ? Math.max(0, Math.min(100, ((cpuTotalDelta - cpuIdleDelta) / cpuTotalDelta) * 100))
+          : 0;
         addPoint(cpu, label, t, cpuPct);
 
         const memTotal = Number(curr.memoryTotalBytes);
diff --git a/src/server/services/alert-evaluator.ts b/src/server/services/alert-evaluator.ts
index fda646d7..9eabb110 100644
--- a/src/server/services/alert-evaluator.ts
+++ b/src/server/services/alert-evaluator.ts
@@ -43,22 +43,17 @@ async function getCpuUsage(nodeId: string): Promise<number | null> {
     where: { nodeId },
     orderBy: { timestamp: "desc" },
     take: 2,
-    select: { cpuSecondsTotal: true, timestamp: true },
+    select: { cpuSecondsTotal: true, cpuSecondsIdle: true },
   });
 
   if (rows.length < 2) return null;
 
   const [newer, older] = rows;
-  const dtSeconds =
-    (newer.timestamp.getTime() - older.timestamp.getTime()) / 1000;
-  if (dtSeconds <= 0) return null;
+  const totalDelta = newer.cpuSecondsTotal - older.cpuSecondsTotal;
+  if (totalDelta <= 0) return null; // counter reset or no change
 
-  // cpuSecondsTotal is cumulative; the delta / wall-clock-delta 
gives - // fraction of one core used. Multiply by 100 for a percentage. - const cpuDelta = newer.cpuSecondsTotal - older.cpuSecondsTotal; - if (cpuDelta < 0) return null; // counter reset - - return (cpuDelta / dtSeconds) * 100; + const idleDelta = newer.cpuSecondsIdle - older.cpuSecondsIdle; + return Math.max(0, Math.min(100, ((totalDelta - idleDelta) / totalDelta) * 100)); } /** Compute memory usage percentage from the latest NodeMetric row. */