From 1a0f5e697449cceda7770c5bfbf42d331a6743f0 Mon Sep 17 00:00:00 2001 From: TerrifiedBug Date: Sat, 7 Mar 2026 18:39:33 +0000 Subject: [PATCH 1/2] fix: handle BigInt serialization in REST API v1 detail endpoints GET /api/v1/pipelines/:id and /api/v1/nodes/:id return 500 because NodePipelineStatus contains BigInt fields (eventsIn, eventsOut, etc.) that JSON.stringify cannot serialize. Adds a jsonResponse() helper using a custom replacer to convert BigInts to numbers. --- src/app/api/v1/_lib/api-handler.ts | 12 ++++++++++++ src/app/api/v1/nodes/[id]/route.ts | 4 ++-- src/app/api/v1/pipelines/[id]/route.ts | 4 ++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/app/api/v1/_lib/api-handler.ts b/src/app/api/v1/_lib/api-handler.ts index 724c8a27..fc14af1c 100644 --- a/src/app/api/v1/_lib/api-handler.ts +++ b/src/app/api/v1/_lib/api-handler.ts @@ -6,6 +6,18 @@ import { type ServiceAccountContext, } from "@/server/middleware/api-auth"; +/** BigInt-safe NextResponse.json() — converts BigInts to numbers before serialization. */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export function jsonResponse(data: any, init?: { status?: number }) { + const body = JSON.stringify(data, (_key, value) => + typeof value === "bigint" ? Number(value) : value, + ); + return new NextResponse(body, { + status: init?.status ?? 
200,
+    headers: { "Content-Type": "application/json" },
+  });
+}
+
 const TRPC_TO_HTTP: Record<string, number> = {
   NOT_FOUND: 404,
   BAD_REQUEST: 400,
diff --git a/src/app/api/v1/nodes/[id]/route.ts b/src/app/api/v1/nodes/[id]/route.ts
index fa4890d8..4861972e 100644
--- a/src/app/api/v1/nodes/[id]/route.ts
+++ b/src/app/api/v1/nodes/[id]/route.ts
@@ -1,6 +1,6 @@
 import { NextResponse } from "next/server";
 import { prisma } from "@/lib/prisma";
-import { apiRoute } from "../../_lib/api-handler";
+import { apiRoute, jsonResponse } from "../../_lib/api-handler";
 
 export const GET = apiRoute("nodes.read", async (_req, ctx, params) => {
   const id = params?.id;
@@ -41,5 +41,5 @@ export const GET = apiRoute("nodes.read", async (_req, ctx, params) => {
     return NextResponse.json({ error: "Node not found" }, { status: 404 });
   }
 
-  return NextResponse.json({ node });
+  return jsonResponse({ node });
 });
diff --git a/src/app/api/v1/pipelines/[id]/route.ts b/src/app/api/v1/pipelines/[id]/route.ts
index 314a35b2..1beba3a7 100644
--- a/src/app/api/v1/pipelines/[id]/route.ts
+++ b/src/app/api/v1/pipelines/[id]/route.ts
@@ -1,6 +1,6 @@
 import { NextResponse } from "next/server";
 import { prisma } from "@/lib/prisma";
-import { apiRoute } from "../../_lib/api-handler";
+import { apiRoute, jsonResponse } from "../../_lib/api-handler";
 
 export const GET = apiRoute("pipelines.read", async (_req, ctx, params) => {
   const id = params?.id;
@@ -58,5 +58,5 @@ export const GET = apiRoute("pipelines.read", async (_req, ctx, params) => {
     );
   }
 
-  return NextResponse.json({ pipeline });
+  return jsonResponse({ pipeline });
 });

From e18b43d0bffa10b53bb6b03b4423cd7eb791baa2 Mon Sep 17 00:00:00 2001
From: TerrifiedBug
Date: Sat, 7 Mar 2026 18:56:08 +0000
Subject: [PATCH 2/2] fix: use idle-based CPU formula in dashboard charts and
 alert evaluator

The fleet detail page correctly uses (total - idle) / total for CPU%,
but the dashboard charts and alert evaluator still used the old
cpuDelta / wallClockSeconds formula which 
gives per-core percentages (e.g. 787% on an 8-core machine). This aligns all CPU calculations to the same idle-based formula clamped to 0-100%. Affected code paths: - dashboard.chartMetrics: chart CPU time-series - dashboard.nodeCards: sparkline CPU values - alert-evaluator getCpuUsage: alert threshold checks --- src/server/routers/dashboard.ts | 31 ++++++++++++++++++++------ src/server/services/alert-evaluator.ts | 15 +++++-------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/server/routers/dashboard.ts b/src/server/routers/dashboard.ts index e4bc908e..a441b8fd 100644 --- a/src/server/routers/dashboard.ts +++ b/src/server/routers/dashboard.ts @@ -149,6 +149,7 @@ export const dashboardRouter = router({ memoryUsedBytes: true, memoryTotalBytes: true, cpuSecondsTotal: true, + cpuSecondsIdle: true, }, }) : []; @@ -227,11 +228,22 @@ export const dashboardRouter = router({ unhealthyPipelines, rates: { eventsIn: eventsInRate, eventsOut: eventsOutRate, bytesIn: bytesInRate, bytesOut: bytesOutRate, errors: errorsRate }, totals: { eventsIn: totalEventsIn, eventsOut: totalEventsOut, bytesIn: totalBytesIn, bytesOut: totalBytesOut, errors: totalErrors }, - sparkline: (metricsByNode.get(node.id) ?? []).map((m) => ({ - t: m.timestamp.getTime(), - mem: m.memoryTotalBytes ? Number(m.memoryUsedBytes) / Number(m.memoryTotalBytes) * 100 : 0, - cpu: Number(m.cpuSecondsTotal ?? 0), - })), + sparkline: (metricsByNode.get(node.id) ?? []).map((m, i, arr) => { + let cpu = 0; + if (i > 0) { + const prev = arr[i - 1]; + const totalDelta = m.cpuSecondsTotal - prev.cpuSecondsTotal; + const idleDelta = m.cpuSecondsIdle - prev.cpuSecondsIdle; + if (totalDelta > 0) { + cpu = Math.max(0, Math.min(100, ((totalDelta - idleDelta) / totalDelta) * 100)); + } + } + return { + t: m.timestamp.getTime(), + mem: m.memoryTotalBytes ? 
Number(m.memoryUsedBytes) / Number(m.memoryTotalBytes) * 100 : 0,
+            cpu,
+          };
+        }),
       };
     });
   }),
@@ -683,6 +695,7 @@ export const dashboardRouter = router({
           nodeId: true,
           timestamp: true,
           cpuSecondsTotal: true,
+          cpuSecondsIdle: true,
           memoryUsedBytes: true,
           memoryTotalBytes: true,
           diskReadBytes: true,
@@ -803,6 +816,7 @@ export const dashboardRouter = router({
         nodeId: string;
         timestamp: Date;
         cpuSecondsTotal: number;
+        cpuSecondsIdle: number;
         memoryUsedBytes: bigint;
         memoryTotalBytes: bigint;
         diskReadBytes: bigint;
@@ -826,8 +840,11 @@ export const dashboardRouter = router({
         const dtSec = (t - new Date(prev.timestamp).getTime()) / 1000;
         if (dtSec <= 0) continue;
 
-        const cpuDelta = curr.cpuSecondsTotal - prev.cpuSecondsTotal;
-        const cpuPct = Math.max(0, Math.min(100, (cpuDelta / dtSec) * 100));
+        const cpuTotalDelta = curr.cpuSecondsTotal - prev.cpuSecondsTotal;
+        const cpuIdleDelta = curr.cpuSecondsIdle - prev.cpuSecondsIdle;
+        const cpuPct = cpuTotalDelta > 0
+          ? Math.max(0, Math.min(100, ((cpuTotalDelta - cpuIdleDelta) / cpuTotalDelta) * 100))
+          : 0;
         addPoint(cpu, label, t, cpuPct);
 
         const memTotal = Number(curr.memoryTotalBytes);
diff --git a/src/server/services/alert-evaluator.ts b/src/server/services/alert-evaluator.ts
index fda646d7..9eabb110 100644
--- a/src/server/services/alert-evaluator.ts
+++ b/src/server/services/alert-evaluator.ts
@@ -43,22 +43,17 @@ async function getCpuUsage(nodeId: string): Promise<number | null> {
     where: { nodeId },
     orderBy: { timestamp: "desc" },
     take: 2,
-    select: { cpuSecondsTotal: true, timestamp: true },
+    select: { cpuSecondsTotal: true, cpuSecondsIdle: true },
   });
 
   if (rows.length < 2) return null;
 
   const [newer, older] = rows;
-  const dtSeconds =
-    (newer.timestamp.getTime() - older.timestamp.getTime()) / 1000;
-  if (dtSeconds <= 0) return null;
+  const totalDelta = newer.cpuSecondsTotal - older.cpuSecondsTotal;
+  if (totalDelta <= 0) return null; // counter reset or no change
 
-  // cpuSecondsTotal is cumulative; the delta / wall-clock-delta 
gives - // fraction of one core used. Multiply by 100 for a percentage. - const cpuDelta = newer.cpuSecondsTotal - older.cpuSecondsTotal; - if (cpuDelta < 0) return null; // counter reset - - return (cpuDelta / dtSeconds) * 100; + const idleDelta = newer.cpuSecondsIdle - older.cpuSecondsIdle; + return Math.max(0, Math.min(100, ((totalDelta - idleDelta) / totalDelta) * 100)); } /** Compute memory usage percentage from the latest NodeMetric row. */