Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- AlterTable
-- Adds the maintenance-mode flag to "VectorNode". The column is NOT NULL with DEFAULT false,
-- so rows that already exist get the value false.
ALTER TABLE "VectorNode" ADD COLUMN "maintenanceMode" BOOLEAN NOT NULL DEFAULT false;
-- Adds a nullable timestamp column (millisecond precision) with no default,
-- so it is NULL for rows that already exist.
ALTER TABLE "VectorNode" ADD COLUMN "maintenanceModeAt" TIMESTAMP(3);
2 changes: 2 additions & 0 deletions prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ model VectorNode {
os String?
deploymentMode DeploymentMode @default(UNKNOWN)
pendingAction Json?
maintenanceMode Boolean @default(false)
maintenanceModeAt DateTime?
pipelineStatuses NodePipelineStatus[]
nodeMetrics NodeMetric[]
pipelineLogs PipelineLog[]
Expand Down
63 changes: 62 additions & 1 deletion src/app/(dashboard)/fleet/[nodeId]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { useParams, useRouter } from "next/navigation";
import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
import { useTRPC } from "@/trpc/client";
import { ArrowLeft, ShieldOff, Trash2, Activity, Terminal, Server, Pencil, Check, X } from "lucide-react";
import { ArrowLeft, ShieldOff, Trash2, Activity, Terminal, Server, Pencil, Check, X, Wrench } from "lucide-react";
import { NodeLogs } from "@/components/fleet/node-logs";
import { toast } from "sonner";
import { useState } from "react";
Expand Down Expand Up @@ -131,6 +131,32 @@ export default function NodeDetailPage() {
})
);

// Mutation wrapping fleet.setMaintenanceMode for the node shown on this page.
// On success it invalidates the cached node detail query (keyed by params.nodeId)
// and both fleet list queries so they are refetched.
// NOTE(review): no onError handler is configured in this block, so a failed
// toggle is not reported from here — confirm whether that is handled elsewhere.
const maintenanceMutation = useMutation(
trpc.fleet.setMaintenanceMode.mutationOptions({
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: trpc.fleet.get.queryKey({ id: params.nodeId }) });
queryClient.invalidateQueries({ queryKey: trpc.fleet.list.queryKey() });
queryClient.invalidateQueries({ queryKey: trpc.fleet.listWithPipelineStatus.queryKey() });
},
}),
);

/**
 * Flips maintenance mode for the currently loaded node.
 *
 * Entering maintenance mode asks for confirmation first, quoting how many of
 * the node's pipeline statuses are "RUNNING"; leaving it needs no confirmation.
 * Does nothing while the node has not loaded or when the prompt is declined.
 */
function handleMaintenanceToggle() {
  if (!node) return;

  // True when the node is currently NOT in maintenance, i.e. we are about to enter it.
  const enabled = !node.maintenanceMode;

  if (enabled) {
    const runningCount = node.pipelineStatuses.reduce(
      (total, entry) => (entry.status === "RUNNING" ? total + 1 : total),
      0
    );
    const confirmed = confirm(
      `Enter maintenance mode for "${node.name}"?\n\nThis will stop ${runningCount} running pipeline(s) on this node. Pipelines will automatically resume when maintenance mode is turned off.`
    );
    if (!confirmed) return;
  }

  maintenanceMutation.mutate({ nodeId: node.id, enabled });
}

function handleRevoke() {
if (!node) return;
if (!confirm(`Revoke token for "${node.name}"? The agent will no longer be able to connect.`)) {
Expand Down Expand Up @@ -217,6 +243,19 @@ export default function NodeDetailPage() {
</div>
</div>
<div className="flex items-center gap-2">
<Button
variant={node.maintenanceMode ? "default" : "outline"}
size="sm"
onClick={handleMaintenanceToggle}
disabled={maintenanceMutation.isPending}
>
<Wrench className="mr-2 h-4 w-4" />
{maintenanceMutation.isPending
? "Updating..."
: node.maintenanceMode
? "Exit Maintenance"
: "Enter Maintenance"}
</Button>
{node.nodeTokenHash && (
<Button
variant="outline"
Expand All @@ -240,6 +279,28 @@ export default function NodeDetailPage() {
</div>
</div>

{node.maintenanceMode && (
<div className="flex items-center gap-3 rounded-lg border border-orange-500/50 bg-orange-50 px-4 py-3 dark:bg-orange-950/20">
<Wrench className="h-5 w-5 text-orange-600" />
<div className="flex-1">
<p className="text-sm font-medium text-orange-800 dark:text-orange-200">
This node is in maintenance mode
</p>
<p className="text-xs text-orange-600 dark:text-orange-400">
All pipelines are stopped. They will automatically resume when maintenance mode is turned off.
</p>
</div>
<Button
variant="outline"
size="sm"
onClick={handleMaintenanceToggle}
disabled={maintenanceMutation.isPending}
>
Exit Maintenance
</Button>
</div>
)}

<div>
{/* Node Details */}
<Card>
Expand Down
107 changes: 73 additions & 34 deletions src/app/(dashboard)/fleet/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
TooltipTrigger,
} from "@/components/ui/tooltip";
import { Skeleton } from "@/components/ui/skeleton";
import { Wrench } from "lucide-react";
import { DeploymentMatrix } from "@/components/fleet/deployment-matrix";
import { formatLastSeen } from "@/lib/format";
import { nodeStatusVariant, nodeStatusLabel } from "@/lib/status";
Expand Down Expand Up @@ -87,6 +88,15 @@ export default function FleetPage() {
}),
);

// Mutation wrapping fleet.setMaintenanceMode, shared by every row of the fleet table.
// On success it invalidates both fleet list queries so they are refetched.
// NOTE(review): no onError handler is configured in this block — confirm whether
// failures are surfaced elsewhere.
const setMaintenance = useMutation(
trpc.fleet.setMaintenanceMode.mutationOptions({
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: trpc.fleet.list.queryKey() });
queryClient.invalidateQueries({ queryKey: trpc.fleet.listWithPipelineStatus.queryKey() });
},
}),
);

return (
<div className="space-y-6">
{isLoading ? (
Expand Down Expand Up @@ -161,54 +171,83 @@ export default function FleetPage() {
</div>
</TableCell>
<TableCell>
<StatusBadge variant={nodeStatusVariant(node.status)}>
{nodeStatusLabel(node.status)}
</StatusBadge>
{node.maintenanceMode ? (
<Badge variant="outline" className="text-orange-600 border-orange-500/50">
<Wrench className="mr-1 h-3 w-3" />
Maintenance
</Badge>
) : (
<StatusBadge variant={nodeStatusVariant(node.status)}>
{nodeStatusLabel(node.status)}
</StatusBadge>
)}
</TableCell>
<TableCell className="text-muted-foreground">
{formatLastSeen(node.lastSeen)}
</TableCell>
<TableCell>
{node.pendingAction ? (
<Badge variant="outline" className="text-blue-600">
Update pending...
</Badge>
) : node.deploymentMode === "DOCKER" ? (
getNodeLatest(node).version &&
node.agentVersion &&
isVersionOlder(node.agentVersion, getNodeLatest(node).version ?? "") ? (
<Tooltip>
<TooltipTrigger asChild>
<span>
<Button variant="outline" size="sm" disabled>
Update
</Button>
</span>
</TooltipTrigger>
<TooltipContent>Update via Docker image pull</TooltipContent>
</Tooltip>
) : null
) : getNodeLatest(node).version &&
node.agentVersion &&
isVersionOlder(node.agentVersion, getNodeLatest(node).version ?? "") ? (
<div className="flex items-center gap-2">
<Button
variant="outline"
variant={node.maintenanceMode ? "default" : "outline"}
size="sm"
disabled={triggerUpdate.isPending}
disabled={setMaintenance.isPending && setMaintenance.variables?.nodeId === node.id}
onClick={(e) => {
e.preventDefault();
const latest = getNodeLatest(node);
triggerUpdate.mutate({
if (!node.maintenanceMode) {
if (!confirm(
`Enter maintenance mode for "${node.name}"?\n\nThis will stop all running pipelines on this node. Pipelines will automatically resume when maintenance mode is turned off.`
)) return;
}
setMaintenance.mutate({
nodeId: node.id,
targetVersion: latest.version!,
downloadUrl: `https://github.com/${AGENT_REPO}/releases/download/${latest.tag}/vf-agent-linux-amd64`,
checksum: `sha256:${latest.checksums["vf-agent-linux-amd64"] ?? ""}`,
enabled: !node.maintenanceMode,
});
}}
>
{triggerUpdate.isPending ? "Updating..." : "Update"}
<Wrench className="mr-1 h-3.5 w-3.5" />
{node.maintenanceMode ? "Exit Maintenance" : "Maintenance"}
</Button>
) : null}
{node.pendingAction ? (
<Badge variant="outline" className="text-blue-600">
Update pending...
</Badge>
) : node.deploymentMode === "DOCKER" ? (
getNodeLatest(node).version &&
node.agentVersion &&
isVersionOlder(node.agentVersion, getNodeLatest(node).version ?? "") ? (
<Tooltip>
<TooltipTrigger asChild>
<span>
<Button variant="outline" size="sm" disabled>
Update
</Button>
</span>
</TooltipTrigger>
<TooltipContent>Update via Docker image pull</TooltipContent>
</Tooltip>
) : null
) : getNodeLatest(node).version &&
node.agentVersion &&
isVersionOlder(node.agentVersion, getNodeLatest(node).version ?? "") ? (
<Button
variant="outline"
size="sm"
disabled={triggerUpdate.isPending}
onClick={(e) => {
e.preventDefault();
const latest = getNodeLatest(node);
triggerUpdate.mutate({
nodeId: node.id,
targetVersion: latest.version!,
downloadUrl: `https://github.com/${AGENT_REPO}/releases/download/${latest.tag}/vf-agent-linux-amd64`,
checksum: `sha256:${latest.checksums["vf-agent-linux-amd64"] ?? ""}`,
});
}}
>
{triggerUpdate.isPending ? "Updating..." : "Update"}
</Button>
) : null}
</div>
</TableCell>
</TableRow>
))}
Expand Down
19 changes: 18 additions & 1 deletion src/app/api/agent/config/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,26 @@ export async function GET(request: Request) {
// Fetch the node to check for pending actions (e.g., self-update)
const node = await prisma.vectorNode.findUnique({
where: { id: agent.nodeId },
select: { pendingAction: true },
select: { pendingAction: true, maintenanceMode: true },
});

// Maintenance mode short-circuit: reply with an empty pipeline list while still
// returning the poll interval, the secret backend settings and any pending action.
if (node?.maintenanceMode) {
  // The two lookups do not depend on each other, so run them concurrently.
  const [environment, settings] = await Promise.all([
    prisma.environment.findUnique({
      where: { id: agent.environmentId },
      select: { secretBackend: true, secretBackendConfig: true },
    }),
    prisma.systemSettings.findUnique({
      where: { id: "singleton" },
      select: { fleetPollIntervalMs: true },
    }),
  ]);

  // Falls back to "BUILTIN" when the environment row is missing.
  const secretBackend = environment?.secretBackend ?? "BUILTIN";

  return NextResponse.json({
    pipelines: [],
    pollIntervalMs: settings?.fleetPollIntervalMs ?? 15_000,
    secretBackend,
    // Keep parity with the regular response, which sends secretBackendConfig
    // for every non-BUILTIN backend; previously this branch dropped it.
    ...(secretBackend !== "BUILTIN"
      ? { secretBackendConfig: environment?.secretBackendConfig }
      : {}),
    pendingAction: node.pendingAction ?? undefined,
  });
}
Comment on lines +22 to +37
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

secretBackendConfig omitted for non-BUILTIN backends in maintenance response

When the node is in maintenance mode, the early return fetches the environment with only { secretBackend: true } and never includes secretBackendConfig in the response. The normal path includes it for all non-BUILTIN backends:

// normal path
...(environment.secretBackend !== "BUILTIN"
  ? { secretBackendConfig: environment.secretBackendConfig }
  : {}),

If an environment uses Vault, AWS SM, or another external backend and the agent relies on receiving secretBackendConfig to maintain or re-initialize its connection to that backend on each config poll, it would lose that initialization data for the duration of maintenance mode. When maintenance ends, the first poll would restore the full config, but if the agent's secret-backend client has any transient state derived from that field it could fail to reconnect cleanly.

The fix is to select secretBackendConfig alongside secretBackend in the maintenance-mode environment query, and then conditionally include it in the early-return payload to match the normal path:

const environment = await prisma.environment.findUnique({
  where: { id: agent.environmentId },
  select: { secretBackend: true, secretBackendConfig: true },
});
// ...
return NextResponse.json({
  pipelines: [],
  pollIntervalMs: settings?.fleetPollIntervalMs ?? 15_000,
  secretBackend: environment?.secretBackend ?? "BUILTIN",
  ...(environment?.secretBackend !== "BUILTIN" && environment?.secretBackendConfig
    ? { secretBackendConfig: environment.secretBackendConfig }
    : {}),
  pendingAction: node.pendingAction ?? undefined,
});
Prompt To Fix With AI
This is a comment left during a code review.
Path: src/app/api/agent/config/route.ts
Line: 22-37

Comment:
**`secretBackendConfig` omitted for non-BUILTIN backends in maintenance response**

When the node is in maintenance mode, the early return fetches the environment with only `{ secretBackend: true }` and never includes `secretBackendConfig` in the response. The normal path includes it for all non-BUILTIN backends:

```ts
// normal path
...(environment.secretBackend !== "BUILTIN"
  ? { secretBackendConfig: environment.secretBackendConfig }
  : {}),
```

If an environment uses Vault, AWS SM, or another external backend and the agent relies on receiving `secretBackendConfig` to maintain or re-initialize its connection to that backend on each config poll, it would lose that initialization data for the duration of maintenance mode. When maintenance ends, the first poll would restore the full config, but if the agent's secret-backend client has any transient state derived from that field it could fail to reconnect cleanly.

The fix is to select `secretBackendConfig` alongside `secretBackend` in the maintenance-mode environment query, and then conditionally include it in the early-return payload to match the normal path:

```ts
const environment = await prisma.environment.findUnique({
  where: { id: agent.environmentId },
  select: { secretBackend: true, secretBackendConfig: true },
});
// ...
return NextResponse.json({
  pipelines: [],
  pollIntervalMs: settings?.fleetPollIntervalMs ?? 15_000,
  secretBackend: environment?.secretBackend ?? "BUILTIN",
  ...(environment?.secretBackend !== "BUILTIN" && environment?.secretBackendConfig
    ? { secretBackendConfig: environment.secretBackendConfig }
    : {}),
  pendingAction: node.pendingAction ?? undefined,
});
```

How can I resolve this? If you propose a fix, please make it concise.


const environment = await prisma.environment.findUnique({
where: { id: agent.environmentId },
select: {
Expand Down
16 changes: 12 additions & 4 deletions src/components/fleet/deployment-matrix.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { useQuery } from "@tanstack/react-query";
import { useTRPC } from "@/trpc/client";
import { Badge } from "@/components/ui/badge";
import { Minus } from "lucide-react";
import { Minus, Wrench } from "lucide-react";
import Link from "next/link";
import { StatusDot } from "@/components/ui/status-dot";
import { pipelineStatusVariant, pipelineStatusLabel } from "@/lib/status";
Expand Down Expand Up @@ -51,10 +51,18 @@ export function DeploymentMatrix({ environmentId }: DeploymentMatrixProps) {
{nodes.map((node) => (
<th
key={node.id}
className="px-3 py-2 text-center font-medium text-muted-foreground"
className={`px-3 py-2 text-center font-medium text-muted-foreground ${
node.maintenanceMode ? "bg-orange-50/50 dark:bg-orange-950/10" : ""
}`}
>
<div>{node.name}</div>
<div className="text-xs font-normal">{node.host}</div>
{node.maintenanceMode && (
<div className="mt-1 flex items-center justify-center gap-1 text-xs text-orange-600 dark:text-orange-400">
<Wrench className="h-3 w-3" />
Maintenance
</div>
)}
</th>
))}
</tr>
Expand All @@ -79,7 +87,7 @@ export function DeploymentMatrix({ environmentId }: DeploymentMatrixProps) {

if (!ps) {
return (
<td key={node.id} className="px-3 py-2 text-center">
<td key={node.id} className={`px-3 py-2 text-center ${node.maintenanceMode ? "opacity-30" : ""}`}>
<div className="flex items-center justify-center">
<Minus className="h-4 w-4 text-muted-foreground/50" />
</div>
Expand All @@ -90,7 +98,7 @@ export function DeploymentMatrix({ environmentId }: DeploymentMatrixProps) {
const isOutdated = ps.version < pipeline.latestVersion;

return (
<td key={node.id} className="px-3 py-2 text-center">
<td key={node.id} className={`px-3 py-2 text-center ${node.maintenanceMode ? "opacity-30" : ""}`}>
<div className="flex flex-col items-center gap-0.5">
{isOutdated ? (
<div
Expand Down
25 changes: 25 additions & 0 deletions src/server/routers/fleet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,31 @@ export const fleetRouter = router({
});
}),

/**
 * Turns maintenance mode on or off for a single node.
 *
 * Input: `nodeId` (node to update) and `enabled` (desired state).
 * Requires ADMIN team access and is recorded by the audit middleware as
 * "node.maintenance_toggled".
 *
 * Returns the updated VectorNode row.
 * Throws TRPCError NOT_FOUND when no node has the given id.
 */
setMaintenanceMode: protectedProcedure
  .input(
    z.object({
      nodeId: z.string(),
      enabled: z.boolean(),
    }),
  )
  .use(withTeamAccess("ADMIN"))
  .use(withAudit("node.maintenance_toggled", "VectorNode"))
  .mutation(async ({ input }) => {
    // Only the maintenance fields are needed to validate and decide the timestamp.
    const node = await prisma.vectorNode.findUnique({
      where: { id: input.nodeId },
      select: { maintenanceMode: true, maintenanceModeAt: true },
    });
    if (!node) {
      throw new TRPCError({ code: "NOT_FOUND", message: "Node not found" });
    }

    // A repeated "enable" (e.g. sent from a stale list view) must not overwrite
    // the time maintenance mode was originally entered; only stamp a new time
    // on an actual off -> on transition. Disabling always clears the timestamp.
    const alreadyEnteredAt = node.maintenanceMode ? node.maintenanceModeAt : null;
    const maintenanceModeAt = input.enabled ? (alreadyEnteredAt ?? new Date()) : null;

    return prisma.vectorNode.update({
      where: { id: input.nodeId },
      data: {
        maintenanceMode: input.enabled,
        maintenanceModeAt,
      },
    });
  }),

listWithPipelineStatus: protectedProcedure
.input(z.object({ environmentId: z.string() }))
.use(withTeamAccess("VIEWER"))
Expand Down
Loading