From 4bb3995f4d84f712260dde8e1f305d0ad761effc Mon Sep 17 00:00:00 2001
From: Massimiliano Marinucci <mmarinucci@numinate.com>
Date: Thu, 26 Feb 2026 00:55:52 +0100
Subject: [PATCH] feat: per-agent model override + live Ollama capability audit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add three features to agent-team.ts:

1. Per-agent model/thinking override via frontmatter fields:
   Agents can now declare `model: provider/model-id` and
   `thinking: level` in their .md definition. When unset, `model`
   falls back to the dispatcher's model and `thinking` to "off".

2. Live Ollama model capability audit:
   On team activation, queries Ollama /api/show for each agent
   using a local model. Checks the `capabilities` array for
   tool-calling support, warns on sub-30B parameter models,
   and compares local digests against registry.ollama.com for
   available updates.

   Three severity levels:
   - BLOCK: model lacks "tools" capability — dispatch is blocked
   - WARN: has tools but < 30B params — unreliable for agentic use
   - UPDATE: newer version available on ollama.com

3. Dispatch-time gate:
   Before spawning a sub-agent with a local model, checks the
   capability cache (or runs a live check if cache is cold).
   Blocks dispatch with a clear error if the model cannot do
   tool calling.

New command: /agents-check — clears cache and re-audits.

Security hardening from adversarial review:
- Registry URL sanitized via SAFE_REGISTRY_NAME regex
- Inverted to LOCAL_PROVIDERS allowlist (forward-compatible)
- Failed Ollama checks not cached (transient failures don't poison)
- dispatchAgent is now async for live capability checks

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 extensions/agent-team.ts | 300 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 292 insertions(+), 8 deletions(-)
diff --git a/extensions/agent-team.ts b/extensions/agent-team.ts
index 66ecbef..857c012 100644
--- a/extensions/agent-team.ts
+++ b/extensions/agent-team.ts
@@ -12,6 +12,7 @@
  * Commands:
  *   /agents-team          — switch active team
  *   /agents-list          — list loaded agents
+ *   /agents-check         — audit local model assignments (tool calling, updates)
  *   /agents-grid N        — set column count (default 2)
  *
  * Usage: pi -e extensions/agent-team.ts
@@ -31,6 +32,8 @@ interface AgentDef {
 	name: string;
 	description: string;
 	tools: string;
+	model: string;    // "provider/model-id" override (empty = inherit dispatcher)
+	thinking: string; // thinking level override (empty = "off")
 	systemPrompt: string;
 	file: string;
 }
@@ -48,6 +51,152 @@ interface AgentState {
 	timer?: ReturnType<typeof setInterval>;
 }
 
+// ── Model Capability Checking ───────────────────
+
+// Local/self-hosted providers (matched case-insensitively) — these get Ollama capability checks.
+// Everything NOT in this set is assumed cloud (tool-calling capable) and is never probed.
+const LOCAL_PROVIDERS = new Set([
+	"ollama", "m3-ollama", "llama.cpp", "lmstudio", "llamafile", "jan", // NOTE(review): all are probed via Ollama's /api/show — confirm the non-Ollama servers expose it
+]);
+
+// Below this threshold, tool calling is unreliable for agentic use
+const MIN_RELIABLE_PARAMS_B = 30; // in billions of parameters (compared against parameterSizeB)
+
+interface ModelCheckResult { // outcome of one checkOllamaModel() probe
+	model: string; // model name the probe was run for (provider prefix already stripped by callers)
+	reachable: boolean; // true once the Ollama host answered /api/show, even with an error status
+	capabilities: string[]; // `capabilities` from /api/show; empty when absent or the lookup failed
+	hasTools: boolean; // true when capabilities contains "tools"
+	parameterSize: string; // raw `details.parameter_size` string, "" when missing
+	parameterSizeB: number; // parameterSize converted to billions; 0 when it could not be parsed
+	contextLength: number; // first numeric model_info entry whose key contains "context_length"; 0 if none
+	updateAvailable: boolean | null; // true = local and registry digests differ; null = couldn't check
+}
+
+// Cache persists across team switches within a session; emptied only by /agents-check
+const modelCheckCache = new Map<string, ModelCheckResult>(); // keyed by model name without the provider prefix
+
+function parseModelString(modelStr: string): { provider: string; modelName: string } {
+	const slashAt = modelStr.indexOf("/"); // only the FIRST slash splits; the model id may contain more
+	const provider = slashAt < 0 ? "" : modelStr.slice(0, slashAt);
+	return { provider, modelName: slashAt < 0 ? modelStr : modelStr.slice(slashAt + 1) };
+}
+
+function isLocalProvider(provider: string): boolean {
+	// An empty provider means "inherit the dispatcher model", which is never treated as local;
+	// otherwise membership in LOCAL_PROVIDERS is tested on the lower-cased name.
+	return provider !== "" && LOCAL_PROVIDERS.has(provider.toLowerCase());
+}
+
+function parseParamSize(sizeStr: string): number {
+	const parts = /([\d.]+)\s*([TBMK])/i.exec(sizeStr); // Ollama reports e.g. "30.5B", "7.6B", "3.2B"
+	if (parts === null) return 0; // no "<number><unit>" token → size unknown
+	const magnitude = parseFloat(parts[1]);
+	switch (parts[2].toUpperCase()) { // normalise every unit to billions
+		case "T": return magnitude * 1000; // trillion → billions
+		case "B": return magnitude;
+		case "M": return magnitude / 1000;
+		case "K": return magnitude / 1_000_000;
+		default: return 0;
+	}
+}
+
+function splitModelTag(modelName: string): [string, string] {
+	// Split "name:tag" into [name, tag], defaulting the tag to "latest". A ":" that appears
+	// before the last "/" belongs to a registry host:port (e.g. "localhost:5000/llama3"), not a tag.
+	const idx = modelName.lastIndexOf(":");
+	if (idx === -1 || idx < modelName.lastIndexOf("/")) return [modelName, "latest"];
+	return [modelName.slice(0, idx), modelName.slice(idx + 1)];
+}
+
+// Only safe alphanumeric + dot/dash/underscore patterns should hit the registry
+const SAFE_REGISTRY_NAME = /^[a-zA-Z0-9._-]+$/; // tested on base name and tag separately before they are placed in the registry URL
+
+async function checkOllamaModel(modelName: string): Promise<ModelCheckResult> {
+	// Probe Ollama's /api/show (and the public registry) for `modelName`; never throws.
+	const cached = modelCheckCache.get(modelName);
+	if (cached) return cached; // only completed /api/show lookups are ever cached
+	// OLLAMA_HOST is commonly set without a scheme ("127.0.0.1:11434"); fetch() needs one
+	const rawHost = (process.env.OLLAMA_HOST || "http://localhost:11434").replace(/\/+$/, "");
+	const host = /^https?:\/\//i.test(rawHost) ? rawHost : `http://${rawHost}`;
+	const result: ModelCheckResult = {
+		model: modelName,
+		reachable: false,
+		capabilities: [],
+		hasTools: false,
+		parameterSize: "",
+		parameterSizeB: 0,
+		contextLength: 0,
+		updateAvailable: null,
+	};
+
+	try {
+		const showRes = await fetch(`${host}/api/show`, {
+			method: "POST",
+			headers: { "Content-Type": "application/json" },
+			body: JSON.stringify({ model: modelName }),
+			signal: AbortSignal.timeout(5_000),
+		});
+
+		if (!showRes.ok) {
+			// Host answered but the lookup failed (model missing or server error): report, don't cache
+			result.reachable = true;
+			return result;
+		}
+
+		const info = await showRes.json() as any;
+		result.reachable = true;
+		result.capabilities = Array.isArray(info.capabilities) ? info.capabilities : [];
+		result.hasTools = result.capabilities.includes("tools");
+		result.parameterSize = info.details?.parameter_size || "";
+		result.parameterSizeB = parseParamSize(result.parameterSize);
+
+		// Context length lives under {architecture}.context_length in model_info
+		const modelInfo = info.model_info || {};
+		for (const [key, value] of Object.entries(modelInfo)) {
+			if (key.includes("context_length") && typeof value === "number") {
+				result.contextLength = value;
+				break;
+			}
+		}
+
+		// Check registry for updates (compare local vs remote blob digest); best-effort only
+		try {
+			const [baseName, tag] = splitModelTag(modelName);
+			if (SAFE_REGISTRY_NAME.test(baseName) && SAFE_REGISTRY_NAME.test(tag)) { // simple library names only (no slashes, no path traversal)
+				const modelfile: string = info.modelfile || "";
+				const digestMatch = modelfile.match(/sha256-([a-f0-9]+)/);
+				if (digestMatch) {
+					const localDigest = digestMatch[1];
+					const regRes = await fetch(
+						`https://registry.ollama.com/v2/library/${baseName}/manifests/${tag}`,
+						{
+							headers: { Accept: "application/vnd.docker.distribution.manifest.v2+json" },
+							signal: AbortSignal.timeout(5_000),
+						},
+					);
+					if (regRes.ok) {
+						const manifest = await regRes.json() as any;
+						const modelLayer = (manifest.layers || []).find(
+							(l: any) => l.mediaType === "application/vnd.ollama.image.model",
+						);
+						if (modelLayer) {
+							const remoteDigest = (modelLayer.digest as string).replace("sha256:", "");
+							result.updateAvailable = localDigest !== remoteDigest;
+						}
+					}
+				}
+			}
+		} catch {
+			// Registry unreachable — non-critical; updateAvailable keeps its current value
+		}
+	} catch {
+		// Ollama host unreachable — do NOT cache failures so retry works next time
+		return result;
+	}
+
+	modelCheckCache.set(modelName, result);
+	return result;
+}
+
 // ── Display Name Helper ──────────────────────────
 
 function displayName(name: string): string {
@@ -96,6 +245,8 @@ function parseAgentFile(filePath: string): AgentDef | null {
 			name: frontmatter.name,
 			description: frontmatter.description || "",
 			tools: frontmatter.tools || "read,grep,find,ls",
+			model: frontmatter.model || "",
+			thinking: frontmatter.thinking || "",
 			systemPrompt: match[2].trim(),
 			file: filePath,
 		};
@@ -201,6 +352,77 @@ export default function (pi: ExtensionAPI) {
 		gridCols = size <= 3 ? size : size === 4 ? 2 : 3;
 	}
 
+	// ── Model Audit (runs async after team activation) ──
+
+	async function auditTeamModels(ctx: any): Promise<void> { // probes each agent's local-model override and reports findings via ctx.ui.notify
+		const warnings: string[] = [];
+		const checks: Promise<void>[] = []; // one probe per qualifying agent, awaited together below
+
+		for (const state of agentStates.values()) {
+			if (!state.def.model) continue; // inherits dispatcher model — skip
+
+			const { provider, modelName } = parseModelString(state.def.model);
+			if (!isLocalProvider(provider)) continue; // cloud model — skip
+
+			checks.push((async () => {
+				const result = await checkOllamaModel(modelName);
+				const label = displayName(state.def.name);
+
+				if (!result.reachable) {
+					warnings.push(
+						`${label}: Ollama unreachable — cannot verify "${modelName}"\n` +
+						`  Check OLLAMA_HOST or network connectivity`,
+					);
+					return;
+				}
+
+				// Model not installed — inferred from a reachable host that reported no capabilities
+				if (result.capabilities.length === 0 && !result.hasTools) { // NOTE(review): a server that omits `capabilities` would also land here — confirm
+					warnings.push(
+						`${label}: model "${modelName}" not found on Ollama\n` +
+						`  Run: ollama pull ${modelName}`,
+					);
+					return;
+				}
+
+				// No tool calling support — agent WILL fail
+				if (!result.hasTools) {
+					warnings.push(
+						`BLOCK  ${label}: "${modelName}" does NOT support tool calling\n` +
+						`  Capabilities: [${result.capabilities.join(", ")}]\n` +
+						`  Agent will fail to use tools (read, write, bash, etc.)\n` +
+						`  Fix: use a tool-capable model or remove the model override`,
+					);
+				} else if (result.parameterSizeB > 0 && result.parameterSizeB < MIN_RELIABLE_PARAMS_B) {
+					// Has tools but too small for reliable use (a size of 0 means unknown, so no warning)
+					warnings.push(
+						`WARN   ${label}: "${modelName}" (${result.parameterSize}) — ` +
+						`tool calling unreliable below ${MIN_RELIABLE_PARAMS_B}B\n` +
+						`  Recommend: qwen3-coder:latest (30B+) or a cloud model`,
+					);
+				}
+
+				// Update available — reported in addition to any BLOCK/WARN finding above
+				if (result.updateAvailable === true) {
+					warnings.push(
+						`UPDATE ${label}: "${modelName}" has a newer version\n` +
+						`  Run: ollama pull ${modelName}`,
+					);
+				}
+			})());
+		}
+
+		await Promise.all(checks); // warnings were pushed in completion order, so their order can vary between runs
+
+		if (warnings.length > 0) { // stays silent when every check passes
+			ctx.ui.notify(
+				`Model Audit — ${warnings.length} finding(s):\n\n` +
+				warnings.join("\n\n"),
+				"warning",
+			);
+		}
+	}
+
 	// ── Grid Rendering ───────────────────────────
 
 	function renderCard(state: AgentState, colWidth: number, theme: any): string[] {
@@ -298,7 +520,7 @@ export default function (pi: ExtensionAPI) {
 
 	// ── Dispatch Agent (returns Promise) ─────────
 
-	function dispatchAgent(
+	async function dispatchAgent(
 		agentName: string,
 		task: string,
 		ctx: any,
@@ -306,19 +528,42 @@ export default function (pi: ExtensionAPI) {
 		const key = agentName.toLowerCase();
 		const state = agentStates.get(key);
 		if (!state) {
-			return Promise.resolve({
+			return {
 				output: `Agent "${agentName}" not found. Available: ${Array.from(agentStates.values()).map(s => displayName(s.def.name)).join(", ")}`,
 				exitCode: 1,
 				elapsed: 0,
-			});
+			};
 		}
 
 		if (state.status === "running") {
-			return Promise.resolve({
+			return {
 				output: `Agent "${displayName(state.def.name)}" is already running. Wait for it to finish.`,
 				exitCode: 1,
 				elapsed: 0,
-			});
+			};
+		}
+
+		// Pre-dispatch model check for local models — blocks if model lacks tool calling
+		const effectiveModel = state.def.model || (ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : "");
+		if (effectiveModel) {
+			const { provider, modelName } = parseModelString(effectiveModel);
+			if (isLocalProvider(provider)) {
+				// If cache is cold, run a live check now (blocks dispatch briefly)
+				let check = modelCheckCache.get(modelName);
+				if (!check) {
+					try { check = await checkOllamaModel(modelName); } catch {}
+				}
+				if (check && !check.hasTools && check.reachable) {
+					return {
+						output: `BLOCKED: "${modelName}" does not support tool calling ` +
+							`(capabilities: [${check.capabilities.join(", ")}]). ` +
+							`Agent "${displayName(state.def.name)}" would fail to use tools. ` +
+							`Fix the model override in ${state.def.file} or run /agents-check.`,
+						exitCode: 1,
+						elapsed: 0,
+					};
+				}
+			}
 		}
 
 		state.status = "running";
@@ -335,10 +580,14 @@ export default function (pi: ExtensionAPI) {
 			updateWidget();
 		}, 1000);
 
-		const model = ctx.model
+		const dispatcherModel = ctx.model
 			? `${ctx.model.provider}/${ctx.model.id}`
 			: "openrouter/google/gemini-3-flash-preview";
 
+		// Per-agent model override from frontmatter, fall back to dispatcher's model
+		const model = state.def.model || dispatcherModel;
+		const thinking = state.def.thinking || "off";
+
 		// Session file for this agent
 		const agentKey = state.def.name.toLowerCase().replace(/\s+/g, "-");
 		const agentSessionFile = join(sessionDir, `${agentKey}.json`);
@@ -350,7 +599,7 @@ export default function (pi: ExtensionAPI) {
 			"--no-extensions",
 			"--model", model,
 			"--tools", state.def.tools,
-			"--thinking", "off",
+			"--thinking", thinking,
 			"--append-system-prompt", state.def.systemPrompt,
 			"--session", agentSessionFile,
 		];
@@ -586,6 +835,8 @@ export default function (pi: ExtensionAPI) {
 			updateWidget();
 			ctx.ui.setStatus("agent-team", `Team: ${name} (${agentStates.size})`);
 			ctx.ui.notify(`Team: ${name} — ${Array.from(agentStates.values()).map(s => displayName(s.def.name)).join(", ")}`, "info");
+			// Async model audit — runs in background, notifies on findings
+			auditTeamModels(ctx).catch(() => {});
 		},
 	});
 
@@ -596,7 +847,9 @@ export default function (pi: ExtensionAPI) {
 			const names = Array.from(agentStates.values())
 				.map(s => {
 					const session = s.sessionFile ? "resumed" : "new";
-					return `${displayName(s.def.name)} (${s.status}, ${session}, runs: ${s.runCount}): ${s.def.description}`;
+					const modelInfo = s.def.model ? ` [${s.def.model}]` : " [dispatcher]";
+					const thinkInfo = s.def.thinking ? ` thinking:${s.def.thinking}` : "";
+					return `${displayName(s.def.name)} (${s.status}, ${session}, runs: ${s.runCount})${modelInfo}${thinkInfo}: ${s.def.description}`;
 				})
 				.join("\n");
 			_ctx.ui.notify(names || "No agents loaded", "info");
@@ -626,6 +879,33 @@ export default function (pi: ExtensionAPI) {
 		},
 	});
 
+	pi.registerCommand("agents-check", {
+		description: "Audit local model assignments — checks tool-calling capability, param size, and updates",
+		handler: async (_args, ctx) => {
+			widgetCtx = ctx;
+			// Start from an empty cache so every model is probed again
+			modelCheckCache.clear();
+
+			// Count agents whose explicit model override targets a local provider
+			let localCount = 0;
+			for (const { def } of agentStates.values()) {
+				const overridesLocally = !!def.model && isLocalProvider(parseModelString(def.model).provider);
+				if (overridesLocally) localCount += 1;
+			}
+
+			if (localCount === 0) {
+				// Nothing to audit: tell the user instead of running an empty check
+				const nothingLocal =
+					"No agents use local models — all agents inherit the dispatcher model or use cloud providers.";
+				ctx.ui.notify(nothingLocal, "info");
+				return;
+			}
+
+			ctx.ui.notify(`Checking ${localCount} local model assignment(s)...`, "info");
+			await auditTeamModels(ctx);
+		},
+	});
+
 	// ── System Prompt Override ───────────────────
 
 	pi.on("before_agent_start", async (_event, _ctx) => {
@@ -705,11 +985,15 @@ ${agentCatalog}`,
 			`Team sets loaded from: .pi/agents/teams.yaml\n\n` +
 			`/agents-team          Select a team\n` +
 			`/agents-list          List active agents and status\n` +
+			`/agents-check         Audit local model assignments\n` +
 			`/agents-grid <1-6>    Set grid column count`,
 			"info",
 		);
 		updateWidget();
 
+		// Async model audit — runs in background after UI is ready
+		auditTeamModels(_ctx).catch(() => {});
+
 		// Footer: model | team | context bar
 		_ctx.ui.setFooter((_tui, theme, _footerData) => ({
 			dispose: () => {},