From 4bb3995f4d84f712260dde8e1f305d0ad761effc Mon Sep 17 00:00:00 2001
From: Massimiliano Marinucci
Date: Thu, 26 Feb 2026 00:55:52 +0100
Subject: [PATCH] feat: per-agent model override + live Ollama capability audit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add three features to agent-team.ts:

1. Per-agent model/thinking override via frontmatter fields:
   Agents can now declare `model: provider/model-id` and `thinking: level`
   in their .md definition. Falls back to the dispatcher's model when not set.

2. Live Ollama model capability audit:
   On team activation, queries Ollama /api/show for each agent using a local
   model. Checks the `capabilities` array for tool-calling support, warns on
   sub-30B parameter models, and compares local digests against
   registry.ollama.com for available updates.

   Three severity levels:
   - BLOCK: model lacks "tools" capability — dispatch is blocked
   - WARN: has tools but < 30B params — unreliable for agentic use
   - UPDATE: newer version available on ollama.com

3. Dispatch-time gate:
   Before spawning a sub-agent with a local model, checks the capability
   cache (or runs a live check if cache is cold). Blocks dispatch with a
   clear error if the model cannot do tool calling.

New command: /agents-check — clears cache and re-audits.

Security hardening from adversarial review:
- Registry URL sanitized via SAFE_REGISTRY_NAME regex (rejects "." / ".." segments)
- Inverted to LOCAL_PROVIDERS allowlist (forward-compatible)
- Failed Ollama checks not cached: neither an unreachable host nor a non-OK
  /api/show response (transient failures don't poison)
- Dispatch gate blocks only when Ollama lists capabilities without "tools"
- OLLAMA_HOST normalized (scheme-less host[:port], trailing slash)
- dispatchAgent is now async for live capability checks; the "already running"
  guard is re-checked after the awaited check

Co-Authored-By: Claude Opus 4.6
---
 extensions/agent-team.ts | 325 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 317 insertions(+), 8 deletions(-)

diff --git a/extensions/agent-team.ts b/extensions/agent-team.ts
index 66ecbef..857c012 100644
--- a/extensions/agent-team.ts
+++ b/extensions/agent-team.ts
@@ -12,6 +12,7 @@
  * Commands:
  * /agents-team — switch active team
  * /agents-list — list loaded agents
+ * /agents-check — audit local model assignments (tool calling, updates)
  * /agents-grid N — set column count (default 2)
  *
  * Usage: pi -e extensions/agent-team.ts
@@ -31,6 +32,8 @@ interface AgentDef {
 	name: string;
 	description: string;
 	tools: string;
+	model: string; // "provider/model-id" override (empty = inherit dispatcher)
+	thinking: string; // thinking level override (empty = "off")
 	systemPrompt: string;
 	file: string;
 }
@@ -48,6 +51,165 @@ interface AgentState {
 	timer?: ReturnType<typeof setInterval>;
 }
 
+// ── Model Capability Checking ───────────────────
+
+// Local/self-hosted providers — these get Ollama capability checks.
+// Everything NOT in this set is assumed cloud (tool-calling capable).
+const LOCAL_PROVIDERS = new Set([
+	"ollama", "m3-ollama", "llama.cpp", "lmstudio", "llamafile", "jan",
+]);
+
+// Below this threshold, tool calling is unreliable for agentic use
+const MIN_RELIABLE_PARAMS_B = 30;
+
+interface ModelCheckResult {
+	model: string;
+	reachable: boolean;
+	capabilities: string[];
+	hasTools: boolean;
+	parameterSize: string;
+	parameterSizeB: number;
+	contextLength: number;
+	updateAvailable: boolean | null; // null = couldn't check
+}
+
+// Cache persists across team switches within a session
+const modelCheckCache = new Map<string, ModelCheckResult>();
+
+function parseModelString(modelStr: string): { provider: string; modelName: string } {
+	const idx = modelStr.indexOf("/");
+	if (idx === -1) return { provider: "", modelName: modelStr };
+	return { provider: modelStr.slice(0, idx), modelName: modelStr.slice(idx + 1) };
+}
+
+function isLocalProvider(provider: string): boolean {
+	if (provider === "") return false; // no provider prefix = inherits dispatcher, skip
+	return LOCAL_PROVIDERS.has(provider.toLowerCase());
+}
+
+function parseParamSize(sizeStr: string): number {
+	// Ollama reports e.g. "30.5B", "7.6B", "3.2B"
+	const match = sizeStr.match(/([\d.]+)\s*([TBMK])/i);
+	if (!match) return 0;
+	const num = parseFloat(match[1]);
+	const unit = match[2].toUpperCase();
+	if (unit === "T") return num * 1000; // trillion → billions
+	if (unit === "B") return num;
+	if (unit === "M") return num / 1000;
+	if (unit === "K") return num / 1_000_000;
+	return 0;
+}
+
+function splitModelTag(modelName: string): [string, string] {
+	const idx = modelName.lastIndexOf(":");
+	const base = idx === -1 ? modelName : modelName.slice(0, idx);
+	const tag = idx === -1 ? "latest" : modelName.slice(idx + 1);
+	return [base, tag];
+}
+
+// Registry path segments must start with an alphanumeric and contain only
+// alphanumerics, dot, dash, underscore — this rejects "." / ".." traversal segments
+const SAFE_REGISTRY_NAME = /^[a-zA-Z0-9][a-zA-Z0-9._-]*$/;
+
+// Ollama accepts OLLAMA_HOST as a bare "host" or "host:port" (no scheme), so
+// normalize it into a fetch()-able base URL without a trailing slash.
+function resolveOllamaHost(): string {
+	const raw = (process.env.OLLAMA_HOST || "").trim().replace(/\/+$/, "");
+	if (!raw) return "http://localhost:11434";
+	if (/^https?:\/\//i.test(raw)) return raw;
+	return /:\d+$/.test(raw) ? `http://${raw}` : `http://${raw}:11434`;
+}
+
+// Queries the local Ollama server (/api/show) for a model's capabilities, size and
+// context length, then best-effort compares its digest with registry.ollama.com.
+// Only successful lookups are cached; unreachable hosts and failed lookups are retried.
+async function checkOllamaModel(modelName: string): Promise<ModelCheckResult> {
+	const cached = modelCheckCache.get(modelName);
+	if (cached) return cached;
+
+	const host = resolveOllamaHost();
+	const result: ModelCheckResult = {
+		model: modelName,
+		reachable: false,
+		capabilities: [],
+		hasTools: false,
+		parameterSize: "",
+		parameterSizeB: 0,
+		contextLength: 0,
+		updateAvailable: null,
+	};
+
+	try {
+		const showRes = await fetch(`${host}/api/show`, {
+			method: "POST",
+			headers: { "Content-Type": "application/json" },
+			body: JSON.stringify({ model: modelName }),
+			signal: AbortSignal.timeout(5_000),
+		});
+
+		if (!showRes.ok) {
+			// Host reachable but /api/show failed (model not pulled, or a server error).
+			// Do NOT cache: the model may be pulled, or the server may recover, at any time.
+			result.reachable = true;
+			return result;
+		}
+
+		const info = await showRes.json() as any;
+		result.reachable = true;
+		result.capabilities = info.capabilities || [];
+		result.hasTools = result.capabilities.includes("tools");
+		result.parameterSize = info.details?.parameter_size || "";
+		result.parameterSizeB = parseParamSize(result.parameterSize);
+
+		// Context length lives under {architecture}.context_length in model_info
+		const modelInfo = info.model_info || {};
+		for (const [key, value] of Object.entries(modelInfo)) {
+			if (key.includes("context_length") && typeof value === "number") {
+				result.contextLength = value;
+				break;
+			}
+		}
+
+		// Check registry for updates (compare local vs remote blob digest)
+		try {
+			const [baseName, tag] = splitModelTag(modelName);
+			// Only query registry for simple library models (no slashes, no path traversal)
+			if (SAFE_REGISTRY_NAME.test(baseName) && SAFE_REGISTRY_NAME.test(tag)) {
+				const modelfile: string = info.modelfile || "";
+				const digestMatch = modelfile.match(/sha256-([a-f0-9]+)/);
+				if (digestMatch) {
+					const localDigest = digestMatch[1];
+					const regRes = await fetch(
+						`https://registry.ollama.com/v2/library/${baseName}/manifests/${tag}`,
+						{
+							headers: { Accept: "application/vnd.docker.distribution.manifest.v2+json" },
+							signal: AbortSignal.timeout(5_000),
+						},
+					);
+					if (regRes.ok) {
+						const manifest = await regRes.json() as any;
+						const modelLayer = (manifest.layers || []).find(
+							(l: any) => l.mediaType === "application/vnd.ollama.image.model",
+						);
+						if (modelLayer) {
+							const remoteDigest = (modelLayer.digest as string).replace("sha256:", "");
+							result.updateAvailable = localDigest !== remoteDigest;
+						}
+					}
+				}
+			}
+		} catch {
+			// Registry unreachable — non-critical
+		}
+	} catch {
+		// Ollama host unreachable — do NOT cache failures so retry works next time
+		return result;
+	}
+
+	modelCheckCache.set(modelName, result);
+	return result;
+}
+
 // ── Display Name Helper ──────────────────────────
 
 function displayName(name: string): string {
@@ -96,6 +258,8 @@ function parseAgentFile(filePath: string): AgentDef | null {
 		name: frontmatter.name,
 		description: frontmatter.description || "",
 		tools: frontmatter.tools || "read,grep,find,ls",
+		model: frontmatter.model || "",
+		thinking: frontmatter.thinking || "",
 		systemPrompt: match[2].trim(),
 		file: filePath,
 	};
@@ -201,6 +365,77 @@ export default function (pi: ExtensionAPI) {
 		gridCols = size <= 3 ? size : size === 4 ? 2 : 3;
 	}
 
+	// ── Model Audit (runs async after team activation) ──
+
+	async function auditTeamModels(ctx: any): Promise<void> {
+		const warnings: string[] = [];
+		const checks: Promise<void>[] = [];
+
+		for (const state of agentStates.values()) {
+			if (!state.def.model) continue; // inherits dispatcher model — skip
+
+			const { provider, modelName } = parseModelString(state.def.model);
+			if (!isLocalProvider(provider)) continue; // cloud model — skip
+
+			checks.push((async () => {
+				const result = await checkOllamaModel(modelName);
+				const label = displayName(state.def.name);
+
+				if (!result.reachable) {
+					warnings.push(
+						`${label}: Ollama unreachable — cannot verify "${modelName}"\n` +
+						` Check OLLAMA_HOST or network connectivity`,
+					);
+					return;
+				}
+
+				// Model not installed
+				if (result.capabilities.length === 0 && !result.hasTools) {
+					warnings.push(
+						`${label}: model "${modelName}" not found on Ollama\n` +
+						` Run: ollama pull ${modelName}`,
+					);
+					return;
+				}
+
+				// No tool calling support — agent WILL fail
+				if (!result.hasTools) {
+					warnings.push(
+						`BLOCK ${label}: "${modelName}" does NOT support tool calling\n` +
+						` Capabilities: [${result.capabilities.join(", ")}]\n` +
+						` Agent will fail to use tools (read, write, bash, etc.)\n` +
+						` Fix: use a tool-capable model or remove the model override`,
+					);
+				} else if (result.parameterSizeB > 0 && result.parameterSizeB < MIN_RELIABLE_PARAMS_B) {
+					// Has tools but too small for reliable use
+					warnings.push(
+						`WARN ${label}: "${modelName}" (${result.parameterSize}) — ` +
+						`tool calling unreliable below ${MIN_RELIABLE_PARAMS_B}B\n` +
+						` Recommend: qwen3-coder:latest (30B+) or a cloud model`,
+					);
+				}
+
+				// Update available
+				if (result.updateAvailable === true) {
+					warnings.push(
+						`UPDATE ${label}: "${modelName}" has a newer version\n` +
+						` Run: ollama pull ${modelName}`,
+					);
+				}
+			})());
+		}
+
+		await Promise.all(checks);
+
+		if (warnings.length > 0) {
+			ctx.ui.notify(
+				`Model Audit — ${warnings.length} finding(s):\n\n` +
+				warnings.join("\n\n"),
+				"warning",
+			);
+		}
+	}
+
 	// ── Grid Rendering ───────────────────────────
 
 	function renderCard(state: AgentState, colWidth: number, theme: any): string[] {
@@ -298,7 +533,7 @@ export default function (pi: ExtensionAPI) {
 
 	// ── Dispatch Agent (returns Promise) ─────────
 
-	function dispatchAgent(
+	async function dispatchAgent(
 		agentName: string,
 		task: string,
 		ctx: any,
@@ -306,19 +541,54 @@ export default function (pi: ExtensionAPI) {
 		const key = agentName.toLowerCase();
 		const state = agentStates.get(key);
 		if (!state) {
-			return Promise.resolve({
+			return {
 				output: `Agent "${agentName}" not found. Available: ${Array.from(agentStates.values()).map(s => displayName(s.def.name)).join(", ")}`,
 				exitCode: 1,
 				elapsed: 0,
-			});
+			};
 		}
 
 		if (state.status === "running") {
-			return Promise.resolve({
+			return {
 				output: `Agent "${displayName(state.def.name)}" is already running. Wait for it to finish.`,
 				exitCode: 1,
 				elapsed: 0,
-			});
+			};
+		}
+
+		// Pre-dispatch model check for local models — blocks if model lacks tool calling
+		const effectiveModel = state.def.model || (ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : "");
+		if (effectiveModel) {
+			const { provider, modelName } = parseModelString(effectiveModel);
+			if (isLocalProvider(provider)) {
+				// If cache is cold, run a live check now (blocks dispatch briefly)
+				let check = modelCheckCache.get(modelName);
+				if (!check) {
+					try { check = await checkOllamaModel(modelName); } catch {}
+				}
+				// Block only on positive evidence: Ollama listed capabilities and "tools" is absent.
+				// An empty list means the lookup failed or the model is not served by Ollama.
+				if (check && check.reachable && check.capabilities.length > 0 && !check.hasTools) {
+					return {
+						output: `BLOCKED: "${modelName}" does not support tool calling ` +
+							`(capabilities: [${check.capabilities.join(", ")}]). ` +
+							`Agent "${displayName(state.def.name)}" would fail to use tools. ` +
+							`Fix the model override in ${state.def.file} or run /agents-check.`,
+						exitCode: 1,
+						elapsed: 0,
+					};
+				}
+			}
 		}
+
+		// The check above may have awaited: re-test so two concurrent dispatches
+		// of the same agent cannot both get past the "already running" guard.
+		if (state.status === "running") {
+			return {
+				output: `Agent "${displayName(state.def.name)}" is already running. Wait for it to finish.`,
+				exitCode: 1,
+				elapsed: 0,
+			};
+		}
 
 		state.status = "running";
@@ -335,10 +605,14 @@ export default function (pi: ExtensionAPI) {
 			updateWidget();
 		}, 1000);
 
-		const model = ctx.model
+		const dispatcherModel = ctx.model
 			? `${ctx.model.provider}/${ctx.model.id}`
 			: "openrouter/google/gemini-3-flash-preview";
 
+		// Per-agent model override from frontmatter, fall back to dispatcher's model
+		const model = state.def.model || dispatcherModel;
+		const thinking = state.def.thinking || "off";
+
 		// Session file for this agent
 		const agentKey = state.def.name.toLowerCase().replace(/\s+/g, "-");
 		const agentSessionFile = join(sessionDir, `${agentKey}.json`);
@@ -350,7 +624,7 @@ export default function (pi: ExtensionAPI) {
 			"--no-extensions",
 			"--model", model,
 			"--tools", state.def.tools,
-			"--thinking", "off",
+			"--thinking", thinking,
 			"--append-system-prompt", state.def.systemPrompt,
 			"--session", agentSessionFile,
 		];
@@ -586,6 +860,8 @@ export default function (pi: ExtensionAPI) {
 			updateWidget();
 			ctx.ui.setStatus("agent-team", `Team: ${name} (${agentStates.size})`);
 			ctx.ui.notify(`Team: ${name} — ${Array.from(agentStates.values()).map(s => displayName(s.def.name)).join(", ")}`, "info");
+			// Async model audit — runs in background, notifies on findings
+			auditTeamModels(ctx).catch(() => {});
 		},
 	});
 
@@ -596,7 +872,9 @@ export default function (pi: ExtensionAPI) {
 			const names = Array.from(agentStates.values())
 				.map(s => {
 					const session = s.sessionFile ? "resumed" : "new";
-					return `${displayName(s.def.name)} (${s.status}, ${session}, runs: ${s.runCount}): ${s.def.description}`;
+					const modelInfo = s.def.model ? ` [${s.def.model}]` : " [dispatcher]";
+					const thinkInfo = s.def.thinking ? ` thinking:${s.def.thinking}` : "";
+					return `${displayName(s.def.name)} (${s.status}, ${session}, runs: ${s.runCount})${modelInfo}${thinkInfo}: ${s.def.description}`;
 				})
 				.join("\n");
 			_ctx.ui.notify(names || "No agents loaded", "info");
@@ -626,6 +904,33 @@ export default function (pi: ExtensionAPI) {
 		},
 	});
 
+	pi.registerCommand("agents-check", {
+		description: "Audit local model assignments — checks tool-calling capability, param size, and updates",
+		handler: async (_args, ctx) => {
+			widgetCtx = ctx;
+			// Clear cache to force fresh checks
+			modelCheckCache.clear();
+
+			const localCount = Array.from(agentStates.values())
+				.filter(s => {
+					if (!s.def.model) return false;
+					const { provider } = parseModelString(s.def.model);
+					return isLocalProvider(provider);
+				}).length;
+
+			if (localCount === 0) {
+				ctx.ui.notify(
+					"No agents use local models — all agents inherit the dispatcher model or use cloud providers.",
+					"info",
+				);
+				return;
+			}
+
+			ctx.ui.notify(`Checking ${localCount} local model assignment(s)...`, "info");
+			await auditTeamModels(ctx);
+		},
+	});
+
 	// ── System Prompt Override ───────────────────
 
 	pi.on("before_agent_start", async (_event, _ctx) => {
@@ -705,11 +1010,15 @@ ${agentCatalog}`,
 				`Team sets loaded from: .pi/agents/teams.yaml\n\n` +
 				`/agents-team Select a team\n` +
 				`/agents-list List active agents and status\n` +
+				`/agents-check Audit local model assignments\n` +
 				`/agents-grid <1-6> Set grid column count`,
 			"info",
 		);
 		updateWidget();
 
+		// Async model audit — runs in background after UI is ready
+		auditTeamModels(_ctx).catch(() => {});
+
 		// Footer: model | team | context bar
 		_ctx.ui.setFooter((_tui, theme, _footerData) => ({
 			dispose: () => {},