feat(ltm): tighten entry budget, add consolidation pass

BYK · BYK · commit 74728df154a4 · 2026-02-27T22:14:37.000Z
- Fix token undercount: estimateTokens uses chars/3 (not chars/4) in all
  LTM budget paths — real tokenization of markdown-heavy technical text is
  ~3 chars/token, reducing overflow risk from systematic undercount
- Reduce entry size limits by 40%: MAX_ENTRY_CONTENT_LENGTH 2000→1200 chars,
  pruneOversized threshold 2000→1200, curator prompt limit 500→150 words per entry
- Strengthen curator dedup: add PREFER UPDATES OVER CREATES section,
  instruct LLM to replace (not append) on update, merge overlapping entries
- Add consolidation pass: new curator.consolidate() triggered from
  session.idle when entry count exceeds curator.maxEntries (default: 25);
  uses dedicated CONSOLIDATION_SYSTEM prompt with update/delete ops only
diff --git a/src/config.ts b/src/config.ts
@@ -28,6 +28,8 @@ export const LoreConfig = z.object({
       enabled: z.boolean().default(true),
       onIdle: z.boolean().default(true),
       afterTurns: z.number().min(1).default(10),
+      /** Max knowledge entries per project before consolidation triggers. Default: 25. */
+      maxEntries: z.number().min(10).default(25),
     })
     .default({}),
   pruning: z
diff --git a/src/curator.ts b/src/curator.ts
@@ -2,16 +2,16 @@ import type { createOpencodeClient } from "@opencode-ai/sdk";
 import { config } from "./config";
 import * as temporal from "./temporal";
 import * as ltm from "./ltm";
-import { CURATOR_SYSTEM, curatorUser } from "./prompt";
+import { CURATOR_SYSTEM, curatorUser, CONSOLIDATION_SYSTEM, consolidationUser } from "./prompt";
 import { workerSessionIDs } from "./distillation";
 
 /**
  * Maximum length (chars) for a single knowledge entry's content.
- * ~500 tokens. Entries exceeding this are truncated with a notice.
+ * ~400 tokens at chars/3. Entries exceeding this are truncated with a notice.
  * The curator prompt also instructs the model to stay within this limit,
  * so truncation is a last-resort safety net.
  */
-const MAX_ENTRY_CONTENT_LENGTH = 2000;
+const MAX_ENTRY_CONTENT_LENGTH = 1200;
 
 type Client = ReturnType<typeof createOpencodeClient>;
 
@@ -172,3 +172,88 @@ export async function run(input: {
 export function resetCurationTracker() {
   lastCuratedAt = 0;
 }
+
+/**
+ * Consolidation pass: sends every entry from ltm.forProject() to the model, via the worker session
+ * from ensureWorkerSession(), and applies the returned ops to shrink toward curator.maxEntries.
+ * Returns zero counts, leaving entries untouched, when the curator is disabled, the entry count is
+ * within the limit, or the latest message is not an assistant message containing a text part.
+ * Only "update" and "delete" ops are applied; ops whose id ltm.get() cannot find are skipped.
+ */
+export async function consolidate(input: {
+  client: Client;
+  projectPath: string;
+  sessionID: string;
+  model?: { providerID: string; modelID: string };
+}): Promise<{ updated: number; deleted: number }> {
+  const cfg = config();
+  if (!cfg.curator.enabled) return { updated: 0, deleted: 0 };
+  // NOTE(review): index.ts counts with ltm.forProject(projectPath), no 2nd arg — confirm both counts agree.
+  const entries = ltm.forProject(input.projectPath, cfg.crossProject);
+  if (entries.length <= cfg.curator.maxEntries) return { updated: 0, deleted: 0 };
+
+  const entriesForPrompt = entries.map((e) => ({
+    id: e.id,
+    category: e.category,
+    title: e.title,
+    content: e.content,
+  }));
+
+  const userContent = consolidationUser({
+    entries: entriesForPrompt,
+    targetMax: cfg.curator.maxEntries,
+  });
+  const workerID = await ensureWorkerSession(input.client, input.sessionID);
+  const model = input.model ?? cfg.model;
+  const parts = [
+    { type: "text" as const, text: `${CONSOLIDATION_SYSTEM}\n\n${userContent}` },
+  ];
+  // Prompt the worker session, then read back its latest messages to find the assistant reply.
+  await input.client.session.prompt({
+    path: { id: workerID },
+    body: {
+      parts,
+      agent: "lore-curator",
+      ...(model ? { model } : {}),
+    },
+  });
+
+  const msgs = await input.client.session.messages({
+    path: { id: workerID },
+    query: { limit: 2 },
+  });
+  const last = msgs.data?.at(-1);
+  if (!last || last.info.role !== "assistant") return { updated: 0, deleted: 0 };
+
+  const responsePart = last.parts.find((p) => p.type === "text");
+  if (!responsePart || responsePart.type !== "text") return { updated: 0, deleted: 0 };
+
+  const ops = parseOps(responsePart.text);
+  let updated = 0;
+  let deleted = 0;
+  // Apply ops one at a time; an op whose id ltm.get() cannot find is skipped and not counted.
+  for (const op of ops) {
+    // Update: content over MAX_ENTRY_CONTENT_LENGTH is cut to that length, then the notice is appended.
+    if (op.op === "update") {
+      const entry = ltm.get(op.id);
+      if (entry) {
+        const content =
+          op.content !== undefined && op.content.length > MAX_ENTRY_CONTENT_LENGTH
+            ? op.content.slice(0, MAX_ENTRY_CONTENT_LENGTH) +
+              " [truncated — entry too long]"
+            : op.content; // NOTE(review): truncated text is limit + notice long — confirm ltm.pruneOversized() accepts it
+        ltm.update(op.id, { content, confidence: op.confidence });
+        updated++;
+      }
+    } else if (op.op === "delete") {
+      const entry = ltm.get(op.id);
+      if (entry) {
+        ltm.remove(op.id);
+        deleted++;
+      }
+    }
+    // "create" ops are silently ignored — consolidation must not add entries.
+  }
+
+  return { updated, deleted };
+}
diff --git a/src/index.ts b/src/index.ts
@@ -56,7 +56,7 @@ export const LorePlugin: Plugin = async (ctx) => {
   // Prune any corrupted/oversized knowledge entries left by the AGENTS.md
   // backslash-escaping bug or curator hallucinations. Sets confidence → 0
   // (below the 0.2 query threshold) so they stop polluting the context.
-  const pruned = ltm.pruneOversized(2000);
+  const pruned = ltm.pruneOversized(1200);
   if (pruned > 0) {
     console.error(`[lore] pruned ${pruned} oversized knowledge entries (confidence set to 0)`);
   }
@@ -301,6 +301,29 @@ export const LorePlugin: Plugin = async (ctx) => {
           turnsSinceCuration = 0;
         }
 
+        // Consolidate entries if count exceeds cfg.curator.maxEntries.
+        // Runs after normal curation so newly created entries are counted.
+        // Only triggers when truly over the limit to avoid redundant LLM calls.
+        try {
+          const allEntries = ltm.forProject(projectPath);
+          if (allEntries.length > cfg.curator.maxEntries) {
+            console.error(
+              `[lore] entry count ${allEntries.length} exceeds maxEntries ${cfg.curator.maxEntries} — running consolidation`,
+            );
+            const { updated, deleted } = await curator.consolidate({
+              client: ctx.client,
+              projectPath,
+              sessionID,
+              model: cfg.model,
+            });
+            if (updated > 0 || deleted > 0) {
+              console.error(`[lore] consolidation: ${updated} updated, ${deleted} deleted`);
+            }
+          }
+        } catch (e) {
+          console.error("[lore] consolidation error:", e);
+        }
+
         // Prune temporal messages after distillation and curation have run.
         // Pass 1: TTL — remove distilled messages older than retention period.
         // Pass 2: Size cap — evict oldest distilled messages if over the limit.
@@ -371,7 +394,8 @@ export const LorePlugin: Plugin = async (ctx) => {
       if (formatted) {
         // Track how many tokens we actually consumed so the gradient manager
         // can deduct them from the usable budget for message injection.
-        const ltmTokenCount = Math.ceil(formatted.length / 4);
+        // Use /3 (not /4) — consistent with ltm.ts and prompt.ts estimators.
+        const ltmTokenCount = Math.ceil(formatted.length / 3);
         setLtmTokens(ltmTokenCount);
         output.system.push(formatted);
       } else {
diff --git a/src/ltm.ts b/src/ltm.ts
@@ -2,9 +2,10 @@ import { uuidv7 } from "uuidv7";
 import { db, ensureProject } from "./db";
 import { ftsQuery } from "./temporal";
 
-// Rough token estimate: ~4 chars per token
+// Rough token estimate: ~3 chars per token (conservative for markdown-heavy technical text;
+// real tokenization of code terms and special chars runs ~3.0-3.5 chars/token, not 4).
 function estimateTokens(text: string): number {
-  return Math.ceil(text.length / 4);
+  return Math.ceil(text.length / 3);
 }
 
 export type KnowledgeEntry = {
diff --git a/src/prompt.ts b/src/prompt.ts
@@ -195,12 +195,24 @@ Do NOT extract:
 - Restatements of what the code obviously does (e.g. "the auth module handles authentication")
 
 BREVITY IS CRITICAL — each entry must be concise:
-- content MUST be under 500 words (roughly 2000 characters)
+- content MUST be under 150 words (~600 characters). Capture ONE specific actionable
+  insight in 2-3 sentences. Prefer terse technical language.
+- Each "gotcha": one specific trap + its fix in 1-2 sentences
+- Each "architecture": one design decision and its key constraint
 - Focus on the actionable insight, not the full story behind it
-- If a pattern requires more detail, split into multiple focused entries
+- If a pattern requires more detail, split into multiple focused entries (each under 150 words)
 - Omit code examples unless a single short snippet is essential
 - Never include full file contents, large diffs, or complete command outputs
 
+PREFER UPDATES OVER CREATES:
+- Before creating a new entry, always check if an existing entry covers the same system
+  or component. Update the existing entry rather than creating a new one.
+- When updating, REPLACE the full content with a concise rewrite — do not append to
+  the existing content or repeat what was already there.
+- If multiple existing entries cover the same system from different angles (e.g. different
+  bugs in the same module), consolidate them: update one with merged insights, delete the
+  rest. Fewer, denser entries are better than many scattered ones.
+
 crossProject flag:
 - Default is true — most useful knowledge is worth sharing across projects
 - Set crossProject to false for things that are meaningless outside this specific repo (e.g. a config path, a project-local naming convention that conflicts with your usual style)
@@ -211,14 +223,14 @@ Produce a JSON array of operations:
     "op": "create",
     "category": "decision" | "pattern" | "preference" | "architecture" | "gotcha",
     "title": "Short descriptive title",
-    "content": "Concise knowledge entry — under 500 words",
+    "content": "Concise knowledge entry — under 150 words",
     "scope": "project" | "global",
     "crossProject": true
   },
   {
     "op": "update",
     "id": "existing-entry-id",
-    "content": "Updated content — under 500 words",
+    "content": "Updated content — under 150 words",
     "confidence": 0.0-1.0
   },
   {
@@ -241,8 +253,9 @@ export function curatorUser(input: {
     content: string;
   }>;
 }): string {
-  const existing = input.existing.length
-    ? `Existing knowledge entries (you may update or delete these):\n${input.existing.map((e) => `- [${e.id}] (${e.category}) ${e.title}: ${e.content}`).join("\n")}`
+  const count = input.existing.length;
+  const existing = count
+    ? `Existing knowledge entries (${count} total — you may update or delete these):\n${input.existing.map((e) => `- [${e.id}] (${e.category}) ${e.title}: ${e.content}`).join("\n")}`
     : "No existing knowledge entries.";
   return `${existing}
 
@@ -252,7 +265,67 @@ Recent conversation to extract knowledge from:
 ${input.messages}
 
 ---
-IMPORTANT: If any new entries you would create are semantically duplicative of existing entries (same concept, different wording), prefer updating the existing entry rather than creating a new one. Only create new entries for genuinely distinct knowledge.`;
+IMPORTANT:
+1. Prefer updating existing entries over creating new ones. If a new insight refines or
+   extends an existing entry on the same topic, update that entry — don't create a new one.
+2. When updating, REPLACE the content with a complete rewrite — never append.
+3. If entries cover the same system from different angles, merge them: update one, delete the rest.
+4. Only create a new entry for genuinely distinct knowledge with no existing home.
+5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.`;
+}
+
+/**
+ * System prompt for the consolidation pass.
+ * Unlike the normal curator (which extracts from conversation), consolidation
+ * reviews the FULL entry corpus and aggressively merges/trims/deletes to reduce
+ * entry count while preserving the most actionable knowledge.
+ */
+export const CONSOLIDATION_SYSTEM = `You are a long-term memory curator performing a consolidation pass. The knowledge base has grown too large and needs to be trimmed.
+
+Your goal: reduce the entry count to the target maximum while preserving the most valuable knowledge.
+
+CONSOLIDATION RULES:
+1. MERGE related entries — if multiple entries describe the same system, module, or concept
+   from different angles (e.g. several bug fixes in the same component), merge them into
+   ONE concise entry. Use an "update" op for the surviving entry and "delete" ops for the rest.
+2. TRIM verbose entries — any entry over 150 words must be trimmed to its essential insight.
+   Use an "update" op with the rewritten content.
+3. DELETE low-value entries:
+   - Stale entries about bugs that have been fixed and no longer need gotcha warnings
+   - Entries whose knowledge is fully subsumed by another entry
+   - Entries about one-off incidents with no recurring applicability
+   - General advice available in any documentation
+4. PRESERVE:
+   - Entries describing non-obvious design decisions specific to this codebase
+   - Entries about recurring traps that a developer would hit again
+   - Entries that capture a hard-won gotcha with a concrete fix
+
+OUTPUT: A JSON array of "update" and "delete" ops only. No "create" ops — you are not
+extracting new knowledge, only consolidating existing knowledge.
+
+- "update": Replace content with a concise rewrite (under 150 words). Use to merge survivors or trim verbose entries.
+- "delete": Remove entries that are merged, stale, or low-value.
+
+Output ONLY valid JSON. No markdown fences, no explanation, no preamble.`;
+
+export function consolidationUser(input: {
+  entries: Array<{
+    id: string;
+    category: string;
+    title: string;
+    content: string;
+  }>;
+  targetMax: number;
+}): string {
+  const count = input.entries.length;
+  const listed = input.entries
+    .map((e) => `- [${e.id}] (${e.category}) ${e.title}: ${e.content}`)
+    .join("\n");
+  return `Current knowledge entries (${count} total, target max: ${input.targetMax}):
+
+${listed}
+
+Produce update/delete ops to reduce entry count to at most ${input.targetMax}. Prioritize merging related entries and trimming verbose ones over outright deletion.`;
 }
 
 // Format distillations for injection into the message context.
@@ -287,9 +360,9 @@ export function formatDistillations(
 }
 
 // Rough token estimate used for budget-gating knowledge entries.
-// Consistent with gradient.ts: ~4 chars per token.
+// Uses ~3 chars/token (conservative for markdown-heavy technical text).
 function estimateTokens(text: string): number {
-  return Math.ceil(text.length / 4);
+  return Math.ceil(text.length / 3);
 }
 
 export function formatKnowledge(
diff --git a/test/config.test.ts b/test/config.test.ts
@@ -40,6 +40,25 @@ describe("LoreConfig — agentsFile schema", () => {
   });
 });
 
+describe("LoreConfig — curator schema", () => {
+  test("curator defaults: enabled=true, onIdle=true, afterTurns=10, maxEntries=25", () => {
+    const cfg = LoreConfig.parse({});
+    expect(cfg.curator.enabled).toBe(true);
+    expect(cfg.curator.onIdle).toBe(true);
+    expect(cfg.curator.afterTurns).toBe(10);
+    expect(cfg.curator.maxEntries).toBe(25);
+  });
+
+  test("curator.maxEntries can be customised", () => {
+    const cfg = LoreConfig.parse({ curator: { maxEntries: 30 } });
+    expect(cfg.curator.maxEntries).toBe(30);
+  });
+
+  test("curator.maxEntries minimum is 10", () => {
+    expect(() => LoreConfig.parse({ curator: { maxEntries: 5 } })).toThrow();
+  });
+});
+
 describe("load — reads config from .lore.json", () => {
   test("loads agentsFile.enabled=false from .lore.json", async () => {
     mkdirSync(TMP, { recursive: true });
diff --git a/test/markdown.test.ts b/test/markdown.test.ts
@@ -199,15 +199,15 @@ describe("formatKnowledge", () => {
     const entries = Array.from({ length: 20 }, (_, i) => ({
       category: "pattern",
       title: `Entry ${i}`,
-      content: "A".repeat(400), // ~100 tokens each
+      content: "A".repeat(400), // ~133 tokens each at chars/3
     }));
     // Budget of 500 tokens — should fit only a few
     const result = formatKnowledge(entries, 500);
     const items = countListItems(result);
     expect(items).toBeGreaterThan(0);
     expect(items).toBeLessThan(20);
-    // Total size should be roughly within budget
-    expect(Math.ceil(result.length / 4)).toBeLessThanOrEqual(600); // some slack for headers
+    // Total size should be roughly within budget (use /3 to match estimateTokens)
+    expect(Math.ceil(result.length / 3)).toBeLessThanOrEqual(600); // some slack for headers
   });
 
   test("token budget — returns empty string when no entries fit", () => {