Skip to content

Commit 74728df

Browse files
committed
feat(ltm): tighten entry budget, add consolidation pass
- Fix token undercount: estimateTokens uses chars/3 (not chars/4) in all LTM budget paths — real tokenization of markdown-heavy technical text is ~3 chars/token, reducing overflow risk from systematic undercount
- Tighten entry size limits (a 40% reduction): MAX_ENTRY_CONTENT_LENGTH 2000→1200 chars, pruneOversized threshold 2000→1200, curator prompt tightened from 500 to 150 words
- Strengthen curator dedup: add PREFER UPDATES OVER CREATES section, instruct LLM to replace (not append) on update, merge overlapping entries
- Add consolidation pass: new curator.consolidate() triggered from session.idle when entry count exceeds curator.maxEntries (default: 25); uses dedicated CONSOLIDATION_SYSTEM prompt with update/delete ops only
1 parent 0b41863 commit 74728df

7 files changed

Lines changed: 223 additions & 19 deletions

File tree

src/config.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ export const LoreConfig = z.object({
2828
enabled: z.boolean().default(true),
2929
onIdle: z.boolean().default(true),
3030
afterTurns: z.number().min(1).default(10),
31+
/** Max knowledge entries per project before consolidation triggers. Default: 25. */
32+
maxEntries: z.number().min(10).default(25),
3133
})
3234
.default({}),
3335
pruning: z

src/curator.ts

Lines changed: 88 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,16 @@ import type { createOpencodeClient } from "@opencode-ai/sdk";
22
import { config } from "./config";
33
import * as temporal from "./temporal";
44
import * as ltm from "./ltm";
5-
import { CURATOR_SYSTEM, curatorUser } from "./prompt";
5+
import { CURATOR_SYSTEM, curatorUser, CONSOLIDATION_SYSTEM, consolidationUser } from "./prompt";
66
import { workerSessionIDs } from "./distillation";
77

88
/**
99
* Maximum length (chars) for a single knowledge entry's content.
10-
* ~500 tokens. Entries exceeding this are truncated with a notice.
10+
* ~400 tokens at chars/3. Entries exceeding this are truncated with a notice.
1111
* The curator prompt also instructs the model to stay within this limit,
1212
* so truncation is a last-resort safety net.
1313
*/
14-
const MAX_ENTRY_CONTENT_LENGTH = 2000;
14+
const MAX_ENTRY_CONTENT_LENGTH = 1200;
1515

1616
type Client = ReturnType<typeof createOpencodeClient>;
1717

@@ -172,3 +172,88 @@ export async function run(input: {
172172
export function resetCurationTracker() {
173173
lastCuratedAt = 0;
174174
}
175+
176+
/**
 * Consolidation pass: reviews ALL project entries and merges/trims/deletes
 * to reduce entry count to cfg.curator.maxEntries. Only runs when the current
 * entry count exceeds the target. Uses the same worker session as curation.
 *
 * Only "update" and "delete" ops are applied — consolidation never creates
 * entries — and only to entries that were actually listed in the prompt.
 *
 * @param input.client      SDK client used to prompt the worker session.
 * @param input.projectPath Project whose knowledge entries are consolidated.
 * @param input.sessionID   Session the worker session is resolved from.
 * @param input.model       Optional model override; falls back to cfg.model.
 * @returns Counts of entries updated and deleted. Both are 0 when the curator
 *          is disabled, the entry count is within the limit, or the model
 *          returned no usable assistant text.
 */
export async function consolidate(input: {
  client: Client;
  projectPath: string;
  sessionID: string;
  model?: { providerID: string; modelID: string };
}): Promise<{ updated: number; deleted: number }> {
  const cfg = config();
  if (!cfg.curator.enabled) return { updated: 0, deleted: 0 };

  const entries = ltm.forProject(input.projectPath, cfg.crossProject);
  if (entries.length <= cfg.curator.maxEntries) return { updated: 0, deleted: 0 };

  // Ids the model is shown. Ops that reference anything else are treated as
  // hallucinated and skipped, so consolidation can never touch an entry it
  // was not asked to review.
  const knownIDs = new Set(entries.map((e) => e.id));

  const entriesForPrompt = entries.map((e) => ({
    id: e.id,
    category: e.category,
    title: e.title,
    content: e.content,
  }));

  const userContent = consolidationUser({
    entries: entriesForPrompt,
    targetMax: cfg.curator.maxEntries,
  });
  const workerID = await ensureWorkerSession(input.client, input.sessionID);
  const model = input.model ?? cfg.model;
  const parts = [
    { type: "text" as const, text: `${CONSOLIDATION_SYSTEM}\n\n${userContent}` },
  ];

  await input.client.session.prompt({
    path: { id: workerID },
    body: {
      parts,
      agent: "lore-curator",
      ...(model ? { model } : {}),
    },
  });

  // Fetch the tail of the worker session; the last message is expected to be
  // the assistant's reply to the prompt sent above.
  const msgs = await input.client.session.messages({
    path: { id: workerID },
    query: { limit: 2 },
  });
  const last = msgs.data?.at(-1);
  if (!last || last.info.role !== "assistant") return { updated: 0, deleted: 0 };

  const responsePart = last.parts.find((p) => p.type === "text");
  if (!responsePart || responsePart.type !== "text") return { updated: 0, deleted: 0 };

  const ops = parseOps(responsePart.text);
  let updated = 0;
  let deleted = 0;

  for (const op of ops) {
    // Consolidation only applies update and delete — never create.
    if (op.op === "update") {
      // ltm.get() re-checks existence: an earlier op in this same batch may
      // already have deleted the entry.
      if (knownIDs.has(op.id) && ltm.get(op.id)) {
        const content = clampConsolidatedContent(op.content);
        ltm.update(op.id, { content, confidence: op.confidence });
        updated++;
      }
    } else if (op.op === "delete") {
      if (knownIDs.has(op.id) && ltm.get(op.id)) {
        ltm.remove(op.id);
        deleted++;
      }
    }
    // "create" ops are silently ignored — consolidation must not add entries.
  }

  return { updated, deleted };
}

/**
 * Truncates over-long entry content so that the stored text, INCLUDING the
 * truncation notice, never exceeds MAX_ENTRY_CONTENT_LENGTH.
 *
 * Appending the notice after slicing to the full limit would store content
 * longer than the limit itself.
 * NOTE(review): such an entry would presumably be flagged by the startup
 * `ltm.pruneOversized(...)` pass, which uses the same threshold — confirm.
 */
function clampConsolidatedContent(content: string | undefined): string | undefined {
  if (content === undefined || content.length <= MAX_ENTRY_CONTENT_LENGTH) return content;
  const notice = " [truncated — entry too long]";
  const keep = Math.max(0, MAX_ENTRY_CONTENT_LENGTH - notice.length);
  return content.slice(0, keep) + notice;
}

src/index.ts

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ export const LorePlugin: Plugin = async (ctx) => {
5656
// Prune any corrupted/oversized knowledge entries left by the AGENTS.md
5757
// backslash-escaping bug or curator hallucinations. Sets confidence → 0
5858
// (below the 0.2 query threshold) so they stop polluting the context.
59-
const pruned = ltm.pruneOversized(2000);
59+
const pruned = ltm.pruneOversized(1200);
6060
if (pruned > 0) {
6161
console.error(`[lore] pruned ${pruned} oversized knowledge entries (confidence set to 0)`);
6262
}
@@ -301,6 +301,29 @@ export const LorePlugin: Plugin = async (ctx) => {
301301
turnsSinceCuration = 0;
302302
}
303303

304+
// Consolidate entries if count exceeds cfg.curator.maxEntries.
305+
// Runs after normal curation so newly created entries are counted.
306+
// Only triggers when truly over the limit to avoid redundant LLM calls.
307+
try {
308+
const allEntries = ltm.forProject(projectPath);
309+
if (allEntries.length > cfg.curator.maxEntries) {
310+
console.error(
311+
`[lore] entry count ${allEntries.length} exceeds maxEntries ${cfg.curator.maxEntries} — running consolidation`,
312+
);
313+
const { updated, deleted } = await curator.consolidate({
314+
client: ctx.client,
315+
projectPath,
316+
sessionID,
317+
model: cfg.model,
318+
});
319+
if (updated > 0 || deleted > 0) {
320+
console.error(`[lore] consolidation: ${updated} updated, ${deleted} deleted`);
321+
}
322+
}
323+
} catch (e) {
324+
console.error("[lore] consolidation error:", e);
325+
}
326+
304327
// Prune temporal messages after distillation and curation have run.
305328
// Pass 1: TTL — remove distilled messages older than retention period.
306329
// Pass 2: Size cap — evict oldest distilled messages if over the limit.
@@ -371,7 +394,8 @@ export const LorePlugin: Plugin = async (ctx) => {
371394
if (formatted) {
372395
// Track how many tokens we actually consumed so the gradient manager
373396
// can deduct them from the usable budget for message injection.
374-
const ltmTokenCount = Math.ceil(formatted.length / 4);
397+
// Use /3 (not /4) — consistent with ltm.ts and prompt.ts estimators.
398+
const ltmTokenCount = Math.ceil(formatted.length / 3);
375399
setLtmTokens(ltmTokenCount);
376400
output.system.push(formatted);
377401
} else {

src/ltm.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ import { uuidv7 } from "uuidv7";
22
import { db, ensureProject } from "./db";
33
import { ftsQuery } from "./temporal";
44

5-
// Rough token estimate: ~3 chars per token (conservative for markdown-heavy technical text;
// real tokenization of code terms and special chars runs ~3.0-3.5 chars/token, not 4).
/**
 * Estimates the token count of `text` as its length divided by 3, rounded up.
 *
 * @param text Text to measure; length is counted in UTF-16 code units.
 * @returns Estimated number of tokens (0 for an empty string).
 */
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 3);
}
910

1011
export type KnowledgeEntry = {

src/prompt.ts

Lines changed: 82 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -195,12 +195,24 @@ Do NOT extract:
195195
- Restatements of what the code obviously does (e.g. "the auth module handles authentication")
196196
197197
BREVITY IS CRITICAL — each entry must be concise:
198-
- content MUST be under 500 words (roughly 2000 characters)
198+
- content MUST be under 150 words (~600 characters). Capture ONE specific actionable
199+
insight in 2-3 sentences. Prefer terse technical language.
200+
- Each "gotcha": one specific trap + its fix in 1-2 sentences
201+
- Each "architecture": one design decision and its key constraint
199202
- Focus on the actionable insight, not the full story behind it
200-
- If a pattern requires more detail, split into multiple focused entries
203+
- If a pattern requires more detail, split into multiple focused entries (each under 150 words)
201204
- Omit code examples unless a single short snippet is essential
202205
- Never include full file contents, large diffs, or complete command outputs
203206
207+
PREFER UPDATES OVER CREATES:
208+
- Before creating a new entry, always check if an existing entry covers the same system
209+
or component. Update the existing entry rather than creating a new one.
210+
- When updating, REPLACE the full content with a concise rewrite — do not append to
211+
the existing content or repeat what was already there.
212+
- If multiple existing entries cover the same system from different angles (e.g. different
213+
bugs in the same module), consolidate them: update one with merged insights, delete the
214+
rest. Fewer, denser entries are better than many scattered ones.
215+
204216
crossProject flag:
205217
- Default is true — most useful knowledge is worth sharing across projects
206218
- Set crossProject to false for things that are meaningless outside this specific repo (e.g. a config path, a project-local naming convention that conflicts with your usual style)
@@ -211,14 +223,14 @@ Produce a JSON array of operations:
211223
"op": "create",
212224
"category": "decision" | "pattern" | "preference" | "architecture" | "gotcha",
213225
"title": "Short descriptive title",
214-
"content": "Concise knowledge entry — under 500 words",
226+
"content": "Concise knowledge entry — under 150 words",
215227
"scope": "project" | "global",
216228
"crossProject": true
217229
},
218230
{
219231
"op": "update",
220232
"id": "existing-entry-id",
221-
"content": "Updated content — under 500 words",
233+
"content": "Updated content — under 150 words",
222234
"confidence": 0.0-1.0
223235
},
224236
{
@@ -241,8 +253,9 @@ export function curatorUser(input: {
241253
content: string;
242254
}>;
243255
}): string {
244-
const existing = input.existing.length
245-
? `Existing knowledge entries (you may update or delete these):\n${input.existing.map((e) => `- [${e.id}] (${e.category}) ${e.title}: ${e.content}`).join("\n")}`
256+
const count = input.existing.length;
257+
const existing = count
258+
? `Existing knowledge entries (${count} total — you may update or delete these):\n${input.existing.map((e) => `- [${e.id}] (${e.category}) ${e.title}: ${e.content}`).join("\n")}`
246259
: "No existing knowledge entries.";
247260
return `${existing}
248261
@@ -252,7 +265,67 @@ Recent conversation to extract knowledge from:
252265
${input.messages}
253266
254267
---
255-
IMPORTANT: If any new entries you would create are semantically duplicative of existing entries (same concept, different wording), prefer updating the existing entry rather than creating a new one. Only create new entries for genuinely distinct knowledge.`;
268+
IMPORTANT:
269+
1. Prefer updating existing entries over creating new ones. If a new insight refines or
270+
extends an existing entry on the same topic, update that entry — don't create a new one.
271+
2. When updating, REPLACE the content with a complete rewrite — never append.
272+
3. If entries cover the same system from different angles, merge them: update one, delete the rest.
273+
4. Only create a new entry for genuinely distinct knowledge with no existing home.
274+
5. Keep all entries under 150 words. If an existing entry is too long, use an update op to trim it.`;
275+
}
276+
277+
/**
278+
* System prompt for the consolidation pass.
279+
* Unlike the normal curator (which extracts from conversation), consolidation
280+
* reviews the FULL entry corpus and aggressively merges/trims/deletes to reduce
281+
* entry count while preserving the most actionable knowledge.
282+
*/
283+
export const CONSOLIDATION_SYSTEM = `You are a long-term memory curator performing a consolidation pass. The knowledge base has grown too large and needs to be trimmed.
284+
285+
Your goal: reduce the entry count to the target maximum while preserving the most valuable knowledge.
286+
287+
CONSOLIDATION RULES:
288+
1. MERGE related entries — if multiple entries describe the same system, module, or concept
289+
from different angles (e.g. several bug fixes in the same component), merge them into
290+
ONE concise entry. Use an "update" op for the surviving entry and "delete" ops for the rest.
291+
2. TRIM verbose entries — any entry over 150 words must be trimmed to its essential insight.
292+
Use an "update" op with the rewritten content.
293+
3. DELETE low-value entries:
294+
- Stale entries about bugs that have been fixed and no longer need gotcha warnings
295+
- Entries whose knowledge is fully subsumed by another entry
296+
- Entries about one-off incidents with no recurring applicability
297+
- General advice available in any documentation
298+
4. PRESERVE:
299+
- Entries describing non-obvious design decisions specific to this codebase
300+
- Entries about recurring traps that a developer would hit again
301+
- Entries that capture a hard-won gotcha with a concrete fix
302+
303+
OUTPUT: A JSON array of "update" and "delete" ops only. No "create" ops — you are not
304+
extracting new knowledge, only consolidating existing knowledge.
305+
306+
- "update": Replace content with a concise rewrite (under 150 words). Use to merge survivors or trim verbose entries.
307+
- "delete": Remove entries that are merged, stale, or low-value.
308+
309+
Output ONLY valid JSON. No markdown fences, no explanation, no preamble.`;
310+
311+
/**
 * Builds the user message for the consolidation pass.
 *
 * Lists every entry as `- [id] (category) title: content`, one per line,
 * preceded by a header stating the current count and the target maximum, and
 * followed by the instruction to produce update/delete ops.
 *
 * @param input.entries   Entries to show to the model.
 * @param input.targetMax Entry count the model is asked to reduce to.
 * @returns The prompt text: header, blank line, entry list, blank line, instruction.
 */
export function consolidationUser(input: {
  entries: Array<{
    id: string;
    category: string;
    title: string;
    content: string;
  }>;
  targetMax: number;
}): string {
  const count = input.entries.length;
  const listed = input.entries
    .map((e) => `- [${e.id}] (${e.category}) ${e.title}: ${e.content}`)
    .join("\n");
  // Sections are joined explicitly so the blank separator lines are visible
  // in the code rather than implied by template-literal layout.
  return [
    `Current knowledge entries (${count} total, target max: ${input.targetMax}):`,
    "",
    listed,
    "",
    `Produce update/delete ops to reduce entry count to at most ${input.targetMax}. Prioritize merging related entries and trimming verbose ones over outright deletion.`,
  ].join("\n");
}
257330

258331
// Format distillations for injection into the message context.
@@ -287,9 +360,9 @@ export function formatDistillations(
287360
}
288361

289362
// Rough token estimate used for budget-gating knowledge entries.
// Uses ~3 chars/token (conservative for markdown-heavy technical text).
/**
 * Estimates the token count of `text` as its length divided by 3, rounded up.
 *
 * @param text Text to measure; length is counted in UTF-16 code units.
 * @returns Estimated number of tokens (0 for an empty string).
 */
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 3);
}
294367

295368
export function formatKnowledge(

test/config.test.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,25 @@ describe("LoreConfig — agentsFile schema", () => {
4040
});
4141
});
4242

43+
// Schema tests for the `curator` section of LoreConfig: defaults, overriding
// maxEntries, and the lower bound on maxEntries.
describe("LoreConfig — curator schema", () => {
  test("curator defaults: enabled=true, onIdle=true, afterTurns=10, maxEntries=25", () => {
    const cfg = LoreConfig.parse({});
    expect(cfg.curator.enabled).toBe(true);
    expect(cfg.curator.onIdle).toBe(true);
    expect(cfg.curator.afterTurns).toBe(10);
    expect(cfg.curator.maxEntries).toBe(25);
  });

  test("curator.maxEntries can be customised", () => {
    const cfg = LoreConfig.parse({ curator: { maxEntries: 30 } });
    expect(cfg.curator.maxEntries).toBe(30);
  });

  test("curator.maxEntries minimum is 10", () => {
    expect(() => LoreConfig.parse({ curator: { maxEntries: 5 } })).toThrow();
    // Just below the bound must be rejected too.
    expect(() => LoreConfig.parse({ curator: { maxEntries: 9 } })).toThrow();
  });

  test("curator.maxEntries accepts the minimum value itself", () => {
    // The schema declares min(10), which is an inclusive bound.
    const cfg = LoreConfig.parse({ curator: { maxEntries: 10 } });
    expect(cfg.curator.maxEntries).toBe(10);
  });
});
61+
4362
describe("load — reads config from .lore.json", () => {
4463
test("loads agentsFile.enabled=false from .lore.json", async () => {
4564
mkdirSync(TMP, { recursive: true });

test/markdown.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,15 +199,15 @@ describe("formatKnowledge", () => {
199199
const entries = Array.from({ length: 20 }, (_, i) => ({
200200
category: "pattern",
201201
title: `Entry ${i}`,
202-
content: "A".repeat(400), // ~100 tokens each
202+
content: "A".repeat(400), // ~133 tokens each at chars/3
203203
}));
204204
// Budget of 500 tokens — should fit only a few
205205
const result = formatKnowledge(entries, 500);
206206
const items = countListItems(result);
207207
expect(items).toBeGreaterThan(0);
208208
expect(items).toBeLessThan(20);
209-
// Total size should be roughly within budget
210-
expect(Math.ceil(result.length / 4)).toBeLessThanOrEqual(600); // some slack for headers
209+
// Total size should be roughly within budget (use /3 to match estimateTokens)
210+
expect(Math.ceil(result.length / 3)).toBeLessThanOrEqual(600); // some slack for headers
211211
});
212212

213213
test("token budget — returns empty string when no entries fit", () => {

0 commit comments

Comments
 (0)