From 3fca4d44d2ca4ea73cb9fd83025b327347debcb7 Mon Sep 17 00:00:00 2001 From: ArchieIndian Date: Mon, 16 Mar 2026 00:44:52 +0530 Subject: [PATCH] Add memory-graph-builder: structured knowledge graph from MEMORY.md Parses flat MEMORY.md into nodes with categories, entities, and typed relationships. Detects duplicates (Jaccard >0.7), contradictions, and stale entries. Generates compressed memory digest saving 30-60% tokens. Inspired by OpenLobster's Neo4j graph memory. Cron: nightly 10pm. Co-Authored-By: Claude Sonnet 4.6 --- .../memory-graph-builder/SKILL.md | 144 +++++ .../memory-graph-builder/STATE_SCHEMA.yaml | 54 ++ .../memory-graph-builder/example-state.yaml | 43 ++ .../memory-graph-builder/graph.py | 545 ++++++++++++++++++ 4 files changed, 786 insertions(+) create mode 100644 skills/openclaw-native/memory-graph-builder/SKILL.md create mode 100644 skills/openclaw-native/memory-graph-builder/STATE_SCHEMA.yaml create mode 100644 skills/openclaw-native/memory-graph-builder/example-state.yaml create mode 100755 skills/openclaw-native/memory-graph-builder/graph.py diff --git a/skills/openclaw-native/memory-graph-builder/SKILL.md b/skills/openclaw-native/memory-graph-builder/SKILL.md new file mode 100644 index 0000000..0868901 --- /dev/null +++ b/skills/openclaw-native/memory-graph-builder/SKILL.md @@ -0,0 +1,144 @@ +--- +name: memory-graph-builder +version: "1.0" +category: openclaw-native +description: Parses OpenClaw's flat MEMORY.md into a structured knowledge graph — detects duplicates, contradictions, and stale entries, then builds a compressed memory digest optimized for system prompt injection. +stateful: true +cron: "0 22 * * *" +--- + +# Memory Graph Builder + +## What it does + +OpenClaw stores agent memory in a flat `MEMORY.md` file — one line per fact, no structure, no relationships. This works until your agent has 200+ memories and half of them are duplicates, three contradict each other, and the whole file costs 4,000 tokens every session. 
+ +Memory Graph Builder treats MEMORY.md as a raw data source and builds a structured knowledge graph on top of it. Each memory becomes a node with typed relationships to other nodes. The graph enables: + +- **Duplicate detection** — "User prefers dark mode" and "User likes dark theme" are the same fact +- **Contradiction detection** — "User uses Python 3.8" vs "User uses Python 3.12" +- **Staleness detection** — Facts older than a configurable threshold that haven't been referenced +- **Memory digest** — A compressed, relationship-aware summary that replaces raw MEMORY.md in the system prompt, saving 30-60% tokens + +Inspired by OpenLobster's Neo4j-backed graph memory system, adapted to work on top of OpenClaw's existing MEMORY.md without requiring a database. + +## When to invoke + +- Automatically, nightly at 10pm (cron) +- After bulk memory additions (e.g., after project-onboarding) +- When the agent's context initialisation feels slow (memory bloat) +- Manually to audit memory quality + +## Graph structure + +Each memory line becomes a node: + +```yaml +nodes: + - id: "mem_001" + text: "User prefers Python for backend work" + category: preference # preference | fact | project | person | tool | config + entities: ["user", "python", "backend"] + added_at: "2026-03-01" + last_referenced: "2026-03-15" + confidence: 0.9 +edges: + - from: "mem_001" + to: "mem_014" + relation: related_to # related_to | contradicts | supersedes | depends_on +``` + +## How to use + +```bash +python3 graph.py --build # Parse MEMORY.md, build graph +python3 graph.py --duplicates # Show duplicate clusters +python3 graph.py --contradictions # Show contradicting pairs +python3 graph.py --stale --days 30 # Show memories not referenced in 30 days +python3 graph.py --digest # Generate compressed memory digest +python3 graph.py --digest --max-tokens 1500 # Digest with token budget +python3 graph.py --prune --dry-run # Show what would be removed +python3 graph.py --prune # Remove duplicates + 
stale entries +python3 graph.py --stats # Graph statistics +python3 graph.py --status # Last build summary +python3 graph.py --stats --format json # JSON output (combine --format with --build or --stats) +``` + +## Cron wakeup behaviour + +Nightly at 10pm: + +1. Read MEMORY.md +2. Rebuild graph (full rebuild — every memory line is re-processed on each run) +3. Detect duplicates and contradictions +4. Flag stale entries (>30 days unreferenced by default) +5. Generate fresh memory digest +6. Write digest to `~/.openclaw/workspace/memory-digest.md` +7. Log summary to state + +## Memory digest + +The digest is a compressed representation of the knowledge graph optimized for LLM consumption. Instead of dumping every raw line, it: + +- Groups related memories by category +- Merges duplicate facts into single entries +- Marks contradictions with `[CONFLICT]` so the agent can resolve them +- Omits stale entries below a confidence threshold +- Respects a configurable max-token budget + +Example digest output: + +```markdown +## Preferences +- Prefers Python for backend, TypeScript for frontend +- Dark mode everywhere; compact UI layouts +- Commit messages: imperative mood, max 72 chars + +## Active Projects +- openclaw-superpowers: skill library, 40 skills, MIT license +- personal-site: Next.js 14, deployed on Vercel + +## People +- Alice (teammate): works on auth, prefers Go + +## Conflicts (needs resolution) +- [CONFLICT] Python version: "3.8" vs "3.12" — ask user to clarify +``` + +## Procedure + +**Step 1 — Build the graph** + +```bash +python3 graph.py --build +``` + +**Step 2 — Review duplicates and contradictions** + +```bash +python3 graph.py --duplicates +python3 graph.py --contradictions +``` + +Fix contradictions by editing MEMORY.md directly or asking the agent to clarify. 
+ +**Step 3 — Prune stale entries** + +```bash +python3 graph.py --prune --dry-run +python3 graph.py --prune +``` + +**Step 4 — Generate and use the digest** + +```bash +python3 graph.py --digest --max-tokens 1500 +``` + +Point OpenClaw's memory injection at `~/.openclaw/workspace/memory-digest.md` instead of raw MEMORY.md. + +## State + +Graph structure, digest cache, and audit history stored in `~/.openclaw/skill-state/memory-graph-builder/state.yaml`. + +Fields: `last_build_at`, `node_count`, `edge_count`, `duplicate_count`, `contradiction_count`, `stale_count`, `digest_tokens`, `build_history`. diff --git a/skills/openclaw-native/memory-graph-builder/STATE_SCHEMA.yaml b/skills/openclaw-native/memory-graph-builder/STATE_SCHEMA.yaml new file mode 100644 index 0000000..3a09c21 --- /dev/null +++ b/skills/openclaw-native/memory-graph-builder/STATE_SCHEMA.yaml @@ -0,0 +1,54 @@ +version: "1.0" +description: Knowledge graph built from MEMORY.md — nodes, edges, digest, and audit metrics. +fields: + last_build_at: + type: datetime + node_count: + type: integer + default: 0 + edge_count: + type: integer + default: 0 + duplicate_count: + type: integer + default: 0 + contradiction_count: + type: integer + default: 0 + stale_count: + type: integer + default: 0 + digest_tokens: + type: integer + default: 0 + nodes: + type: list + description: All memory nodes in the graph + items: + id: { type: string } + text: { type: string } + category: { type: enum, values: [preference, fact, project, person, tool, config, other] } + entities: { type: list, items: { type: string } } + added_at: { type: string } + last_referenced: { type: string } + confidence: { type: float } + is_duplicate_of: { type: string } + is_stale: { type: boolean } + edges: + type: list + description: Relationships between memory nodes + items: + from: { type: string } + to: { type: string } + relation: { type: enum, values: [related_to, contradicts, supersedes, depends_on, duplicate_of] } + weight: { type: float 
} + build_history: + type: list + description: Rolling log of graph builds (last 20) + items: + built_at: { type: datetime } + node_count: { type: integer } + duplicates_found: { type: integer } + contradictions_found: { type: integer } + stale_found: { type: integer } + digest_tokens: { type: integer } diff --git a/skills/openclaw-native/memory-graph-builder/example-state.yaml b/skills/openclaw-native/memory-graph-builder/example-state.yaml new file mode 100644 index 0000000..2f6c93d --- /dev/null +++ b/skills/openclaw-native/memory-graph-builder/example-state.yaml @@ -0,0 +1,43 @@ +# Example runtime state for memory-graph-builder +last_build_at: "2026-03-15T22:00:12.000000" +node_count: 48 +edge_count: 15 +duplicate_count: 4 +contradiction_count: 1 +stale_count: 3 +digest_tokens: 420 +build_history: + - built_at: "2026-03-15T22:00:12.000000" + node_count: 48 + duplicates_found: 4 + contradictions_found: 1 + stale_found: 3 + digest_tokens: 420 +# ── Walkthrough ────────────────────────────────────────────────────────────── +# Nightly cron runs: python3 graph.py --build +# +# Memory Graph Builder — 2026-03-15 22:00 +# ──────────────────────────────────────────────────────────────── +# Memory lines : 52 +# Nodes : 48 +# Edges : 15 +# Duplicates : 4 +# Contradictions : 1 +# Stale : 3 +# Digest tokens : ~420 +# +# Digest written to: ~/.openclaw/workspace/memory-digest.md +# +# python3 graph.py --duplicates +# DUP: "User prefers dark mode for all applications" +# ORIG: "User likes dark theme everywhere" +# +# python3 graph.py --contradictions +# A: "User uses Python 3.8 for backend services" +# B: "User recently upgraded to Python 3.12" +# → Resolve by editing MEMORY.md +# +# python3 graph.py --prune --dry-run +# Dry run — would prune 7 entries: +# [duplicate] "User prefers dark mode for all applications" +# [stale] "Working on migration to React 17" diff --git a/skills/openclaw-native/memory-graph-builder/graph.py b/skills/openclaw-native/memory-graph-builder/graph.py 
#!/usr/bin/env python3
# Patch metadata: new file mode 100755, index 0000000..8711cc6
# (skills/openclaw-native/memory-graph-builder/graph.py)
"""
Memory Graph Builder for openclaw-superpowers.

Parses MEMORY.md into a structured knowledge graph. Detects duplicates,
contradictions, and stale entries. Generates a compressed memory digest.

Exactly one action flag is required per invocation; --days, --max-tokens,
--dry-run and --format only modify the chosen action.

Usage:
    python3 graph.py --build [--days 30] [--format json]
    python3 graph.py --duplicates
    python3 graph.py --contradictions
    python3 graph.py --stale [--days 30]
    python3 graph.py --digest [--max-tokens 1500]
    python3 graph.py --prune [--dry-run]
    python3 graph.py --stats [--format json]
    python3 graph.py --status
"""

import argparse
import hashlib
import itertools
import json
import os
import re
import sys
from collections import Counter
from datetime import date, datetime, timedelta
from pathlib import Path
from typing import Optional

try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False

OPENCLAW_DIR = Path(os.environ.get("OPENCLAW_HOME", Path.home() / ".openclaw"))
STATE_FILE = OPENCLAW_DIR / "skill-state" / "memory-graph-builder" / "state.yaml"
MEMORY_FILE = OPENCLAW_DIR / "MEMORY.md"
DIGEST_FILE = OPENCLAW_DIR / "workspace" / "memory-digest.md"
MAX_HISTORY = 20

DUPLICATE_THRESHOLD = 0.7   # Jaccard similarity at or above which two lines are duplicates
MAX_ENTITIES = 8            # entities kept per node
TOKENS_PER_WORD = 1.33      # rough estimate: ~1.33 tokens per whitespace-separated word
DATE_FORMAT = "%Y-%m-%d"

# ── Categories ────────────────────────────────────────────────────────────────

CATEGORY_KEYWORDS = {
    "preference": ["prefer", "like", "want", "always", "never", "favorite", "style",
                   "mode", "theme", "format", "convention"],
    "project": ["project", "repo", "repository", "codebase", "app", "application",
                "deploy", "build", "release", "version"],
    "person": ["name is", "works on", "teammate", "colleague", "manager", "friend",
               "email", "contact"],
    "tool": ["uses", "installed", "runs", "tool", "editor", "ide", "framework",
             "library", "database", "api"],
    "config": ["config", "setting", "path", "directory", "port", "url", "endpoint",
               "key", "token", "env"],
    "fact": ["is", "has", "located", "lives", "born", "works at", "speaks",
             "timezone", "language"],
}

_STOPWORDS = {
    "the", "and", "for", "with", "that", "this", "from", "have", "has",
    "been", "were", "will", "would", "could", "should", "about", "into",
    "when", "where", "which", "their", "there", "then", "than", "they",
    "them", "these", "those", "some", "also", "just", "more", "most",
    "very", "only", "over", "such", "after", "before", "between", "each",
    "does", "doing", "being", "other", "using",
}

_MARKDOWN_CHARS = re.compile(r'[*_`#\[\]()]')
# Keeps dotted tokens such as "3.12" or "next.js" in one piece.
_TOKEN_RE = re.compile(r'[a-z0-9]+(?:\.[a-z0-9]+)*')
_BULLET_PREFIXES = ("- ", "* ", "+ ")

# Two lines sharing entities contradict when one of these patterns matches in
# both lines but with different matched values.
_CONTRADICTION_PATTERNS = [
    re.compile(r'\b3\.\d+\b', re.I),                                   # version
    re.compile(r'\b(true|false|yes|no|never|always)\b', re.I),         # boolean
    re.compile(r'\b(use|prefer|like|avoid|hate|dislike)\b', re.I),     # preference
]


def classify_category(text: str) -> str:
    """Return the category whose keywords occur most often in *text*.

    Matching is a case-insensitive substring test, so short keywords also
    match inside longer words. Ties go to the category listed first in
    CATEGORY_KEYWORDS; "other" is returned when no keyword matches.
    """
    lowered = text.lower()
    best_cat, best_score = "other", 0
    for cat, keywords in CATEGORY_KEYWORDS.items():
        score = sum(1 for kw in keywords if kw in lowered)
        if score > best_score:  # strict ">" keeps the first category on ties
            best_cat, best_score = cat, score
    return best_cat


def extract_entities(text: str) -> list[str]:
    """Extract up to MAX_ENTITIES lowercase entity words from *text*.

    A word is kept when it is capitalised (and longer than one character) or
    when it is longer than three characters and not a stopword. Entities are
    returned in order of first appearance so the result is identical from run
    to run (a plain set would be reordered by string-hash randomisation).
    """
    clean = _MARKDOWN_CHARS.sub('', text)
    entities: dict[str, None] = {}  # dict used as an insertion-ordered set
    for raw in clean.split():
        word = raw.strip(".,;:!?\"'")
        if not word:
            continue
        capitalised = word[0].isupper() and len(word) > 1
        substantive = len(word) > 3 and word.lower() not in _STOPWORDS
        if capitalised or substantive:
            entities.setdefault(word.lower(), None)
    return list(entities)[:MAX_ENTITIES]


# ── Similarity ────────────────────────────────────────────────────────────────

def tokenize(text: str) -> set[str]:
    """Return the set of significant lowercase tokens in *text*.

    Stopwords are dropped. Tokens of one or two characters are dropped unless
    they contain a digit: numbers and versions ("17", "3.8") are exactly what
    distinguishes otherwise identical facts, so they must survive.
    """
    tokens = set()
    for word in _TOKEN_RE.findall(text.lower()):
        if word in _STOPWORDS:
            continue
        if len(word) > 2 or any(ch.isdigit() for ch in word):
            tokens.add(word)
    return tokens


def _numeric_tokens(tokens: set) -> set:
    """Return the tokens that contain at least one digit."""
    return {t for t in tokens if any(ch.isdigit() for ch in t)}


def jaccard(a: set, b: set) -> float:
    """Return the Jaccard similarity |a ∩ b| / |a ∪ b|.

    Two empty sets are defined as identical (1.0).
    """
    if not a and not b:
        return 1.0
    return len(a & b) / len(a | b)


def text_hash(text: str) -> str:
    """Return a 12-hex-digit MD5 fingerprint of *text*, ignoring case and outer whitespace."""
    return hashlib.md5(text.strip().lower().encode()).hexdigest()[:12]


def estimate_tokens(text: str) -> int:
    """Roughly estimate the LLM token count of *text* from its word count."""
    return int(len(text.split()) * TOKENS_PER_WORD)


# ── Date helpers ──────────────────────────────────────────────────────────────

def _parse_date(value) -> Optional[date]:
    """Coerce a stored timestamp (str, date or datetime) to a date, or None."""
    if isinstance(value, datetime):
        return value.date()
    if isinstance(value, date):
        return value
    if isinstance(value, str):
        try:
            return datetime.strptime(value[:10], DATE_FORMAT).date()
        except ValueError:
            return None
    return None


def _is_stale(node: dict, days: int, today: date) -> bool:
    """Return True when *node* was last referenced more than *days* days ago.

    Falls back to the stored ``is_stale`` flag when the date is unreadable.
    """
    last_referenced = _parse_date(node.get("last_referenced"))
    if last_referenced is None:
        return bool(node.get("is_stale"))
    return (today - last_referenced) > timedelta(days=days)


# ── State helpers ─────────────────────────────────────────────────────────────

def _default_state() -> dict:
    """Return a fresh, empty state mapping."""
    return {"nodes": [], "edges": [], "build_history": []}


def load_state() -> dict:
    """Load persisted state, or an empty default when missing or unreadable.

    Without PyYAML the file is parsed as JSON, which is what save_state()
    writes in that situation. Read/parse failures are reported on stderr
    instead of being swallowed silently.
    """
    if not STATE_FILE.exists():
        return _default_state()
    read_errors = (OSError, ValueError)
    if HAS_YAML:
        read_errors += (yaml.YAMLError,)
    try:
        raw = STATE_FILE.read_text(encoding="utf-8")
        loaded = yaml.safe_load(raw) if HAS_YAML else json.loads(raw)
    except read_errors as exc:
        print(f"warning: could not read state file {STATE_FILE}: {exc}",
              file=sys.stderr)
        return _default_state()
    # An empty or non-mapping document would break every state.get() caller.
    return loaded if isinstance(loaded, dict) else _default_state()


def save_state(state: dict) -> None:
    """Persist *state* to STATE_FILE, creating parent directories as needed.

    YAML is used when PyYAML is installed. Otherwise JSON is written to the
    same path so state is not silently discarded.
    """
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(STATE_FILE, "w", encoding="utf-8") as f:
        if HAS_YAML:
            yaml.dump(state, f, default_flow_style=False, allow_unicode=True)
        else:
            # default=str: state loaded earlier may hold date/datetime values.
            json.dump(state, f, indent=2, default=str)
            f.write("\n")


# ── MEMORY.md parser ──────────────────────────────────────────────────────────

def _memory_text(line: str) -> str:
    """Return *line* without outer whitespace and without one leading bullet."""
    stripped = line.strip()
    if stripped.startswith(_BULLET_PREFIXES):
        stripped = stripped[2:].strip()
    return stripped


def parse_memory_file() -> list[str]:
    """Read MEMORY.md and return its memory lines.

    Blank lines, headers ("#...") and rules ("---...") are skipped, a leading
    bullet is removed, and entries of five characters or fewer are ignored.
    Returns an empty list when the file does not exist.
    """
    if not MEMORY_FILE.exists():
        return []
    memories = []
    for line in MEMORY_FILE.read_text(encoding="utf-8").splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith(("#", "---")):
            continue
        text = _memory_text(stripped)
        if len(text) > 5:
            memories.append(text)
    return memories


# ── Graph building ────────────────────────────────────────────────────────────

def _make_nodes(lines: list, stale_days: int, previous_nodes) -> list:
    """Create one node per memory line, reusing timestamps of known nodes."""
    today = datetime.now().date()
    previous = {n.get("id"): n for n in (previous_nodes or []) if isinstance(n, dict)}
    occurrences: Counter = Counter()
    nodes = []
    for line in lines:
        base_id = f"mem_{text_hash(line)}"
        occurrences[base_id] += 1
        # Verbatim repeats hash identically; suffix them so every id is unique.
        nth = occurrences[base_id]
        node_id = base_id if nth == 1 else f"{base_id}_{nth}"
        prior = previous.get(node_id, {})
        added = _parse_date(prior.get("added_at")) or today
        referenced = _parse_date(prior.get("last_referenced")) or today
        node = {
            "id": node_id,
            "text": line,
            "category": classify_category(line),
            "entities": extract_entities(line),
            "added_at": added.strftime(DATE_FORMAT),
            "last_referenced": referenced.strftime(DATE_FORMAT),
            "confidence": 1.0,
            "is_duplicate_of": None,
            "is_stale": False,
        }
        node["is_stale"] = _is_stale(node, stale_days, today)
        nodes.append(node)
    return nodes


def _link_duplicates(nodes: list) -> list:
    """Mark duplicate nodes in place and return the ``duplicate_of`` edges.

    The earliest node of a cluster is the original; a node already marked as a
    duplicate is never used as an original, so ``is_duplicate_of`` always
    points at a node that survives pruning.
    """
    edges = []
    normalised = [n["text"].strip().lower() for n in nodes]
    token_sets = [tokenize(n["text"]) for n in nodes]   # hoisted out of the pair loop
    numeric_sets = [_numeric_tokens(ts) for ts in token_sets]
    for i, origin in enumerate(nodes):
        if origin["is_duplicate_of"]:
            continue
        for j in range(i + 1, len(nodes)):
            candidate = nodes[j]
            if candidate["is_duplicate_of"]:
                continue
            if normalised[i] == normalised[j]:
                sim = 1.0
            elif not token_sets[i] or not token_sets[j]:
                # jaccard() scores two empty sets as 1.0; that says nothing here.
                continue
            elif numeric_sets[i] != numeric_sets[j]:
                # Different numbers/versions mean different facts, not repeats.
                continue
            else:
                sim = jaccard(token_sets[i], token_sets[j])
            if sim >= DUPLICATE_THRESHOLD:
                candidate["is_duplicate_of"] = origin["id"]
                candidate["confidence"] = round(1.0 - sim, 2)
                edges.append({
                    "from": candidate["id"],
                    "to": origin["id"],
                    "relation": "duplicate_of",
                    "weight": round(sim, 3),
                })
    return edges


def _signals_conflict(text_a: str, text_b: str) -> bool:
    """Return True when a contradiction pattern matches both texts with different values."""
    for pattern in _CONTRADICTION_PATTERNS:
        found_a = pattern.findall(text_a)
        found_b = pattern.findall(text_b)
        if found_a and found_b and set(found_a) != set(found_b):
            return True
    return False


def build_graph(lines: list[str], stale_days: int = 30,
                previous_nodes: Optional[list] = None) -> tuple[list, list]:
    """Build nodes and edges from memory lines.

    Args:
        lines: Memory texts as returned by parse_memory_file().
        stale_days: A node is stale when its ``last_referenced`` date is more
            than this many days in the past.
        previous_nodes: Nodes of the previous build. ``added_at`` and
            ``last_referenced`` are carried over for nodes with a matching id;
            without it every node is stamped with today's date (and is
            therefore never stale).

    Returns:
        ``(nodes, edges)``. Edges are ordered duplicates first, then
        contradictions, then ``related_to`` links.

    NOTE(review): nothing in this script refreshes ``last_referenced``; unless
    another component updates it in the state file, staleness is effectively
    the age of the entry.
    """
    nodes = _make_nodes(lines, stale_days, previous_nodes)
    edges = _link_duplicates(nodes)

    duplicate_pairs = {frozenset((e["from"], e["to"])) for e in edges}
    entity_sets = [set(n["entities"]) for n in nodes]
    lowered = [n["text"].lower() for n in nodes]
    contradiction_edges = []
    related_edges = []
    for i, j in itertools.combinations(range(len(nodes)), 2):
        ei, ej = entity_sets[i], entity_sets[j]
        overlap = ei & ej
        if len(overlap) < 2:  # fewer than two shared entities: unrelated
            continue
        if _signals_conflict(lowered[i], lowered[j]):
            contradiction_edges.append({
                "from": nodes[i]["id"],
                "to": nodes[j]["id"],
                "relation": "contradicts",
                "weight": round(len(overlap) / max(len(ei), len(ej), 1), 3),
            })
        elif frozenset((nodes[i]["id"], nodes[j]["id"])) not in duplicate_pairs:
            related_edges.append({
                "from": nodes[i]["id"],
                "to": nodes[j]["id"],
                "relation": "related_to",
                "weight": round(len(overlap) / max(len(ei | ej), 1), 3),
            })

    edges.extend(contradiction_edges)
    edges.extend(related_edges)
    return nodes, edges


# ── Digest generator ──────────────────────────────────────────────────────────

_CATEGORY_ORDER = ["preference", "project", "person", "tool", "config", "fact", "other"]
_CATEGORY_LABELS = {
    "preference": "Preferences", "project": "Active Projects",
    "person": "People", "tool": "Tools & Technologies",
    "config": "Configuration", "fact": "Facts", "other": "Other",
}


def generate_digest(nodes: list, edges: list, max_tokens: int = 2000) -> str:
    """Generate the compressed memory digest, grouped by category.

    Duplicate and stale nodes are left out. Contradicting pairs are listed in
    a trailing "Conflicts" section. When the estimated token count exceeds
    *max_tokens*, lines are dropped from the bottom (conflicts first, then the
    last categories) until it fits; a section header left without entries is
    removed as well.
    """
    active = [n for n in nodes
              if not n.get("is_duplicate_of") and not n.get("is_stale")]
    contradictions = [e for e in edges if e.get("relation") == "contradicts"]

    by_cat: dict = {}
    for node in active:
        cat = node.get("category", "other")
        if cat not in _CATEGORY_LABELS:
            cat = "other"  # unknown categories would otherwise vanish from the digest
        by_cat.setdefault(cat, []).append(node)

    lines = []
    for cat in _CATEGORY_ORDER:
        cat_nodes = by_cat.get(cat)
        if not cat_nodes:
            continue
        lines.append(f"## {_CATEGORY_LABELS[cat]}")
        lines.extend(f"- {node['text']}" for node in cat_nodes)
        lines.append("")

    if contradictions:
        lines.append("## Conflicts (needs resolution)")
        node_map = {n["id"]: n for n in nodes}
        shown = set()
        for edge in contradictions:
            key = (edge["from"], edge["to"])
            if key in shown:
                continue
            shown.add(key)
            a = node_map.get(edge["from"], {})
            b = node_map.get(edge["to"], {})
            lines.append(f"- [CONFLICT] \"{a.get('text','?')[:60]}\" vs \"{b.get('text','?')[:60]}\"")
        lines.append("")

    # Track words per line so truncation is linear instead of re-joining and
    # re-splitting the whole digest after every dropped line.
    word_counts = [len(line.split()) for line in lines]
    total_words = sum(word_counts)
    truncated = False
    while lines and int(total_words * TOKENS_PER_WORD) > max_tokens:
        lines.pop()
        total_words -= word_counts.pop()
        truncated = True
    if truncated:
        # Do not end on a blank line or on a header whose entries were cut.
        while lines and (not lines[-1] or lines[-1].startswith("## ")):
            lines.pop()
        if lines:
            lines.append("")

    return "\n".join(lines)


# ── Commands ──────────────────────────────────────────────────────────────────

def _write_digest(digest: str) -> None:
    """Write *digest* to DIGEST_FILE, creating its directory if necessary."""
    DIGEST_FILE.parent.mkdir(parents=True, exist_ok=True)
    DIGEST_FILE.write_text(digest, encoding="utf-8")


def _count_duplicates(nodes: list) -> int:
    return sum(1 for n in nodes if n.get("is_duplicate_of"))


def _count_contradictions(edges: list) -> int:
    return sum(1 for e in edges if e.get("relation") == "contradicts")


def _count_stale(nodes: list) -> int:
    return sum(1 for n in nodes if n.get("is_stale"))


def cmd_build(state: dict, stale_days: int, fmt: str) -> None:
    """Rebuild the graph from MEMORY.md, write the digest and persist state."""
    lines = parse_memory_file()
    if not lines:
        print("MEMORY.md not found or empty.")
        return

    # Pass the previous nodes so added_at / last_referenced survive rebuilds.
    nodes, edges = build_graph(lines, stale_days, previous_nodes=state.get("nodes"))
    dups = _count_duplicates(nodes)
    contras = _count_contradictions(edges)
    stale = _count_stale(nodes)

    now = datetime.now().isoformat()
    state["nodes"] = nodes
    state["edges"] = edges
    state["last_build_at"] = now
    state["node_count"] = len(nodes)
    state["edge_count"] = len(edges)
    state["duplicate_count"] = dups
    state["contradiction_count"] = contras
    state["stale_count"] = stale

    digest = generate_digest(nodes, edges)
    _write_digest(digest)
    state["digest_tokens"] = estimate_tokens(digest)

    history = state.get("build_history") or []
    history.insert(0, {
        "built_at": now, "node_count": len(nodes),
        "duplicates_found": dups, "contradictions_found": contras,
        "stale_found": stale, "digest_tokens": state["digest_tokens"],
    })
    state["build_history"] = history[:MAX_HISTORY]
    save_state(state)

    if fmt == "json":
        print(json.dumps({
            "node_count": len(nodes), "edge_count": len(edges),
            "duplicates": dups, "contradictions": contras,
            "stale": stale, "digest_tokens": state["digest_tokens"],
        }, indent=2))
        return

    print(f"\nMemory Graph Builder — {now[:16]}")
    print("─" * 48)
    print(f" Memory lines : {len(lines)}")
    print(f" Nodes : {len(nodes)}")
    print(f" Edges : {len(edges)}")
    print(f" Duplicates : {dups}")
    print(f" Contradictions : {contras}")
    print(f" Stale : {stale}")
    print(f" Digest tokens : ~{state['digest_tokens']}")
    print(f"\n Digest written to: {DIGEST_FILE}")
    print()


def cmd_duplicates(state: dict) -> None:
    """Print every node flagged as a duplicate next to its original."""
    nodes = state.get("nodes") or []
    dups = [n for n in nodes if n.get("is_duplicate_of")]
    if not dups:
        print("✓ No duplicates detected.")
        return
    node_map = {n["id"]: n for n in nodes}
    print(f"\nDuplicate Clusters ({len(dups)} duplicates)")
    print("─" * 48)
    for dup in dups:
        orig = node_map.get(dup["is_duplicate_of"], {})
        print(f" DUP: \"{dup['text'][:70]}\"")
        print(f" ORIG: \"{orig.get('text','?')[:70]}\"")
        print()


def cmd_contradictions(state: dict) -> None:
    """Print every pair of nodes joined by a ``contradicts`` edge."""
    edges = state.get("edges") or []
    nodes = state.get("nodes") or []
    contras = [e for e in edges if e.get("relation") == "contradicts"]
    if not contras:
        print("✓ No contradictions detected.")
        return
    node_map = {n["id"]: n for n in nodes}
    print(f"\nContradictions ({len(contras)} pairs)")
    print("─" * 48)
    for c in contras:
        a = node_map.get(c["from"], {})
        b = node_map.get(c["to"], {})
        print(f" A: \"{a.get('text','?')[:70]}\"")
        print(f" B: \"{b.get('text','?')[:70]}\"")
        print(" → Resolve by editing MEMORY.md")
        print()


def cmd_stale(state: dict, days: int) -> None:
    """Print nodes not referenced within the last *days* days.

    Staleness is re-evaluated against *days* from the stored
    ``last_referenced`` dates, so the threshold given here takes effect
    without a rebuild.
    """
    nodes = state.get("nodes") or []
    today = datetime.now().date()
    stale = [n for n in nodes if _is_stale(n, days, today)]
    if not stale:
        print(f"✓ No memories stale beyond {days} days.")
        return
    print(f"\nStale Memories ({len(stale)} entries, >{days} days)")
    print("─" * 48)
    for n in stale:
        print(f" [{n.get('category','?')}] \"{n['text'][:70]}\"")


def cmd_digest(state: dict, max_tokens: int) -> None:
    """Regenerate the digest from the stored graph, write it and print it."""
    nodes = state.get("nodes") or []
    edges = state.get("edges") or []
    if not nodes:
        print("No graph built yet. Run --build first.")
        return
    digest = generate_digest(nodes, edges, max_tokens)
    _write_digest(digest)
    tokens = estimate_tokens(digest)
    print(f"✓ Digest written ({tokens} est. tokens) → {DIGEST_FILE}")
    print()
    print(digest)


def cmd_prune(state: dict, dry_run: bool, stale_days: int = 30) -> None:
    """Remove duplicate and stale entries from MEMORY.md, then rebuild the graph.

    With *dry_run* the candidates are only listed. Lines are matched by text,
    so when a surviving node has the same text as a pruned one (a line that
    appears verbatim more than once) the first occurrence(s) are kept and only
    the surplus copies are deleted.
    """
    nodes = state.get("nodes") or []
    doomed = [n for n in nodes if n.get("is_duplicate_of") or n.get("is_stale")]
    if not doomed:
        print("✓ Nothing to prune.")
        return
    if dry_run:
        print(f"\nDry run — would prune {len(doomed)} entries:")
        for n in doomed:
            reason = "duplicate" if n.get("is_duplicate_of") else "stale"
            print(f" [{reason}] \"{n['text'][:70]}\"")
        return

    removed = 0
    if MEMORY_FILE.exists():
        doomed_texts = {n["text"] for n in doomed}
        # How many lines with a given text must remain for the surviving nodes.
        keep_budget = Counter(
            n["text"] for n in nodes
            if not (n.get("is_duplicate_of") or n.get("is_stale"))
        )
        kept_lines = []
        for line in MEMORY_FILE.read_text(encoding="utf-8").splitlines():
            text = _memory_text(line)
            if text in doomed_texts:
                if keep_budget[text] > 0:
                    keep_budget[text] -= 1
                else:
                    removed += 1
                    continue
            kept_lines.append(line)
        MEMORY_FILE.write_text("\n".join(kept_lines) + "\n", encoding="utf-8")

    # Rebuild from the pruned file, keeping timestamps of the remaining nodes.
    lines = parse_memory_file()
    nodes, edges = build_graph(lines, stale_days, previous_nodes=nodes)
    state["nodes"] = nodes
    state["edges"] = edges
    state["node_count"] = len(nodes)
    state["edge_count"] = len(edges)
    state["duplicate_count"] = _count_duplicates(nodes)
    state["contradiction_count"] = _count_contradictions(edges)
    state["stale_count"] = _count_stale(nodes)
    save_state(state)
    print(f"✓ Pruned {removed} entries. {len(nodes)} nodes remain.")


def cmd_stats(state: dict, fmt: str) -> None:
    """Print node/edge totals and per-category counts as text or JSON."""
    nodes = state.get("nodes") or []
    edges = state.get("edges") or []
    per_category = Counter(n.get("category", "other") for n in nodes)
    dups = _count_duplicates(nodes)
    contras = _count_contradictions(edges)

    if fmt == "json":
        print(json.dumps({
            "nodes": len(nodes), "edges": len(edges),
            "categories": dict(per_category),
            "duplicates": dups,
            "contradictions": contras,
        }, indent=2))
        return

    print("\nMemory Graph Statistics")
    print("─" * 40)
    print(f" Total nodes : {len(nodes)}")
    print(f" Total edges : {len(edges)}")
    for cat, count in sorted(per_category.items()):
        print(f" {cat:15s}: {count}")
    print(f" Duplicates : {dups}")
    print(f" Contradictions: {contras}")
    print()


def cmd_status(state: dict) -> None:
    """Print the one-line summary recorded by the last build."""
    last = state.get("last_build_at", "never")
    print(f"\nMemory Graph Builder — Last build: {last}")
    print(f" Nodes: {state.get('node_count',0)} | "
          f"Edges: {state.get('edge_count',0)} | "
          f"Dups: {state.get('duplicate_count',0)} | "
          f"Conflicts: {state.get('contradiction_count',0)} | "
          f"Digest: ~{state.get('digest_tokens',0)} tokens")
    print()


# ── Main ──────────────────────────────────────────────────────────────────────

def main():
    """Parse the command line and run the single requested action."""
    parser = argparse.ArgumentParser(description="Memory Graph Builder")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--build", action="store_true")
    group.add_argument("--duplicates", action="store_true")
    group.add_argument("--contradictions", action="store_true")
    group.add_argument("--stale", action="store_true")
    group.add_argument("--digest", action="store_true")
    group.add_argument("--prune", action="store_true")
    group.add_argument("--stats", action="store_true")
    group.add_argument("--status", action="store_true")
    parser.add_argument("--days", type=int, default=30)
    parser.add_argument("--max-tokens", type=int, default=2000)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--format", choices=["text", "json"], default="text")
    args = parser.parse_args()

    state = load_state()

    if args.build:
        cmd_build(state, args.days, args.format)
    elif args.duplicates:
        cmd_duplicates(state)
    elif args.contradictions:
        cmd_contradictions(state)
    elif args.stale:
        cmd_stale(state, args.days)
    elif args.digest:
        cmd_digest(state, args.max_tokens)
    elif args.prune:
        cmd_prune(state, args.dry_run, args.days)
    elif args.stats:
        cmd_stats(state, args.format)
    elif args.status:
        cmd_status(state)


if __name__ == "__main__":
    main()