diff --git a/CLAUDE.md b/CLAUDE.md index 2a7ba78..6c6503c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,9 +15,15 @@ cargo run --release cargo run --release -- --verbose # Run CLI commands -cargo run --release -- init # Initialize project -cargo run --release -- stats # Show database statistics -cargo run --release -- list # List stored items +cargo run --release -- init # Initialize project +cargo run --release -- stats # Show database statistics +cargo run --release -- list # List stored items +cargo run --release -- list --scope global # List global items +cargo run --release -- store "some content" # Store content +cargo run --release -- store - # Store from stdin +cargo run --release -- recall "search query" # Semantic search +cargo run --release -- forget # Delete an item +cargo run --release -- --json list # JSON output (any command) # Run tests (requires model download on first run) cargo test @@ -38,7 +44,7 @@ Sediment is a semantic memory system for AI agents, running as an MCP (Model Con ### Core Components -- **`src/main.rs`** - CLI entry point with subcommands (init, stats, list) and MCP server startup +- **`src/main.rs`** - CLI entry point with subcommands (init, stats, list, store, recall, forget) and MCP server startup - **`src/lib.rs`** - Library root exposing public API, project detection, scope types, and project ID migration - **`src/db.rs`** - LanceDB wrapper handling vector storage, hybrid search (vector + FTS/BM25), and CRUD operations - **`src/embedder.rs`** - Local embeddings using `all-MiniLM-L6-v2` via Candle (384-dim vectors) diff --git a/Cargo.toml b/Cargo.toml index b694110..5a851fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sediment-mcp" -version = "0.4.4" +version = "0.5.0" edition = "2024" repository = "https://github.com/rendro/sediment" homepage = "https://github.com/rendro/sediment" diff --git a/README.md b/README.md index 98788cf..4f2d710 100644 --- a/README.md +++ b/README.md @@ -141,12 +141,19 @@ 
Go to **Settings > Tools > AI Assistant > MCP Servers**, click **+**, and add: ## CLI ```bash -sediment # Start MCP server -sediment init # Set up Claude Code integration -sediment stats # Show database statistics -sediment list # List stored items +sediment # Start MCP server +sediment init # Set up Claude Code integration +sediment stats # Show database statistics +sediment list # List stored items +sediment list --scope global # List global items +sediment store "some content" # Store content +sediment store - # Store content from stdin +sediment recall "search query" # Search by semantic similarity +sediment forget # Delete an item by ID ``` +All commands support `--json` for machine-readable output. + ## How It Works ### Two-Database Hybrid diff --git a/src/db.rs b/src/db.rs index dbaf15c..1b4047d 100644 --- a/src/db.rs +++ b/src/db.rs @@ -28,7 +28,7 @@ fn sanitize_sql_string(s: &str) -> String { /// Validate that a string looks like a valid item/project ID (UUID hex + hyphens). /// Returns true if the string only contains safe characters for SQL interpolation. /// Use this as an additional guard before `sanitize_sql_string` for ID fields. -pub(crate) fn is_valid_id(s: &str) -> bool { +pub fn is_valid_id(s: &str) -> bool { !s.is_empty() && s.len() <= 64 && s.chars().all(|c| c.is_ascii_hexdigit() || c == '-') } diff --git a/src/main.rs b/src/main.rs index 81c9053..69d29bb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,10 +3,12 @@ //! Semantic memory for AI agents - local-first, MCP-native. //! Run this binary to start the MCP server. 
+use std::io::Read; use std::path::PathBuf; use anyhow::Result; use clap::{Parser, Subcommand}; +use serde_json::json; use tracing_subscriber::{EnvFilter, fmt}; #[derive(Parser)] @@ -22,6 +24,10 @@ struct Cli { #[arg(short, long, global = true)] verbose: bool, + /// Output JSON instead of human-readable text + #[arg(long, global = true)] + json: bool, + #[command(subcommand)] command: Option<Commands>, } @@ -42,11 +48,45 @@ enum Commands { /// Show database statistics (item count, chunk count) Stats, - /// List stored items for debugging + /// List stored items List { /// Maximum number of items to show #[arg(short, long, default_value = "20")] limit: usize, + + /// Scope: "project" (default), "global", or "all" + #[arg(short, long, default_value = "project")] + scope: String, + }, + + /// Store content for later retrieval + Store { + /// Content to store (use "-" to read from stdin) + content: String, + + /// Scope: "project" (default) or "global" + #[arg(short, long, default_value = "project")] + scope: String, + + /// ID of an existing item to replace + #[arg(long)] + replace: Option<String>, + }, + + /// Search stored content by semantic similarity + Recall { + /// Search query + query: String, + + /// Maximum number of results + #[arg(short, long, default_value = "5")] + limit: usize, + }, + + /// Delete a stored item by its ID + Forget { + /// Item ID to delete + id: String, }, } @@ -77,7 +117,14 @@ fn main() -> Result<()> { None => run_mcp_server(cli.db), Some(Commands::Init { local, global }) => run_init(local, global), Some(Commands::Stats) => run_stats(cli.db), - Some(Commands::List { limit }) => run_list(cli.db, limit), + Some(Commands::List { limit, scope }) => run_list(cli.db, limit, &scope, cli.json), + Some(Commands::Store { + content, + scope, + replace, + }) => run_store(cli.db, &content, &scope, replace, cli.json), + Some(Commands::Recall { query, limit }) => run_recall(cli.db, &query, limit, cli.json), + Some(Commands::Forget { id }) => run_forget(cli.db, &id, 
cli.json), } } @@ -320,19 +367,89 @@ fn run_stats(db_override: Option<PathBuf>) -> Result<()> { }) } -/// List stored items -fn run_list(db_override: Option<PathBuf>, limit: usize) -> Result<()> { +/// Shared context for CLI commands that need database access. +struct CliContext { + db_path: PathBuf, + access_db_path: PathBuf, + project_id: Option<String>, +} + +/// Build CLI context: resolve DB path, detect project, derive project ID. +fn cli_context(db_override: Option<PathBuf>) -> CliContext { let db_path = db_override.unwrap_or_else(sediment::central_db_path); + let sediment_dir = db_path.parent().unwrap_or(&db_path); + let access_db_path = sediment_dir.join("access.db"); - if !db_path.exists() { - println!("Database does not exist yet."); + let cwd = std::env::current_dir().ok(); + let project_root = cwd + .as_deref() + .map(|dir| sediment::find_project_root(dir).unwrap_or_else(|| dir.to_path_buf())); + let project_id = project_root + .as_ref() + .and_then(|root| sediment::get_or_create_project_id(root).ok()); + + CliContext { + db_path, + access_db_path, + project_id, + } +} + +/// List stored items +fn run_list( + db_override: Option<PathBuf>, + limit: usize, + scope: &str, + output_json: bool, +) -> Result<()> { + let ctx = cli_context(db_override); + + if !ctx.db_path.exists() { + if output_json { + println!("{}", json!({"count": 0, "items": []})); + } else { + println!("Database does not exist yet."); + } return Ok(()); } + let scope = scope + .parse::<sediment::ListScope>() + .map_err(|e| anyhow::anyhow!(e))?; + let rt = tokio::runtime::Runtime::new()?; rt.block_on(async { - let mut db = sediment::Database::open(&db_path).await?; - let items = db.list_items(Some(limit), sediment::ListScope::All).await?; + let mut db = sediment::Database::open_with_project(&ctx.db_path, ctx.project_id).await?; + let items = db.list_items(Some(limit), scope).await?; + + if output_json { + let formatted: Vec<serde_json::Value> = items + .iter() + .map(|item| { + let mut obj = json!({ + "id": item.id, + "content": item.content, + "created": 
item.created_at.to_rfc3339(), + }); + if item.project_id.is_some() { + obj["scope"] = json!("project"); + } else { + obj["scope"] = json!("global"); + } + if item.is_chunked { + obj["chunked"] = json!(true); + } + obj + }) + .collect(); + + let result = json!({ + "count": items.len(), + "items": formatted + }); + println!("{}", serde_json::to_string_pretty(&result)?); + return Ok(()); + } if items.is_empty() { println!("No items stored."); @@ -370,6 +487,379 @@ fn run_list(db_override: Option<PathBuf>, limit: usize) -> Result<()> { }) } +/// Store content +fn run_store( + db_override: Option<PathBuf>, + content: &str, + scope: &str, + replace: Option<String>, + output_json: bool, +) -> Result<()> { + let ctx = cli_context(db_override); + + // Read content from stdin if "-" + let content = if content == "-" { + let mut buf = String::new(); + std::io::stdin().read_to_string(&mut buf)?; + buf + } else { + content.to_string() + }; + + if content.trim().is_empty() { + anyhow::bail!("Content must not be empty"); + } + + let scope = scope + .parse::<sediment::StoreScope>() + .map_err(|e| anyhow::anyhow!(e))?; + + let rt = tokio::runtime::Runtime::new()?; + rt.block_on(async { + let mut db = sediment::Database::open_with_project(&ctx.db_path, ctx.project_id.clone()).await?; + + let mut item = sediment::Item::new(&content); + + // Set project_id based on scope + if scope == sediment::StoreScope::Project + && let Some(ref project_id) = ctx.project_id + { + item = item.with_project_id(project_id); + } + + let store_result = db.store_item(item).await?; + let new_id = store_result.id.clone(); + + // Create graph node + let graph = sediment::graph::GraphStore::open(&ctx.access_db_path)?; + let now = chrono::Utc::now().timestamp(); + if let Err(e) = graph.add_node(&new_id, ctx.project_id.as_deref(), now) { + tracing::warn!("graph add_node failed: {}", e); + } + + // Enqueue consolidation candidates from conflicts + if !store_result.potential_conflicts.is_empty() + && let Ok(queue) = + 
sediment::consolidation::ConsolidationQueue::open(&ctx.access_db_path) + { + for conflict in &store_result.potential_conflicts { + if let Err(e) = + queue.enqueue(&new_id, &conflict.id, conflict.similarity as f64) + { + tracing::warn!("enqueue consolidation failed: {}", e); + } + } + } + + // Handle replace: delete old item, preserve graph lineage + let mut replaced = false; + if let Some(ref old_id) = replace { + if !sediment::db::is_valid_id(old_id) { + tracing::warn!("replace ID is not valid: {}", old_id); + } else { + if let Err(e) = graph.add_supersedes_edge(&new_id, old_id) { + tracing::warn!("replace: add_supersedes_edge failed: {}", e); + } + if let Err(e) = graph.transfer_edges(old_id, &new_id) { + tracing::warn!("replace: transfer_edges failed: {}", e); + } + + // Record validation on the new item + if let Ok(tracker) = + sediment::access::AccessTracker::open(&ctx.access_db_path) + { + let created_at = chrono::Utc::now().timestamp(); + if let Err(e) = tracker.record_validation(&new_id, created_at) { + tracing::warn!("replace: record_validation failed: {}", e); + } + } + + match db.delete_item(old_id).await { + Ok(true) => replaced = true, + Ok(false) => tracing::warn!("replace: old item not found: {}", old_id), + Err(e) => tracing::warn!("replace: delete_item failed: {}", e), + } + if let Err(e) = graph.remove_node(old_id) { + tracing::warn!("replace: remove_node failed: {}", e); + } + } + } + + if output_json { + let mut result = json!({ + "success": true, + "id": new_id, + "scope": scope.to_string(), + }); + + if replaced { + result["replaced_id"] = json!(replace); + } + + if !store_result.potential_conflicts.is_empty() { + let conflicts: Vec<serde_json::Value> = store_result + .potential_conflicts + .iter() + .map(|c| json!({"id": c.id, "content": c.content, "similarity": format!("{:.2}", c.similarity)})) + .collect(); + result["potential_conflicts"] = json!(conflicts); + } + + println!("{}", serde_json::to_string_pretty(&result)?); + } else { + let header = if replaced 
{ + format!("Stored (replaced {}):", replace.as_deref().unwrap_or("")) + } else { + "Stored:".to_string() + }; + println!("{}", header); + println!("  ID: {}", new_id); + println!("  Scope: {}", scope); + + if !store_result.potential_conflicts.is_empty() { + println!("\n  Potential conflicts:"); + for c in &store_result.potential_conflicts { + let preview: String = c.content.chars().take(60).collect::<String>().replace('\n', " "); + println!("    {} (similarity: {:.2})", c.id, c.similarity); + println!("      {}", preview); + } + } + } + + Ok(()) + }) +} + +/// Search stored content +fn run_recall( + db_override: Option<PathBuf>, + query: &str, + limit: usize, + output_json: bool, +) -> Result<()> { + let ctx = cli_context(db_override); + + if !ctx.db_path.exists() { + if output_json { + println!("{}", json!({"count": 0, "results": []})); + } else { + println!("No items found matching your query."); + } + return Ok(()); + } + + let rt = tokio::runtime::Runtime::new()?; + rt.block_on(async { + let mut db = + sediment::Database::open_with_project(&ctx.db_path, ctx.project_id.clone()).await?; + + let tracker = sediment::access::AccessTracker::open(&ctx.access_db_path)?; + let graph = sediment::graph::GraphStore::open(&ctx.access_db_path)?; + + let filters = sediment::ItemFilters::new(); + let config = sediment::mcp::tools::RecallConfig { + enable_background_tasks: false, + ..Default::default() + }; + + let recall_result = sediment::mcp::tools::recall_pipeline( + &mut db, &tracker, &graph, query, limit, filters, &config, + ) + .await + .map_err(|e| anyhow::anyhow!(e))?; + + if output_json { + // Batch-fetch neighbors for related_ids + let all_result_ids: Vec<&str> = recall_result + .results + .iter() + .map(|r| r.id.as_str()) + .collect(); + let neighbors_map = graph + .get_neighbors_mapped(&all_result_ids, 0.5) + .unwrap_or_default(); + + let formatted: Vec<serde_json::Value> = recall_result + .results + .iter() + .map(|r| { + let mut obj = json!({ + "id": r.id, + "content": r.content, + "similarity": 
format!("{:.2}", r.similarity), + "created": r.created_at.to_rfc3339(), + }); + if let Some(&raw_sim) = recall_result.raw_similarities.get(&r.id) + && (raw_sim - r.similarity).abs() > 0.001 + { + obj["raw_similarity"] = json!(format!("{:.2}", raw_sim)); + } + if let Some(ref excerpt) = r.relevant_excerpt { + obj["relevant_excerpt"] = json!(excerpt); + } + if let Some(ref current_pid) = ctx.project_id + && let Some(ref item_pid) = r.project_id + && item_pid != current_pid + { + obj["cross_project"] = json!(true); + } + if let Some(related) = neighbors_map.get(&r.id) + && !related.is_empty() + { + obj["related_ids"] = json!(related); + } + obj + }) + .collect(); + + let mut result = json!({ + "count": recall_result.results.len(), + "results": formatted, + }); + + if !recall_result.graph_expanded.is_empty() { + result["graph_expanded"] = json!(recall_result.graph_expanded); + } + if !recall_result.suggested.is_empty() { + result["suggested"] = json!(recall_result.suggested); + } + + println!("{}", serde_json::to_string_pretty(&result)?); + return Ok(()); + } + + // Human-readable output + if recall_result.results.is_empty() { + println!("No items found matching your query."); + return Ok(()); + } + + println!("Results ({}):\n", recall_result.results.len()); + for r in &recall_result.results { + println!("  {} (similarity: {:.2})", r.id, r.similarity); + + // Show raw similarity if decay scoring changed it + if let Some(&raw_sim) = recall_result.raw_similarities.get(&r.id) + && (raw_sim - r.similarity).abs() > 0.001 + { + println!("    raw similarity: {:.2}", raw_sim); + } + + let content_preview: String = r + .content + .chars() + .take(80) + .collect::<String>() + .replace('\n', " "); + let ellipsis = if r.content.chars().count() > 80 { + "..." 
+ } else { + "" + }; + println!("    {}{}", content_preview, ellipsis); + + if let Some(ref excerpt) = r.relevant_excerpt { + let excerpt_preview: String = excerpt + .chars() + .take(80) + .collect::<String>() + .replace('\n', " "); + let ellipsis = if excerpt.chars().count() > 80 { + "..." + } else { + "" + }; + println!("    excerpt: {}{}", excerpt_preview, ellipsis); + } + + println!(); + } + + if !recall_result.graph_expanded.is_empty() { + println!("Graph-expanded:"); + for entry in &recall_result.graph_expanded { + if let Some(id) = entry.get("id").and_then(|v| v.as_str()) { + let rel = entry + .get("rel_type") + .and_then(|v| v.as_str()) + .unwrap_or("related"); + println!("  {} (via {})", id, rel); + } + } + println!(); + } + + if !recall_result.suggested.is_empty() { + println!("Suggested:"); + for entry in &recall_result.suggested { + if let Some(id) = entry.get("id").and_then(|v| v.as_str()) { + let reason = entry.get("reason").and_then(|v| v.as_str()).unwrap_or(""); + println!("  {} — {}", id, reason); + } + } + println!(); + } + + Ok(()) + }) +} + +/// Delete a stored item +fn run_forget(db_override: Option<PathBuf>, id: &str, output_json: bool) -> Result<()> { + let ctx = cli_context(db_override); + + if !ctx.db_path.exists() { + anyhow::bail!("Database does not exist yet."); + } + + let rt = tokio::runtime::Runtime::new()?; + rt.block_on(async { + let db = + sediment::Database::open_with_project(&ctx.db_path, ctx.project_id.clone()).await?; + + // Access control: verify the item belongs to the current project or is global + if let Some(ref current_pid) = ctx.project_id { + match db.get_item(id).await { + Ok(Some(item)) => { + if let Some(ref item_pid) = item.project_id + && item_pid != current_pid + { + anyhow::bail!("Cannot delete item {} from a different project", id); + } + } + Ok(None) => anyhow::bail!("Item not found: {}", id), + Err(e) => anyhow::bail!("Failed to look up item: {}", e), + } + } + + match db.delete_item(id).await { + Ok(true) => { + // Remove from graph 
+ let graph = sediment::graph::GraphStore::open(&ctx.access_db_path)?; + if let Err(e) = graph.remove_node(id) { + tracing::warn!("remove_node failed: {}", e); + } + + if output_json { + println!( + "{}", + serde_json::to_string_pretty(&json!({ + "success": true, + "message": format!("Deleted item: {}", id), + }))? + ); + } else { + println!("Deleted item: {}", id); + } + } + Ok(false) => anyhow::bail!("Item not found: {}", id), + Err(e) => anyhow::bail!("Failed to delete item: {}", e), + } + + Ok(()) + }) +} + /// Generate CLAUDE.md instructions for Sediment fn generate_claude_md_instructions() -> String { r#"# Sediment Memory System