From 0099eec1260358d30f7bb53f3486277ba124e2fb Mon Sep 17 00:00:00 2001
From: Nik Kale
Date: Fri, 26 Dec 2025 16:05:27 -0800
Subject: [PATCH] feat: implement MCP server for Claude Desktop integration

---
 autorca_core/cli/__main__.py |  19 ++
 autorca_core/mcp/__init__.py |  11 +
 autorca_core/mcp/server.py   | 494 +++++++++++++++++++++++++++++++++++
 docs/MCP_INTEGRATION.md      | 214 +++++++++++++++
 pyproject.toml               |  10 +
 5 files changed, 748 insertions(+)
 create mode 100644 autorca_core/mcp/__init__.py
 create mode 100644 autorca_core/mcp/server.py
 create mode 100644 docs/MCP_INTEGRATION.md

diff --git a/autorca_core/cli/__main__.py b/autorca_core/cli/__main__.py
index a68b2eb..a0444e8 100644
--- a/autorca_core/cli/__main__.py
+++ b/autorca_core/cli/__main__.py
@@ -16,6 +16,17 @@ from autorca_core.logging import configure_logging
 
 
 
+def run_mcp_server():
+    """Start the MCP server, or exit with status 1 if 'mcp' is missing."""
+    try:
+        from autorca_core.mcp.server import start_mcp_server
+        start_mcp_server()
+    except ImportError:
+        print("Error: MCP server requires the 'mcp' package.", file=sys.stderr)
+        print("Install with: pip install 'autorca-core[mcp]'", file=sys.stderr)
+        sys.exit(1)
+
+
 def main():
     """Main CLI entry point."""
     parser = argparse.ArgumentParser(
@@ -31,6 +42,12 @@ def main():
         "quickstart",
         help="Run quickstart example with synthetic data",
     )
+
+    # MCP server command
+    mcp_parser = subparsers.add_parser(
+        "mcp-server",
+        help="Start MCP server for Claude Desktop integration",
+    )
     quickstart_parser.add_argument(
         "--log-level",
         type=str,
@@ -127,6 +144,8 @@ def main():
         run_quickstart()
     elif args.command == "run":
         run_custom_rca(args)
+    elif args.command == "mcp-server":
+        run_mcp_server()
     else:
         parser.print_help()
         sys.exit(1)
diff --git a/autorca_core/mcp/__init__.py b/autorca_core/mcp/__init__.py
new file mode 100644
index 0000000..e639c5f
--- /dev/null
+++ b/autorca_core/mcp/__init__.py
@@ -0,0 +1,11 @@
+"""
+MCP server module for AutoRCA-Core.
+
+Exposes AutoRCA-Core functionality as MCP tools for integration with
+Claude Desktop, Claude Code, and other MCP-compatible clients.
+"""
+
+from autorca_core.mcp.server import create_mcp_server, start_mcp_server
+
+__all__ = ["create_mcp_server", "start_mcp_server"]
+
diff --git a/autorca_core/mcp/server.py b/autorca_core/mcp/server.py
new file mode 100644
index 0000000..5e20d11
--- /dev/null
+++ b/autorca_core/mcp/server.py
@@ -0,0 +1,494 @@
+"""
+MCP server implementation for AutoRCA-Core.
+
+Provides MCP tools for:
+- Running RCA on observability data
+- Analyzing logs for anomalies
+- Querying service topology
+- Finding root cause candidates
+"""
+
+import asyncio
+import json
+import os
+from collections import Counter
+from datetime import datetime, timezone, timedelta
+from typing import Optional, Dict, Any
+
+from autorca_core.reasoning.loop import run_rca_from_files, DataSourcesConfig, run_rca
+from autorca_core.outputs.reports import generate_markdown_report, generate_json_report
+from autorca_core.ingestion import load_logs, load_metrics, load_traces
+from autorca_core.graph_engine.builder import build_service_graph
+from autorca_core.reasoning.rules import apply_rules
+from autorca_core.config import ThresholdConfig
+from autorca_core.logging import configure_logging, get_logger
+
+logger = get_logger(__name__)
+
+
+def create_mcp_server():
+    """
+    Create and configure the MCP server with AutoRCA-Core tools.
+
+    Returns:
+        Configured MCP server instance
+
+    Raises:
+        ImportError: If the optional ``mcp`` package is not installed.
+    """
+    try:
+        from mcp.server import Server
+        from mcp.types import Tool, TextContent
+    except ImportError as exc:
+        raise ImportError(
+            "mcp package required for MCP server. Install with: pip install mcp"
+        ) from exc
+
+    server = Server("autorca-core")
+
+    @server.list_tools()
+    async def list_tools() -> list[Tool]:
+        """List available AutoRCA-Core tools."""
+        return [
+            Tool(
+                name="run_rca",
+                description=(
+                    "Run comprehensive root cause analysis on observability data. "
+                    "Analyzes logs, metrics, and traces to identify root causes of incidents. "
+                    "Returns a detailed RCA report with candidates ranked by confidence."
+                ),
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "logs_path": {
+                            "type": "string",
+                            "description": "Path to log files (directory or file)",
+                        },
+                        "symptom": {
+                            "type": "string",
+                            "description": "Description of the incident symptom",
+                        },
+                        "metrics_path": {
+                            "type": "string",
+                            "description": "Optional path to metrics files",
+                        },
+                        "traces_path": {
+                            "type": "string",
+                            "description": "Optional path to trace files",
+                        },
+                        "configs_path": {
+                            "type": "string",
+                            "description": "Optional path to config change files",
+                        },
+                        "window_minutes": {
+                            "type": "integer",
+                            "description": "Analysis window in minutes (default: 60)",
+                            "default": 60,
+                        },
+                        "format": {
+                            "type": "string",
+                            "enum": ["markdown", "json"],
+                            "description": "Output format (default: markdown)",
+                            "default": "markdown",
+                        },
+                    },
+                    "required": ["logs_path", "symptom"],
+                },
+            ),
+            Tool(
+                name="analyze_logs",
+                description=(
+                    "Analyze log files for anomalies and patterns. "
+                    "Detects error spikes, unusual patterns, and service issues. "
+                    "Returns summary of findings with timestamps and affected services."
+                ),
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "logs_path": {
+                            "type": "string",
+                            "description": "Path to log files (directory or file)",
+                        },
+                        "time_from": {
+                            "type": "string",
+                            "description": "Start time in ISO format (optional)",
+                        },
+                        "time_to": {
+                            "type": "string",
+                            "description": "End time in ISO format (optional)",
+                        },
+                        "service_filter": {
+                            "type": "string",
+                            "description": "Filter to specific service (optional)",
+                        },
+                    },
+                    "required": ["logs_path"],
+                },
+            ),
+            Tool(
+                name="get_service_graph",
+                description=(
+                    "Build and return the service dependency graph from observability data. "
+                    "Shows which services depend on each other and detected incidents. "
+                    "Returns JSON representation of the service topology."
+                ),
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "logs_path": {
+                            "type": "string",
+                            "description": "Path to log files",
+                        },
+                        "traces_path": {
+                            "type": "string",
+                            "description": "Optional path to trace files (recommended for dependencies)",
+                        },
+                        "metrics_path": {
+                            "type": "string",
+                            "description": "Optional path to metrics files",
+                        },
+                    },
+                    "required": ["logs_path"],
+                },
+            ),
+            Tool(
+                name="find_root_causes",
+                description=(
+                    "Apply rule-based heuristics to find root cause candidates. "
+                    "Uses graph analysis, temporal correlation, and pattern matching. "
+                    "Returns ranked list of candidates with confidence scores and evidence."
+                ),
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "logs_path": {
+                            "type": "string",
+                            "description": "Path to log files",
+                        },
+                        "metrics_path": {
+                            "type": "string",
+                            "description": "Optional path to metrics",
+                        },
+                        "traces_path": {
+                            "type": "string",
+                            "description": "Optional path to traces",
+                        },
+                        "sensitivity": {
+                            "type": "string",
+                            "enum": ["strict", "normal", "relaxed"],
+                            "description": "Detection sensitivity (default: normal)",
+                            "default": "normal",
+                        },
+                    },
+                    "required": ["logs_path"],
+                },
+            ),
+        ]
+
+    @server.call_tool()
+    async def call_tool(name: str, arguments: Dict[str, Any]) -> list[TextContent]:
+        """
+        Dispatch a tool call to its handler and wrap the result as text.
+
+        Any exception raised by a handler is logged and returned to the
+        client as an "Error executing ..." text message instead of propagating.
+        """
+        logger.info(f"MCP tool called: {name} with args: {arguments}")
+
+        try:
+            if name == "run_rca":
+                result = await _handle_run_rca(arguments)
+            elif name == "analyze_logs":
+                result = await _handle_analyze_logs(arguments)
+            elif name == "get_service_graph":
+                result = await _handle_get_service_graph(arguments)
+            elif name == "find_root_causes":
+                result = await _handle_find_root_causes(arguments)
+            else:
+                result = f"Unknown tool: {name}"
+
+            return [TextContent(type="text", text=result)]
+
+        except Exception as e:
+            logger.error(f"Error in tool {name}: {e}", exc_info=True)
+            error_msg = f"Error executing {name}: {str(e)}"
+            return [TextContent(type="text", text=error_msg)]
+
+    return server
+
+
+async def _handle_run_rca(args: Dict[str, Any]) -> str:
+    """
+    Handle run_rca tool call.
+
+    Args:
+        args: Tool arguments; ``logs_path`` and ``symptom`` are required.
+
+    Returns:
+        The JSON report when ``format`` is "json", otherwise the markdown report.
+
+    Raises:
+        KeyError: If ``logs_path`` or ``symptom`` is missing.
+        ValueError: If ``window_minutes`` is not a positive integer.
+    """
+    logs_path = args["logs_path"]
+    symptom = args["symptom"]
+    metrics_path = args.get("metrics_path")
+    traces_path = args.get("traces_path")
+    configs_path = args.get("configs_path")
+    output_format = args.get("format", "markdown")
+
+    # Reject a malformed window here so the client gets a clear message
+    # instead of a failure from deep inside the analysis.
+    raw_window = args.get("window_minutes")
+    try:
+        window_minutes = 60 if raw_window is None else int(raw_window)
+    except (TypeError, ValueError) as exc:
+        raise ValueError(
+            f"window_minutes must be an integer, got {raw_window!r}"
+        ) from exc
+    if window_minutes <= 0:
+        raise ValueError(f"window_minutes must be positive, got {window_minutes}")
+
+    logger.info(f"Running RCA for symptom: {symptom}")
+
+    # Run RCA
+    result = run_rca_from_files(
+        logs_path=logs_path,
+        metrics_path=metrics_path,
+        traces_path=traces_path,
+        configs_path=configs_path,
+        primary_symptom=symptom,
+        window_minutes=window_minutes,
+    )
+
+    # Generate report
+    if output_format == "json":
+        return generate_json_report(result)
+    else:
+        return generate_markdown_report(result)
+
+
+def _parse_iso_time(value: Optional[str], field: str) -> Optional[datetime]:
+    """
+    Parse an optional ISO 8601 timestamp supplied by an MCP client.
+
+    A trailing "Z" is rewritten to "+00:00" first because
+    ``datetime.fromisoformat`` only accepts it from Python 3.11 onwards.
+
+    Args:
+        value: Timestamp string, or None/empty when the bound was not given.
+        field: Argument name, used in the error message.
+
+    Returns:
+        The parsed datetime, or None when no value was given.
+
+    Raises:
+        ValueError: If ``value`` is not a valid ISO 8601 timestamp.
+    """
+    if not value:
+        return None
+    normalized = value[:-1] + "+00:00" if value.endswith("Z") else value
+    try:
+        return datetime.fromisoformat(normalized)
+    except ValueError as exc:
+        raise ValueError(
+            f"{field} is not a valid ISO 8601 timestamp: {value!r}"
+        ) from exc
+
+
+async def _handle_analyze_logs(args: Dict[str, Any]) -> str:
+    """
+    Handle analyze_logs tool call.
+
+    Args:
+        args: Tool arguments; ``logs_path`` is required, ``time_from``,
+            ``time_to`` and ``service_filter`` are optional.
+
+    Returns:
+        Markdown summary with totals, per-service error counts and the ten
+        most recent error messages (each truncated to 100 characters).
+
+    Raises:
+        KeyError: If ``logs_path`` is missing.
+        ValueError: If ``time_from`` or ``time_to`` is not valid ISO 8601.
+    """
+    logs_path = args["logs_path"]
+    service_filter = args.get("service_filter")
+
+    # Parse times
+    # NOTE(review): a bound without a UTC offset parses as a naive datetime;
+    # confirm load_logs compares it safely against the log timestamps.
+    time_from = _parse_iso_time(args.get("time_from"), "time_from")
+    time_to = _parse_iso_time(args.get("time_to"), "time_to")
+
+    logger.info(f"Analyzing logs from: {logs_path}")
+
+    # Load logs
+    logs = load_logs(logs_path, time_from, time_to, service_filter)
+
+    # Analyze
+    total_logs = len(logs)
+    error_logs = [log for log in logs if log.is_error()]
+    services = set(log.service for log in logs)
+    # Count errors per service in one pass rather than rescanning per service.
+    errors_by_service = Counter(log.service for log in error_logs)
+
+    # Build summary
+    summary_parts = [
+        "# Log Analysis Summary",
+        "",
+        f"**Total Logs:** {total_logs}",
+        f"**Error Logs:** {len(error_logs)} ({len(error_logs)/max(total_logs,1)*100:.1f}%)",
+        f"**Unique Services:** {len(services)}",
+        "",
+    ]
+
+    if time_from and time_to:
+        summary_parts.append(f"**Time Range:** {time_from.isoformat()} to {time_to.isoformat()}")
+        summary_parts.append("")
+
+    if services:
+        summary_parts.append("**Services Detected:**")
+        for service in sorted(services):
+            summary_parts.append(f"- {service}: {errors_by_service[service]} errors")
+        summary_parts.append("")
+
+    if error_logs:
+        summary_parts.append("**Recent Errors:**")
+        for log in sorted(error_logs, key=lambda x: x.timestamp, reverse=True)[:10]:
+            summary_parts.append(f"- [{log.timestamp.isoformat()}] {log.service}: {log.message[:100]}")
+
+    return "\n".join(summary_parts)
+
+
+async def _handle_get_service_graph(args: Dict[str, Any]) -> str:
+    """
+    Handle get_service_graph tool call.
+
+    Args:
+        args: Tool arguments; ``logs_path`` is required, ``traces_path`` and
+            ``metrics_path`` are optional.
+
+    Returns:
+        The graph's ``to_dict()`` output serialized as indented JSON.
+
+    Raises:
+        KeyError: If ``logs_path`` is missing.
+    """
+    logs_path = args["logs_path"]
+    traces_path = args.get("traces_path")
+    metrics_path = args.get("metrics_path")
+
+    logger.info("Building service graph")
+
+    # Load data
+    logs = load_logs(logs_path) if logs_path else []
+    traces = load_traces(traces_path) if traces_path else []
+    metrics = load_metrics(metrics_path) if metrics_path else []
+
+    # Build graph
+    graph = build_service_graph(logs=logs, metrics=metrics, traces=traces)
+
+    # Convert to JSON
+    graph_dict = graph.to_dict()
+    return json.dumps(graph_dict, indent=2, default=str)
+
+
+async def _handle_find_root_causes(args: Dict[str, Any]) -> str:
+    """
+    Handle find_root_causes tool call.
+
+    Args:
+        args: Tool arguments; ``logs_path`` is required, ``metrics_path``,
+            ``traces_path`` and ``sensitivity`` are optional.
+
+    Returns:
+        Markdown listing up to ten candidates, each with up to five
+        evidence items.
+
+    Raises:
+        KeyError: If ``logs_path`` is missing.
+    """
+    logs_path = args["logs_path"]
+    metrics_path = args.get("metrics_path")
+    traces_path = args.get("traces_path")
+    sensitivity = args.get("sensitivity", "normal")
+
+    logger.info(f"Finding root causes with sensitivity: {sensitivity}")
+
+    # Configure thresholds based on sensitivity
+    if sensitivity == "strict":
+        thresholds = ThresholdConfig.strict()
+    elif sensitivity == "relaxed":
+        thresholds = ThresholdConfig.relaxed()
+    else:
+        thresholds = ThresholdConfig()
+
+    # Load data
+    logs = load_logs(logs_path) if logs_path else []
+    metrics = load_metrics(metrics_path) if metrics_path else []
+    traces = load_traces(traces_path) if traces_path else []
+
+    # Build graph and find candidates
+    graph = build_service_graph(logs=logs, metrics=metrics, traces=traces, thresholds=thresholds)
+    candidates = apply_rules(graph, thresholds=thresholds)
+
+    # Format results
+    result_parts = [
+        "# Root Cause Candidates",
+        "",
+        f"**Total Candidates:** {len(candidates)}",
+        f"**Sensitivity:** {sensitivity}",
+        "",
+    ]
+
+    for i, candidate in enumerate(candidates[:10], 1):
+        result_parts.append(f"## {i}. {candidate.service}")
+        result_parts.append(f"**Type:** {candidate.incident_type.value}")
+        result_parts.append(f"**Confidence:** {candidate.confidence:.0%}")
+        result_parts.append(f"**Explanation:** {candidate.explanation}")
+        result_parts.append("")
+        result_parts.append("**Evidence:**")
+        for evidence in candidate.evidence[:5]:
+            result_parts.append(f"- {evidence}")
+        result_parts.append("")
+
+    return "\n".join(result_parts)
+
+
+def start_mcp_server():
+    """
+    Start the MCP server (stdio transport).
+
+    The log level is read from the ``AUTORCA_LOG_LEVEL`` environment variable
+    and defaults to INFO.
+
+    Raises:
+        ImportError: If the optional ``mcp`` package is not installed.
+    """
+    # NOTE(review): the stdio transport uses stdout for protocol messages;
+    # confirm configure_logging does not write log records to stdout.
+    log_level = (os.environ.get("AUTORCA_LOG_LEVEL") or "INFO").upper()
+    configure_logging(level=log_level)
+    logger.info("Starting AutoRCA-Core MCP server")
+
+    server = create_mcp_server()
+
+    try:
+        from mcp.server.stdio import stdio_server
+    except ImportError as exc:
+        raise ImportError(
+            "mcp package required. Install with: pip install mcp"
+        ) from exc
+
+    async def run():
+        async with stdio_server() as (read_stream, write_stream):
+            await server.run(
+                read_stream,
+                write_stream,
+                server.create_initialization_options(),
+            )
+
+    asyncio.run(run())
+
diff --git a/docs/MCP_INTEGRATION.md b/docs/MCP_INTEGRATION.md
new file mode 100644
index 0000000..f823494
--- /dev/null
+++ b/docs/MCP_INTEGRATION.md
@@ -0,0 +1,214 @@
+# MCP Integration Guide
+
+AutoRCA-Core provides a Model Context Protocol (MCP) server that exposes RCA functionality as tools for Claude Desktop, Claude Code, and other MCP-compatible clients.
+
+## Installation
+
+Install AutoRCA-Core with MCP support:
+
+```bash
+pip install "autorca-core[mcp]"
+```
+
+Or install all optional dependencies:
+
+```bash
+pip install "autorca-core[all]"
+```
+
+## Claude Desktop Integration
+
+### 1. 
Start the MCP Server + +The MCP server uses stdio transport and is normally launched by Claude Desktop itself. To check that the installation works, you can start it by hand: + +```bash +autorca mcp-server +``` + +### 2. Configure Claude Desktop + +Add AutoRCA-Core to your Claude Desktop MCP configuration: + +- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json` +- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json` +- **Linux**: `~/.config/Claude/claude_desktop_config.json` + +```json +{ + "mcpServers": { + "autorca": { + "command": "autorca", + "args": ["mcp-server"] + } + } +} +``` + +### 3. Restart Claude Desktop + +After updating the configuration, restart Claude Desktop to load the AutoRCA-Core tools. + +## Available Tools + +Once configured, Claude can use these tools: + +### `run_rca` +Run comprehensive root cause analysis on observability data. + +**Parameters:** +- `logs_path` (required): Path to log files +- `symptom` (required): Description of the incident +- `metrics_path` (optional): Path to metrics files +- `traces_path` (optional): Path to trace files +- `configs_path` (optional): Path to config change files +- `window_minutes` (optional): Analysis window in minutes (default: 60) +- `format` (optional): Output format - "markdown" or "json" (default: markdown) + +**Example Claude prompt:** +``` +Please analyze the logs in /var/log/app for the symptom "API returning 500 errors" +using the metrics in /var/metrics +``` + +### `analyze_logs` +Analyze log files for anomalies and patterns. + +**Parameters:** +- `logs_path` (required): Path to log files +- `time_from` (optional): Start time in ISO format +- `time_to` (optional): End time in ISO format +- `service_filter` (optional): Filter to specific service + +**Example Claude prompt:** +``` +Analyze the logs in /var/log/app from 2025-01-01T10:00:00 to 2025-01-01T11:00:00 +``` + +### `get_service_graph` +Build and return the service dependency graph. 
+ +**Parameters:** +- `logs_path` (required): Path to log files +- `traces_path` (optional): Path to trace files (recommended for dependencies) +- `metrics_path` (optional): Path to metrics files + +**Example Claude prompt:** +``` +Show me the service dependency graph from the logs in /var/log/app and traces in /var/traces +``` + +### `find_root_causes` +Apply rule-based heuristics to find root cause candidates. + +**Parameters:** +- `logs_path` (required): Path to log files +- `metrics_path` (optional): Path to metrics +- `traces_path` (optional): Path to traces +- `sensitivity` (optional): "strict", "normal", or "relaxed" (default: normal) + +**Example Claude prompt:** +``` +Find root causes in /var/log/app with strict sensitivity +``` + +## Example Workflows + +### Incident Investigation +``` +1. "Show me the service graph for the production logs" +2. "Analyze the logs for the last hour" +3. "Find root causes with normal sensitivity" +4. "Run full RCA for symptom: checkout API timeouts" +``` + +### Daily Health Check +``` +1. "Analyze today's logs for any anomalies" +2. "Find any potential root causes in the last 24 hours" +``` + +### Post-Deployment Validation +``` +1. "Get the service graph before and after the deployment" +2. 
"Run RCA on logs since the deployment started" +``` + +## Troubleshooting + +### "MCP server requires the 'mcp' package" +Install the MCP optional dependency: +```bash +pip install "autorca-core[mcp]" +``` + +### Claude Desktop can't find the command +Ensure `autorca` is in your PATH: +```bash +which autorca # macOS/Linux +where autorca # Windows +``` + +If not found, use the full path in `claude_desktop_config.json`: +```json +{ + "mcpServers": { + "autorca": { + "command": "/full/path/to/autorca", + "args": ["mcp-server"] + } + } +} +``` + +### Server not starting +Check the Claude Desktop logs: +- **macOS**: `~/Library/Logs/Claude/mcp*.log` +- **Windows**: `%APPDATA%\Claude\Logs\mcp*.log` + +## Security Considerations + +- The MCP server runs with the same permissions as Claude Desktop +- Ensure log files don't contain sensitive information +- Consider using read-only file paths +- Use the validation limits to prevent resource exhaustion + +## Advanced Configuration + +### With Virtual Environment + +```json +{ + "mcpServers": { + "autorca": { + "command": "/path/to/venv/bin/autorca", + "args": ["mcp-server"] + } + } +} +``` + +### With Custom Log Level + +The MCP server respects the `AUTORCA_LOG_LEVEL` environment variable: + +```json +{ + "mcpServers": { + "autorca": { + "command": "autorca", + "args": ["mcp-server"], + "env": { + "AUTORCA_LOG_LEVEL": "DEBUG" + } + } + } +} +``` + +## Next Steps + +- Explore the [main README](../README.md) for core AutoRCA concepts +- Check [CONTRIBUTING.md](../CONTRIBUTING.md) for development guidelines +- Review example data in the `examples/` directory + diff --git a/pyproject.toml b/pyproject.toml index c9049dd..38b0ac3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,16 @@ llm = [ "anthropic>=0.18", ] +mcp = [ + "mcp>=0.1.0", +] + +all = [ + "openai>=1.0", + "anthropic>=0.18", + "mcp>=0.1.0", +] + [project.urls] Homepage = "https://github.com/nik-kale/AutoRCA-Core" Repository = 
"https://github.com/nik-kale/AutoRCA-Core"