From 0099eec1260358d30f7bb53f3486277ba124e2fb Mon Sep 17 00:00:00 2001
From: Nik Kale <nikkal@cisco.com>
Date: Fri, 26 Dec 2025 16:05:27 -0800
Subject: [PATCH] feat: implement MCP server for Claude Desktop integration

---
 autorca_core/cli/__main__.py |  19 ++
 autorca_core/mcp/__init__.py |  11 +
 autorca_core/mcp/server.py   | 382 +++++++++++++++++++++++++++++++++++
 docs/MCP_INTEGRATION.md      | 214 ++++++++++++++++++++
 pyproject.toml               |  10 +
 5 files changed, 636 insertions(+)
 create mode 100644 autorca_core/mcp/__init__.py
 create mode 100644 autorca_core/mcp/server.py
 create mode 100644 docs/MCP_INTEGRATION.md

diff --git a/autorca_core/cli/__main__.py b/autorca_core/cli/__main__.py
index a68b2eb..a0444e8 100644
--- a/autorca_core/cli/__main__.py
+++ b/autorca_core/cli/__main__.py
@@ -16,6 +16,17 @@
 from autorca_core.logging import configure_logging
 
 
+def run_mcp_server():
+    """Start the MCP server; report a missing 'mcp' extra on stderr (stdout carries the protocol)."""
+    try:
+        from autorca_core.mcp.server import start_mcp_server
+        start_mcp_server()
+    except ImportError:
+        print("Error: MCP server requires the 'mcp' package.", file=sys.stderr)
+        print("Install with: pip install 'autorca-core[mcp]'", file=sys.stderr)
+        sys.exit(1)
+
+
 def main():
     """Main CLI entry point."""
     parser = argparse.ArgumentParser(
@@ -31,6 +42,12 @@ def main():
         "quickstart",
         help="Run quickstart example with synthetic data",
     )
+    
+    # MCP server command
+    mcp_parser = subparsers.add_parser(
+        "mcp-server",
+        help="Start MCP server for Claude Desktop integration",
+    )
     quickstart_parser.add_argument(
         "--log-level",
         type=str,
@@ -127,6 +144,8 @@ def main():
         run_quickstart()
     elif args.command == "run":
         run_custom_rca(args)
+    elif args.command == "mcp-server":
+        run_mcp_server()
     else:
         parser.print_help()
         sys.exit(1)
diff --git a/autorca_core/mcp/__init__.py b/autorca_core/mcp/__init__.py
new file mode 100644
index 0000000..e639c5f
--- /dev/null
+++ b/autorca_core/mcp/__init__.py
@@ -0,0 +1,11 @@
+"""
+MCP server module for AutoRCA-Core.
+
+Exposes AutoRCA-Core functionality as MCP tools for integration with
+Claude Desktop, Claude Code, and other MCP-compatible clients.
+"""
+
+from autorca_core.mcp.server import create_mcp_server, start_mcp_server
+
+__all__ = ["create_mcp_server", "start_mcp_server"]
+
diff --git a/autorca_core/mcp/server.py b/autorca_core/mcp/server.py
new file mode 100644
index 0000000..5e20d11
--- /dev/null
+++ b/autorca_core/mcp/server.py
@@ -0,0 +1,382 @@
+"""
+MCP server implementation for AutoRCA-Core.
+
+Provides MCP tools for:
+- Running RCA on observability data
+- Analyzing logs for anomalies
+- Querying service topology
+- Finding root cause candidates
+"""
+
+import asyncio
+import json
+from datetime import datetime, timezone, timedelta
+from typing import Optional, Dict, Any
+
+from autorca_core.reasoning.loop import run_rca_from_files, DataSourcesConfig, run_rca
+from autorca_core.outputs.reports import generate_markdown_report, generate_json_report
+from autorca_core.ingestion import load_logs, load_metrics, load_traces
+from autorca_core.graph_engine.builder import build_service_graph
+from autorca_core.reasoning.rules import apply_rules
+from autorca_core.config import ThresholdConfig
+from autorca_core.logging import configure_logging, get_logger
+
+logger = get_logger(__name__)
+
+
+def create_mcp_server():
+    """
+    Create the MCP server and register the AutoRCA-Core tool handlers.
+
+    Returns:
+        Configured MCP server instance
+    Raises:
+        ImportError: If the optional 'mcp' package is not installed.
+    """
+    try:
+        from mcp.server import Server
+        from mcp.types import Tool, TextContent
+    except ImportError as exc:
+        raise ImportError(
+            "mcp package required for MCP server. Install with: pip install 'autorca-core[mcp]'"
+        ) from exc
+
+    server = Server("autorca-core")
+
+    @server.list_tools()
+    async def list_tools() -> list[Tool]:
+        """List available AutoRCA-Core tools."""
+        return [
+            Tool(
+                name="run_rca",
+                description=(
+                    "Run comprehensive root cause analysis on observability data. "
+                    "Analyzes logs, metrics, and traces to identify root causes of incidents. "
+                    "Returns a detailed RCA report with candidates ranked by confidence."
+                ),
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "logs_path": {
+                            "type": "string",
+                            "description": "Path to log files (directory or file)",
+                        },
+                        "symptom": {
+                            "type": "string",
+                            "description": "Description of the incident symptom",
+                        },
+                        "metrics_path": {
+                            "type": "string",
+                            "description": "Optional path to metrics files",
+                        },
+                        "traces_path": {
+                            "type": "string",
+                            "description": "Optional path to trace files",
+                        },
+                        "configs_path": {
+                            "type": "string",
+                            "description": "Optional path to config change files",
+                        },
+                        "window_minutes": {
+                            "type": "integer",
+                            "description": "Analysis window in minutes (default: 60)",
+                            "default": 60,
+                        },
+                        "format": {
+                            "type": "string",
+                            "enum": ["markdown", "json"],
+                            "description": "Output format (default: markdown)",
+                            "default": "markdown",
+                        },
+                    },
+                    "required": ["logs_path", "symptom"],
+                },
+            ),
+            Tool(
+                name="analyze_logs",
+                description=(
+                    "Analyze log files for anomalies and patterns. "
+                    "Detects error spikes, unusual patterns, and service issues. "
+                    "Returns summary of findings with timestamps and affected services."
+                ),
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "logs_path": {
+                            "type": "string",
+                            "description": "Path to log files (directory or file)",
+                        },
+                        "time_from": {
+                            "type": "string",
+                            "description": "Start time in ISO format (optional)",
+                        },
+                        "time_to": {
+                            "type": "string",
+                            "description": "End time in ISO format (optional)",
+                        },
+                        "service_filter": {
+                            "type": "string",
+                            "description": "Filter to specific service (optional)",
+                        },
+                    },
+                    "required": ["logs_path"],
+                },
+            ),
+            Tool(
+                name="get_service_graph",
+                description=(
+                    "Build and return the service dependency graph from observability data. "
+                    "Shows which services depend on each other and detected incidents. "
+                    "Returns JSON representation of the service topology."
+                ),
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "logs_path": {
+                            "type": "string",
+                            "description": "Path to log files",
+                        },
+                        "traces_path": {
+                            "type": "string",
+                            "description": "Optional path to trace files (recommended for dependencies)",
+                        },
+                        "metrics_path": {
+                            "type": "string",
+                            "description": "Optional path to metrics files",
+                        },
+                    },
+                    "required": ["logs_path"],
+                },
+            ),
+            Tool(
+                name="find_root_causes",
+                description=(
+                    "Apply rule-based heuristics to find root cause candidates. "
+                    "Uses graph analysis, temporal correlation, and pattern matching. "
+                    "Returns ranked list of candidates with confidence scores and evidence."
+                ),
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "logs_path": {
+                            "type": "string",
+                            "description": "Path to log files",
+                        },
+                        "metrics_path": {
+                            "type": "string",
+                            "description": "Optional path to metrics",
+                        },
+                        "traces_path": {
+                            "type": "string",
+                            "description": "Optional path to traces",
+                        },
+                        "sensitivity": {
+                            "type": "string",
+                            "enum": ["strict", "normal", "relaxed"],
+                            "description": "Detection sensitivity (default: normal)",
+                            "default": "normal",
+                        },
+                    },
+                    "required": ["logs_path"],
+                },
+            ),
+        ]
+
+    @server.call_tool()
+    async def call_tool(name: str, arguments: Dict[str, Any]) -> list[TextContent]:
+        """Dispatch a tool call to its handler; failures are returned as text, not raised."""
+        logger.info(f"MCP tool called: {name} with args: {arguments}")
+
+        try:
+            if name == "run_rca":
+                result = await _handle_run_rca(arguments)
+            elif name == "analyze_logs":
+                result = await _handle_analyze_logs(arguments)
+            elif name == "get_service_graph":
+                result = await _handle_get_service_graph(arguments)
+            elif name == "find_root_causes":
+                result = await _handle_find_root_causes(arguments)
+            else:
+                result = f"Unknown tool: {name}"
+            return [TextContent(type="text", text=result)]
+
+        except Exception as e:
+            logger.error(f"Error in tool {name}: {e}", exc_info=True)
+            return [TextContent(type="text", text=f"Error executing {name}: {e}")]
+
+    return server
+
+
+async def _handle_run_rca(args: Dict[str, Any]) -> str:
+    """
+    Serve the ``run_rca`` tool: run the file-based RCA and render its report.
+
+    Args:
+        args: Tool arguments; ``logs_path`` and ``symptom`` are required keys.
+
+    Returns:
+        Output of generate_json_report when ``format`` is "json", else of generate_markdown_report.
+    """
+    # Required keys are read first so a missing one raises KeyError up front.
+    log_source, symptom = args["logs_path"], args["symptom"]
+    wants_json = args.get("format", "markdown") == "json"
+
+    logger.info(f"Running RCA for symptom: {symptom}")
+
+    rca_result = run_rca_from_files(
+        logs_path=log_source,
+        metrics_path=args.get("metrics_path"),
+        traces_path=args.get("traces_path"),
+        configs_path=args.get("configs_path"),
+        primary_symptom=symptom,
+        window_minutes=args.get("window_minutes", 60),
+    )
+
+    render = generate_json_report if wants_json else generate_markdown_report
+    return render(rca_result)
+
+
+async def _handle_analyze_logs(args: Dict[str, Any]) -> str:
+    """
+    Serve the ``analyze_logs`` tool: summarise log volume and errors as Markdown.
+
+    ``logs_path`` is required. ``time_from``/``time_to`` are optional ISO-8601
+    strings; a "Z" (UTC) suffix is accepted on every Python version.
+    """
+    from collections import Counter
+
+    logs_path = args["logs_path"]
+    time_from_str = args.get("time_from")
+    time_to_str = args.get("time_to")
+    service_filter = args.get("service_filter")
+
+    # fromisoformat() rejects the "Z" suffix before Python 3.11; use an explicit offset.
+    time_from = datetime.fromisoformat(time_from_str.replace("Z", "+00:00")) if time_from_str else None
+    time_to = datetime.fromisoformat(time_to_str.replace("Z", "+00:00")) if time_to_str else None
+
+    logger.info(f"Analyzing logs from: {logs_path}")
+    logs = load_logs(logs_path, time_from, time_to, service_filter)
+    total_logs = len(logs)
+    error_logs = [log for log in logs if log.is_error()]
+    services = set(log.service for log in logs)
+    # Count errors per service in one pass instead of re-scanning per service.
+    errors_by_service = Counter(log.service for log in error_logs)
+
+    summary_parts = [
+        "# Log Analysis Summary",
+        "",
+        f"**Total Logs:** {total_logs}",
+        f"**Error Logs:** {len(error_logs)} ({len(error_logs)/max(total_logs,1)*100:.1f}%)",
+        f"**Unique Services:** {len(services)}",
+        "",
+    ]
+    if time_from and time_to:
+        summary_parts += [f"**Time Range:** {time_from.isoformat()} to {time_to.isoformat()}", ""]
+
+    if services:
+        summary_parts.append("**Services Detected:**")
+        summary_parts.extend(f"- {name}: {errors_by_service[name]} errors" for name in sorted(services))
+        summary_parts.append("")
+
+    if error_logs:
+        summary_parts.append("**Recent Errors:**")
+        for log in sorted(error_logs, key=lambda x: x.timestamp, reverse=True)[:10]:
+            summary_parts.append(f"- [{log.timestamp.isoformat()}] {log.service}: {log.message[:100]}")
+
+    return "\n".join(summary_parts)
+
+
+async def _handle_get_service_graph(args: Dict[str, Any]) -> str:
+    """
+    Serve the ``get_service_graph`` tool: build the graph and dump it as JSON.
+
+    ``logs_path`` is a required key; traces and metrics are loaded only when given.
+    """
+    log_source = args["logs_path"]
+    trace_source, metric_source = args.get("traces_path"), args.get("metrics_path")
+
+    logger.info("Building service graph")
+
+    # Falsy paths fall back to an empty list; sources load in logs, traces, metrics order.
+    log_records = [] if not log_source else load_logs(log_source)
+    trace_records = [] if not trace_source else load_traces(trace_source)
+    metric_records = [] if not metric_source else load_metrics(metric_source)
+
+    topology = build_service_graph(logs=log_records, metrics=metric_records, traces=trace_records)
+    # default=str stringifies any value json cannot encode natively.
+    return json.dumps(topology.to_dict(), indent=2, default=str)
+
+
+async def _handle_find_root_causes(args: Dict[str, Any]) -> str:
+    """
+    Serve the ``find_root_causes`` tool: list rule-based candidates as Markdown.
+
+    Args:
+        args: Tool arguments; ``logs_path`` is required, ``sensitivity``
+            defaults to "normal".
+
+    Returns:
+        A Markdown summary covering at most the first 10 candidates, each with
+        at most 5 evidence entries.
+    """
+    log_source = args["logs_path"]
+    metric_source, trace_source = args.get("metrics_path"), args.get("traces_path")
+    sensitivity = args.get("sensitivity", "normal")
+
+    logger.info(f"Finding root causes with sensitivity: {sensitivity}")
+
+    # Any value other than "strict"/"relaxed" uses the default thresholds.
+    if sensitivity == "strict":
+        limits = ThresholdConfig.strict()
+    elif sensitivity == "relaxed":
+        limits = ThresholdConfig.relaxed()
+    else:
+        limits = ThresholdConfig()
+
+    log_records = [] if not log_source else load_logs(log_source)
+    metric_records = [] if not metric_source else load_metrics(metric_source)
+    trace_records = [] if not trace_source else load_traces(trace_source)
+
+    topology = build_service_graph(
+        logs=log_records, metrics=metric_records, traces=trace_records, thresholds=limits
+    )
+    ranked = apply_rules(topology, thresholds=limits)
+
+    lines = ["# Root Cause Candidates", ""]
+    lines += [f"**Total Candidates:** {len(ranked)}", f"**Sensitivity:** {sensitivity}", ""]
+    for position, cand in enumerate(ranked[:10], start=1):
+        lines.append(f"## {position}. {cand.service}")
+        lines.append(f"**Type:** {cand.incident_type.value}")
+        lines.append(f"**Confidence:** {cand.confidence:.0%}")
+        lines.append(f"**Explanation:** {cand.explanation}")
+        lines += ["", "**Evidence:**"]
+        lines.extend(f"- {item}" for item in cand.evidence[:5])
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def start_mcp_server():
+    """Start the MCP server (stdio transport), logging at AUTORCA_LOG_LEVEL (default INFO)."""
+    import os
+
+    # Honour the documented AUTORCA_LOG_LEVEL override; unset or empty falls back to INFO.
+    configure_logging(level=os.environ.get("AUTORCA_LOG_LEVEL") or "INFO")
+    logger.info("Starting AutoRCA-Core MCP server")
+
+    server = create_mcp_server()
+
+    try:
+        from mcp.server.stdio import stdio_server
+    except ImportError as exc:
+        raise ImportError(
+            "mcp package required. Install with: pip install 'autorca-core[mcp]'"
+        ) from exc
+
+    async def run():
+        async with stdio_server() as (read_stream, write_stream):
+            init_options = server.create_initialization_options()
+            await server.run(read_stream, write_stream, init_options)
+
+    asyncio.run(run())
+
diff --git a/docs/MCP_INTEGRATION.md b/docs/MCP_INTEGRATION.md
new file mode 100644
index 0000000..f823494
--- /dev/null
+++ b/docs/MCP_INTEGRATION.md
@@ -0,0 +1,214 @@
+# MCP Integration Guide
+
+AutoRCA-Core provides a Model Context Protocol (MCP) server that exposes RCA functionality as tools for Claude Desktop, Claude Code, and other MCP-compatible clients.
+
+## Installation
+
+Install AutoRCA-Core with MCP support:
+
+```bash
+pip install "autorca-core[mcp]"
+```
+
+Or install all optional dependencies:
+
+```bash
+pip install "autorca-core[all]"
+```
+
+## Claude Desktop Integration
+
+### 1. Start the MCP Server
+
+The MCP server uses stdio transport and is launched by Claude Desktop itself (see step 2); run it manually only to verify the installation:
+
+```bash
+autorca mcp-server
+```
+
+### 2. Configure Claude Desktop
+
+Add AutoRCA-Core to your Claude Desktop MCP configuration:
+
+- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
+- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
+- **Linux**: `~/.config/Claude/claude_desktop_config.json`
+
+```json
+{
+  "mcpServers": {
+    "autorca": {
+      "command": "autorca",
+      "args": ["mcp-server"]
+    }
+  }
+}
+```
+
+### 3. Restart Claude Desktop
+
+After updating the configuration, restart Claude Desktop to load the AutoRCA-Core tools.
+
+## Available Tools
+
+Once configured, Claude can use these tools:
+
+### `run_rca`
+Run comprehensive root cause analysis on observability data.
+
+**Parameters:**
+- `logs_path` (required): Path to log files
+- `symptom` (required): Description of the incident
+- `metrics_path` (optional): Path to metrics files
+- `traces_path` (optional): Path to trace files
+- `configs_path` (optional): Path to config change files
+- `window_minutes` (optional): Analysis window in minutes (default: 60)
+- `format` (optional): Output format - "markdown" or "json" (default: markdown)
+
+**Example Claude prompt:**
+```
+Please analyze the logs in /var/log/app for the symptom "API returning 500 errors" 
+using the metrics in /var/metrics
+```
+
+### `analyze_logs`
+Analyze log files for anomalies and patterns.
+
+**Parameters:**
+- `logs_path` (required): Path to log files
+- `time_from` (optional): Start time in ISO format
+- `time_to` (optional): End time in ISO format
+- `service_filter` (optional): Filter to specific service
+
+**Example Claude prompt:**
+```
+Analyze the logs in /var/log/app from 2025-01-01T10:00:00 to 2025-01-01T11:00:00
+```
+
+### `get_service_graph`
+Build and return the service dependency graph.
+
+**Parameters:**
+- `logs_path` (required): Path to log files
+- `traces_path` (optional): Path to trace files (recommended for dependencies)
+- `metrics_path` (optional): Path to metrics files
+
+**Example Claude prompt:**
+```
+Show me the service dependency graph from the logs in /var/log/app and traces in /var/traces
+```
+
+### `find_root_causes`
+Apply rule-based heuristics to find root cause candidates.
+
+**Parameters:**
+- `logs_path` (required): Path to log files
+- `metrics_path` (optional): Path to metrics
+- `traces_path` (optional): Path to traces
+- `sensitivity` (optional): "strict", "normal", or "relaxed" (default: normal)
+
+**Example Claude prompt:**
+```
+Find root causes in /var/log/app with strict sensitivity
+```
+
+## Example Workflows
+
+### Incident Investigation
+```
+1. "Show me the service graph for the production logs"
+2. "Analyze the logs for the last hour"
+3. "Find root causes with normal sensitivity"
+4. "Run full RCA for symptom: checkout API timeouts"
+```
+
+### Daily Health Check
+```
+1. "Analyze today's logs for any anomalies"
+2. "Find any potential root causes in the last 24 hours"
+```
+
+### Post-Deployment Validation
+```
+1. "Get the service graph before and after the deployment"
+2. "Run RCA on logs since the deployment started"
+```
+
+## Troubleshooting
+
+### "MCP server requires the 'mcp' package"
+Install the MCP optional dependency:
+```bash
+pip install "autorca-core[mcp]"
+```
+
+### Claude Desktop can't find the command
+Ensure `autorca` is in your PATH:
+```bash
+which autorca  # macOS/Linux
+where autorca  # Windows
+```
+
+If not found, use the full path in `claude_desktop_config.json`:
+```json
+{
+  "mcpServers": {
+    "autorca": {
+      "command": "/full/path/to/autorca",
+      "args": ["mcp-server"]
+    }
+  }
+}
+```
+
+### Server not starting
+Check the Claude Desktop logs:
+- **macOS**: `~/Library/Logs/Claude/mcp*.log`
+- **Windows**: `%APPDATA%\Claude\Logs\mcp*.log`
+
+## Security Considerations
+
+- The MCP server runs with the same permissions as Claude Desktop
+- Ensure log files don't contain sensitive information: tool results, including log message excerpts, are returned to the MCP client
+- Consider using read-only file paths
+- Use the validation limits to prevent resource exhaustion
+
+## Advanced Configuration
+
+### With Virtual Environment
+
+```json
+{
+  "mcpServers": {
+    "autorca": {
+      "command": "/path/to/venv/bin/autorca",
+      "args": ["mcp-server"]
+    }
+  }
+}
+```
+
+### With Custom Log Level
+
+The MCP server respects the `AUTORCA_LOG_LEVEL` environment variable:
+
+```json
+{
+  "mcpServers": {
+    "autorca": {
+      "command": "autorca",
+      "args": ["mcp-server"],
+      "env": {
+        "AUTORCA_LOG_LEVEL": "DEBUG"
+      }
+    }
+  }
+}
+```
+
+## Next Steps
+
+- Explore the [main README](../README.md) for core AutoRCA concepts
+- Check [CONTRIBUTING.md](../CONTRIBUTING.md) for development guidelines
+- Review example data in the `examples/` directory
+
diff --git a/pyproject.toml b/pyproject.toml
index c9049dd..38b0ac3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,16 @@ llm = [
     "anthropic>=0.18",
 ]
 
+mcp = [
+    "mcp>=0.1.0",
+]
+
+all = [
+    "openai>=1.0",
+    "anthropic>=0.18",
+    "mcp>=0.1.0",
+]
+
 [project.urls]
 Homepage = "https://github.com/nik-kale/AutoRCA-Core"
 Repository = "https://github.com/nik-kale/AutoRCA-Core"