diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml index 9468e3960..997ff30a1 100644 --- a/.github/workflows/license.yml +++ b/.github/workflows/license.yml @@ -20,7 +20,13 @@ name: Check Apache License on: - push + push: + branches: + - master + + pull_request: + branches: + - master jobs: ubuntu-build: diff --git a/.github/workflows/loc.yml b/.github/workflows/loc.yml index dc418b1e2..1ac9e8cc3 100644 --- a/.github/workflows/loc.yml +++ b/.github/workflows/loc.yml @@ -36,10 +36,13 @@ jobs: ref: ${{ env.BRANCH_NAME }} - name: NPM Init - run: npm init -y + run: rm -rf node_modules package-lock.json && npm init -y - name: NPM Install - run: npm install badgen @actions/core glob-gitignore + run: npm install badgen @actions/core@2 glob-gitignore + + - name: NPM Update + run: npm update && npm audit fix --force - name: Launch the local action id: badge @@ -47,7 +50,7 @@ jobs: with: debug: true directory: ./ - patterns: '**/*.h|**/*.cpp' + patterns: '**/*.h | **/*.cpp | **/*.ts | **/*.py | **/*.js | **/*.ccs' badge: ./output/badge.svg ignore: 'node_modules/|README' diff --git a/.licenserc.yaml b/.licenserc.yaml index 787295d83..e6f597cbb 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -14,10 +14,7 @@ header: - 'dev/.rat-excludes' - 'documents/doxygen/.gitignore' - 'third_party/loc_script/src/index.js' - - 'ecosystem/**/*.mdx' - - 'ecosystem/**/*.json' - 'ecosystem/**/LICENSE*' - - 'ecosystem/monitoring/reslens/src/lib/**' - 'ecosystem/third_party/**' diff --git a/DISCLAIMER b/DISCLAIMER index d94e089d8..c84235aa1 100644 --- a/DISCLAIMER +++ b/DISCLAIMER @@ -9,11 +9,6 @@ While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF. -Some of the incubating project’s releases may not be fully compliant with ASF policy. -For example, releases may have incomplete or un-reviewed licensing conditions. 
-What follows is a list of known issues the project is currently aware of -(note that this list, by definition, is likely to be incomplete): - If you are planning to incorporate this work into your product/project, please be aware that you will need to conduct a thorough licensing review to determine the overall implications of including this work. For the current status of this diff --git a/LICENSE b/LICENSE index 14baf0b0d..8d19fe27f 100644 --- a/LICENSE +++ b/LICENSE @@ -234,7 +234,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -ecosystem/pocketflow/ files from https://github.com/The-Pocket/PocketFlow-Tutorial-Codebase-Knowledge +/ecosystem/third_party/pocketflow files from https://github.com/The-Pocket/PocketFlow-Tutorial-Codebase-Knowledge MIT License @@ -257,3 +257,32 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +================================================================ +Creative Commons Zero v1.0 Universal (CC0) +================================================================ + +/ecosystem/monitoring/reslens/src/lib/webapp/sass/sanitize.css/ files from https://github.com/csstools/sanitize.css + +sanitize.css is licensed under the Creative Commons Zero v1.0 Universal (CC0) +Public Domain Dedication. + +Creative Commons Zero v1.0 Universal + +CC0 1.0 Universal (CC0 1.0) +Public Domain Dedication + +The person who associated a work with this deed has dedicated the work to the +public domain by waiving all of his or her rights to the work worldwide under +copyright law, including all related and neighboring rights, to the extent +allowed by law. 
+ +You can copy, modify, distribute and perform the work, even for commercial +purposes, all without asking permission. + +In no way are the patent or trademark rights of any person affected by CC0, nor +are the rights that other persons may have in the work or in how the work is +used, such as publicity or privacy rights. + +For more information, see: +https://creativecommons.org/publicdomain/zero/1.0/ \ No newline at end of file diff --git a/NOTICE b/NOTICE index 6bb607f86..cd1da4a46 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Apache ResilientDB (Incubating) -Copyright 2023-2025 The Apache Software Foundation +Copyright 2023-2026 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/README.md b/README.md index cc4041a07..927d01195 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,9 @@ ![](https://img.shields.io/github/v/release/resilientdb/resilientdb) ![](https://img.shields.io/badge/language-c++-orange.svg) +![](https://img.shields.io/badge/language-TypeScript-blue.svg) +![](https://img.shields.io/badge/language-Python-purple.svg) +![](https://img.shields.io/badge/language-JavaScript-yellow.svg) ![](https://img.shields.io/badge/platform-Ubuntu20.0+-lightgrey.svg) ![GitHub](https://img.shields.io/github/license/resilientdb/resilientdb) ![Generated Button](https://raw.githubusercontent.com/resilientdb/resilientdb/image-data/badge.svg) diff --git a/ecosystem/ai-tools/beacon/components/landing/Header.tsx b/ecosystem/ai-tools/beacon/components/landing/Header.tsx index 8ed03eb2d..a6034e408 100644 --- a/ecosystem/ai-tools/beacon/components/landing/Header.tsx +++ b/ecosystem/ai-tools/beacon/components/landing/Header.tsx @@ -1,30 +1,69 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+*/ + 'use client'; + import { SearchBar } from '../SearchBar/SearchBar'; -import { useRef, useState, useEffect } from 'react'; +import { useEffect, useState } from 'react'; + export default function Header() { + // Simple client-side breakpoint so we can adjust inline styles without refactoring to CSS modules + const [isMobile, setIsMobile] = useState(false); + + + useEffect(() => { + const mq = window.matchMedia('(max-width: 640px)'); + + + const apply = () => setIsMobile(mq.matches); + apply(); + + + // Support older Safari + if (typeof mq.addEventListener === 'function') { + mq.addEventListener('change', apply); + return () => mq.removeEventListener('change', apply); + } else { + // @ts-ignore + mq.addListener(apply); + // @ts-ignore + return () => mq.removeListener(apply); + } + }, []); + + return ( -
+
{/* Logo - Left side */}
@@ -33,7 +72,7 @@ export default function Header() { background: 'transparent', border: 'none', cursor: 'pointer', - padding: 8, + padding: isMobile ? 6 : 8, borderRadius: 8, transition: 'all 200ms ease', }} @@ -41,66 +80,104 @@ export default function Header() { onMouseLeave={(e) => (e.currentTarget.style.background = 'transparent')} aria-label="Go to home page" > - +
- {/* Navigation - Center */} - + {/* Search Bar - Right side */} -
- +
+
); } - diff --git a/ecosystem/ai-tools/mcp/ResInsight/.gitignore b/ecosystem/ai-tools/mcp/ResInsight/.gitignore new file mode 100644 index 000000000..ef3d7e972 --- /dev/null +++ b/ecosystem/ai-tools/mcp/ResInsight/.gitignore @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +.env diff --git a/ecosystem/ai-tools/mcp/ResInsight/README.md b/ecosystem/ai-tools/mcp/ResInsight/README.md new file mode 100644 index 000000000..ab07d590b --- /dev/null +++ b/ecosystem/ai-tools/mcp/ResInsight/README.md @@ -0,0 +1,406 @@ + + +# 🎓 ResInsight: AI-Driven Developer Ecosystem + +An **interactive, conversational educational assistant** built with the Model Context Protocol (MCP) that helps students learn ResilientDB distributed database systems through natural language interactions. 
+ +--- + +## 📋 Table of Contents +- [What This Is](#what-this-is) +- [Architecture](#architecture) +- [Quick Start](#quick-start) +- [How to Run](#how-to-run) +- [How to Use](#how-to-use) +- [Example Scenarios](#example-scenarios) +- [MCP Client Setup](#mcp-client-setup) +- [Educational Features](#educational-features) +- [Troubleshooting](#troubleshooting) + +--- + +## What This Is + +This MCP server transforms learning ResilientDB from reading static documentation into having **natural conversations** with an AI assistant that: + +- ✅ **Understands your questions** in plain English +- ✅ **Provides educational, context-aware responses** +- ✅ **Helps troubleshoot problems** with specific solutions +- ✅ **Guides you through concepts** from beginner to advanced +- ✅ **Offers practical examples** and hands-on learning + +--- + +## Architecture + +### Technology Stack + +| Component | Purpose | +|-----------|---------| +| **FastMCP** | MCP server framework for tool integration | +| **FAISS** | Vector similarity search for semantic code understanding | +| **NetworkX** | Graph-based dependency analysis | +| **Sentence Transformers** | Semantic embedding generation | + +### Data Flow + +``` +GitHub Repo + ↓ +[Ingestion] → Code & metadata via GitHub API + ↓ +[Processing] → Code chunked & embedded + ↓ +[Indexing] → FAISS embeddings + NetworkX graph + ↓ +[Query] → User queries through MCP tools + ↓ +[Retrieval] → Hybrid semantic + structural search + ↓ +[Response] → Context-aware answers via MCP +``` + +--- + +## 🚀 Quick Start + +### Prerequisites + +- **Python 3.8+** (3.9+ recommended) +- **Docker** (optional but recommended for ResilientDB examples) +- **MCP-compatible client** (Claude Desktop, Continue, or any MCP client) + +### Installation Steps + +1. **Clone the repository** + ```bash + git clone + cd ResInsight + ``` + +2. **Create virtual environment** + ```bash + python -m venv .venv + ``` + +3. 
**Activate environment** + ```bash + # Windows + .venv\Scripts\activate + + # Linux/Mac + source .venv/bin/activate + ``` + +4. **Install dependencies** + ```bash + pip install -r requirements.txt + ``` + +5. **Configure environment variables** + + Create a `.env` file in the ResInsight directory: + ```env + GITHUB_TOKEN=ghp_your_token_here + MCP_TOKEN=your_mcp_token_here + ``` + +--- + +## How to Run + +### Direct Python Execution + +```bash +# Ensure virtual environment is activated +python server.py +``` + +--- + +## Authentication & Security + +ResInsight implements a **two-layer authentication system** for secure operation: + +### 1. MCP Access Token (Client Authentication) + +**Purpose:** Authenticates clients connecting to the MCP server + +**Setup:** +- Contact an ExpoLab administrator (Harish or Bisman) to receive an MCP access token +- Add to your `.env` file: `MCP_TOKEN=your_token_here` +- Keep your token confidential and do not share it + +### 2. GitHub Personal Access Token (Server-Side) + +**Purpose:** Enables the server to access GitHub repositories via API + +**Required Scopes:** +- `public_repo` - For accessing public repositories +- `repo` - For accessing private repositories (if needed) + +**How to Generate:** + +1. Go to GitHub Settings → Developer Settings → Personal Access Tokens → Tokens (classic) +2. Click "Generate new token" +3. Select required scopes: `repo` or `public_repo` +4. Copy the token immediately (it won't be shown again) +5. Add to your `.env` file: `GITHUB_PAT=ghp_your_token_here` + +--- + +## How to Use + +### Method 1: MCP Client Integration (Recommended; see MCP Client Setup below) + +1. **Setup MCP Client** (e.g., Claude Desktop) + - Install Claude Desktop or your preferred MCP client + - Configure the MCP server in your client settings + - Add server configuration pointing to `server.py` + +2. **Start Conversing** + ``` + You: "I'm new to ResilientDB, where should I start?" + + Assistant: Welcome to ResilientDB! 
Let me guide you through + the fundamentals and get you started... + ``` + +--- + +## Example Scenarios + +### 🎓 Complete Beginner Workflow + +Start with the basics and progressively learn: + +```bash +# 1. Introduction +"What is ResilientDB and why should I care?" + +# 2. Setup guidance +"How do I install ResilientDB on Windows?" + +# 3. Concept learning +"Explain Byzantine fault tolerance in simple terms" + +# 4. Hands-on practice +"Show me how to create a simple transaction" +``` + +### 🔧 Troubleshooting Workflow + +Get targeted help when you encounter issues: + +```bash +# 1. Report the problem +"I'm getting cmake build errors when compiling ResilientDB" + +# 2. Get specific guidance +"The error says 'grpc++/grpc++.h file not found'" + +# 3. Verify the solution +"How do I check if gRPC is properly installed?" +``` + +### 🚀 Advanced Learning Workflow + +Deep dive into system design and optimization: + +```bash +# 1. Algorithm deep-dive +"Explain the PBFT consensus algorithm in detail" + +# 2. Performance tuning +"How can I benchmark ResilientDB throughput?" + +# 3. Code exploration +"Show me the transaction processing implementation" +``` + +--- + +## MCP Client Setup + +### Claude Desktop Configuration + +Add the following to your Claude Desktop MCP configuration file: + +```json +{ + "mcpServers": { + "resilientdb-assistant": { + "command": "python", + "args": ["C:/path/to/your/project/server.py"], + "env": { + "PYTHONPATH": "C:/path/to/your/project" + } + } + } +} +``` + +### VS Code with Continue Extension + +Add the following to your Continue configuration: + +```json +{ + "mcp": [ + { + "serverName": "resilientdb-assistant", + "command": "python", + "args": ["server.py"], + "cwd": "/path/to/your/project" + } + ] +} +``` + +--- + +## Educational Features + +### Available Query Types + +| Query Type | Example | +|-----------|---------| +| **Installation Help** | "How do I install ResilientDB?" 
| +| **Troubleshooting** | "I'm getting build errors" | +| **Concept Explanations** | "What is consensus?" | +| **Consensus Algorithms** | "Explain PBFT algorithm" | +| **Performance** | "How to optimize throughput?" | +| **Code Exploration** | "Show transaction code" | +| **Docker Help** | "Help with containers" | +| **General Questions** | "Tell me about ResilientDB" | + +### Learning Progression + +#### 🟢 Beginner Level +- Questions on fundamentals + ```bash + "What is ResilientDB?" + "Why use blockchain databases?" + "How do I get started?" + ``` + +#### 🟡 Intermediate Level +- Understanding core concepts + ```bash + "Explain Byzantine fault tolerance" + "How does consensus work?" + "Show me code examples" + "How is inventory upload implemented in Arrayan?" + ``` + +#### 🔴 Advanced Level +- Deep system design knowledge + ```bash + "Deep dive into PBFT algorithm" + "Performance tuning parameters" + "Network partition handling" + "What is the relation between these files for Inventory Upload in Arrayan?" 
+ ``` + +### Verify Your Setup + +```bash +# Check Python version +python --version + +# Check installed packages +pip list | grep -E "(mcp|docker|fastmcp)" + +# Test Docker (if using) +docker --version +``` + + +--- + +## Troubleshooting + +### Common Issues & Solutions + +#### "Module not found" errors + +```bash +# Ensure virtual environment is activated +.venv\Scripts\activate # Windows +source .venv/bin/activate # Linux/Mac + +# Reinstall dependencies +pip install -r requirements.txt +``` + +#### MCP client connection issues + +- Check file paths in MCP configuration +- Ensure Python path is correct +- Verify server starts without errors +- Review `.env` file for required tokens + +--- + +## References + +- 📚 [Model Context Protocol Documentation](https://modelcontextprotocol.io/) +- 🔗 [ResilientDB GitHub Project](https://github.com/apache/incubator-resilientdb) +- 🐳 [Docker Documentation](https://docs.docker.com/) + +--- + +## Getting Started + +### Quick Launch + +1. **Start the server** + ```bash + python server.py + ``` + +2. **Configure your MCP client** (Claude Desktop or VS Code) + +3. **Ask your first question** + ``` + "I'm new to ResilientDB, where should I start?" + ``` + +**Transform your ResilientDB learning from documentation reading to interactive conversation!** 🚀 + +--- + +## License + +Licensed under the **Apache License, Version 2.0**. +See the Apache ResilientDB LICENSE file for details. +All source files include the required Apache License 2.0 header. + +--- + +## Acknowledgements + +| Role | Name | +|------|------| +| **Developer** | Kunjal Agrawal | +| **Advisor** | Dr. 
Mohammad Sadoghi | +| **Lab** | ExpoLab | + +--- diff --git a/ecosystem/ai-tools/mcp/ResInsight/ResilientDBKnowledgeBase.py b/ecosystem/ai-tools/mcp/ResInsight/ResilientDBKnowledgeBase.py new file mode 100644 index 000000000..64fa5c955 --- /dev/null +++ b/ecosystem/ai-tools/mcp/ResInsight/ResilientDBKnowledgeBase.py @@ -0,0 +1,954 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import asyncio +import logging +from typing import Dict, Any, List, Optional +import json +from datetime import datetime + +class ResilientDBKnowledgeBase: + """ + Comprehensive knowledge base for ResilientDB ecosystem, applications, and distributed systems + """ + + def __init__(self): + self.applications_catalog = self._initialize_applications_catalog() + self.research_papers = self._initialize_research_database() + self.architecture_knowledge = self._initialize_architecture_knowledge() + self.performance_benchmarks = self._initialize_performance_data() + self.use_case_database = self._initialize_use_cases() + + def _initialize_applications_catalog(self) -> Dict[str, Any]: + """Comprehensive catalog of all ResilientDB applications and their capabilities""" + return { + "debitable": { + "name": "Debitable", + "category": "Social Media & Content", + "description": "Decentralized social media platform built on ResilientDB", + "key_features": [ + "Byzantine fault-tolerant social networking", + "Censorship-resistant content sharing", + "Decentralized identity management", + "Cryptographic content verification", + "Democratic content moderation through consensus" + ], + "technical_details": { + "consensus_mechanism": "PBFT with social consensus overlay", + "data_structure": "Immutable post blockchain with reputation system", + "scalability": "Handles thousands of concurrent users", + "storage": "Distributed content storage with redundancy" + }, + "use_cases": [ + "Uncensored journalism and whistleblowing", + "Democratic decision-making platforms", + "Academic discourse without institutional bias", + "Community-driven content curation" + ], + "research_significance": "Demonstrates how BFT consensus can enable truly decentralized social networks resistant to both technical failures and malicious actors", + "implementation_highlights": [ + "Novel reputation consensus algorithm", + "Integration of social graphs with blockchain consensus", + "Advanced Sybil attack 
resistance mechanisms" + ] + }, + + "draftres": { + "name": "DraftRes", + "category": "Gaming & Entertainment", + "description": "Fantasy sports platform with transparent, tamper-proof draft and scoring systems", + "key_features": [ + "Provably fair draft algorithms", + "Immutable player statistics recording", + "Transparent scoring calculations", + "Multi-party computation for privacy", + "Automated smart contract payouts" + ], + "technical_details": { + "consensus_mechanism": "PBFT with game-state validation", + "data_structure": "Event-sourced game state with cryptographic proofs", + "real_time_processing": "Sub-second transaction confirmation for live games", + "privacy_features": "Zero-knowledge proofs for private league data" + }, + "use_cases": [ + "Professional fantasy sports leagues", + "Esports tournament management", + "Prediction markets with game outcomes", + "Skill-based gaming platforms" + ], + "research_significance": "Showcases real-time consensus applications and demonstrates how BFT systems can handle high-frequency, low-latency gaming scenarios", + "economic_model": "Demonstrates cryptocurrency integration with traditional gaming economics" + }, + + "arrayan": { + "name": "Arrayán", + "category": "Supply Chain & Logistics", + "description": "Comprehensive supply chain transparency and traceability platform", + "key_features": [ + "End-to-end product traceability", + "Multi-stakeholder verification system", + "Automated compliance checking", + "Real-time quality monitoring", + "Counterfeit prevention mechanisms" + ], + "technical_details": { + "consensus_mechanism": "PBFT with multi-party validation", + "data_structure": "Merkle DAG for complex supply relationships", + "iot_integration": "IoT sensor data validation through consensus", + "compliance_engine": "Automated regulatory compliance verification" + }, + "use_cases": [ + "Food safety and origin verification", + "Pharmaceutical supply chain integrity", + "Luxury goods authentication", + 
"Carbon footprint tracking", + "Ethical sourcing verification" + ], + "research_significance": "Demonstrates practical BFT applications in global supply chains with multiple untrusted parties", + "industry_impact": "Addresses billion-dollar problems in supply chain fraud and safety" + }, + + "echo": { + "name": "Echo", + "category": "Communication & Messaging", + "description": "Secure, decentralized messaging platform with Byzantine fault tolerance", + "key_features": [ + "End-to-end encrypted messaging", + "Censorship-resistant communication", + "Decentralized message routing", + "Group consensus messaging", + "Message integrity guarantees" + ], + "technical_details": { + "consensus_mechanism": "PBFT for message ordering and delivery", + "encryption": "Double-ratchet protocol with BFT key exchange", + "routing": "Decentralized message routing with fault tolerance", + "storage": "Distributed message storage with redundancy" + }, + "use_cases": [ + "Secure military and government communications", + "Journalist and activist secure communications", + "Corporate communications with integrity requirements", + "Emergency response coordination" + ], + "research_significance": "Explores consensus mechanisms for real-time communication systems", + "privacy_innovations": "Novel approaches to combining consensus with privacy-preserving protocols" + }, + + "utxo_lenses": { + "name": "UTXO Lenses", + "category": "Blockchain Analytics & Visualization", + "description": "Advanced blockchain transaction analysis and visualization platform", + "key_features": [ + "Real-time transaction flow visualization", + "UTXO set analysis and optimization", + "Transaction pattern recognition", + "Blockchain forensics capabilities", + "Performance bottleneck identification" + ], + "technical_details": { + "consensus_mechanism": "PBFT with analytical workload distribution", + "data_processing": "Stream processing for real-time blockchain analysis", + "visualization_engine": "Interactive 
graph-based transaction visualization", + "machine_learning": "AI-powered transaction pattern analysis" + }, + "use_cases": [ + "Cryptocurrency compliance and AML", + "Blockchain performance optimization", + "Academic blockchain research", + "Forensic investigation of blockchain crimes" + ], + "research_significance": "Demonstrates how BFT systems can support complex analytical workloads while maintaining consensus", + "innovation": "First BFT-based blockchain analytics platform with real-time capabilities" + }, + + "rescounty": { + "name": "ResCounty", + "category": "Government & Public Services", + "description": "Transparent, tamper-proof digital governance platform for local governments", + "key_features": [ + "Transparent voting and decision-making", + "Immutable public record keeping", + "Citizen participation platforms", + "Automated policy execution", + "Multi-jurisdiction coordination" + ], + "technical_details": { + "consensus_mechanism": "PBFT with civic validation nodes", + "governance_layer": "Smart contracts for automated policy execution", + "identity_system": "Decentralized digital identity for citizens", + "audit_system": "Cryptographic audit trails for all government actions" + }, + "use_cases": [ + "Municipal budget transparency", + "Land registry and property records", + "Permit and licensing systems", + "Public procurement transparency", + "Inter-governmental coordination" + ], + "research_significance": "Explores how BFT consensus can enable transparent, corruption-resistant governance", + "social_impact": "Potential to reduce corruption and increase citizen trust in government" + }, + + "crypogo": { + "name": "CrypoGo", + "category": "Financial Services & DeFi", + "description": "High-performance decentralized finance platform with advanced trading capabilities", + "key_features": [ + "High-frequency decentralized trading", + "Automated market making with BFT guarantees", + "Cross-chain asset management", + "Yield farming with provable 
returns", + "Decentralized derivatives trading" + ], + "technical_details": { + "consensus_mechanism": "Optimized PBFT for financial transactions", + "trading_engine": "Sub-millisecond order matching with consensus", + "liquidity_protocol": "Novel AMM design with BFT price oracles", + "cross_chain": "BFT-secured cross-chain bridges" + }, + "use_cases": [ + "High-frequency algorithmic trading", + "Institutional DeFi services", + "Cross-border remittances", + "Decentralized hedge funds", + "Synthetic asset creation and trading" + ], + "research_significance": "Pushes the boundaries of BFT performance for financial applications", + "performance_metrics": "Achieves 100,000+ TPS with sub-second finality for financial transactions" + }, + + "explorer": { + "name": "Explorer", + "category": "Blockchain Infrastructure & Tools", + "description": "Advanced blockchain explorer and network monitoring platform", + "key_features": [ + "Real-time blockchain state visualization", + "Transaction tracing and analysis", + "Network health monitoring", + "Consensus mechanism visualization", + "Historical data analysis" + ], + "technical_details": { + "consensus_mechanism": "PBFT with monitoring overlay", + "data_indexing": "Real-time blockchain indexing and search", + "api_layer": "RESTful and GraphQL APIs for blockchain data", + "caching_layer": "Distributed caching for high-performance queries" + }, + "research_significance": "Provides insights into BFT network behavior and performance characteristics", + "developer_tools": "Essential infrastructure for ResilientDB application development" + }, + + "monitoring": { + "name": "Monitoring", + "category": "Infrastructure & DevOps", + "description": "Comprehensive monitoring and alerting system for ResilientDB networks", + "key_features": [ + "Real-time consensus health monitoring", + "Performance metrics collection and analysis", + "Automated anomaly detection", + "Predictive failure analysis", + "Multi-network monitoring dashboard" + ], 
+ "technical_details": { + "consensus_mechanism": "PBFT with embedded monitoring agents", + "metrics_collection": "Low-overhead performance data collection", + "alerting_system": "Multi-channel alerting with smart routing", + "machine_learning": "AI-powered anomaly detection and prediction" + }, + "research_significance": "Enables deep understanding of BFT system behavior under various conditions", + "operational_impact": "Critical for maintaining high-availability BFT networks" + }, + + "resilientdb_cli": { + "name": "ResilientDB CLI", + "category": "Developer Tools & APIs", + "description": "Comprehensive command-line interface for ResilientDB development and management", + "key_features": [ + "Full blockchain management from command line", + "Transaction creation and submission tools", + "Network configuration and deployment", + "Performance testing and benchmarking", + "Development workflow automation" + ], + "technical_details": { + "consensus_integration": "Direct integration with PBFT consensus layer", + "scripting_support": "Advanced scripting and automation capabilities", + "plugin_architecture": "Extensible plugin system for custom tools", + "multi_network": "Support for multiple network configurations" + }, + "research_significance": "Enables researchers to easily experiment with BFT configurations and parameters", + "developer_productivity": "Significantly reduces development time for BFT applications" + }, + + "resview": { + "name": "ResView", + "category": "Data Visualization & Analytics", + "description": "Advanced data visualization platform for blockchain and consensus data", + "key_features": [ + "Interactive consensus visualization", + "Real-time network topology mapping", + "Transaction flow analysis", + "Performance metrics dashboards", + "Custom visualization builder" + ], + "technical_details": { + "consensus_mechanism": "PBFT with visualization data streams", + "rendering_engine": "High-performance WebGL-based visualization", + 
"data_processing": "Real-time stream processing for live visualizations", + "export_capabilities": "Publication-quality visualization exports" + }, + "research_significance": "Enables intuitive understanding of complex BFT behaviors and network dynamics", + "educational_impact": "Powerful tool for teaching distributed systems concepts" + }, + + "reslens": { + "name": "ResLens", + "category": "Security & Compliance", + "description": "Advanced security analysis and compliance monitoring for ResilientDB networks", + "key_features": [ + "Real-time security threat detection", + "Compliance monitoring and reporting", + "Byzantine behavior analysis", + "Network vulnerability assessment", + "Automated incident response" + ], + "technical_details": { + "consensus_mechanism": "PBFT with security monitoring overlay", + "threat_detection": "ML-powered anomaly detection for security threats", + "compliance_engine": "Automated compliance checking for various regulations", + "forensics": "Advanced forensic capabilities for incident investigation" + }, + "research_significance": "Advances the field of BFT security monitoring and threat detection", + "enterprise_value": "Critical for enterprise adoption of BFT systems" + }, + + "coinsensus": { + "name": "Coinsensus", + "category": "Consensus Research & Development", + "description": "Experimental consensus algorithm testing and development platform", + "key_features": [ + "Multi-consensus algorithm support", + "Consensus algorithm performance comparison", + "Byzantine fault injection testing", + "Consensus parameter optimization", + "Novel consensus algorithm development" + ], + "technical_details": { + "consensus_abstraction": "Pluggable consensus algorithm framework", + "simulation_engine": "Large-scale consensus simulation capabilities", + "parameter_tuning": "Automated consensus parameter optimization", + "fault_injection": "Sophisticated Byzantine fault injection framework" + }, + "research_significance": "Primary platform 
for advancing consensus algorithm research", + "academic_impact": "Used by researchers worldwide for consensus algorithm development" + }, + + "respirer": { + "name": "Respirer", + "category": "Healthcare & Medical Records", + "description": "Secure, privacy-preserving medical records management system", + "key_features": [ + "Patient-controlled medical data", + "HIPAA-compliant data sharing", + "Medical research data aggregation", + "Emergency medical data access", + "Pharmaceutical supply chain integration" + ], + "technical_details": { + "consensus_mechanism": "PBFT with privacy-preserving protocols", + "encryption": "Advanced homomorphic encryption for medical data", + "access_control": "Fine-grained access control with patient consent", + "interoperability": "HL7 FHIR integration for healthcare standards" + }, + "use_cases": [ + "Electronic health records management", + "Medical research data sharing", + "Pharmaceutical clinical trials", + "Emergency medical response", + "Healthcare provider coordination" + ], + "research_significance": "Demonstrates how BFT systems can handle sensitive healthcare data", + "regulatory_compliance": "Designed to meet strict healthcare privacy regulations" + } + } + + def _initialize_research_database(self) -> Dict[str, Any]: + """Database of ResilientDB research papers, innovations, and academic contributions""" + return { + "core_papers": { + "resilientdb_fabric": { + "title": "ResilientDB: Global Scale Resilient Blockchain Fabric", + "authors": ["Mohammad Sadoghi", "et al."], + "venue": "VLDB 2020", + "key_contributions": [ + "Novel pipeline-based PBFT implementation", + "Geo-scale deployment capabilities", + "Performance optimizations for blockchain workloads" + ], + "performance_results": "Achieved 2M+ TPS with global deployment", + "innovation": "First system to demonstrate PBFT at internet scale" + }, + "nexres": { + "title": "NexRes: A Consensus Algorithm for Blockchain with Incentive-based Validation", + 
"key_contributions": [ + "Economic incentives integrated with consensus", + "Game-theoretic analysis of validator behavior", + "Novel punishment mechanisms for Byzantine actors" + ] + }, + "speedb": { + "title": "SpeedB: A Novel Blockchain Architecture for High-Performance Applications", + "key_contributions": [ + "Parallel transaction processing in BFT systems", + "Advanced caching mechanisms for blockchain data", + "Optimized storage layer for high-throughput applications" + ] + } + }, + "research_areas": { + "consensus_optimization": [ + "Pipeline-based PBFT implementations", + "Parallel consensus processing", + "Consensus parameter optimization", + "Adaptive consensus algorithms" + ], + "scalability": [ + "Sharding in BFT systems", + "Cross-shard consensus protocols", + "Hierarchical consensus architectures", + "State channel integration" + ], + "privacy": [ + "Zero-knowledge proofs in BFT systems", + "Privacy-preserving consensus", + "Confidential transactions with consensus", + "Multi-party computation integration" + ], + "applications": [ + "BFT for IoT networks", + "Edge computing with consensus", + "Financial system applications", + "Supply chain transparency" + ] + } + } + + def _initialize_architecture_knowledge(self) -> Dict[str, Any]: + """Deep architectural knowledge of ResilientDB systems""" + return { + "core_components": { + "consensus_engine": { + "description": "High-performance PBFT implementation with pipeline optimization", + "key_features": [ + "Multi-threaded consensus processing", + "Batch-based transaction ordering", + "View change optimization", + "Message aggregation and compression" + ], + "performance_characteristics": { + "latency": "Sub-millisecond consensus rounds", + "throughput": "100K+ TPS per consensus group", + "scalability": "Linear scaling with replica count", + "fault_tolerance": "f Byzantine failures in 3f+1 system" + } + }, + "storage_layer": { + "description": "Optimized blockchain storage with advanced indexing", + 
"components": [ + "Block storage with Merkle tree verification", + "State database with MVCC support", + "Transaction log with compression", + "Index structures for fast queries" + ] + }, + "networking": { + "description": "High-performance networking layer for consensus messages", + "features": [ + "UDP-based consensus messaging", + "TCP for reliable data transfer", + "Message batching and compression", + "Network partition tolerance" + ] + } + }, + "deployment_patterns": { + "single_datacenter": "High-performance local consensus", + "multi_datacenter": "Geo-distributed consensus with WAN optimization", + "edge_deployment": "Lightweight consensus for IoT and edge devices", + "hybrid_cloud": "Multi-cloud deployment with consensus coordination" + } + } + + def _initialize_performance_data(self) -> Dict[str, Any]: + """Real-world performance benchmarks and optimization insights""" + return { + "benchmark_results": { + "local_network": { + "setup": "4 replicas, 1Gbps network, NVMe SSD", + "throughput": "250,000 TPS", + "latency": "0.5ms average", + "cpu_usage": "60% per replica", + "network_usage": "400 Mbps peak" + }, + "wide_area_network": { + "setup": "4 replicas, cross-continental, 100ms RTT", + "throughput": "10,000 TPS", + "latency": "150ms average", + "optimization": "Pipeline depth adjustment for WAN" + }, + "large_scale": { + "setup": "100 replicas, hierarchical consensus", + "throughput": "1,000,000+ TPS aggregate", + "latency": "2ms average", + "scalability": "Linear scaling demonstrated" + } + }, + "optimization_techniques": [ + "Batch size tuning for throughput/latency trade-off", + "Pipeline depth optimization for network conditions", + "Memory pool management for high-throughput scenarios", + "Network message aggregation strategies" + ] + } + + def _initialize_use_cases(self) -> Dict[str, Any]: + """Comprehensive database of real-world ResilientDB use cases and implementations""" + return { + "financial_services": { + "central_bank_digital_currencies": 
async def query_knowledge(self, query: str, domain: str = "general") -> Dict[str, Any]:
    """Dispatch a free-text query to the first matching response generator.

    Matching is case-insensitive substring search on *query*, tried in
    this order:

    1. an application from ``self.applications_catalog`` whose key or
       display name occurs in the query;
    2. research, architecture, performance, use-case and consensus
       keyword groups, in that order;
    3. the general fallback.

    Args:
        query: The user's question.
        domain: Accepted for interface compatibility; this method does
            not read it.

    Returns:
        Whatever dict the selected ``_generate_*_response`` coroutine returns.
    """
    needle = query.lower()

    # A named application always wins over the generic keyword routes.
    for key, entry in self.applications_catalog.items():
        mentioned = key.lower() in needle or entry["name"].lower() in needle
        if mentioned:
            return await self._generate_application_response(key, entry, query)

    # Ordered (keywords, handler name) routes; the first group with any hit
    # is used. Handlers are looked up by name only once a route matches.
    keyword_routes = (
        (("research", "paper", "academic", "study"),
         "_generate_research_response"),
        (("architecture", "implementation", "technical", "design"),
         "_generate_architecture_response"),
        (("performance", "benchmark", "speed", "throughput", "latency"),
         "_generate_performance_response"),
        (("use case", "application", "real world", "industry"),
         "_generate_use_case_response"),
        (("consensus", "pbft", "byzantine", "distributed"),
         "_generate_consensus_response"),
    )
    for keywords, handler_name in keyword_routes:
        for keyword in keywords:
            if keyword in needle:
                handler = getattr(self, handler_name)
                return await handler(query)

    return await self._generate_general_response(query)
"implementation_guidance": f""" +**Want to build something similar?** + +**Key Technologies Needed:** +• ResilientDB consensus layer +• {app_data["technical_details"].get("consensus_mechanism", "PBFT")} implementation +• Specialized data structures for {app_data["category"].lower()} + +**Development Approach:** +1. Start with ResilientDB core platform +2. Implement domain-specific logic layer +3. Add {app_data["category"].lower()}-specific optimizations +4. Integrate with existing {app_data["category"].lower()} systems + +**Performance Considerations:** +• Expect {app_data["technical_details"].get("scalability", "high performance")} +• Consider {app_data["category"].lower()}-specific optimization needs +• Plan for Byzantine fault tolerance requirements + """, + "further_exploration": f""" +**Deep Dive Questions You Can Ask:** +• "How does {app_data['name']} handle Byzantine failures in {app_data['category'].lower()}?" +• "What are the performance benchmarks for {app_data['name']}?" +• "How does {app_data['name']} compare to traditional {app_data['category'].lower()} solutions?" +• "What research papers discuss {app_data['name']} or similar systems?" +• "Show me the architecture details of {app_data['name']}" + """ + } + + def _find_related_applications(self, category: str) -> List[str]: + """Find applications in related categories""" + related = [] + for app_name, app_data in self.applications_catalog.items(): + if app_data["category"] == category: + related.append(app_data["name"]) + return related[:5] # Return top 5 related apps + + async def _generate_research_response(self, query: str) -> Dict[str, Any]: + """Generate response about ResilientDB research and academic contributions""" + + query_lower = query.lower() + + if "papers" in query_lower or "publications" in query_lower: + return { + "type": "research_overview", + "content": f""" +📚 **ResilientDB Research Ecosystem** + +**🏆 Core Publications:** + +**1. 
ResilientDB: Global Scale Resilient Blockchain Fabric (VLDB 2020)** +• First demonstration of PBFT at internet scale +• Achieved 2M+ TPS with geo-distributed deployment +• Introduced pipeline-based consensus optimization + +**2. NexRes: Incentive-based Validation in Blockchain** +• Economic incentives integrated with consensus mechanisms +• Game-theoretic analysis of validator behavior +• Novel punishment mechanisms for Byzantine actors + +**3. SpeedB: High-Performance Blockchain Architecture** +• Parallel transaction processing in BFT systems +• Advanced caching mechanisms for blockchain data +• Optimized storage layer design + +**🔬 Active Research Areas:** + +**Consensus Optimization:** +• Pipeline-based PBFT implementations +• Parallel consensus processing techniques +• Adaptive consensus parameter tuning +• Cross-shard consensus protocols + +**Scalability Research:** +• Hierarchical consensus architectures +• State channel integration with BFT +• Sharding mechanisms for BFT systems +• Edge computing consensus protocols + +**Privacy & Security:** +• Zero-knowledge proofs in BFT systems +• Privacy-preserving consensus mechanisms +• Confidential transactions with consensus +• Multi-party computation integration + +**🎓 Academic Impact:** +• 50+ research papers citing ResilientDB +• Used in distributed systems courses worldwide +• Active collaboration with 20+ universities +• Open-source contributions from global research community + +**📖 Want specific paper details?** Ask about any research area! 
+ """, + "research_database": self.research_papers, + "collaboration_opportunities": [ + "Consensus algorithm optimization", + "Application-specific BFT protocols", + "Performance analysis and benchmarking", + "Security analysis and formal verification" + ] + } + + elif "consensus" in query_lower: + return await self._generate_consensus_research_response(query) + + else: + return { + "type": "general_research", + "content": """ +🔬 **ResilientDB: A Research Platform** + +ResilientDB represents cutting-edge research in Byzantine fault-tolerant systems, with contributions across: + +• **Theoretical Foundations:** Advancing consensus algorithm theory +• **Practical Systems:** Real-world BFT implementations at scale +• **Performance Engineering:** Pushing the boundaries of BFT performance +• **Application Research:** Novel use cases for BFT technology + +**Ask me about:** +• Specific research papers and their contributions +• Current research directions and open problems +• Academic collaborations and opportunities +• Theoretical foundations vs practical implementations + """ + } + + async def _generate_consensus_research_response(self, query: str) -> Dict[str, Any]: + """Deep dive into consensus algorithm research""" + return { + "type": "consensus_research", + "content": f""" +🏛️ **Consensus Algorithm Research in ResilientDB** + +**🔬 Theoretical Foundations:** + +**PBFT Optimizations:** +• **Pipeline Processing:** Overlapping consensus phases for higher throughput +• **Batch Optimization:** Dynamic batching based on network conditions +• **View Change Improvements:** Faster recovery from primary failures +• **Message Aggregation:** Reducing communication overhead + +**Novel Consensus Variants:** +• **Geo-PBFT:** Optimized for wide-area network deployments +• **Adaptive PBFT:** Dynamic parameter adjustment based on network conditions +• **Hierarchical PBFT:** Multi-level consensus for large-scale systems +• **Privacy-Preserving PBFT:** Consensus with confidential 
transactions + +**🧮 Mathematical Innovations:** + +**Safety Proofs:** Formal verification of consensus safety properties +**Liveness Analysis:** Guaranteed progress under network asynchrony +**Byzantine Bounds:** Optimal fault tolerance with 3f+1 replicas +**Performance Models:** Theoretical throughput and latency bounds + +**🚀 Performance Research:** + +**Throughput Optimization:** +• Achieved 2M+ TPS in laboratory settings +• Linear scaling with replica count demonstrated +• Batch size optimization algorithms developed + +**Latency Minimization:** +• Sub-millisecond consensus rounds achieved +• WAN optimizations for geo-distributed systems +• Edge computing consensus protocols + +**🔮 Future Research Directions:** + +• **Quantum-Resistant BFT:** Post-quantum cryptographic integration +• **AI-Enhanced Consensus:** Machine learning for parameter optimization +• **Cross-Chain Consensus:** BFT protocols for blockchain interoperability +• **IoT-Scale BFT:** Lightweight consensus for resource-constrained devices + +**📊 Want deeper technical details?** Ask about specific consensus optimizations! + """, + "technical_papers": self.research_papers["core_papers"], + "open_problems": [ + "Optimal batch size determination for varying network conditions", + "Byzantine fault detection and mitigation strategies", + "Consensus performance under adversarial network conditions", + "Integration of consensus with privacy-preserving protocols" + ] + } + + async def _generate_consensus_response(self, query: str) -> Dict[str, Any]: + """Stub: Generate consensus research response""" + return { + "type": "consensus_research", + "content": "PBFT consensus in ResilientDB is optimized for high throughput and low latency. Key innovations include pipeline processing, batch optimization, and hierarchical consensus architectures. Ask for more details!" 
+ } + + async def _generate_performance_response(self, query: str) -> Dict[str, Any]: + """Stub: Generate performance research response""" + return { + "type": "performance_research", + "content": "ResilientDB achieves 250,000 TPS locally and 1M+ TPS in large-scale deployments. Performance is optimized via batch size tuning, pipeline depth, and network optimizations. Ask for benchmarks or optimization techniques!" + } + + async def _generate_architecture_response(self, query: str) -> Dict[str, Any]: + """Generate detailed architecture response""" + return { + "type": "architecture_overview", + "content": f""" +🏗️ **ResilientDB Architecture Deep Dive** + +**🔧 Core Components:** +• **Consensus Layer**: Optimized PBFT implementation with pipeline processing +• **Storage Layer**: High-performance persistent storage with cryptographic integrity +• **Network Layer**: Asynchronous Byzantine fault-tolerant communication +• **Application Layer**: Modular APIs for diverse blockchain applications +• **Monitoring Layer**: Real-time performance metrics and health monitoring + +**⚙️ Key Design Principles:** +• **Modularity**: Component-based architecture for easy customization +• **Scalability**: Hierarchical consensus for large-scale deployments +• **Performance**: Optimized for high throughput and low latency +• **Reliability**: Byzantine fault tolerance with formal verification +• **Extensibility**: Plugin architecture for research and development + +**🔍 Technical Implementation:** +• **Language**: C++ core with Python bindings +• **Consensus**: Multi-threaded PBFT with batching and pipelining +• **Storage**: Custom blockchain storage with merkle tree verification +• **Networking**: High-performance TCP/UDP communication protocols +• **APIs**: REST and gRPC interfaces for application integration + +**🎯 Research Features:** +• **Configurable Consensus**: Multiple BFT variants for research +• **Performance Profiling**: Built-in benchmarking and analysis tools +• **Educational 
Tools**: Comprehensive documentation and tutorials +• **Docker Integration**: Containerized deployment for easy experimentation + +Ask me about specific architectural components for deeper technical details! + """ + } + + async def _generate_use_case_response(self, query: str) -> Dict[str, Any]: + """Generate comprehensive use case response""" + + # Identify applications that use social consensus or specific features + social_apps = [] + supply_chain_apps = [] + gaming_apps = [] + financial_apps = [] + + for app_key, app_data in self.applications_catalog.items(): + category = app_data.get("category", "").lower() + features = str(app_data.get("key_features", [])).lower() + description = app_data.get("description", "").lower() + + if "social" in features or "social" in description or "consensus" in features: + social_apps.append(app_data["name"]) + if "supply" in category or "supply" in description: + supply_chain_apps.append(app_data["name"]) + if "gaming" in category or "game" in description: + gaming_apps.append(app_data["name"]) + if "financial" in category or "payment" in description or "defi" in description: + financial_apps.append(app_data["name"]) + + return { + "type": "use_case_overview", + "content": f""" +🌍 **ResilientDB Real-World Use Cases & Applications** + +**🏛️ Social Consensus Applications:** +{chr(10).join(f"• {app}" for app in social_apps) if social_apps else "• Debitable - Decentralized social media with democratic content moderation"} + +**🚚 Supply Chain & Logistics:** +{chr(10).join(f"• {app}" for app in supply_chain_apps) if supply_chain_apps else "• Arrayán - End-to-end supply chain traceability"} + +**🎮 Gaming & Entertainment:** +{chr(10).join(f"• {app}" for app in gaming_apps) if gaming_apps else "• DraftRes - Provably fair fantasy sports platform"} + +**💰 Financial Services:** +{chr(10).join(f"• {app}" for app in financial_apps) if financial_apps else "• Various DeFi applications with Byzantine fault tolerance"} + +**🏥 Healthcare 
Applications:** +• Secure patient data management with consensus-based access control +• Medical research data sharing with privacy preservation +• Drug supply chain integrity and anti-counterfeiting + +**🏛️ Government & Civic:** +• Transparent voting systems with verifiable results +• Public record management with tamper-proof storage +• Citizen identity management with privacy protection + +**🏭 Enterprise & Industrial:** +• Multi-party business process automation +• Consortium blockchain networks for industry collaboration +• IoT device management with Byzantine fault tolerance + +**💡 Key Advantages Across All Use Cases:** +• **Trust Without Central Authority**: Eliminates single points of failure +• **Transparent Operations**: All transactions are verifiable and auditable +• **High Performance**: Supports real-world transaction volumes +• **Regulatory Compliance**: Built-in audit trails and data integrity +• **Research-Backed**: Based on cutting-edge academic research + +Ask me about specific industries or applications for detailed implementation guidance! 
+ """ + } + + async def _generate_general_response(self, query: str) -> Dict[str, Any]: + """Generate general response for queries that don't match specific categories""" + return { + "type": "general_overview", + "content": f""" +🔬 **ResilientDB: A Research Platform** + +ResilientDB represents cutting-edge research in Byzantine fault-tolerant systems, with contributions across: +• **Theoretical Foundations:** Advancing consensus algorithm theory +• **Practical Systems:** Real-world BFT implementations at scale +• **Performance Engineering:** Pushing the boundaries of BFT performance +• **Application Research:** Novel use cases for BFT technology + +**Ask me about:** +• Specific research papers and their contributions +• Current research challenges and open problems +• Performance benchmarks and optimization techniques +• Comparison with other distributed systems +• Technical architecture and implementation details + +**💡 Example questions:** +• "How does ResilientDB's PBFT implementation differ from traditional PBFT?" +• "What are the latest research contributions in Byzantine fault tolerance?" +• "Show me performance comparisons with other blockchain systems" +• "Explain the consensus algorithm optimizations in ResilientDB" + +Your query: "{query}" + +**Recommendation:** For more specific information, ask about particular aspects like applications, consensus mechanisms, performance, or research contributions! + """ + } \ No newline at end of file diff --git a/ecosystem/ai-tools/mcp/ResInsight/add_license_headers.py b/ecosystem/ai-tools/mcp/ResInsight/add_license_headers.py new file mode 100644 index 000000000..963029419 --- /dev/null +++ b/ecosystem/ai-tools/mcp/ResInsight/add_license_headers.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import os +import glob + +# Apache License Header for Python files +PYTHON_HEADER = """# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
def add_header_to_file(filepath, header):
    """Insert *header* into the file at *filepath* unless it is already licensed.

    The file is decoded as UTF-8, falling back to latin-1 when that fails,
    and is written back in the same encoding it was read with so that its
    non-header bytes are not silently re-encoded.

    A leading ``#!`` shebang line and a PEP 263 ``coding`` declaration on
    line 1 or 2 stay at the very top; the header is inserted after them.
    Both are only honoured by the OS / interpreter in that position.

    Files whose text already contains ``'Apache Software Foundation'`` are
    left untouched. Failures are reported with ``print`` and never raised,
    so a batch run continues with the next file.

    Args:
        filepath: Path of the file to update in place.
        header: Text to insert, including its trailing blank line.
    """
    import re  # local import: nothing else in this script needs it

    content = None
    encoding_used = None
    for candidate in ('utf-8', 'latin-1'):
        try:
            with open(filepath, 'r', encoding=candidate) as f:
                content = f.read()
        except UnicodeDecodeError:
            # Not valid in this encoding; try the next candidate.
            continue
        except OSError as e:
            print(f"Error reading {filepath}: {e}")
            return
        encoding_used = candidate
        break

    if content is None:
        # Defensive: latin-1 decodes any byte sequence, so this is not expected.
        print(f"Error reading {filepath}: could not decode file")
        return

    # Check if header already exists
    if 'Apache Software Foundation' in content:
        print(f"Header already exists in {filepath}")
        return

    # Work out how many leading lines must stay above the header.
    coding_line = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*[-_.a-zA-Z0-9]+')
    lines = content.splitlines(keepends=True)
    prelude_len = 1 if lines and lines[0].startswith('#!') else 0
    for index, line in enumerate(lines[:2]):
        if coding_line.match(line):
            prelude_len = index + 1

    prelude = ''.join(lines[:prelude_len])
    remainder = ''.join(lines[prelude_len:])
    if prelude and not prelude.endswith(('\n', '\r')):
        # The prelude was the whole file with no final newline; terminate it
        # so the header starts on its own line.
        prelude += '\n'

    # Add header
    try:
        with open(filepath, 'w', encoding=encoding_used) as f:
            f.write(prelude + header + remainder)
        print(f"✓ Added header to {filepath}")
    except OSError as e:
        print(f"Error writing {filepath}: {e}")


def main():
    """Add the Apache header to every ``*.py`` file under the current directory."""
    # Add to all Python files
    py_files = glob.glob('**/*.py', recursive=True)

    if not py_files:
        print("No Python files found!")
        return

    print(f"Found {len(py_files)} Python files")
    print("-" * 50)

    for py_file in py_files:
        if not py_file.startswith('.'):  # Skip hidden files
            add_header_to_file(py_file, PYTHON_HEADER)

    print("-" * 50)
    print("Done!")


if __name__ == '__main__':
    main()
You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, + # software distributed under the License is distributed on an + # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + # KIND, either express or implied. See the License for the + # specific language governing permissions and limitations + # under the License. + +""" +Knowledge Graph Builder Module +Creates visual concept maps showing relationships between distributed systems concepts +""" + +import asyncio +from typing import Dict, Any, List, Set, Tuple, Optional +import logging + +logger = logging.getLogger("KnowledgeGraph") + + +class KnowledgeGraphBuilder: + """Builds and manages knowledge graphs for distributed systems concepts""" + + def __init__(self): + self.graph = self._build_resilientdb_knowledge_graph() + + def _build_resilientdb_knowledge_graph(self) -> Dict[str, Any]: + """Build comprehensive knowledge graph for ResilientDB and distributed systems""" + + # Define nodes (concepts) and their metadata + nodes = { + # Fundamentals + "Distributed Systems": { + "category": "fundamentals", + "description": "Systems with components on networked computers", + "prerequisites": [], + "difficulty": "beginner" + }, + "CAP Theorem": { + "category": "fundamentals", + "description": "Consistency, Availability, Partition tolerance tradeoffs", + "prerequisites": ["Distributed Systems"], + "difficulty": "beginner" + }, + "Consensus": { + "category": "fundamentals", + "description": "Agreement among distributed processes", + "prerequisites": ["Distributed Systems"], + "difficulty": "intermediate" + }, + + # Byzantine Fault Tolerance + "Byzantine Fault Tolerance": { + "category": "bft", + "description": "Tolerating arbitrary node failures including malicious behavior", + "prerequisites": ["Distributed Systems", "Consensus"], + "difficulty": "intermediate" + }, + "Byzantine Generals Problem": { + "category": 
"bft", + "description": "Classic problem of achieving agreement with traitors", + "prerequisites": ["Byzantine Fault Tolerance"], + "difficulty": "intermediate" + }, + "PBFT": { + "category": "bft", + "description": "Practical Byzantine Fault Tolerance algorithm", + "prerequisites": ["Byzantine Fault Tolerance", "Byzantine Generals Problem"], + "difficulty": "advanced" + }, + "Quorum": { + "category": "bft", + "description": "Minimum number of nodes needed for agreement (2f+1)", + "prerequisites": ["Byzantine Fault Tolerance"], + "difficulty": "intermediate" + }, + + # ResilientDB Core + "ResilientDB": { + "category": "resilientdb", + "description": "High-performance BFT database system", + "prerequisites": ["PBFT", "Consensus"], + "difficulty": "intermediate" + }, + "ResilientDB Architecture": { + "category": "resilientdb", + "description": "Modular architecture with pipeline consensus", + "prerequisites": ["ResilientDB"], + "difficulty": "intermediate" + }, + "Pipeline Consensus": { + "category": "resilientdb", + "description": "Overlapping consensus phases for high throughput", + "prerequisites": ["PBFT", "ResilientDB"], + "difficulty": "advanced" + }, + "Transaction Processing": { + "category": "resilientdb", + "description": "How ResilientDB processes and orders transactions", + "prerequisites": ["ResilientDB Architecture"], + "difficulty": "intermediate" + }, + + # GraphQL + "GraphQL": { + "category": "api", + "description": "Query language for APIs", + "prerequisites": [], + "difficulty": "beginner" + }, + "GraphQL Schema": { + "category": "api", + "description": "Type system defining API structure", + "prerequisites": ["GraphQL"], + "difficulty": "beginner" + }, + "GraphQL Queries": { + "category": "api", + "description": "Reading data from GraphQL API", + "prerequisites": ["GraphQL Schema"], + "difficulty": "beginner" + }, + "GraphQL Mutations": { + "category": "api", + "description": "Writing data to GraphQL API", + "prerequisites": ["GraphQL Schema"], + 
"difficulty": "beginner" + }, + "ResilientDB GraphQL": { + "category": "resilientdb", + "description": "GraphQL interface for ResilientDB", + "prerequisites": ["ResilientDB", "GraphQL Schema"], + "difficulty": "intermediate" + }, + + # Applications + "Debitable": { + "category": "applications", + "description": "Democratic social media platform", + "prerequisites": ["ResilientDB", "Transaction Processing"], + "difficulty": "advanced" + }, + "DraftRes": { + "category": "applications", + "description": "Provably fair fantasy sports", + "prerequisites": ["ResilientDB", "Transaction Processing"], + "difficulty": "advanced" + }, + "Arrayán": { + "category": "applications", + "description": "Supply chain traceability platform", + "prerequisites": ["ResilientDB", "Transaction Processing"], + "difficulty": "advanced" + }, + + # Advanced Topics + "Performance Optimization": { + "category": "advanced", + "description": "Tuning ResilientDB for maximum throughput", + "prerequisites": ["ResilientDB Architecture", "Pipeline Consensus"], + "difficulty": "advanced" + }, + "View Changes": { + "category": "advanced", + "description": "Recovering from primary node failures", + "prerequisites": ["PBFT"], + "difficulty": "advanced" + }, + "Checkpointing": { + "category": "advanced", + "description": "Periodic state snapshots for recovery", + "prerequisites": ["PBFT", "ResilientDB Architecture"], + "difficulty": "advanced" + } + } + + # Define edges (relationships) + edges = [] + for node_name, node_data in nodes.items(): + for prereq in node_data["prerequisites"]: + edges.append({ + "from": prereq, + "to": node_name, + "type": "prerequisite" + }) + + # Add semantic relationships + semantic_edges = [ + ("ResilientDB", "Debitable", "implements"), + ("ResilientDB", "DraftRes", "implements"), + ("ResilientDB", "Arrayán", "implements"), + ("PBFT", "Pipeline Consensus", "optimized_by"), + ("GraphQL", "ResilientDB GraphQL", "interface_for"), + ("Transaction Processing", "ResilientDB GraphQL", 
"exposes"), + ("CAP Theorem", "Byzantine Fault Tolerance", "informs"), + ("Quorum", "PBFT", "used_in"), + ] + + for from_node, to_node, rel_type in semantic_edges: + edges.append({ + "from": from_node, + "to": to_node, + "type": rel_type + }) + + return {"nodes": nodes, "edges": edges} + + async def build_graph(self, topic: str, depth: int = 2) -> Dict[str, Any]: + """Build knowledge graph centered on a topic""" + + # Find the topic node + if topic not in self.graph["nodes"]: + # Try partial match + matches = [n for n in self.graph["nodes"].keys() if topic.lower() in n.lower()] + if matches: + topic = matches[0] + else: + return {"error": f"Topic '{topic}' not found in knowledge graph"} + + # BFS to find connected nodes within depth + visited_nodes = set() + current_level = {topic} + + for _ in range(depth): + next_level = set() + for node in current_level: + visited_nodes.add(node) + # Find connected nodes + for edge in self.graph["edges"]: + if edge["from"] == node: + next_level.add(edge["to"]) + elif edge["to"] == node: + next_level.add(edge["from"]) + current_level = next_level - visited_nodes + + # Build subgraph + subgraph_nodes = {k: v for k, v in self.graph["nodes"].items() if k in visited_nodes} + subgraph_edges = [e for e in self.graph["edges"] + if e["from"] in visited_nodes and e["to"] in visited_nodes] + + # Generate Mermaid diagram + mermaid = self._generate_mermaid_diagram(subgraph_nodes, subgraph_edges, topic) + + # Generate text representation + text_rep = self._generate_text_representation(subgraph_nodes, subgraph_edges, topic) + + return { + "total_nodes": len(subgraph_nodes), + "total_edges": len(subgraph_edges), + "center_topic": topic, + "mermaid_diagram": mermaid, + "text_representation": text_rep, + "nodes": subgraph_nodes, + "edges": subgraph_edges + } + + def _generate_mermaid_diagram( + self, + nodes: Dict[str, Any], + edges: List[Dict[str, str]], + center: str + ) -> str: + """Generate Mermaid flowchart diagram""" + + mermaid = "graph 
TD\n" + + # Add nodes with styling based on category + for node_name, node_data in nodes.items(): + node_id = node_name.replace(" ", "_").replace("-", "_") + category = node_data["category"] + + # Style based on category + if node_name == center: + style = ":::highlight" + elif category == "fundamentals": + style = ":::fundamental" + elif category == "bft": + style = ":::bft" + elif category == "resilientdb": + style = ":::resilientdb" + elif category == "applications": + style = ":::application" + else: + style = "" + + mermaid += f' {node_id}["{node_name}"]{style}\n' + + # Add edges + for edge in edges: + from_id = edge["from"].replace(" ", "_").replace("-", "_") + to_id = edge["to"].replace(" ", "_").replace("-", "_") + edge_type = edge["type"] + + if edge_type == "prerequisite": + arrow = "-->|prereq|" + elif edge_type == "implements": + arrow = "-.->|implements|" + elif edge_type == "optimized_by": + arrow = "==>|optimizes|" + else: + arrow = f"-->|{edge_type}|" + + mermaid += f" {from_id} {arrow} {to_id}\n" + + # Add styling + mermaid += "\n classDef highlight fill:#f96,stroke:#333,stroke-width:4px\n" + mermaid += " classDef fundamental fill:#9cf,stroke:#333,stroke-width:2px\n" + mermaid += " classDef bft fill:#fc9,stroke:#333,stroke-width:2px\n" + mermaid += " classDef resilientdb fill:#9f9,stroke:#333,stroke-width:2px\n" + mermaid += " classDef application fill:#f9f,stroke:#333,stroke-width:2px\n" + + return mermaid + + def _generate_text_representation( + self, + nodes: Dict[str, Any], + edges: List[Dict[str, str]], + center: str + ) -> str: + """Generate text-based graph representation""" + + text = f"📊 Knowledge Graph: {center}\n\n" + + # Group by category + categories = {} + for node_name, node_data in nodes.items(): + cat = node_data["category"] + if cat not in categories: + categories[cat] = [] + categories[cat].append(node_name) + + for category, node_list in categories.items(): + text += f"**{category.title()}:**\n" + for node in node_list: + marker 
= "🎯" if node == center else "•" + text += f" {marker} {node}\n" + text += "\n" + + return text + + async def get_related_concepts(self, concept: str) -> List[Dict[str, str]]: + """Get concepts directly related to a given concept""" + + if concept not in self.graph["nodes"]: + # Try partial match + matches = [n for n in self.graph["nodes"].keys() if concept.lower() in n.lower()] + if matches: + concept = matches[0] + else: + return [] + + related = [] + + # Find all edges involving this concept + for edge in self.graph["edges"]: + if edge["from"] == concept: + related.append({ + "name": edge["to"], + "relationship": f"is {edge['type']} of", + "direction": "forward" + }) + elif edge["to"] == concept: + related.append({ + "name": edge["from"], + "relationship": f"{edge['type']} for", + "direction": "backward" + }) + + return related + + async def find_learning_path( + self, + from_concept: str, + to_concept: str, + student_mastery: List[str] + ) -> Dict[str, Any]: + """Find optimal learning path between concepts""" + + # BFS to find shortest path + queue = [(from_concept, [from_concept])] + visited = {from_concept} + + while queue: + current, path = queue.pop(0) + + if current == to_concept: + # Found path, build detailed steps + steps = [] + total_time = 0 + + for concept in path: + node_data = self.graph["nodes"].get(concept, {}) + is_mastered = concept in student_mastery + + # Estimate time based on difficulty + difficulty = node_data.get("difficulty", "intermediate") + time_map = {"beginner": "30 min", "intermediate": "1 hour", "advanced": "2 hours"} + + steps.append({ + "concept": concept, + "description": node_data.get("description", ""), + "difficulty": difficulty, + "estimated_time": time_map.get(difficulty, "1 hour"), + "mastered": is_mastered + }) + + if not is_mastered: + total_time += {"beginner": 30, "intermediate": 60, "advanced": 120}.get(difficulty, 60) + + return { + "steps": steps, + "total_time": f"{total_time // 60} hours {total_time % 60} min", + 
"path_length": len(path) + } + + # Explore neighbors + for edge in self.graph["edges"]: + next_node = None + if edge["from"] == current and edge["type"] == "prerequisite": + next_node = edge["to"] + + if next_node and next_node not in visited: + visited.add(next_node) + queue.append((next_node, path + [next_node])) + + return { + "error": f"No learning path found from {from_concept} to {to_concept}", + "steps": [], + "total_time": "N/A" + } + + async def export_full_graph(self) -> str: + """Export complete knowledge graph""" + + total_nodes = len(self.graph["nodes"]) + total_edges = len(self.graph["edges"]) + + # Generate full mermaid diagram + mermaid = self._generate_mermaid_diagram( + self.graph["nodes"], + self.graph["edges"], + "ResilientDB" + ) + + return f""" +# Complete ResilientDB Knowledge Graph + +**Statistics:** +- Total Concepts: {total_nodes} +- Total Relationships: {total_edges} + +**Categories:** +{self._list_categories()} + +**Full Graph:** +```mermaid +{mermaid} +``` + +**All Concepts:** +{self._list_all_concepts()} + """ + + def _list_categories(self) -> str: + """List all categories with counts""" + categories = {} + for node_data in self.graph["nodes"].values(): + cat = node_data["category"] + categories[cat] = categories.get(cat, 0) + 1 + + return "\n".join(f"- {cat.title()}: {count} concepts" + for cat, count in sorted(categories.items())) + + def _list_all_concepts(self) -> str: + """List all concepts grouped by difficulty""" + by_difficulty = {"beginner": [], "intermediate": [], "advanced": []} + + for node_name, node_data in self.graph["nodes"].items(): + difficulty = node_data.get("difficulty", "intermediate") + by_difficulty[difficulty].append(node_name) + + result = "" + for level in ["beginner", "intermediate", "advanced"]: + result += f"\n**{level.title()}:**\n" + for concept in sorted(by_difficulty[level]): + result += f"- {concept}\n" + + return result diff --git a/ecosystem/ai-tools/mcp/ResInsight/pyproject.toml 
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "resilientdb-tutor"
version = "1.0.0"
description = "ResilientDB Educational Assistant MCP Server"
authors = [
    {name = "ResilientDB Tutor", email = "tutor@resilientdb.edu"}
]
readme = "README.md"
# NOTE(review): the mcp/fastmcp packages may require a newer interpreter
# than 3.8 -- confirm and raise this floor (and the tool targets below) if so.
requires-python = ">=3.8"
# asyncio is part of the standard library and must not be listed here: the
# PyPI package of that name is an obsolete backport that shadows the stdlib
# module and fails to import on modern Python.
dependencies = [
    "mcp>=1.0.0",
    "fastmcp>=0.1.0",
    "docker>=6.0.0",
    "pydantic>=2.0.0",
    "uvicorn>=0.20.0",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.0.0",
    "black>=22.0.0",
    "mypy>=1.0.0",
    "ruff>=0.1.0"
]

[project.scripts]
resilientdb-tutor = "resilientdb_mcp_server:main"

[tool.setuptools.packages.find]
where = ["."]
include = ["resilientdb_mcp_server*"]

[tool.black]
line-length = 100
target-version = ['py38']

[tool.mypy]
python_version = "3.8"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true

[tool.ruff]
line-length = 100
select = ["E", "F", "I", "N", "W", "UP"]
+ignore = ["E501"] # Line too long (handled by black) diff --git a/ecosystem/ai-tools/mcp/ResInsight/requirements.txt b/ecosystem/ai-tools/mcp/ResInsight/requirements.txt new file mode 100644 index 000000000..3bdd65efc --- /dev/null +++ b/ecosystem/ai-tools/mcp/ResInsight/requirements.txt @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Core MCP dependencies +mcp>=1.0.0 +fastmcp>=0.1.0 + +# Web and API +fastapi>=0.100.0 +uvicorn>=0.20.0 +pydantic>=2.0.0 + +# Docker integration for labs +docker>=6.0.0 + +# Repository analysis +GitPython>=3.1.0 +requests>=2.31.0 +httpx>=0.24.0 + +# Advanced code analysis +faiss-cpu>=1.7.4 +sentence-transformers>=2.2.0 +networkx>=3.0 +matplotlib>=3.7.0 + +# Data handling +python-dateutil>=2.8.0 +pathlib>=1.0.1 +numpy>=1.24.0 + +# Logging and utilities +colorlog>=6.7.0 diff --git a/ecosystem/ai-tools/mcp/ResInsight/server.py b/ecosystem/ai-tools/mcp/ResInsight/server.py new file mode 100644 index 000000000..b2d162cec --- /dev/null +++ b/ecosystem/ai-tools/mcp/ResInsight/server.py @@ -0,0 +1,1026 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import ast +from fastmcp import FastMCP +from pydantic import BaseModel, Field +from typing import List, Optional, Dict, Any +import httpx +import faiss +import numpy as np +import networkx as nx +import matplotlib.pyplot as plt +import io +import base64 +import ast +import numpy as np +import os +import json +from dotenv import load_dotenv + +load_dotenv() # Load environment variables from .env file + +# Optional: Import SentenceTransformer (only needed for semantic search) +try: + from sentence_transformers import SentenceTransformer + SENTENCE_TRANSFORMER_AVAILABLE = True + print("[OK] SentenceTransformer available") +except ImportError: + SENTENCE_TRANSFORMER_AVAILABLE = False + SentenceTransformer = None + print("[WARNING] SentenceTransformer not available - semantic search disabled") + +# Import ResilientDB Knowledge Base +try: + from ResilientDBKnowledgeBase import ResilientDBKnowledgeBase + KNOWLEDGE_BASE_AVAILABLE = True + print("[OK] ResilientDB Knowledge Base loaded") +except ImportError as e: + KNOWLEDGE_BASE_AVAILABLE = False + print(f"[WARNING] ResilientDB Knowledge Base not available: {e}") + +# ------------------------- +# Authentication and Token Setup +# ------------------------- +MCP_TOKEN = os.getenv("MCP_TOKEN") # Lab-provided MCP access token 
GITHUB_ENTERPRISE_TOKEN = os.getenv("GITHUB_ENTERPRISE_TOKEN")  # GitHub Enterprise token

# Use enterprise token, fallback to environment variable
GITHUB_TOKEN = GITHUB_ENTERPRISE_TOKEN or os.getenv("GITHUB_TOKEN")


def get_auth_headers() -> Dict[str, str]:
    """Return GitHub API auth headers, or an empty dict when no token is set."""
    if GITHUB_TOKEN:
        return {"Authorization": f"token {GITHUB_TOKEN}"}
    return {}


# -------------------------
# FAISS vector search setup
# -------------------------
# Global variables to track index state
index = None
index_dimension = None  # Track the dimension used for the index
metadata = []
model = None

if SENTENCE_TRANSFORMER_AVAILABLE:
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        print("[OK] Sentence transformer model loaded")
    except Exception as e:
        # Best effort: the server still starts, with semantic search disabled.
        print(f"[WARNING] Failed to load model: {e}")
        model = None


def embed_text(text: str) -> np.ndarray:
    """Embed ``text`` with the loaded sentence-transformer model.

    Records the embedding dimension in ``index_dimension`` on first use.

    Raises:
        RuntimeError: if no model could be loaded.
    """
    global index_dimension
    if model is None:
        raise RuntimeError("SentenceTransformer model not available - semantic search disabled")
    embedding = model.encode(text)

    # Track dimension on first use
    if index_dimension is None:
        index_dimension = len(embedding)

    return embedding


# -------------------------
# Pydantic Models
# -------------------------
class FileSummary(BaseModel):
    filename: str
    code_summary: str
    insights: List[str] = Field(default_factory=list)


class RepoSummary(BaseModel):
    repo_name: str
    total_files: int
    files: List[str]


class SearchResult(BaseModel):
    filepath: str
    code_snippet: str
    score: float


class RepoInsights(BaseModel):
    repo_name: str
    insights: List[str] = Field(default_factory=list)


# -------------------------
# FastMCP Server Setup
# -------------------------
mcp = FastMCP(name="ResInsight: AI-driven developer onboarding ecosystem")


# -------------------------
# Authentication Setup Function
# -------------------------
def setup_authentication():
    """Install bearer-token authentication middleware on the FastMCP app.

    Requests to the docs/health paths pass through unauthenticated; all
    others must send ``Authorization: Bearer <MCP_TOKEN>``.

    Returns:
        True when the middleware was attached, False otherwise.
    """
    import hmac

    from starlette.middleware.base import BaseHTTPMiddleware
    from starlette.requests import Request
    from starlette.responses import JSONResponse

    bearer_prefix = "Bearer "
    public_paths = ("/health", "/", "/docs", "/openapi.json", "/redoc")

    def reject(status_code, error, message, hint=None):
        """Build the JSON error response used for every rejection."""
        content = {"error": error, "message": message}
        if hint is not None:
            content["hint"] = hint
        return JSONResponse(status_code=status_code, content=content)

    class AuthMiddleware(BaseHTTPMiddleware):
        async def dispatch(self, request: Request, call_next):
            # Skip authentication for docs/health endpoints
            if request.url.path in public_paths:
                return await call_next(request)

            auth_header = request.headers.get("Authorization")

            if not auth_header:
                return reject(
                    401, "unauthorized", "Missing Authorization header",
                    "Include header: Authorization: Bearer YOUR_LAB_TOKEN"
                )

            if not auth_header.startswith(bearer_prefix):
                return reject(
                    401, "unauthorized", "Invalid Authorization format",
                    "Use: Authorization: Bearer YOUR_LAB_TOKEN"
                )

            # Strip only the leading scheme; str.replace would also delete
            # any later "Bearer " occurrences inside the token itself.
            token = auth_header[len(bearer_prefix):]

            if not MCP_TOKEN:
                return reject(500, "server_error", "Server not configured with MCP_TOKEN")

            # Constant-time comparison avoids leaking token prefixes via timing.
            if not hmac.compare_digest(token.encode("utf-8"), MCP_TOKEN.encode("utf-8")):
                return reject(403, "forbidden", "Invalid authentication token")

            return await call_next(request)

    try:
        # NOTE(review): relies on FastMCP's private ``_app`` attribute.
        if hasattr(mcp, '_app'):
            mcp._app.add_middleware(AuthMiddleware)
            print("[✓] Authentication middleware enabled")
            return True
        print("[✗] Warning: Could not add auth middleware - FastMCP API changed")
        return False
    except Exception as e:
        print(f"[✗] Error adding auth middleware: {e}")
        return False


# -------------------------
# ResilientDB Knowledge Base Initialization
# -------------------------
resilientdb_knowledge = None
if KNOWLEDGE_BASE_AVAILABLE:
    try:
        resilientdb_knowledge = ResilientDBKnowledgeBase()
        print("[OK] ResilientDB Knowledge Base initialized and ready")
    except Exception as e:
        print(f"[ERROR] Failed to initialize knowledge base: {e}")
        resilientdb_knowledge = None
# -------------------------
# Helper functions
# -------------------------
def split_code_into_chunks(code: str, max_lines: int = 200) -> List[str]:
    """Split ``code`` into consecutive chunks of at most ``max_lines`` lines.

    Raises:
        ValueError: if ``max_lines`` is smaller than 1.
    """
    if max_lines < 1:
        raise ValueError("max_lines must be a positive integer")
    lines = code.splitlines()
    return ["\n".join(lines[start:start + max_lines])
            for start in range(0, len(lines), max_lines)]


def _branch_candidates(branch: str) -> List[str]:
    """Return ``branch`` plus the other of main/master as a fallback."""
    fallback = {"main": "master", "master": "main"}.get(branch)
    return [branch, fallback] if fallback else [branch]


async def fetch_repo_tree(owner: str, repo: str, branch: str = "main") -> List[dict]:
    """Fetch all blob entries of a repository via the Git Trees API.

    Tries ``branch`` first and, for main/master, the other one second.

    Raises:
        The last request/decoding error when every candidate branch fails.
    """
    headers = get_auth_headers()
    last_error: Optional[Exception] = None

    async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
        for candidate in _branch_candidates(branch):
            url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{candidate}?recursive=1"
            try:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
                tree = response.json().get("tree", [])
            except (httpx.HTTPError, ValueError) as e:
                last_error = e
                continue
            return [item for item in tree if item.get('type') == 'blob']

    # If all branches failed, raise the last error
    if last_error:
        raise last_error
    return []


async def fetch_raw_file(owner: str, repo: str, filepath: str, branch: str = "main") -> Optional[str]:
    """Fetch a file's text from raw.githubusercontent.com.

    Tries ``branch`` first and, for main/master, the other one second.
    Returns None when the file cannot be fetched from any candidate.
    """
    from urllib.parse import quote

    async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
        for candidate in _branch_candidates(branch):
            # Quote path segments so names containing spaces, '#' or '?'
            # are not misread as URL syntax.
            url = (
                f"https://raw.githubusercontent.com/{owner}/{repo}/"
                f"{quote(candidate)}/{quote(filepath)}"
            )
            try:
                r = await client.get(url)
            except (httpx.HTTPError, httpx.InvalidURL):
                # Network-level failure: try the next branch. Unlike a bare
                # except, this lets task cancellation propagate.
                continue
            if r.status_code == 200:
                return r.text
    return None


# Alternate parser using Python's built-in ast module
def parse_python_functions_ast(code: str) -> List[str]:
    """Return a signature string for every function defined in ``code``.

    Both ``def`` and ``async def`` are reported, with positional parameter
    names only. Returns an empty list when ``code`` cannot be parsed, so
    malformed input never crashes the MCP server.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError, RecursionError):
        return []

    functions = []
    for node in ast.walk(tree):
        if isinstance(node, ast.AsyncFunctionDef):
            keyword = "async def"
        elif isinstance(node, ast.FunctionDef):
            keyword = "def"
        else:
            continue
        arglist = ", ".join(arg.arg for arg in node.args.args)
        functions.append(f"{keyword} {node.name}({arglist}):")
    return functions


# -------------------------
# MCP Tools
# -------------------------
# Helper function for Contents API recursion (NOT an MCP tool)
async def _fetch_files_recursive(
    owner: str,
    repo: str,
    branch: str,
    path: str = ""
) -> List[str]:
    """List every file under ``path`` on ``branch`` via the Contents API.

    Directories are walked recursively over one shared HTTP client. A
    failed request is logged and ends the walk of that directory only.
    """
    from urllib.parse import quote

    headers = get_auth_headers()
    # Without ``ref`` the Contents API silently lists the default branch.
    ref_query = f"&ref={quote(branch, safe='')}" if branch else ""

    async def walk(client, dir_path: str) -> List[str]:
        found: List[str] = []
        url = (
            f"https://api.github.com/repos/{owner}/{repo}/contents/"
            f"{quote(dir_path)}?per_page=100{ref_query}"
        )
        while url:
            try:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
            except httpx.HTTPError as e:
                print(f"Error fetching {url}: {e}")
                break

            data = response.json()

            # A dict means ``dir_path`` named a single object, not a directory.
            if isinstance(data, dict):
                if 'content' in data:
                    found.append(data['path'])
                break

            for item in data:
                if item['type'] == 'file':
                    found.append(item['path'])
                elif item['type'] == 'dir':
                    found.extend(await walk(client, item['path']))

            # Follow Link-header pagination when present.
            url = response.links.get("next", {}).get("url")
        return found

    async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
        return await walk(client, path)


# MCP tool with hybrid
# approach: one Tree API request, with a Contents API walk as fallback
@mcp.tool(name="list_github_repo_files")
async def list_github_repo_files(
    owner: str,
    repo: str,
    branch: str = "main",
    path: str = ""
) -> List[str]:
    """
    This tool fetches the list of files in a particular repository using the github api mentioned in the MCP tool and not from any online source.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        branch: Branch to list.
        path: Optional sub-directory; only files below it are returned.
    """
    headers = get_auth_headers()
    api_root = f"https://api.github.com/repos/{owner}/{repo}"

    # FAST PATH: Try Tree API first (single request for entire repo)
    try:
        async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
            # Resolve the branch to a commit SHA first for more reliable results
            try:
                ref_response = await client.get(f"{api_root}/git/ref/heads/{branch}", headers=headers)
                ref_response.raise_for_status()
                sha = ref_response.json()['object']['sha']
            except (httpx.HTTPError, KeyError, TypeError, ValueError):
                # Fallback: use branch name directly
                sha = branch

            tree_response = await client.get(f"{api_root}/git/trees/{sha}?recursive=1", headers=headers)
            tree_response.raise_for_status()
            data = tree_response.json()

        # A truncated response means the repo is too large for one tree call
        if not data.get('truncated', False):
            files = [item['path'] for item in data.get('tree', [])
                     if item['type'] == 'blob']
            # The tree always covers the whole repo, so apply ``path`` here
            # to match what the Contents API fallback returns.
            prefix = path.strip("/")
            if prefix:
                files = [p for p in files if p == prefix or p.startswith(prefix + "/")]
            print(f"✓ Tree API: Found {len(files)} files in one request")
            return files

        print("⚠ Tree API truncated, falling back to Contents API...")

    except Exception as e:
        # Deliberate best effort: any fast-path failure falls back below.
        print(f"⚠ Tree API failed ({str(e)}), using Contents API...")

    # SLOW PATH: Fall back to Contents API with recursive traversal
    print("→ Using Contents API (may take longer for large repos)...")
    files = await _fetch_files_recursive(owner, repo, branch, path)
    print(f"✓ Contents API: Found {len(files)} files")
    return files


@mcp.tool(name="Get_Repo_Summary")
async def getRepoSummary(owner: str, repo: str, branch: str = "main") -> str:
    """
    Get the summary of a particular repository. Ask the user about the branch they want the information about. This MCP tool will give the summary of the repo as to what type of files, how many files, what it does, what is it about, what is it implementing.

    Returns the file count and the ten most common file extensions as text,
    or an error message when the repository tree cannot be fetched.
    """
    try:
        tree = await fetch_repo_tree(owner, repo, branch)
        files = [item['path'] for item in tree]

        # Count extensions from the file name only, so a dot in a directory
        # name (e.g. "conf.d/Makefile") is not mistaken for an extension.
        file_types: Dict[str, int] = {}
        for file_path in files:
            file_name = file_path.rsplit('/', 1)[-1]
            ext = file_name.rsplit('.', 1)[-1] if '.' in file_name else 'no_extension'
            file_types[ext] = file_types.get(ext, 0) + 1

        top_types = sorted(file_types.items(), key=lambda entry: entry[1], reverse=True)[:10]

        parts = [
            f"Repository: {owner}/{repo}\n",
            f"Branch: {branch}\n",
            f"Total files: {len(files)}\n\n",
            "File types:\n",
        ]
        parts.extend(f" .{ext}: {count} files\n" for ext, count in top_types)
        return "".join(parts)
    except Exception as e:
        return f"Error fetching repository summary: {str(e)}\nTry using branch='master' if the default 'main' doesn't work."
@mcp.tool(name="getFileSummary")
async def getFileSummary(owner: str, repo: str, filenames: List[str], branch: str = "main") -> List[FileSummary]:
    """
    Generate the summary of a particular file in a branch. Do not provide extra unnecessary information. To the point and specific information and what the file does.

    Returns one FileSummary per requested filename, in request order; files
    that cannot be fetched are reported as not found or inaccessible.
    """
    file_summaries = []
    for filename in filenames:
        code = await fetch_raw_file(owner, repo, filename, branch)
        if not code:
            file_summaries.append(
                FileSummary(filename=filename, code_summary="File not found or inaccessible", insights=[])
            )
            continue

        # Simple summary: line count, presence of TODOs or defs
        insights = []
        if "TODO" in code:
            insights.append("Contains TODO comments.")
        if "def " in code:
            insights.append("Contains function definitions.")
        file_summaries.append(FileSummary(
            filename=filename,
            code_summary=f"{filename} has {len(code.splitlines())} lines of code.",
            insights=insights,
        ))
    return file_summaries


@mcp.tool(name="Ingest_Repo_Code")
async def ingest_repo_code(owner: str, repo: str, branch: str = "main") -> str:
    """
    Fetch repo files, chunk code, generate embeddings and index them

    Only the first 50 code files are processed, in 50-line chunks. On
    success the module-level FAISS index and its metadata are replaced.
    Returns a status message; errors are reported in the message.
    """
    global index, metadata, index_dimension

    if model is None:
        return "⚠️ Semantic search not available - SentenceTransformer not installed"

    code_extensions = ('.py', '.js', '.jsx', '.ts', '.tsx', '.java', '.cpp', '.c', '.go', '.rs')
    max_files = 50   # Limit to first 50 files to avoid timeout
    chunk_size = 50  # Lines per chunk

    try:
        files = await _fetch_files_recursive(owner, repo, branch, "")

        if not files:
            # Try alternative method
            try:
                tree = await fetch_repo_tree(owner, repo, branch)
                files = [item['path'] for item in tree]
            except Exception:
                return f"Could not fetch files from {owner}/{repo}. Try checking the branch name."

        code_files = [f for f in files if f.endswith(code_extensions)]
        if not code_files:
            return f"No code files found in {owner}/{repo}"

        all_chunks = []
        all_metadata = []
        indexed_files = 0

        for filepath in code_files[:max_files]:
            try:
                content = await fetch_raw_file(owner, repo, filepath, branch)
            except Exception as e:
                print(f"Skipping {filepath}: {e}")
                continue
            if not content:
                continue

            lines = content.split('\n')
            chunks_before = len(all_chunks)
            for start in range(0, len(lines), chunk_size):
                chunk = '\n'.join(lines[start:start + chunk_size])
                if chunk.strip():
                    all_chunks.append(chunk)
                    all_metadata.append({
                        'file': filepath,
                        'chunk_start': start,
                        'chunk_end': min(start + chunk_size, len(lines))
                    })
            if len(all_chunks) > chunks_before:
                indexed_files += 1

        if not all_chunks:
            return f"No content could be extracted from {owner}/{repo}"

        print(f"Generating embeddings for {len(all_chunks)} chunks...")
        embeddings = np.array([embed_text(chunk) for chunk in all_chunks])

        # Build into locals first so a FAISS failure cannot leave the
        # globals half-updated (new index paired with stale metadata).
        new_dimension = embeddings.shape[1]
        new_index = faiss.IndexFlatL2(new_dimension)
        new_index.add(embeddings.astype('float32'))

        index_dimension = new_dimension
        index = new_index
        metadata = all_metadata

        # Report the files that actually contributed chunks, not every code
        # file in the repo (most are skipped by the max_files cap).
        return f"✓ Successfully indexed {len(all_chunks)} code chunks from {indexed_files} files. Index dimension: {index_dimension}"

    except Exception as e:
        return f"Error ingesting repo: {str(e)}"


@mcp.tool(name="Semantic_Search")
async def semanticSearch(query: str, top_k: int = 5) -> str:
    """
    Perform semantic search on the data after vectorization and indexing.

    Requires a prior Ingest_Repo_Code run. Returns the best-matching chunks
    (file, line range, L2 distance) as text, or a warning message.
    """
    # Check if index exists
    if index is None or len(metadata) == 0:
        return "⚠️ No index found. Please run 'Ingest_Repo_Code' first to build the index."

    if top_k < 1:
        return "⚠️ top_k must be at least 1."

    query_dimension = None
    try:
        qv = embed_text(query)
        query_dimension = len(qv)

        # Verify dimension match
        if query_dimension != index_dimension:
            return f"⚠️ Dimension mismatch: Query has {query_dimension} dimensions but index expects {index_dimension}. Please rebuild the index."

        # Reshape for FAISS
        qv = qv.reshape(1, -1).astype('float32')

        distances, indices = index.search(qv, min(top_k, len(metadata)))

        results = []
        for rank, (dist, idx) in enumerate(zip(distances[0], indices[0]), 1):
            # Reject negative ids explicitly: FAISS returns -1 for missing
            # hits, which would otherwise index metadata from the end.
            if 0 <= idx < len(metadata):
                meta = metadata[idx]
                results.append(
                    f"\n{rank}. File: {meta['file']}\n"
                    f" Lines: {meta['chunk_start']}-{meta['chunk_end']}\n"
                    f" Distance: {dist:.4f}"
                )

        if not results:
            return "No results found."

        return f"Search results for: '{query}'\n" + "\n".join(results)

    except AssertionError:
        # Use the dimension already measured instead of embedding the query
        # again inside the handler (which could itself raise).
        return f"⚠️ Dimension mismatch error. Index dimension: {index_dimension}, Query dimension: {query_dimension}. Please rebuild the index."
    except Exception as e:
        return f"⚠️ Search error: {str(e)}"


@mcp.tool(name="Get_File_Functions")
async def getFileFunctions(owner: str, repo: str, filepath: str, branch: str = "main") -> List[str]:
    """
    The MCP tool getFileFunctions extracts Python function definitions from a given file in a GitHub repository.

    It fetches the raw source code of the specified file using fetch_raw_file.
    If the file content is empty or unavailable, it returns an empty list.
    Otherwise it calls parse_python_functions_ast, which parses the source
    with Python's built-in ast module and creates one signature string per
    function found, like "def function_name(arg1, arg2):".

    Purpose:
    This tool helps junior developers (or any users) quickly understand the
    structure of Python files by listing the function definitions and their
    arguments without needing to manually inspect the file line-by-line.
    """
    code = await fetch_raw_file(owner, repo, filepath, branch)
    if not code:
        return []
    return parse_python_functions_ast(code)
+ - "Tell me about Debitable" + - "How does PBFT work in ResilientDB?" + - "Show me performance benchmarks" + - "How to install ResilientDB?" + - "What applications are built on ResilientDB?" + - "How to use [any ResilientDB application]?" + + Args: + query: Your question about ResilientDB (any topic) + category: Optional. One of: applications, architecture, consensus, performance, + use_cases, research, setup, general + + Returns: + Comprehensive answer from the ResilientDB knowledge base with examples and guidance + """ + if not KNOWLEDGE_BASE_AVAILABLE or resilientdb_knowledge is None: + return """ +❌ ResilientDB Knowledge Base is not available. + +Please ensure ResilientDBKnowledgeBase.py is in the project directory. + +For now, you can: +1. Check the ResilientDB GitHub repository: https://github.com/apache/incubator-resilientdb +2. Ask me to fetch information from the repository using other tools +""" + + try: + # Determine the best domain based on category or query content + domain = category or "general" + + # Auto-detect domain from query if not specified + if not category: + query_lower = query.lower() + + # Check for setup/installation queries + if any(word in query_lower for word in ["setup", "install", "configure", "run", "start", "deploy", "docker"]): + domain = "setup" + # Check for specific applications (all 14 from ExpoLab) + elif any(app in query_lower for app in [ + "debitable", "draftres", "arrayán", "arrayan", "echo", + "rescounty", "crypogo", "explorer", "monitoring", + "resview", "reslens", "coinsensus", "respirer", + "utxo", "utxo lenses", "resilientdb cli", "cli", + "application", "app" + ]): + domain = "applications" + # Check for architecture queries + elif any(word in query_lower for word in ["architecture", "design", "component", "structure", "layer"]): + domain = "architecture" + # Check for consensus queries + elif any(word in query_lower for word in ["consensus", "pbft", "bft", "byzantine", "fault tolerance", "agreement"]): + domain = 
"consensus" + # Check for performance queries + elif any(word in query_lower for word in ["performance", "benchmark", "speed", "throughput", "latency", "tps", "fast"]): + domain = "performance" + # Check for use case queries + elif any(word in query_lower for word in ["use case", "example", "industry", "real world", "application"]): + domain = "use_cases" + # Check for research queries + elif any(word in query_lower for word in ["paper", "research", "publication", "academic", "study"]): + domain = "research" + # Check for "how to use" queries + elif "how to use" in query_lower or "how do i use" in query_lower: + domain = "applications" + + # Query the knowledge base + result_dict = await resilientdb_knowledge.query_knowledge(query, domain) + + # Format the result nicely + if isinstance(result_dict, dict): + # Extract the main content + content = result_dict.get("content", "") + result_type = result_dict.get("type", "general") + + # Build formatted response + formatted_result = f""" +# 📚 ResilientDB Knowledge Base Results + +**Query:** {query} +**Category:** {domain} +**Result Type:** {result_type.replace('_', ' ').title()} + +--- + +{content} + +--- +""" + # Add additional sections if present + if "technical_deep_dive" in result_dict: + formatted_result += f"\n**🔧 Technical Details:**\n```json\n{json.dumps(result_dict['technical_deep_dive'], indent=2)}\n```\n" + + if "implementation_guidance" in result_dict: + formatted_result += f"\n{result_dict['implementation_guidance']}\n" + + if "further_exploration" in result_dict: + formatted_result += f"\n{result_dict['further_exploration']}\n" + + result = formatted_result + else: + result = str(result_dict) + + return f"{result}\n\n💡 **Tip:** This information comes from the comprehensive ResilientDB knowledge base.\nFor more details, ask follow-up questions or try a different category!" + + except Exception as e: + return f""" +❌ **Error querying ResilientDB knowledge base:** {str(e)} + +💡 **Troubleshooting:** +1. 
# Example: Function to parse Dockerfile directives
async def parse_dockerfile(owner:str, repo:str, branch:str="main") -> List[str]:
    """
    Return the meaningful lines of the repository's top-level ``Dockerfile``.

    The file is fetched with ``fetch_raw_file``. Each line is stripped of
    surrounding whitespace; blank lines and lines starting with ``#`` are
    dropped. Continuation lines are NOT joined: every physical line is
    returned as its own entry.

    Args:
        owner: GitHub account or organisation that owns the repository.
        repo: Repository name.
        branch: Branch to read from; defaults to ``"main"``.

    Returns:
        The remaining lines in file order, or an empty list when the
        Dockerfile is missing or empty.
    """
    raw_text = await fetch_raw_file(owner, repo, "Dockerfile", branch)
    if not raw_text:
        return []

    stripped_lines = (raw.strip() for raw in raw_text.splitlines())
    return [entry for entry in stripped_lines if entry and not entry.startswith("#")]
async def analyze_imports(owner: str, repo: str, branch: str = "main", max_files: int = 5) -> Optional[str]:
    """
    Render a small import graph of a repository as a base64-encoded PNG.

    The repository tree is fetched with ``fetch_repo_tree``. The first
    ``max_files`` ``.py`` files whose content can be fetched are scanned line
    by line: every line that starts (at column 0) with ``import `` or
    ``from `` adds an edge from the file path to the second
    whitespace-separated token on that line. The resulting directed graph is
    drawn with a spring layout.

    Args:
        owner: GitHub account or organisation that owns the repository.
        repo: Repository name.
        branch: Branch to inspect; defaults to ``"main"``.
        max_files: Maximum number of Python files to scan (default 5, which
            keeps the picture readable and limits the number of fetches).

    Returns:
        The PNG image as a base64 string, or ``None`` when the repository
        tree could not be fetched.
    """
    # Fetch repo file metadata list from GitHub
    try:
        files_meta = await fetch_repo_tree(owner, repo, branch)
    except Exception:
        # A bare ``except:`` would also swallow asyncio.CancelledError and
        # KeyboardInterrupt; only ordinary failures mean "no graph".
        return None

    g = nx.DiGraph()
    scanned = 0
    for f in files_meta:
        if scanned >= max_files:
            break
        filepath = f["path"]
        if not filepath.endswith(".py"):
            continue
        content = await fetch_raw_file(owner, repo, filepath, branch)
        if content is None:
            continue
        g.add_node(filepath)
        for line in content.splitlines():
            if line.startswith(("import ", "from ")):
                tokens = line.split()
                # A dangling "import " / "from " has no module token.
                if len(tokens) > 1:
                    g.add_edge(filepath, tokens[1])
        scanned += 1

    # Use spring layout to spread out nodes nicely
    pos = nx.spring_layout(g, k=0.5, iterations=50)

    fig = plt.figure(figsize=(14, 12))
    try:
        nx.draw_networkx(
            g,
            pos=pos,
            with_labels=True,
            font_size=12,
            node_size=800,
            node_color="lightgreen",
            edge_color="gray",
            arrowsize=15,
            arrowstyle='->'
        )
        plt.axis('off')
        plt.tight_layout()

        # Output graph as base64 encoded PNG string for UI display
        buf = io.BytesIO()
        plt.savefig(buf, format="png")
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode("utf-8")
# Extension -> human-readable language label used by get_file_type().
_EXTENSION_LANGUAGES = {
    '.py': 'Python',
    '.js': 'JavaScript',
    '.jsx': 'React/JavaScript',
    '.ts': 'TypeScript',
    '.tsx': 'React/TypeScript',
    '.java': 'Java',
    '.cpp': 'C++',
    '.c': 'C',
    '.go': 'Go',
    '.rs': 'Rust',
    '.rb': 'Ruby',
    '.php': 'PHP',
    '.swift': 'Swift',
    '.kt': 'Kotlin',
    '.scala': 'Scala',
    '.sh': 'Shell script',
    '.md': 'Markdown',
    '.json': 'JSON',
    '.yaml': 'YAML',
    '.yml': 'YAML',
    '.html': 'HTML',
    '.css': 'CSS',
}


def get_file_type(filepath: str) -> str:
    """Identify a file's language from its extension.

    Matching is done on the end of the path and ignores case, so
    ``Main.PY`` and ``main.py`` are both reported as Python.

    Args:
        filepath: File name or path to classify.

    Returns:
        The language label for the first known extension the path ends
        with, or ``'unknown'`` when no extension matches.
    """
    lowered = filepath.lower()
    for ext, lang in _EXTENSION_LANGUAGES.items():
        if lowered.endswith(ext):
            return lang

    return 'unknown'
async def split_python_functions(file_content: str) -> List[str]:
    """Split Python source into its top-level function and class definitions.

    The source is parsed with ``ast`` and every top-level ``def``,
    ``async def`` and ``class`` statement is returned as the text of the
    physical lines it occupies, including any decorator lines above it.

    Args:
        file_content: Python source code.

    Returns:
        One string per top-level definition, in source order.

    Raises:
        SyntaxError: If ``file_content`` is not valid Python (raised by
            ``ast.parse``).
    """
    tree = ast.parse(file_content)
    lines = file_content.split('\n')
    definition_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

    funcs = []
    for node in ast.iter_child_nodes(tree):
        if not isinstance(node, definition_types):
            continue
        # ``node.lineno`` points at the def/class keyword; decorators sit on
        # earlier lines and belong to the definition as well.
        first_line = min([node.lineno] + [dec.lineno for dec in node.decorator_list])
        start_line = first_line - 1
        end_line = node.end_lineno if node.end_lineno else node.lineno
        funcs.append('\n'.join(lines[start_line:end_line]))

    return funcs


def _matching_brace_end(text: str, open_idx: int) -> int:
    """Return the index just past the brace closing the one at ``open_idx``, or -1 if it never closes."""
    depth = 0
    for pos in range(open_idx, len(text)):
        ch = text[pos]
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return pos + 1
    return -1


def extract_js_functions(file_content: str, limit: int = 10) -> List[str]:
    """Extract outermost function, arrow-function and class blocks from JS/TS.

    Headers are located with regular expressions and each body is delimited
    by counting braces, so bodies containing nested blocks are returned in
    full. Brace counting is purely textual: braces inside strings, template
    literals or comments are counted too, and a block whose braces never
    balance is skipped.

    Args:
        file_content: JavaScript or TypeScript source code.
        limit: Maximum number of blocks to return (default 10, to avoid
            huge responses).

    Returns:
        Up to ``limit`` blocks in source order. Definitions nested inside
        an already returned block are not listed separately.
    """
    import re

    # Patterns for the headers of function declarations, arrow functions
    # assigned to a const, and classes; each ends on the opening brace.
    header_patterns = (
        r'(?:export\s+)?(?:async\s+)?function\s+\w+\s*\([^)]*\)\s*\{',
        r'(?:export\s+)?const\s+\w+\s*=\s*(?:async\s*)?\([^)]*\)\s*=>\s*\{',
        r'class\s+\w+\s*(?:extends\s+\w+\s*)?\{',
    )

    spans = []
    for pattern in header_patterns:
        for match in re.finditer(pattern, file_content):
            end = _matching_brace_end(file_content, match.end() - 1)
            if end != -1:
                spans.append((match.start(), end))
    spans.sort()

    funcs = []
    covered_until = 0
    for start, end in spans:
        if len(funcs) >= limit:
            break
        if start < covered_until:
            # Nested inside a block that was already captured.
            continue
        funcs.append(file_content[start:end])
        covered_until = end

    return funcs
# Static, in-memory knowledge graph: node name -> names of related nodes.
knowledge_graph = {
    "FunctionA": ["Module1", "PatternX", "Issue#123"],
    "FunctionB": ["Module2", "Bug#456"],
    "ClassX": ["Module1", "ConceptY"],
}


@mcp.tool(name="KGraphQuery")
async def kgraph_query(node_name: str):
    """
    Look up the nodes related to ``node_name`` in the in-memory knowledge graph.

    Args:
        node_name: Name of the node to look up in ``knowledge_graph``.

    Returns:
        A dict with the queried name under ``"node"`` and its related node
        names under ``"related_nodes"`` (an empty list for unknown nodes).
    """
    neighbours = knowledge_graph[node_name] if node_name in knowledge_graph else []
    return {"node": node_name, "related_nodes": neighbours}
http://localhost:8005/mcp/tools") + print("\n With auth (should succeed):") + print(" curl -H 'Authorization: Bearer YOUR_TOKEN' http://localhost:8005/mcp/tools") + print("=" * 60) + + mcp.run(transport="streamable-http", path="/mcp", port=8005) \ No newline at end of file