From 32149f5bd1cfa07c44388146a752c3fa861f4d3d Mon Sep 17 00:00:00 2001 From: rocknroll Date: Tue, 1 Jul 2025 22:32:01 +0900 Subject: [PATCH 1/3] feat: make embedding services optional - Allow running without EMBEDDING_PROVIDER configuration - Disable embedding tools when no provider is set - Update README with optional embedding examples - Add integration docs for pre-running servers Fixes runtime errors for database-only usage --- README.md | 53 +++++++++++++++++++++++++++++++++++++++------------ src/config.py | 7 ++++--- src/server.py | 18 ++++++++++------- 3 files changed, 56 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 717d9e9..8026505 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ The MCP MariaDB Server exposes a set of tools for interacting with MariaDB datab - Retrieving table schemas - Executing safe, read-only SQL queries - Creating and managing vector stores for embedding-based search -- Integrating with embedding providers (currently OpenAI, Gemini, and HuggingFace) +- Integrating with embedding providers (currently OpenAI, Gemini, and HuggingFace) (optional) --- @@ -63,7 +63,9 @@ The MCP MariaDB Server exposes a set of tools for interacting with MariaDB datab - Creates a new database if it doesn't exist. - Parameters: `database_name` (string, required) -### Vector Store & Embedding Tools +### Vector Store & Embedding Tools (optional) + +**Note**: These tools are only available when `EMBEDDING_PROVIDER` is configured. If no embedding provider is set, these tools will be disabled. - **create_vector_store** - Creates a new vector store (table) for embeddings. @@ -89,6 +91,10 @@ The MCP MariaDB Server exposes a set of tools for interacting with MariaDB datab ## Embeddings & Vector Store +### Overview + +The MCP MariaDB Server provides **optional** embedding and vector store capabilities. 
These features can be enabled by configuring an embedding provider, or completely disabled if you only need standard database operations. + ### Supported Providers - **OpenAI** @@ -97,11 +103,10 @@ The MCP MariaDB Server exposes a set of tools for interacting with MariaDB datab ### Configuration -- `EMBEDDING_PROVIDER`: Set to `openai` (default option), can change it to required providers +- `EMBEDDING_PROVIDER`: Set to `openai`, `gemini`, `huggingface`, or leave unset to disable - `OPENAI_API_KEY`: Required if using OpenAI embeddings -- GEMINI_API_KEY`: Required if using Gemini embeddings -- Open models from HUGGINGFACE: Required open model currently provided option for "intfloat/multilingual-e5-large-instruct" & "BAAI/bge-m3" - +- `GEMINI_API_KEY`: Required if using Gemini embeddings +- `HF_MODEL`: Required if using HuggingFace embeddings (e.g., "intfloat/multilingual-e5-large-instruct" or "BAAI/bge-m3") ### Model Selection - Default and allowed models are configurable in code (`DEFAULT_OPENAI_MODEL`, `ALLOWED_OPENAI_MODELS`) @@ -130,13 +135,14 @@ All configuration is via environment variables (typically set in a `.env` file): | `DB_NAME` | Default database (optional; can be set per query) | No | | | `MCP_READ_ONLY` | Enforce read-only SQL mode (`true`/`false`) | No | `true` | | `MCP_MAX_POOL_SIZE` | Max DB connection pool size | No | `10` | -| `EMBEDDING_PROVIDER` | Embedding provider (`openai`/`gemini`/`huggingface`) | No | `openai` | -| `OPENAI_API_KEY` | API key for OpenAI embeddings | Yes (if using embeddings) | | -| `GEMINII_API_KEY` | API key for Gemini embeddings | Yes (if using embeddings) | | -| `HF_MODEL` | Open models from Huggingface | Yes (if using embeddings) | | +| `EMBEDDING_PROVIDER` | Embedding provider (`openai`/`gemini`/`huggingface`) | No |`None`(Disabled)| +| `OPENAI_API_KEY` | API key for OpenAI embeddings | Yes (if EMBEDDING_PROVIDER=openai) | | +| `GEMINI_API_KEY` | API key for Gemini embeddings | Yes (if EMBEDDING_PROVIDER=gemini) | | 
+| `HF_MODEL` | Open models from Huggingface | Yes (if EMBEDDING_PROVIDER=huggingface) | | #### Example `.env` file +**With Embedding Support (OpenAI):** ```dotenv DB_HOST=localhost DB_USER=your_db_user @@ -153,6 +159,17 @@ GEMINI_API_KEY=AI... HF_MODEL="BAAI/bge-m3" ``` +**Without Embedding Support:** +```dotenv +DB_HOST=localhost +DB_USER=your_db_user +DB_PASSWORD=your_db_password +DB_PORT=3306 +DB_NAME=your_default_database +MCP_READ_ONLY=true +MCP_MAX_POOL_SIZE=10 +``` + --- ## Installation & Setup @@ -244,9 +261,9 @@ HF_MODEL="BAAI/bge-m3" ``` --- -## Integration - Claude desktop/Cursor/Windsurf +## Integration - Claude desktop/Cursor/Windsurf/VSCode -```python +```json { "mcpServers": { "MariaDB_Server": { @@ -262,6 +279,18 @@ HF_MODEL="BAAI/bge-m3" } } ``` +or +**If already running MCP server** +```json +{ + "servers": { + "mariadb-mcp-server": { + "url": "http://{host}:9001/sse", + "type": "sse" + } + } +} +``` --- ## Logging diff --git a/src/config.py b/src/config.py index b9364a7..cc33919 100644 --- a/src/config.py +++ b/src/config.py @@ -59,7 +59,8 @@ # --- Embedding Configuration --- # Provider selection ('openai' or 'gemini' or 'huggingface') -EMBEDDING_PROVIDER = os.getenv("EMBEDDING_PROVIDER", "openai").lower() +EMBEDDING_PROVIDER = os.getenv("EMBEDDING_PROVIDER") +EMBEDDING_PROVIDER = EMBEDDING_PROVIDER.lower() if EMBEDDING_PROVIDER else None # API Keys OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") @@ -86,8 +87,8 @@ logger.error("EMBEDDING_PROVIDER is 'huggingface' but HF_MODEL is missing.") raise ValueError("HuggingFace model is required when EMBEDDING_PROVIDER is 'huggingface'.") else: - logger.error(f"Invalid EMBEDDING_PROVIDER specified: '{EMBEDDING_PROVIDER}'. Use 'openai' or 'gemini' or 'huggingface'.") - raise ValueError(f"Invalid EMBEDDING_PROVIDER: '{EMBEDDING_PROVIDER}'.") + EMBEDDING_PROVIDER = None + logger.info(f"No EMBEDDING_PROVIDER selected or it is set to None. 
Disabling embedding features.") logger.info(f"Read-only mode: {MCP_READ_ONLY}") logger.info(f"Logging to console and to file: {LOG_FILE_PATH} (Level: {LOG_LEVEL}, MaxSize: {LOG_MAX_BYTES}B, Backups: {LOG_BACKUP_COUNT})") \ No newline at end of file diff --git a/src/server.py b/src/server.py index dce8af3..b1dcec1 100644 --- a/src/server.py +++ b/src/server.py @@ -12,14 +12,17 @@ # Import configuration settings from config import ( DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME, - MCP_READ_ONLY, MCP_MAX_POOL_SIZE, logger + MCP_READ_ONLY, MCP_MAX_POOL_SIZE, EMBEDDING_PROVIDER, + logger ) # Import EmbeddingService for vector store creation from embeddings import EmbeddingService # Singleton instance for embedding service -embedding_service = EmbeddingService() +embedding_service = None +if EMBEDDING_PROVIDER is not None: + embedding_service = EmbeddingService() from asyncmy.errors import Error as AsyncMyError @@ -698,11 +701,12 @@ def register_tools(self): self.mcp.add_tool(self.get_table_schema) self.mcp.add_tool(self.execute_sql) self.mcp.add_tool(self.create_database) - self.mcp.add_tool(self.create_vector_store) - self.mcp.add_tool(self.list_vector_stores) - self.mcp.add_tool(self.delete_vector_store) - self.mcp.add_tool(self.insert_docs_vector_store) - self.mcp.add_tool(self.search_vector_store) + if EMBEDDING_PROVIDER is not None: + self.mcp.add_tool(self.create_vector_store) + self.mcp.add_tool(self.list_vector_stores) + self.mcp.add_tool(self.delete_vector_store) + self.mcp.add_tool(self.insert_docs_vector_store) + self.mcp.add_tool(self.search_vector_store) logger.info("Registered MCP tools explicitly.") # --- Async Main Server Logic --- From 3b5a4d28bb430090b126ebdca8817ac767fede19 Mon Sep 17 00:00:00 2001 From: rocknroll17 <53882578+rocknroll17@users.noreply.github.com> Date: Thu, 3 Jul 2025 01:44:03 +0000 Subject: [PATCH 2/3] feat: add EXPLAIN query analysis tools --- README.md | 14 ++++++++++++++ src/server.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 
64 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8026505..ea204b8 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ The MCP MariaDB Server exposes a set of tools for interacting with MariaDB datab - Listing databases and tables - Retrieving table schemas - Executing safe, read-only SQL queries +- Query performance analysis with EXPLAIN and EXPLAIN EXTENDED +- Comprehensive tool usage guide for LLM self-discovery - Creating and managing vector stores for embedding-based search - Integrating with embedding providers (currently OpenAI, Gemini, and HuggingFace) (optional) @@ -63,6 +65,18 @@ The MCP MariaDB Server exposes a set of tools for interacting with MariaDB datab - Creates a new database if it doesn't exist. - Parameters: `database_name` (string, required) +### Query Performance Analysis Tools + +- **explain_query** + - Executes EXPLAIN on a SQL query to show the execution plan for performance analysis. + - Parameters: `sql_query` (string, required), `database_name` (string, required), `parameters` (list, optional) + - _Note: Helps analyze query performance and optimization opportunities. Does not execute the actual query._ + +- **explain_query_extended** + - Executes EXPLAIN EXTENDED on a SQL query to show detailed execution plan with additional information. + - Parameters: `sql_query` (string, required), `database_name` (string, required), `parameters` (list, optional) + - _Note: Provides comprehensive analysis including filtered rows percentage and extra optimization details._ + ### Vector Store & Embedding Tools (optional) **Note**: These tools are only available when `EMBEDDING_PROVIDER` is configured. If no embedding provider is set, these tools will be disabled. 
diff --git a/src/server.py b/src/server.py index b1dcec1..ca9d2b1 100644 --- a/src/server.py +++ b/src/server.py @@ -112,7 +112,7 @@ async def _execute_query(self, sql: str, params: Optional[tuple] = None, databas logger.error("Connection pool is not initialized.") raise RuntimeError("Database connection pool not available.") - allowed_prefixes = ('SELECT', 'SHOW', 'DESC', 'DESCRIBE', 'USE') + allowed_prefixes = ('SELECT', 'SHOW', 'DESC', 'DESCRIBE', 'USE', 'EXPLAIN') query_upper = sql.strip().upper() is_allowed_read_query = any(query_upper.startswith(prefix) for prefix in allowed_prefixes) @@ -359,6 +359,52 @@ async def create_database(self, database_name: str) -> Dict[str, Any]: logger.error(f"TOOL ERROR: create_database. {error_message} Error: {e}", exc_info=True) raise RuntimeError(f"{error_message} Reason: {str(e)}") + async def explain_query(self, sql_query: str, database_name: str, parameters: Optional[List[Any]] = None) -> List[Dict[str, Any]]: + """ + Executes EXPLAIN on a SQL query to show the execution plan. + This helps analyze query performance and optimization opportunities. + Example `parameters`: ["value1", 123] corresponding to %s placeholders in `sql_query`. + """ + logger.info(f"TOOL START: explain_query called. database_name={database_name}, sql_query={sql_query[:100]}, parameters={parameters}") + if database_name and not database_name.isidentifier(): + logger.warning(f"TOOL WARNING: explain_query called with invalid database_name: {database_name}") + raise ValueError(f"Invalid database name provided: {database_name}") + + # EXPLAIN 키워드를 쿼리 앞에 추가 + explain_sql = f"EXPLAIN {sql_query.strip()}" + param_tuple = tuple(parameters) if parameters is not None else None + + try: + results = await self._execute_query(explain_sql, params=param_tuple, database=database_name) + logger.info(f"TOOL END: explain_query completed. 
Execution plan rows returned: {len(results)}.") + return results + except Exception as e: + logger.error(f"TOOL ERROR: explain_query failed for database_name={database_name}, sql_query={sql_query[:100]}, parameters={parameters}: {e}", exc_info=True) + raise + + async def explain_query_extended(self, sql_query: str, database_name: str, parameters: Optional[List[Any]] = None) -> List[Dict[str, Any]]: + """ + Executes EXPLAIN EXTENDED on a SQL query to show detailed execution plan with additional information. + This provides more comprehensive analysis including filtered rows percentage and extra information. + Example `parameters`: ["value1", 123] corresponding to %s placeholders in `sql_query`. + """ + logger.info(f"TOOL START: explain_query_extended called. database_name={database_name}, sql_query={sql_query[:100]}, parameters={parameters}") + if database_name and not database_name.isidentifier(): + logger.warning(f"TOOL WARNING: explain_query_extended called with invalid database_name: {database_name}") + raise ValueError(f"Invalid database name provided: {database_name}") + + # EXPLAIN EXTENDED 키워드를 쿼리 앞에 추가 + explain_sql = f"EXPLAIN EXTENDED {sql_query.strip()}" + param_tuple = tuple(parameters) if parameters is not None else None + + try: + results = await self._execute_query(explain_sql, params=param_tuple, database=database_name) + logger.info(f"TOOL END: explain_query_extended completed. 
Extended execution plan rows returned: {len(results)}.") + return results + except Exception as e: + logger.error(f"TOOL ERROR: explain_query_extended failed for database_name={database_name}, sql_query={sql_query[:100]}, parameters={parameters}: {e}", exc_info=True) + raise + async def create_vector_store_tool(self, database_name: str, vector_store_name: str, @@ -701,12 +747,15 @@ def register_tools(self): self.mcp.add_tool(self.get_table_schema) self.mcp.add_tool(self.execute_sql) self.mcp.add_tool(self.create_database) + self.mcp.add_tool(self.explain_query) + self.mcp.add_tool(self.explain_query_extended) if EMBEDDING_PROVIDER is not None: self.mcp.add_tool(self.create_vector_store) self.mcp.add_tool(self.list_vector_stores) self.mcp.add_tool(self.delete_vector_store) self.mcp.add_tool(self.insert_docs_vector_store) self.mcp.add_tool(self.search_vector_store) + logger.info("Registered MCP tools explicitly.") # --- Async Main Server Logic --- From e530e4c133ffa57cb8b9787577332c8d73724d3c Mon Sep 17 00:00:00 2001 From: rocknroll Date: Sat, 5 Jul 2025 00:15:23 +0900 Subject: [PATCH 3/3] chore: translate Korean code comments to English --- src/server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server.py b/src/server.py index 7c17b2e..1e8bb6f 100644 --- a/src/server.py +++ b/src/server.py @@ -370,7 +370,7 @@ async def explain_query(self, sql_query: str, database_name: str, parameters: Op logger.warning(f"TOOL WARNING: explain_query called with invalid database_name: {database_name}") raise ValueError(f"Invalid database name provided: {database_name}") - # EXPLAIN 키워드를 쿼리 앞에 추가 + # Add EXPLAIN keyword to the query explain_sql = f"EXPLAIN {sql_query.strip()}" param_tuple = tuple(parameters) if parameters is not None else None @@ -393,7 +393,7 @@ async def explain_query_extended(self, sql_query: str, database_name: str, param logger.warning(f"TOOL WARNING: explain_query_extended called with invalid database_name: {database_name}") raise 
ValueError(f"Invalid database name provided: {database_name}") - # EXPLAIN EXTENDED 키워드를 쿼리 앞에 추가 + # Add EXPLAIN EXTENDED keyword to the query explain_sql = f"EXPLAIN EXTENDED {sql_query.strip()}" param_tuple = tuple(parameters) if parameters is not None else None