diff --git a/BQ_MCP_Server_Manual.markdown b/BQ_MCP_Server_Manual.markdown
new file mode 100644
index 0000000..8091528
--- /dev/null
+++ b/BQ_MCP_Server_Manual.markdown
@@ -0,0 +1,53 @@
+# How to Run BQ MCP Server Locally
+
+## 1. Install MCP Toolbox
+Install the [MCP Toolbox](https://googleapis.github.io/genai-toolbox/) using Homebrew:
+```bash
+brew install mcp-toolbox
+```
+
+## 2. Install Google Cloud CLI
+Download and install the Google Cloud CLI by following the instructions at:
+[Google Cloud CLI Installation](https://cloud.google.com/sdk/docs/install)
+
+## 3. Authenticate with Google Account for Local CLI Usage
+Set up Application Default Credentials (ADC) for local development:
+[Set Up ADC for Local Development](https://cloud.google.com/docs/authentication/set-up-adc-local-dev-environment)
+
+Run the following command to log in:
+```bash
+gcloud auth application-default login
+```
+
+## 4. Create Tools Configuration File
+Create a [`tools.yaml`](./tools.yaml) file for the MCP Server. Refer to the documentation for details:
+[BigQuery Tools Configuration](https://googleapis.github.io/genai-toolbox/resources/tools/bigquery/)
+
+## 5. Run MCP Server Locally
+Start the MCP Server with the configuration file:
+```bash
+toolbox --tools-file "tools.yaml"
+```
+
+### 5.1 Run MCP Server with UI
+To enable the UI, include the `--ui` flag:
+```bash
+toolbox --tools-file "tools.yaml" --ui
+```
+
+## 6. Install MCP Inspector
+Install the MCP Inspector tool:
+[MCP Inspector GitHub](https://github.com/modelcontextprotocol/inspector)
+
+Run the following command to download and start it:
+```bash
+npx @modelcontextprotocol/inspector
+```
+
+## 7. Connect MCP Inspector to Your MCP Server
+1. After starting the Inspector, the console output will include a link with a token. Open this link in your browser to access the UI.
+2. In the web interface, select **Streamable HTTP** as the Transport Type.
+3. Enter `http://127.0.0.1:5000/mcp` as the URL.
+4. Click the **Connect** button.
+5. Verify the connection by pulling the list of tables to ensure everything is functioning correctly.
+6. PROFIT!
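
As an optional sanity check alongside the Inspector, the same endpoint can be exercised from Python with the `openai-agents` SDK used elsewhere in this repo. This is a minimal sketch, assuming the Toolbox server from step 5 is already running on port 5000 and that the SDK's `MCPServerStreamableHttp.list_tools()` helper is available in the installed version:

```python
import asyncio

from agents.mcp import MCPServerStreamableHttp


async def main():
    # Same endpoint the MCP Inspector connects to in step 7.
    server = MCPServerStreamableHttp(
        name="mcp_bigquery_server",
        params={"url": "http://127.0.0.1:5000/mcp"},
    )
    await server.connect()
    try:
        tools = await server.list_tools()
        # Should print the tool names defined in tools.yaml, e.g. execute_sql_tool.
        print([tool.name for tool in tools])
    finally:
        await server.cleanup()


if __name__ == "__main__":
    asyncio.run(main())
```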
diff --git a/ai_clients/open_ai/master_agent.py b/ai_clients/open_ai/master_agent.py
index f05728d..11fe95a 100644
--- a/ai_clients/open_ai/master_agent.py
+++ b/ai_clients/open_ai/master_agent.py
@@ -1,5 +1,6 @@
 from agents import Agent, WebSearchTool, Runner
 
+from ai_clients.open_ai.sub_agents.query_optimization_agent import QueryOptimizationClient
 from common.classes import AIClient
 from prompts.master_agent_prompt import master_agent_prompt
 
@@ -7,16 +8,26 @@ class OpenAIClient(AIClient):
 
     def __init__(self):
         self.client = None
+        self.optimization_agent = QueryOptimizationClient()
 
-    def setup(self):
+    async def setup(self):
+        await self.optimization_agent.setup()
         self.client = Agent(
             name="master_agent",
+            model="gpt-4.1",
             instructions=master_agent_prompt,
             tools=[
-                WebSearchTool()
+                WebSearchTool(),
+                self.optimization_agent.client.as_tool(
+                    tool_name="bigquery_query_optimizer_ai_agent",
+                    tool_description="This AI agent optimizes Google BigQuery SQL queries for performance and cost. It analyzes queries, identifies inefficiencies, and provides rewritten versions with clear explanations.",
+                )
             ]
         )
 
+    async def destroy(self):
+        await self.optimization_agent.destroy()
+
     async def chat(self, user_request: str):
         if self.client is None:
             raise Exception("Client is not initialized, run setup method first")
diff --git a/ai_clients/open_ai/sub_agents/query_optimization_agent.py b/ai_clients/open_ai/sub_agents/query_optimization_agent.py
new file mode 100644
index 0000000..b3d050e
--- /dev/null
+++ b/ai_clients/open_ai/sub_agents/query_optimization_agent.py
@@ -0,0 +1,50 @@
+from agents import Agent, WebSearchTool, Runner
+from agents.mcp import MCPServerStreamableHttp
+
+from common.classes import AIClient
+from prompts.bigquery.query_optimization_agent_prompt import query_optimization_agent_prompt
+
+query_optimization_agent = Agent(
+    name='query_optimization_agent',
+    instructions=query_optimization_agent_prompt,
+    tools=[
+        WebSearchTool(),
+    ]
+)
+
+
+class QueryOptimizationClient(AIClient):
+    def __init__(self):
+        self.client = None
+        self.bq_mcp_server = MCPServerStreamableHttp(
+            name="mcp_bigquery_server",
+            params={
+                "url": "http://127.0.0.1:5000/mcp",
+            }
+        )
+
+    async def setup(self):
+        try:
+            await self.bq_mcp_server.connect()
+        except Exception as e:
+            raise Exception(f"Failed to connect to MCP BigQuery server: {e}")
+
+        self.client = Agent(
+            name='query_optimization_agent',
+            model="gpt-5",
+            instructions=query_optimization_agent_prompt,
+            tools=[
+                WebSearchTool(),
+            ],
+            mcp_servers=[self.bq_mcp_server]
+        )
+
+    async def destroy(self):
+        await self.bq_mcp_server.cleanup()
+
+    async def chat(self, user_request: str):
+        if self.client is None:
+            raise Exception("Client is not initialized, run setup method first")
+
+        result = await Runner.run(self.client, user_request)
+        return result.final_output
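
The new client follows the async setup → chat → destroy lifecycle defined by `AIClient` in `common/classes.py`. A minimal sketch of driving it directly, assuming the local MCP server is running and `OPENAI_API_KEY` is exported (the table name below is only a placeholder):

```python
import asyncio

from ai_clients.open_ai.sub_agents.query_optimization_agent import QueryOptimizationClient


async def main():
    optimizer = QueryOptimizationClient()
    await optimizer.setup()  # connects to the local MCP BigQuery server
    try:
        optimized_sql = await optimizer.chat(
            "Optimize: SELECT * FROM `my-project.my-dataset.events` LIMIT 100"
        )
        print(optimized_sql)
    finally:
        await optimizer.destroy()  # closes the MCP connection


if __name__ == "__main__":
    asyncio.run(main())
```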
diff --git a/assistant_agent_module/routes.py b/assistant_agent_module/routes.py
index f6a98c5..318d0ea 100644
--- a/assistant_agent_module/routes.py
+++ b/assistant_agent_module/routes.py
@@ -1,19 +1,16 @@
-from fastapi import APIRouter
-from pydantic import BaseModel
+from fastapi import APIRouter, Body
 
 from ai_clients.open_ai.master_agent import OpenAIClient
+# from ai_clients.open_ai.sub_agents.query_optimization_agent import QueryOptimizationClient
 from common.classes import AIAssistant
 
 router = APIRouter(prefix="/agent", tags=["AI Agent"])
 
 
-class AssistRequest(BaseModel):
-    message: str
-
-
 @router.post("/assist")
-async def query_handler(request: AssistRequest):
-    user_request = request.message
+async def query_handler(user_request: str = Body(..., media_type="text/plain")):
     ai_assistant = AIAssistant(client=OpenAIClient)
+    await ai_assistant.initialize()
     response = await ai_assistant.chat(user_request)
+    await ai_assistant.cleanup()
     return response
diff --git a/common/classes.py b/common/classes.py
index fd09d13..9496b00 100644
--- a/common/classes.py
+++ b/common/classes.py
@@ -9,15 +9,25 @@ async def chat(self, user_request: str):
         pass
 
     @abstractmethod
-    def setup(self):
+    async def setup(self):
         """Initialization agent with sub-agents and internal tools like MCP servers"""
         pass
 
+    @abstractmethod
+    async def destroy(self):
+        """Destroy the agent and its sub-agents and disconnect from MCP servers"""
+        pass
+
 
 class AIAssistant:
     def __init__(self, client: Type[AIClient]):
         self.client = client()
-        self.client.setup()
+
+    async def initialize(self):
+        await self.client.setup()
+
+    async def cleanup(self):
+        await self.client.destroy()
 
     async def chat(self, user_request: str):
         return await self.client.chat(user_request)
\ No newline at end of file
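
With these changes the endpoint accepts a raw text body rather than a JSON payload, and each request runs the full initialize → chat → cleanup lifecycle. Assuming the FastAPI app is served locally on the default uvicorn port (the app entrypoint itself is not part of this diff), a request might look like:

```bash
curl -X POST http://127.0.0.1:8000/agent/assist \
  -H 'Content-Type: text/plain' \
  --data-binary 'Optimize this query: SELECT * FROM `my-project.my-dataset.events`'
```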
diff --git a/prompts/bigquery/query_optimization_agent_prompt.py b/prompts/bigquery/query_optimization_agent_prompt.py
new file mode 100644
index 0000000..f9a64da
--- /dev/null
+++ b/prompts/bigquery/query_optimization_agent_prompt.py
@@ -0,0 +1,56 @@
+query_optimization_agent_prompt = """
+You are a BigQuery specialist, and your primary goal is to optimize provided SQL queries for Google BigQuery.
+Focus on improving efficiency, reducing execution time, and minimizing costs by applying best practices.
+
+You must not use the following statements under any circumstances, as the query should only retrieve or analyze data, not modify it or alter schemas:
+- DELETE
+- DROP
+- INSERT
+- UPDATE
+- ALTER
+- MERGE
+- TRUNCATE
+- CREATE
+- CREATE TABLE
+- CREATE SCHEMA
+- CREATE VIEW
+- CREATE MATERIALIZED VIEW
+- CREATE OR REPLACE TABLE
+- CREATE OR REPLACE VIEW
+- DROP SCHEMA
+- DROP VIEW
+- CALL
+
+To achieve this, use web search and any available tools to research optimizations, and consult the official BigQuery documentation, including but not limited to:
+
+- https://cloud.google.com/bigquery/docs/best-practices-performance-compute
+- https://cloud.google.com/bigquery/docs/best-practices-performance-functions
+
+Always analyze the query step-by-step: first, identify potential issues or inefficiencies; then, apply optimizations; and finally, verify improvements using reasoning or tools if needed.
+
+Key optimization guidelines:
+- Avoid common SQL anti-patterns, such as unnecessary subqueries, correlated subqueries, or overuse of wildcard SELECT statements (e.g., prefer explicit column selection over SELECT *).
+- For partitioned tables, ensure the WHERE clause includes a filter on the partitioning column (e.g., _PARTITIONTIME or _PARTITIONDATE) to enable partition pruning and reduce scanned data.
+- Check if JOINs can be avoided entirely; for example, if the data is already denormalized or if a single table contains the required joined columns, refactor accordingly to simplify the query.
+- When JOINs are necessary, place the larger table first (on the left side) to optimize join order and performance.
+- Apply filters in the WHERE clause as early as possible. For JOINs, include filters on both sides to eliminate unnecessary data before the join operation.
+- Order filters in the WHERE clause so that the most selective ones (those eliminating the most rows) come first.
+- Remember that LIMIT does not reduce the amount of data scanned in BigQuery; advise using WHERE clauses for effective filtering instead.
+
+Output format rules (must follow):
+- Return only the final optimized SQL query.
+- Place inline comments (`-- comment`) directly at the location of each change inside the query.
+- Each comment must explain what was changed and why (e.g., `-- replaced SELECT * with explicit columns to reduce scanned data`).
+- No explanations above or below the query; comments go inline only.
+- Don't add comments to parts you haven't changed.
+- If no optimizations are possible, return the original query with inline comments like `-- no change needed here`.
+- Never output text outside the SQL code block.
+
+Example of required output style:
+
+```sql
+SELECT user_id, created_at -- replaced SELECT * with explicit columns for efficiency
+FROM my_table
+WHERE _PARTITIONDATE >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) -- added partition filter to reduce scanned data
+```
+"""
diff --git a/prompts/master_agent_prompt.py b/prompts/master_agent_prompt.py
index 3814c75..95d4740 100644
--- a/prompts/master_agent_prompt.py
+++ b/prompts/master_agent_prompt.py
@@ -1,3 +1,12 @@
 master_agent_prompt = """
-You are the Assistant AI, a central orchestrator designed to manage and fulfill all types of user requests
+You are the Assistant AI, a central orchestrator designed to manage and fulfill all types of user requests.
+
+Your primary responsibility:
+- Always delegate the user’s request without changes to the most appropriate specialized subagent.
+- Do NOT attempt to optimize, rewrite, or otherwise modify queries or text that subagents return.
+- The subagent’s output must be delivered to the user verbatim, without editing, formatting, or explanation.
+
+Fallback behavior:
+- Only if no subagent exists for the request may you generate a direct response.
+- In such cases, you must clearly state: "⚠️ This is a generic response, as no specialized subagent was available. It may not fully fit your request."
 """
diff --git a/tools.yaml b/tools.yaml
new file mode 100644
index 0000000..54fa0df
--- /dev/null
+++ b/tools.yaml
@@ -0,0 +1,76 @@
+# Define data sources
+sources:
+  my-bigquery-source:
+    kind: bigquery
+    project: fiverr-bigquery-dev
+    # authentication defaults to ADC (Application Default Credentials)
+    # If running locally, ensure `gcloud auth application-default login` is done.
+
+# Expose tools from that source
+tools:
+  ask_data_insights:
+    kind: bigquery-conversational-analytics
+    source: my-bigquery-source
+    description: |
+      Use this tool to perform data analysis, get insights, or answer complex
+      questions about the contents of specific BigQuery tables.
+
+  execute_sql_tool:
+    kind: bigquery-execute-sql
+    source: my-bigquery-source
+    description: Use this tool to execute a SQL statement.
+
+  forecast_tool:
+    kind: bigquery-forecast
+    source: my-bigquery-source
+    description: Use this tool to forecast time series data in BigQuery.
+
+  bigquery_get_dataset_info:
+    kind: bigquery-get-dataset-info
+    source: my-bigquery-source
+    description: Use this tool to get dataset metadata.
+
+  bigquery_get_table_info:
+    kind: bigquery-get-table-info
+    source: my-bigquery-source
+    description: Use this tool to get table metadata.
+
+  bigquery_list_dataset_ids:
+    kind: bigquery-list-dataset-ids
+    source: my-bigquery-source
+    description: Use this tool to list dataset IDs in the project.
+
+  bigquery_list_table_ids:
+    kind: bigquery-list-table-ids
+    source: my-bigquery-source
+    description: Use this tool to list table IDs in a dataset.
+
+# # Example: Querying a user table in BigQuery
+# search_users_bq:
+#   kind: bigquery-sql
+#   source: my-bigquery-source
+#   statement: |
+#     SELECT
+#       id,
+#       name,
+#       email
+#     FROM
+#       `my-project.my-dataset.users`
+#     WHERE
+#       id = @id OR email = @email;
+#   description: |
+#     Use this tool to get information for a specific user.
+#     Takes an id number or a name and returns info on the user.
+#
+#     Example:
+#     {{
+#       "id": 123,
+#       "name": "Alice",
+#     }}
+#   parameters:
+#     - name: id
+#       type: integer
+#       description: User ID
+#     - name: email
+#       type: string
+#       description: Email address of the user
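
If only a subset of these tools should be exposed, the Toolbox configuration format also supports grouping tools into named toolsets that a client can load selectively. A possible addition to the end of `tools.yaml` (the group name here is arbitrary) might look like:

```yaml
# Optional: group tools into a named toolset
toolsets:
  bigquery_read_only:
    - execute_sql_tool
    - bigquery_get_dataset_info
    - bigquery_get_table_info
    - bigquery_list_dataset_ids
    - bigquery_list_table_ids
```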