From 4a25f18a7e0778a86cfc58c13a1082edc3b8774f Mon Sep 17 00:00:00 2001
From: AayushTyagi1
Date: Tue, 11 Nov 2025 13:48:30 +0530
Subject: [PATCH 1/2] DA-1203 Modified Add Slow Running Queries tools

---
 src/tools/__init__.py |  21 +++++
 src/tools/query.py    | 183 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 203 insertions(+), 1 deletion(-)

diff --git a/src/tools/__init__.py b/src/tools/__init__.py
index 27c4946..56e7b2d 100644
--- a/src/tools/__init__.py
+++ b/src/tools/__init__.py
@@ -16,7 +16,14 @@
 
 # Query tools
 from .query import (
+    get_queries_not_selective,
+    get_queries_not_using_covering_index,
+    get_queries_using_primary_index,
+    get_queries_with_large_result_count,
+    get_queries_with_largest_response_sizes,
     get_schema_for_collection,
+    get_top_longest_running_queries,
+    get_top_most_frequent_queries,
     run_sql_plus_plus_query,
 )
 
@@ -47,6 +54,13 @@
     get_index_advisor_recommendations,
     list_indexes,
     get_cluster_health_and_services,
+    get_top_longest_running_queries,
+    get_top_most_frequent_queries,
+    get_queries_with_largest_response_sizes,
+    get_queries_with_large_result_count,
+    get_queries_using_primary_index,
+    get_queries_not_using_covering_index,
+    get_queries_not_selective,
 ]
 
 __all__ = [
@@ -65,6 +79,13 @@
     "get_index_advisor_recommendations",
     "list_indexes",
     "get_cluster_health_and_services",
+    "get_top_longest_running_queries",
+    "get_top_most_frequent_queries",
+    "get_queries_with_largest_response_sizes",
+    "get_queries_with_large_result_count",
+    "get_queries_using_primary_index",
+    "get_queries_not_using_covering_index",
+    "get_queries_not_selective",
     # Convenience
     "ALL_TOOLS",
 ]
diff --git a/src/tools/query.py b/src/tools/query.py
index d6a44d7..d3076d8 100644
--- a/src/tools/query.py
+++ b/src/tools/query.py
@@ -81,7 +81,6 @@ def run_sql_plus_plus_query(
         raise
 
 
-# Don't expose this function to the MCP server until we have a use case
 def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str, Any]]:
     """Run a query on the cluster object and return the results as a list of JSON objects."""
 
@@ -96,3 +95,185 @@ def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str,
     except Exception as e:
         logger.error(f"Error running query: {e}")
         raise
+
+
+def get_top_longest_running_queries(
+    ctx: Context, limit: int = 10
+) -> list[dict[str, Any]]:
+    """Get the top N longest running queries from the system:completed_requests catalog.
+
+    Args:
+        limit: Number of queries to return (default: 10)
+
+    Returns:
+        List of queries with their average service time and count
+    """
+    query = f"""
+    SELECT statement,
+        DURATION_TO_STR(avgServiceTime) AS avgServiceTime,
+        COUNT(1) AS queries
+    FROM system:completed_requests
+    WHERE UPPER(statement) NOT LIKE 'INFER %'
+        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
+        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
+    GROUP BY statement
+    LETTING avgServiceTime = AVG(STR_TO_DURATION(serviceTime))
+    ORDER BY avgServiceTime DESC
+    LIMIT {limit}
+    """
+
+    return run_cluster_query(ctx, query)
+
+
+def get_top_most_frequent_queries(
+    ctx: Context, limit: int = 10
+) -> list[dict[str, Any]]:
+    """Get the top N most frequent queries from the system:completed_requests catalog.
+
+    Args:
+        limit: Number of queries to return (default: 10)
+
+    Returns:
+        List of queries with their frequency count
+    """
+    query = f"""
+    SELECT statement,
+        COUNT(1) AS queries
+    FROM system:completed_requests
+    WHERE UPPER(statement) NOT LIKE 'INFER %'
+        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
+        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
+    GROUP BY statement
+    LETTING queries = COUNT(1)
+    ORDER BY queries DESC
+    LIMIT {limit}
+    """
+
+    return run_cluster_query(ctx, query)
+
+
+def get_queries_with_largest_response_sizes(
+    ctx: Context, limit: int = 10
+) -> list[dict[str, Any]]:
+    """Get queries with the largest response sizes from the system:completed_requests catalog.
+
+    Args:
+        limit: Number of queries to return (default: 10)
+
+    Returns:
+        List of queries with their average result size in bytes, KB, and MB
+    """
+    query = f"""
+    SELECT statement,
+        avgResultSize AS avgResultSizeBytes,
+        (avgResultSize / 1000) AS avgResultSizeKB,
+        (avgResultSize / 1000000) AS avgResultSizeMB,
+        COUNT(1) AS queries
+    FROM system:completed_requests
+    WHERE UPPER(statement) NOT LIKE 'INFER %'
+        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
+        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
+    GROUP BY statement
+    LETTING avgResultSize = AVG(resultSize)
+    ORDER BY avgResultSize DESC
+    LIMIT {limit}
+    """
+
+    return run_cluster_query(ctx, query)
+
+
+def get_queries_with_large_result_count(
+    ctx: Context, limit: int = 10
+) -> list[dict[str, Any]]:
+    """Get queries with the largest result counts from the system:completed_requests catalog.
+
+    Args:
+        limit: Number of queries to return (default: 10)
+
+    Returns:
+        List of queries with their average result count
+    """
+    query = f"""
+    SELECT statement,
+        avgResultCount,
+        COUNT(1) AS queries
+    FROM system:completed_requests
+    WHERE UPPER(statement) NOT LIKE 'INFER %'
+        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
+        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
+    GROUP BY statement
+    LETTING avgResultCount = AVG(resultCount)
+    ORDER BY avgResultCount DESC
+    LIMIT {limit}
+    """
+
+    return run_cluster_query(ctx, query)
+
+
+def get_queries_using_primary_index(
+    ctx: Context, limit: int = 10
+) -> list[dict[str, Any]]:
+    """Get queries that use a primary index from the system:completed_requests catalog.
+
+    Args:
+        limit: Number of queries to return (default: 10)
+
+    Returns:
+        List of queries that use primary indexes, ordered by result count
+    """
+    query = f"""
+    SELECT *
+    FROM system:completed_requests
+    WHERE phaseCounts.`primaryScan` IS NOT MISSING
+        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
+    ORDER BY resultCount DESC
+    LIMIT {limit}
+    """
+
+    return run_cluster_query(ctx, query)
+
+
+def get_queries_not_using_covering_index(
+    ctx: Context, limit: int = 10
+) -> list[dict[str, Any]]:
+    """Get queries that don't use a covering index from the system:completed_requests catalog.
+
+    Args:
+        limit: Number of queries to return (default: 10)
+
+    Returns:
+        List of queries that perform index scans but also require fetches (not covering)
+    """
+    query = f"""
+    SELECT *
+    FROM system:completed_requests
+    WHERE phaseCounts.`indexScan` IS NOT MISSING
+        AND phaseCounts.`fetch` IS NOT MISSING
+        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
+    ORDER BY resultCount DESC
+    LIMIT {limit}
+    """
+
+    return run_cluster_query(ctx, query)
+
+
+def get_queries_not_selective(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
+    """Get queries that are not very selective from the system:completed_requests catalog.
+
+    Args:
+        limit: Number of queries to return (default: 10)
+
+    Returns:
+        List of queries where index scans return significantly more documents than the final result
+    """
+    query = f"""
+    SELECT statement,
+        AVG(phaseCounts.`indexScan` - resultCount) AS diff
+    FROM system:completed_requests
+    WHERE phaseCounts.`indexScan` > resultCount
+    GROUP BY statement
+    ORDER BY diff DESC
+    LIMIT {limit};
+    """
+
+    return run_cluster_query(ctx, query)
From aa864e2f9bdebd3e0d98c2b718c1c15d79a95d27 Mon Sep 17 00:00:00 2001
From: AayushTyagi1
Date: Tue, 11 Nov 2025 14:18:51 +0530
Subject: [PATCH 2/2] SQL Injection Issue fixed

---
 src/tools/__init__.py | 20 ++++++++++----------
 src/tools/query.py    | 42 +++++++++++++++++++++---------------------
 2 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/tools/__init__.py b/src/tools/__init__.py
index 56e7b2d..6aef142 100644
--- a/src/tools/__init__.py
+++ b/src/tools/__init__.py
@@ -54,13 +54,13 @@
     get_index_advisor_recommendations,
     list_indexes,
     get_cluster_health_and_services,
+    get_queries_not_selective,
+    get_queries_not_using_covering_index,
+    get_queries_using_primary_index,
+    get_queries_with_large_result_count,
+    get_queries_with_largest_response_sizes,
     get_top_longest_running_queries,
     get_top_most_frequent_queries,
-    get_queries_with_largest_response_sizes,
-    get_queries_with_large_result_count,
-    get_queries_using_primary_index,
-    get_queries_not_using_covering_index,
-    get_queries_not_selective,
 ]
 
 __all__ = [
@@ -79,13 +79,13 @@
     "get_index_advisor_recommendations",
     "list_indexes",
     "get_cluster_health_and_services",
+    "get_queries_not_selective",
+    "get_queries_not_using_covering_index",
+    "get_queries_using_primary_index",
+    "get_queries_with_large_result_count",
+    "get_queries_with_largest_response_sizes",
     "get_top_longest_running_queries",
     "get_top_most_frequent_queries",
-    "get_queries_with_largest_response_sizes",
-    "get_queries_with_large_result_count",
-    "get_queries_using_primary_index",
-    "get_queries_not_using_covering_index",
-    "get_queries_not_selective",
     # Convenience
     "ALL_TOOLS",
 ]
diff --git a/src/tools/query.py b/src/tools/query.py
index d3076d8..c09e7c6 100644
--- a/src/tools/query.py
+++ b/src/tools/query.py
@@ -108,7 +108,7 @@ def get_top_longest_running_queries(
     Returns:
         List of queries with their average service time and count
     """
-    query = f"""
+    query = """
     SELECT statement,
         DURATION_TO_STR(avgServiceTime) AS avgServiceTime,
         COUNT(1) AS queries
@@ -119,10 +119,10 @@ def get_top_longest_running_queries(
     GROUP BY statement
     LETTING avgServiceTime = AVG(STR_TO_DURATION(serviceTime))
     ORDER BY avgServiceTime DESC
-    LIMIT {limit}
+    LIMIT $limit
     """
 
-    return run_cluster_query(ctx, query)
+    return run_cluster_query(ctx, query, limit=limit)
 
 
 def get_top_most_frequent_queries(
@@ -136,7 +136,7 @@ def get_top_most_frequent_queries(
     Returns:
         List of queries with their frequency count
     """
-    query = f"""
+    query = """
     SELECT statement,
         COUNT(1) AS queries
     FROM system:completed_requests
@@ -146,10 +146,10 @@ def get_top_most_frequent_queries(
     GROUP BY statement
     LETTING queries = COUNT(1)
     ORDER BY queries DESC
-    LIMIT {limit}
+    LIMIT $limit
     """
 
-    return run_cluster_query(ctx, query)
+    return run_cluster_query(ctx, query, limit=limit)
 
 
 def get_queries_with_largest_response_sizes(
@@ -163,7 +163,7 @@ def get_queries_with_largest_response_sizes(
     Returns:
         List of queries with their average result size in bytes, KB, and MB
     """
-    query = f"""
+    query = """
     SELECT statement,
         avgResultSize AS avgResultSizeBytes,
         (avgResultSize / 1000) AS avgResultSizeKB,
@@ -176,10 +176,10 @@ def get_queries_with_largest_response_sizes(
     GROUP BY statement
     LETTING avgResultSize = AVG(resultSize)
     ORDER BY avgResultSize DESC
-    LIMIT {limit}
+    LIMIT $limit
     """
 
-    return run_cluster_query(ctx, query)
+    return run_cluster_query(ctx, query, limit=limit)
 
 
 def get_queries_with_large_result_count(
@@ -193,7 +193,7 @@ def get_queries_with_large_result_count(
     Returns:
         List of queries with their average result count
     """
-    query = f"""
+    query = """
     SELECT statement,
         avgResultCount,
         COUNT(1) AS queries
@@ -204,10 +204,10 @@ def get_queries_with_large_result_count(
     GROUP BY statement
     LETTING avgResultCount = AVG(resultCount)
     ORDER BY avgResultCount DESC
-    LIMIT {limit}
+    LIMIT $limit
     """
 
-    return run_cluster_query(ctx, query)
+    return run_cluster_query(ctx, query, limit=limit)
 
 
 def get_queries_using_primary_index(
@@ -221,16 +221,16 @@ def get_queries_using_primary_index(
     Returns:
         List of queries that use primary indexes, ordered by result count
     """
-    query = f"""
+    query = """
     SELECT *
     FROM system:completed_requests
     WHERE phaseCounts.`primaryScan` IS NOT MISSING
         AND UPPER(statement) NOT LIKE '% SYSTEM:%'
     ORDER BY resultCount DESC
-    LIMIT {limit}
+    LIMIT $limit
     """
 
-    return run_cluster_query(ctx, query)
+    return run_cluster_query(ctx, query, limit=limit)
 
 
 def get_queries_not_using_covering_index(
@@ -244,17 +244,17 @@ def get_queries_not_using_covering_index(
     Returns:
         List of queries that perform index scans but also require fetches (not covering)
     """
-    query = f"""
+    query = """
     SELECT *
     FROM system:completed_requests
     WHERE phaseCounts.`indexScan` IS NOT MISSING
         AND phaseCounts.`fetch` IS NOT MISSING
         AND UPPER(statement) NOT LIKE '% SYSTEM:%'
     ORDER BY resultCount DESC
-    LIMIT {limit}
+    LIMIT $limit
     """
 
-    return run_cluster_query(ctx, query)
+    return run_cluster_query(ctx, query, limit=limit)
 
 
 def get_queries_not_selective(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
@@ -266,14 +266,14 @@ def get_queries_not_selective(ctx: Context, limit: int = 10) -> list[dict[str, A
     Returns:
         List of queries where index scans return significantly more documents than the final result
     """
-    query = f"""
+    query = """
     SELECT statement,
         AVG(phaseCounts.`indexScan` - resultCount) AS diff
     FROM system:completed_requests
     WHERE phaseCounts.`indexScan` > resultCount
     GROUP BY statement
     ORDER BY diff DESC
-    LIMIT {limit};
+    LIMIT $limit
     """
 
-    return run_cluster_query(ctx, query)
+    return run_cluster_query(ctx, query, limit=limit)