diff --git a/src/tools/__init__.py b/src/tools/__init__.py index 27c4946..6aef142 100644 --- a/src/tools/__init__.py +++ b/src/tools/__init__.py @@ -16,7 +16,14 @@ # Query tools from .query import ( + get_queries_not_selective, + get_queries_not_using_covering_index, + get_queries_using_primary_index, + get_queries_with_large_result_count, + get_queries_with_largest_response_sizes, get_schema_for_collection, + get_top_longest_running_queries, + get_top_most_frequent_queries, run_sql_plus_plus_query, ) @@ -47,6 +54,13 @@ get_index_advisor_recommendations, list_indexes, get_cluster_health_and_services, + get_queries_not_selective, + get_queries_not_using_covering_index, + get_queries_using_primary_index, + get_queries_with_large_result_count, + get_queries_with_largest_response_sizes, + get_top_longest_running_queries, + get_top_most_frequent_queries, ] __all__ = [ @@ -65,6 +79,13 @@ "get_index_advisor_recommendations", "list_indexes", "get_cluster_health_and_services", + "get_queries_not_selective", + "get_queries_not_using_covering_index", + "get_queries_using_primary_index", + "get_queries_with_large_result_count", + "get_queries_with_largest_response_sizes", + "get_top_longest_running_queries", + "get_top_most_frequent_queries", # Convenience "ALL_TOOLS", ] diff --git a/src/tools/query.py b/src/tools/query.py index d6a44d7..c09e7c6 100644 --- a/src/tools/query.py +++ b/src/tools/query.py @@ -81,7 +81,6 @@ def run_sql_plus_plus_query( raise -# Don't expose this function to the MCP server until we have a use case def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str, Any]]: """Run a query on the cluster object and return the results as a list of JSON objects.""" @@ -96,3 +95,185 @@ def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str, except Exception as e: logger.error(f"Error running query: {e}") raise + + +def get_top_longest_running_queries( + ctx: Context, limit: int = 10 +) -> list[dict[str, Any]]: + """Get the top N longest running queries from the system:completed_requests catalog. + + Args: + limit: Number of queries to return (default: 10) + + Returns: + List of queries with their average service time and count + """ + query = """ + SELECT statement, + DURATION_TO_STR(avgServiceTime) AS avgServiceTime, + COUNT(1) AS queries + FROM system:completed_requests + WHERE UPPER(statement) NOT LIKE 'INFER %' + AND UPPER(statement) NOT LIKE 'CREATE INDEX%' + AND UPPER(statement) NOT LIKE '% SYSTEM:%' + GROUP BY statement + LETTING avgServiceTime = AVG(STR_TO_DURATION(serviceTime)) + ORDER BY avgServiceTime DESC + LIMIT $limit + """ + + return run_cluster_query(ctx, query, limit=limit) + + +def get_top_most_frequent_queries( + ctx: Context, limit: int = 10 +) -> list[dict[str, Any]]: + """Get the top N most frequent queries from the system:completed_requests catalog. + + Args: + limit: Number of queries to return (default: 10) + + Returns: + List of queries with their frequency count + """ + query = """ + SELECT statement, + COUNT(1) AS queries + FROM system:completed_requests + WHERE UPPER(statement) NOT LIKE 'INFER %' + AND UPPER(statement) NOT LIKE 'CREATE INDEX%' + AND UPPER(statement) NOT LIKE '% SYSTEM:%' + GROUP BY statement + LETTING queries = COUNT(1) + ORDER BY queries DESC + LIMIT $limit + """ + + return run_cluster_query(ctx, query, limit=limit) + + +def get_queries_with_largest_response_sizes( + ctx: Context, limit: int = 10 +) -> list[dict[str, Any]]: + """Get queries with the largest response sizes from the system:completed_requests catalog. + + Args: + limit: Number of queries to return (default: 10) + + Returns: + List of queries with their average result size in bytes, KB, and MB + """ + query = """ + SELECT statement, + avgResultSize AS avgResultSizeBytes, + (avgResultSize / 1000) AS avgResultSizeKB, + (avgResultSize / 1000000) AS avgResultSizeMB, + COUNT(1) AS queries + FROM system:completed_requests + WHERE UPPER(statement) NOT LIKE 'INFER %' + AND UPPER(statement) NOT LIKE 'CREATE INDEX%' + AND UPPER(statement) NOT LIKE '% SYSTEM:%' + GROUP BY statement + LETTING avgResultSize = AVG(resultSize) + ORDER BY avgResultSize DESC + LIMIT $limit + """ + + return run_cluster_query(ctx, query, limit=limit) + + +def get_queries_with_large_result_count( + ctx: Context, limit: int = 10 +) -> list[dict[str, Any]]: + """Get queries with the largest result counts from the system:completed_requests catalog. + + Args: + limit: Number of queries to return (default: 10) + + Returns: + List of queries with their average result count + """ + query = """ + SELECT statement, + avgResultCount, + COUNT(1) AS queries + FROM system:completed_requests + WHERE UPPER(statement) NOT LIKE 'INFER %' + AND UPPER(statement) NOT LIKE 'CREATE INDEX%' + AND UPPER(statement) NOT LIKE '% SYSTEM:%' + GROUP BY statement + LETTING avgResultCount = AVG(resultCount) + ORDER BY avgResultCount DESC + LIMIT $limit + """ + + return run_cluster_query(ctx, query, limit=limit) + + +def get_queries_using_primary_index( + ctx: Context, limit: int = 10 +) -> list[dict[str, Any]]: + """Get queries that use a primary index from the system:completed_requests catalog. + + Args: + limit: Number of queries to return (default: 10) + + Returns: + List of queries that use primary indexes, ordered by result count + """ + query = """ + SELECT * + FROM system:completed_requests + WHERE phaseCounts.`primaryScan` IS NOT MISSING + AND UPPER(statement) NOT LIKE '% SYSTEM:%' + ORDER BY resultCount DESC + LIMIT $limit + """ + + return run_cluster_query(ctx, query, limit=limit) + + +def get_queries_not_using_covering_index( + ctx: Context, limit: int = 10 +) -> list[dict[str, Any]]: + """Get queries that don't use a covering index from the system:completed_requests catalog. + + Args: + limit: Number of queries to return (default: 10) + + Returns: + List of queries that perform index scans but also require fetches (not covering) + """ + query = """ + SELECT * + FROM system:completed_requests + WHERE phaseCounts.`indexScan` IS NOT MISSING + AND phaseCounts.`fetch` IS NOT MISSING + AND UPPER(statement) NOT LIKE '% SYSTEM:%' + ORDER BY resultCount DESC + LIMIT $limit + """ + + return run_cluster_query(ctx, query, limit=limit) + + +def get_queries_not_selective(ctx: Context, limit: int = 10) -> list[dict[str, Any]]: + """Get queries that are not very selective from the system:completed_requests catalog. + + Args: + limit: Number of queries to return (default: 10) + + Returns: + List of queries where index scans return significantly more documents than the final result + """ + query = """ + SELECT statement, + AVG(phaseCounts.`indexScan` - resultCount) AS diff + FROM system:completed_requests + WHERE phaseCounts.`indexScan` > resultCount + GROUP BY statement + ORDER BY diff DESC + LIMIT $limit + """ + + return run_cluster_query(ctx, query, limit=limit)