Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@

# Query tools
from .query import (
get_queries_not_selective,
get_queries_not_using_covering_index,
get_queries_using_primary_index,
get_queries_with_large_result_count,
get_queries_with_largest_response_sizes,
get_schema_for_collection,
get_top_longest_running_queries,
get_top_most_frequent_queries,
run_sql_plus_plus_query,
)

Expand Down Expand Up @@ -47,6 +54,13 @@
get_index_advisor_recommendations,
list_indexes,
get_cluster_health_and_services,
get_queries_not_selective,
get_queries_not_using_covering_index,
get_queries_using_primary_index,
get_queries_with_large_result_count,
get_queries_with_largest_response_sizes,
get_top_longest_running_queries,
get_top_most_frequent_queries,
]

__all__ = [
Expand All @@ -65,6 +79,13 @@
"get_index_advisor_recommendations",
"list_indexes",
"get_cluster_health_and_services",
"get_queries_not_selective",
"get_queries_not_using_covering_index",
"get_queries_using_primary_index",
"get_queries_with_large_result_count",
"get_queries_with_largest_response_sizes",
"get_top_longest_running_queries",
"get_top_most_frequent_queries",
# Convenience
"ALL_TOOLS",
]
183 changes: 182 additions & 1 deletion src/tools/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def run_sql_plus_plus_query(
raise


# Don't expose this function to the MCP server until we have a use case
def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str, Any]]:
"""Run a query on the cluster object and return the results as a list of JSON objects."""

Expand All @@ -96,3 +95,185 @@ def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str,
except Exception as e:
logger.error(f"Error running query: {e}")
raise


def get_top_longest_running_queries(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Return the statements with the highest average service time.

    Reads ``system:completed_requests``, groups the recorded requests by
    statement text, and ranks the groups by their average ``serviceTime``,
    slowest first. Statements starting with ``INFER`` or ``CREATE INDEX``
    and statements containing `` SYSTEM:`` (case-insensitive) are left out.

    Args:
        ctx: Context forwarded unchanged to ``run_cluster_query``.
        limit: Maximum number of statements to return (default: 10).

    Returns:
        Whatever ``run_cluster_query`` returns for this statement: one row
        per statement carrying ``statement``, ``avgServiceTime`` (rendered
        through ``DURATION_TO_STR``) and ``queries`` (number of recorded
        executions).
    """
    # ``$limit`` stays a placeholder in the text; the value travels as the
    # ``limit`` keyword argument instead of being formatted into the string.
    slowest_statements_sql = (
        "\n"
        "SELECT statement,\n"
        "DURATION_TO_STR(avgServiceTime) AS avgServiceTime,\n"
        "COUNT(1) AS queries\n"
        "FROM system:completed_requests\n"
        "WHERE UPPER(statement) NOT LIKE 'INFER %'\n"
        "AND UPPER(statement) NOT LIKE 'CREATE INDEX%'\n"
        "AND UPPER(statement) NOT LIKE '% SYSTEM:%'\n"
        "GROUP BY statement\n"
        "LETTING avgServiceTime = AVG(STR_TO_DURATION(serviceTime))\n"
        "ORDER BY avgServiceTime DESC\n"
        "LIMIT $limit\n"
    )
    return run_cluster_query(ctx, slowest_statements_sql, limit=limit)


def get_top_most_frequent_queries(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Return the statements that were executed most often.

    Reads ``system:completed_requests``, groups the recorded requests by
    statement text, and ranks the groups by how many requests each one
    contains, most frequent first. Statements starting with ``INFER`` or
    ``CREATE INDEX`` and statements containing `` SYSTEM:``
    (case-insensitive) are left out.

    Args:
        ctx: Context forwarded unchanged to ``run_cluster_query``.
        limit: Maximum number of statements to return (default: 10).

    Returns:
        Whatever ``run_cluster_query`` returns for this statement: one row
        per statement carrying ``statement`` and ``queries`` (its
        execution count).
    """
    # ``$limit`` stays a placeholder in the text; the value travels as the
    # ``limit`` keyword argument instead of being formatted into the string.
    most_frequent_sql = (
        "\n"
        "SELECT statement,\n"
        "COUNT(1) AS queries\n"
        "FROM system:completed_requests\n"
        "WHERE UPPER(statement) NOT LIKE 'INFER %'\n"
        "AND UPPER(statement) NOT LIKE 'CREATE INDEX%'\n"
        "AND UPPER(statement) NOT LIKE '% SYSTEM:%'\n"
        "GROUP BY statement\n"
        "LETTING queries = COUNT(1)\n"
        "ORDER BY queries DESC\n"
        "LIMIT $limit\n"
    )
    return run_cluster_query(ctx, most_frequent_sql, limit=limit)


def get_queries_with_largest_response_sizes(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Return the statements whose responses are largest on average.

    Reads ``system:completed_requests``, groups the recorded requests by
    statement text, and ranks the groups by their average ``resultSize``,
    largest first. Statements starting with ``INFER`` or ``CREATE INDEX``
    and statements containing `` SYSTEM:`` (case-insensitive) are left out.

    Args:
        ctx: Context forwarded unchanged to ``run_cluster_query``.
        limit: Maximum number of statements to return (default: 10).

    Returns:
        Whatever ``run_cluster_query`` returns for this statement: one row
        per statement carrying ``statement``, ``avgResultSizeBytes``,
        ``avgResultSizeKB``, ``avgResultSizeMB`` and ``queries`` (number
        of recorded executions).
    """
    # KB and MB are derived with decimal divisors (1000 and 1000000),
    # not 1024-based ones. ``$limit`` is passed as a keyword argument.
    largest_responses_sql = (
        "\n"
        "SELECT statement,\n"
        "avgResultSize AS avgResultSizeBytes,\n"
        "(avgResultSize / 1000) AS avgResultSizeKB,\n"
        "(avgResultSize / 1000000) AS avgResultSizeMB,\n"
        "COUNT(1) AS queries\n"
        "FROM system:completed_requests\n"
        "WHERE UPPER(statement) NOT LIKE 'INFER %'\n"
        "AND UPPER(statement) NOT LIKE 'CREATE INDEX%'\n"
        "AND UPPER(statement) NOT LIKE '% SYSTEM:%'\n"
        "GROUP BY statement\n"
        "LETTING avgResultSize = AVG(resultSize)\n"
        "ORDER BY avgResultSize DESC\n"
        "LIMIT $limit\n"
    )
    return run_cluster_query(ctx, largest_responses_sql, limit=limit)


def get_queries_with_large_result_count(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Return the statements that produce the most results on average.

    Reads ``system:completed_requests``, groups the recorded requests by
    statement text, and ranks the groups by their average ``resultCount``,
    largest first. Statements starting with ``INFER`` or ``CREATE INDEX``
    and statements containing `` SYSTEM:`` (case-insensitive) are left out.

    Args:
        ctx: Context forwarded unchanged to ``run_cluster_query``.
        limit: Maximum number of statements to return (default: 10).

    Returns:
        Whatever ``run_cluster_query`` returns for this statement: one row
        per statement carrying ``statement``, ``avgResultCount`` and
        ``queries`` (number of recorded executions).
    """
    # ``$limit`` stays a placeholder in the text; the value travels as the
    # ``limit`` keyword argument instead of being formatted into the string.
    largest_result_count_sql = (
        "\n"
        "SELECT statement,\n"
        "avgResultCount,\n"
        "COUNT(1) AS queries\n"
        "FROM system:completed_requests\n"
        "WHERE UPPER(statement) NOT LIKE 'INFER %'\n"
        "AND UPPER(statement) NOT LIKE 'CREATE INDEX%'\n"
        "AND UPPER(statement) NOT LIKE '% SYSTEM:%'\n"
        "GROUP BY statement\n"
        "LETTING avgResultCount = AVG(resultCount)\n"
        "ORDER BY avgResultCount DESC\n"
        "LIMIT $limit\n"
    )
    return run_cluster_query(ctx, largest_result_count_sql, limit=limit)


def get_queries_using_primary_index(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get queries that use a primary index from the system:completed_requests catalog.

    A request is selected when its ``phaseCounts`` object has a
    ``primaryScan`` entry. Statements starting with ``INFER`` or
    ``CREATE INDEX`` and statements containing `` SYSTEM:``
    (case-insensitive) are excluded, matching the filters used by the
    aggregate query-analysis tools in this module.

    Args:
        ctx: Context forwarded unchanged to ``run_cluster_query``.
        limit: Number of queries to return (default: 10)

    Returns:
        List of completed-request entries (all fields) that used a primary
        scan, ordered by result count, largest first
    """
    # INFER / CREATE INDEX are filtered out so the result focuses on
    # application-level statements, consistent with the sibling tools.
    # ``$limit`` is bound from the ``limit`` keyword argument below.
    query = """
    SELECT *
    FROM system:completed_requests
    WHERE phaseCounts.`primaryScan` IS NOT MISSING
        AND UPPER(statement) NOT LIKE 'INFER %'
        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
    ORDER BY resultCount DESC
    LIMIT $limit
    """

    return run_cluster_query(ctx, query, limit=limit)


def get_queries_not_using_covering_index(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get queries that don't use a covering index from the system:completed_requests catalog.

    A request is selected when its ``phaseCounts`` object has both an
    ``indexScan`` and a ``fetch`` entry, i.e. the index scan was followed
    by document fetches. Statements starting with ``INFER`` or
    ``CREATE INDEX`` and statements containing `` SYSTEM:``
    (case-insensitive) are excluded, matching the filters used by the
    aggregate query-analysis tools in this module.

    Args:
        ctx: Context forwarded unchanged to ``run_cluster_query``.
        limit: Number of queries to return (default: 10)

    Returns:
        List of completed-request entries (all fields) that perform index
        scans but also require fetches (not covering), ordered by result
        count, largest first
    """
    # INFER / CREATE INDEX are filtered out so the result focuses on
    # application-level statements, consistent with the sibling tools.
    # ``$limit`` is bound from the ``limit`` keyword argument below.
    query = """
    SELECT *
    FROM system:completed_requests
    WHERE phaseCounts.`indexScan` IS NOT MISSING
        AND phaseCounts.`fetch` IS NOT MISSING
        AND UPPER(statement) NOT LIKE 'INFER %'
        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
    ORDER BY resultCount DESC
    LIMIT $limit
    """

    return run_cluster_query(ctx, query, limit=limit)


def get_queries_not_selective(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
    """Get queries that are not very selective from the system:completed_requests catalog.

    Only requests whose ``phaseCounts.indexScan`` exceeds their
    ``resultCount`` are considered. They are grouped by statement text and
    ranked by the average of ``indexScan - resultCount``, largest first.
    Statements starting with ``INFER`` or ``CREATE INDEX`` and statements
    containing `` SYSTEM:`` (case-insensitive) are excluded, matching the
    filters used by the other query-analysis tools in this module.

    Args:
        ctx: Context forwarded unchanged to ``run_cluster_query``.
        limit: Number of queries to return (default: 10)

    Returns:
        List of rows with ``statement`` and ``diff``, where index scans
        return significantly more documents than the final result
    """
    # The WHERE predicate is applied before grouping, so every row that
    # reaches AVG() already has indexScan > resultCount; the averaged
    # difference therefore cannot be negative and needs no clamping.
    # ``$limit`` is bound from the ``limit`` keyword argument below.
    query = """
    SELECT statement,
        AVG(phaseCounts.`indexScan` - resultCount) AS diff
    FROM system:completed_requests
    WHERE phaseCounts.`indexScan` > resultCount
        AND UPPER(statement) NOT LIKE 'INFER %'
        AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
        AND UPPER(statement) NOT LIKE '% SYSTEM:%'
    GROUP BY statement
    ORDER BY diff DESC
    LIMIT $limit
    """

    return run_cluster_query(ctx, query, limit=limit)