Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@

# Query tools
from .query import (
get_queries_not_selective,
get_queries_not_using_covering_index,
get_queries_using_primary_index,
get_queries_with_large_result_count,
get_queries_with_largest_response_sizes,
get_schema_for_collection,
get_top_longest_running_queries,
get_top_most_frequent_queries,
run_sql_plus_plus_query,
)

Expand Down Expand Up @@ -47,6 +54,13 @@
get_index_advisor_recommendations,
list_indexes,
get_cluster_health_and_services,
get_top_longest_running_queries,
get_top_most_frequent_queries,
get_queries_with_largest_response_sizes,
get_queries_with_large_result_count,
get_queries_using_primary_index,
get_queries_not_using_covering_index,
get_queries_not_selective,
]

__all__ = [
Expand All @@ -65,6 +79,13 @@
"get_index_advisor_recommendations",
"list_indexes",
"get_cluster_health_and_services",
"get_top_longest_running_queries",
"get_top_most_frequent_queries",
"get_queries_with_largest_response_sizes",
"get_queries_with_large_result_count",
"get_queries_using_primary_index",
"get_queries_not_using_covering_index",
"get_queries_not_selective",
# Convenience
"ALL_TOOLS",
]
183 changes: 182 additions & 1 deletion src/tools/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def run_sql_plus_plus_query(
raise


# Don't expose this function to the MCP server until we have a use case
def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str, Any]]:
"""Run a query on the cluster object and return the results as a list of JSON objects."""

Expand All @@ -96,3 +95,185 @@ def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str,
except Exception as e:
logger.error(f"Error running query: {e}")
raise


def get_top_longest_running_queries(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get the top N longest running queries from the system:completed_requests catalog.

    Completed requests are grouped by statement text and ranked by their
    average service time, longest first. Statements starting with INFER,
    CREATE INDEX, EXPLAIN or ADVISE, and statements containing " SYSTEM:",
    are excluded.

    Args:
        ctx: Context passed through unchanged to run_cluster_query.
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries with their average service time and count

    Raises:
        TypeError, ValueError: If limit cannot be converted to an integer.
    """
    # The limit is interpolated into the statement text, so coerce it first:
    # only an integer literal can ever reach the query, whatever the caller sent.
    row_limit = int(limit)

    # EXPLAIN and ADVISE are filtered because the UI issues both for every
    # query it runs, which would otherwise drown out application statements.
    query = f"""
        SELECT statement,
               DURATION_TO_STR(avgServiceTime) AS avgServiceTime,
               COUNT(1) AS queries
        FROM system:completed_requests
        WHERE UPPER(statement) NOT LIKE 'INFER %'
          AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
          AND UPPER(statement) NOT LIKE 'EXPLAIN %'
          AND UPPER(statement) NOT LIKE 'ADVISE %'
          AND UPPER(statement) NOT LIKE '% SYSTEM:%'
        GROUP BY statement
        LETTING avgServiceTime = AVG(STR_TO_DURATION(serviceTime))
        ORDER BY avgServiceTime DESC
        LIMIT {row_limit}
    """

    return run_cluster_query(ctx, query)


def get_top_most_frequent_queries(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get the top N most frequent queries from the system:completed_requests catalog.

    Completed requests are grouped by statement text and ranked by how many
    times each statement appears, most frequent first. Statements starting
    with INFER, CREATE INDEX, EXPLAIN or ADVISE, and statements containing
    " SYSTEM:", are excluded.

    Args:
        ctx: Context passed through unchanged to run_cluster_query.
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries with their frequency count

    Raises:
        TypeError, ValueError: If limit cannot be converted to an integer.
    """
    # The limit is interpolated into the statement text, so coerce it first:
    # only an integer literal can ever reach the query, whatever the caller sent.
    row_limit = int(limit)

    # Review feedback: every query run through the UI also triggers an EXPLAIN
    # and an ADVISE on it, so both must be filtered out or they would dominate
    # a frequency ranking.
    query = f"""
        SELECT statement,
               COUNT(1) AS queries
        FROM system:completed_requests
        WHERE UPPER(statement) NOT LIKE 'INFER %'
          AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
          AND UPPER(statement) NOT LIKE 'EXPLAIN %'
          AND UPPER(statement) NOT LIKE 'ADVISE %'
          AND UPPER(statement) NOT LIKE '% SYSTEM:%'
        GROUP BY statement
        LETTING queries = COUNT(1)
        ORDER BY queries DESC
        LIMIT {row_limit}
    """

    return run_cluster_query(ctx, query)


def get_queries_with_largest_response_sizes(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get queries with the largest response sizes from the system:completed_requests catalog.

    Completed requests are grouped by statement text and ranked by their
    average resultSize, largest first. Statements starting with INFER,
    CREATE INDEX, EXPLAIN or ADVISE, and statements containing " SYSTEM:",
    are excluded.

    Args:
        ctx: Context passed through unchanged to run_cluster_query.
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries with their average result size in bytes, KB, and MB

    Raises:
        TypeError, ValueError: If limit cannot be converted to an integer.
    """
    # The limit is interpolated into the statement text, so coerce it first:
    # only an integer literal can ever reach the query, whatever the caller sent.
    row_limit = int(limit)

    # KB and MB are derived with decimal divisors (1000 and 1000000).
    # EXPLAIN and ADVISE are filtered because the UI issues both for every
    # query it runs.
    query = f"""
        SELECT statement,
               avgResultSize AS avgResultSizeBytes,
               (avgResultSize / 1000) AS avgResultSizeKB,
               (avgResultSize / 1000000) AS avgResultSizeMB,
               COUNT(1) AS queries
        FROM system:completed_requests
        WHERE UPPER(statement) NOT LIKE 'INFER %'
          AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
          AND UPPER(statement) NOT LIKE 'EXPLAIN %'
          AND UPPER(statement) NOT LIKE 'ADVISE %'
          AND UPPER(statement) NOT LIKE '% SYSTEM:%'
        GROUP BY statement
        LETTING avgResultSize = AVG(resultSize)
        ORDER BY avgResultSize DESC
        LIMIT {row_limit}
    """

    return run_cluster_query(ctx, query)


def get_queries_with_large_result_count(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get queries with the largest result counts from the system:completed_requests catalog.

    Completed requests are grouped by statement text and ranked by their
    average resultCount, largest first. Statements starting with INFER,
    CREATE INDEX, EXPLAIN or ADVISE, and statements containing " SYSTEM:",
    are excluded.

    Args:
        ctx: Context passed through unchanged to run_cluster_query.
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries with their average result count

    Raises:
        TypeError, ValueError: If limit cannot be converted to an integer.
    """
    # The limit is interpolated into the statement text, so coerce it first:
    # only an integer literal can ever reach the query, whatever the caller sent.
    row_limit = int(limit)

    # EXPLAIN and ADVISE are filtered because the UI issues both for every
    # query it runs, which would otherwise pollute the ranking.
    query = f"""
        SELECT statement,
               avgResultCount,
               COUNT(1) AS queries
        FROM system:completed_requests
        WHERE UPPER(statement) NOT LIKE 'INFER %'
          AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
          AND UPPER(statement) NOT LIKE 'EXPLAIN %'
          AND UPPER(statement) NOT LIKE 'ADVISE %'
          AND UPPER(statement) NOT LIKE '% SYSTEM:%'
        GROUP BY statement
        LETTING avgResultCount = AVG(resultCount)
        ORDER BY avgResultCount DESC
        LIMIT {row_limit}
    """

    return run_cluster_query(ctx, query)


def get_queries_using_primary_index(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get queries that use a primary index from the system:completed_requests catalog.

    A completed request is selected when its phaseCounts object has a
    primaryScan entry. Statements starting with INFER, CREATE INDEX, EXPLAIN
    or ADVISE, and statements containing " SYSTEM:", are excluded.

    Args:
        ctx: Context passed through unchanged to run_cluster_query.
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries that use primary indexes, ordered by result count

    Raises:
        TypeError, ValueError: If limit cannot be converted to an integer.
    """
    # The limit is interpolated into the statement text, so coerce it first:
    # only an integer literal can ever reach the query, whatever the caller sent.
    row_limit = int(limit)

    # Review feedback: apply the same INFER / CREATE INDEX exclusions as the
    # other query-analysis tools, plus EXPLAIN and ADVISE, so the results stay
    # focused on application-level queries.
    query = f"""
        SELECT *
        FROM system:completed_requests
        WHERE phaseCounts.`primaryScan` IS NOT MISSING
          AND UPPER(statement) NOT LIKE 'INFER %'
          AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
          AND UPPER(statement) NOT LIKE 'EXPLAIN %'
          AND UPPER(statement) NOT LIKE 'ADVISE %'
          AND UPPER(statement) NOT LIKE '% SYSTEM:%'
        ORDER BY resultCount DESC
        LIMIT {row_limit}
    """

    return run_cluster_query(ctx, query)


def get_queries_not_using_covering_index(
    ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
    """Get queries that don't use a covering index from the system:completed_requests catalog.

    A completed request is selected when its phaseCounts object has both an
    indexScan and a fetch entry. Statements starting with INFER, CREATE INDEX,
    EXPLAIN or ADVISE, and statements containing " SYSTEM:", are excluded.

    Args:
        ctx: Context passed through unchanged to run_cluster_query.
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries that perform index scans but also require fetches (not covering)

    Raises:
        TypeError, ValueError: If limit cannot be converted to an integer.
    """
    # The limit is interpolated into the statement text, so coerce it first:
    # only an integer literal can ever reach the query, whatever the caller sent.
    row_limit = int(limit)

    # Review feedback: apply the same INFER / CREATE INDEX exclusions as the
    # other query-analysis tools, plus EXPLAIN and ADVISE, so the results stay
    # focused on application queries.
    query = f"""
        SELECT *
        FROM system:completed_requests
        WHERE phaseCounts.`indexScan` IS NOT MISSING
          AND phaseCounts.`fetch` IS NOT MISSING
          AND UPPER(statement) NOT LIKE 'INFER %'
          AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
          AND UPPER(statement) NOT LIKE 'EXPLAIN %'
          AND UPPER(statement) NOT LIKE 'ADVISE %'
          AND UPPER(statement) NOT LIKE '% SYSTEM:%'
        ORDER BY resultCount DESC
        LIMIT {row_limit}
    """

    return run_cluster_query(ctx, query)


def get_queries_not_selective(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
    """Get queries that are not very selective from the system:completed_requests catalog.

    Only completed requests whose indexScan phase count exceeds resultCount
    are considered. They are grouped by statement text and ranked by the
    average of (indexScan - resultCount), largest first. Statements starting
    with INFER, CREATE INDEX, EXPLAIN or ADVISE, and statements containing
    " SYSTEM:", are excluded.

    Args:
        ctx: Context passed through unchanged to run_cluster_query.
        limit: Number of queries to return (default: 10)

    Returns:
        List of queries where index scans return significantly more documents than the final result

    Raises:
        TypeError, ValueError: If limit cannot be converted to an integer.
    """
    # The limit is interpolated into the statement text, so coerce it first:
    # only an integer literal can ever reach the query, whatever the caller sent.
    row_limit = int(limit)

    # Review feedback: exclude system-level statements (INFER, CREATE INDEX,
    # SYSTEM: keyspaces) as the other tools in this file do, plus the EXPLAIN
    # and ADVISE statements the UI issues, to focus on application queries.
    # No trailing semicolon, matching the other statements in this file.
    query = f"""
        SELECT statement,
               AVG(phaseCounts.`indexScan` - resultCount) AS diff
        FROM system:completed_requests
        WHERE phaseCounts.`indexScan` > resultCount
          AND UPPER(statement) NOT LIKE 'INFER %'
          AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
          AND UPPER(statement) NOT LIKE 'EXPLAIN %'
          AND UPPER(statement) NOT LIKE 'ADVISE %'
          AND UPPER(statement) NOT LIKE '% SYSTEM:%'
        GROUP BY statement
        ORDER BY diff DESC
        LIMIT {row_limit}
    """

    return run_cluster_query(ctx, query)