Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ This finds tests with `explain_todo` entries in their metadata.

## Running Tests

Always run parser tests with a 5 second timeout:
Always run parser tests with a 10 second timeout:

```bash
go test ./parser/... -timeout 5s
go test ./parser/... -timeout 10s
```

The tests are very fast. If a test is timing out, it indicates a bug (likely an infinite loop in the parser).
Expand Down
102 changes: 100 additions & 2 deletions internal/explain/select.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuer
}
children := countSelectUnionChildren(n)
fmt.Fprintf(sb, "%sSelectWithUnionQuery (children %d)\n", indent, children)
// ClickHouse optimizes UNION ALL when selects have identical expressions but different aliases.
// In that case, only the first SELECT is shown since column names come from the first SELECT anyway.
selects := simplifyUnionSelects(n.Selects)
// Wrap selects in ExpressionList
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Selects))
for _, sel := range n.Selects {
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(selects))
for _, sel := range selects {
Node(sb, sel, depth+2)
}
// INTO OUTFILE clause - check if any SelectQuery has IntoOutfile set
Expand Down Expand Up @@ -252,6 +255,101 @@ func countSelectUnionChildren(n *ast.SelectWithUnionQuery) int {
return count
}

// simplifyUnionSelects implements ClickHouse's UNION ALL optimization:
// when every SELECT in a UNION consists of the same literal expressions
// (ignoring aliases) but the aliases differ between SELECTs, only the first
// SELECT is returned.
//
// The simplification applies only when all of the following hold:
//   - every statement is a *ast.SelectQuery without FROM, WHERE, GROUP BY,
//     HAVING, ORDER BY, or WITH;
//   - every SELECT has the same number of columns;
//   - every column in every SELECT is a literal with a non-empty alias;
//   - the literals at each column position are equal in both value and type;
//   - at least one column's alias differs between SELECTs.
//
// In every other case the input slice is returned unchanged.
func simplifyUnionSelects(selects []ast.Statement) []ast.Statement {
	if len(selects) <= 1 {
		return selects
	}

	// Only plain "SELECT <columns>" queries qualify.
	queries := make([]*ast.SelectQuery, 0, len(selects))
	for _, sel := range selects {
		sq, ok := sel.(*ast.SelectQuery)
		if !ok {
			// Not a simple SelectQuery, can't simplify.
			return selects
		}
		if sq.From != nil || sq.Where != nil || sq.GroupBy != nil ||
			sq.Having != nil || sq.OrderBy != nil || len(sq.With) > 0 {
			return selects
		}
		queries = append(queries, sq)
	}

	// All SELECTs must have the same number of columns.
	numCols := len(queries[0].Columns)
	for _, q := range queries[1:] {
		if len(q.Columns) != numCols {
			return selects
		}
	}

	// Every failed precondition leaves the input untouched, so bail out as
	// soon as one is violated; only alias differences need to be tracked.
	aliasesDiffer := false
	for colIdx := 0; colIdx < numCols; colIdx++ {
		var firstAlias, firstExpr string

		for i, q := range queries {
			aliased, ok := q.Columns[colIdx].(*ast.AliasedExpr)
			if !ok || aliased.Alias == "" {
				// Bare literal, other expression, or empty alias.
				return selects
			}
			lit, ok := aliased.Expr.(*ast.Literal)
			if !ok {
				// Non-literal expression, can't simplify.
				return selects
			}

			// Include the dynamic type in the key so that values which merely
			// print alike (e.g. the string "1" and the integer 1) are not
			// mistaken for the same expression.
			exprKey := fmt.Sprintf("%T:%v", lit.Value, lit.Value)

			if i == 0 {
				firstAlias, firstExpr = aliased.Alias, exprKey
				continue
			}
			if exprKey != firstExpr {
				return selects
			}
			if aliased.Alias != firstAlias {
				aliasesDiffer = true
			}
		}
	}

	if aliasesDiffer {
		// Cap the capacity so an append on the result cannot overwrite the
		// remaining elements of the caller's slice.
		return selects[:1:1]
	}

	return selects
}

func countSelectQueryChildren(n *ast.SelectQuery) int {
count := 1 // columns ExpressionList
// WITH clause
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1 @@
{
"explain_todo": {
"stmt1": true
}
}
{}