diff --git a/CLAUDE.md b/CLAUDE.md index 45dc502a81..f3b32c086f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -12,10 +12,10 @@ This finds tests with `explain_todo` entries in their metadata. ## Running Tests -Always run parser tests with a 5 second timeout: +Always run parser tests with a 10 second timeout: ```bash -go test ./parser/... -timeout 5s +go test ./parser/... -timeout 10s ``` The tests are very fast. If a test is timing out, it indicates a bug (likely an infinite loop in the parser). diff --git a/internal/explain/select.go b/internal/explain/select.go index 5dcc9d8c4e..4a0e6a8b3e 100644 --- a/internal/explain/select.go +++ b/internal/explain/select.go @@ -20,9 +20,12 @@ func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuer } children := countSelectUnionChildren(n) fmt.Fprintf(sb, "%sSelectWithUnionQuery (children %d)\n", indent, children) + // ClickHouse optimizes UNION ALL when selects have identical expressions but different aliases. + // In that case, only the first SELECT is shown since column names come from the first SELECT anyway. + selects := simplifyUnionSelects(n.Selects) // Wrap selects in ExpressionList - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Selects)) - for _, sel := range n.Selects { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(selects)) + for _, sel := range selects { Node(sb, sel, depth+2) } // INTO OUTFILE clause - check if any SelectQuery has IntoOutfile set @@ -252,6 +255,101 @@ func countSelectUnionChildren(n *ast.SelectWithUnionQuery) int { return count } +// simplifyUnionSelects implements ClickHouse's UNION ALL optimization: +// When all SELECT queries in a UNION have identical expressions (ignoring aliases) +// but different aliases, only the first SELECT is returned. +// This only applies when ALL columns in ALL SELECTs have explicit aliases. +// If aliases are the same across all SELECTs, or if any column lacks an alias, all are kept. 
+func simplifyUnionSelects(selects []ast.Statement) []ast.Statement {
+	// Returns selects unchanged unless the rewrite applies; when it does, the
+	// result is a one-element slice holding only the first SELECT.
+	if len(selects) <= 1 {
+		return selects
+	}
+
+	// Every branch must be a bare SelectQuery: a column list with no
+	// WITH/FROM/WHERE/GROUP BY/HAVING/ORDER BY. Anything else is left alone.
+	queries := make([]*ast.SelectQuery, 0, len(selects))
+	for _, sel := range selects {
+		sq, ok := sel.(*ast.SelectQuery)
+		if !ok {
+			return selects
+		}
+		if sq.From != nil || sq.Where != nil || sq.GroupBy != nil ||
+			sq.Having != nil || sq.OrderBy != nil || len(sq.With) > 0 {
+			return selects
+		}
+		queries = append(queries, sq)
+	}
+
+	// All branches must project the same number of columns.
+	numCols := len(queries[0].Columns)
+	for _, q := range queries[1:] {
+		if len(q.Columns) != numCols {
+			return selects
+		}
+	}
+
+	// Compare the branches column by column. Any disqualifying column ends the
+	// scan immediately, because the outcome can then only be "keep everything";
+	// the one fact that has to survive the whole scan is whether an alias differs.
+	aliasesDiffer := false
+	for colIdx := 0; colIdx < numCols; colIdx++ {
+		firstAlias, firstKey := "", ""
+		for i, q := range queries {
+			// Only `<literal> AS <alias>` columns qualify: a bare literal has no
+			// alias, and a non-literal expression is never compared.
+			aliased, ok := q.Columns[colIdx].(*ast.AliasedExpr)
+			if !ok || aliased.Alias == "" {
+				return selects
+			}
+			lit, ok := aliased.Expr.(*ast.Literal)
+			if !ok {
+				return selects
+			}
+
+			// Key on the dynamic Go type as well as the printed value: %v alone
+			// would equate literals of different kinds that print alike (e.g. 1
+			// and '1', presuming Value holds an integer and a string respectively
+			// -- TODO confirm against the parser).
+			key := fmt.Sprintf("%T:%v", lit.Value, lit.Value)
+			if i == 0 {
+				firstAlias, firstKey = aliased.Alias, key
+				continue
+			}
+			if key != firstKey {
+				return selects
+			}
+			if aliased.Alias != firstAlias {
+				aliasesDiffer = true
+			}
+		}
+	}
+
+	// Identical expressions under identical aliases are not collapsed.
+	if !aliasesDiffer {
+		return selects
+	}
+	// Cap the result at one element so an append by the caller cannot write into
+	// the backing array of the original slice.
+	return selects[:1:1]
+}
+
 func countSelectQueryChildren(n *ast.SelectQuery) int { count := 1 // columns ExpressionList // WITH clause diff --git a/parser/testdata/00592_union_all_different_aliases/metadata.json b/parser/testdata/00592_union_all_different_aliases/metadata.json index e9d6e46171..0967ef424b 100644 --- a/parser/testdata/00592_union_all_different_aliases/metadata.json +++ b/parser/testdata/00592_union_all_different_aliases/metadata.json @@ -1,5 +1 @@ -{ - "explain_todo": { - "stmt1": true - } -} +{}