Skip to content

Commit 93d8182

Browse files
authored
Add deduplicate suffix to OPTIMIZE query explain output (#114)
1 parent 1af4b3d commit 93d8182

File tree

233 files changed

+1881
-1421
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

233 files changed

+1881
-1421
lines changed

ast/ast.go

Lines changed: 102 additions & 43 deletions
Large diffs are not rendered by default.

internal/explain/dictionary.go

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -80,16 +80,16 @@ func explainDictionaryDefinition(sb *strings.Builder, n *ast.DictionaryDefinitio
8080
explainDictionaryLifetime(sb, n.Lifetime, indent+" ", depth+1)
8181
}
8282

83-
// RANGE (if present, comes before LAYOUT)
84-
if n.Range != nil {
85-
explainDictionaryRange(sb, n.Range, indent+" ", depth+1)
86-
}
87-
88-
// LAYOUT
83+
// LAYOUT (comes before RANGE in EXPLAIN output)
8984
if n.Layout != nil {
9085
explainDictionaryLayout(sb, n.Layout, indent+" ", depth+1)
9186
}
9287

88+
// RANGE
89+
if n.Range != nil {
90+
explainDictionaryRange(sb, n.Range, indent+" ", depth+1)
91+
}
92+
9393
// SETTINGS
9494
if len(n.Settings) > 0 {
9595
fmt.Fprintf(sb, "%s Set\n", indent)

internal/explain/explain.go

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,12 @@ func Node(sb *strings.Builder, node interface{}, depth int) {
155155
fmt.Fprintf(sb, "%sCreateSettingsProfileQuery\n", indent)
156156
case *ast.DropSettingsProfileQuery:
157157
fmt.Fprintf(sb, "%sDROP SETTINGS PROFILE query\n", indent)
158+
case *ast.CreateNamedCollectionQuery:
159+
fmt.Fprintf(sb, "%sCreateNamedCollectionQuery\n", indent)
160+
case *ast.AlterNamedCollectionQuery:
161+
fmt.Fprintf(sb, "%sAlterNamedCollectionQuery\n", indent)
162+
case *ast.DropNamedCollectionQuery:
163+
fmt.Fprintf(sb, "%sDropNamedCollectionQuery\n", indent)
158164
case *ast.ShowCreateSettingsProfileQuery:
159165
// Use PROFILES (plural) when multiple profiles are specified
160166
queryName := "SHOW CREATE SETTINGS PROFILE query"
@@ -334,6 +340,9 @@ func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) {
334340
if len(col.Settings) > 0 {
335341
children++
336342
}
343+
if col.Comment != "" {
344+
children++
345+
}
337346
if children > 0 {
338347
fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", indent, col.Name, children)
339348
} else {
@@ -360,6 +369,9 @@ func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) {
360369
if len(col.Settings) > 0 {
361370
fmt.Fprintf(sb, "%s Set\n", indent)
362371
}
372+
if col.Comment != "" {
373+
fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, col.Comment)
374+
}
363375
}
364376

365377
// explainCodecExpr handles CODEC expressions in column declarations

internal/explain/expressions.go

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ package explain
22

33
import (
44
"fmt"
5+
"strconv"
56
"strings"
67

78
"github.com/sqlc-dev/doubleclick/ast"
@@ -402,8 +403,9 @@ func collectLogicalOperands(n *ast.BinaryExpr) []ast.Expression {
402403

403404
func explainUnaryExpr(sb *strings.Builder, n *ast.UnaryExpr, indent string, depth int) {
404405
// Handle negate of literal numbers - output as negative literal instead of function
406+
// BUT only if the literal is NOT parenthesized (e.g., -1 folds, but -(1) stays as negate function)
405407
if n.Op == "-" {
406-
if lit, ok := n.Operand.(*ast.Literal); ok {
408+
if lit, ok := n.Operand.(*ast.Literal); ok && !lit.Parenthesized {
407409
switch lit.Type {
408410
case ast.LiteralInteger:
409411
// Convert positive integer to negative
@@ -433,6 +435,19 @@ func explainUnaryExpr(sb *strings.Builder, n *ast.UnaryExpr, indent string, dept
433435
s := FormatFloat(-val)
434436
fmt.Fprintf(sb, "%sLiteral Float64_%s\n", indent, s)
435437
return
438+
case ast.LiteralString:
439+
// Handle BigInt - very large numbers stored as strings
440+
// ClickHouse converts these to Float64 in scientific notation
441+
if lit.IsBigInt {
442+
if strVal, ok := lit.Value.(string); ok {
443+
// Parse the string as float64 and negate it
444+
if f, err := strconv.ParseFloat(strVal, 64); err == nil {
445+
s := FormatFloat(-f)
446+
fmt.Fprintf(sb, "%sLiteral Float64_%s\n", indent, s)
447+
return
448+
}
449+
}
450+
}
436451
}
437452
}
438453
}
@@ -477,8 +492,13 @@ func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) {
477492
needsFunctionFormat = true
478493
break
479494
}
480-
// Also check if nested arrays/tuples contain non-literal elements
495+
// Check if tuple contains array literals - these need Function tuple format
481496
if lit, ok := expr.(*ast.Literal); ok {
497+
if lit.Type == ast.LiteralArray {
498+
needsFunctionFormat = true
499+
break
500+
}
501+
// Also check if nested arrays/tuples contain non-literal elements
482502
if containsNonLiteralInNested(lit) {
483503
needsFunctionFormat = true
484504
break

internal/explain/format.go

Lines changed: 64 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -160,9 +160,22 @@ func formatArrayLiteral(val interface{}) string {
160160
if lit.Type == ast.LiteralInteger {
161161
switch val := lit.Value.(type) {
162162
case int64:
163-
parts = append(parts, fmt.Sprintf("Int64_%d", -val))
163+
negVal := -val
164+
// ClickHouse normalizes -0 to UInt64_0
165+
if negVal == 0 {
166+
parts = append(parts, "UInt64_0")
167+
} else if negVal > 0 {
168+
parts = append(parts, fmt.Sprintf("UInt64_%d", negVal))
169+
} else {
170+
parts = append(parts, fmt.Sprintf("Int64_%d", negVal))
171+
}
164172
case uint64:
165-
parts = append(parts, fmt.Sprintf("Int64_-%d", val))
173+
// ClickHouse normalizes -0 to UInt64_0
174+
if val == 0 {
175+
parts = append(parts, "UInt64_0")
176+
} else {
177+
parts = append(parts, fmt.Sprintf("Int64_-%d", val))
178+
}
166179
default:
167180
parts = append(parts, fmt.Sprintf("Int64_-%v", lit.Value))
168181
}
@@ -195,8 +208,19 @@ func formatNumericExpr(e ast.Expression) (string, bool) {
195208
if lit, ok := unary.Operand.(*ast.Literal); ok {
196209
switch val := lit.Value.(type) {
197210
case int64:
198-
return fmt.Sprintf("Int64_%d", -val), true
211+
negVal := -val
212+
// ClickHouse normalizes -0 to UInt64_0
213+
if negVal == 0 {
214+
return "UInt64_0", true
215+
} else if negVal > 0 {
216+
return fmt.Sprintf("UInt64_%d", negVal), true
217+
}
218+
return fmt.Sprintf("Int64_%d", negVal), true
199219
case uint64:
220+
// ClickHouse normalizes -0 to UInt64_0
221+
if val == 0 {
222+
return "UInt64_0", true
223+
}
200224
return fmt.Sprintf("Int64_%d", -int64(val)), true
201225
case float64:
202226
return fmt.Sprintf("Float64_%s", FormatFloat(-val)), true
@@ -289,6 +313,13 @@ func FormatDataType(dt *ast.DataType) string {
289313
} else if ident, ok := p.(*ast.Identifier); ok {
290314
// Identifier (e.g., function name in AggregateFunction types)
291315
params = append(params, ident.Name())
316+
} else if unary, ok := p.(*ast.UnaryExpr); ok {
317+
// Unary expression (e.g., -1 for negative numbers)
318+
if lit, ok := unary.Operand.(*ast.Literal); ok {
319+
params = append(params, fmt.Sprintf("%s%v", unary.Op, lit.Value))
320+
} else {
321+
params = append(params, fmt.Sprintf("%v", p))
322+
}
292323
} else {
293324
params = append(params, fmt.Sprintf("%v", p))
294325
}
@@ -469,7 +500,7 @@ func formatExprAsString(expr ast.Expression) string {
469500
case ast.LiteralNull:
470501
return "NULL"
471502
case ast.LiteralArray:
472-
return formatArrayAsString(e.Value)
503+
return formatArrayAsStringFromLiteral(e)
473504
case ast.LiteralTuple:
474505
return formatTupleAsString(e.Value)
475506
default:
@@ -519,6 +550,28 @@ func formatExprAsString(expr ast.Expression) string {
519550
}
520551
}
521552

553+
// formatArrayAsStringFromLiteral renders an array literal as bracketed text for :: cast syntax, formatting each element with formatElementAsString.
554+
// It mirrors the source's spacing: ", " between elements when lit.SpacedCommas is set and "[ ... ]" padding when lit.SpacedBrackets is set; a value that is not []ast.Expression yields "[]".
555+
func formatArrayAsStringFromLiteral(lit *ast.Literal) string {
556+
exprs, ok := lit.Value.([]ast.Expression)
557+
if !ok {
558+
return "[]"
559+
}
560+
var parts []string
561+
for _, e := range exprs {
562+
parts = append(parts, formatElementAsString(e))
563+
}
564+
separator := ","
565+
if lit.SpacedCommas {
566+
separator = ", "
567+
}
568+
// Pad the brackets with single spaces when lit.SpacedBrackets is set, i.e. the source had whitespace after [ (e.g., for multi-line arrays)
569+
if lit.SpacedBrackets {
570+
return "[ " + strings.Join(parts, separator) + " ]"
571+
}
572+
return "[" + strings.Join(parts, separator) + "]"
573+
}
574+
522575
// formatArrayAsString formats an array literal as a string for :: cast syntax
523576
func formatArrayAsString(val interface{}) string {
524577
exprs, ok := val.([]ast.Expression)
@@ -555,9 +608,14 @@ func formatElementAsString(expr ast.Expression) string {
555608
case ast.LiteralFloat:
556609
return fmt.Sprintf("%v", e.Value)
557610
case ast.LiteralString:
611+
s := e.Value.(string)
612+
// Check if this is a big integer stored as string (too large for int64/uint64)
613+
// These should NOT be quoted when formatted in arrays
614+
if e.IsBigInt {
615+
return s
616+
}
558617
// Quote strings with single quotes, triple-escape for nested context
559618
// Expected output format is \\\' (three backslashes + quote)
560-
s := e.Value.(string)
561619
// Triple-escape single quotes for nested string literal context
562620
s = strings.ReplaceAll(s, "'", "\\\\\\'")
563621
return "\\\\\\'" + s + "\\\\\\'"
@@ -569,7 +627,7 @@ func formatElementAsString(expr ast.Expression) string {
569627
case ast.LiteralNull:
570628
return "NULL"
571629
case ast.LiteralArray:
572-
return formatArrayAsString(e.Value)
630+
return formatArrayAsStringFromLiteral(e)
573631
case ast.LiteralTuple:
574632
return formatTupleAsString(e.Value)
575633
default:

internal/explain/functions.go

Lines changed: 59 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -115,13 +115,35 @@ func explainFunctionCallWithAlias(sb *strings.Builder, n *ast.FunctionCall, alia
115115
if n.Distinct {
116116
fnName = fnName + "Distinct"
117117
}
118+
// Append "If" if the function has a FILTER clause
119+
if n.Filter != nil {
120+
fnName = fnName + "If"
121+
}
118122
if alias != "" {
119123
fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, alias, children)
120124
} else {
121125
fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, children)
122126
}
123127
// Arguments (Settings are included as part of argument count)
124-
argCount := len(n.Arguments)
128+
// FILTER condition is appended to arguments for -If suffix functions
129+
// count(name) FILTER (WHERE cond) -> countIf(name, cond) - 2 args
130+
// count(*) FILTER (WHERE cond) -> countIf(cond) - 1 arg (asterisk dropped)
131+
var argCount int
132+
filterArgs := n.Arguments
133+
if n.Filter != nil {
134+
// Filter condition is appended as an extra argument
135+
// But first, remove any Asterisk arguments (count(*) case)
136+
var nonAsteriskArgs []ast.Expression
137+
for _, arg := range n.Arguments {
138+
if _, isAsterisk := arg.(*ast.Asterisk); !isAsterisk {
139+
nonAsteriskArgs = append(nonAsteriskArgs, arg)
140+
}
141+
}
142+
filterArgs = nonAsteriskArgs
143+
argCount = len(filterArgs) + 1 // +1 for filter condition
144+
} else {
145+
argCount = len(n.Arguments)
146+
}
125147
if len(n.Settings) > 0 {
126148
argCount++ // Set is counted as one argument
127149
}
@@ -130,7 +152,12 @@ func explainFunctionCallWithAlias(sb *strings.Builder, n *ast.FunctionCall, alia
130152
fmt.Fprintf(sb, " (children %d)", argCount)
131153
}
132154
fmt.Fprintln(sb)
133-
for _, arg := range n.Arguments {
155+
// Output arguments (filterArgs excludes Asterisk when FILTER is present)
156+
argsToOutput := filterArgs
157+
if n.Filter == nil {
158+
argsToOutput = n.Arguments
159+
}
160+
for _, arg := range argsToOutput {
134161
// For view() table function, unwrap Subquery wrapper
135162
// Also reset the subquery context since view() SELECT is not in a Subquery node
136163
if strings.ToLower(n.Name) == "view" {
@@ -144,6 +171,10 @@ func explainFunctionCallWithAlias(sb *strings.Builder, n *ast.FunctionCall, alia
144171
}
145172
Node(sb, arg, depth+2)
146173
}
174+
// Append filter condition at the end
175+
if n.Filter != nil {
176+
Node(sb, n.Filter, depth+2)
177+
}
147178
// Settings appear as Set node inside ExpressionList
148179
if len(n.Settings) > 0 {
149180
fmt.Fprintf(sb, "%s Set\n", indent)
@@ -567,8 +598,8 @@ func explainCastExprWithAlias(sb *strings.Builder, n *ast.CastExpr, alias string
567598
if lit.Type == ast.LiteralArray || lit.Type == ast.LiteralTuple {
568599
if useArrayFormat {
569600
fmt.Fprintf(sb, "%s Literal %s\n", indent, FormatLiteral(lit))
570-
} else if containsCastExpressions(lit) {
571-
// Array contains CastExpr elements - output as Function array with children
601+
} else if containsCastExpressions(lit) || !containsOnlyLiterals(lit) {
602+
// Array contains CastExpr or non-literal elements - output as Function array with children
572603
Node(sb, n.Expr, depth+2)
573604
} else {
574605
// Simple literals (including negative numbers) - format as string
@@ -738,6 +769,7 @@ func containsCastExpressions(lit *ast.Literal) bool {
738769
}
739770

740771
// containsOnlyLiterals checks if a literal array/tuple contains only literal values (no expressions)
772+
// This includes negated literals (UnaryExpr with Op="-" and Literal operand)
741773
func containsOnlyLiterals(lit *ast.Literal) bool {
742774
var exprs []ast.Expression
743775
switch lit.Type {
@@ -752,16 +784,24 @@ func containsOnlyLiterals(lit *ast.Literal) bool {
752784
}
753785

754786
for _, e := range exprs {
755-
innerLit, ok := e.(*ast.Literal)
756-
if !ok {
757-
return false
787+
// Check if it's a direct literal
788+
if innerLit, ok := e.(*ast.Literal); ok {
789+
// Nested arrays/tuples need recursive check
790+
if innerLit.Type == ast.LiteralArray || innerLit.Type == ast.LiteralTuple {
791+
if !containsOnlyLiterals(innerLit) {
792+
return false
793+
}
794+
}
795+
continue
758796
}
759-
// Nested arrays/tuples need recursive check
760-
if innerLit.Type == ast.LiteralArray || innerLit.Type == ast.LiteralTuple {
761-
if !containsOnlyLiterals(innerLit) {
762-
return false
797+
// Check if it's a negated literal (e.g., -1)
798+
if unary, ok := e.(*ast.UnaryExpr); ok && unary.Op == "-" {
799+
if _, isLit := unary.Operand.(*ast.Literal); isLit {
800+
continue
763801
}
764802
}
803+
// Not a literal or negated literal
804+
return false
765805
}
766806
return true
767807
}
@@ -986,10 +1026,11 @@ func explainInExpr(sb *strings.Builder, n *ast.InExpr, indent string, depth int)
9861026
// Check if this tuple contains only primitive literals (including unary negation)
9871027
if !containsOnlyPrimitiveLiteralsWithUnary(lit) {
9881028
allTuplesArePrimitive = false
1029+
allPrimitiveLiterals = false // Non-primitive tuple breaks the mixed literal check too
9891030
}
9901031
}
991-
// Check if it's a primitive literal type (not a tuple or complex type)
992-
if lit.Type == ast.LiteralTuple || lit.Type == ast.LiteralArray {
1032+
// Arrays break the primitive literals check
1033+
if lit.Type == ast.LiteralArray {
9931034
allPrimitiveLiterals = false
9941035
}
9951036
} else if isNumericExpr(item) {
@@ -1133,7 +1174,8 @@ func explainInExprWithAlias(sb *strings.Builder, n *ast.InExpr, alias string, in
11331174
allBooleansOrNull := true
11341175
allTuples := true
11351176
allTuplesArePrimitive := true
1136-
hasNonNull := false // Need at least one non-null value
1177+
allPrimitiveLiterals := true // Any mix of primitive literals (numbers, strings, booleans, null, primitive tuples)
1178+
hasNonNull := false // Need at least one non-null value
11371179
for _, item := range n.List {
11381180
if lit, ok := item.(*ast.Literal); ok {
11391181
if lit.Type == ast.LiteralNull {
@@ -1155,6 +1197,7 @@ func explainInExprWithAlias(sb *strings.Builder, n *ast.InExpr, alias string, in
11551197
} else {
11561198
if !containsOnlyPrimitiveLiterals(lit) {
11571199
allTuplesArePrimitive = false
1200+
allPrimitiveLiterals = false
11581201
}
11591202
}
11601203
} else if isNumericExpr(item) {
@@ -1167,10 +1210,11 @@ func explainInExprWithAlias(sb *strings.Builder, n *ast.InExpr, alias string, in
11671210
allStringsOrNull = false
11681211
allBooleansOrNull = false
11691212
allTuples = false
1213+
allPrimitiveLiterals = false
11701214
break
11711215
}
11721216
}
1173-
canBeTupleLiteral = hasNonNull && (allNumericOrNull || (allStringsOrNull && len(n.List) <= maxStringTupleSizeWithAlias) || allBooleansOrNull || (allTuples && allTuplesArePrimitive))
1217+
canBeTupleLiteral = hasNonNull && (allNumericOrNull || (allStringsOrNull && len(n.List) <= maxStringTupleSizeWithAlias) || allBooleansOrNull || (allTuples && allTuplesArePrimitive) || allPrimitiveLiterals)
11741218
}
11751219

11761220
// Count arguments

0 commit comments

Comments
 (0)