Skip to content

Commit 8bfdf70

Browse files
committed
draft
1 parent 553ac83 commit 8bfdf70

File tree

4 files changed

+235
-58
lines changed

4 files changed

+235
-58
lines changed

pkg/sql/opt/memo/testdata/stats/canary-stats

Lines changed: 179 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ SELECT a FROM sel ORDER BY a
6262
----
6363
scan sel
6464
├── columns: a:1(int!null)
65-
├── stats: [rows=20]
65+
├── stats: [rows=30]
6666
├── key: (1)
6767
└── ordering: +1
6868

@@ -101,7 +101,7 @@ SELECT a FROM sel ORDER BY a
101101
----
102102
scan sel
103103
├── columns: a:1(int!null)
104-
├── stats: [rows=10]
104+
├── stats: [rows=20]
105105
├── key: (1)
106106
└── ordering: +1
107107

@@ -134,7 +134,7 @@ scan sel
134134
├── key: (1)
135135
└── ordering: +1
136136

137-
137+
# Test that for stable stats, we always pick the last available *full* stats.
138138
exec-ddl
139139
ALTER TABLE sel INJECT STATISTICS '[
140140
{
@@ -145,9 +145,9 @@ ALTER TABLE sel INJECT STATISTICS '[
145145
},
146146
{
147147
"columns": ["a"],
148-
"created_at": "2018-01-01 11:00:00.00000+00:00",
149-
"row_count": 30,
150-
"distinct_count": 30,
148+
"created_at": "2018-01-01 10:00:30.00000+00:00",
149+
"row_count": 40,
150+
"distinct_count": 40,
151151
"partial_predicate": "(a < 0:::INT8) OR ((a > 8:::INT8) OR (a IS NULL))"
152152
}
153153
]'
@@ -161,3 +161,176 @@ scan sel
161161
├── stats: [rows=30]
162162
├── key: (1)
163163
└── ordering: +1
164+
165+
# Test a query over multiple tables where one table has canary_window defined and the other does not.
166+
exec-ddl
167+
CREATE TABLE sel2 (a INT PRIMARY KEY)
168+
----
169+
170+
exec-ddl
171+
ALTER TABLE sel2 INJECT STATISTICS '[
172+
{
173+
"columns": ["a"],
174+
"created_at": "2018-01-01 11:00:40.00000+00:00",
175+
"row_count": 100,
176+
"distinct_count": 100
177+
},
178+
{
179+
"columns": ["a"],
180+
"created_at": "2018-01-01 11:00:30.00000+00:00",
181+
"row_count": 200,
182+
"distinct_count": 200
183+
}
184+
]'
185+
----
186+
187+
exec-ddl
188+
ALTER TABLE sel INJECT STATISTICS '[
189+
{
190+
"columns": ["a"],
191+
"created_at": "2018-01-01 11:00:40.00000+00:00",
192+
"row_count": 30,
193+
"distinct_count": 30
194+
},
195+
{
196+
"columns": ["a"],
197+
"created_at": "2018-01-01 11:00:30.00000+00:00",
198+
"row_count": 20,
199+
"distinct_count": 20
200+
}
201+
]'
202+
----
203+
204+
opt stats-as-of=(2018-01-01 11:00:50.00000+00:00) canary-stats=true
205+
SELECT * FROM sel JOIN sel2 ON sel.a = sel2.a ORDER BY sel.a
206+
----
207+
inner-join (merge)
208+
├── columns: a:1(int!null) a:4(int!null)
209+
├── left ordering: +4
210+
├── right ordering: +1
211+
├── stats: [rows=30, distinct(1)=30, null(1)=0, distinct(4)=30, null(4)=0]
212+
├── key: (4)
213+
├── fd: (1)==(4), (4)==(1)
214+
├── ordering: +(1|4) [actual: +4]
215+
├── scan sel2
216+
│ ├── columns: sel2.a:4(int!null)
217+
│ ├── stats: [rows=100, distinct(4)=100, null(4)=0]
218+
│ ├── key: (4)
219+
│ └── ordering: +4
220+
├── scan sel
221+
│ ├── columns: sel.a:1(int!null)
222+
│ ├── stats: [rows=30, distinct(1)=30, null(1)=0]
223+
│ ├── key: (1)
224+
│ └── ordering: +1
225+
└── filters (true)
226+
227+
228+
opt stats-as-of=(2018-01-01 11:00:50.00000+00:00) canary-stats=false
229+
SELECT * FROM sel JOIN sel2 ON sel.a = sel2.a ORDER BY sel.a
230+
----
231+
inner-join (lookup sel2)
232+
├── columns: a:1(int!null) a:4(int!null)
233+
├── key columns:
234+
├── lookup columns are key
235+
├── stats: [rows=20, distinct(1)=20, null(1)=0, distinct(4)=20, null(4)=0]
236+
├── key: (4)
237+
├── fd: (1)==(4), (4)==(1)
238+
├── ordering: +(1|4) [actual: +1]
239+
├── scan sel
240+
│ ├── columns: sel.a:1(int!null)
241+
│ ├── stats: [rows=20, distinct(1)=20, null(1)=0]
242+
│ ├── key: (1)
243+
│ └── ordering: +1
244+
└── filters (true)
245+
246+
# Test a query over multiple tables where one table has canary_window defined and the other does not.
247+
exec-ddl
248+
CREATE TABLE sel2 (a INT PRIMARY KEY)
249+
----
250+
251+
252+
exec-ddl
253+
ALTER TABLE sel2 INJECT STATISTICS '[
254+
{
255+
"columns": ["a"],
256+
"created_at": "2018-01-01 11:00:40.00000+00:00",
257+
"row_count": 10,
258+
"distinct_count": 10
259+
},
260+
{
261+
"columns": ["a"],
262+
"created_at": "2018-01-01 11:00:30.00000+00:00",
263+
"row_count": 5,
264+
"distinct_count": 5
265+
}
266+
]'
267+
----
268+
269+
exec-ddl
270+
ALTER TABLE sel INJECT STATISTICS '[
271+
{
272+
"columns": ["a"],
273+
"created_at": "2018-01-01 11:00:40.00000+00:00",
274+
"row_count": 30,
275+
"distinct_count": 30
276+
},
277+
{
278+
"columns": ["a"],
279+
"created_at": "2018-01-01 11:00:30.00000+00:00",
280+
"row_count": 20,
281+
"distinct_count": 20
282+
}
283+
]'
284+
----
285+
286+
# Table sel2 doesn't have a canary window defined, so we should always use
287+
# the most recent stats for it, which report 10 rows.
288+
# So with canary-stats=false, we should pick stable stats for table sel (rowCount=20)
289+
# and the latest stats for table sel2 (rowCount=10).
290+
opt stats-as-of=(2018-01-01 11:00:50.00000+00:00) canary-stats=false
291+
SELECT * FROM sel JOIN sel2 ON sel.a = sel2.a ORDER BY sel.a
292+
----
293+
inner-join (merge)
294+
├── columns: a:1(int!null) a:4(int!null)
295+
├── left ordering: +1
296+
├── right ordering: +4
297+
├── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0]
298+
├── key: (4)
299+
├── fd: (1)==(4), (4)==(1)
300+
├── ordering: +(1|4) [actual: +1]
301+
├── scan sel
302+
│ ├── columns: sel.a:1(int!null)
303+
│ ├── stats: [rows=20, distinct(1)=20, null(1)=0]
304+
│ ├── key: (1)
305+
│ └── ordering: +1
306+
├── scan sel2
307+
│ ├── columns: sel2.a:4(int!null)
308+
│ ├── stats: [rows=10, distinct(4)=10, null(4)=0]
309+
│ ├── key: (4)
310+
│ └── ordering: +4
311+
└── filters (true)
312+
313+
# With canary-stats=true, we should pick the canary stats for table sel
314+
# (rowCount=30) and the latest stats for table sel2 (rowCount=10).
315+
opt stats-as-of=(2018-01-01 11:00:50.00000+00:00) canary-stats=true
316+
SELECT * FROM sel JOIN sel2 ON sel.a = sel2.a ORDER BY sel.a
317+
----
318+
inner-join (merge)
319+
├── columns: a:1(int!null) a:4(int!null)
320+
├── left ordering: +1
321+
├── right ordering: +4
322+
├── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0]
323+
├── key: (4)
324+
├── fd: (1)==(4), (4)==(1)
325+
├── ordering: +(1|4) [actual: +1]
326+
├── scan sel
327+
│ ├── columns: sel.a:1(int!null)
328+
│ ├── stats: [rows=30, distinct(1)=30, null(1)=0]
329+
│ ├── key: (1)
330+
│ └── ordering: +1
331+
├── scan sel2
332+
│ ├── columns: sel2.a:4(int!null)
333+
│ ├── stats: [rows=10, distinct(4)=10, null(4)=0]
334+
│ ├── key: (4)
335+
│ └── ordering: +4
336+
└── filters (true)

pkg/sql/opt/memo/testdata/stats/histogram-canary-stats

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -213,23 +213,23 @@ ALTER TABLE hist_tbl INJECT STATISTICS '[
213213
----
214214

215215
# Test histogram-based range queries with canary stats
216-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=1.0
216+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=true
217217
SELECT score FROM hist_tbl WHERE score BETWEEN 50 AND 200
218218
----
219219
select
220220
├── columns: score:2(int!null)
221-
├── stats: [rows=441.766, distinct(2)=14.9061, null(2)=0]
222-
│ histogram(2)= 0 0 91.011 45 44.662 0.45113
221+
├── stats: [rows=490.851, distinct(2)=16.576, null(2)=0]
222+
│ histogram(2)= 0 0 101.12 50 49.624 0.50125
223223
│ <--- 49 -------- 100 --------- 200 -
224224
├── scan hist_tbl
225225
│ ├── columns: score:2(int)
226-
│ └── stats: [rows=1800, distinct(2)=90, null(2)=0]
227-
│ histogram(2)= 0 18 162 45 180 27 270 36
226+
│ └── stats: [rows=2000, distinct(2)=100, null(2)=0]
227+
│ histogram(2)= 0 20 180 50 200 30 300 40
228228
│ <--- 10 ----- 100 ----- 500 ----- 1000
229229
└── filters
230230
└── (score:2 >= 50) AND (score:2 <= 200) [type=bool, outer=(2), constraints=(/2: [/50 - /200]; tight)]
231231

232-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=0.0
232+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=false
233233
SELECT score FROM hist_tbl WHERE score BETWEEN 50 AND 200
234234
----
235235
select
@@ -246,24 +246,24 @@ select
246246
└── (score:2 >= 50) AND (score:2 <= 200) [type=bool, outer=(2), constraints=(/2: [/50 - /200]; tight)]
247247

248248
# Test string histogram with equality predicates
249-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=1.0
249+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=true
250250
SELECT category FROM hist_tbl WHERE category = 'electronics'
251251
----
252252
select
253253
├── columns: category:3(string!null)
254-
├── stats: [rows=334.711, distinct(3)=1, null(3)=0]
255-
│ histogram(3)= 0 225
254+
├── stats: [rows=371.747, distinct(3)=1, null(3)=0]
255+
│ histogram(3)= 0 250
256256
│ <--- 'electronics'
257257
├── fd: ()-->(3)
258258
├── scan hist_tbl
259259
│ ├── columns: category:3(string)
260-
│ └── stats: [rows=1800, distinct(3)=7, null(3)=4]
261-
│ histogram(3)= 0 4 0 225 270 180 135 162 90 144
262-
│ <--- NULL --- 'electronics' ----- 'home' ----- 'sports' ---- 'toys'
260+
│ └── stats: [rows=2000, distinct(3)=8, null(3)=5]
261+
│ histogram(3)= 0 5 0 250 300 200 150 180 100 160
262+
│ <--- NULL --- 'electronics' ----- 'home' ----- 'sports' ----- 'toys'
263263
└── filters
264264
└── category:3 = 'electronics' [type=bool, outer=(3), constraints=(/3: [/'electronics' - /'electronics']; tight), fd=()-->(3)]
265265

266-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=0.0
266+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=false
267267
SELECT category FROM hist_tbl WHERE category = 'electronics'
268268
----
269269
select
@@ -281,24 +281,24 @@ select
281281
└── category:3 = 'electronics' [type=bool, outer=(3), constraints=(/3: [/'electronics' - /'electronics']; tight), fd=()-->(3)]
282282

283283
# Test decimal histogram with range queries
284-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=1.0
284+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=true
285285
SELECT price FROM hist_tbl WHERE price > 50.0 AND price <= 500.0
286286
----
287287
select
288288
├── columns: price:4(decimal!null)
289289
├── immutable
290-
├── stats: [rows=825.185, distinct(4)=110.992, null(4)=0]
291-
│ histogram(4)= 0 0 199.96 72 120 0
292-
│ <--- 50.0 -------- 99.99 ----- 500.0
290+
├── stats: [rows=916.873, distinct(4)=123.213, null(4)=0]
291+
│ histogram(4)= 0 0 222.18 80 133.34 0
292+
│ <--- 50.0 -------- 99.99 -------- 500.0
293293
├── scan hist_tbl
294294
│ ├── columns: price:4(decimal)
295-
│ └── stats: [rows=1800, distinct(4)=450, null(4)=9]
296-
│ histogram(4)= 0 9 0 90 360 72 270 54
295+
│ └── stats: [rows=2000, distinct(4)=500, null(4)=10]
296+
│ histogram(4)= 0 10 0 100 400 80 300 60
297297
│ <--- NULL --- 9.99 ----- 99.99 ----- 999.99
298298
└── filters
299299
└── (price:4 > 50.0) AND (price:4 <= 500.0) [type=bool, outer=(4), immutable, constraints=(/4: (/50.0 - /500.0]; tight)]
300300

301-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=0.0
301+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=false
302302
SELECT price FROM hist_tbl WHERE price > 50.0 AND price <= 500.0
303303
----
304304
select
@@ -316,32 +316,32 @@ select
316316
└── (price:4 > 50.0) AND (price:4 <= 500.0) [type=bool, outer=(4), immutable, constraints=(/4: (/50.0 - /500.0]; tight)]
317317

318318
# Test combined histogram and non-histogram columns
319-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=1.0
319+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=true
320320
SELECT * FROM hist_tbl WHERE score > 100 AND category LIKE 'h%'
321321
----
322322
select
323323
├── columns: id:1(int!null) score:2(int!null) category:3(string!null) price:4(decimal)
324-
├── stats: [rows=233.644, distinct(2)=47, null(2)=0, distinct(3)=1.53618, null(3)=0, distinct(2,3)=72.2006, null(2,3)=0]
325-
│ histogram(2)= 0 0 81.98 12.297 122.97 16.396
326-
│ <--- 100 ------- 500 --------- 1000
327-
│ histogram(3)= 0 0 39.018 180 6.9286 0
324+
├── stats: [rows=259.497, distinct(2)=52, null(2)=0, distinct(3)=1.53618, null(3)=0, distinct(2,3)=79.8815, null(2,3)=0]
325+
│ histogram(2)= 0 0 91.052 13.658 136.58 18.21
326+
│ <--- 100 -------- 500 --------- 1000
327+
│ histogram(3)= 0 0 43.354 200 7.6984 0
328328
│ <--- 'h' -------- 'home' -------- 'i'
329329
├── key: (1)
330330
├── fd: (1)-->(2-4)
331331
├── scan hist_tbl
332332
│ ├── columns: id:1(int!null) score:2(int) category:3(string) price:4(decimal)
333-
│ ├── stats: [rows=1800, distinct(1)=1800, null(1)=0, distinct(2)=90, null(2)=0, distinct(3)=7, null(3)=4, distinct(2,3)=630, null(2,3)=0]
334-
│ │ histogram(2)= 0 18 162 45 180 27 270 36
333+
│ ├── stats: [rows=2000, distinct(1)=2000, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=8, null(3)=5, distinct(2,3)=800, null(2,3)=0]
334+
│ │ histogram(2)= 0 20 180 50 200 30 300 40
335335
│ │ <--- 10 ----- 100 ----- 500 ----- 1000
336-
│ │ histogram(3)= 0 4 0 225 270 180 135 162 90 144
337-
│ │ <--- NULL --- 'electronics' ----- 'home' ----- 'sports' ---- 'toys'
336+
│ │ histogram(3)= 0 5 0 250 300 200 150 180 100 160
337+
│ │ <--- NULL --- 'electronics' ----- 'home' ----- 'sports' ----- 'toys'
338338
│ ├── key: (1)
339339
│ └── fd: (1)-->(2-4)
340340
└── filters
341341
├── score:2 > 100 [type=bool, outer=(2), constraints=(/2: [/101 - ]; tight)]
342342
└── category:3 LIKE 'h%' [type=bool, outer=(3), constraints=(/3: [/'h' - /'i'); tight)]
343343

344-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=0.0
344+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=false
345345
SELECT * FROM hist_tbl WHERE score > 100 AND category LIKE 'h%'
346346
----
347347
select
@@ -367,28 +367,28 @@ select
367367
└── category:3 LIKE 'h%' [type=bool, outer=(3), constraints=(/3: [/'h' - /'i'); tight)]
368368

369369
# Test histogram with aggregations
370-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=1.0
370+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=true
371371
SELECT category, count(*), avg(score), max(price) FROM hist_tbl GROUP BY category
372372
----
373373
group-by (hash)
374374
├── columns: category:3(string) count:7(int!null) avg:8(decimal) max:9(decimal)
375375
├── grouping columns: category:3(string)
376-
├── stats: [rows=7, distinct(3)=7, null(3)=1]
376+
├── stats: [rows=8, distinct(3)=8, null(3)=1]
377377
├── key: (3)
378378
├── fd: (3)-->(7-9)
379379
├── scan hist_tbl
380380
│ ├── columns: score:2(int) category:3(string) price:4(decimal)
381-
│ └── stats: [rows=1800, distinct(3)=7, null(3)=4]
382-
│ histogram(3)= 0 4 0 225 270 180 135 162 90 144
383-
│ <--- NULL --- 'electronics' ----- 'home' ----- 'sports' ---- 'toys'
381+
│ └── stats: [rows=2000, distinct(3)=8, null(3)=5]
382+
│ histogram(3)= 0 5 0 250 300 200 150 180 100 160
383+
│ <--- NULL --- 'electronics' ----- 'home' ----- 'sports' ----- 'toys'
384384
└── aggregations
385385
├── count-rows [as=count_rows:7, type=int]
386386
├── avg [as=avg:8, type=decimal, outer=(2)]
387387
│ └── score:2 [type=int]
388388
└── max [as=max:9, type=decimal, outer=(4)]
389389
└── price:4 [type=decimal]
390390

391-
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-fraction=0.0
391+
opt stats-as-of=(2018-01-01 13:00:30.00000+00:00) canary-stats=false
392392
SELECT category, count(*), avg(score), max(price) FROM hist_tbl GROUP BY category
393393
----
394394
group-by (hash)

0 commit comments

Comments
 (0)