Skip to content

Commit 5717cfc

Browse files
avoid more select stars
1 parent 2d382c6 commit 5717cfc

File tree

11 files changed

+121
-173
lines changed

11 files changed

+121
-173
lines changed

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def compile_concat(
293293
]
294294

295295
return sqlglot_ir.SQLGlotIR.from_union(
296-
[child._as_select() for child in children],
296+
[child.expr.as_select_all() for child in children],
297297
output_aliases=output_aliases,
298298
uid_gen=uid_gen,
299299
)

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -254,11 +254,6 @@ def select(
254254
limit: typing.Optional[int] = None,
255255
) -> SQLGlotIR:
256256
# TODO: Explicitly insert CTEs into plan
257-
new_expr = sge.Select().from_(self._as_from_item())
258-
259-
if len(sorting) > 0:
260-
new_expr = new_expr.order_by(*sorting)
261-
262257
if len(selections) > 0:
263258
to_select = [
264259
sge.Alias(
@@ -269,9 +264,12 @@ def select(
269264
else expr
270265
for id, expr in selections
271266
]
272-
new_expr = new_expr.select(*to_select, append=False)
267+
new_expr = self.expr.select(*to_select)
273268
else:
274-
new_expr = new_expr.select(sge.Star(), append=False)
269+
new_expr = self.expr.as_select_all()
270+
271+
if len(sorting) > 0:
272+
new_expr = new_expr.order_by(*sorting)
275273

276274
if len(predicates) > 0:
277275
condition = _and(predicates)
@@ -340,8 +338,8 @@ def join(
340338
joins_nulls: bool = True,
341339
) -> SQLGlotIR:
342340
"""Joins the current query with another SQLGlotIR instance."""
343-
left_from = self._as_from_item()
344-
right_from = right._as_from_item()
341+
left_from = self.expr.as_from_item()
342+
right_from = right.expr.as_from_item()
345343

346344
join_on = _and(
347345
tuple(
@@ -365,7 +363,7 @@ def isin_join(
365363
joins_nulls: bool = True,
366364
) -> SQLGlotIR:
367365
"""Joins the current query with another SQLGlotIR instance."""
368-
left_from = self._as_from_item()
366+
left_from = self.expr.as_from_item()
369367

370368
new_column: sge.Expression
371369
if joins_nulls:
@@ -470,12 +468,12 @@ def with_ctes(
470468
) -> SQLGlotIR:
471469
sge_ctes = [
472470
sge.CTE(
473-
this=cte._as_select(),
471+
this=cte.expr.as_select_all(),
474472
alias=sql.identifier(cte_name),
475473
)
476474
for cte_name, cte in ctes
477475
]
478-
select_expr = _set_query_ctes(self._as_select(), sge_ctes)
476+
select_expr = _set_query_ctes(self.expr.as_select_all(), sge_ctes)
479477
return SQLGlotIR.from_expr(expr=select_expr, uid_gen=self.uid_gen)
480478

481479
def resample(
@@ -507,8 +505,8 @@ def resample(
507505
new_expr = (
508506
sge.Select()
509507
.select(unnested_column_alias.as_(final_col_id))
510-
.from_(self._as_from_item())
511-
.join(right._as_from_item(), join_type="cross")
508+
.from_(self.expr.as_from_item())
509+
.join(right.expr.as_from_item(), join_type="cross")
512510
.join(unnest_expr, join_type="cross")
513511
)
514512

@@ -574,15 +572,9 @@ def _explode_multiple_columns(
574572
new_expr = self.expr.select(selection).join(unnest_expr, join_type="LEFT")
575573
return SQLGlotIR.from_expr(expr=new_expr, uid_gen=self.uid_gen)
576574

577-
def _as_from_item(self) -> typing.Union[sge.Subquery, sge.Table, sge.Unnest]:
578-
return self.expr.as_from_item()
579-
580-
def _as_select(self) -> sge.Select:
581-
return self.expr.as_select_all()
582-
583575
def _as_subquery(self) -> sge.Subquery:
584576
# Sometimes explicitly need a subquery, e.g. for IN expressions.
585-
return self._as_select().subquery()
577+
return self.expr.as_select_all().subquery()
586578

587579

588580
def _and(conditions: tuple[sge.Expression, ...]) -> typing.Optional[sge.Expression]:

tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql

Lines changed: 30 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,8 @@ WITH `bfcte_0` AS (
1414
7000000
1515
)
1616
) AS INT64) AS `bfcol_14`
17-
FROM (
18-
SELECT
19-
*
20-
FROM `bfcte_0`
21-
CROSS JOIN `bfcte_1`
22-
)
17+
FROM `bfcte_0`
18+
CROSS JOIN `bfcte_1`
2319
)
2420
SELECT
2521
CAST(TIMESTAMP_MICROS(
@@ -32,48 +28,40 @@ FROM (
3228
*
3329
FROM (
3430
SELECT
35-
*
31+
`bfcol_27` AS `bfcol_17`
3632
FROM (
3733
SELECT
38-
`bfcol_27` AS `bfcol_17`
39-
FROM (
40-
SELECT
41-
MIN(`bfcol_14`) AS `bfcol_16`
42-
FROM `bfcte_2`
43-
)
44-
CROSS JOIN (
45-
SELECT
46-
MAX(`bfcol_14`) AS `bfcol_15`
47-
FROM `bfcte_2`
48-
)
49-
CROSS JOIN UNNEST(GENERATE_ARRAY(`bfcol_16`, `bfcol_15`, 1)) AS `bfcol_27`
34+
MIN(`bfcol_14`) AS `bfcol_16`
35+
FROM `bfcte_2`
5036
)
51-
CROSS JOIN `bfcte_1`
52-
)
53-
LEFT JOIN (
54-
SELECT
55-
`bfcol_6` AS `bfcol_11`,
56-
`bfcol_7` AS `bfcol_12`,
57-
CAST(FLOOR(
58-
IEEE_DIVIDE(
59-
UNIX_MICROS(CAST(`bfcol_5` AS TIMESTAMP)) - UNIX_MICROS(CAST(CAST(`bfcol_8` AS DATE) AS TIMESTAMP)),
60-
7000000
61-
)
62-
) AS INT64) AS `bfcol_13`
63-
FROM (
37+
CROSS JOIN (
6438
SELECT
65-
*
66-
FROM (
67-
SELECT
68-
`bfcol_1` AS `bfcol_5`,
69-
`bfcol_2` AS `bfcol_6`,
70-
`bfcol_3` AS `bfcol_7`
71-
FROM UNNEST(ARRAY<STRUCT<`bfcol_1` DATETIME, `bfcol_2` INT64, `bfcol_3` INT64>>[STRUCT(CAST('2021-01-01T13:00:00' AS DATETIME), 0, 10), STRUCT(CAST('2021-01-01T13:00:01' AS DATETIME), 1, 11), STRUCT(CAST('2021-01-01T13:00:02' AS DATETIME), 2, 12), STRUCT(CAST('2021-01-01T13:00:03' AS DATETIME), 3, 13), STRUCT(CAST('2021-01-01T13:00:04' AS DATETIME), 4, 14), STRUCT(CAST('2021-01-01T13:00:05' AS DATETIME), 5, 15), STRUCT(CAST('2021-01-01T13:00:06' AS DATETIME), 6, 16), STRUCT(CAST('2021-01-01T13:00:07' AS DATETIME), 7, 17), STRUCT(CAST('2021-01-01T13:00:08' AS DATETIME), 8, 18), STRUCT(CAST('2021-01-01T13:00:09' AS DATETIME), 9, 19), STRUCT(CAST('2021-01-01T13:00:10' AS DATETIME), 10, 20), STRUCT(CAST('2021-01-01T13:00:11' AS DATETIME), 11, 21), STRUCT(CAST('2021-01-01T13:00:12' AS DATETIME), 12, 22), STRUCT(CAST('2021-01-01T13:00:13' AS DATETIME), 13, 23), STRUCT(CAST('2021-01-01T13:00:14' AS DATETIME), 14, 24), STRUCT(CAST('2021-01-01T13:00:15' AS DATETIME), 15, 25), STRUCT(CAST('2021-01-01T13:00:16' AS DATETIME), 16, 26), STRUCT(CAST('2021-01-01T13:00:17' AS DATETIME), 17, 27), STRUCT(CAST('2021-01-01T13:00:18' AS DATETIME), 18, 28), STRUCT(CAST('2021-01-01T13:00:19' AS DATETIME), 19, 29), STRUCT(CAST('2021-01-01T13:00:20' AS DATETIME), 20, 30), STRUCT(CAST('2021-01-01T13:00:21' AS DATETIME), 21, 31), STRUCT(CAST('2021-01-01T13:00:22' AS DATETIME), 22, 32), STRUCT(CAST('2021-01-01T13:00:23' AS DATETIME), 23, 33), STRUCT(CAST('2021-01-01T13:00:24' AS DATETIME), 24, 34), STRUCT(CAST('2021-01-01T13:00:25' AS DATETIME), 25, 35), STRUCT(CAST('2021-01-01T13:00:26' AS DATETIME), 26, 36), STRUCT(CAST('2021-01-01T13:00:27' AS DATETIME), 27, 37), STRUCT(CAST('2021-01-01T13:00:28' AS DATETIME), 28, 38), STRUCT(CAST('2021-01-01T13:00:29' AS DATETIME), 29, 39)])
72-
)
73-
CROSS JOIN `bfcte_1`
39+
MAX(`bfcol_14`) AS `bfcol_15`
40+
FROM `bfcte_2`
7441
)
42+
CROSS JOIN UNNEST(GENERATE_ARRAY(`bfcol_16`, `bfcol_15`, 1)) AS `bfcol_27`
43+
)
44+
CROSS JOIN `bfcte_1`
45+
)
46+
LEFT JOIN (
47+
SELECT
48+
`bfcol_6` AS `bfcol_11`,
49+
`bfcol_7` AS `bfcol_12`,
50+
CAST(FLOOR(
51+
IEEE_DIVIDE(
52+
UNIX_MICROS(CAST(`bfcol_5` AS TIMESTAMP)) - UNIX_MICROS(CAST(CAST(`bfcol_8` AS DATE) AS TIMESTAMP)),
53+
7000000
54+
)
55+
) AS INT64) AS `bfcol_13`
56+
FROM (
57+
SELECT
58+
`bfcol_1` AS `bfcol_5`,
59+
`bfcol_2` AS `bfcol_6`,
60+
`bfcol_3` AS `bfcol_7`
61+
FROM UNNEST(ARRAY<STRUCT<`bfcol_1` DATETIME, `bfcol_2` INT64, `bfcol_3` INT64>>[STRUCT(CAST('2021-01-01T13:00:00' AS DATETIME), 0, 10), STRUCT(CAST('2021-01-01T13:00:01' AS DATETIME), 1, 11), STRUCT(CAST('2021-01-01T13:00:02' AS DATETIME), 2, 12), STRUCT(CAST('2021-01-01T13:00:03' AS DATETIME), 3, 13), STRUCT(CAST('2021-01-01T13:00:04' AS DATETIME), 4, 14), STRUCT(CAST('2021-01-01T13:00:05' AS DATETIME), 5, 15), STRUCT(CAST('2021-01-01T13:00:06' AS DATETIME), 6, 16), STRUCT(CAST('2021-01-01T13:00:07' AS DATETIME), 7, 17), STRUCT(CAST('2021-01-01T13:00:08' AS DATETIME), 8, 18), STRUCT(CAST('2021-01-01T13:00:09' AS DATETIME), 9, 19), STRUCT(CAST('2021-01-01T13:00:10' AS DATETIME), 10, 20), STRUCT(CAST('2021-01-01T13:00:11' AS DATETIME), 11, 21), STRUCT(CAST('2021-01-01T13:00:12' AS DATETIME), 12, 22), STRUCT(CAST('2021-01-01T13:00:13' AS DATETIME), 13, 23), STRUCT(CAST('2021-01-01T13:00:14' AS DATETIME), 14, 24), STRUCT(CAST('2021-01-01T13:00:15' AS DATETIME), 15, 25), STRUCT(CAST('2021-01-01T13:00:16' AS DATETIME), 16, 26), STRUCT(CAST('2021-01-01T13:00:17' AS DATETIME), 17, 27), STRUCT(CAST('2021-01-01T13:00:18' AS DATETIME), 18, 28), STRUCT(CAST('2021-01-01T13:00:19' AS DATETIME), 19, 29), STRUCT(CAST('2021-01-01T13:00:20' AS DATETIME), 20, 30), STRUCT(CAST('2021-01-01T13:00:21' AS DATETIME), 21, 31), STRUCT(CAST('2021-01-01T13:00:22' AS DATETIME), 22, 32), STRUCT(CAST('2021-01-01T13:00:23' AS DATETIME), 23, 33), STRUCT(CAST('2021-01-01T13:00:24' AS DATETIME), 24, 34), STRUCT(CAST('2021-01-01T13:00:25' AS DATETIME), 25, 35), STRUCT(CAST('2021-01-01T13:00:26' AS DATETIME), 26, 36), STRUCT(CAST('2021-01-01T13:00:27' AS DATETIME), 27, 37), STRUCT(CAST('2021-01-01T13:00:28' AS DATETIME), 28, 38), STRUCT(CAST('2021-01-01T13:00:29' AS DATETIME), 29, 39)])
7562
)
76-
ON `bfcol_17` = `bfcol_13`
63+
CROSS JOIN `bfcte_1`
7764
)
65+
ON `bfcol_17` = `bfcol_13`
7866
ORDER BY
7967
`bfcol_17` ASC NULLS LAST

tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,15 @@ SELECT
33
`bfcol_5` AS `int64_too`
44
FROM (
55
SELECT
6-
*
7-
FROM (
8-
SELECT
9-
`rowindex` AS `bfcol_6`,
10-
`int64_col` AS `bfcol_7`
11-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
12-
)
13-
LEFT JOIN (
14-
SELECT
15-
`int64_col` AS `bfcol_4`,
16-
`int64_too` AS `bfcol_5`
17-
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
18-
)
19-
ON COALESCE(`bfcol_6`, 0) = COALESCE(`bfcol_4`, 0)
20-
AND COALESCE(`bfcol_6`, 1) = COALESCE(`bfcol_4`, 1)
21-
)
6+
`rowindex` AS `bfcol_6`,
7+
`int64_col` AS `bfcol_7`
8+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
9+
)
10+
LEFT JOIN (
11+
SELECT
12+
`int64_col` AS `bfcol_4`,
13+
`int64_too` AS `bfcol_5`
14+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
15+
)
16+
ON COALESCE(`bfcol_6`, 0) = COALESCE(`bfcol_4`, 0)
17+
AND COALESCE(`bfcol_6`, 1) = COALESCE(`bfcol_4`, 1)

tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/bool_col/out.sql

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,15 @@ SELECT
1010
`bfcol_2` AS `rowindex_y`
1111
FROM (
1212
SELECT
13-
*
14-
FROM (
15-
SELECT
16-
`bfcol_1` AS `bfcol_4`,
17-
`bfcol_0` AS `bfcol_5`
18-
FROM `bfcte_0`
19-
)
20-
INNER JOIN (
21-
SELECT
22-
`bfcol_1` AS `bfcol_2`,
23-
`bfcol_0` AS `bfcol_3`
24-
FROM `bfcte_0`
25-
)
26-
ON COALESCE(CAST(`bfcol_5` AS STRING), '0') = COALESCE(CAST(`bfcol_3` AS STRING), '0')
27-
AND COALESCE(CAST(`bfcol_5` AS STRING), '1') = COALESCE(CAST(`bfcol_3` AS STRING), '1')
28-
)
13+
`bfcol_1` AS `bfcol_4`,
14+
`bfcol_0` AS `bfcol_5`
15+
FROM `bfcte_0`
16+
)
17+
INNER JOIN (
18+
SELECT
19+
`bfcol_1` AS `bfcol_2`,
20+
`bfcol_0` AS `bfcol_3`
21+
FROM `bfcte_0`
22+
)
23+
ON COALESCE(CAST(`bfcol_5` AS STRING), '0') = COALESCE(CAST(`bfcol_3` AS STRING), '0')
24+
AND COALESCE(CAST(`bfcol_5` AS STRING), '1') = COALESCE(CAST(`bfcol_3` AS STRING), '1')

tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/float64_col/out.sql

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,15 @@ SELECT
1010
`bfcol_2` AS `rowindex_y`
1111
FROM (
1212
SELECT
13-
*
14-
FROM (
15-
SELECT
16-
`bfcol_1` AS `bfcol_4`,
17-
`bfcol_0` AS `bfcol_5`
18-
FROM `bfcte_0`
19-
)
20-
INNER JOIN (
21-
SELECT
22-
`bfcol_1` AS `bfcol_2`,
23-
`bfcol_0` AS `bfcol_3`
24-
FROM `bfcte_0`
25-
)
26-
ON IF(IS_NAN(`bfcol_5`), 2, COALESCE(`bfcol_5`, 0)) = IF(IS_NAN(`bfcol_3`), 2, COALESCE(`bfcol_3`, 0))
27-
AND IF(IS_NAN(`bfcol_5`), 3, COALESCE(`bfcol_5`, 1)) = IF(IS_NAN(`bfcol_3`), 3, COALESCE(`bfcol_3`, 1))
28-
)
13+
`bfcol_1` AS `bfcol_4`,
14+
`bfcol_0` AS `bfcol_5`
15+
FROM `bfcte_0`
16+
)
17+
INNER JOIN (
18+
SELECT
19+
`bfcol_1` AS `bfcol_2`,
20+
`bfcol_0` AS `bfcol_3`
21+
FROM `bfcte_0`
22+
)
23+
ON IF(IS_NAN(`bfcol_5`), 2, COALESCE(`bfcol_5`, 0)) = IF(IS_NAN(`bfcol_3`), 2, COALESCE(`bfcol_3`, 0))
24+
AND IF(IS_NAN(`bfcol_5`), 3, COALESCE(`bfcol_5`, 1)) = IF(IS_NAN(`bfcol_3`), 3, COALESCE(`bfcol_3`, 1))

tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/int64_col/out.sql

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,15 @@ SELECT
1010
`bfcol_2` AS `rowindex_y`
1111
FROM (
1212
SELECT
13-
*
14-
FROM (
15-
SELECT
16-
`bfcol_1` AS `bfcol_4`,
17-
`bfcol_0` AS `bfcol_5`
18-
FROM `bfcte_0`
19-
)
20-
INNER JOIN (
21-
SELECT
22-
`bfcol_1` AS `bfcol_2`,
23-
`bfcol_0` AS `bfcol_3`
24-
FROM `bfcte_0`
25-
)
26-
ON COALESCE(`bfcol_5`, 0) = COALESCE(`bfcol_3`, 0)
27-
AND COALESCE(`bfcol_5`, 1) = COALESCE(`bfcol_3`, 1)
28-
)
13+
`bfcol_1` AS `bfcol_4`,
14+
`bfcol_0` AS `bfcol_5`
15+
FROM `bfcte_0`
16+
)
17+
INNER JOIN (
18+
SELECT
19+
`bfcol_1` AS `bfcol_2`,
20+
`bfcol_0` AS `bfcol_3`
21+
FROM `bfcte_0`
22+
)
23+
ON COALESCE(`bfcol_5`, 0) = COALESCE(`bfcol_3`, 0)
24+
AND COALESCE(`bfcol_5`, 1) = COALESCE(`bfcol_3`, 1)

tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/numeric_col/out.sql

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,15 @@ SELECT
1010
`bfcol_2` AS `rowindex_y`
1111
FROM (
1212
SELECT
13-
*
14-
FROM (
15-
SELECT
16-
`bfcol_1` AS `bfcol_4`,
17-
`bfcol_0` AS `bfcol_5`
18-
FROM `bfcte_0`
19-
)
20-
INNER JOIN (
21-
SELECT
22-
`bfcol_1` AS `bfcol_2`,
23-
`bfcol_0` AS `bfcol_3`
24-
FROM `bfcte_0`
25-
)
26-
ON COALESCE(`bfcol_5`, CAST(0 AS NUMERIC)) = COALESCE(`bfcol_3`, CAST(0 AS NUMERIC))
27-
AND COALESCE(`bfcol_5`, CAST(1 AS NUMERIC)) = COALESCE(`bfcol_3`, CAST(1 AS NUMERIC))
28-
)
13+
`bfcol_1` AS `bfcol_4`,
14+
`bfcol_0` AS `bfcol_5`
15+
FROM `bfcte_0`
16+
)
17+
INNER JOIN (
18+
SELECT
19+
`bfcol_1` AS `bfcol_2`,
20+
`bfcol_0` AS `bfcol_3`
21+
FROM `bfcte_0`
22+
)
23+
ON COALESCE(`bfcol_5`, CAST(0 AS NUMERIC)) = COALESCE(`bfcol_3`, CAST(0 AS NUMERIC))
24+
AND COALESCE(`bfcol_5`, CAST(1 AS NUMERIC)) = COALESCE(`bfcol_3`, CAST(1 AS NUMERIC))

tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/string_col/out.sql

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,12 @@ SELECT
88
`bfcol_0` AS `rowindex_x`,
99
`bfcol_1` AS `string_col`,
1010
`bfcol_2` AS `rowindex_y`
11-
FROM (
11+
FROM `bfcte_0`
12+
INNER JOIN (
1213
SELECT
13-
*
14+
`bfcol_0` AS `bfcol_2`,
15+
`bfcol_1` AS `bfcol_3`
1416
FROM `bfcte_0`
15-
INNER JOIN (
16-
SELECT
17-
`bfcol_0` AS `bfcol_2`,
18-
`bfcol_1` AS `bfcol_3`
19-
FROM `bfcte_0`
20-
)
21-
ON COALESCE(CAST(`bfcol_1` AS STRING), '0') = COALESCE(CAST(`bfcol_3` AS STRING), '0')
22-
AND COALESCE(CAST(`bfcol_1` AS STRING), '1') = COALESCE(CAST(`bfcol_3` AS STRING), '1')
23-
)
17+
)
18+
ON COALESCE(CAST(`bfcol_1` AS STRING), '0') = COALESCE(CAST(`bfcol_3` AS STRING), '0')
19+
AND COALESCE(CAST(`bfcol_1` AS STRING), '1') = COALESCE(CAST(`bfcol_3` AS STRING), '1')

0 commit comments

Comments
 (0)