Skip to content

Commit 14ecd01

Browse files
authored
Merge pull request #13 from NHSDigital/feature/dep008-1002_use_of_dynamic_fields_in_select
Feature/dep008 1002 use of dynamic fields in select
2 parents 73bbd0c + 0035ecd commit 14ecd01

File tree

5 files changed

+109
-4
lines changed

5 files changed

+109
-4
lines changed

src/dve/core_engine/backends/implementations/duckdb/utilities.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Utility objects for use with duckdb backend"""
22

3+
import itertools
4+
35
from dve.core_engine.backends.base.utilities import _split_multiexpr_string
46

57

@@ -24,7 +26,11 @@ def expr_mapping_to_columns(expressions: dict) -> list[str]:
2426

2527
def expr_array_to_columns(expressions: list[str]) -> list[str]:
    """Create list of duckdb expressions from list of expressions.

    Every entry of ``expressions`` is passed through
    ``_split_multiexpr_string`` and the pieces are flattened, in order, into
    a single list. An entry that holds several comma-separated expressions
    (for example ``"a_dynamic_field, another_dynamic_field"``) therefore
    yields one output element per expression, whereas an entry holding a
    single expression (for example
    ``"struct(a_field, another_field) as struct_field"``) is returned as is.

    Args:
        expressions: Expression strings; each may contain one expression or
            several expressions in one string.

    Returns:
        A flat list with one string per individual expression. An empty
        input produces an empty list.
    """
    return [
        column
        for expression in expressions
        for column in _split_multiexpr_string(expression)
    ]
2834

2935

3036
def multiexpr_string_to_columns(expressions: str) -> list[str]:

src/dve/core_engine/backends/implementations/spark/utilities.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Some utilities which are useful for implementing Spark transformations."""
22

33
import datetime as dt
4+
import itertools
45
from collections.abc import Callable
56
from json import JSONEncoder
67
from operator import and_, or_
@@ -70,7 +71,13 @@ def expr_mapping_to_columns(expressions: ExpressionMapping) -> list[Column]:
7071

7172
def expr_array_to_columns(expressions: ExpressionArray) -> list[Column]:
    """Convert an array of expressions to a list of columns.

    Every entry of ``expressions`` is first passed through
    ``_split_multiexpr_string`` so that an entry holding several expressions
    in one string (for example ``"a_dynamic_field, another_dynamic_field"``)
    is expanded into its individual expressions. Each individual expression
    is then turned into a column with ``sf.expr``.

    Args:
        expressions: Expression strings; each may contain one expression or
            several expressions in one string.

    Returns:
        One column per individual expression, in input order. An empty input
        produces an empty list.
    """
    # Split everything before building any column, so the conversion step
    # only ever sees single expressions.
    single_expressions = [
        single
        for expression in expressions
        for single in _split_multiexpr_string(expression)
    ]
    return [sf.expr(single) for single in single_expressions]
7481

7582

7683
def multiexpr_string_to_columns(expressions: MultiExpression) -> list[Column]:

src/dve/core_engine/type_hints.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313

1414
# TODO - cannot remove List from Typing. See L60 for details.
1515

16-
17-
1816
if TYPE_CHECKING: # pragma: no cover
1917
from dve.core_engine.message import FeedbackMessage
2018

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from typing import Dict, List
2+
import pytest
3+
4+
from dve.core_engine.backends.implementations.duckdb.utilities import (
5+
expr_mapping_to_columns,
6+
expr_array_to_columns,
7+
)
8+
9+
10+
@pytest.mark.parametrize(
    "expressions, expected",
    [
        (
            {
                "size(array_field)": "field_length",
                "another_field": "rename_another_field",
            },
            [
                "size(array_field) as field_length",
                "another_field as rename_another_field",
            ],
        ),
    ],
)
def test_expr_mapping_to_columns(expressions: Dict[str, str], expected: list[str]):
    """Each ``expression -> alias`` pair becomes ``"<expression> as <alias>"``.

    The output is expected to follow the order of the mapping.
    """
    assert expr_mapping_to_columns(expressions) == expected
22+
23+
24+
@pytest.mark.parametrize(
    ["expressions", "expected"],
    [
        # Single-expression entries (including a function call that contains
        # a comma inside its parentheses) pass through unchanged.
        (
            [
                "a_field",
                "another_field as renamed",
                "struct(a_field, another_field) as struct_field",
            ],
            [
                "a_field",
                "another_field as renamed",
                "struct(a_field, another_field) as struct_field",
            ],
        ),
        # An entry holding several expressions is expanded into one output
        # element per expression.
        (
            [
                "size(array_field)",
                "another_field as rename_another_field",
                "a_dynamic_field, another_dynamic_field",
            ],
            [
                "size(array_field)",
                "another_field as rename_another_field",
                "a_dynamic_field",
                "another_dynamic_field",
            ],
        ),
        # No expressions in, no columns out.
        ([], []),
    ],
)
def test_expr_array_to_columns(expressions: list[str], expected: list[str]):
    """Check that an array of expressions is flattened into single expressions.

    Args:
        expressions: Input expression strings handed to
            ``expr_array_to_columns``.
        expected: The flat list of expression strings that should come back.
    """
    observed = expr_array_to_columns(expressions)
    assert observed == expected
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import pytest
2+
from pyspark.sql.functions import expr
3+
4+
from dve.core_engine.backends.implementations.spark.utilities import (
5+
expr_mapping_to_columns,
6+
expr_array_to_columns,
7+
)
8+
9+
10+
@pytest.mark.parametrize(
    "expressions",
    [
        {"size(array_field)": "field_length", "another_field": "rename_another_field"},
    ],
)
def test_expr_mapping_to_columns(spark, expressions: dict[str, str]):
    """Each ``expression -> alias`` pair becomes an aliased Spark column.

    Columns are compared through the string returned by ``_jc.toString()``
    rather than with ``==`` on the column objects themselves.
    """
    expected_strings = [
        expr(expression).alias(rename)._jc.toString()
        for expression, rename in expressions.items()
    ]
    observed_strings = [
        column._jc.toString() for column in expr_mapping_to_columns(expressions)
    ]
    assert observed_strings == expected_strings
21+
22+
23+
@pytest.mark.parametrize(
    ["expressions", "expected"],
    [
        # Single-expression entries (including a function call that contains
        # a comma inside its parentheses) pass through unchanged.
        (
            [
                "a_field",
                "another_field as renamed",
                "struct(a_field, another_field) as struct_field",
            ],
            [
                "a_field",
                "another_field as renamed",
                "struct(a_field, another_field) as struct_field",
            ],
        ),
        # An entry holding several expressions is expanded into one column
        # per expression.
        (
            [
                "size(array_field)",
                "another_field as rename_another_field",
                "a_dynamic_field, another_dynamic_field",
            ],
            [
                "size(array_field)",
                "another_field as rename_another_field",
                "a_dynamic_field",
                "another_dynamic_field",
            ],
        ),
        # No expressions in, no columns out.
        ([], []),
    ],
)
def test_expr_array_to_columns(spark, expressions: list[str], expected: list[str]):
    """Check that an array of expressions is expanded into one column each.

    Columns are compared through the string returned by ``_jc.toString()``
    rather than with ``==`` on the column objects themselves.

    Args:
        spark: Spark session fixture; requested so a session exists while
            columns are built.
        expressions: Input expression strings handed to
            ``expr_array_to_columns``.
        expected: The individual expression strings whose columns should be
            produced, in order.
    """
    observed = expr_array_to_columns(expressions)
    observed_strings = [column._jc.toString() for column in observed]
    expected_strings = [expr(expression)._jc.toString() for expression in expected]
    assert observed_strings == expected_strings

0 commit comments

Comments
 (0)