Skip to content

Commit 62b573e

Browse files
Merge pull request #14 from NHSDigital/develop_v03
Develop v0.3.0
2 parents 1d8f7bb + 5943385 commit 62b573e

File tree

19 files changed

+322
-43
lines changed

19 files changed

+322
-43
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
## v0.3.0 (2025-11-19)
2+
3+
### Feat
4+
5+
- new domain type formattedtime for time only data
6+
7+
### Refactor
8+
9+
- small tweak to allow use of dynamic fields in select rules
10+
111
## v0.2.0 (2025-11-12)
212

313
### Refactor

docs/detailed_guidance/domain_types.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,24 @@ Domain types are custom defined pydantic types that solve common problems with u
44
This might include Postcodes, NHS Numbers, dates with specific formats etc.
55

66
Below is a list of defined types, their output type and any constraints. Nested beneath them are any constraints that are allowed and their default values if there are any.
7-
| Defined Type | Output Type | Contraints & Defaults |
8-
| ------------ | ----------- | --------------------- |
9-
| NHSNumber | str |
10-
| permissive_nhs_number | str | <li> warn_on_test_numbers = False </li> |
11-
| Postcode | str |
12-
| OrgId | str |
13-
| conformatteddate | date | <li>date_format: str</li><li>ge: date</li><li>le: date</li><li>gt: date</li><li>lt: date</li> |
14-
| formatteddatetime | datetime | <li>date_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require] = "permit"</li> |
15-
| reportingperiod | date | <li>reporting_period_type: one_of ["start", "end"]</li><li>date_format: str = "%Y-%m-%d"</li> |
16-
| alphanumeric | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> |
17-
| identifier | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li>
7+
| Defined Type | Output Type | Constraints & Defaults | Supported Implementations |
8+
| ------------ | ----------- | --------------------- | ------------------------- |
9+
| NHSNumber | str | | Spark, DuckDB |
10+
| permissive_nhs_number | str | <li> warn_on_test_numbers = False </li> | Spark, DuckDB |
11+
| Postcode | str | | Spark, DuckDB |
12+
| OrgId | str | | Spark, DuckDB |
13+
| conformatteddate | date | <li>date_format: str</li><li>ge: date</li><li>le: date</li><li>gt: date</li><li>lt: date</li> | Spark, DuckDB |
14+
| formatteddatetime | datetime | <li>date_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit"</li> | Spark, DuckDB |
15+
| formattedtime | time | <li>time_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit"</li> | DuckDB |
16+
| reportingperiod | date | <li>reporting_period_type: one_of ["start", "end"]</li><li>date_format: str = "%Y-%m-%d"</li> | Spark, DuckDB |
17+
| alphanumeric | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> | Spark, DuckDB |
18+
| identifier | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> | Spark, DuckDB |
1819

19-
Other types that are allowed include:
20+
**Other types that are allowed include:**
2021
- str
2122
- int
2223
- date
2324
- datetime
2425
- Decimal
2526
- float
26-
27-
And any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)
27+
- Any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "nhs_dve"
3-
version = "0.2.0"
3+
version = "0.3.0"
44
description = "`nhs data validation engine` is a framework used to validate data"
55
authors = ["NHS England <england.contactus@nhs.net>"]
66
readme = "README.md"

src/dve/core_engine/backends/implementations/duckdb/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Implementation of duckdb backend"""
2+
23
from dve.core_engine.backends.implementations.duckdb.readers.json import DuckDBJSONReader
34
from dve.core_engine.backends.readers import register_reader
45

src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"""Helper objects for duckdb data contract implementation"""
55
from collections.abc import Generator, Iterator
66
from dataclasses import is_dataclass
7-
from datetime import date, datetime
7+
from datetime import date, datetime, time
88
from decimal import Decimal
99
from pathlib import Path
1010
from typing import Any, ClassVar, Union
@@ -87,6 +87,7 @@ def __call__(self):
8787
date: ddbtyp.DATE,
8888
datetime: ddbtyp.TIMESTAMP,
8989
Decimal: DDBDecimal()(),
90+
time: ddbtyp.TIME,
9091
}
9192
"""A mapping of Python types to the equivalent DuckDB types."""
9293

src/dve/core_engine/backends/implementations/duckdb/utilities.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Utility objects for use with duckdb backend"""
22

3+
import itertools
4+
35
from dve.core_engine.backends.base.utilities import _split_multiexpr_string
46

57

@@ -24,7 +26,11 @@ def expr_mapping_to_columns(expressions: dict) -> list[str]:
2426

2527
def expr_array_to_columns(expressions: list[str]) -> list[str]:
2628
"""Create list of duckdb expressions from list of expressions"""
27-
return [f"{expression}" for expression in expressions]
29+
return list(
30+
itertools.chain.from_iterable(
31+
_split_multiexpr_string(expression) for expression in expressions
32+
)
33+
)
2834

2935

3036
def multiexpr_string_to_columns(expressions: str) -> list[str]:

src/dve/core_engine/backends/implementations/spark/spark_helpers.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,7 @@
1212
from dataclasses import dataclass, is_dataclass
1313
from decimal import Decimal
1414
from functools import wraps
15-
from typing import (
16-
Any,
17-
ClassVar,
18-
Optional,
19-
TypeVar,
20-
Union,
21-
overload,
22-
)
15+
from typing import Any, ClassVar, Optional, TypeVar, Union, overload
2316

2417
from delta.exceptions import ConcurrentAppendException, DeltaConcurrentModificationException
2518
from pydantic import BaseModel

src/dve/core_engine/backends/implementations/spark/utilities.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Some utilities which are useful for implementing Spark transformations."""
22

33
import datetime as dt
4+
import itertools
45
from collections.abc import Callable
56
from json import JSONEncoder
67
from operator import and_, or_
@@ -70,7 +71,13 @@ def expr_mapping_to_columns(expressions: ExpressionMapping) -> list[Column]:
7071

7172
def expr_array_to_columns(expressions: ExpressionArray) -> list[Column]:
7273
"""Convert an array of expressions to a list of columns."""
73-
return list(map(sf.expr, expressions))
74+
75+
_expr_list = list(
76+
itertools.chain.from_iterable(
77+
_split_multiexpr_string(expression) for expression in expressions
78+
)
79+
)
80+
return list(map(sf.expr, _expr_list))
7481

7582

7683
def multiexpr_string_to_columns(expressions: MultiExpression) -> list[Column]:

src/dve/core_engine/backends/readers/xml.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,7 @@
33

44
import re
55
from collections.abc import Collection, Iterator
6-
from typing import (
7-
IO,
8-
Any,
9-
GenericAlias, # type: ignore
10-
Optional,
11-
Union,
12-
overload
13-
)
6+
from typing import IO, Any, GenericAlias, Optional, Union, overload # type: ignore
147

158
import polars as pl
169
from lxml import etree # type: ignore

src/dve/core_engine/backends/utilities.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
import sys
44
from dataclasses import is_dataclass
5-
from datetime import date, datetime
5+
from datetime import date, datetime, time
66
from decimal import Decimal
7-
from typing import Any, ClassVar, Union
87
from typing import GenericAlias # type: ignore
8+
from typing import Any, ClassVar, Union
99

1010
import polars as pl # type: ignore
1111
from polars.datatypes.classes import DataTypeClass as PolarsType
@@ -33,13 +33,16 @@
3333
date: pl.Date, # type: ignore
3434
datetime: pl.Datetime, # type: ignore
3535
Decimal: pl.Utf8, # type: ignore
36+
time: pl.Time, # type: ignore
3637
}
3738
"""A mapping of Python types to the equivalent Polars types."""
3839

3940

4041
def stringify_type(type_: Union[type, GenericAlias]) -> type:
4142
"""Stringify an individual type."""
42-
if isinstance(type_, type) and not isinstance(type_, GenericAlias): # A model, return the contents. # pylint: disable=C0301
43+
if isinstance(type_, type) and not isinstance(
44+
type_, GenericAlias
45+
): # A model, return the contents. # pylint: disable=C0301
4346
if issubclass(type_, BaseModel):
4447
return stringify_model(type_)
4548

0 commit comments

Comments
 (0)