Skip to content

Commit 73bbd0c

Browse files
authored
Merge pull request #12 from NHSDigital/feature/gr-ndit-535_create_time_domain_type
Feature/gr ndit 535 create time domain type
2 parents 1d8f7bb + 10a2496 commit 73bbd0c

File tree

13 files changed

+204
-40
lines changed

13 files changed

+204
-40
lines changed

docs/detailed_guidance/domain_types.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,24 @@ Domain types are custom defined pydantic types that solve common problems with u
44
This might include Postcodes, NHS Numbers, dates with specific formats etc.
55

66
Below is a list of defined types, their output type and any constraints. Nested beneath them are any constraints that are allowed and their default values, if there are any.
7-
| Defined Type | Output Type | Contraints & Defaults |
8-
| ------------ | ----------- | --------------------- |
9-
| NHSNumber | str |
10-
| permissive_nhs_number | str | <li> warn_on_test_numbers = False </li> |
11-
| Postcode | str |
12-
| OrgId | str |
13-
| conformatteddate | date | <li>date_format: str</li><li>ge: date</li><li>le: date</li><li>gt: date</li><li>lt: date</li> |
14-
| formatteddatetime | datetime | <li>date_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require] = "permit"</li> |
15-
| reportingperiod | date | <li>reporting_period_type: one_of ["start", "end"]</li><li>date_format: str = "%Y-%m-%d"</li> |
16-
| alphanumeric | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> |
17-
| identifier | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li>
7+
| Defined Type | Output Type | Constraints & Defaults | Supported Implementations |
8+
| ------------ | ----------- | --------------------- | ------------------------- |
9+
| NHSNumber | str | | Spark, DuckDB |
10+
| permissive_nhs_number | str | <li> warn_on_test_numbers = False </li> | Spark, DuckDB |
11+
| Postcode | str | | Spark, DuckDB |
12+
| OrgId | str | | Spark, DuckDB |
13+
| conformatteddate | date | <li>date_format: str</li><li>ge: date</li><li>le: date</li><li>gt: date</li><li>lt: date</li> | Spark, DuckDB |
14+
| formatteddatetime | datetime | <li>date_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit"</li> | Spark, DuckDB |
15+
| formattedtime | time | <li>time_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit"</li> | DuckDB |
16+
| reportingperiod | date | <li>reporting_period_type: one_of ["start", "end"]</li><li>date_format: str = "%Y-%m-%d"</li> | Spark, DuckDB |
17+
| alphanumeric | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> | Spark, DuckDB |
18+
| identifier | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> | Spark, DuckDB |
1819

19-
Other types that are allowed include:
20+
**Other types that are allowed include:**
2021
- str
2122
- int
2223
- date
2324
- datetime
2425
- Decimal
2526
- float
26-
27-
And any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)
27+
- Any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)

src/dve/core_engine/backends/implementations/duckdb/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Implementation of duckdb backend"""
2+
23
from dve.core_engine.backends.implementations.duckdb.readers.json import DuckDBJSONReader
34
from dve.core_engine.backends.readers import register_reader
45

src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"""Helper objects for duckdb data contract implementation"""
55
from collections.abc import Generator, Iterator
66
from dataclasses import is_dataclass
7-
from datetime import date, datetime
7+
from datetime import date, datetime, time
88
from decimal import Decimal
99
from pathlib import Path
1010
from typing import Any, ClassVar, Union
@@ -87,6 +87,7 @@ def __call__(self):
8787
date: ddbtyp.DATE,
8888
datetime: ddbtyp.TIMESTAMP,
8989
Decimal: DDBDecimal()(),
90+
time: ddbtyp.TIME,
9091
}
9192
"""A mapping of Python types to the equivalent DuckDB types."""
9293

src/dve/core_engine/backends/implementations/spark/spark_helpers.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,7 @@
1212
from dataclasses import dataclass, is_dataclass
1313
from decimal import Decimal
1414
from functools import wraps
15-
from typing import (
16-
Any,
17-
ClassVar,
18-
Optional,
19-
TypeVar,
20-
Union,
21-
overload,
22-
)
15+
from typing import Any, ClassVar, Optional, TypeVar, Union, overload
2316

2417
from delta.exceptions import ConcurrentAppendException, DeltaConcurrentModificationException
2518
from pydantic import BaseModel

src/dve/core_engine/backends/readers/xml.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,7 @@
33

44
import re
55
from collections.abc import Collection, Iterator
6-
from typing import (
7-
IO,
8-
Any,
9-
GenericAlias, # type: ignore
10-
Optional,
11-
Union,
12-
overload
13-
)
6+
from typing import IO, Any, GenericAlias, Optional, Union, overload # type: ignore
147

158
import polars as pl
169
from lxml import etree # type: ignore

src/dve/core_engine/backends/utilities.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
import sys
44
from dataclasses import is_dataclass
5-
from datetime import date, datetime
5+
from datetime import date, datetime, time
66
from decimal import Decimal
7-
from typing import Any, ClassVar, Union
87
from typing import GenericAlias # type: ignore
8+
from typing import Any, ClassVar, Union
99

1010
import polars as pl # type: ignore
1111
from polars.datatypes.classes import DataTypeClass as PolarsType
@@ -33,13 +33,16 @@
3333
date: pl.Date, # type: ignore
3434
datetime: pl.Datetime, # type: ignore
3535
Decimal: pl.Utf8, # type: ignore
36+
time: pl.Time, # type: ignore
3637
}
3738
"""A mapping of Python types to the equivalent Polars types."""
3839

3940

4041
def stringify_type(type_: Union[type, GenericAlias]) -> type:
4142
"""Stringify an individual type."""
42-
if isinstance(type_, type) and not isinstance(type_, GenericAlias): # A model, return the contents. # pylint: disable=C0301
43+
if isinstance(type_, type) and not isinstance(
44+
type_, GenericAlias
45+
): # A model, return the contents. # pylint: disable=C0301
4346
if issubclass(type_, BaseModel):
4447
return stringify_model(type_)
4548

src/dve/core_engine/message.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
import copy
44
import datetime as dt
5-
import operator
65
import json
6+
import operator
77
from collections.abc import Callable
88
from decimal import Decimal
99
from functools import reduce

src/dve/core_engine/type_hints.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@
66
from pathlib import Path
77
from queue import Queue as ThreadQueue
88
from typing import TYPE_CHECKING, Any, List, Optional, TypeVar, Union # pylint: disable=W1901
9-
# TODO - cannot remove List from Typing. See L60 for details.
109

1110
from pyspark.sql import DataFrame
1211
from pyspark.sql.types import StructType
1312
from typing_extensions import Literal, ParamSpec, get_args
1413

14+
# TODO - cannot remove List from Typing. See L60 for details.
15+
16+
1517

1618
if TYPE_CHECKING: # pragma: no cover
1719
from dve.core_engine.message import FeedbackMessage

src/dve/metadata_parser/domain_types.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,97 @@ def __get_validators__(cls) -> Iterator[classmethod]:
392392
yield cls.validate # type: ignore
393393

394394

395+
class FormattedTime(dt.time):
    """A time, provided as a time, a datetime or a string in a specific format.

    Strings are parsed with `TIME_FORMAT` when it is set, otherwise each of
    `DEFAULT_PATTERNS` is tried in order. The parsed value is then checked
    against `TIMEZONE_TREATMENT`.
    """

    TIME_FORMAT: ClassVar[Optional[str]] = None
    """The specific format of the time."""
    TIMEZONE_TREATMENT: ClassVar[Literal["forbid", "permit", "require"]] = "permit"
    """How to treat the presence of timezone-related information."""
    DEFAULT_PATTERNS: Sequence[str] = [
        # 12 hour patterns with an explicit AM/PM marker. These must be tried
        # before the 24 hour patterns: `strptime` only applies `%p` to the hour
        # when the hour was parsed with `%I`, so a `%H...%p` pattern would read
        # "01:30:00PM" as 01:30 rather than 13:30.
        *map(
            "".join,
            itertools.product(
                ("%I:%M:%S", "%I%M%S"),
                ("", ".%f"),
                ("%z %p", "%z%p", " %p", "%p", "%p%z"),
            ),
        ),
        # 24 hour patterns. A trailing AM/PM marker is tolerated (and ignored)
        # so that values such as "13:30:00PM" are still accepted.
        *map(
            "".join,
            itertools.product(
                ("%H:%M:%S", "%H%M%S"),
                ("", ".%f"),
                ("%p", ""),
                ("%z", ""),
            ),
        ),
    ]
    """A sequence of time format patterns to try if `TIME_FORMAT` is unset."""

    @classmethod
    def convert_to_time(cls, value: dt.datetime) -> dt.time:
        """
        Convert `datetime.datetime` to `datetime.time`. If datetime contains timezone info, that
        will be retained.
        """
        # `timetz` keeps `tzinfo` (which is simply `None` for naive datetimes).
        return value.timetz()

    @classmethod
    def parse_time(cls, string: str) -> dt.time:
        """Attempt to parse a time using each of `DEFAULT_PATTERNS` in sequence.

        Raises:
            ValueError: if none of the patterns match the provided string.
        """
        string = string.strip()
        if string.endswith("Z"):  # Convert 'zulu' time to UTC.
            string = string[:-1] + "+00:00"

        for pattern in cls.DEFAULT_PATTERNS:
            try:
                parsed = dt.datetime.strptime(string, pattern)
            except ValueError:
                continue
            return cls.convert_to_time(parsed)

        raise ValueError("Unable to parse provided time")

    @classmethod
    def validate(cls, value: Union[dt.time, dt.datetime, str, None]) -> Optional[dt.time]:
        """Validate a passed time, datetime or string.

        Returns:
            The validated time, or `None` if `None` was provided.

        Raises:
            TypeError: if the value is not a time, datetime or string.
            ValueError: if a string cannot be parsed, or if the presence/absence
                of timezone information violates `TIMEZONE_TREATMENT`.
        """
        if value is None:
            return value

        if isinstance(value, dt.time):
            new_time = value
        elif isinstance(value, dt.datetime):
            new_time = cls.convert_to_time(value)
        elif not isinstance(value, str):
            # Fail with a validation-style error rather than an AttributeError
            # from calling string methods on an unsupported type.
            raise TypeError(
                f"Expected a time, datetime or string, got {type(value).__name__}"
            )
        elif cls.TIME_FORMAT is not None:
            try:
                new_time = cls.convert_to_time(dt.datetime.strptime(value, cls.TIME_FORMAT))
            except ValueError as err:
                raise ValueError(
                    f"Unable to parse provided time in format {cls.TIME_FORMAT}"
                ) from err
        else:
            new_time = cls.parse_time(value)

        if cls.TIMEZONE_TREATMENT == "forbid" and new_time.tzinfo:
            raise ValueError("Provided time has timezone, but this is forbidden for this field")
        if cls.TIMEZONE_TREATMENT == "require" and not new_time.tzinfo:
            raise ValueError("Provided time missing timezone, but this is required for this field")

        return new_time

    @classmethod
    def __get_validators__(cls) -> Iterator[classmethod]:
        """Gets all validators"""
        # Without this hook pydantic would fall back to its built-in `time`
        # parsing and never call `validate`.
        yield cls.validate  # type: ignore
485+
395486
@lru_cache()
396487
@validate_arguments
397488
def formatteddatetime(
@@ -412,6 +503,23 @@ def formatteddatetime(
412503
return type("FormattedDatetime", (FormattedDatetime, *FormattedDatetime.__bases__), dict_)
413504

414505

506+
@lru_cache()
@validate_arguments
def formattedtime(
    time_format: Optional[str] = None,
    timezone_treatment: Literal["forbid", "permit", "require"] = "permit",
) -> type[FormattedTime]:
    """Build a `FormattedTime` type bound to a time format and timezone treatment.

    When both arguments are left at their defaults the base `FormattedTime`
    class itself is returned. Otherwise a new class is created whose
    `TIME_FORMAT` and `TIMEZONE_TREATMENT` attributes carry the given values.
    Results are memoised, so repeated calls with the same arguments return the
    same class object.
    """
    uses_defaults = time_format is None and timezone_treatment == "permit"
    if uses_defaults:
        return FormattedTime

    # Start from the base class namespace and override only the two settings.
    namespace = dict(FormattedTime.__dict__)
    namespace.update(TIME_FORMAT=time_format, TIMEZONE_TREATMENT=timezone_treatment)

    bases = (FormattedTime, *FormattedTime.__bases__)
    return type("FormattedTime", bases, namespace)
521+
522+
415523
class ReportingPeriod(dt.date):
416524
"""A reporting period field, with the type of reporting period supplied"""
417525

src/dve/metadata_parser/model_generator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def constr(
7272
"identifier": domain_types.identifier,
7373
"orgid": domain_types.OrgID,
7474
"formatteddatetime": domain_types.formatteddatetime,
75+
"formattedtime": domain_types.formattedtime,
7576
"conformatteddate": domain_types.conformatteddate,
7677
"reportingperiodstart": domain_types.reportingperiod(reporting_period_type="start"),
7778
"reportingperiodend": domain_types.reportingperiod(reporting_period_type="end"),

0 commit comments

Comments
 (0)