Skip to content

Commit 0035ecd

Browse files
committed
style: resolving formatting merge conflicts
2 parents 61827ae + 73bbd0c commit 0035ecd

File tree

9 files changed

+193
-21
lines changed

9 files changed

+193
-21
lines changed

docs/detailed_guidance/domain_types.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,24 @@ Domain types are custom defined pydantic types that solve common problems with u
44
This might include Postcodes, NHS Numbers, dates with specific formats etc.
55

66
Below is a list of defined types, their output type and any constraints. Nested beneath them are any constraints that are allowed and their default values if there are any.
7-
| Defined Type | Output Type | Contraints & Defaults |
8-
| ------------ | ----------- | --------------------- |
9-
| NHSNumber | str |
10-
| permissive_nhs_number | str | <li> warn_on_test_numbers = False </li> |
11-
| Postcode | str |
12-
| OrgId | str |
13-
| conformatteddate | date | <li>date_format: str</li><li>ge: date</li><li>le: date</li><li>gt: date</li><li>lt: date</li> |
14-
| formatteddatetime | datetime | <li>date_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require] = "permit"</li> |
15-
| reportingperiod | date | <li>reporting_period_type: one_of ["start", "end"]</li><li>date_format: str = "%Y-%m-%d"</li> |
16-
| alphanumeric | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> |
17-
| identifier | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li>
7+
| Defined Type | Output Type | Constraints & Defaults | Supported Implementations |
8+
| ------------ | ----------- | --------------------- | ------------------------- |
9+
| NHSNumber | str | | Spark, DuckDB |
10+
| permissive_nhs_number | str | <li> warn_on_test_numbers = False </li> | Spark, DuckDB |
11+
| Postcode | str | | Spark, DuckDB |
12+
| OrgId | str | | Spark, DuckDB |
13+
| conformatteddate | date | <li>date_format: str</li><li>ge: date</li><li>le: date</li><li>gt: date</li><li>lt: date</li> | Spark, DuckDB |
14+
| formatteddatetime | datetime | <li>date_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit"</li> | Spark, DuckDB |
15+
| formattedtime | time | <li>time_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit"</li> | DuckDB |
16+
| reportingperiod | date | <li>reporting_period_type: one_of ["start", "end"]</li><li>date_format: str = "%Y-%m-%d"</li> | Spark, DuckDB |
17+
| alphanumeric | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> | Spark, DuckDB |
18+
| identifier | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> | Spark, DuckDB |
1819

19-
Other types that are allowed include:
20+
**Other types that are allowed include:**
2021
- str
2122
- int
2223
- date
2324
- datetime
2425
- Decimal
2526
- float
26-
27-
And any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)
27+
- Any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)

src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"""Helper objects for duckdb data contract implementation"""
55
from collections.abc import Generator, Iterator
66
from dataclasses import is_dataclass
7-
from datetime import date, datetime
7+
from datetime import date, datetime, time
88
from decimal import Decimal
99
from pathlib import Path
1010
from typing import Any, ClassVar, Union
@@ -87,6 +87,7 @@ def __call__(self):
8787
date: ddbtyp.DATE,
8888
datetime: ddbtyp.TIMESTAMP,
8989
Decimal: DDBDecimal()(),
90+
time: ddbtyp.TIME,
9091
}
9192
"""A mapping of Python types to the equivalent DuckDB types."""
9293

src/dve/core_engine/backends/utilities.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import sys
44
from dataclasses import is_dataclass
5-
from datetime import date, datetime
5+
from datetime import date, datetime, time
66
from decimal import Decimal
77
from typing import GenericAlias # type: ignore
88
from typing import Any, ClassVar, Union
@@ -33,6 +33,7 @@
3333
date: pl.Date, # type: ignore
3434
datetime: pl.Datetime, # type: ignore
3535
Decimal: pl.Utf8, # type: ignore
36+
time: pl.Time, # type: ignore
3637
}
3738
"""A mapping of Python types to the equivalent Polars types."""
3839

src/dve/core_engine/type_hints.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313

1414
# TODO - cannot remove List from Typing. See L60 for details.
1515

16-
1716
if TYPE_CHECKING: # pragma: no cover
1817
from dve.core_engine.message import FeedbackMessage
1918

src/dve/metadata_parser/domain_types.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,97 @@ def __get_validators__(cls) -> Iterator[classmethod]:
392392
yield cls.validate # type: ignore
393393

394394

395+
class FormattedTime(dt.time):
    """A time, provided as a time, a datetime or a string in a specific format.

    Strings are parsed with `TIME_FORMAT` when it is set, otherwise each of
    `DEFAULT_PATTERNS` is tried in order. The presence (or absence) of timezone
    information on the result is then checked against `TIMEZONE_TREATMENT`.
    """

    TIME_FORMAT: ClassVar[Optional[str]] = None
    """The specific format of the time."""
    TIMEZONE_TREATMENT: ClassVar[Literal["forbid", "permit", "require"]] = "permit"
    """How to treat the presence of timezone-related information."""
    DEFAULT_PATTERNS: Sequence[str] = [
        # 1. Plain 24 hour patterns. These go first so that a string without an
        #    AM/PM marker (e.g. "12:00:00") is never read as a 12 hour time.
        *map(
            "".join,
            itertools.product(
                ("%H:%M:%S", "%H%M%S"),
                ("", ".%f"),
                ("%z", ""),
            ),
        ),
        # 2. 12 hour patterns carrying an AM/PM marker, with the UTC offset on
        #    either side of the marker. These must be tried before the lenient
        #    24 hour patterns below: `strptime` only applies `%p` to `%I`, so
        #    "01:30:00pm" matched against "%H:%M:%S%p" would yield 01:30.
        *map(
            "".join,
            itertools.product(
                ("%I:%M:%S", "%I%M%S"),
                ("", ".%f"),
                ("%z %p", "%z%p", " %p%z", "%p%z", " %p", "%p"),
            ),
        ),
        # 3. Lenient 24 hour patterns with a (redundant) AM/PM marker, kept so
        #    that inputs such as "23:00:00pm" continue to be accepted.
        *map(
            "".join,
            itertools.product(
                ("%H:%M:%S", "%H%M%S"),
                ("", ".%f"),
                ("%p",),
                ("%z", ""),
            ),
        ),
        # NOTE: `%P` is deliberately absent; it is not a directive understood by
        # `datetime.strptime`, so any pattern containing it can never match.
    ]
    """A sequence of time format patterns to try if `TIME_FORMAT` is unset."""

    @classmethod
    def convert_to_time(cls, value: dt.datetime) -> dt.time:
        """
        Convert `datetime.datetime` to `datetime.time`. If datetime contains timezone info, that
        will be retained.
        """
        if value.tzinfo:
            return value.timetz()

        return value.time()

    @classmethod
    def parse_time(cls, string: str) -> dt.time:
        """Attempt to parse a time using each of `DEFAULT_PATTERNS` in sequence.

        Raises:
            ValueError: if none of the default patterns match the string.
        """
        string = string.strip()
        if string.endswith("Z"):  # Convert 'zulu' time to UTC.
            string = string[:-1] + "+00:00"

        for pattern in cls.DEFAULT_PATTERNS:
            try:
                parsed = dt.datetime.strptime(string, pattern)
            except ValueError:
                continue

            return cls.convert_to_time(parsed)
        raise ValueError("Unable to parse provided time")

    @classmethod
    def validate(cls, value: Optional[Union[dt.time, dt.datetime, str]]) -> Optional[dt.time]:
        """Validate a passed time, datetime or string.

        `None` is passed through unchanged. Time objects are used as-is,
        datetimes are reduced to their time component and strings are parsed.

        Raises:
            ValueError: if a string cannot be parsed, or if the timezone
                information on the result conflicts with `TIMEZONE_TREATMENT`.
        """
        if value is None:
            return value

        if isinstance(value, dt.time):
            new_time = value
        elif isinstance(value, dt.datetime):
            new_time = cls.convert_to_time(value)
        elif cls.TIME_FORMAT is not None:
            try:
                parsed = dt.datetime.strptime(value, cls.TIME_FORMAT)
            except ValueError as err:
                raise ValueError(
                    f"Unable to parse provided time in format {cls.TIME_FORMAT}"
                ) from err
            new_time = cls.convert_to_time(parsed)
        else:
            new_time = cls.parse_time(value)

        if cls.TIMEZONE_TREATMENT == "forbid" and new_time.tzinfo:
            raise ValueError("Provided time has timezone, but this is forbidden for this field")
        if cls.TIMEZONE_TREATMENT == "require" and not new_time.tzinfo:
            raise ValueError("Provided time missing timezone, but this is required for this field")

        return new_time

    @classmethod
    def __get_validators__(cls) -> Iterator[classmethod]:
        """Gets all validators"""
        # Without this hook pydantic falls back to its built-in `time` parsing
        # and never applies `TIME_FORMAT` or `TIMEZONE_TREATMENT`.
        yield cls.validate  # type: ignore
484+
485+
395486
@lru_cache()
396487
@validate_arguments
397488
def formatteddatetime(
@@ -412,6 +503,23 @@ def formatteddatetime(
412503
return type("FormattedDatetime", (FormattedDatetime, *FormattedDatetime.__bases__), dict_)
413504

414505

506+
@lru_cache()
@validate_arguments
def formattedtime(
    time_format: Optional[str] = None,
    timezone_treatment: Literal["forbid", "permit", "require"] = "permit",
) -> type[FormattedTime]:
    """Build a `FormattedTime` type bound to a time format and timezone treatment.

    When both arguments are left at their defaults the base `FormattedTime`
    class itself is returned; otherwise a new subclass is created whose
    `TIME_FORMAT` and `TIMEZONE_TREATMENT` attributes carry the given values.
    """
    uses_defaults = time_format is None and timezone_treatment == "permit"
    if uses_defaults:
        return FormattedTime

    # Start from the base class namespace and override only the two settings.
    namespace = dict(FormattedTime.__dict__)
    namespace.update(TIME_FORMAT=time_format, TIMEZONE_TREATMENT=timezone_treatment)

    parents = (FormattedTime, *FormattedTime.__bases__)
    return type("FormattedTime", parents, namespace)
521+
522+
415523
class ReportingPeriod(dt.date):
416524
"""A reporting period field, with the type of reporting period supplied"""
417525

src/dve/metadata_parser/model_generator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def constr(
7272
"identifier": domain_types.identifier,
7373
"orgid": domain_types.OrgID,
7474
"formatteddatetime": domain_types.formatteddatetime,
75+
"formattedtime": domain_types.formattedtime,
7576
"conformatteddate": domain_types.conformatteddate,
7677
"reportingperiodstart": domain_types.reportingperiod(reporting_period_type="start"),
7778
"reportingperiodend": domain_types.reportingperiod(reporting_period_type="end"),

tests/test_core_engine/test_backends/fixtures.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# pylint: disable=redefined-outer-name
44
import json
5-
from datetime import date, datetime
5+
from datetime import date, datetime, time
66
from pathlib import Path
77
from tempfile import TemporaryDirectory
88
from typing import Any, Dict, Iterator, List, Tuple
@@ -83,10 +83,10 @@ def temp_duckdb_dir():
8383

8484
@pytest.fixture
8585
def temp_csv_file(temp_duckdb_dir: Path):
86-
header: str = "ID,varchar_field,bigint_field,date_field,timestamp_field"
86+
header: str = "ID,varchar_field,bigint_field,date_field,timestamp_field,time_field"
8787
typed_data = [
88-
[1, "hi", 3, date(2023, 1, 3), datetime(2023, 1, 3, 12, 0, 3)],
89-
[2, "bye", 4, date(2023, 3, 7), datetime(2023, 5, 9, 15, 21, 53)],
88+
[1, "hi", 3, date(2023, 1, 3), datetime(2023, 1, 3, 12, 0, 3), time(12, 0, 0)],
89+
[2, "bye", 4, date(2023, 3, 7), datetime(2023, 5, 9, 15, 21, 53), time(13, 0 ,0)],
9090
]
9191

9292
class SimpleModel(BaseModel):
@@ -95,6 +95,7 @@ class SimpleModel(BaseModel):
9595
bigint_field: int
9696
date_field: date
9797
timestamp_field: datetime
98+
time_field: time
9899

99100
with open(temp_duckdb_dir.joinpath("dummy.csv"), mode="w") as csv_file:
100101
csv_file.write(header + "\n")

tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_data_contract.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@ def test_duckdb_data_contract_csv(temp_csv_file):
4141
"bigint_field": "NonNegativeInt",
4242
"date_field": "date",
4343
"timestamp_field": "datetime",
44+
"time_field": {
45+
"description": "test",
46+
"callable": "formattedtime",
47+
"constraints": {
48+
"time_format": "%Y-%m-%d",
49+
"timezone_treatment": "forbid"
50+
}
51+
}
4452
},
4553
"reader_config": {
4654
".csv": {

tests/test_model_generation/test_domain_types.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,3 +307,56 @@ def test_reportingperiod_raises(field, value):
307307
data = {field: value}
308308
with pytest.raises(ValueError):
309309
model = ReportingPeriodModel(**data)
310+
311+
312+
@pytest.mark.parametrize(
    "time_to_validate,time_format,timezone_treatment,expected",
    [
        ("23:00:00", "%H:%M:%S", "forbid", dt.time(23, 0, 0)),
        ("11:00:00", "%I:%M:%S", "forbid", dt.time(11, 0, 0)),
        ("23:00:00Z", None, "require", dt.time(23, 0, 0, tzinfo=UTC)),
        ("12:00:00Zam", None, "permit", dt.time(0, 0, 0, tzinfo=UTC)),
        ("12:00:00pm", None, "forbid", dt.time(12, 0, 0)),
        # A date-only format parses successfully but yields midnight, which is
        # effectively an incorrect time. This is treated as user error in the
        # schema configuration rather than something the type guards against.
        ("1970-01-01", "%Y-%m-%d", "forbid", dt.time(0, 0)),
        (dt.datetime(2025, 12, 1, 13, 0, 5), "%H:%M:%S", "forbid", dt.time(13, 0, 5)),
        (
            dt.datetime(2025, 12, 1, 13, 0, 5, tzinfo=UTC),
            "%H:%M:%S",
            "require",
            dt.time(13, 0, 5, tzinfo=UTC),
        ),
        (dt.time(13, 0, 0), "%H:%M:%S", "forbid", dt.time(13, 0, 0)),
        (dt.time(13, 0, 0, tzinfo=UTC), "%H:%M:%S", "permit", dt.time(13, 0, 0, tzinfo=UTC)),
        (dt.time(13, 0, 0, tzinfo=UTC), "%H:%M:%S", "require", dt.time(13, 0, 0, tzinfo=UTC)),
    ],
)
def test_formattedtime(
    time_to_validate: str | dt.datetime | dt.time,
    time_format: str | None,
    timezone_treatment: str,
    expected: dt.time,
):
    """Check that valid strings, datetimes and times validate to the expected time."""
    validator = hct.formattedtime(time_format, timezone_treatment)
    result = validator.validate(time_to_validate)
    assert result == expected
339+
340+
341+
@pytest.mark.parametrize(
    ["time_to_validate", "time_format", "timezone_treatment"],
    [
        # String does not match the explicit format.
        ["1970-01-01", "%H:%M:%S", "forbid"],
        # No explicit format and none of the default patterns match.
        ["not a time", None, "permit"],
        # Hour out of range for a 12 hour format.
        ["23:00:00", "%I:%M:%S", "permit"],
        # Timezone required but absent.
        ["23:00:00", "%H:%M:%S", "require"],
        # Trailing 'Z' is not part of the explicit format.
        ["23:00:00Z", "%I:%M:%S", "forbid"],
        # Timezone present but forbidden.
        [dt.datetime(2025, 12, 1, 13, 0, 5, tzinfo=UTC), "%H:%M:%S", "forbid"],
        [dt.time(13, 0, 5, tzinfo=UTC), "%H:%M:%S", "forbid"],
    ],
)
def test_formattedtime_raises(
    time_to_validate: str | dt.datetime | dt.time,
    time_format: str | None,
    timezone_treatment: str,
):
    """
    Test incorrect serialised objects can be handled correctly when attempting to parse into time
    objects.
    """
    time_type = hct.formattedtime(time_format, timezone_treatment)
    with pytest.raises(ValueError):
        time_type.validate(time_to_validate)  # pylint: disable=W0106

0 commit comments

Comments
 (0)