Skip to content

Commit c0b62bc

Browse files
merge: merge from main
2 parents e13b9c5 + 62b573e commit c0b62bc

File tree

90 files changed

+1505
-822
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

90 files changed

+1505
-822
lines changed

.github/workflows/ci_linting.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
steps:
1414
- uses: actions/checkout@v5
1515

16-
- name: Install extra dependencies for a python 3.7.17 install
16+
- name: Install extra dependencies for a python install
1717
run: |
1818
sudo apt-get update
1919
sudo apt -y install --no-install-recommends liblzma-dev libbz2-dev libreadline-dev
@@ -26,6 +26,9 @@ jobs:
2626

2727
- name: reshim asdf
2828
run: asdf reshim
29+
30+
- name: ensure poetry using desired python version
31+
run: poetry env use $(asdf which python)
2932

3033
- name: Cache Poetry virtualenv
3134
uses: actions/cache@v4

.github/workflows/ci_testing.yml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ jobs:
1313
steps:
1414
- name: Checkout code
1515
uses: actions/checkout@v5
16-
17-
- name: Install extra dependencies for a python 3.7.17 install
16+
17+
- name: Install extra dependencies for a python install
1818
run: |
1919
sudo apt-get update
2020
sudo apt -y install --no-install-recommends liblzma-dev libbz2-dev libreadline-dev
21+
2122
- name: Install asdf cli
2223
uses: asdf-vm/actions/setup@v4
2324

@@ -26,6 +27,9 @@ jobs:
2627

2728
- name: reshim asdf
2829
run: asdf reshim
30+
31+
- name: ensure poetry using desired python version
32+
run: poetry env use $(asdf which python)
2933

3034
- name: Cache Poetry virtualenv
3135
uses: actions/cache@v4
@@ -42,7 +46,6 @@ jobs:
4246
- name: Run pytest and coverage
4347
run: |
4448
export JAVA_HOME=$(asdf where java)
45-
echo "JAVA_HOME - $JAVA_HOME"
4649
make coverage
4750
4851
- name: Upload Coverage Report
@@ -54,5 +57,4 @@ jobs:
5457
- name: Run behave tests
5558
run: |
5659
export JAVA_HOME=$(asdf where java)
57-
echo "JAVA_HOME - $JAVA_HOME"
5860
make behave

.mise.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[tools]
2-
python="3.7.17"
3-
poetry="1.4.2"
2+
python="3.11"
3+
poetry="2.2"
44
java="liberica-1.8.0"

.tool-versions

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
python 3.7.17
2-
poetry 1.4.2
1+
python 3.11.14
2+
poetry 2.2.0
33
java liberica-1.8.0

CHANGELOG.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,38 @@
1+
## v0.3.0 (2025-11-19)
2+
3+
### Feat
4+
5+
- new domain type formattedtime for time only data
6+
7+
### Refactor
8+
9+
- small tweak to allow use of dynamic fields in select rules
10+
11+
## v0.2.0 (2025-11-12)
12+
13+
### Refactor
14+
15+
- ensure dve working on python 3.10
16+
- ensure dve working on python 3.11
17+
18+
### BREAKING CHANGE
19+
20+
- Numerous typing updates that will make this codebase unusable below python 3.9
21+
22+
note - this does not mean the package will work on python 3.9. Minimum working version is 3.10.
23+
24+
### Feat
25+
26+
- added functionality to allow error messages in business rules t… (#8)
27+
28+
### Refactor
29+
30+
- bump pylint to work correctly with py3.11 and fix numerous linting issues
31+
132
## 0.1.0 (2025-11-10)
233

34+
*NB - This was previously v1.0.0 and v1.1.0 but has been rolled back into a 0.1.0 release to reflect lack of package stability.*
35+
336
### Feat
437

538
- Added ability to define custom error codes and templated messages for data contract feedback messages

Makefile

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ activate = poetry run
33
# dev
44
install:
55
poetry lock
6-
poetry install --with dev,test
6+
poetry install --with dev
77

88
# dist
99
wheel:
@@ -27,6 +27,15 @@ coverage:
2727
$(activate) coverage report
2828
$(activate) coverage xml
2929

30+
# lint
31+
pylint:
32+
${activate} pylint src/
33+
34+
mypy:
35+
${activate} mypy src/
36+
37+
lint: mypy pylint
38+
3039
# pre-commit
3140
pre-commit-all:
3241
${activate} pre-commit run --all-files

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Data Validation Engine
22

3-
The Data Validation Engine (DVE) is a configuration driven data validation library built and utilised by NHS England.
3+
The Data Validation Engine (DVE) is a configuration driven data validation library built and utilised by NHS England. Currently the package has been reverted from v1.0.0 release to a 0.x as we feel the package is not yet mature enough to be considered a 1.0.0 release. So please bear this in mind if reading through the commits and references to a v1+ release when on v0.x.
44

55
As mentioned above, the DVE is "configuration driven" which means the majority of development for you as a user will be building a JSON document to describe how the data will be validated. The JSON document is known as a `dischema` file and example files can be accessed [here](./tests/testdata/). If you'd like to learn more about JSON document and how to build one from scratch, then please read the documentation [here](./docs/).
66

@@ -9,7 +9,7 @@ Once a dischema file has been defined, you are ready to use the DVE. The DVE is
99
| | Service | Purpose |
1010
| -- | ------- | ------- |
1111
| 1. | File Transformation | This service will take submitted files and turn them into stringified parquet file(s) to ensure that a consistent data structure can be passed through the other services. |
12-
| 2. | Data Contract | This service will validate and peform type casting against a stringified parquet file using [pydantic models](https://docs.pydantic.dev/1.10/). |
12+
| 2. | Data Contract | This service will validate and perform type casting against a stringified parquet file using [pydantic models](https://docs.pydantic.dev/1.10/). |
1313
| 3. | Business Rules | The business rules service will perform more complex validations such as comparisons between fields and tables, aggregations, filters etc to generate new entities. |
1414
| 4. | Error Reports | The error reports service will take all the errors raised in previous services and surface them into a readable format for downstream users/services. Currently, this is implemented as an Excel spreadsheet but could be reconfigured to meet other requirements/use cases. |
1515

@@ -21,7 +21,7 @@ Additionally, if you'd like to contribute a new backend implementation into the
2121

2222
## Installation and usage
2323

24-
The DVE is a Python package and can be installed using `pip`. As of release v0.1.0 we currently only supports Python 3.7, with Spark version 3.2.1 and DuckDB version of 1.1.0. We are currently working on upgrading the DVE to work on Python 3.11+ and this will be made available asap with version 1.0.0 release.
24+
The DVE is a Python package and can be installed using `pip`. As of release v0.1.x we currently only support Python 3.7, with Spark version 3.2.1 and DuckDB version 1.1.0. We are currently working on upgrading the DVE to work on Python 3.10-3.11 and this will be made available with the v0.2.x release.
2525

2626
In addition to a working Python 3.7+ installation you will need OpenJDK 11 installed if you're planning to use the Spark backend implementation.
2727

@@ -33,7 +33,7 @@ To install the DVE package you can simply install using a package manager such a
3333
pip install git+https://github.com/NHSDigital/data-validation-engine.git@v0.1.0
3434
```
3535

36-
Once you have installed the DVE you are ready to use it. For guidance on how to create your dischema json document (configuration), please read the [documentation](./docs/).
36+
Once you have installed the DVE you are ready to use it. For guidance on how to create your dischema JSON document (configuration), please read the [documentation](./docs/).
3737

3838
Please note - The long term aim is to make the DVE available via PyPi and Conda but we are not quite there yet. Once available this documentation will be updated to contain the new installation options.
3939

@@ -49,7 +49,7 @@ Below is a list of features that we would like to implement or have been request
4949
| Feature | Release Version | Released? |
5050
| ------- | --------------- | --------- |
5151
| Open source release | 0.1.0 | Yes |
52-
| Uplift to Python 3.11 | 1.0.0 | No |
52+
| Uplift to Python 3.11 | 0.2.0 | Yes |
5353
| Upgrade to Pydantic 2.0 | Not yet confirmed | No |
5454
| Create a more user friendly interface for building and modifying dischema files | Not yet confirmed | No |
5555

docs/detailed_guidance/domain_types.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,24 @@ Domain types are custom defined pydantic types that solve common problems with u
44
This might include Postcodes, NHS Numbers, dates with specific formats etc.
55

66
Below is a list of defined types, their output type and any constraints. Nested beneath them are any constraints that are allowed and their default values if there are any.
7-
| Defined Type | Output Type | Contraints & Defaults |
8-
| ------------ | ----------- | --------------------- |
9-
| NHSNumber | str |
10-
| permissive_nhs_number | str | <li> warn_on_test_numbers = False </li> |
11-
| Postcode | str |
12-
| OrgId | str |
13-
| conformatteddate | date | <li>date_format: str</li><li>ge: date</li><li>le: date</li><li>gt: date</li><li>lt: date</li> |
14-
| formatteddatetime | datetime | <li>date_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require] = "permit"</li> |
15-
| reportingperiod | date | <li>reporting_period_type: one_of ["start", "end"]</li><li>date_format: str = "%Y-%m-%d"</li> |
16-
| alphanumeric | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> |
17-
| identifier | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li>
7+
| Defined Type | Output Type | Constraints & Defaults | Supported Implementations |
8+
| ------------ | ----------- | --------------------- | ------------------------- |
9+
| NHSNumber | str | | Spark, DuckDB |
10+
| permissive_nhs_number | str | <li> warn_on_test_numbers = False </li> | Spark, DuckDB |
11+
| Postcode | str | | Spark, DuckDB |
12+
| OrgId | str | | Spark, DuckDB |
13+
| conformatteddate | date | <li>date_format: str</li><li>ge: date</li><li>le: date</li><li>gt: date</li><li>lt: date</li> | Spark, DuckDB |
14+
| formatteddatetime | datetime | <li>date_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit"</li> | Spark, DuckDB |
15+
| formattedtime | time | <li>time_format: str </li><li>timezone_treatment: one_of ["forbid", "permit", "require"] = "permit"</li> | DuckDB |
16+
| reportingperiod | date | <li>reporting_period_type: one_of ["start", "end"]</li><li>date_format: str = "%Y-%m-%d"</li> | Spark, DuckDB |
17+
| alphanumeric | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> | Spark, DuckDB |
18+
| identifier | str | <li>min_digits : NonNegativeInt = 1</li><li>max_digits: PositiveInt = 1</li> | Spark, DuckDB |
1819

19-
Other types that are allowed include:
20+
**Other types that are allowed include:**
2021
- str
2122
- int
2223
- date
2324
- datetime
2425
- Decimal
2526
- float
26-
27-
And any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)
27+
- Any types that are included in [pydantic version 1.10](https://docs.pydantic.dev/1.10/usage/types/#pydantic-types)

pyproject.toml

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "nhs_dve"
3-
version = "0.1.0"
3+
version = "0.3.0"
44
description = "`nhs data validation engine` is a framework used to validate data"
55
authors = ["NHS England <england.contactus@nhs.net>"]
66
readme = "README.md"
@@ -9,58 +9,73 @@ packages = [
99
]
1010
classifiers = [
1111
"Programming Language :: Python :: 3",
12-
"Programming Language :: Python :: 3.7",
12+
"Programming Language :: Python :: 3.10",
13+
"Programming Language :: Python :: 3.11",
1314
"Operating System :: OS Independent",
1415
"Topic :: Software Development :: Libraries",
1516
"Typing :: Typed",
1617
]
1718

1819
[tool.poetry.dependencies]
19-
python = ">=3.7.2,<3.8"
20-
boto3 = "1.28.47" # Boto3 will no longer support Python 3.7 starting December 13, 2023
21-
botocore = "1.31.47"
22-
delta-spark = "1.1.0"
20+
python = ">=3.10,<3.12"
21+
boto3 = "1.34.162"
22+
botocore = "1.34.162"
23+
delta-spark = "2.4.0"
2324
duckdb = "1.1.0" # mitigates security vuln in < 1.1.0
2425
formulas = "1.2.4"
2526
idna = "3.7" # Downstream dep of requests but has security vuln < 3.7
2627
Jinja2 = "3.1.6" # mitigates security vuln in < 3.1.6
2728
lxml = "4.9.1"
2829
openpyxl = "3.1.0"
29-
pandas = "1.3.5"
30-
polars = "0.17.14"
31-
pyarrow = "7.0.0"
30+
pandas = "2.2.2"
31+
polars = "0.20.14"
32+
pyarrow = "17.0.0"
3233
pydantic = "1.10.15" # Mitigates security vuln in < 1.10.13
3334
pymongo = "4.6.3"
34-
pyspark = "3.2.1"
35+
pyspark = "3.4.4"
3536
pytz = "2022.1"
36-
PyYAML = "5.4"
37-
requests = "2.31.0"
37+
PyYAML = "6.0.3"
38+
requests = "2.32.4" # Mitigates security vuln in < 2.31.0
3839
schedula = "1.2.19"
3940
sqlalchemy = "2.0.19"
4041
typing_extensions = "4.6.2"
41-
urllib3 = "1.26.19" # Used transiently, but has security vuln < 1.26.19
42+
urllib3 = "2.5.0" # Mitigates security vuln in < 1.26.19
4243
xmltodict = "0.13.0"
4344

45+
[tool.poetry.group.dev]
46+
optional = true
47+
include-groups = [
48+
"test",
49+
"lint"
50+
]
51+
4452
[tool.poetry.group.dev.dependencies]
45-
commitizen = "3.9.1" # latest version to support Python 3.7.17
46-
pre-commit = "2.21.0" # latest version to support Python 3.7.17
53+
commitizen = "4.9.1"
54+
pre-commit = "4.3.0"
55+
56+
[tool.poetry.group.test]
57+
optional = true
4758

4859
[tool.poetry.group.test.dependencies]
4960
faker = "18.11.1"
50-
behave = "1.2.6"
51-
coverage = "6.4.3"
52-
moto = {extras = ["s3"], version = "3.1.18"}
61+
behave = "1.3.3"
62+
coverage = "7.11.0"
63+
moto = {extras = ["s3"], version = "4.0.13"}
64+
Werkzeug = "3.0.6" # Dependency of moto which needs 3.0.6 for security vuln mitigation
5365
mongomock = "4.1.2"
54-
pytest = "7.4.4"
55-
pytest-lazy-fixture = "0.6.3"
66+
pytest = "8.4.2"
67+
pytest-lazy-fixtures = "1.4.0" # switched from https://github.com/TvoroG/pytest-lazy-fixture as it's no longer supported
5668
xlsx2csv = "0.8.2"
5769

70+
[tool.poetry.group.lint]
71+
optional = true
72+
5873
[tool.poetry.group.lint.dependencies]
59-
black = "22.6.0"
60-
astroid = "2.11.7"
74+
black = "24.3.0"
75+
astroid = "2.14.2"
6176
isort = "5.11.5"
62-
pylint = "2.14.5"
63-
mypy = "0.982"
77+
pylint = "2.16.4"
78+
mypy = "0.991"
6479
boto3-stubs = {extras = ["essential"], version = "1.26.72"}
6580
botocore-stubs = "1.29.72"
6681
pandas-stubs = "1.2.0.62"
@@ -112,9 +127,8 @@ source_pkgs = [
112127
show_missing = true
113128

114129
[tool.pylint]
115-
# Can't add support for custom checker until running on Python 3.9+ again.
116-
# init-hook = "import sys; sys.path.append('./pylint_checkers')"
117-
# load-plugins = "check_typing_imports"
130+
init-hook = "import sys; sys.path.append('./pylint_checkers')"
131+
load-plugins = "check_typing_imports"
118132

119133
[tool.pylint.main]
120134
# Analyse import fallback blocks. This can be used to support both Python 2 and 3
@@ -189,7 +203,7 @@ persistent = true
189203

190204
# Minimum Python version to use for version dependent checks. Will default to the
191205
# version used to run pylint.
192-
py-version = "3.7"
206+
py-version = "3.11"
193207

194208
# Discover python modules and packages in the file system subtree.
195209
# recursive =

0 commit comments

Comments
 (0)