Skip to content

Commit 3baf205

Browse files
committed
feat: sorted linting. Added json schema docs
1 parent b75c0bc commit 3baf205

File tree

26 files changed

+204
-177
lines changed

26 files changed

+204
-177
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"$schema": "https://json-schema.org/draft-07/schema",
3+
"$id": "data-ingest:contract/components/contract_error_details.schema.json",
4+
"title": "contract_error_details",
5+
"description": "An optional specification of custom error codes and messages for fields for the data contract phase of validation",
6+
"type": "object",
7+
"properties": {
8+
"field_name": {
9+
"type": "object",
10+
"description": "A mapping of field names to the custom error code and message required if these fields were to fail validation during the data contract phase. For nested fields, these should be specified using struct '.' notation (e.g. fieldA.fieldB.fieldC)",
11+
"additionalProperties": {
12+
"$ref": "field_error_type.schema.json"
13+
}
14+
}
15+
}
16+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"$schema": "https://json-schema.org/draft-07/schema",
3+
"$id": "data-ingest:contract/components/field_error_detail.schema.json",
4+
"title": "field_error_detail",
5+
"description": "The custom details to be used for a field when a validation error is raised during the data contract phase",
6+
"type": "object",
7+
"properties": {
8+
"error_code": {
9+
"description": "The code to be used for the field and error type specified",
10+
"type": "string"
11+
},
12+
"error_message": {
13+
"description": "The message to be used for the field and error type specified. This can include templating (specified using jinja2 conventions). During templating, the full record will be available with an additional __error_value to easily obtain nested offending values.",
14+
"type": "string",
15+
"enum": [
16+
"record_rejection",
17+
"file_rejection",
18+
"warning"
19+
]
20+
}
21+
},
22+
"required": [
23+
"error_code",
24+
"error_message"
25+
],
26+
"additionalProperties": false
27+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"$schema": "https://json-schema.org/draft-07/schema",
3+
"$id": "data-ingest:contract/components/field_error_type.schema.json",
4+
"title": "field_error_type",
5+
"description": "The error type for a field when a validation error is raised during the data contract phase",
6+
"type": "object",
7+
"properties": {
8+
"error_type": {
9+
"description": "The type of error the details are for",
10+
"type": "string",
11+
"enum": [
12+
"Blank",
13+
"Bad value",
14+
"Wrong format"
15+
],
16+
"additionalProperties": {
17+
"$ref": "field_error_detail.schema.json"
18+
}
19+
}
20+
}
21+
}

src/dve/core_engine/backends/implementations/duckdb/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
"""Implementation of duckdb backend"""
12
from dve.core_engine.backends.implementations.duckdb.readers.json import DuckDBJSONReader
23
from dve.core_engine.backends.readers import register_reader
34

src/dve/core_engine/backends/implementations/duckdb/auditing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@
1515
PYTHON_TYPE_TO_DUCKDB_TYPE,
1616
table_exists,
1717
)
18+
from dve.core_engine.backends.utilities import PYTHON_TYPE_TO_POLARS_TYPE
1819
from dve.core_engine.models import (
1920
AuditRecord,
2021
ProcessingStatusRecord,
2122
SubmissionInfo,
2223
SubmissionStatisticsRecord,
2324
TransferRecord,
2425
)
25-
from dve.core_engine.backends.utilities import PYTHON_TYPE_TO_POLARS_TYPE
2626
from dve.core_engine.type_hints import URI, ExecutorType
2727

2828

src/dve/core_engine/backends/implementations/duckdb/contract.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from dve.core_engine.backends.implementations.duckdb.types import DuckDBEntities
2626
from dve.core_engine.backends.metadata.contract import DataContractMetadata
2727
from dve.core_engine.backends.types import StageSuccessful
28-
from dve.core_engine.backends.utilities import stringify_model, get_polars_type_from_annotation
28+
from dve.core_engine.backends.utilities import get_polars_type_from_annotation, stringify_model
2929
from dve.core_engine.message import FeedbackMessage
3030
from dve.core_engine.type_hints import URI, Messages
3131
from dve.core_engine.validation import RowValidator

src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,9 @@
1111

1212
import duckdb.typing as ddbtyp
1313
import numpy as np
14-
import polars as pl # type: ignore
1514
from duckdb import DuckDBPyConnection, DuckDBPyRelation
1615
from duckdb.typing import DuckDBPyType
1716
from pandas import DataFrame
18-
1917
from pydantic import BaseModel
2018
from typing_extensions import Annotated, get_args, get_origin, get_type_hints
2119

@@ -91,6 +89,7 @@ def __call__(self):
9189
}
9290
"""A mapping of Python types to the equivalent DuckDB types."""
9391

92+
9493
def table_exists(connection: DuckDBPyConnection, table_name: str) -> bool:
9594
"""check if a table exists in a given DuckDBPyConnection"""
9695
return table_name in map(lambda x: x[0], connection.sql("SHOW TABLES").fetchall())
@@ -190,6 +189,7 @@ def get_duckdb_type_from_annotation(type_annotation: Any) -> DuckDBPyType:
190189
return duck_type
191190
raise ValueError(f"No equivalent DuckDB type for {type_annotation!r}")
192191

192+
193193
def coerce_inferred_numpy_array_to_list(pandas_df: DataFrame) -> DataFrame:
194194
"""Function to modify numpy inferred array when converting from duckdb relation to
195195
pandas dataframe - these cause issues with pydantic models
@@ -224,23 +224,20 @@ def _ddb_read_parquet(
224224

225225

226226
def _ddb_write_parquet( # pylint: disable=unused-argument
227-
self,
228-
entity: Union[Iterator[Dict[str, Any]],
229-
DuckDBPyRelation],
230-
target_location: URI,
231-
**kwargs
227+
self, entity: Union[Iterator[Dict[str, Any]], DuckDBPyRelation], target_location: URI, **kwargs
232228
) -> URI:
233229
"""Method to write parquet files from type cast entities
234230
following data contract application
235231
"""
236232
if isinstance(_get_implementation(target_location), LocalFilesystemImplementation):
237233
Path(target_location).parent.mkdir(parents=True, exist_ok=True)
238-
234+
239235
if isinstance(entity, Generator):
240-
entity = self._connection.query("select dta.* from (select unnest($data) as dta)",
241-
params={"data": list(entity)})
236+
entity = self._connection.query(
237+
"select dta.* from (select unnest($data) as dta)", params={"data": list(entity)}
238+
)
242239

243-
entity.to_parquet(file_name=target_location, compression="snappy", **kwargs)
240+
entity.to_parquet(file_name=target_location, compression="snappy", **kwargs) # type: ignore
244241
return target_location
245242

246243

src/dve/core_engine/backends/implementations/duckdb/readers/csv.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# pylint: disable=arguments-differ
44
from typing import Any, Dict, Iterator, Type
55

6-
from duckdb import DuckDBPyConnection, DuckDBPyRelation, read_csv, default_connection
6+
from duckdb import DuckDBPyConnection, DuckDBPyRelation, default_connection, read_csv
77
from pydantic import BaseModel
88

99
from dve.core_engine.backends.base.reader import BaseFileReader, read_function
@@ -14,6 +14,7 @@
1414
from dve.core_engine.backends.implementations.duckdb.types import SQLType
1515
from dve.core_engine.type_hints import URI, EntityName
1616

17+
1718
@duckdb_write_parquet
1819
class DuckDBCSVReader(BaseFileReader):
1920
"""A reader for CSV files"""

src/dve/core_engine/backends/implementations/duckdb/readers/json.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from duckdb import DuckDBPyRelation, read_json
77
from pydantic import BaseModel
8-
from typing_extensions import Literal
98

109
from dve.core_engine.backends.base.reader import BaseFileReader, read_function
1110
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import (
@@ -19,12 +18,9 @@
1918
@duckdb_write_parquet
2019
class DuckDBJSONReader(BaseFileReader):
2120
"""A reader for JSON files"""
22-
23-
def __init__(
24-
self,
25-
format: Optional[str] = "array"
26-
):
27-
self._format = format
21+
22+
def __init__(self, json_format: Optional[str] = "array"):
23+
self._json_format = json_format
2824

2925
super().__init__()
3026

@@ -45,6 +41,4 @@ def read_to_relation( # pylint: disable=unused-argument
4541
for fld in schema.__fields__.values()
4642
}
4743

48-
return read_json(resource,
49-
columns=ddb_schema,
50-
format=self._format)
44+
return read_json(resource, columns=ddb_schema, format=self._json_format) # type: ignore

src/dve/core_engine/backends/implementations/duckdb/readers/xml.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,22 @@
33

44
from typing import Dict, Optional, Type
55

6-
import pandas as pd
76
import polars as pl
87
from duckdb import DuckDBPyConnection, DuckDBPyRelation, default_connection
98
from pydantic import BaseModel
109

1110
from dve.core_engine.backends.base.reader import read_function
12-
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import (
13-
duckdb_write_parquet,
14-
)
11+
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import duckdb_write_parquet
1512
from dve.core_engine.backends.readers.xml import XMLStreamReader
16-
from dve.core_engine.backends.utilities import stringify_model, get_polars_type_from_annotation
13+
from dve.core_engine.backends.utilities import get_polars_type_from_annotation, stringify_model
1714
from dve.core_engine.type_hints import URI
1815

1916

2017
@duckdb_write_parquet
2118
class DuckDBXMLStreamReader(XMLStreamReader):
2219
"""A reader for XML files"""
2320

24-
def __init__(self,
25-
ddb_connection: Optional[DuckDBPyConnection] = None,
26-
**kwargs):
21+
def __init__(self, ddb_connection: Optional[DuckDBPyConnection] = None, **kwargs):
2722
self.ddb_connection = ddb_connection if ddb_connection else default_connection
2823
super().__init__(**kwargs)
2924

0 commit comments

Comments
 (0)