+# pylint: disable=W0223
 """A duckdb pipeline for running on Foundry platform"""

 from typing import Optional
-from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import duckdb_get_entity_count, duckdb_write_parquet
+
+from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import (
+    duckdb_get_entity_count,
+    duckdb_write_parquet,
+)
 from dve.core_engine.exceptions import CriticalProcessingError
 from dve.core_engine.models import SubmissionInfo
-from dve.core_engine.type_hints import URI, Failed
+from dve.core_engine.type_hints import URI
+from dve.parser import file_handling as fh
 from dve.parser.file_handling.implementations.file import LocalFilesystemImplementation
 from dve.parser.file_handling.service import _get_implementation
 from dve.pipeline.duckdb_pipeline import DDBDVEPipeline
 from dve.pipeline.utils import SubmissionStatus
-from dve.parser import file_handling as fh
 from dve.reporting.utils import dump_processing_errors

+
 @duckdb_get_entity_count
 @duckdb_write_parquet
 class FoundryDDBPipeline(DDBDVEPipeline):
@@ -24,12 +30,12 @@ def persist_audit_records(self, submission_info: SubmissionInfo) -> URI:
         write_to = fh.file_uri_to_local_path(write_to)
         write_to.parent.mkdir(parents=True, exist_ok=True)
         write_to = write_to.as_posix()
-        self.write_parquet(
-            self._audit_tables._processing_status.get_relation(),
+        self.write_parquet(  # type: ignore # pylint: disable=E1101
+            self._audit_tables._processing_status.get_relation(),  # pylint: disable=W0212
             fh.joinuri(write_to, "processing_status.parquet"),
         )
-        self.write_parquet(
-            self._audit_tables._submission_statistics.get_relation(),
+        self.write_parquet(  # type: ignore # pylint: disable=E1101
+            self._audit_tables._submission_statistics.get_relation(),  # pylint: disable=W0212
             fh.joinuri(write_to, "submission_statistics.parquet"),
         )
         return write_to
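Note on the pragmas just added: write_parquet (like get_entity_count) is attached to the class at runtime by the @duckdb_write_parquet and @duckdb_get_entity_count class decorators, so mypy and pylint cannot see it on FoundryDDBPipeline; that is what the "# type: ignore" and "disable=E1101" markers silence, while "disable=W0212" covers the access to the private audit-table relations. A minimal sketch of that injection pattern, using hypothetical names rather than the real duckdb_helpers implementation:

# Illustrative only: a class decorator that injects a method after class creation.
# Static analysers cannot resolve such attributes, which is what E1101 flags.
from typing import Any, Type


def attach_write_parquet(cls: Type) -> Type:
    def write_parquet(self: Any, relation: Any, target_uri: str) -> None:
        # Hypothetical body: persist a DuckDB relation to a Parquet file.
        relation.write_parquet(target_uri)

    cls.write_parquet = write_parquet  # injected at import time
    return cls


@attach_write_parquet
class SomePipeline:
    ...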
@@ -39,62 +45,70 @@ def file_transformation(
     ) -> tuple[SubmissionInfo, SubmissionStatus]:
         try:
             return super().file_transformation(submission_info)
-        except Exception as exc: # pylint: disable=W0718
+        except Exception as exc:  # pylint: disable=W0718
             self._logger.error(f"File transformation raised exception: {exc}")
             self._logger.exception(exc)
             dump_processing_errors(
-                fh.joinuri(self.processed_files_path, submission_info.submission_id),
-                "file_transformation",
-                [CriticalProcessingError.from_exception(exc)]
-            )
+                fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                "file_transformation",
+                [CriticalProcessingError.from_exception(exc)],
+            )
             self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
             return submission_info, SubmissionStatus(processing_failed=True)

-    def apply_data_contract(self, submission_info: SubmissionInfo, submission_status: Optional[SubmissionStatus] = None) -> tuple[SubmissionInfo | SubmissionStatus]:
+    def apply_data_contract(
+        self, submission_info: SubmissionInfo, submission_status: Optional[SubmissionStatus] = None
+    ) -> tuple[SubmissionInfo, SubmissionStatus]:
         try:
             return super().apply_data_contract(submission_info, submission_status)
-        except Exception as exc: # pylint: disable=W0718
+        except Exception as exc:  # pylint: disable=W0718
             self._logger.error(f"Apply data contract raised exception: {exc}")
             self._logger.exception(exc)
             dump_processing_errors(
-                fh.joinuri(self.processed_files_path, submission_info.submission_id),
-                "contract",
-                [CriticalProcessingError.from_exception(exc)]
-            )
+                fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                "contract",
+                [CriticalProcessingError.from_exception(exc)],
+            )
             self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
             return submission_info, SubmissionStatus(processing_failed=True)

-    def apply_business_rules(self, submission_info: SubmissionInfo, submission_status: Optional[SubmissionStatus] = None):
+    def apply_business_rules(
+        self, submission_info: SubmissionInfo, submission_status: Optional[SubmissionStatus] = None
+    ):
         try:
             return super().apply_business_rules(submission_info, submission_status)
-        except Exception as exc: # pylint: disable=W0718
+        except Exception as exc:  # pylint: disable=W0718
             self._logger.error(f"Apply business rules raised exception: {exc}")
             self._logger.exception(exc)
             dump_processing_errors(
-                fh.joinuri(self.processed_files_path, submission_info.submission_id),
-                "business_rules",
-                [CriticalProcessingError.from_exception(exc)]
-            )
+                fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                "business_rules",
+                [CriticalProcessingError.from_exception(exc)],
+            )
             self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
             return submission_info, SubmissionStatus(processing_failed=True)
-
-    def error_report(self, submission_info: SubmissionInfo, submission_status: Optional[SubmissionStatus] = None):
+
+    def error_report(
+        self, submission_info: SubmissionInfo, submission_status: Optional[SubmissionStatus] = None
+    ):
         try:
             return super().error_report(submission_info, submission_status)
-        except Exception as exc: # pylint: disable=W0718
+        except Exception as exc:  # pylint: disable=W0718
             self._logger.error(f"Error reports raised exception: {exc}")
             self._logger.exception(exc)
             sub_stats = None
             report_uri = None
             dump_processing_errors(
-                fh.joinuri(self.processed_files_path, submission_info.submission_id),
-                "error_report",
-                [CriticalProcessingError.from_exception(exc)]
-            )
+                fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                "error_report",
+                [CriticalProcessingError.from_exception(exc)],
+            )
             self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
             return submission_info, submission_status, sub_stats, report_uri

-    def run_pipeline(self, submission_info: SubmissionInfo) -> tuple[Optional[URI], Optional[URI], URI]:
+    def run_pipeline(
+        self, submission_info: SubmissionInfo
+    ) -> tuple[Optional[URI], Optional[URI], URI]:
         """Sequential single submission pipeline runner"""
         try:
             sub_id: str = submission_info.submission_id
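Each stage override above (file_transformation, apply_data_contract, apply_business_rules, error_report) follows the same fail-soft contract: delegate to the parent DDBDVEPipeline implementation and, if it raises, log the exception, dump a CriticalProcessingError report under the submission's processed-files path, mark the submission failed in the audit tables, and return a result that signals failure (a SubmissionStatus with processing_failed=True for the processing stages, or None statistics and report URI for error_report). Condensed into one hypothetical helper, reading in the context of the class above, the shared shape is:

    # Illustrative only: the shared fail-soft shape of the stage overrides above.
    # _fail_soft_stage, stage_name and call are assumed names; the real methods
    # inline this logic rather than sharing a helper.
    def _fail_soft_stage(self, stage_name, call, submission_info):
        try:
            return call()
        except Exception as exc:  # pylint: disable=W0718
            self._logger.error(f"{stage_name} raised exception: {exc}")
            self._logger.exception(exc)
            dump_processing_errors(
                fh.joinuri(self.processed_files_path, submission_info.submission_id),
                stage_name,
                [CriticalProcessingError.from_exception(exc)],
            )
            self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
            return submission_info, SubmissionStatus(processing_failed=True)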
@@ -104,8 +118,12 @@ def run_pipeline(self, submission_info: SubmissionInfo) -> tuple[Optional[URI],
             sub_info, sub_status = self.file_transformation(submission_info=submission_info)
             if not (sub_status.validation_failed or sub_status.processing_failed):
                 self._audit_tables.mark_data_contract(submission_ids=[sub_id])
-                sub_info, sub_status = self.apply_data_contract(submission_info=sub_info, submission_status=sub_status)
-                self._audit_tables.mark_business_rules(submissions=[(sub_id, sub_status.validation_failed)])
+                sub_info, sub_status = self.apply_data_contract(
+                    submission_info=sub_info, submission_status=sub_status
+                )
+                self._audit_tables.mark_business_rules(
+                    submissions=[(sub_id, sub_status.validation_failed)]
+                )
                 sub_info, sub_status = self.apply_business_rules(
                     submission_info=submission_info, submission_status=sub_status
                 )
@@ -118,15 +136,15 @@
                 submission_info=submission_info, submission_status=sub_status
             )
             self._audit_tables.add_submission_statistics_records(sub_stats=[sub_stats])
-        except Exception as err: # pylint: disable=W0718
+        except Exception as err:  # pylint: disable=W0718
             self._logger.error(
-                f"During processing of submission_id: {sub_id}, the following exception was raised: {err}"
+                f"During processing of submission_id: {sub_id}, this exception was raised: {err}"
             )
             dump_processing_errors(
-                fh.joinuri(self.processed_files_path, submission_info.submission_id),
-                "run_pipeline",
-                [CriticalProcessingError.from_exception(err)]
-            )
+                fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                "run_pipeline",
+                [CriticalProcessingError.from_exception(err)],
+            )
             self._audit_tables.mark_failed(submissions=[sub_id])
         finally:
             audit_files_uri = self.persist_audit_records(submission_info=submission_info)
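The hunk ends inside run_pipeline's finally block, before the return statement that produces the annotated tuple[Optional[URI], Optional[URI], URI], so the exact meaning and order of the three URIs is not visible here. A caller-side sketch under that annotation alone, assuming a pipeline instance and a SubmissionInfo built elsewhere (their construction is not part of this diff):

# Illustrative only: invoking the sequential runner and unpacking its result.
# pipeline and submission are assumed to exist already; which URI is which is an
# assumption, since the diff truncates before run_pipeline's return statement.
uri_a, uri_b, audit_uri = pipeline.run_pipeline(submission_info=submission)
if uri_a is None or uri_b is None:
    # The Optional entries may be None when a stage marked the submission failed.
    ...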