From 0483beec77d97bb5942a0e926020b5adcdf49e1f Mon Sep 17 00:00:00 2001 From: Ross Perry Date: Tue, 28 Oct 2025 10:42:27 -0600 Subject: [PATCH 1/2] check nulls after mapping, before dqc --- seed/models/columns.py | 1 + .../seed/js/controllers/mapping_controller.js | 15 +++- .../seed/js/services/data_quality_service.js | 4 + seed/static/seed/partials/mapping.html | 4 +- seed/utils/import_file.py | 83 ++++++++++++++++++- seed/views/v3/import_files.py | 20 +++++ 6 files changed, 123 insertions(+), 4 deletions(-) diff --git a/seed/models/columns.py b/seed/models/columns.py index 39e42d35b3..725e641c81 100644 --- a/seed/models/columns.py +++ b/seed/models/columns.py @@ -1126,6 +1126,7 @@ def create_mappings(mappings, organization, user, import_file_id=None): "from_units": mapping.get("from_units"), "to_field": mapping["to_field"], "to_table_name": mapping["to_table_name"], + "to_data_type": mapping.get("to_data_type"), } ) else: diff --git a/seed/static/seed/js/controllers/mapping_controller.js b/seed/static/seed/js/controllers/mapping_controller.js index 86e88c4b9e..6c970cd25f 100644 --- a/seed/static/seed/js/controllers/mapping_controller.js +++ b/seed/static/seed/js/controllers/mapping_controller.js @@ -250,6 +250,7 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ $scope.import_file = import_file_payload.import_file; $scope.import_file.matching_finished = false; $scope.suggested_mappings = suggested_mappings_payload.suggested_column_mappings; + $scope.mapping_error_messages = null; $scope.raw_columns = raw_columns_payload.raw_columns; $scope.mappable_property_columns = suggested_mappings_payload.property_columns; @@ -750,7 +751,10 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ progress_key, 0, 1, - $scope.get_cached_mapped_buildings, + (response) => { + $scope.check_mapping_for_nulls(); + $scope.get_cached_mapped_buildings(response); + }, () => {}, $scope.import_file ); @@ -760,6 +764,14 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ }); }; + $scope.check_mapping_for_nulls = () => { + data_quality_service.check_mapping_for_nulls($scope.organization.id, $scope.import_file.id) + .then((response) => { + $scope.mapping_error_messages = response.status === 'warning' ? response.message : null; + console.log(response); + }); + }; + $scope.get_cached_mapped_buildings = ({ unique_id }) => { cache_entry_service.get_cache_entry(unique_id) .then($scope.set_mapped_buildings) @@ -899,6 +911,7 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ col.suggestion_column_name = cached_col.to_field; col.suggestion_table_name = cached_col.to_table_name; col.from_units = cached_col.from_units; + col.data_type = cached_col.to_data_type; // If available, use display_name, else use raw field name. const mappable_column = _.find($scope.mappable_property_columns.concat($scope.mappable_taxlot_columns), { column_name: cached_col.to_field, table_name: cached_col.to_table_name }); diff --git a/seed/static/seed/js/services/data_quality_service.js b/seed/static/seed/js/services/data_quality_service.js index 1c5a9bb262..b017a3133b 100644 --- a/seed/static/seed/js/services/data_quality_service.js +++ b/seed/static/seed/js/services/data_quality_service.js @@ -95,6 +95,10 @@ angular.module('SEED.service.data_quality', []).factory('data_quality_service', return deferred.promise; }; + data_quality_factory.check_mapping_for_nulls = (org_id, import_file_id) => $http + .get(`/api/v3/import_files/${import_file_id}/verify_data_type_mapping?organization=${org_id}`) + .then((response) => response.data); + return data_quality_factory; } ]); diff --git a/seed/static/seed/partials/mapping.html b/seed/static/seed/partials/mapping.html index e64667afa0..64387809f1 100644 --- a/seed/static/seed/partials/mapping.html +++ b/seed/static/seed/partials/mapping.html @@ -227,8 +227,8 @@
>> TOOK {diff.seconds}s, {diff.microseconds}micros') + import_file.save() diff --git a/seed/views/v3/import_files.py b/seed/views/v3/import_files.py index 387af65680..21a7651f59 100644 --- a/seed/views/v3/import_files.py +++ b/seed/views/v3/import_files.py @@ -57,6 +57,7 @@ ) from seed.utils.api import OrgMixin, api_endpoint from seed.utils.api_schema import AutoSchemaHelper, swagger_auto_schema_org_query_param +from seed.utils.import_file import verify_data_types _log = logging.getLogger(__name__) @@ -1171,6 +1172,25 @@ def system_meter_upload(self, request, pk): status=status.HTTP_200_OK, ) + @swagger_auto_schema(manual_parameters=[AutoSchemaHelper.query_org_id_field()]) + @action(detail=True, methods=["GET"]) + def verify_data_type_mapping(self, request, pk): + org_id = self.get_organization(request) + + try: + import_file = ImportFile.objects.get(pk=pk, import_record__super_organization_id=org_id) + except ImportFile.DoesNotExist: + return JsonResponse( + {"status": "error", "message": "No such resource."}, status=status.HTTP_400_BAD_REQUEST + ) + + verify_data_types(org_id, import_file.id) + import_file.refresh_from_db() + + warnings = import_file.mapping_error_messages + status = "warning" if warnings else "success" + return JsonResponse({"status": status, "message": warnings}) + def get_conversion_factor(type_name, unit, _kbtu_thermal_conversion_factors, _kgal_water_conversion_factors): thermal_conversion_factor = _kbtu_thermal_conversion_factors.get(type_name, {}).get(unit, None) From dc3b1f4af6556053495feede11fee31a523b9f15 Mon Sep 17 00:00:00 2001 From: Ross Perry Date: Tue, 28 Oct 2025 14:01:36 -0600 Subject: [PATCH 2/2] add indicator, test, early exit --- .../seed/js/controllers/mapping_controller.js | 6 +- .../seed/js/services/data_quality_service.js | 2 +- seed/static/seed/partials/mapping.html | 18 ++++-- seed/tests/test_import_file_views.py | 11 ++++ seed/utils/import_file.py | 64 +++++++++---------- seed/views/v3/import_files.py | 28 ++++---- 6 files changed, 74 insertions(+), 55 deletions(-) diff --git a/seed/static/seed/js/controllers/mapping_controller.js b/seed/static/seed/js/controllers/mapping_controller.js index 6c970cd25f..5f293d9818 100644 --- a/seed/static/seed/js/controllers/mapping_controller.js +++ b/seed/static/seed/js/controllers/mapping_controller.js @@ -703,6 +703,7 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ * after saving column mappings, deletes unmatched buildings */ $scope.remap_buildings = () => { + $scope.mapping_error_messages = null; mapping_service.save_mappings($scope.import_file.id, $scope.get_mappings()).then((mapping_result) => { if (mapping_result.status === 'error' || mapping_result.status === 'warning') { return; @@ -765,10 +766,13 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ }; $scope.check_mapping_for_nulls = () => { + $scope.checking_for_nulls = true; data_quality_service.check_mapping_for_nulls($scope.organization.id, $scope.import_file.id) .then((response) => { $scope.mapping_error_messages = response.status === 'warning' ? response.message : null; - console.log(response); + }) + .finally(() => { + $scope.checking_for_nulls = false; }); }; diff --git a/seed/static/seed/js/services/data_quality_service.js b/seed/static/seed/js/services/data_quality_service.js index b017a3133b..27259c0993 100644 --- a/seed/static/seed/js/services/data_quality_service.js +++ b/seed/static/seed/js/services/data_quality_service.js @@ -96,7 +96,7 @@ angular.module('SEED.service.data_quality', []).factory('data_quality_service', }; data_quality_factory.check_mapping_for_nulls = (org_id, import_file_id) => $http - .get(`/api/v3/import_files/${import_file_id}/verify_data_type_mapping?organization=${org_id}`) + .post(`/api/v3/import_files/${import_file_id}/verify_data_type_mapping/?organization=${org_id}`) .then((response) => response.data); return data_quality_factory; diff --git a/seed/static/seed/partials/mapping.html b/seed/static/seed/partials/mapping.html index 64387809f1..dce56b56f7 100644 --- a/seed/static/seed/partials/mapping.html +++ b/seed/static/seed/partials/mapping.html @@ -226,11 +226,10 @@
-
+
+ + +
Mapped Fields class="pull-right btn btn-primary" ng-click="open_data_upload_modal(import_file.dataset)" ng-hide="import_file.matching_done" - translate > - Save Mappings + + Save Mappings
diff --git a/seed/tests/test_import_file_views.py b/seed/tests/test_import_file_views.py index 6e7fea3b93..436bd36c13 100644 --- a/seed/tests/test_import_file_views.py +++ b/seed/tests/test_import_file_views.py @@ -1060,6 +1060,17 @@ def test_unmatch(self): # # verify that the coparent id is now in the view # self.assertTrue(prop.exists()) + def test_verify_data_type_mapping(self): + self.assertEqual(self.import_file.mapping_error_messages, None) + url = reverse("api:v3:import_files-verify-data-type-mapping", args=[self.import_file.pk]) + url += f"?organization_id={self.org.pk}" + resp = self.client.post(url, content_type="application/json") + self.assertEqual(resp.status_code, 200) + # request modifies import_file + self.import_file.refresh_from_db() + exp_errs = "Blank values detected in columns: [ ENERGY STAR Score, Gross Floor Area, Recent Sale Date ]. Review import file for data type mismatches or click Save Mappings to import as displayed below." + self.assertEqual(self.import_file.mapping_error_messages, exp_errs) + class TestImportFileViewSetPermissions(AccessLevelBaseTestCase, DataMappingBaseTestCase): def setUp(self): diff --git a/seed/utils/import_file.py b/seed/utils/import_file.py index fbd96c5eca..aa511c43be 100644 --- a/seed/utils/import_file.py +++ b/seed/utils/import_file.py @@ -1,16 +1,17 @@ import json import logging + from django.db.models import Count, Q from seed.models import ( + DATA_STATE_DELETE, + DATA_STATE_IMPORT, + DATA_STATE_UNKNOWN, Column, ColumnMapping, - DATA_STATE_UNKNOWN, - DATA_STATE_IMPORT, - DATA_STATE_DELETE, ImportFile, PropertyState, - TaxLotState + TaxLotState, ) @@ -66,19 +67,30 @@ def get_import_file_table_mappings(import_file_id): return result + def verify_data_types(org_id, import_file_id): """ - To check for data type parsing errors, check non string fields for None values. - ex: If a column has a numeric data type, attempting to parse a string will result in None. - This gives the user a warning there may be a data type mapping issue + Verify that non-text columns don't contain null values, indicatative of data type mapping errors. + + Checks all mapped columns with numeric/date data types (excluding string and extra_data fields) + to identify null values that may result from failed type parsing. For example, if a column + is mapped to a numeric field but contains non-numeric text, the parsing will fail and store + null, indicating a potential mapping mistake. + + If blank values are detected, sets a warning message on the import file's + mapping_error_messages field with a list of affected columns. """ import_file = ImportFile.objects.filter(id=import_file_id, import_record__super_organization_id=org_id).first() if not import_file: return + + import_file.mapping_error_messages = None + import_file.save() + mapped_cols = import_file.get_cached_mapped_columns - if not import_file or not mapped_cols: + if not mapped_cols: return - + propertystate_ids = list( PropertyState.objects.filter(import_file=import_file) .exclude(data_state__in=[DATA_STATE_UNKNOWN, DATA_STATE_IMPORT, DATA_STATE_DELETE]) @@ -90,20 +102,15 @@ def verify_data_types(org_id, import_file_id): .values_list("id", flat=True) ) - if not len(propertystate_ids) and not len(taxlotstate_ids): + if not propertystate_ids and not taxlotstate_ids: return - - from datetime import datetime - start = datetime.now() - - import_file.mapping_error_messages = "" # {column_name: display_name, ...} for canonical cols with numeric (non-text) data types - column_map = dict(Column.objects - .filter(organization_id=org_id, is_extra_data=False, derived_column_id__isnull=True) - .exclude(data_type__in=['string', 'None']) - .exclude(table_name='') - .values_list('column_name', 'display_name') + column_map = dict( + Column.objects.filter(organization_id=org_id, is_extra_data=False, derived_column_id__isnull=True) + .exclude(data_type__in=["string", "None"]) + .exclude(table_name="") + .values_list("column_name", "display_name") ) # Check columns that are within import file's mapping AND column_map canonical_column_names = set(column_map.keys()) @@ -115,24 +122,17 @@ def verify_data_types(org_id, import_file_id): # create aggregations to check if null values exist for the selected column names # run query against import record inventory and count results if property_column_names: - property_null_checks = {f"{field}_null": Count('id', filter=Q(**{f"{field}__isnull": True})) for field in property_column_names} + property_null_checks = {f"{field}_null": Count("id", filter=Q(**{f"{field}__isnull": True})) for field in property_column_names} property_counts = PropertyState.objects.filter(id__in=propertystate_ids).aggregate(**property_null_checks) columns_with_blanks.update([column_map[field] for field in property_column_names if property_counts[f"{field}_null"]]) - + if taxlot_column_names: - taxlot_null_checks = {f"{field}_null": Count('id', filter=Q(**{f"{field}__isnull": True})) for field in taxlot_column_names} + taxlot_null_checks = {f"{field}_null": Count("id", filter=Q(**{f"{field}__isnull": True})) for field in taxlot_column_names} taxlot_counts = TaxLotState.objects.filter(id__in=taxlotstate_ids).aggregate(**taxlot_null_checks) columns_with_blanks.update([column_map[field] for field in taxlot_column_names if taxlot_counts[f"{field}_null"]]) if columns_with_blanks: col_string = ", ".join(sorted(columns_with_blanks)) - err_msg = ( - f"Blank values detected in columns: {col_string}. Review import file data or Save Mappings to ignore." - ) + err_msg = f"Blank values detected in columns: [ {col_string} ]. Review import file for data type mismatches or click Save Mappings to import as displayed below." import_file.mapping_error_messages = err_msg - - import logging - end = datetime.now() - diff = end - start - logging.error(f'>>> TOOK {diff.seconds}s, {diff.microseconds}micros') - import_file.save() + import_file.save() diff --git a/seed/views/v3/import_files.py b/seed/views/v3/import_files.py index 21a7651f59..0c727531f2 100644 --- a/seed/views/v3/import_files.py +++ b/seed/views/v3/import_files.py @@ -1173,23 +1173,23 @@ def system_meter_upload(self, request, pk): ) @swagger_auto_schema(manual_parameters=[AutoSchemaHelper.query_org_id_field()]) - @action(detail=True, methods=["GET"]) + @action(detail=True, methods=["POST"]) def verify_data_type_mapping(self, request, pk): - org_id = self.get_organization(request) - - try: - import_file = ImportFile.objects.get(pk=pk, import_record__super_organization_id=org_id) - except ImportFile.DoesNotExist: - return JsonResponse( - {"status": "error", "message": "No such resource."}, status=status.HTTP_400_BAD_REQUEST - ) + """ + Verify that non-text columns don't contain null values, indicatative of data type mapping errors. + """ + org_id = self.get_organization(request) - verify_data_types(org_id, import_file.id) - import_file.refresh_from_db() + try: + import_file = ImportFile.objects.get(pk=pk, import_record__super_organization_id=org_id) + except ImportFile.DoesNotExist: + return JsonResponse({"status": "error", "message": "No such resource."}, status=status.HTTP_400_BAD_REQUEST) + verify_data_types(org_id, import_file.id) + import_file.refresh_from_db() - warnings = import_file.mapping_error_messages - status = "warning" if warnings else "success" - return JsonResponse({"status": status, "message": warnings}) + warnings = import_file.mapping_error_messages + response_status = "warning" if warnings else "success" + return JsonResponse({"status": response_status, "message": warnings}) def get_conversion_factor(type_name, unit, _kbtu_thermal_conversion_factors, _kgal_water_conversion_factors):