From 27241972a3bc3b7dc5b1d3751bb97466888e198c Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Wed, 21 Jan 2026 09:49:48 +0100 Subject: [PATCH 01/17] MAROB mapping tables --- .../cdm_mapper/tables/marob/__init__.py | 1 + .../cdm_mapper/tables/marob/header.json | 114 ++++++++++++++++++ .../tables/marob/observations-at.json | 39 ++++++ .../tables/marob/observations-dpt.json | 39 ++++++ .../tables/marob/observations-slp.json | 39 ++++++ .../tables/marob/observations-sst.json | 39 ++++++ .../tables/marob/observations-wbt.json | 39 ++++++ .../tables/marob/observations-wd.json | 39 ++++++ .../tables/marob/observations-ws.json | 40 ++++++ .../cdm_mapper/tables/marob/observations.json | 86 +++++++++++++ 10 files changed, 475 insertions(+) create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/__init__.py create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/header.json create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json create mode 100755 cdm_reader_mapper/cdm_mapper/tables/marob/observations.json diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/__init__.py b/cdm_reader_mapper/cdm_mapper/tables/marob/__init__.py new file mode 100755 index 00000000..1cda077b --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/__init__.py @@ -0,0 +1 @@ +"""Common Data Model (CDM) MAROB mapping tables.""" diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json new file mode 
100755 index 00000000..ba20195d --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json @@ -0,0 +1,114 @@ +{ + "report_id": { + "sections": "MAROB", + "elements": "ID", + "transform": "string_add", + "kwargs": { + "separator": "-", + "prepend": "MAROBSHIP" + } + }, + "application_area": { + "default": [ + 1, + 7, + 10, + 11 + ] + }, + "observing_programme": "UNKNOWN", + "report_type": { + "default": 0 + }, + "station_name": "UNKNOWN", + "station_type": { + "default": 2 + }, + "platform_type": "UNKNOWN", + "platform_sub_type": "UNKNOWN", + "primary_station_id": "UNKNOWN", + "station_record_number": { + "default": 1 + }, + "primary_station_id_scheme": "UNKNOWN", + "longitude": { + "sections": "MAROB", + "elements": "GEOGR_LAENGE" + }, + "latitude": { + "sections": "MAROB", + "elements": "GEOGR_BREITE" + }, + "location_accuracy": { + "sections": "MAROB", + "elements": [ + "GEOGR_LAENGE", + "GEOGR_BREITE" + ], + "transform": "location_accuracy", + "decimal_places": 0 + }, + "location_quality": { + "sections": "MAROB", + "elements": [ + "GEOGR_LAENGE_FLAG", + "GEOGR_BREITE_FLAG" + ], + "fill_value": "0", + "transform": "UNKNOWN" + }, + "crs": { + "default": 0 + }, + "station_speed": { + "sections": "NAVIGATION", + "elements": "FAHRTGESCHWINDIGKEIT" + }, + "station_course": { + "sections": "NAVIGATION", + "elements": "FAHRTRICHTUNG" + }, + "height_of_station_above_local_ground": { + "sections": "MAROB", + "elements": "STATIONSHOEHE_MSL" + }, + "height_of_station_above_sea_level": { + "sections": "MAROB", + "elements": "STATIONSHOEHE_MSL" + }, + "report_meaning_of_timestamp": { + "default": 2 + }, + "report_timestamp": { + "sections": "MAROB", + "elements": "MESSZEIT" + }, + "report_duration": { + "default": 11 + }, + "report_time_accuracy": "UNKNOWN", + "report_time_quality": { + "sections": "MAROB", + "elements": "MESSZEIT_FLAG" + }, + "report_quality": { + "default": 2 + }, + "duplicate_status": { + "default": 4 + }, + "record_timestamp": { + 
"transform": "datetime_utcnow" + }, + "history": { + "transform": "lineage" + }, + "source_id": { + "sections": "MAROB", + "elements": "KURZNAME" + }, + "source_record_id": { + "sections": "MAROB", + "elements": "UID" + } +} diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json new file mode 100755 index 00000000..f45ee123 --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json @@ -0,0 +1,39 @@ +{ + "observation_id": { + "kwargs": { + "append": "AT" + } + }, + "z_coordinate": { + "sections": "LUFT", + "elements": "SENSORHOEHE_WAS_TT" + }, + "observation_height_above_station_surface": { + "sections": "LUFT", + "elements": "SENSORHOEHE_WAS_TT" + }, + "observed_variable": { + "default": 85 + }, + "observation_value": { + "sections": "LUFT", + "elements": "LUFTTEMPERATUR" + }, + "units": { + "default": 5 + }, + "quality_flag": { + "sections": "LUFT", + "elements": "LUFTTEMPERATUR_FLAG" + }, + "original_units": { + "default": 5 + }, + "original_value": { + "sections": "LUFT", + "elements": "LUFTTEMPERATUR" + }, + "conversion_method": { + "default": 1 + } +} diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json new file mode 100755 index 00000000..3ceb53ee --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json @@ -0,0 +1,39 @@ +{ + "observation_id": { + "kwargs": { + "append": "DPT" + } + }, + "z_coordinate": { + "sections": "LUFT", + "elements": "SENSORHOEHE_WAS_TT" + }, + "observation_height_above_station_surface": { + "sections": "LUFT", + "elements": "SENSORHOEHE_WAS_TT" + }, + "observed_variable": { + "default": 36 + }, + "observation_value": { + "sections": "LUFT", + "elements": "TAUPUNKTTEMPERATUR" + }, + "units": { + "default": 5 + }, + "quality_flag": { + "sections": "LUFT", + "elements": "TAUPUNKTTEMPERATUR_FLAG" + }, + 
"original_units": { + "default": 5 + }, + "original_value": { + "sections": "LUFT", + "elements": "TAUPUNKTTEMPERATUR" + }, + "conversion_method": { + "default": 1 + } +} diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json new file mode 100755 index 00000000..dcd6f8aa --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json @@ -0,0 +1,39 @@ +{ + "observation_id": { + "kwargs": { + "append": "SLP" + } + }, + "z_coordinate": { + "sections": "LUFT", + "elements": "SENSORHOEHE_WAS_TT" + }, + "observation_height_above_station_surface": { + "sections": "LUFT", + "elements": "SENSORHOEHE_WAS_TT" + }, + "observed_variable": { + "default": 58 + }, + "observation_value": { + "sections": "LUFT", + "elements": "LUFTDRUCK_STATIONSHOEHE" + }, + "units": { + "default": 32 + }, + "quality_flag": { + "sections": "LUFT", + "elements": "LUFTDRUCK_STATIONSHOEHE_FLAG" + }, + "original_units": { + "default": 32 + }, + "original_value": { + "sections": "LUFT", + "elements": "LUFTDRUCK_STATIONSHOEHE" + }, + "conversion_method": { + "default": 7 + } +} diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json new file mode 100755 index 00000000..2f137831 --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json @@ -0,0 +1,39 @@ +{ + "observation_id": { + "kwargs": { + "append": "SST" + } + }, + "z_coordinate": { + "sections": "WASSER", + "elements": "MESSTIEFE" + }, + "observation_height_above_station_surface": { + "sections": "WASSER", + "elements": "MESSTIEFE" + }, + "observed_variable": { + "default": 95 + }, + "observation_value": { + "sections": "WASSER", + "elements": "WASSERTEMPERATUR" + }, + "units": { + "default": 5 + }, + "quality_flag": { + "sections": "WASSER", + "elements": "WASSERTEMPERATUR_FLAG" + }, + "original_units": { + "default": 5 + }, + 
"original_value": { + "sections": "WASSER", + "elements": "WASSERTEMPERATUR" + }, + "conversion_method": { + "default": 1 + } +} diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json new file mode 100755 index 00000000..da20279d --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json @@ -0,0 +1,39 @@ +{ + "observation_id": { + "kwargs": { + "append": "WBT" + } + }, + "z_coordinate": { + "sections": "LUFT", + "elements": "SENSORHOEHE_WAS_TT" + }, + "observation_height_above_station_surface": { + "sections": "LUFT", + "elements": "SENSORHOEHE_WAS_TT" + }, + "observed_variable": { + "default": 41 + }, + "observation_value": { + "sections": "LUFT", + "elements": "FEUCHTTEMPERATUR" + }, + "units": { + "default": 5 + }, + "quality_flag": { + "sections": "LUFT", + "elements": "FEUCHTTEMPERATUR_FLAG" + }, + "original_units": { + "default": 5 + }, + "original_value": { + "sections": "LUFT", + "elements": "FEUCHTTEMPERATUR" + }, + "conversion_method": { + "default": 1 + } +} diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json new file mode 100755 index 00000000..ba85b1d6 --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json @@ -0,0 +1,39 @@ +{ + "observation_id": { + "kwargs": { + "append": "WD" + } + }, + "z_coordinate": { + "sections": "WIND", + "elements": "SENSORHOEHE_WAS_FF" + }, + "observation_height_above_station_surface": { + "sections": "WIND", + "elements": "SENSORHOEHE_WAS_FF" + }, + "observed_variable": { + "default": 106 + }, + "observation_value": { + "sections": "WIND", + "elements": "WINDRICHTUNG" + }, + "units": { + "default": 320 + }, + "quality_flag": { + "sections": "WIND", + "elements": "WINDRICHTUNG_FLAG" + }, + "original_units": { + "default": 320 + }, + "original_value": { + "sections": "WIND", + "elements": 
"WINDRICHTUNG" + }, + "conversion_flag": { + "default": 2 + } +} diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json new file mode 100755 index 00000000..982432b4 --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json @@ -0,0 +1,40 @@ +{ + "observation_id": { + "kwargs": { + "append": "WS" + } + }, + "z_coordinate": { + "sections": "WIND", + "elements": "SENSORHOEHE_WAS_FF" + }, + "observation_height_above_station_surface": { + "sections": "WIND", + "elements": "SENSORHOEHE_WAS_FF" + }, + "observed_variable": { + "default": 107 + }, + "observation_value": { + "sections": "WIND", + "elements": "WINDGESCHWINDIGKEIT" + }, + "units": { + "default": 731 + }, + "quality_flag": { + "sections": "WIND", + "elements": "WINDGESCHWINDIGKEIT_FLAG" + }, + "original_precision": {}, + "original_units": { + "default": 731 + }, + "original_value": { + "sections": "WIND", + "elements": "WINDGESCHWINDIGKEIT" + }, + "conversion_flag": { + "default": 2 + } +} diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json new file mode 100755 index 00000000..f7f81b07 --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json @@ -0,0 +1,86 @@ +{ + "observation_id": { + "sections": "MAROB", + "elements": "ID", + "transform": "string_add", + "kwargs": { + "separator": "-" + } + }, + "report_id": { + "sections": "MAROB", + "elements": "ID", + "transform": "string_add", + "kwargs": { + "separator": "-" + } + }, + "data_policy_licence": "UNKNOWN", + "date_time": { + "sections": "MAROB", + "elements": "MESSZEIT" + }, + "date_time_meaning": { + "default": 2 + }, + "observation_duration": "UNKNOWN", + "longitude": { + "sections": "MAROB", + "elements": "GEOGR_LAENGE" + }, + "latitude": { + "sections": "MAROB", + "elements": "GEOGR_BREITE" + }, + "crs": { + "default": 0 + }, + 
"z_coordinate_type": { + "default": 0 + }, + "value_significance": { + "default": 2 + }, + "conversion_flag": { + "default": 0 + }, + "location_precision": { + "sections": "MAROB", + "elements": [ + "GEOGR_LAENGE", + "GEOGR_BREITE" + ], + "transform": "location_accuracy", + "decimal_places": 0 + }, + "spatial_representativeness": { + "default": 3 + }, + "numerical_precision": {}, + "sensor_automation_status": { + "default": 5 + }, + "exposure_of_sensor": { + "default": 3 + }, + "original_precision": {}, + "processing_level": { + "default": 3 + }, + "traceability": { + "default": 2 + }, + "advanced_qc": { + "default": 0 + }, + "advanced_uncertainty": { + "default": 0 + }, + "advanced_homogenisation": { + "default": 0 + }, + "source_id": { + "sections": "MAROB", + "elements": "KURZNAME" + } +} From 2d39319f355472c9c35023e38f983d1e1c8e5308 Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Thu, 29 Jan 2026 14:03:11 +0100 Subject: [PATCH 02/17] fill in more information --- .../cdm_mapper/tables/marob/header.json | 19 +++++++++++++++---- .../cdm_mapper/tables/marob/observations.json | 13 +++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json index ba20195d..881201ae 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json @@ -24,13 +24,20 @@ "station_type": { "default": 2 }, - "platform_type": "UNKNOWN", + "platform_type": { + "default": 2 + }, "platform_sub_type": "UNKNOWN", - "primary_station_id": "UNKNOWN", + "primary_station_id": { + "section": "MAROB", + "elements": "KENNUNG" + }, "station_record_number": { "default": 1 }, - "primary_station_id_scheme": "UNKNOWN", + "primary_station_id_scheme": { + "fill_value": 5 + }, "longitude": { "sections": "MAROB", "elements": "GEOGR_LAENGE" @@ -105,7 +112,11 @@ }, "source_id": { "sections": "MAROB", - "elements": 
"KURZNAME" + "elements": [ + "ORIGIN_CENTER", + "ORIGIN_SUBCENTER" + ], + "transform": "string_add" }, "source_record_id": { "sections": "MAROB", diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json index f7f81b07..f362560b 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json @@ -4,7 +4,8 @@ "elements": "ID", "transform": "string_add", "kwargs": { - "separator": "-" + "separator": "-", + "prepend": "MAROBSHIP" } }, "report_id": { @@ -15,7 +16,9 @@ "separator": "-" } }, - "data_policy_licence": "UNKNOWN", + "data_policy_licence": { + "default": 0 + }, "date_time": { "sections": "MAROB", "elements": "MESSZEIT" @@ -23,7 +26,9 @@ "date_time_meaning": { "default": 2 }, - "observation_duration": "UNKNOWN", + "observation_duration": { + "default": 8 + }, "longitude": { "sections": "MAROB", "elements": "GEOGR_LAENGE" @@ -42,7 +47,7 @@ "default": 2 }, "conversion_flag": { - "default": 0 + "default": 2 }, "location_precision": { "sections": "MAROB", From 1f166d3edefefee56bd1efb2292b75e414ccbaca Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Thu, 29 Jan 2026 14:15:58 +0100 Subject: [PATCH 03/17] adjustments --- cdm_reader_mapper/cdm_mapper/tables/marob/header.json | 2 +- .../cdm_mapper/tables/marob/observations-slp.json | 8 ++++---- .../cdm_mapper/tables/marob/observations.json | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json index 881201ae..c0e90613 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json @@ -120,6 +120,6 @@ }, "source_record_id": { "sections": "MAROB", - "elements": "UID" + "elements": "UNKNOWN" } } diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json 
b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json index dcd6f8aa..e4ef497b 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json @@ -5,12 +5,12 @@ } }, "z_coordinate": { - "sections": "LUFT", - "elements": "SENSORHOEHE_WAS_TT" + "sections": "MAROB", + "elements": "BAROMETERHOEHE_MSL" }, "observation_height_above_station_surface": { - "sections": "LUFT", - "elements": "SENSORHOEHE_WAS_TT" + "sections": "MAROB", + "elements": "BAROMETERHOEHE_MSL" }, "observed_variable": { "default": 58 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json index f362560b..ce6c0c40 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json @@ -86,6 +86,6 @@ }, "source_id": { "sections": "MAROB", - "elements": "KURZNAME" + "elements": "UNKNOWN" } } From 891cfda24e7d856ad9030ed5aaa64958614af4c0 Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Fri, 6 Feb 2026 14:56:48 +0100 Subject: [PATCH 04/17] update mapping tables --- .../cdm_mapper/tables/marob/header.json | 35 ++++++++----------- .../tables/marob/observations-at.json | 10 +++--- .../tables/marob/observations-dpt.json | 10 +++--- .../tables/marob/observations-slp.json | 13 +++---- .../tables/marob/observations-sst.json | 13 ++++--- .../tables/marob/observations-wbt.json | 10 +++--- .../tables/marob/observations-wd.json | 4 --- .../tables/marob/observations-ws.json | 12 +++---- .../cdm_mapper/tables/marob/observations.json | 13 ++++--- 9 files changed, 53 insertions(+), 67 deletions(-) diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json index c0e90613..d705df2d 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json @@ -5,7 
+5,7 @@ "transform": "string_add", "kwargs": { "separator": "-", - "prepend": "MAROBSHIP" + "prepend": "DWD_MAROBSHIP" } }, "application_area": { @@ -16,18 +16,18 @@ 11 ] }, - "observing_programme": "UNKNOWN", + "observing_programme": { + "default": 56 + }, "report_type": { "default": 0 }, - "station_name": "UNKNOWN", "station_type": { "default": 2 }, "platform_type": { "default": 2 }, - "platform_sub_type": "UNKNOWN", "primary_station_id": { "section": "MAROB", "elements": "KENNUNG" @@ -56,13 +56,7 @@ "decimal_places": 0 }, "location_quality": { - "sections": "MAROB", - "elements": [ - "GEOGR_LAENGE_FLAG", - "GEOGR_BREITE_FLAG" - ], - "fill_value": "0", - "transform": "UNKNOWN" + "fill_value": 3 }, "crs": { "default": 0 @@ -76,8 +70,8 @@ "elements": "FAHRTRICHTUNG" }, "height_of_station_above_local_ground": { - "sections": "MAROB", - "elements": "STATIONSHOEHE_MSL" + "default": 0, + "decimal_places": 1 }, "height_of_station_above_sea_level": { "sections": "MAROB", @@ -91,9 +85,12 @@ "elements": "MESSZEIT" }, "report_duration": { - "default": 11 + "default": 15 + }, + "report_time_accuracy": { + "default": 1, + "decimal_places": 0 }, - "report_time_accuracy": "UNKNOWN", "report_time_quality": { "sections": "MAROB", "elements": "MESSZEIT_FLAG" @@ -112,14 +109,10 @@ }, "source_id": { "sections": "MAROB", - "elements": [ - "ORIGIN_CENTER", - "ORIGIN_SUBCENTER" - ], - "transform": "string_add" + "elements": "DATENQUELLE_ID" }, "source_record_id": { "sections": "MAROB", - "elements": "UNKNOWN" + "elements": "ID" } } diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json index f45ee123..57f66d70 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json @@ -4,10 +4,6 @@ "append": "AT" } }, - "z_coordinate": { - "sections": "LUFT", - "elements": "SENSORHOEHE_WAS_TT" - }, 
"observation_height_above_station_surface": { "sections": "LUFT", "elements": "SENSORHOEHE_WAS_TT" @@ -17,7 +13,9 @@ }, "observation_value": { "sections": "LUFT", - "elements": "LUFTTEMPERATUR" + "elements": "LUFTTEMPERATUR", + "transform": "temperature_celsius_to_kelvin", + "decimal_places": 2 }, "units": { "default": 5 @@ -27,7 +25,7 @@ "elements": "LUFTTEMPERATUR_FLAG" }, "original_units": { - "default": 5 + "default": 60 }, "original_value": { "sections": "LUFT", diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json index 3ceb53ee..1e92f1c1 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json @@ -4,10 +4,6 @@ "append": "DPT" } }, - "z_coordinate": { - "sections": "LUFT", - "elements": "SENSORHOEHE_WAS_TT" - }, "observation_height_above_station_surface": { "sections": "LUFT", "elements": "SENSORHOEHE_WAS_TT" @@ -17,7 +13,9 @@ }, "observation_value": { "sections": "LUFT", - "elements": "TAUPUNKTTEMPERATUR" + "elements": "TAUPUNKTTEMPERATUR", + "transform": "temperature_celsius_to_kelvin", + "decimal_places": 2 }, "units": { "default": 5 @@ -27,7 +25,7 @@ "elements": "TAUPUNKTTEMPERATUR_FLAG" }, "original_units": { - "default": 5 + "default": 60 }, "original_value": { "sections": "LUFT", diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json index e4ef497b..39fd0d17 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json @@ -4,10 +4,6 @@ "append": "SLP" } }, - "z_coordinate": { - "sections": "MAROB", - "elements": "BAROMETERHOEHE_MSL" - }, "observation_height_above_station_surface": { "sections": "MAROB", "elements": "BAROMETERHOEHE_MSL" @@ -17,7 +13,12 @@ }, "observation_value": { "sections": "LUFT", - 
"elements": "LUFTDRUCK_STATIONSHOEHE" + "elements": "LUFTDRUCK_STATIONSHOEHE", + "transform": "float_scale", + "kwargs": { + "factor": 100 + }, + "decimal_places": 0 }, "units": { "default": 32 @@ -27,7 +28,7 @@ "elements": "LUFTDRUCK_STATIONSHOEHE_FLAG" }, "original_units": { - "default": 32 + "default": 530 }, "original_value": { "sections": "LUFT", diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json index 2f137831..85cbfb17 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json @@ -4,20 +4,19 @@ "append": "SST" } }, - "z_coordinate": { - "sections": "WASSER", - "elements": "MESSTIEFE" - }, "observation_height_above_station_surface": { "sections": "WASSER", - "elements": "MESSTIEFE" + "elements": "MESSTIEFE", + "transform": "float_opposite" }, "observed_variable": { "default": 95 }, "observation_value": { "sections": "WASSER", - "elements": "WASSERTEMPERATUR" + "elements": "WASSERTEMPERATUR", + "transform": "temperature_celsius_to_kelvin", + "decimal_places": 2 }, "units": { "default": 5 @@ -27,7 +26,7 @@ "elements": "WASSERTEMPERATUR_FLAG" }, "original_units": { - "default": 5 + "default": 60 }, "original_value": { "sections": "WASSER", diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json index da20279d..97c5df76 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json @@ -4,10 +4,6 @@ "append": "WBT" } }, - "z_coordinate": { - "sections": "LUFT", - "elements": "SENSORHOEHE_WAS_TT" - }, "observation_height_above_station_surface": { "sections": "LUFT", "elements": "SENSORHOEHE_WAS_TT" @@ -17,7 +13,9 @@ }, "observation_value": { "sections": "LUFT", - "elements": "FEUCHTTEMPERATUR" + "elements": 
"FEUCHTTEMPERATUR", + "transform": "temperature_celsius_to_kelvin", + "decimal_places": 2 }, "units": { "default": 5 @@ -27,7 +25,7 @@ "elements": "FEUCHTTEMPERATUR_FLAG" }, "original_units": { - "default": 5 + "default": 60 }, "original_value": { "sections": "LUFT", diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json index ba85b1d6..28d3bc52 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json @@ -4,10 +4,6 @@ "append": "WD" } }, - "z_coordinate": { - "sections": "WIND", - "elements": "SENSORHOEHE_WAS_FF" - }, "observation_height_above_station_surface": { "sections": "WIND", "elements": "SENSORHOEHE_WAS_FF" diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json index 982432b4..a0d91a9f 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json @@ -4,10 +4,6 @@ "append": "WS" } }, - "z_coordinate": { - "sections": "WIND", - "elements": "SENSORHOEHE_WAS_FF" - }, "observation_height_above_station_surface": { "sections": "WIND", "elements": "SENSORHOEHE_WAS_FF" @@ -17,7 +13,11 @@ }, "observation_value": { "sections": "WIND", - "elements": "WINDGESCHWINDIGKEIT" + "elements": "WINDGESCHWINDIGKEIT", + "transform": "float_scale", + "kwargs": { + "factor": 0.277 + } }, "units": { "default": 731 @@ -28,7 +28,7 @@ }, "original_precision": {}, "original_units": { - "default": 731 + "default": 741 }, "original_value": { "sections": "WIND", diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json index ce6c0c40..d4200dcd 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json +++ 
b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json @@ -5,7 +5,7 @@ "transform": "string_add", "kwargs": { "separator": "-", - "prepend": "MAROBSHIP" + "prepend": "DWD_MAROBSHIP" } }, "report_id": { @@ -13,11 +13,12 @@ "elements": "ID", "transform": "string_add", "kwargs": { - "separator": "-" + "separator": "-", + "prepend": "DWD_MAROBSHIP" } }, "data_policy_licence": { - "default": 0 + "default": 1 }, "date_time": { "sections": "MAROB", @@ -63,7 +64,9 @@ }, "numerical_precision": {}, "sensor_automation_status": { - "default": 5 + "sections": "MAROB", + "elements": "DATENQUELLE_ID", + "transform": "UNKNOWN" }, "exposure_of_sensor": { "default": 3 @@ -86,6 +89,6 @@ }, "source_id": { "sections": "MAROB", - "elements": "UNKNOWN" + "elements": "DATENQUELLE_ID" } } From ff99beffd5a19f37f429369eac8eec25a7a07441 Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Fri, 6 Feb 2026 14:57:03 +0100 Subject: [PATCH 05/17] add MAROB lineage --- cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py b/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py index 4c01fadc..40e2600e 100755 --- a/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py +++ b/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py @@ -40,6 +40,7 @@ "icoads_r300_d714": icoads_lineage + " with supplemental data recovery", "icoads_r302": ". Initial conversion from ICOADS R3.0.2T NRT", "craid": ". Initial conversion from C-RAID", + "marob": ". 
Initial conversion from DWD MAROB data base", } c2k_methods = { From 9d453d890a519eb73b2ac5591d33dfa85c562d6f Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Mon, 9 Feb 2026 13:20:56 +0100 Subject: [PATCH 06/17] nre MAROB code tables --- cdm_reader_mapper/cdm_mapper/codes/marob/__init__.py | 1 + cdm_reader_mapper/cdm_mapper/codes/marob/quality_flag.json | 1 + .../cdm_mapper/codes/marob/report_time_quality.json | 1 + .../cdm_mapper/codes/marob/sensor_automation_status.json | 6 ++++++ 4 files changed, 9 insertions(+) create mode 100755 cdm_reader_mapper/cdm_mapper/codes/marob/__init__.py create mode 100755 cdm_reader_mapper/cdm_mapper/codes/marob/quality_flag.json create mode 100755 cdm_reader_mapper/cdm_mapper/codes/marob/report_time_quality.json create mode 100755 cdm_reader_mapper/cdm_mapper/codes/marob/sensor_automation_status.json diff --git a/cdm_reader_mapper/cdm_mapper/codes/marob/__init__.py b/cdm_reader_mapper/cdm_mapper/codes/marob/__init__.py new file mode 100755 index 00000000..d076a16a --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/codes/marob/__init__.py @@ -0,0 +1 @@ +"""Common Data Model (CDM) MAROB mapper code tables.""" diff --git a/cdm_reader_mapper/cdm_mapper/codes/marob/quality_flag.json b/cdm_reader_mapper/cdm_mapper/codes/marob/quality_flag.json new file mode 100755 index 00000000..69a88e3b --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/codes/marob/quality_flag.json @@ -0,0 +1 @@ +{} diff --git a/cdm_reader_mapper/cdm_mapper/codes/marob/report_time_quality.json b/cdm_reader_mapper/cdm_mapper/codes/marob/report_time_quality.json new file mode 100755 index 00000000..69a88e3b --- /dev/null +++ b/cdm_reader_mapper/cdm_mapper/codes/marob/report_time_quality.json @@ -0,0 +1 @@ +{} diff --git a/cdm_reader_mapper/cdm_mapper/codes/marob/sensor_automation_status.json b/cdm_reader_mapper/cdm_mapper/codes/marob/sensor_automation_status.json new file mode 100755 index 00000000..21be42a8 --- /dev/null +++ 
b/cdm_reader_mapper/cdm_mapper/codes/marob/sensor_automation_status.json @@ -0,0 +1,6 @@ +{ + "256": 4, + "10256": 4, + "384": 0, + "10384": 0 +} From b1d5aa0b5aeb36b3b39f0fa800f2e2eab1d7af9d Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Mon, 9 Feb 2026 13:21:27 +0100 Subject: [PATCH 07/17] new mapping functions --- .../tables/craid/observations-slp.json | 5 +- .../tables/gdac/observations-slp.json | 5 +- .../tables/icoads/observations-slp.json | 5 +- .../icoads/r300/d714/observations-slp.json | 5 +- .../cdm_mapper/utils/mapping_functions.py | 110 +++++++++++++++++- 5 files changed, 110 insertions(+), 20 deletions(-) diff --git a/cdm_reader_mapper/cdm_mapper/tables/craid/observations-slp.json b/cdm_reader_mapper/cdm_mapper/tables/craid/observations-slp.json index 3f092412..3c9963e6 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/craid/observations-slp.json +++ b/cdm_reader_mapper/cdm_mapper/tables/craid/observations-slp.json @@ -10,10 +10,7 @@ "observation_value": { "sections": "drifter_measurements", "elements": "ATMS_ADJUSTED", - "transform": "float_scale", - "kwargs": { - "factor": 100 - }, + "transform": "pressue_hpa_in_pa", "decimal_places": 0 }, "units": { diff --git a/cdm_reader_mapper/cdm_mapper/tables/gdac/observations-slp.json b/cdm_reader_mapper/cdm_mapper/tables/gdac/observations-slp.json index 8cc5179a..884319ca 100644 --- a/cdm_reader_mapper/cdm_mapper/tables/gdac/observations-slp.json +++ b/cdm_reader_mapper/cdm_mapper/tables/gdac/observations-slp.json @@ -10,10 +10,7 @@ }, "observation_value": { "elements": "PPPP", - "transform": "float_scale", - "kwargs": { - "factor": 100 - }, + "transform": "pressue_hpa_in_pa", "decimal_places": 0 }, "units": { diff --git a/cdm_reader_mapper/cdm_mapper/tables/icoads/observations-slp.json b/cdm_reader_mapper/cdm_mapper/tables/icoads/observations-slp.json index e541166c..2cb040c2 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/icoads/observations-slp.json +++ 
b/cdm_reader_mapper/cdm_mapper/tables/icoads/observations-slp.json @@ -10,10 +10,7 @@ "observation_value": { "sections": "core", "elements": "SLP", - "transform": "float_scale", - "kwargs": { - "factor": 100 - }, + "transform": "pressue_hpa_in_pa", "decimal_places": 0 }, "units": { diff --git a/cdm_reader_mapper/cdm_mapper/tables/icoads/r300/d714/observations-slp.json b/cdm_reader_mapper/cdm_mapper/tables/icoads/r300/d714/observations-slp.json index d7416108..de9139f2 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/icoads/r300/d714/observations-slp.json +++ b/cdm_reader_mapper/cdm_mapper/tables/icoads/r300/d714/observations-slp.json @@ -2,10 +2,7 @@ "observation_value": { "sections": "c99", "elements": "Pressure", - "transform": "float_scale", - "kwargs": { - "factor": 100 - }, + "transform": "pressue_hpa_in_pa", "decimal_places": 0 }, "original_value": { diff --git a/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py b/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py index 40e2600e..cd5e7334 100755 --- a/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py +++ b/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py @@ -285,7 +285,7 @@ def to_int(value: Any) -> int | pd.NA: class mapping_functions: - """Class for mapping Common Data Model (CDM) elements from IMMA1, GDAC, ICOADS, C-RAID, and IMMT datasets.""" + """Class for mapping Common Data Model (CDM) elements from IMMA1, GDAC, ICOADS, C-RAID, MAROB, Pub47, and IMMT datasets.""" def __init__(self, imodel): self.imodel = imodel @@ -503,6 +503,28 @@ def datetime_craid( data_1d = series.values.ravel() return pd.to_datetime(data_1d, format=format, errors="coerce") + def datetime_marob( + self, series: pd.Series, format: str = "%d.%m.%y %H:%M:%S,%f" + ) -> pd.DatetimeIndex: + """ + Convert C-RAID date strings to pandas datetime. + + Parameters + ---------- + series : pd.Series + Series of date strings. + format : str, optional + Datetime format string (default: "%Y-%m-%d %H:%M:%S.%f"). 
+ + Returns + ------- + pd.DatetimeIndex + DatetimeIndex of converted dates. + """ + if series.empty: + return pd.DatetimeIndex([]) + return pd.to_datetime(series, format=format, errors="coerce") + def df_col_join(self, df: pd.DataFrame, sep: str) -> pd.Series: """ Join all columns of a pandas DataFrame into a single Series of strings. @@ -538,6 +560,7 @@ def float_opposite(self, series: pd.Series) -> pd.Series: pd.Series Series with negated values. """ + series = series.astype(float) return -series def select_column(self, df: pd.DataFrame) -> pd.Series: @@ -581,9 +604,9 @@ def float_scale(self, series: pd.Series, factor: float = 1) -> pd.Series: pd.Series Scaled Series, or empty float Series if input is non-numeric. """ - if pd.api.types.is_numeric_dtype(series): - return series * factor - return pd.Series(dtype=float, name=series.name) + scaled = pd.to_numeric(series, errors="coerce") * factor + scaled.name = series.name + return scaled def integer_to_float(self, s: pd.Series) -> pd.Series: """ @@ -844,6 +867,54 @@ def temperature_celsius_to_kelvin(self, df: pd.DataFrame) -> pd.Series: result = result.iloc[:, 0] return pd.Series(result, dtype=float) + def velocity_kmh_in_ms(self, series: pd.Series) -> pd.Series: + """ + Convert velocity from kilometers per hour in meters per second. + + Parameters + ---------- + series : pd.Series + Series of velocity in kilometers per hour. + + Returns + ------- + pd.Series + Series of velocity in meters per second. + """ + return self.float_scale(series, 1 / 3.6) + + def velocity_kn_in_ms(self, series: pd.Series) -> pd.Series: + """ + Convert velocity from knots in meters per second. + + Parameters + ---------- + series : pd.Series + Series of velocity in kilometers per hour. + + Returns + ------- + pd.Series + Series of velocity in meters per second. + """ + return self.float_scale(series, 1852.0 / 3600.0) + + def pressue_hpa_in_pa(self, series: pd.Series) -> pd.Series: + """ + Convert pressure from hPa in Pa. 
+ + Parameters + ---------- + series : pd.Series + Series of presuure in hPa. + + Returns + ------- + pd.Series + Series of pressure in Pa. + """ + return self.float_scale(series, 100) + def time_accuracy(self, series: pd.Series) -> pd.Series: """ Map time accuracy codes to seconds. @@ -970,3 +1041,34 @@ def gdac_longitude(self, df: pd.DataFrame) -> pd.Series: lon = df["LoLoLoLo"].copy() lon[df["Qc"].isin([5, 7])] *= -1 return lon + + def marob_location_quality(self, df: pd.DataFrame) -> pd.Series: + """ + Get MAROB location quality. + + Parameters + ---------- + df : pd.DataFrame + Input DataFrame with columns 'GEOGR_BREITE_FLAG' and 'GEOGR_LAENGE_FLAG'. + + Returns + ------- + pd.Series + Series of location quality flags. + + Raises + ------ + KeyError + If required columns are missing. + """ + return np.nan + # if ( + # "GEOGR_BREITE_FLAG" not in df.columns + # or "GEOGR_LAENGE_FLAG" not in df.columns + # ): + # raise KeyError( + # "DataFrame must contain 'GEOGR_BREITE_FLAG' and 'GEOGR_LAENGE_FLAG' columns" + # ) + # lat_flag = df["GEOGR_BREITE_FLAG"] + # lon_flag = df["GEOGR_LAENGE_FLAG"] + # return pd.Series([None] * len(lat_flag), index=lat_flag.idx) From 98eac1044ec1061b0953bc173d44be37cf82934d Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Mon, 9 Feb 2026 13:21:50 +0100 Subject: [PATCH 08/17] re-work MAROB mapping tables --- .../cdm_mapper/tables/marob/header.json | 46 +++++++------------ .../tables/marob/observations-at.json | 14 +++--- .../tables/marob/observations-dpt.json | 14 +++--- .../tables/marob/observations-slp.json | 19 ++++---- .../tables/marob/observations-sst.json | 14 +++--- .../tables/marob/observations-wbt.json | 14 +++--- .../tables/marob/observations-wd.json | 17 +++---- .../tables/marob/observations-ws.json | 20 ++++---- .../cdm_mapper/tables/marob/observations.json | 27 +++-------- 9 files changed, 77 insertions(+), 108 deletions(-) diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json 
b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json index d705df2d..ed77ab9a 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json @@ -1,6 +1,5 @@ { "report_id": { - "sections": "MAROB", "elements": "ID", "transform": "string_add", "kwargs": { @@ -29,7 +28,6 @@ "default": 2 }, "primary_station_id": { - "section": "MAROB", "elements": "KENNUNG" }, "station_record_number": { @@ -39,50 +37,40 @@ "fill_value": 5 }, "longitude": { - "sections": "MAROB", - "elements": "GEOGR_LAENGE" + "elements": "GEOGR_LAENGE", + "decimal_places": 1 }, "latitude": { - "sections": "MAROB", - "elements": "GEOGR_BREITE" - }, - "location_accuracy": { - "sections": "MAROB", - "elements": [ - "GEOGR_LAENGE", - "GEOGR_BREITE" - ], - "transform": "location_accuracy", - "decimal_places": 0 - }, - "location_quality": { - "fill_value": 3 + "elements": "GEOGR_BREITE", + "decimal_places": 1 }, "crs": { "default": 0 }, "station_speed": { - "sections": "NAVIGATION", - "elements": "FAHRTGESCHWINDIGKEIT" + "elements": "FAHRTGESCHWINDIGKEIT", + "transform": "velocity_kn_in_ms", + "decimal_places": 2 }, "station_course": { - "sections": "NAVIGATION", - "elements": "FAHRTRICHTUNG" + "elements": "FAHRTRICHTUNG", + "decimal_places": 0 }, "height_of_station_above_local_ground": { "default": 0, "decimal_places": 1 }, "height_of_station_above_sea_level": { - "sections": "MAROB", - "elements": "STATIONSHOEHE_MSL" + "elements": "STATIONSHOEHE_MSL", + "fill_value": 0, + "decimal_places": 1 }, "report_meaning_of_timestamp": { "default": 2 }, "report_timestamp": { - "sections": "MAROB", - "elements": "MESSZEIT" + "elements": "MESSZEIT", + "transform": "datetime_marob" }, "report_duration": { "default": 15 @@ -92,8 +80,8 @@ "decimal_places": 0 }, "report_time_quality": { - "sections": "MAROB", - "elements": "MESSZEIT_FLAG" + "elements": "MESSZEIT_FLAG", + "code_table": "report_time_quality" }, "report_quality": { "default": 2 @@ -108,11 
+96,9 @@ "transform": "lineage" }, "source_id": { - "sections": "MAROB", "elements": "DATENQUELLE_ID" }, "source_record_id": { - "sections": "MAROB", "elements": "ID" } } diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json index 57f66d70..5c1e7d2d 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json @@ -5,14 +5,13 @@ } }, "observation_height_above_station_surface": { - "sections": "LUFT", - "elements": "SENSORHOEHE_WAS_TT" + "elements": "SENSORHOEHE_WAS_TT", + "decimal_places": 1 }, "observed_variable": { "default": 85 }, "observation_value": { - "sections": "LUFT", "elements": "LUFTTEMPERATUR", "transform": "temperature_celsius_to_kelvin", "decimal_places": 2 @@ -21,15 +20,16 @@ "default": 5 }, "quality_flag": { - "sections": "LUFT", - "elements": "LUFTTEMPERATUR_FLAG" + "elements": "LUFTTEMPERATUR_FLAG", + "code_table": "quality_flag", + "fill_value": 2 }, "original_units": { "default": 60 }, "original_value": { - "sections": "LUFT", - "elements": "LUFTTEMPERATUR" + "elements": "LUFTTEMPERATUR", + "decimal_places": 1 }, "conversion_method": { "default": 1 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json index 1e92f1c1..5c4cb28b 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json @@ -5,14 +5,13 @@ } }, "observation_height_above_station_surface": { - "sections": "LUFT", - "elements": "SENSORHOEHE_WAS_TT" + "elements": "SENSORHOEHE_WAS_TT", + "decimal_places": 1 }, "observed_variable": { "default": 36 }, "observation_value": { - "sections": "LUFT", "elements": "TAUPUNKTTEMPERATUR", "transform": "temperature_celsius_to_kelvin", "decimal_places": 2 @@ -21,15 +20,16 @@ "default": 5 }, "quality_flag": { - 
"sections": "LUFT", - "elements": "TAUPUNKTTEMPERATUR_FLAG" + "elements": "TAUPUNKTTEMPERATUR_FLAG", + "code_table": "quality_flag", + "fill_value": 2 }, "original_units": { "default": 60 }, "original_value": { - "sections": "LUFT", - "elements": "TAUPUNKTTEMPERATUR" + "elements": "TAUPUNKTTEMPERATUR", + "decimal_places": 1 }, "conversion_method": { "default": 1 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json index 39fd0d17..84238110 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json @@ -5,34 +5,31 @@ } }, "observation_height_above_station_surface": { - "sections": "MAROB", - "elements": "BAROMETERHOEHE_MSL" + "elements": "BAROMETERHOEHE_MSL", + "decimal_places": 1 }, "observed_variable": { "default": 58 }, "observation_value": { - "sections": "LUFT", "elements": "LUFTDRUCK_STATIONSHOEHE", - "transform": "float_scale", - "kwargs": { - "factor": 100 - }, + "transform": "pressue_hpa_in_pa", "decimal_places": 0 }, "units": { "default": 32 }, "quality_flag": { - "sections": "LUFT", - "elements": "LUFTDRUCK_STATIONSHOEHE_FLAG" + "elements": "LUFTDRUCK_STATIONSHOEHE_FLAG", + "code_table": "quality_flag", + "fill_value": 2 }, "original_units": { "default": 530 }, "original_value": { - "sections": "LUFT", - "elements": "LUFTDRUCK_STATIONSHOEHE" + "elements": "LUFTDRUCK_STATIONSHOEHE", + "decimal_places": 1 }, "conversion_method": { "default": 7 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json index 85cbfb17..6cca397b 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json @@ -5,15 +5,14 @@ } }, "observation_height_above_station_surface": { - "sections": "WASSER", "elements": "MESSTIEFE", - "transform": 
"float_opposite" + "transform": "float_opposite", + "decimal_places": 1 }, "observed_variable": { "default": 95 }, "observation_value": { - "sections": "WASSER", "elements": "WASSERTEMPERATUR", "transform": "temperature_celsius_to_kelvin", "decimal_places": 2 @@ -22,15 +21,16 @@ "default": 5 }, "quality_flag": { - "sections": "WASSER", - "elements": "WASSERTEMPERATUR_FLAG" + "elements": "WASSERTEMPERATUR_FLAG", + "code_table": "quality_flag", + "fill_value": 2 }, "original_units": { "default": 60 }, "original_value": { - "sections": "WASSER", - "elements": "WASSERTEMPERATUR" + "elements": "WASSERTEMPERATUR", + "decimal_places": 1 }, "conversion_method": { "default": 1 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json index 97c5df76..7ffe458e 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json @@ -5,14 +5,13 @@ } }, "observation_height_above_station_surface": { - "sections": "LUFT", - "elements": "SENSORHOEHE_WAS_TT" + "elements": "SENSORHOEHE_WAS_TT", + "decimal_places": 1 }, "observed_variable": { "default": 41 }, "observation_value": { - "sections": "LUFT", "elements": "FEUCHTTEMPERATUR", "transform": "temperature_celsius_to_kelvin", "decimal_places": 2 @@ -21,15 +20,16 @@ "default": 5 }, "quality_flag": { - "sections": "LUFT", - "elements": "FEUCHTTEMPERATUR_FLAG" + "elements": "FEUCHTTEMPERATUR_FLAG", + "code_table": "quality_flag", + "fill_value": 2 }, "original_units": { "default": 60 }, "original_value": { - "sections": "LUFT", - "elements": "FEUCHTTEMPERATUR" + "elements": "FEUCHTTEMPERATUR", + "decimal_places": 1 }, "conversion_method": { "default": 1 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json index 28d3bc52..14dee488 100755 --- 
a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json @@ -5,29 +5,30 @@ } }, "observation_height_above_station_surface": { - "sections": "WIND", - "elements": "SENSORHOEHE_WAS_FF" + "elements": "SENSORHOEHE_WAS_FF", + "decimal_places": 1 }, "observed_variable": { "default": 106 }, "observation_value": { - "sections": "WIND", - "elements": "WINDRICHTUNG" + "elements": "WINDRICHTUNG", + "decimal_places": 0 }, "units": { "default": 320 }, "quality_flag": { - "sections": "WIND", - "elements": "WINDRICHTUNG_FLAG" + "elements": "WINDRICHTUNG_FLAG", + "code_table": "quality_flag", + "fill_value": 2 }, "original_units": { "default": 320 }, "original_value": { - "sections": "WIND", - "elements": "WINDRICHTUNG" + "elements": "WINDRICHTUNG", + "decimal_places": 0 }, "conversion_flag": { "default": 2 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json index a0d91a9f..617d9c24 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json @@ -5,34 +5,32 @@ } }, "observation_height_above_station_surface": { - "sections": "WIND", - "elements": "SENSORHOEHE_WAS_FF" + "elements": "SENSORHOEHE_WAS_FF", + "decimal_places": 1 }, "observed_variable": { "default": 107 }, "observation_value": { - "sections": "WIND", "elements": "WINDGESCHWINDIGKEIT", - "transform": "float_scale", - "kwargs": { - "factor": 0.277 - } + "transform": "velocity_kmh_in_ms", + "decimal_places": 1 }, "units": { "default": 731 }, "quality_flag": { - "sections": "WIND", - "elements": "WINDGESCHWINDIGKEIT_FLAG" + "elements": "WINDGESCHWINDIGKEIT_FLAG", + "code_table": "quality_flag", + "fill_value": 2 }, "original_precision": {}, "original_units": { "default": 741 }, "original_value": { - "sections": "WIND", - "elements": "WINDGESCHWINDIGKEIT" + "elements": 
"WINDGESCHWINDIGKEIT", + "decimal_places": 1 }, "conversion_flag": { "default": 2 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json index d4200dcd..e5594732 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations.json @@ -1,6 +1,5 @@ { "observation_id": { - "sections": "MAROB", "elements": "ID", "transform": "string_add", "kwargs": { @@ -9,7 +8,6 @@ } }, "report_id": { - "sections": "MAROB", "elements": "ID", "transform": "string_add", "kwargs": { @@ -21,8 +19,8 @@ "default": 1 }, "date_time": { - "sections": "MAROB", - "elements": "MESSZEIT" + "elements": "MESSZEIT", + "transform": "datetime_marob" }, "date_time_meaning": { "default": 2 @@ -31,12 +29,12 @@ "default": 8 }, "longitude": { - "sections": "MAROB", - "elements": "GEOGR_LAENGE" + "elements": "GEOGR_LAENGE", + "decimal_places": 1 }, "latitude": { - "sections": "MAROB", - "elements": "GEOGR_BREITE" + "elements": "GEOGR_BREITE", + "decimal_places": 1 }, "crs": { "default": 0 @@ -50,23 +48,13 @@ "conversion_flag": { "default": 2 }, - "location_precision": { - "sections": "MAROB", - "elements": [ - "GEOGR_LAENGE", - "GEOGR_BREITE" - ], - "transform": "location_accuracy", - "decimal_places": 0 - }, "spatial_representativeness": { "default": 3 }, "numerical_precision": {}, "sensor_automation_status": { - "sections": "MAROB", "elements": "DATENQUELLE_ID", - "transform": "UNKNOWN" + "code_table": "sensor_automation_status" }, "exposure_of_sensor": { "default": 3 @@ -88,7 +76,6 @@ "default": 0 }, "source_id": { - "sections": "MAROB", "elements": "DATENQUELLE_ID" } } From 8cd4b74b7ed290f83d35217098b8ffeb5fe7d4f0 Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Mon, 9 Feb 2026 13:22:15 +0100 Subject: [PATCH 09/17] read_csv: add delimiter to parameter list --- cdm_reader_mapper/mdf_reader/utils/utilities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/cdm_reader_mapper/mdf_reader/utils/utilities.py b/cdm_reader_mapper/mdf_reader/utils/utilities.py index 5b47ef2c..a8576a10 100755 --- a/cdm_reader_mapper/mdf_reader/utils/utilities.py +++ b/cdm_reader_mapper/mdf_reader/utils/utilities.py @@ -180,7 +180,7 @@ def update_column_labels(columns: Iterable[str | tuple]) -> pd.Index | pd.MultiI return pd.Index(new_cols) -def read_csv(filepath, col_subset=None, **kwargs) -> pd.DataFrame: +def read_csv(filepath, delimiter=",", col_subset=None, **kwargs) -> pd.DataFrame: """ Safe CSV reader that handles missing files and column subsets. @@ -202,7 +202,7 @@ def read_csv(filepath, col_subset=None, **kwargs) -> pd.DataFrame: logging.warning(f"File not found: {filepath}") return pd.DataFrame() - df = pd.read_csv(filepath, delimiter=",", **kwargs) + df = pd.read_csv(filepath, delimiter=delimiter, **kwargs) df.columns = update_column_labels(df.columns) if col_subset is not None: df = df[col_subset] From a74b275687daf06cf0a83b2fb008f63ec0513fe0 Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Mon, 9 Feb 2026 13:22:34 +0100 Subject: [PATCH 10/17] add MAROB to supported data model list --- cdm_reader_mapper/properties.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdm_reader_mapper/properties.py b/cdm_reader_mapper/properties.py index b80513b7..4963fad5 100755 --- a/cdm_reader_mapper/properties.py +++ b/cdm_reader_mapper/properties.py @@ -6,4 +6,4 @@ object_types = ["str", "object", "key", "datetime"] -supported_data_models = ["craid", "gdac", "icoads", "pub47"] +supported_data_models = ["craid", "gdac", "icoads", "pub47", "marob"] From c7f6249a9a29a51a468bfd48b9ea5c6f6f805b8d Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Mon, 9 Feb 2026 13:23:24 +0100 Subject: [PATCH 11/17] set parameters explicitly --- cdm_reader_mapper/cdm_mapper/mapper.py | 17 +++- cdm_reader_mapper/core/databundle.py | 104 +++++++++++++++++++++---- 2 files changed, 104 insertions(+), 17 deletions(-) 
diff --git a/cdm_reader_mapper/cdm_mapper/mapper.py b/cdm_reader_mapper/cdm_mapper/mapper.py index 7013a725..15233285 100755 --- a/cdm_reader_mapper/cdm_mapper/mapper.py +++ b/cdm_reader_mapper/cdm_mapper/mapper.py @@ -155,6 +155,7 @@ def _transform( ) -> pd.Series: """Apply a transformation function from imodel_functions to a pandas Series.""" logger.debug(f"Applying transform: {transform}") + if kwargs: logger.debug(f"With kwargs: {', '.join(kwargs.keys())}") try: @@ -213,7 +214,12 @@ def _extract_input_data(idata, elements, cols, default, logger): """Extract the relevant input data based on `elements`.""" def _return_default(): - return pd.Series(_default(default, len(idata))), True + if default is None: + bool = False + else: + bool = True + + return pd.Series(_default(default, len(idata))), bool if not elements: return _return_default() @@ -516,6 +522,15 @@ def map_model( DataFrame with MultiIndex columns (cdm_table, column_name). """ logger = logging_hdlr.init_logger(__name__, level=log_level) + + if imodel is None: + logger.error("Input data model 'imodel' is not defined.") + return + + if not isinstance(imodel, str): + logger.error(f"Input data model type is not supported: {type(imodel)}") + return + imodel = imodel.split("_") if imodel[0] not in properties.supported_data_models: logger.error("Input data model " f"{imodel[0]}" " not supported") diff --git a/cdm_reader_mapper/core/databundle.py b/cdm_reader_mapper/core/databundle.py index e476397b..676a3df0 100755 --- a/cdm_reader_mapper/core/databundle.py +++ b/cdm_reader_mapper/core/databundle.py @@ -634,11 +634,15 @@ def replace_columns( db_._columns = db_._data.columns return self._return_db(db_, inplace) - def correct_datetime(self, inplace=False) -> DataBundle | None: + def correct_datetime( + self, imodel=None, inplace=False, **kwargs + ) -> DataBundle | None: """Correct datetime information in :py:attr:`data`. Parameters ---------- + imodel: str, optional + Name of the MFD/CDM data model. 
inplace: bool If ``True`` overwrite :py:attr:`data` in :py:class:`~DataBundle` else return a copy of :py:class:`~DataBundle` with datetime-corrected values in :py:attr:`data`. @@ -663,13 +667,19 @@ def correct_datetime(self, inplace=False) -> DataBundle | None: ---- For more information see :py:func:`correct_datetime` """ + imodel = imodel or self._imodel db_ = self._get_db(inplace) - db_._data = correct_datetime(db_._data, db_._imodel) + db_._data = correct_datetime(db_._data, imodel, **kwargs) return self._return_db(db_, inplace) - def validate_datetime(self) -> pd.DataFrame: + def validate_datetime(self, imodel=None, **kwargs) -> pd.DataFrame: """Validate datetime information in :py:attr:`data`. + Parameters + ---------- + imodel: str, optional + Name of the MFD/CDM data model. + Returns ------- pandas.DataFrame @@ -691,13 +701,16 @@ def validate_datetime(self) -> pd.DataFrame: ---- For more information see :py:func:`validate_datetime` """ - return validate_datetime(self._data, self._imodel) + imodel = imodel or self._imodel + return validate_datetime(self._data, imodel, **kwargs) - def correct_pt(self, inplace=False) -> DataBundle | None: + def correct_pt(self, imodel=None, inplace=False, **kwargs) -> DataBundle | None: """Correct platform type information in :py:attr:`data`. Parameters ---------- + imodel: str, optional + Name of the MFD/CDM data model. inplace: bool If ``True`` overwrite :py:attr:`data` in :py:class:`~DataBundle` else return a copy of :py:class:`~DataBundle` with platform-corrected values in :py:attr:`data`. 
@@ -722,13 +735,19 @@ def correct_pt(self, inplace=False) -> DataBundle | None: ---- For more information see :py:func:`correct_pt` """ + imodel = imodel or self._imodel db_ = self._get_db(inplace) - db_._data = correct_pt(db_._data, db_._imodel) + db_._data = correct_pt(db_._data, imodel, **kwargs) return self._return_db(db_, inplace) - def validate_id(self, **kwargs) -> pd.DataFrame: + def validate_id(self, imodel=None, **kwargs) -> pd.DataFrame: """Validate station id information in :py:attr:`data`. + Parameters + ---------- + imodel: str, optional + Name of the MFD/CDM data model. + Returns ------- pandas.DataFrame @@ -750,13 +769,16 @@ def validate_id(self, **kwargs) -> pd.DataFrame: ---- For more information see :py:func:`validate_id` """ - return validate_id(self._data, self._imodel, **kwargs) + imodel = imodel or self._imodel + return validate_id(self._data, imodel, **kwargs) - def map_model(self, inplace=False, **kwargs) -> DataBundle | None: + def map_model(self, imodel=None, inplace=False, **kwargs) -> DataBundle | None: """Map :py:attr:`data` to the Common Data Model. Parameters ---------- + imodel: str, optional + Name of the MFD/CDM data model. inplace: bool If ``True`` overwrite :py:attr:`data` in :py:class:`~DataBundle` else return a copy of :py:class:`~DataBundle` with :py:attr:`data` as CDM tables. @@ -775,16 +797,31 @@ def map_model(self, inplace=False, **kwargs) -> DataBundle | None: ---- For more information see :py:func:`map_model` """ + imodel = imodel or self._imodel db_ = self._get_db(inplace) - _tables = map_model(db_._data, db_._imodel, **kwargs) + _tables = map_model(db_._data, imodel, **kwargs) db_._mode = "tables" db_._columns = _tables.columns db_._data = _tables return self._return_db(db_, inplace) - def write(self, **kwargs) -> None: + def write( + self, dtypes=None, parse_dates=None, encoding=None, mode=None, **kwargs + ) -> None: """Write :py:attr:`data` on disk. 
+ Parameters + ---------- + dtypes: dict, optional + Data types of ``data``. + parse_dates: list, optional + Information how to parse dates on ``data`` + encoding: str, optional + The encoding of the input file. Overrides the value in the imodel schema file. + mode: str, optional + Data mode ("data" or "tables") + Default: "data" + Examples -------- >>> db.write() @@ -803,13 +840,17 @@ def write(self, **kwargs) -> None: If :py:attr:`mode` is "data" write data using :py:func:`write_data`. If :py:attr:`mode` is "tables" write data using :py:func:`write_tables`. """ + dtypes = dtypes or self._dtypes + parse_dates = parse_dates or self._parse_dates + encoding = encoding or self._encoding + mode = mode or self._mode write( data=self._data, mask=self._mask, - dtypes=self._dtypes, - parse_dates=self._parse_dates, - encoding=self._encoding, - mode=self._mode, + dtypes=dtypes, + parse_dates=parse_dates, + encoding=encoding, + mode=mode, **kwargs, ) @@ -955,7 +996,7 @@ def remove_duplicates(self, inplace=False, **kwargs) -> DataBundle | None: Returns ------- :py:class:`~DataBundle` or None - DataBundle without duplictaed rows or None if ``inplace=True``. + DataBundle without duplicated rows or None if ``inplace=True``. Note ---- @@ -987,3 +1028,34 @@ def remove_duplicates(self, inplace=False, **kwargs) -> DataBundle | None: header_ = db_.DupDetect.result db_._data = db_._data[db_._data.index.isin(header_.index)] return self._return_db(db_, inplace) + + def convert_comma_as_decimal_float( + self, columns, inplace=False + ) -> DataBundle | None: + """Replace commas with dots and convert to floats. + + Parameters + ---------- + columns: list, pd.Index or pd.MultiIndex + List of commas to convert. + inplace: bool + If ``True`` overwrite :py:attr:`data` in :py:class:`~DataBundle` + else return a copy of :py:class:`~DataBundle` with :py:attr:`data` containing no duplicates. 
+ Default: False + + Returns + ------- + :py:class:`~DataBundle` or None + DataBundle without converted ffloat entries or None if ``inplace=True``. + """ + if not isinstance(self._data, pd.DataFrame): + raise NotImplementedError( + f"This function is only implemented for pd.DataFrames, not {type(self._data)}." + ) + + db_ = self._get_db(inplace) + for column in columns: + db_._data[column] = ( + db_[column].astype(str).str.replace(",", ".", regex=False).astype(float) + ) + return self._return_db(db_, inplace) From 8f59236de8b611b43ad23759222b9e0fa334e0ce Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Mon, 9 Feb 2026 14:13:48 +0100 Subject: [PATCH 12/17] fix boolean defaults --- cdm_reader_mapper/cdm_mapper/mapper.py | 15 +++++++-------- tests/test_cdm_mapper.py | 19 ++++++++++++++++++- tests/test_mapping_functions.py | 10 +++++----- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/cdm_reader_mapper/cdm_mapper/mapper.py b/cdm_reader_mapper/cdm_mapper/mapper.py index be66ecc7..92d969b2 100755 --- a/cdm_reader_mapper/cdm_mapper/mapper.py +++ b/cdm_reader_mapper/cdm_mapper/mapper.py @@ -213,16 +213,15 @@ def _fill_value(series, fill_value) -> pd.Series: def _extract_input_data(idata, elements, default, logger): """Extract the relevant input data based on `elements`.""" - def _return_default(): + def _return_default(bool): + return pd.Series(_default(default, len(idata)), index=idata.index), bool + + if not elements: if default is None: bool = False else: bool = True - - return pd.Series(_default(default, len(idata)), index=idata.index), bool - - if not elements: - return _return_default() + return _return_default(bool) logger.debug(f"\telements: {' '.join(map(str, elements))}") @@ -231,12 +230,12 @@ def _return_default(): for e in elements: if e not in cols: logger.warning(f"Missing element from input data: {e}") - return _return_default() + return _return_default(True) data = idata[elements[0]] if len(elements) == 1 else idata[elements] if 
_is_empty(data): - return _return_default() + return _return_default(True) return data, False diff --git a/tests/test_cdm_mapper.py b/tests/test_cdm_mapper.py index 4969fff3..2504a1f2 100755 --- a/tests/test_cdm_mapper.py +++ b/tests/test_cdm_mapper.py @@ -345,10 +345,11 @@ def test_convert_dtype(value, atts, expected): ("report_id", [("c98", "UID")], None, False, "idata"), ("latitude", [("core", "LAT")], None, True, [None, None, None, None]), ("location_quality", [("c1", "LZ")], None, False, "idata"), + ], ) def test_extract_input_data( - imodel_maps, data_header, column, elements, default, use_default, exp + data_header, column, elements, default, use_default, exp ): logger = logging_hdlr.init_logger(__name__, level="INFO") result = _extract_input_data( @@ -360,6 +361,8 @@ def test_extract_input_data( assert isinstance(result, tuple) assert result[1] is use_default + + print(result) if exp == "idata": exp = data_header[elements[0]] @@ -397,6 +400,20 @@ def test_column_mapping(imodel_maps, imodel_functions, data_header, column, expe ) pd.testing.assert_series_equal(result, pd.Series(expected, name=column)) +def test_history_column_mapping(imodel_maps, imodel_functions, data_header): + logger = logging_hdlr.init_logger(__name__, level="INFO") + mapping_column = imodel_maps["header"]["history"] + column_atts = get_cdm_atts("header")["header"]["history"] + result = _column_mapping( + data_header, + mapping_column, + imodel_functions, + column_atts, + None, + "history", + logger, + ) + assert result.str.contains("Initial conversion from ICOADS R3.0.0T").all() def test_table_mapping( imodel_maps, imodel_functions, data_header, data_header_expected diff --git a/tests/test_mapping_functions.py b/tests/test_mapping_functions.py index 1603508c..93c9cfb1 100755 --- a/tests/test_mapping_functions.py +++ b/tests/test_mapping_functions.py @@ -487,10 +487,10 @@ def test_df_col_join_series(df, sep, expected): @pytest.mark.parametrize( "df, expected", [ - (5.0, -5.0), - (-3.2, 
3.2), - (0.0, -0.0), - (123.456, -123.456), + (pd.Series([5.0]), pd.Series([-5.0])), + (pd.Series([-3.2]), pd.Series([3.2])), + (pd.Series([0.0]), pd.Series([-0.0])), + (pd.Series([123.456]), pd.Series([-123.456])), (pd.Series([1.0, -2.0, 3.5]), pd.Series([-1.0, 2.0, -3.5])), ], ) @@ -535,7 +535,7 @@ def test_select_column(df, expected): 10, pd.Series([], dtype=float, name="E"), ), - (pd.Series(["x", "y", "z"], name="F"), 3, pd.Series([], dtype=float, name="F")), + (pd.Series(["x", "y", "z"], name="F"), 3, pd.Series([np.nan, np.nan, np.nan], dtype=float, name="F")), ], ) def test_float_scale(input_s, factor, expected): From 0edd8a81184f7d43a58bc11ea8c869dca84e62cd Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Mon, 9 Feb 2026 14:43:07 +0100 Subject: [PATCH 13/17] general function for converting srings to datetime --- .../cdm_mapper/utils/mapping_functions.py | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py b/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py index cd5e7334..68583c9b 100755 --- a/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py +++ b/cdm_reader_mapper/cdm_mapper/utils/mapping_functions.py @@ -284,6 +284,27 @@ def to_int(value: Any) -> int | pd.NA: return pd.NA +def series_strptime(series: pd.Series, format: str) -> pd.Series: + """ + Convert series with strings to series with datetime. + + Parameters + ---------- + series : pd.Series + Series with strings. + format : str + String time format. + + Returns + ------- + pd.Series + Series with datetime + """ + if series.empty: + return pd.Series([]) + return pd.to_datetime(series, format=format, errors="coerce") + + class mapping_functions: """Class for mapping Common Data Model (CDM) elements from IMMA1, GDAC, ICOADS, C-RAID, MAROB, Pub47, and IMMT datasets.""" @@ -498,32 +519,27 @@ def datetime_craid( pd.DatetimeIndex DatetimeIndex of converted dates. 
""" - if series.empty: - return pd.DatetimeIndex([]) - data_1d = series.values.ravel() - return pd.to_datetime(data_1d, format=format, errors="coerce") + return series_strptime(series, format) def datetime_marob( self, series: pd.Series, format: str = "%d.%m.%y %H:%M:%S,%f" - ) -> pd.DatetimeIndex: + ) -> pd.Series: """ - Convert C-RAID date strings to pandas datetime. + Convert MAROB date strings to pandas datetime. Parameters ---------- series : pd.Series Series of date strings. format : str, optional - Datetime format string (default: "%Y-%m-%d %H:%M:%S.%f"). + Datetime format string (default: "%d.%m.%y %H:%M:%S,%f"). Returns ------- - pd.DatetimeIndex - DatetimeIndex of converted dates. + pd.Series + Series of converted dates. """ - if series.empty: - return pd.DatetimeIndex([]) - return pd.to_datetime(series, format=format, errors="coerce") + return series_strptime(series, format) def df_col_join(self, df: pd.DataFrame, sep: str) -> pd.Series: """ From ed65445d8d4f788c36864e3d02f354cf1f898a76 Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Mon, 9 Feb 2026 14:43:19 +0100 Subject: [PATCH 14/17] more tests --- tests/test_cdm_mapper.py | 11 ++-- tests/test_mapping_functions.py | 92 ++++++++++++++++++++++++++++++--- 2 files changed, 89 insertions(+), 14 deletions(-) diff --git a/tests/test_cdm_mapper.py b/tests/test_cdm_mapper.py index 2504a1f2..e831ed7d 100755 --- a/tests/test_cdm_mapper.py +++ b/tests/test_cdm_mapper.py @@ -345,12 +345,9 @@ def test_convert_dtype(value, atts, expected): ("report_id", [("c98", "UID")], None, False, "idata"), ("latitude", [("core", "LAT")], None, True, [None, None, None, None]), ("location_quality", [("c1", "LZ")], None, False, "idata"), - ], ) -def test_extract_input_data( - data_header, column, elements, default, use_default, exp -): +def test_extract_input_data(data_header, column, elements, default, use_default, exp): logger = logging_hdlr.init_logger(__name__, level="INFO") result = _extract_input_data( data_header, @@ 
-361,7 +358,7 @@ def test_extract_input_data( assert isinstance(result, tuple) assert result[1] is use_default - + print(result) if exp == "idata": @@ -400,6 +397,7 @@ def test_column_mapping(imodel_maps, imodel_functions, data_header, column, expe ) pd.testing.assert_series_equal(result, pd.Series(expected, name=column)) + def test_history_column_mapping(imodel_maps, imodel_functions, data_header): logger = logging_hdlr.init_logger(__name__, level="INFO") mapping_column = imodel_maps["header"]["history"] @@ -413,7 +411,8 @@ def test_history_column_mapping(imodel_maps, imodel_functions, data_header): "history", logger, ) - assert result.str.contains("Initial conversion from ICOADS R3.0.0T").all() + assert result.str.contains("Initial conversion from ICOADS R3.0.0T").all() + def test_table_mapping( imodel_maps, imodel_functions, data_header, data_header_expected diff --git a/tests/test_mapping_functions.py b/tests/test_mapping_functions.py index 93c9cfb1..d1a88e20 100755 --- a/tests/test_mapping_functions.py +++ b/tests/test_mapping_functions.py @@ -443,26 +443,52 @@ def test_datetime_utcnow(): "df, expected", [ ( - pd.DataFrame([["2025-11-02 10:30:00.000"]]), - pd.DatetimeIndex([pd.Timestamp("2025-11-02 10:30:00")]), + pd.Series(["2025-11-02 10:30:00.000"]), + pd.Series(pd.Timestamp("2025-11-02 10:30:00")), ), ( - pd.DataFrame([["2025-11-02 10:30:00.000"], ["2025-12-03 15:45:00.123"]]), - pd.DatetimeIndex( + pd.Series(["2025-11-02 10:30:00.000", "2025-12-03 15:45:00.123"]), + pd.Series( [ pd.Timestamp("2025-11-02 10:30:00"), pd.Timestamp("2025-12-03 15:45:00.123"), ] ), ), - (pd.DataFrame([["invalid"]]), pd.DatetimeIndex([pd.NaT])), - (pd.DataFrame([]), pd.DatetimeIndex([])), + (pd.Series(["invalid"]), pd.Series([pd.NaT])), + (pd.Series([]), pd.Series([])), ], ) def test_datetime_craid(df, expected): obj = mapping_functions("dummy_model") result = obj.datetime_craid(df) - pd.testing.assert_index_equal(result, expected) + pd.testing.assert_series_equal(result, 
expected) + + +@pytest.mark.parametrize( + "df, expected", + [ + ( + pd.Series(["02.11.25 10:30:00,000"]), + pd.Series([pd.Timestamp("2025-11-02 10:30:00")]), + ), + ( + pd.Series(["02.11.25 10:30:00,000", "03.12.25 15:45:00,123"]), + pd.Series( + [ + pd.Timestamp("2025-11-02 10:30:00"), + pd.Timestamp("2025-12-03 15:45:00.123"), + ] + ), + ), + (pd.Series(["invalid"]), pd.Series([pd.NaT])), + (pd.Series([]), pd.Series([])), + ], +) +def test_datetime_marob(df, expected): + obj = mapping_functions("dummy_model") + result = obj.datetime_marob(df) + pd.testing.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -535,7 +561,11 @@ def test_select_column(df, expected): 10, pd.Series([], dtype=float, name="E"), ), - (pd.Series(["x", "y", "z"], name="F"), 3, pd.Series([np.nan, np.nan, np.nan], dtype=float, name="F")), + ( + pd.Series(["x", "y", "z"], name="F"), + 3, + pd.Series([np.nan, np.nan, np.nan], dtype=float, name="F"), + ), ], ) def test_float_scale(input_s, factor, expected): @@ -823,6 +853,52 @@ def test_temperature_celsius_to_kelvin(input_df, imodel, expected): pd.testing.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "input_values, expected_values", + [ + ([0, 36, 72], [0.0, 10.0, 20.0]), + ([3.6, 7.2], [1.0, 2.0]), + ([0], [0.0]), + ], +) +def test_velocity_kmh_in_ms(input_values, expected_values): + obj = mapping_functions("dummy_model") + series = pd.Series(input_values) + expected = pd.Series(expected_values) + result = obj.velocity_kmh_in_ms(series) + pd.testing.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "input_values, expected_values", + [ + ([0, 10, 20], [0.0, 5.14444444444, 10.2888888889]), + ([1, 2], [0.51444444444, 1.0288888889]), + ], +) +def test_velocity_kn_in_ms(input_values, expected_values): + obj = mapping_functions("dummy_model") + series = pd.Series(input_values) + expected = pd.Series(expected_values) + result = obj.velocity_kn_in_ms(series) + 
pd.testing.assert_series_equal(result, expected, atol=1e-8) + + +@pytest.mark.parametrize( + "input_values, expected_values", + [ + ([1013, 1000, 950], [101300, 100000, 95000]), + ([0], [0]), + ], +) +def test_pressure_hpa_in_pa(input_values, expected_values): + obj = mapping_functions("dummy_model") + series = pd.Series(input_values) + expected = pd.Series(expected_values) + result = obj.pressue_hpa_in_pa(series) + pd.testing.assert_series_equal(result, expected) + + @pytest.mark.parametrize( "input_series, expected", [ From b8d08b73ef2e4fef6b293f0e1ba2c3432cdbbc08 Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Fri, 13 Feb 2026 11:25:12 +0100 Subject: [PATCH 15/17] set all quality flags to unchecked --- cdm_reader_mapper/cdm_mapper/tables/marob/header.json | 6 ++++-- .../cdm_mapper/tables/marob/observations-at.json | 4 +--- .../cdm_mapper/tables/marob/observations-dpt.json | 4 +--- .../cdm_mapper/tables/marob/observations-slp.json | 4 +--- .../cdm_mapper/tables/marob/observations-sst.json | 4 +--- .../cdm_mapper/tables/marob/observations-wbt.json | 4 +--- .../cdm_mapper/tables/marob/observations-wd.json | 4 +--- .../cdm_mapper/tables/marob/observations-ws.json | 4 +--- 8 files changed, 11 insertions(+), 23 deletions(-) diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json index ed77ab9a..cd52ba50 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json @@ -44,6 +44,9 @@ "elements": "GEOGR_BREITE", "decimal_places": 1 }, + "location_quality": { + "default": 3 + }, "crs": { "default": 0 }, @@ -80,8 +83,7 @@ "decimal_places": 0 }, "report_time_quality": { - "elements": "MESSZEIT_FLAG", - "code_table": "report_time_quality" + "default": 2 }, "report_quality": { "default": 2 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json index 
5c1e7d2d..aeeeddaf 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-at.json @@ -20,9 +20,7 @@ "default": 5 }, "quality_flag": { - "elements": "LUFTTEMPERATUR_FLAG", - "code_table": "quality_flag", - "fill_value": 2 + "default": 2 }, "original_units": { "default": 60 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json index 5c4cb28b..98314ee0 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-dpt.json @@ -20,9 +20,7 @@ "default": 5 }, "quality_flag": { - "elements": "TAUPUNKTTEMPERATUR_FLAG", - "code_table": "quality_flag", - "fill_value": 2 + "default": 2 }, "original_units": { "default": 60 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json index 84238110..954f9805 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-slp.json @@ -20,9 +20,7 @@ "default": 32 }, "quality_flag": { - "elements": "LUFTDRUCK_STATIONSHOEHE_FLAG", - "code_table": "quality_flag", - "fill_value": 2 + "default": 2 }, "original_units": { "default": 530 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json index 6cca397b..1cc5fa46 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-sst.json @@ -21,9 +21,7 @@ "default": 5 }, "quality_flag": { - "elements": "WASSERTEMPERATUR_FLAG", - "code_table": "quality_flag", - "fill_value": 2 + "default": 2 }, "original_units": { "default": 60 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json 
b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json index 7ffe458e..f79eb5e5 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wbt.json @@ -20,9 +20,7 @@ "default": 5 }, "quality_flag": { - "elements": "FEUCHTTEMPERATUR_FLAG", - "code_table": "quality_flag", - "fill_value": 2 + "default": 2 }, "original_units": { "default": 60 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json index 14dee488..b8e48a4e 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-wd.json @@ -19,9 +19,7 @@ "default": 320 }, "quality_flag": { - "elements": "WINDRICHTUNG_FLAG", - "code_table": "quality_flag", - "fill_value": 2 + "default": 2 }, "original_units": { "default": 320 diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json index 617d9c24..0b83ebb2 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/observations-ws.json @@ -20,9 +20,7 @@ "default": 731 }, "quality_flag": { - "elements": "WINDGESCHWINDIGKEIT_FLAG", - "code_table": "quality_flag", - "fill_value": 2 + "default": 2 }, "original_precision": {}, "original_units": { From 1b18846af46fa0722de26f9f837dca26d3ca9d76 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Feb 2026 10:27:48 +0000 Subject: [PATCH 16/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cdm_reader_mapper/cdm_mapper/tables/marob/header.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json 
b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json index cd52ba50..0e5f5677 100755 --- a/cdm_reader_mapper/cdm_mapper/tables/marob/header.json +++ b/cdm_reader_mapper/cdm_mapper/tables/marob/header.json @@ -83,7 +83,7 @@ "decimal_places": 0 }, "report_time_quality": { - "default": 2 + "default": 2 }, "report_quality": { "default": 2 From a098bc1761658b8d57a25df1494bec5ef33a4b4d Mon Sep 17 00:00:00 2001 From: Ludwig Lierhammer Date: Fri, 13 Feb 2026 11:32:57 +0100 Subject: [PATCH 17/17] fixing parsing error --- cdm_reader_mapper/mdf_reader/utils/utilities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdm_reader_mapper/mdf_reader/utils/utilities.py b/cdm_reader_mapper/mdf_reader/utils/utilities.py index 5c0a377f..51cd5105 100755 --- a/cdm_reader_mapper/mdf_reader/utils/utilities.py +++ b/cdm_reader_mapper/mdf_reader/utils/utilities.py @@ -281,10 +281,11 @@ def read_csv( reader=pd.read_csv, col_subset=col_subset, column_names=column_names, - reader_kwargs={delimiter=delimiter, **kwargs}, + reader_kwargs={"delimiter": delimiter, **kwargs}, iterator=True, ) + def read_parquet( filepath: Path, col_subset: str | list | None = None,