From 0fab90284704cc5ea744c6054439f3750d04e550 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 27 Feb 2024 13:47:54 +0100 Subject: [PATCH 01/15] first draft ESACCI-PERMAFROST downloader --- esmvaltool/cmorizers/data/datasets.yml | 11 ++++ .../downloaders/datasets/esacci_permafrost.py | 66 +++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 53bc40dc75..e9714d16d5 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -503,6 +503,17 @@ datasets: limb_profiles/l3/merged/merged_monthly_zonal_mean/v0002 Put all files under a single directory (no subdirectories with years). + ESACCI-PERMAFROST: + tier: 2 + source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/permafrost/data + last_access: 2024-02-27 + info: | + Download the data from: + active_layer_thickness/L4/area4/pp/v03.0 + ground_temperature/L4/area4/pp/v03.0 + permafrost_extent/L4/area4/pp/v03.0 + Put all files in a single directory. + ESACCI-SOILMOISTURE: tier: 2 source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/soil_moisture/data/ diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py new file mode 100644 index 0000000000..435720d32d --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py @@ -0,0 +1,66 @@ +"""Script to download ESACCI-PERMAFROST.""" + +import logging +from datetime import datetime + +from dateutil import relativedelta + +from esmvaltool.cmorizers.data.downloaders.ftp import CCIDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, start_date, end_date, + overwrite): + """Download dataset. + + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + if start_date is None: + start_date = datetime(1997, 1, 1) + else: + start_date = start_date + if end_date is None: + end_date = datetime(2019, 12, 31) + else: + end_date = end_date + + downloader = CCIDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + +# downloader.connect() + + version = 'v03.0' + + vars = ['active_layer_thickness', 'ground_temperature', + 'permafrost_extent'] + + # download active layer thickness + loop_date = start_date + while loop_date <= end_date: + for var in vars: + fn = (f'{var}/L4/area4/pp/{version}/' + f'ESACCI-PERMAFROST-L4-*-{loop_date.year}-f{version}.nc') + if downloader.exists(f'{fn}'): + downloader.download_file(f'{fn}') + else: + logger.info(f'{loop_date.year}:' + f' no data for {var} {version}') + loop_date += relativedelta.relativedelta(years=1) From 9e0be9d50ca807f4ff54374094a1ff7724b445df Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 28 Feb 2024 06:46:44 +0100 Subject: [PATCH 02/15] snapshot 2024-02-28 --- .../downloaders/datasets/esacci_permafrost.py | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py index 435720d32d..b69466f2f3 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py @@ -31,12 +31,8 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, """ if start_date is None: start_date = datetime(1997, 1, 1) - else: - start_date = start_date if end_date is None: end_date = datetime(2019, 12, 31) - else: - end_date = end_date downloader = CCIDownloader( config=config, @@ -45,22 +41,22 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, overwrite=overwrite, ) -# downloader.connect() + downloader.connect() version = 'v03.0' - vars = ['active_layer_thickness', 'ground_temperature', - 'permafrost_extent'] + ccivars = ['active_layer_thickness', 'ground_temperature', + 'permafrost_extent'] # download active layer thickness loop_date = start_date while loop_date <= end_date: - for var in vars: - fn = (f'{var}/L4/area4/pp/{version}/' - f'ESACCI-PERMAFROST-L4-*-{loop_date.year}-f{version}.nc') - if downloader.exists(f'{fn}'): - downloader.download_file(f'{fn}') + for var in ccivars: + fname = (f'{var}/L4/area4/pp/{version}/' + f'ESACCI-PERMAFROST-L4-*-{loop_date.year}-f{version}.nc') + if downloader.exists(fname): + downloader.download_file(name) else: - logger.info(f'{loop_date.year}:' - f' no data for {var} {version}') + logger.info('%d: no data for %s %s', + loop_date.year, var, version) loop_date += relativedelta.relativedelta(years=1) From 680c617bfb33a01d457866a9efefa48faf1cb291 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 28 Feb 2024 07:49:41 +0100 Subject: [PATCH 03/15] working version of ESACCI-PERMAFROST downloader --- .../downloaders/datasets/esacci_permafrost.py | 8 ++-- esmvaltool/cmorizers/data/downloaders/ftp.py | 40 +++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py index b69466f2f3..b615ac58ff 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py @@ -52,10 +52,10 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, loop_date = start_date while loop_date <= end_date: for var in ccivars: - fname = (f'{var}/L4/area4/pp/{version}/' - f'ESACCI-PERMAFROST-L4-*-{loop_date.year}-f{version}.nc') - if downloader.exists(fname): - downloader.download_file(name) + pathname = f'{var}/L4/area4/pp/{version}/' + fname = f'ESACCI-PERMAFROST-L4-*-{loop_date.year}-f{version}.nc' + if downloader.file_exists(fname, pathname): + downloader.download_file(fname, pathname) else: logger.info('%d: no data for %s %s', loop_date.year, var, version) diff --git a/esmvaltool/cmorizers/data/downloaders/ftp.py b/esmvaltool/cmorizers/data/downloaders/ftp.py index 9f0cd5e8f9..a60d52f467 100644 --- a/esmvaltool/cmorizers/data/downloaders/ftp.py +++ b/esmvaltool/cmorizers/data/downloaders/ftp.py @@ -1,5 +1,6 @@ """Downloader for FTP repositories.""" +import fnmatch import ftplib import logging import os @@ -206,6 +207,26 @@ def dataset_name(self): """ return self.dataset.lower().replace('-', '_') + def download_file(self, filename, path=None): + """Download file(s). + + Parameters: + ----------- + filename : str + Name of file (w/o path) to download (wildcards are allowed) + path : str + Path of file to check (optional) + """ + if path is not None: + self.set_cwd(path) + files = self._client.nlst() + matching_files = fnmatch.filter(files, filename) + if len(matching_files) != 0: + for file in matching_files: + super().download_file(file) + else: + logger.info('No files %s found.', filename) + def download_year(self, year): """Download a specific year. @@ -215,3 +236,22 @@ def download_year(self, year): Year to download """ self.download_folder(str(year)) + + def file_exists(self, filename, path=None): + """Check if a file exists. + + Parameters: + ----------- + filename : str + Name of file (w/o path) to check (wildcards are allowed) + path : str + Path of file to check (optional) + """ + if path is not None: + self.set_cwd(path) + files = self._client.nlst() + matching_files = fnmatch.filter(files, filename) + if len(matching_files) != 0: + return True + else: + return False From 127c924fc18d1c3e892f89262eec6106fadc7336 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 28 Feb 2024 08:00:44 +0100 Subject: [PATCH 04/15] some pylint fixes --- esmvaltool/cmorizers/data/downloaders/ftp.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/ftp.py b/esmvaltool/cmorizers/data/downloaders/ftp.py index a60d52f467..1cc14e29b4 100644 --- a/esmvaltool/cmorizers/data/downloaders/ftp.py +++ b/esmvaltool/cmorizers/data/downloaders/ftp.py @@ -207,7 +207,7 @@ def dataset_name(self): """ return self.dataset.lower().replace('-', '_') - def download_file(self, filename, path=None): + def download_files(self, filename, path=None): """Download file(s). Parameters: @@ -223,7 +223,7 @@ def download_file(self, filename, path=None): matching_files = fnmatch.filter(files, filename) if len(matching_files) != 0: for file in matching_files: - super().download_file(file) + super().download_file(file) else: logger.info('No files %s found.', filename) @@ -253,5 +253,4 @@ def file_exists(self, filename, path=None): matching_files = fnmatch.filter(files, filename) if len(matching_files) != 0: return True - else: - return False + return False From ada5ae302a72bf4280c8031311e78c43b12ba4ff Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Thu, 29 Feb 2024 15:50:06 +0100 Subject: [PATCH 05/15] snapshot 2024-02-29 --- doc/sphinx/source/input.rst | 2 + .../downloaders/datasets/esacci_permafrost.py | 2 +- esmvaltool/cmorizers/data/downloaders/ftp.py | 2 +- .../formatters/datasets/esacci_permafrost.py | 313 ++++++++++++++++++ .../references/esacci-permafrost.bibtex | 29 ++ 5 files changed, 346 insertions(+), 2 deletions(-) create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py create mode 100644 esmvaltool/references/esacci-permafrost.bibtex diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index a72bce73aa..41c89f7742 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -302,6 +302,8 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-OZONE | toz, tozStderr, tro3prof, tro3profStderr (Amon) | 2 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| ESACCI-PERMAFROST | alt, gtd, pfr (custom) | 3 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SEA-SURFACE-SALINITY | sos (Omon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SOILMOISTURE | dos, dosStderr, sm, smStderr (Lmon) | 2 | NCL | diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py index b615ac58ff..38ed010ffd 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py @@ -55,7 +55,7 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, pathname = f'{var}/L4/area4/pp/{version}/' fname = f'ESACCI-PERMAFROST-L4-*-{loop_date.year}-f{version}.nc' if downloader.file_exists(fname, pathname): - downloader.download_file(fname, pathname) + downloader.download_files(fname, pathname) else: logger.info('%d: no data for %s %s', loop_date.year, var, version) diff --git a/esmvaltool/cmorizers/data/downloaders/ftp.py b/esmvaltool/cmorizers/data/downloaders/ftp.py index 1cc14e29b4..6181c1b1b3 100644 --- a/esmvaltool/cmorizers/data/downloaders/ftp.py +++ b/esmvaltool/cmorizers/data/downloaders/ftp.py @@ -215,7 +215,7 @@ def download_files(self, filename, path=None): filename : str Name of file (w/o path) to download (wildcards are allowed) path : str - Path of file to check (optional) + Path of file(s) to download (optional) """ if path is not None: self.set_cwd(path) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py new file mode 100644 index 0000000000..a9f53f7f8b --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py @@ -0,0 +1,313 @@ +"""ESMValTool CMORizer for ESACCI-PERMAFROST data. + +Tier + Tier 2: other freely-available dataset. + +Source + ftp://anon-ftp.ceda.ac.uk/neodc/esacci/permafrost/data + +Last access + 20240227 + +Download and processing instructions + Download the data from: + active_layer_thickness/L4/area4/pp/v03.0 + ground_temperature/L4/area4/pp/v03.0 + permafrost_extent/L4/area4/pp/v03.0 + Put all files in a single directory. +""" + +import glob +import logging +import os +from copy import deepcopy +from datetime import datetime +from dateutil import relativedelta + +from netCDF4 import Dataset +from cdo import * +import os.path + +import cf_units +import iris +import numpy as np +from dask import array as da +from esmvalcore.cmor.table import CMOR_TABLES +#from esmvalcore.preprocessor import regrid +#from esmvalcore.preprocessor.regrid_schemes import ( +# ESMPyAreaWeighted, ESMPyLinear, ESMPyNearest, UnstructuredNearest) +#from esmf_regrid.schemes import ESMFAreaWeighted, ESMFBilinear + +from esmvaltool.cmorizers.data.utilities import ( + save_variable, set_global_atts) + +#from iris import NameConstraint +from iris.cube import Cube + +logger = logging.getLogger(__name__) +#cdo = Cdo() + + +def _fix_coordinates(cube, definition): + """Fix coordinates.""" + axis2def = {'T': 'time', 'X': 'longitude', 'Y': 'latitude'} + axes = ['T', 'X', 'Y'] + + for axis in axes: + coord_def = definition.coordinates.get(axis2def[axis]) + if coord_def: + coord = cube.coord(axis=axis) + if axis == 'T': + coord.convert_units('days since 1850-1-1 00:00:00.0') + coord.standard_name = coord_def.standard_name + coord.var_name = coord_def.out_name + coord.long_name = coord_def.long_name + coord.points = coord.core_points().astype('float64') + if len(coord.points) > 1: + if coord.bounds is not None: + coord.bounds = None + coord.guess_bounds() + + return cube + + +def _regrid_infile(infile, outfile): + """Regrid infile to 0.5 deg x 0.5 deg grid using cdo.""" + cdo = Cdo() + # ESACCI-PERMAFROST v3.0 dimensions of raw input data + xsize = 14762 + ysize = 10353 + totalsize = xsize * ysize + + # description of ESACCI-PERMAFROST v3.0 polar stereographic + # grid for cdo + esagrid = (f'gridtype = projection\n' + f'gridsize = {totalsize}\n' + f'xsize = {xsize}\n' + f'ysize = {ysize}\n' + f'xname = x\n' + f'xlongname = "x coordinate of projection"\n' + f'xunits = "m"\n' + f'yname = y\n' + f'ylongname = "y coordinate of projection"\n' + f'yunits = "m"\n' + f'xfirst = -6111475.22239475\n' + f'xinc = 926.625433138333\n' + f'yfirst = 4114895.09469662\n' + f'yinc = -926.625433138333\n' + f'grid_mapping = polar_stereographic\n' + f'grid_mapping_name = polar_stereographic\n' + f'straight_vertical_longitude_from_pole = 0.\n' + f'false_easting = 0.\n' + f'false_northing = 0.\n' + f'latitude_of_projection_origin = 90.\n' + f'standard_parallel = 71.\n' + f'longitude_of_prime_meridian = 0.\n' + f'semi_major_axis = 6378137.\n' + f'inverse_flattening = 298.257223563\n' + f'proj_params = "+proj=stere +lat_0=90 +lat_ts=71 +lon_0=0' + f' +k=1" +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"\n') + + esagrid_file = "./esacci_grid.txt" + + # write grid description to ASCII file + file = open(esagrid_file, "w") + file.write(esagrid) + file.close() + + # define dimensions of target grid (regular lat-lon grid) + target_dimx = 720 # delta_lon = 0.5 deg + target_dimy = 360 # delta_lat = 0.5 deg + target_grid = f'r{target_dimx}x{target_dimy}' + + # check if suitable weights file already exists + # (e.g. from previous call to _regrid_file) + +# weightsfile = "./esa_weights.nc" + weightsfile = "/work/bd0854/b380103/esmvaltool_output/esa_weights.nc" + weightsfile_ok = False + + if os.path.isfile(weightsfile): + weights = Dataset(weightsfile, "r") + # make sure dimensions of source and target grids match + # expected values + src = weights.variables['src_grid_dims'] + dst = weights.variables['dst_grid_dims'] + if (xsize == src[0] and ysize == src[1] and + target_dimx == dst[0] and target_dimy == dst[1]): + logger.info("Using matching weights file %s for regridding.", + weightsfile) + weightsfile_ok = True + weights.close() + + # if no suitable weights file, generate new weights for regridding + + if not weightsfile_ok: + logger.info("Generating regridding weights. This will take" + " about 5-10 minutes (or more)...") + cdo.genbil(f"{target_grid} -setgrid,{esagrid_file}", + input=infile, output=weightsfile, options="-f nc") + + # now regrid data to 0.5 deg x 0.5 deg + cdo.remap(f"{target_grid},{weightsfile} -setgrid,{esagrid_file}", + input=infile, output=outfile, options='-f nc') + return + + +def _extract_variable(in_file, var, cfg, out_dir, year): + logger.info("CMORizing variable '%s' from input file '%s'", + var['short_name'], in_file) + attributes = deepcopy(cfg['attributes']) + attributes['mip'] = var['mip'] + attributes['raw'] = var['raw'] + cmor_table = CMOR_TABLES[attributes['project_id']] + definition = cmor_table.get_variable(var['mip'], var['short_name']) + + # regrid input file using cdo + # (using the preprocessor or ESMF regrid is too slow) + + regridded_file = f"./{year}_{var['short_name']}.nc" + print(regridded_file) + _regrid_infile(in_file, regridded_file) + exit() + + # load input file + cube = iris.load_cube(regridded_file) + + # --> drop attributes that differ among input files + # global attributes to remove + drop_attrs = [ + 'source', 'date_created', 'history', 'tracking_id', + 'id', 'time_coverage_start', 'time_coverage_end', 'platform', + 'sensor', 'keywords' + ] + # variable attributes to remove + drop_var_attrs = [ + 'flag_meanings', 'flag_values', 'grid_mapping', 'actual_range', + 'ancillary_variables' + ] + for attr in drop_attrs: + if attr in cube.attributes.keys(): + cube.attributes.pop(attr) + for attr in drop_var_attrs: + if attr in cube.attributes.keys(): + cube.attributes.pop(attr) + + set_global_atts(cube, attributes) + +# iris.util.unify_time_units(cube) + cube.coord('time').points = cube.coord('time').core_points().astype( + 'float64') + + # Set correct names + cube.var_name = definition.short_name + cube.standard_name = definition.standard_name + cube.long_name = definition.long_name + +# # Fix units +# # input variable for snc (sncf) reports 'percent' --> rename to '%' +# # input variable for snw (swe) reports 'mm' --> rename to 'kg m-2' +# cube.units = definition.units + + # Fix data type + cube.data = cube.core_data().astype('float32') + + # Fix coordinates +# cube = _fix_coordinates(cube, definition) +# cube.coord('latitude').attributes = None +# cube.coord('longitude').attributes = None + +# cube.coord('projection_y_coordinate').standard_name = "latitude" +# cube.coord('projection_x_coordinate').standard_name = "longitude" +# print(cube) + + dlon = 0.5 + dlat = 0.5 + mid_dlon, mid_dlat = dlon / 2, dlat / 2 + + latdata = np.linspace(-90.0, 90.0, int(180.0 / dlat) + 1) + londata = np.linspace(0.0, 360.0 - dlon, int(360.0 / dlon)) + + lats = iris.coords.DimCoord(latdata, + standard_name='latitude', + units='degrees_north', + var_name='lat', + circular=False) + + lons = iris.coords.DimCoord(londata, + standard_name='longitude', + units='degrees_east', + var_name='lon', + circular=False) + + lats.guess_bounds() + lons.guess_bounds() + + # Construct the resultant stock cube, with dummy data. + shape = (latdata.size, londata.size) + dummy = np.empty(shape, dtype=np.dtype('int8')) + coords_spec = [(lats, 0), (lons, 1)] + target_grid_cube = Cube(dummy, dim_coords_and_dims=coords_spec) + +# regridded_cube = cube.regrid(target_grid_cube, ESMPyLinear()) +# regridded_cube = cube.regrid(target_grid_cube, ESMPyAreaWeighted()) +# regridded_cube = cube.regrid(target_grid_cube, ESMPyNearest()) +# regridded_cube = cube.regrid(target_grid_cube, ESMFAreaWeighted()) +# regridded_cube = cube.regrid(target_grid_cube, ESMFBilinear()) +# regridded_cube = cube.regrid(target_grid_cube, iris.analysis.PointInCell()) +# regridded_cube = cube.regrid(target_grid_cube, iris.analysis.Nearest()) +# regridded_cube = cube.regrid(target_grid_cube, iris.analysis.UnstructuredNearest()) +# regridded_cube = cube.regrid(target_grid_cube, UnstructuredNearest()) + +# # regridding from polar stereographic to 0.5x0.5 +# cube = regrid(cube, target_grid='0.5x0.5', scheme='area_weighted') +# cube = regrid(cube, target_grid='0.5x0.5', scheme='nearest') +# cube = regrid(cube, target_grid='0.5x0.5', scheme='linear') + +# cube.attributes.update({"geospatial_lon_resolution": "0.5", +# "geospatial_lat_resolution": "0.5", +# "spatial_resolution": "0.5"}) + + # Save results + logger.debug("Saving cube\n%s", regridded_cube) + logger.debug("Setting time dimension to UNLIMITED while saving!") + save_variable(regridded_cube, regridded_cube.var_name, + out_dir, attributes, + unlimited_dimensions=['time']) + logger.info("Finished CMORizing %s", in_file) + + exit() + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """CMORize ESACCI-PERMAFROST dataset.""" + glob_attrs = cfg['attributes'] + + logger.info("Starting CMORization for tier%s OBS files: %s", + glob_attrs['tier'], glob_attrs['dataset_id']) + logger.info("Input data from: %s", in_dir) + logger.info("Output will be written to: %s", out_dir) + logger.info("CMORizing ESACCI-PERMAFROST version %s", + glob_attrs['version']) + + if start_date is None: + start_date = datetime(2003, 1, 1) + if end_date is None: + end_date = datetime(2019, 12, 31) + + loop_date = start_date + while loop_date <= end_date: + for short_name, var in cfg['variables'].items(): + if 'short_name' not in var: + var['short_name'] = short_name + filepattern = os.path.join( + in_dir, var['file'].format(year=loop_date.year) + ) + in_file = glob.glob(filepattern)[0] + if not in_file: + logger.info(f'{loop_date.year}: no data not found for ' + f'variable {short_name}') + else: + _extract_variable(in_file, var, cfg, out_dir, loop_date.year) + + loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/references/esacci-permafrost.bibtex b/esmvaltool/references/esacci-permafrost.bibtex new file mode 100644 index 0000000000..229df7ba84 --- /dev/null +++ b/esmvaltool/references/esacci-permafrost.bibtex @@ -0,0 +1,29 @@ +@article{esacci-permafrost-alt, + doi = {10.5285/67a3f8c8dc914ef99f7f08eb0d997e23}, + url = {https://dx.doi.org/10.5285/67a3f8c8dc914ef99f7f08eb0d997e23}, + year = 2021, + month = {jun}, + publisher = {NERC EDS Centre for Environmental Data Analysis}, + author = {Obu, J.; Westermann, S.; Barboux, C.; Bartsch, A.; Delaloye, R.; Grosse, G.; Heim, B.; Hugelius, G.; Irrgang, A.; Kääb, A.M.; Kroisleitner, C.; Matthes, H.; Nitze, I.; Pellet, C.; Seifert, F.M.; Strozzi, T.; Wegmüller, U.; Wieczorek, M.; Wiesmann, A.} + title = {ESA Permafrost Climate Change Initiative (Permafrost\_cci): Permafrost active layer thickness for the Northern Hemisphere, v3.0}, +} + +@article{esacci-permafrost-gtd, + doi = {10.5285/b25d4a6174de4ac78000d034f500a268}, + url = {https://dx.doi.org/10.5285/b25d4a6174de4ac78000d034f500a268}, + year = 2021, + month = {jun}, + publisher = {NERC EDS Centre for Environmental Data Analysis}, + author = {Obu, J.; Westermann, S.; Barboux, C.; Bartsch, A.; Delaloye, R.; Grosse, G.; Heim, B.; Hugelius, G.; Irrgang, A.; Kääb, A.M.; Kroisleitner, C.; Matthes, H.; Nitze, I.; Pellet, C.; Seifert, F.M.; Strozzi, T.; Wegmüller, U.; Wieczorek, M.; Wiesmann, A.}, + title = {ESA Permafrost Climate Change Initiative (Permafrost\_cci): Permafrost Ground Temperature for the Northern Hemisphere, v3.0}, +} + +@article{esacci-permafrost-pfr, + doi = {10.5285/6e2091cb0c8b4106921b63cd5357c97c}, + url = {https://dx.doi.org/10.5285/6e2091cb0c8b4106921b63cd5357c97c}, + year = 2021, + month = {jun}, + publisher = {NERC EDS Centre for Environmental Data Analysis}, + author = {Obu, J.; Westermann, S.; Barboux, C.; Bartsch, A.; Delaloye, R.; Grosse, G.; Heim, B.; Hugelius, G.; Irrgang, A.; Kääb, A.M.; Kroisleitner, C.; Matthes, H.; Nitze, I.; Pellet, C.; Seifert, F.M.; Strozzi, T.; Wegmüller, U.; Wieczorek, M.; Wiesmann, A.}, + title = {ESA Permafrost Climate Change Initiative (Permafrost\_cci): Permafrost extent for the Northern Hemisphere, v3.0}, +} From 223073979ea546b52dfa98feedacfdd9b965e3bc Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Fri, 1 Mar 2024 06:43:47 +0100 Subject: [PATCH 06/15] added cmor_config/ESACCI-PERMAFROST.yml --- .../data/cmor_config/ESACCI-PERMAFROST.yml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml new file mode 100644 index 0000000000..fc4de28ff5 --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml @@ -0,0 +1,26 @@ +--- +# Common global attributes for CMORizer output +attributes: + dataset_id: ESACCI-PERMAFROST + version: 'v3.0' + tier: 2 + modeling_realm: sat + project_id: OBS6 + source: 'ftp://anon-ftp.ceda.ac.uk/neodc/esacci/permafrost' + reference: "esacci-permafrost" + comment: "" + +# Variables to CMORize +variables: + alt: + mip: Lmon + raw: ALT + file: 'ESACCI-PERMAFROST-L4-ALT-*-AREA4_PP-{year}-fv03.0.nc' + gtd: + mip: Lmon + raw: [GST, T1m, T2m, T5m, T10m] + file: 'ESACCI-PERMAFROST-L4-GTD-*-AREA4_PP-{year}-fv03.0.nc' + pfr: + mip: Lmon + raw: PFR + file: 'ESACCI-PERMAFROST-L4-PFR-*-AREA4_PP-{year}-fv03.0.nc' From b7d412e382fe2ee1fa1b7bd9e1989750034863c0 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Fri, 1 Mar 2024 14:33:15 +0100 Subject: [PATCH 07/15] snapshot 2024-03-01 --- .../formatters/datasets/esacci_permafrost.py | 157 ++++++++++++------ 1 file changed, 108 insertions(+), 49 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py index a9f53f7f8b..d96a798bb1 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py @@ -33,7 +33,7 @@ import numpy as np from dask import array as da from esmvalcore.cmor.table import CMOR_TABLES -#from esmvalcore.preprocessor import regrid +from esmvalcore.preprocessor import regrid #from esmvalcore.preprocessor.regrid_schemes import ( # ESMPyAreaWeighted, ESMPyLinear, ESMPyNearest, UnstructuredNearest) #from esmf_regrid.schemes import ESMFAreaWeighted, ESMFBilinear @@ -71,7 +71,7 @@ def _fix_coordinates(cube, definition): return cube -def _regrid_infile(infile, outfile): +def _regrid_infile(infile, outfile, weightsfile): """Regrid infile to 0.5 deg x 0.5 deg grid using cdo.""" cdo = Cdo() # ESACCI-PERMAFROST v3.0 dimensions of raw input data @@ -123,8 +123,6 @@ def _regrid_infile(infile, outfile): # check if suitable weights file already exists # (e.g. from previous call to _regrid_file) -# weightsfile = "./esa_weights.nc" - weightsfile = "/work/bd0854/b380103/esmvaltool_output/esa_weights.nc" weightsfile_ok = False if os.path.isfile(weightsfile): @@ -145,12 +143,21 @@ def _regrid_infile(infile, outfile): if not weightsfile_ok: logger.info("Generating regridding weights. This will take" " about 5-10 minutes (or more)...") + # check if path for weight files exists, if not create folder + path = os.path.split(weightsfile)[0] + if not os.path.exists(path): + os.makedirs(path) + # generate weights cdo.genbil(f"{target_grid} -setgrid,{esagrid_file}", input=infile, output=weightsfile, options="-f nc") # now regrid data to 0.5 deg x 0.5 deg cdo.remap(f"{target_grid},{weightsfile} -setgrid,{esagrid_file}", input=infile, output=outfile, options='-f nc') + + # delete temporary file + os.remove(esagrid_file) + return @@ -163,18 +170,57 @@ def _extract_variable(in_file, var, cfg, out_dir, year): cmor_table = CMOR_TABLES[attributes['project_id']] definition = cmor_table.get_variable(var['mip'], var['short_name']) + if 'weights_dir' in var.keys(): + weights_dir = var['weights_dir'] + else: + weights_dir = '.' + # regrid input file using cdo - # (using the preprocessor or ESMF regrid is too slow) + # (using the preprocessor (ESMF) is too slow) regridded_file = f"./{year}_{var['short_name']}.nc" - print(regridded_file) - _regrid_infile(in_file, regridded_file) - exit() + weights_file = f"{weights_dir}/{year}_{var['short_name']}_weights.nc" + _regrid_infile(in_file, regridded_file, weights_file) # load input file - cube = iris.load_cube(regridded_file) - - # --> drop attributes that differ among input files + cubes = iris.load(regridded_file) + + if len(cubes) > 1: + # variable gtd contains the vertical levels as separate variables + # (depth level can only be recognized by the variable names) + # --> combine all depth levels into 1 cube + for cube in cubes: + if cube.var_name == 'GST': + sdepth = 0.0 + elif cube.var_name == 'T1m': + sdepth = 1.0 + elif cube.var_name == 'T2m': + sdepth = 2.0 + elif cube.var_name == 'T5m': + sdepth = 5.0 + elif cube.var_name == 'T10m': + sdepth = 10.0 + else: + sdepth = 999.0 + logger.info("Could not determin depth. Check results.") + cube.add_aux_coord(iris.coords.AuxCoord(sdepth, + long_name='sdepth', units="m")) + cube.var_name = "gst" + cube.standard_name = "soil_temperature" # "valid" standard name + cube.attributes.pop('actual_min') + cube.attributes.pop('actual_max') + tmp_cube = cubes.merge_cube() + # swap coordinates 'sdepth' and 'time' + # (sdepth, time, lat, lon) --> (time, sdepth, lat, lon) + flipped_data = np.swapaxes(tmp_cube.core_data(), 1, 0) + coord_spec = [(tmp_cube.coord('time'), 0), (tmp_cube.coord('sdepth'), 1), + (tmp_cube.coord('latitude'), 2), (tmp_cube.coord('longitude'), 3)] + cube = iris.cube.Cube(flipped_data, dim_coords_and_dims=coord_spec) + cube.metadata = tmp_cube.metadata + else: + cube = cubes[0] + + # --> drop attributes that differ among input files for different years # global attributes to remove drop_attrs = [ 'source', 'date_created', 'history', 'tracking_id', @@ -204,16 +250,26 @@ def _extract_variable(in_file, var, cfg, out_dir, year): cube.standard_name = definition.standard_name cube.long_name = definition.long_name -# # Fix units -# # input variable for snc (sncf) reports 'percent' --> rename to '%' -# # input variable for snw (swe) reports 'mm' --> rename to 'kg m-2' + # Fix units + # input variable for pfr reports 'percent' --> rename to '%' + # input variable for alt reports 'metres' --> rename to 'm' + # input variable for gtd reports 'degrees celsius' --> convert to 'K' + print(cube.units) + print(definition.units) +#### cube.convert_units(definition.units) # cube.units = definition.units # Fix data type cube.data = cube.core_data().astype('float32') # Fix coordinates -# cube = _fix_coordinates(cube, definition) + cube = _fix_coordinates(cube, definition) + + +# ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- + # cube.coord('latitude').attributes = None # cube.coord('longitude').attributes = None @@ -221,34 +277,34 @@ def _extract_variable(in_file, var, cfg, out_dir, year): # cube.coord('projection_x_coordinate').standard_name = "longitude" # print(cube) - dlon = 0.5 - dlat = 0.5 - mid_dlon, mid_dlat = dlon / 2, dlat / 2 - - latdata = np.linspace(-90.0, 90.0, int(180.0 / dlat) + 1) - londata = np.linspace(0.0, 360.0 - dlon, int(360.0 / dlon)) - - lats = iris.coords.DimCoord(latdata, - standard_name='latitude', - units='degrees_north', - var_name='lat', - circular=False) - - lons = iris.coords.DimCoord(londata, - standard_name='longitude', - units='degrees_east', - var_name='lon', - circular=False) - - lats.guess_bounds() - lons.guess_bounds() - - # Construct the resultant stock cube, with dummy data. - shape = (latdata.size, londata.size) - dummy = np.empty(shape, dtype=np.dtype('int8')) - coords_spec = [(lats, 0), (lons, 1)] - target_grid_cube = Cube(dummy, dim_coords_and_dims=coords_spec) - +# dlon = 0.5 +# dlat = 0.5 +# mid_dlon, mid_dlat = dlon / 2, dlat / 2 +# +# latdata = np.linspace(-90.0, 90.0, int(180.0 / dlat) + 1) +# londata = np.linspace(0.0, 360.0 - dlon, int(360.0 / dlon)) +# +# lats = iris.coords.DimCoord(latdata, +# standard_name='latitude', +# units='degrees_north', +# var_name='lat', +# circular=False) +# +# lons = iris.coords.DimCoord(londata, +# standard_name='longitude', +# units='degrees_east', +# var_name='lon', +# circular=False) +# +# lats.guess_bounds() +# lons.guess_bounds() +# +# # Construct the resultant stock cube, with dummy data. +# shape = (latdata.size, londata.size) +# dummy = np.empty(shape, dtype=np.dtype('int8')) +# coords_spec = [(lats, 0), (lons, 1)] +# target_grid_cube = Cube(dummy, dim_coords_and_dims=coords_spec) +# # regridded_cube = cube.regrid(target_grid_cube, ESMPyLinear()) # regridded_cube = cube.regrid(target_grid_cube, ESMPyAreaWeighted()) # regridded_cube = cube.regrid(target_grid_cube, ESMPyNearest()) @@ -268,16 +324,19 @@ def _extract_variable(in_file, var, cfg, out_dir, year): # "geospatial_lat_resolution": "0.5", # "spatial_resolution": "0.5"}) +# ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- + # Save results - logger.debug("Saving cube\n%s", regridded_cube) + logger.debug("Saving cube\n%s", cube) logger.debug("Setting time dimension to UNLIMITED while saving!") - save_variable(regridded_cube, regridded_cube.var_name, + save_variable(cube, cube.var_name, out_dir, attributes, unlimited_dimensions=['time']) + os.remove(regridded_file) # delete temporary file logger.info("Finished CMORizing %s", in_file) - exit() - def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """CMORize ESACCI-PERMAFROST dataset.""" @@ -291,9 +350,9 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): glob_attrs['version']) if start_date is None: - start_date = datetime(2003, 1, 1) + start_date = datetime(2003, 1, 1) # (1997, 1, 1) if end_date is None: - end_date = datetime(2019, 12, 31) + end_date = datetime(2003, 12, 31) # 2019, 12, 31) loop_date = start_date while loop_date <= end_date: From 81b99c6daa54e1aa2898434f201b3f9aff0e701a Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Mon, 4 Mar 2024 09:03:46 +0100 Subject: [PATCH 08/15] first working version --- .../data/cmor_config/ESACCI-PERMAFROST.yml | 9 ++++-- .../formatters/datasets/esacci_permafrost.py | 31 +++++++++++-------- .../recipes/examples/recipe_check_obs.yml | 11 ++++++- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml index fc4de28ff5..337dc93cce 100644 --- a/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml @@ -13,14 +13,17 @@ attributes: # Variables to CMORize variables: alt: - mip: Lmon + mip: Lyr raw: ALT file: 'ESACCI-PERMAFROST-L4-ALT-*-AREA4_PP-{year}-fv03.0.nc' + weights_dir: '/work/bd0854/b380103/esacci-permafrost-weights' gtd: - mip: Lmon + mip: Lyr raw: [GST, T1m, T2m, T5m, T10m] file: 'ESACCI-PERMAFROST-L4-GTD-*-AREA4_PP-{year}-fv03.0.nc' + weights_dir: '/work/bd0854/b380103/esacci-permafrost-weights' pfr: - mip: Lmon + mip: Lyr raw: PFR file: 'ESACCI-PERMAFROST-L4-PFR-*-AREA4_PP-{year}-fv03.0.nc' + weights_dir: '/work/bd0854/b380103/esacci-permafrost-weights' diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py index d96a798bb1..71cfc1b832 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py @@ -50,8 +50,8 @@ def _fix_coordinates(cube, definition): """Fix coordinates.""" - axis2def = {'T': 'time', 'X': 'longitude', 'Y': 'latitude'} - axes = ['T', 'X', 'Y'] + axis2def = {'T': 'time', 'X': 'longitude', 'Y': 'latitude', 'Z': 'sdepth'} + axes = ['T', 'X', 'Y', 'Z'] for axis in axes: coord_def = definition.coordinates.get(axis2def[axis]) @@ -204,19 +204,27 @@ def _extract_variable(in_file, var, cfg, out_dir, year): sdepth = 999.0 logger.info("Could not determin depth. Check results.") cube.add_aux_coord(iris.coords.AuxCoord(sdepth, - long_name='sdepth', units="m")) + standard_name='depth', + long_name='depth', units="m")) cube.var_name = "gst" cube.standard_name = "soil_temperature" # "valid" standard name cube.attributes.pop('actual_min') cube.attributes.pop('actual_max') tmp_cube = cubes.merge_cube() - # swap coordinates 'sdepth' and 'time' - # (sdepth, time, lat, lon) --> (time, sdepth, lat, lon) + # setting the attribute 'positive' is needed for Iris to recognize + # this coordinate as 'Z' axis + tmp_cube.coord('depth').attributes['positive'] = "down" + # swap coordinates 'depth' and 'time': + # (depth, time, lat, lon) --> (time, depth, lat, lon) flipped_data = np.swapaxes(tmp_cube.core_data(), 1, 0) - coord_spec = [(tmp_cube.coord('time'), 0), (tmp_cube.coord('sdepth'), 1), + coord_spec = [(tmp_cube.coord('time'), 0), (tmp_cube.coord('depth'), 1), (tmp_cube.coord('latitude'), 2), (tmp_cube.coord('longitude'), 3)] cube = iris.cube.Cube(flipped_data, dim_coords_and_dims=coord_spec) cube.metadata = tmp_cube.metadata + # change units string so unit conversion from deg C --> K will work + cube.units = 'celsius' + # convert units from degC to K + cube.convert_units('K') else: cube = cubes[0] @@ -253,11 +261,8 @@ def _extract_variable(in_file, var, cfg, out_dir, year): # Fix units # input variable for pfr reports 'percent' --> rename to '%' # input variable for alt reports 'metres' --> rename to 'm' - # input variable for gtd reports 'degrees celsius' --> convert to 'K' - print(cube.units) - print(definition.units) -#### cube.convert_units(definition.units) -# cube.units = definition.units + # input variable for gtd has been converted to 'K' --> nothing to do + cube.units = definition.units # Fix data type cube.data = cube.core_data().astype('float32') @@ -350,9 +355,9 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): glob_attrs['version']) if start_date is None: - start_date = datetime(2003, 1, 1) # (1997, 1, 1) + start_date = datetime(1997, 1, 1) if end_date is None: - end_date = datetime(2003, 12, 31) # 2019, 12, 31) + end_date = datetime(2019, 12, 31) loop_date = start_date while loop_date <= end_date: diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 325d9685b8..fe161be390 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -229,6 +229,16 @@ diagnostics: start_year: 2003, end_year: 2018} scripts: null + ESACCI-PERMAFROST: + description: ESACCI-PERMAFROST check + variables: + alt: + gtd: + pfr: + additional_datasets: + - {dataset: ESACCI-PERMAFROST, project: OBS6, mip: Lyr, tier: 2, type: sat, version: v3.0, + start_year: 1997, end_year: 2019} + scripts: null ESACCI-OC: description: ESACCI-OC check @@ -239,7 +249,6 @@ diagnostics: type: sat, version: fv5.0, start_year: 1998, end_year: 2020} scripts: null - ESACCI-OZONE: description: ESACCI-OZONE check variables: From 5487cb4e0388e4c82a613b530f6fe5376f5f128f Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Mon, 4 Mar 2024 09:44:31 +0100 Subject: [PATCH 09/15] cleaned up code a bit --- .../formatters/datasets/esacci_permafrost.py | 108 +++--------------- 1 file changed, 17 insertions(+), 91 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py index 71cfc1b832..ff5f693759 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py @@ -20,32 +20,22 @@ import glob import logging import os +import os.path + from copy import deepcopy from datetime import datetime from dateutil import relativedelta - from netCDF4 import Dataset -from cdo import * -import os.path +from cdo import Cdo -import cf_units import iris import numpy as np -from dask import array as da from esmvalcore.cmor.table import CMOR_TABLES -from esmvalcore.preprocessor import regrid -#from esmvalcore.preprocessor.regrid_schemes import ( -# ESMPyAreaWeighted, ESMPyLinear, ESMPyNearest, UnstructuredNearest) -#from esmf_regrid.schemes import ESMFAreaWeighted, ESMFBilinear - from esmvaltool.cmorizers.data.utilities import ( save_variable, set_global_atts) - -#from iris import NameConstraint -from iris.cube import Cube +#from iris.cube import Cube logger = logging.getLogger(__name__) -#cdo = Cdo() def _fix_coordinates(cube, definition): @@ -111,9 +101,9 @@ def _regrid_infile(infile, outfile, weightsfile): esagrid_file = "./esacci_grid.txt" # write grid description to ASCII file - file = open(esagrid_file, "w") - file.write(esagrid) - file.close() + with open(esagrid_file, "w", encoding="ascii") as file: + file.write(esagrid) + file.close() # define dimensions of target grid (regular lat-lon grid) target_dimx = 720 # delta_lon = 0.5 deg @@ -132,7 +122,7 @@ def _regrid_infile(infile, outfile, weightsfile): src = weights.variables['src_grid_dims'] dst = weights.variables['dst_grid_dims'] if (xsize == src[0] and ysize == src[1] and - target_dimx == dst[0] and target_dimy == dst[1]): + target_dimx == dst[0] and target_dimy == dst[1]): logger.info("Using matching weights file %s for regridding.", weightsfile) weightsfile_ok = True @@ -158,8 +148,6 @@ def _regrid_infile(infile, outfile, weightsfile): # delete temporary file os.remove(esagrid_file) - return - def _extract_variable(in_file, var, cfg, out_dir, year): logger.info("CMORizing variable '%s' from input file '%s'", @@ -177,7 +165,7 @@ def _extract_variable(in_file, var, cfg, out_dir, year): # regrid input file using cdo # (using the preprocessor (ESMF) is too slow) - + regridded_file = f"./{year}_{var['short_name']}.nc" weights_file = f"{weights_dir}/{year}_{var['short_name']}_weights.nc" _regrid_infile(in_file, regridded_file, weights_file) @@ -212,13 +200,15 @@ def _extract_variable(in_file, var, cfg, out_dir, year): cube.attributes.pop('actual_max') tmp_cube = cubes.merge_cube() # setting the attribute 'positive' is needed for Iris to recognize - # this coordinate as 'Z' axis + # this coordinate as 'Z' axis tmp_cube.coord('depth').attributes['positive'] = "down" # swap coordinates 'depth' and 'time': # (depth, time, lat, lon) --> (time, depth, lat, lon) flipped_data = np.swapaxes(tmp_cube.core_data(), 1, 0) - coord_spec = [(tmp_cube.coord('time'), 0), (tmp_cube.coord('depth'), 1), - (tmp_cube.coord('latitude'), 2), (tmp_cube.coord('longitude'), 3)] + coord_spec = [(tmp_cube.coord('time'), 0), + (tmp_cube.coord('depth'), 1), + (tmp_cube.coord('latitude'), 2), + (tmp_cube.coord('longitude'), 3)] cube = iris.cube.Cube(flipped_data, dim_coords_and_dims=coord_spec) cube.metadata = tmp_cube.metadata # change units string so unit conversion from deg C --> K will work @@ -249,7 +239,6 @@ def _extract_variable(in_file, var, cfg, out_dir, year): set_global_atts(cube, attributes) -# iris.util.unify_time_units(cube) cube.coord('time').points = cube.coord('time').core_points().astype( 'float64') @@ -270,69 +259,6 @@ def _extract_variable(in_file, var, cfg, out_dir, year): # Fix coordinates cube = _fix_coordinates(cube, definition) - -# ----------------------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# ----------------------------------------------------------------------------- - -# cube.coord('latitude').attributes = None -# cube.coord('longitude').attributes = None - -# cube.coord('projection_y_coordinate').standard_name = "latitude" -# cube.coord('projection_x_coordinate').standard_name = "longitude" -# print(cube) - -# dlon = 0.5 -# dlat = 0.5 -# mid_dlon, mid_dlat = dlon / 2, dlat / 2 -# -# latdata = np.linspace(-90.0, 90.0, int(180.0 / dlat) + 1) -# londata = np.linspace(0.0, 360.0 - dlon, int(360.0 / dlon)) -# -# lats = iris.coords.DimCoord(latdata, -# standard_name='latitude', -# units='degrees_north', -# var_name='lat', -# circular=False) -# -# lons = iris.coords.DimCoord(londata, -# standard_name='longitude', -# units='degrees_east', -# var_name='lon', -# circular=False) -# -# lats.guess_bounds() -# lons.guess_bounds() -# -# # Construct the resultant stock cube, with dummy data. -# shape = (latdata.size, londata.size) -# dummy = np.empty(shape, dtype=np.dtype('int8')) -# coords_spec = [(lats, 0), (lons, 1)] -# target_grid_cube = Cube(dummy, dim_coords_and_dims=coords_spec) -# -# regridded_cube = cube.regrid(target_grid_cube, ESMPyLinear()) -# regridded_cube = cube.regrid(target_grid_cube, ESMPyAreaWeighted()) -# regridded_cube = cube.regrid(target_grid_cube, ESMPyNearest()) -# regridded_cube = cube.regrid(target_grid_cube, ESMFAreaWeighted()) -# regridded_cube = cube.regrid(target_grid_cube, ESMFBilinear()) -# regridded_cube = cube.regrid(target_grid_cube, iris.analysis.PointInCell()) -# regridded_cube = cube.regrid(target_grid_cube, iris.analysis.Nearest()) -# regridded_cube = cube.regrid(target_grid_cube, iris.analysis.UnstructuredNearest()) -# regridded_cube = cube.regrid(target_grid_cube, UnstructuredNearest()) - -# # regridding from polar stereographic to 0.5x0.5 -# cube = regrid(cube, target_grid='0.5x0.5', scheme='area_weighted') -# cube = regrid(cube, target_grid='0.5x0.5', scheme='nearest') -# cube = regrid(cube, target_grid='0.5x0.5', scheme='linear') - -# cube.attributes.update({"geospatial_lon_resolution": "0.5", -# "geospatial_lat_resolution": "0.5", -# "spatial_resolution": "0.5"}) - -# ----------------------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# ----------------------------------------------------------------------------- - # Save results logger.debug("Saving cube\n%s", cube) logger.debug("Setting time dimension to UNLIMITED while saving!") @@ -341,7 +267,7 @@ def _extract_variable(in_file, var, cfg, out_dir, year): unlimited_dimensions=['time']) os.remove(regridded_file) # delete temporary file logger.info("Finished CMORizing %s", in_file) - + def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """CMORize ESACCI-PERMAFROST dataset.""" @@ -369,8 +295,8 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): ) in_file = glob.glob(filepattern)[0] if not in_file: - logger.info(f'{loop_date.year}: no data not found for ' - f'variable {short_name}') + logger.info("%d: no data not found for variable %s", + loop_date.year, short_name) else: _extract_variable(in_file, var, cfg, out_dir, loop_date.year) From 21708d6477756c067dc412c00e1bc976a444ae5e Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Fri, 21 Jun 2024 09:41:26 +0200 Subject: [PATCH 10/15] snapshot 2024-06-21 --- .../cmorizers/data/formatters/datasets/esacci_permafrost.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py index ff5f693759..9fe43796c1 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py @@ -33,7 +33,6 @@ from esmvalcore.cmor.table import CMOR_TABLES from esmvaltool.cmorizers.data.utilities import ( save_variable, set_global_atts) -#from iris.cube import Cube logger = logging.getLogger(__name__) From b0ebee868d4b60bef075b94d9fc049fc3ba390b6 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 7 May 2025 08:40:12 +0200 Subject: [PATCH 11/15] snapshot 2025-05-07 --- .../downloaders/datasets/esacci_permafrost.py | 24 ++-- .../formatters/datasets/esacci_permafrost.py | 113 +++++++++--------- 2 files changed, 70 insertions(+), 67 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py index 38ed010ffd..b687595666 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py @@ -10,8 +10,12 @@ logger = logging.getLogger(__name__) -def download_dataset(config, dataset, dataset_info, start_date, end_date, - overwrite): +def download_dataset(config: dict, + dataset: str, + dataset_info: dict, + start_date: datetime, + end_date: datetime, + overwrite: bool) -> None: """Download dataset. Parameters @@ -30,9 +34,9 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, Overwrite already downloaded files """ if start_date is None: - start_date = datetime(1997, 1, 1) + start_date = datetime(1997, 1, 1, tzinfo=datetime.timezone.utc) if end_date is None: - end_date = datetime(2019, 12, 31) + end_date = datetime(2019, 12, 31, tzinfo=datetime.timezone.utc) downloader = CCIDownloader( config=config, @@ -43,20 +47,20 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, downloader.connect() - version = 'v03.0' + version = "v03.0" - ccivars = ['active_layer_thickness', 'ground_temperature', - 'permafrost_extent'] + ccivars = ["active_layer_thickness", "ground_temperature", + "permafrost_extent"] # download active layer thickness loop_date = start_date while loop_date <= end_date: for var in ccivars: - pathname = f'{var}/L4/area4/pp/{version}/' - fname = f'ESACCI-PERMAFROST-L4-*-{loop_date.year}-f{version}.nc' + pathname = f"{var}/L4/area4/pp/{version}/" + fname = f"ESACCI-PERMAFROST-L4-*-{loop_date.year}-f{version}.nc" if downloader.file_exists(fname, pathname): downloader.download_files(fname, pathname) else: - logger.info('%d: no data for %s %s', + logger.info("%d: no data for %s %s", loop_date.year, var, version) loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py index 9fe43796c1..e303b9e155 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py @@ -21,37 +21,36 @@ import logging import os import os.path - from copy import deepcopy from datetime import datetime -from dateutil import relativedelta -from netCDF4 import Dataset -from cdo import Cdo import iris import numpy as np +from cdo import Cdo +from dateutil import relativedelta from esmvalcore.cmor.table import CMOR_TABLES -from esmvaltool.cmorizers.data.utilities import ( - save_variable, set_global_atts) +from netCDF4 import Dataset + +from esmvaltool.cmorizers.data.utilities import save_variable, set_global_atts logger = logging.getLogger(__name__) def _fix_coordinates(cube, definition): """Fix coordinates.""" - axis2def = {'T': 'time', 'X': 'longitude', 'Y': 'latitude', 'Z': 'sdepth'} - axes = ['T', 'X', 'Y', 'Z'] + axis2def = {"T": "time", "X": "longitude", "Y": "latitude", "Z": "sdepth"} + axes = ["T", "X", "Y", "Z"] for axis in axes: coord_def = definition.coordinates.get(axis2def[axis]) if coord_def: coord = cube.coord(axis=axis) - if axis == 'T': - coord.convert_units('days since 1850-1-1 00:00:00.0') + if axis == "T": + coord.convert_units("days since 1850-1-1 00:00:00.0") coord.standard_name = coord_def.standard_name coord.var_name = coord_def.out_name coord.long_name = coord_def.long_name - coord.points = coord.core_points().astype('float64') + coord.points = coord.core_points().astype("float64") if len(coord.points) > 1: if coord.bounds is not None: coord.bounds = None @@ -107,7 +106,7 @@ def _regrid_infile(infile, outfile, weightsfile): # define dimensions of target grid (regular lat-lon grid) target_dimx = 720 # delta_lon = 0.5 deg target_dimy = 360 # delta_lat = 0.5 deg - target_grid = f'r{target_dimx}x{target_dimy}' + target_grid = f"r{target_dimx}x{target_dimy}" # check if suitable weights file already exists # (e.g. from previous call to _regrid_file) @@ -118,8 +117,8 @@ def _regrid_infile(infile, outfile, weightsfile): weights = Dataset(weightsfile, "r") # make sure dimensions of source and target grids match # expected values - src = weights.variables['src_grid_dims'] - dst = weights.variables['dst_grid_dims'] + src = weights.variables["src_grid_dims"] + dst = weights.variables["dst_grid_dims"] if (xsize == src[0] and ysize == src[1] and target_dimx == dst[0] and target_dimy == dst[1]): logger.info("Using matching weights file %s for regridding.", @@ -142,7 +141,7 @@ def _regrid_infile(infile, outfile, weightsfile): # now regrid data to 0.5 deg x 0.5 deg cdo.remap(f"{target_grid},{weightsfile} -setgrid,{esagrid_file}", - input=infile, output=outfile, options='-f nc') + input=infile, output=outfile, options="-f nc") # delete temporary file os.remove(esagrid_file) @@ -150,17 +149,17 @@ def _regrid_infile(infile, outfile, weightsfile): def _extract_variable(in_file, var, cfg, out_dir, year): logger.info("CMORizing variable '%s' from input file '%s'", - var['short_name'], in_file) - attributes = deepcopy(cfg['attributes']) - attributes['mip'] = var['mip'] - attributes['raw'] = var['raw'] - cmor_table = CMOR_TABLES[attributes['project_id']] - definition = cmor_table.get_variable(var['mip'], var['short_name']) - - if 'weights_dir' in var.keys(): - weights_dir = var['weights_dir'] + var["short_name"], in_file) + attributes = deepcopy(cfg["attributes"]) + attributes["mip"] = var["mip"] + attributes["raw"] = var["raw"] + cmor_table = CMOR_TABLES[attributes["project_id"]] + definition = cmor_table.get_variable(var["mip"], var["short_name"]) + + if "weights_dir" in var.keys(): + weights_dir = var["weights_dir"] else: - weights_dir = '.' + weights_dir = "." # regrid input file using cdo # (using the preprocessor (ESMF) is too slow) @@ -177,57 +176,57 @@ def _extract_variable(in_file, var, cfg, out_dir, year): # (depth level can only be recognized by the variable names) # --> combine all depth levels into 1 cube for cube in cubes: - if cube.var_name == 'GST': + if cube.var_name == "GST": sdepth = 0.0 - elif cube.var_name == 'T1m': + elif cube.var_name == "T1m": sdepth = 1.0 - elif cube.var_name == 'T2m': + elif cube.var_name == "T2m": sdepth = 2.0 - elif cube.var_name == 'T5m': + elif cube.var_name == "T5m": sdepth = 5.0 - elif cube.var_name == 'T10m': + elif cube.var_name == "T10m": sdepth = 10.0 else: sdepth = 999.0 logger.info("Could not determin depth. Check results.") cube.add_aux_coord(iris.coords.AuxCoord(sdepth, - standard_name='depth', - long_name='depth', units="m")) + standard_name="depth", + long_name="depth", units="m")) cube.var_name = "gst" cube.standard_name = "soil_temperature" # "valid" standard name - cube.attributes.pop('actual_min') - cube.attributes.pop('actual_max') + cube.attributes.pop("actual_min") + cube.attributes.pop("actual_max") tmp_cube = cubes.merge_cube() # setting the attribute 'positive' is needed for Iris to recognize # this coordinate as 'Z' axis - tmp_cube.coord('depth').attributes['positive'] = "down" + tmp_cube.coord("depth").attributes["positive"] = "down" # swap coordinates 'depth' and 'time': # (depth, time, lat, lon) --> (time, depth, lat, lon) flipped_data = np.swapaxes(tmp_cube.core_data(), 1, 0) - coord_spec = [(tmp_cube.coord('time'), 0), - (tmp_cube.coord('depth'), 1), - (tmp_cube.coord('latitude'), 2), - (tmp_cube.coord('longitude'), 3)] + coord_spec = [(tmp_cube.coord("time"), 0), + (tmp_cube.coord("depth"), 1), + (tmp_cube.coord("latitude"), 2), + (tmp_cube.coord("longitude"), 3)] cube = iris.cube.Cube(flipped_data, dim_coords_and_dims=coord_spec) cube.metadata = tmp_cube.metadata # change units string so unit conversion from deg C --> K will work - cube.units = 'celsius' + cube.units = "celsius" # convert units from degC to K - cube.convert_units('K') + cube.convert_units("K") else: cube = cubes[0] # --> drop attributes that differ among input files for different years # global attributes to remove drop_attrs = [ - 'source', 'date_created', 'history', 'tracking_id', - 'id', 'time_coverage_start', 'time_coverage_end', 'platform', - 'sensor', 'keywords' + "source", "date_created", "history", "tracking_id", + "id", "time_coverage_start", "time_coverage_end", "platform", + "sensor", "keywords", ] # variable attributes to remove drop_var_attrs = [ - 'flag_meanings', 'flag_values', 'grid_mapping', 'actual_range', - 'ancillary_variables' + "flag_meanings", "flag_values", "grid_mapping", "actual_range", + "ancillary_variables", ] for attr in drop_attrs: if attr in cube.attributes.keys(): @@ -238,8 +237,8 @@ def _extract_variable(in_file, var, cfg, out_dir, year): set_global_atts(cube, attributes) - cube.coord('time').points = cube.coord('time').core_points().astype( - 'float64') + cube.coord("time").points = cube.coord("time").core_points().astype( + "float64") # Set correct names cube.var_name = definition.short_name @@ -253,7 +252,7 @@ def _extract_variable(in_file, var, cfg, out_dir, year): cube.units = definition.units # Fix data type - cube.data = cube.core_data().astype('float32') + cube.data = cube.core_data().astype("float32") # Fix coordinates cube = _fix_coordinates(cube, definition) @@ -263,21 +262,21 @@ def _extract_variable(in_file, var, cfg, out_dir, year): logger.debug("Setting time dimension to UNLIMITED while saving!") save_variable(cube, cube.var_name, out_dir, attributes, - unlimited_dimensions=['time']) + unlimited_dimensions=["time"]) os.remove(regridded_file) # delete temporary file logger.info("Finished CMORizing %s", in_file) def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """CMORize ESACCI-PERMAFROST dataset.""" - glob_attrs = cfg['attributes'] + glob_attrs = cfg["attributes"] logger.info("Starting CMORization for tier%s OBS files: %s", - glob_attrs['tier'], glob_attrs['dataset_id']) + glob_attrs["tier"], glob_attrs["dataset_id"]) logger.info("Input data from: %s", in_dir) logger.info("Output will be written to: %s", out_dir) logger.info("CMORizing ESACCI-PERMAFROST version %s", - glob_attrs['version']) + glob_attrs["version"]) if start_date is None: start_date = datetime(1997, 1, 1) @@ -286,11 +285,11 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): loop_date = start_date while loop_date <= end_date: - for short_name, var in cfg['variables'].items(): - if 'short_name' not in var: - var['short_name'] = short_name + for short_name, var in cfg["variables"].items(): + if "short_name" not in var: + var["short_name"] = short_name filepattern = os.path.join( - in_dir, var['file'].format(year=loop_date.year) + in_dir, var["file"].format(year=loop_date.year), ) in_file = glob.glob(filepattern)[0] if not in_file: From ba1359e43e8c7df72a49097d6717d6da5be047e4 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 23 Jul 2025 11:38:15 +0200 Subject: [PATCH 12/15] updated recipe_check_obs.yml --- esmvaltool/recipes/examples/recipe_check_obs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 568c84e007..0f33ef0466 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -268,7 +268,7 @@ diagnostics: pfr: additional_datasets: - {dataset: ESACCI-PERMAFROST, project: OBS6, mip: Lyr, tier: 2, type: sat, version: v3.0, - start_year: 1997, end_year: 2019} + frequency: yr, start_year: 1997, end_year: 2019} scripts: null ESACCI-OC: From b174566425a820c8c35dc21c907ae74dedd74100 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 23 Jul 2025 11:40:50 +0200 Subject: [PATCH 13/15] updated input.rst --- doc/sphinx/source/input.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index dd384006d5..8dbdea7e32 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -336,7 +336,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SEAICE | siconc (SIday, SImon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| ESACCI-PERMAFROST | alt, gtd, pfr (custom) | 3 | Python | +| ESACCI-PERMAFROST | alt, gtd, pfr (Lyr, frequency: yr) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SEA-SURFACE-SALINITY | sos (Omon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ From 133cd2e6f47939a501269bf70b67840c95ecc8cc Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 23 Jul 2025 12:53:28 +0200 Subject: [PATCH 14/15] small formatting updates --- .../downloaders/datasets/esacci_permafrost.py | 26 +-- .../formatters/datasets/esacci_permafrost.py | 167 +++++++++++------- 2 files changed, 121 insertions(+), 72 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py index b687595666..cfe2072f07 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py @@ -10,12 +10,14 @@ logger = logging.getLogger(__name__) -def download_dataset(config: dict, - dataset: str, - dataset_info: dict, - start_date: datetime, - end_date: datetime, - overwrite: bool) -> None: +def download_dataset( + config: dict, + dataset: str, + dataset_info: dict, + start_date: datetime, + end_date: datetime, + overwrite: bool, +) -> None: """Download dataset. Parameters @@ -49,8 +51,11 @@ def download_dataset(config: dict, version = "v03.0" - ccivars = ["active_layer_thickness", "ground_temperature", - "permafrost_extent"] + ccivars = [ + "active_layer_thickness", + "ground_temperature", + "permafrost_extent", + ] # download active layer thickness loop_date = start_date @@ -61,6 +66,7 @@ def download_dataset(config: dict, if downloader.file_exists(fname, pathname): downloader.download_files(fname, pathname) else: - logger.info("%d: no data for %s %s", - loop_date.year, var, version) + logger.info( + "%d: no data for %s %s", loop_date.year, var, version + ) loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py index e303b9e155..90b91407f6 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py @@ -69,32 +69,34 @@ def _regrid_infile(infile, outfile, weightsfile): # description of ESACCI-PERMAFROST v3.0 polar stereographic # grid for cdo - esagrid = (f'gridtype = projection\n' - f'gridsize = {totalsize}\n' - f'xsize = {xsize}\n' - f'ysize = {ysize}\n' - f'xname = x\n' - f'xlongname = "x coordinate of projection"\n' - f'xunits = "m"\n' - f'yname = y\n' - f'ylongname = "y coordinate of projection"\n' - f'yunits = "m"\n' - f'xfirst = -6111475.22239475\n' - f'xinc = 926.625433138333\n' - f'yfirst = 4114895.09469662\n' - f'yinc = -926.625433138333\n' - f'grid_mapping = polar_stereographic\n' - f'grid_mapping_name = polar_stereographic\n' - f'straight_vertical_longitude_from_pole = 0.\n' - f'false_easting = 0.\n' - f'false_northing = 0.\n' - f'latitude_of_projection_origin = 90.\n' - f'standard_parallel = 71.\n' - f'longitude_of_prime_meridian = 0.\n' - f'semi_major_axis = 6378137.\n' - f'inverse_flattening = 298.257223563\n' - f'proj_params = "+proj=stere +lat_0=90 +lat_ts=71 +lon_0=0' - f' +k=1" +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"\n') + esagrid = ( + f"gridtype = projection\n" + f"gridsize = {totalsize}\n" + f"xsize = {xsize}\n" + f"ysize = {ysize}\n" + f"xname = x\n" + f'xlongname = "x coordinate of projection"\n' + f'xunits = "m"\n' + f"yname = y\n" + f'ylongname = "y coordinate of projection"\n' + f'yunits = "m"\n' + f"xfirst = -6111475.22239475\n" + f"xinc = 926.625433138333\n" + f"yfirst = 4114895.09469662\n" + f"yinc = -926.625433138333\n" + f"grid_mapping = polar_stereographic\n" + f"grid_mapping_name = polar_stereographic\n" + f"straight_vertical_longitude_from_pole = 0.\n" + f"false_easting = 0.\n" + f"false_northing = 0.\n" + f"latitude_of_projection_origin = 90.\n" + f"standard_parallel = 71.\n" + f"longitude_of_prime_meridian = 0.\n" + f"semi_major_axis = 6378137.\n" + f"inverse_flattening = 298.257223563\n" + f'proj_params = "+proj=stere +lat_0=90 +lat_ts=71 +lon_0=0' + f' +k=1" +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"\n' + ) esagrid_file = "./esacci_grid.txt" @@ -119,37 +121,55 @@ def _regrid_infile(infile, outfile, weightsfile): # expected values src = weights.variables["src_grid_dims"] dst = weights.variables["dst_grid_dims"] - if (xsize == src[0] and ysize == src[1] and - target_dimx == dst[0] and target_dimy == dst[1]): - logger.info("Using matching weights file %s for regridding.", - weightsfile) + if ( + xsize == src[0] + and ysize == src[1] + and target_dimx == dst[0] + and target_dimy == dst[1] + ): + logger.info( + "Using matching weights file %s for regridding.", weightsfile + ) weightsfile_ok = True weights.close() # if no suitable weights file, generate new weights for regridding if not weightsfile_ok: - logger.info("Generating regridding weights. This will take" - " about 5-10 minutes (or more)...") + logger.info( + "Generating regridding weights. This will take" + " about 5-10 minutes (or more)..." + ) # check if path for weight files exists, if not create folder path = os.path.split(weightsfile)[0] if not os.path.exists(path): os.makedirs(path) # generate weights - cdo.genbil(f"{target_grid} -setgrid,{esagrid_file}", - input=infile, output=weightsfile, options="-f nc") + cdo.genbil( + f"{target_grid} -setgrid,{esagrid_file}", + input=infile, + output=weightsfile, + options="-f nc", + ) # now regrid data to 0.5 deg x 0.5 deg - cdo.remap(f"{target_grid},{weightsfile} -setgrid,{esagrid_file}", - input=infile, output=outfile, options="-f nc") + cdo.remap( + f"{target_grid},{weightsfile} -setgrid,{esagrid_file}", + input=infile, + output=outfile, + options="-f nc", + ) # delete temporary file os.remove(esagrid_file) def _extract_variable(in_file, var, cfg, out_dir, year): - logger.info("CMORizing variable '%s' from input file '%s'", - var["short_name"], in_file) + logger.info( + "CMORizing variable '%s' from input file '%s'", + var["short_name"], + in_file, + ) attributes = deepcopy(cfg["attributes"]) attributes["mip"] = var["mip"] attributes["raw"] = var["raw"] @@ -189,9 +209,11 @@ def _extract_variable(in_file, var, cfg, out_dir, year): else: sdepth = 999.0 logger.info("Could not determin depth. Check results.") - cube.add_aux_coord(iris.coords.AuxCoord(sdepth, - standard_name="depth", - long_name="depth", units="m")) + cube.add_aux_coord( + iris.coords.AuxCoord( + sdepth, standard_name="depth", long_name="depth", units="m" + ) + ) cube.var_name = "gst" cube.standard_name = "soil_temperature" # "valid" standard name cube.attributes.pop("actual_min") @@ -203,10 +225,12 @@ def _extract_variable(in_file, var, cfg, out_dir, year): # swap coordinates 'depth' and 'time': # (depth, time, lat, lon) --> (time, depth, lat, lon) flipped_data = np.swapaxes(tmp_cube.core_data(), 1, 0) - coord_spec = [(tmp_cube.coord("time"), 0), - (tmp_cube.coord("depth"), 1), - (tmp_cube.coord("latitude"), 2), - (tmp_cube.coord("longitude"), 3)] + coord_spec = [ + (tmp_cube.coord("time"), 0), + (tmp_cube.coord("depth"), 1), + (tmp_cube.coord("latitude"), 2), + (tmp_cube.coord("longitude"), 3), + ] cube = iris.cube.Cube(flipped_data, dim_coords_and_dims=coord_spec) cube.metadata = tmp_cube.metadata # change units string so unit conversion from deg C --> K will work @@ -219,13 +243,23 @@ def _extract_variable(in_file, var, cfg, out_dir, year): # --> drop attributes that differ among input files for different years # global attributes to remove drop_attrs = [ - "source", "date_created", "history", "tracking_id", - "id", "time_coverage_start", "time_coverage_end", "platform", - "sensor", "keywords", + "source", + "date_created", + "history", + "tracking_id", + "id", + "time_coverage_start", + "time_coverage_end", + "platform", + "sensor", + "keywords", ] # variable attributes to remove drop_var_attrs = [ - "flag_meanings", "flag_values", "grid_mapping", "actual_range", + "flag_meanings", + "flag_values", + "grid_mapping", + "actual_range", "ancillary_variables", ] for attr in drop_attrs: @@ -237,8 +271,9 @@ def _extract_variable(in_file, var, cfg, out_dir, year): set_global_atts(cube, attributes) - cube.coord("time").points = cube.coord("time").core_points().astype( - "float64") + cube.coord("time").points = ( + cube.coord("time").core_points().astype("float64") + ) # Set correct names cube.var_name = definition.short_name @@ -260,9 +295,9 @@ def _extract_variable(in_file, var, cfg, out_dir, year): # Save results logger.debug("Saving cube\n%s", cube) logger.debug("Setting time dimension to UNLIMITED while saving!") - save_variable(cube, cube.var_name, - out_dir, attributes, - unlimited_dimensions=["time"]) + save_variable( + cube, cube.var_name, out_dir, attributes, unlimited_dimensions=["time"] + ) os.remove(regridded_file) # delete temporary file logger.info("Finished CMORizing %s", in_file) @@ -271,12 +306,16 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """CMORize ESACCI-PERMAFROST dataset.""" glob_attrs = cfg["attributes"] - logger.info("Starting CMORization for tier%s OBS files: %s", - glob_attrs["tier"], glob_attrs["dataset_id"]) + logger.info( + "Starting CMORization for tier%s OBS files: %s", + glob_attrs["tier"], + glob_attrs["dataset_id"], + ) logger.info("Input data from: %s", in_dir) logger.info("Output will be written to: %s", out_dir) - logger.info("CMORizing ESACCI-PERMAFROST version %s", - glob_attrs["version"]) + logger.info( + "CMORizing ESACCI-PERMAFROST version %s", glob_attrs["version"] + ) if start_date is None: start_date = datetime(1997, 1, 1) @@ -289,12 +328,16 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): if "short_name" not in var: var["short_name"] = short_name filepattern = os.path.join( - in_dir, var["file"].format(year=loop_date.year), - ) + in_dir, + var["file"].format(year=loop_date.year), + ) in_file = glob.glob(filepattern)[0] if not in_file: - logger.info("%d: no data not found for variable %s", - loop_date.year, short_name) + logger.info( + "%d: no data not found for variable %s", + loop_date.year, + short_name, + ) else: _extract_variable(in_file, var, cfg, out_dir, loop_date.year) From 52c7782984ddb9977f01490319564c8d1f967dcb Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Thu, 24 Jul 2025 13:53:53 +0200 Subject: [PATCH 15/15] format updates --- .../downloaders/datasets/esacci_permafrost.py | 5 +++- .../formatters/datasets/esacci_permafrost.py | 23 +++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py index cfe2072f07..02cc4f553f 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py @@ -67,6 +67,9 @@ def download_dataset( downloader.download_files(fname, pathname) else: logger.info( - "%d: no data for %s %s", loop_date.year, var, version + "%d: no data for %s %s", + loop_date.year, + var, + version, ) loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py index 90b91407f6..96d88f43cc 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) -def _fix_coordinates(cube, definition): +def _fix_coordinates(cube: iris.cube.Cube, definition): """Fix coordinates.""" axis2def = {"T": "time", "X": "longitude", "Y": "latitude", "Z": "sdepth"} axes = ["T", "X", "Y", "Z"] @@ -128,7 +128,8 @@ def _regrid_infile(infile, outfile, weightsfile): and target_dimy == dst[1] ): logger.info( - "Using matching weights file %s for regridding.", weightsfile + "Using matching weights file %s for regridding.", + weightsfile, ) weightsfile_ok = True weights.close() @@ -138,7 +139,7 @@ def _regrid_infile(infile, outfile, weightsfile): if not weightsfile_ok: logger.info( "Generating regridding weights. This will take" - " about 5-10 minutes (or more)..." + " about 5-10 minutes (or more)...", ) # check if path for weight files exists, if not create folder path = os.path.split(weightsfile)[0] @@ -211,8 +212,11 @@ def _extract_variable(in_file, var, cfg, out_dir, year): logger.info("Could not determin depth. Check results.") cube.add_aux_coord( iris.coords.AuxCoord( - sdepth, standard_name="depth", long_name="depth", units="m" - ) + sdepth, + standard_name="depth", + long_name="depth", + units="m", + ), ) cube.var_name = "gst" cube.standard_name = "soil_temperature" # "valid" standard name @@ -296,7 +300,11 @@ def _extract_variable(in_file, var, cfg, out_dir, year): logger.debug("Saving cube\n%s", cube) logger.debug("Setting time dimension to UNLIMITED while saving!") save_variable( - cube, cube.var_name, out_dir, attributes, unlimited_dimensions=["time"] + cube, + cube.var_name, + out_dir, + attributes, + unlimited_dimensions=["time"], ) os.remove(regridded_file) # delete temporary file logger.info("Finished CMORizing %s", in_file) @@ -314,7 +322,8 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): logger.info("Input data from: %s", in_dir) logger.info("Output will be written to: %s", out_dir) logger.info( - "CMORizing ESACCI-PERMAFROST version %s", glob_attrs["version"] + "CMORizing ESACCI-PERMAFROST version %s", + glob_attrs["version"], ) if start_date is None: