From 8f9e9ea3930fe4d1b45edac9999967964ac35610 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Fri, 10 Mar 2023 18:00:53 +0100 Subject: [PATCH 01/17] Started revision of grid mapping identification --- test/core/gridmapping/test_cfconv.py | 325 +---------------------- xcube/core/gridmapping/cfconv.py | 376 ++++++++++++++------------- 2 files changed, 197 insertions(+), 504 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index e15c11781..742db7876 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -1,21 +1,6 @@ -import shutil import unittest -from typing import Set -import fsspec -import numpy as np import pyproj -import xarray as xr - -from test.s3test import MOTO_SERVER_ENDPOINT_URL -from test.s3test import S3Test -from xcube.core.gridmapping import GridMapping -from xcube.core.gridmapping.cfconv import GridCoords -from xcube.core.gridmapping.cfconv import GridMappingProxy -from xcube.core.gridmapping.cfconv import add_spatial_ref -from xcube.core.gridmapping.cfconv import get_dataset_grid_mapping_proxies -from xcube.core.new import new_cube -from xcube.core.store.fs.registry import new_fs_data_store CRS_WGS84 = pyproj.crs.CRS(4326) CRS_CRS84 = pyproj.crs.CRS.from_string("urn:ogc:def:crs:OGC:1.3:CRS84") @@ -27,310 +12,6 @@ grid_north_pole_longitude=170.)) -class GetDatasetGridMappingsTest(unittest.TestCase): - def test_no_crs_lon_lat_common_names(self): - dataset = xr.Dataset(coords=dict(lon=xr.DataArray(np.linspace(10, 12, 11), dims='lon'), - lat=xr.DataArray(np.linspace(50, 52, 11), dims='lat'))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn(None, grid_mappings) - grid_mapping = grid_mappings.get(None) - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_WGS84, grid_mapping.crs) - self.assertEqual('latitude_longitude', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('lon', grid_mapping.coords.x.name) - self.assertEqual('lat', grid_mapping.coords.y.name) - - def test_no_crs_lon_lat_standard_names(self): - dataset = xr.Dataset(coords=dict(weird_x=xr.DataArray(np.linspace(10, 12, 11), dims='i', - attrs=dict(standard_name='longitude')), - weird_y=xr.DataArray(np.linspace(50, 52, 11), dims='j', - attrs=dict(standard_name='latitude')))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn(None, grid_mappings) - grid_mapping = grid_mappings.get(None) - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_WGS84, grid_mapping.crs) - self.assertEqual('latitude_longitude', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('weird_x', grid_mapping.coords.x.name) - self.assertEqual('weird_y', grid_mapping.coords.y.name) - - def test_crs_x_y_with_common_names(self): - dataset = xr.Dataset(dict(crs=xr.DataArray(0, attrs=CRS_UTM_33N.to_cf())), - coords=dict(x=xr.DataArray(np.linspace(1000, 12000, 11), dims='x'), - y=xr.DataArray(np.linspace(5000, 52000, 11), dims='y'))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('crs', grid_mappings) - grid_mapping = grid_mappings.get('crs') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_UTM_33N, grid_mapping.crs) - self.assertEqual('transverse_mercator', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('x', grid_mapping.coords.x.name) - self.assertEqual('y', grid_mapping.coords.y.name) - - def test_crs_x_y_with_standard_names(self): - dataset = xr.Dataset(dict(crs=xr.DataArray(0, attrs=CRS_UTM_33N.to_cf())), - coords=dict(myx=xr.DataArray(np.linspace(1000, 12000, 11), dims='x', - attrs=dict(standard_name='projection_x_coordinate')), - myy=xr.DataArray(np.linspace(5000, 52000, 11), dims='y', - attrs=dict(standard_name='projection_y_coordinate')))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('crs', grid_mappings) - grid_mapping = grid_mappings.get('crs') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_UTM_33N, grid_mapping.crs) - self.assertEqual('transverse_mercator', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('myx', grid_mapping.coords.x.name) - self.assertEqual('myy', grid_mapping.coords.y.name) - - def test_latitude_longitude_with_x_y(self): - # This is what we get when opening a CRS-84 GeoTIFF using rioxarray - dataset = xr.Dataset( - dict( - band_1=xr.DataArray(np.zeros((11, 11)), dims=["y", "x"]), - spatial_ref=xr.DataArray(0, attrs=CRS_CRS84.to_cf()) - ), - coords=dict( - x=xr.DataArray(np.linspace(10, 20, 11), dims='x'), - y=xr.DataArray(np.linspace(50, 40, 11), dims='y'), - ) - ) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('spatial_ref', grid_mappings) - grid_mapping = grid_mappings.get('spatial_ref') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_CRS84, grid_mapping.crs) - self.assertEqual('latitude_longitude', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('x', grid_mapping.coords.x.name) - self.assertEqual('y', grid_mapping.coords.y.name) - - def test_rotated_pole_with_common_names(self): - dataset = xr.Dataset( - dict( - rotated_pole=xr.DataArray(0, attrs=CRS_ROTATED_POLE.to_cf()) - ), - coords=dict( - rlon=xr.DataArray(np.linspace(-180, 180, 11), dims='rlon'), - rlat=xr.DataArray(np.linspace(0, 90, 11), dims='rlat') - ) - ) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('rotated_pole', grid_mappings) - grid_mapping = grid_mappings.get('rotated_pole') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual('Derived Geographic 2D CRS', - grid_mapping.crs.type_name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('rlon', grid_mapping.coords.x.name) - self.assertEqual('rlat', grid_mapping.coords.y.name) - - def test_rotated_pole_with_standard_names(self): - dataset = xr.Dataset( - dict( - rotated_pole=xr.DataArray(0, - attrs=CRS_ROTATED_POLE.to_cf()) - ), - coords=dict( - u=xr.DataArray(np.linspace(-180, 180, 11), dims='u', - attrs=dict(standard_name='grid_longitude')), - v=xr.DataArray(np.linspace(0, 90, 11), dims='v', - attrs=dict(standard_name='grid_latitude')) - ) - ) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('rotated_pole', grid_mappings) - grid_mapping = grid_mappings.get('rotated_pole') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual('Derived Geographic 2D CRS', - grid_mapping.crs.type_name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('u', grid_mapping.coords.x.name) - self.assertEqual('v', grid_mapping.coords.y.name) - - -class XarrayDecodeCfTest(unittest.TestCase): - """Find out how xarray treats 1D and 2D coordinate variables when decode_cf=True or =False""" - - def test_cf_1d_coords(self): - self._write_coords(*self._gen_1d()) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=True) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=False) - - def test_cf_1d_data_vars(self): - self._write_data_vars(*self._gen_1d()) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=True) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=False) - - def test_cf_2d_coords(self): - self._write_coords(*self._gen_2d()) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=True) - self.assertVarNames({'noise', 'crs', 'lon', 'lat'}, set(), decode_cf=False) - - def test_cf_2d_data_vars(self): - self._write_data_vars(*self._gen_2d()) - self.assertVarNames({'noise', 'crs', 'lon', 'lat'}, set(), decode_cf=True) - self.assertVarNames({'noise', 'crs', 'lon', 'lat'}, set(), decode_cf=False) - - def assertVarNames(self, exp_data_vars: Set[str], exp_coords: Set[str], decode_cf: bool): - ds1 = xr.open_zarr('noise.zarr', decode_cf=decode_cf) - self.assertEqual(exp_coords, set(ds1.coords)) - self.assertEqual(exp_data_vars, set(ds1.data_vars)) - - @classmethod - def _write_coords(cls, noise, crs, lon, lat): - dataset = xr.Dataset(dict(noise=noise, crs=crs), coords=dict(lon=lon, lat=lat)) - dataset.to_zarr('noise.zarr', mode='w') - - @classmethod - def _write_data_vars(cls, noise, crs, lon, lat): - dataset = xr.Dataset(dict(noise=noise, crs=crs, lon=lon, lat=lat)) - dataset.to_zarr('noise.zarr', mode='w') - - @classmethod - def tearDownClass(cls) -> None: - shutil.rmtree('noise.zarr') - - def _gen_1d(self): - noise = xr.DataArray(np.random.random((11, 11)), dims=('lat', 'lon')) - crs = xr.DataArray(0, attrs=CRS_CRS84.to_cf()) - lon = xr.DataArray(np.linspace(10, 12, 11), dims='lon') - lat = xr.DataArray(np.linspace(50, 52, 11), dims='lat') - noise.attrs['grid_mapping'] = 'crs' - lon.attrs['standard_name'] = 'longitude' - lat.attrs['standard_name'] = 'latitude' - self.assertEqual(('lon',), lon.dims) - self.assertEqual(('lat',), lat.dims) - return noise, crs, lon, lat - - def _gen_2d(self): - noise = xr.DataArray(np.random.random((11, 11)), dims=('y', 'x')) - crs = xr.DataArray(0, attrs=CRS_CRS84.to_cf()) - lon = xr.DataArray(np.linspace(10, 12, 11), dims='x') - lat = xr.DataArray(np.linspace(50, 52, 11), dims='y') - lat, lon = xr.broadcast(lat, lon) - noise.attrs['grid_mapping'] = 'crs' - lon.attrs['standard_name'] = 'longitude' - lat.attrs['standard_name'] = 'latitude' - self.assertEqual(('y', 'x'), lon.dims) - self.assertEqual(('y', 'x'), lat.dims) - return noise, crs, lon, lat - - -class AddSpatialRefTest(S3Test, unittest.TestCase): - - def test_add_spatial_ref(self): - self.assert_add_spatial_ref_ok(None, None) - self.assert_add_spatial_ref_ok(None, ('cx', 'cy')) - self.assert_add_spatial_ref_ok('crs', None) - self.assert_add_spatial_ref_ok('crs', ('cx', 'cy')) - - def assert_add_spatial_ref_ok(self, crs_var_name, xy_dim_names): - - root = 'eurodatacube-test/xcube-eea' - data_id = 'test.zarr' - crs = pyproj.CRS.from_string("EPSG:3035") - - if xy_dim_names: - x_name, y_name = xy_dim_names - else: - x_name, y_name = 'x', 'y' - - cube = new_cube(x_name=x_name, - y_name=y_name, - x_start=0, - y_start=0, - x_res=10, - y_res=10, - x_units='metres', - y_units='metres', - drop_bounds=True, - width=100, - height=100, - variables=dict(A=1.3, B=8.3)) - - storage_options = dict( - anon=False, - client_kwargs=dict( - endpoint_url=MOTO_SERVER_ENDPOINT_URL, - ) - ) - - fs: fsspec.AbstractFileSystem = fsspec.filesystem('s3', - **storage_options) - if fs.isdir(root): - fs.rm(root, recursive=True) - fs.mkdirs(root, exist_ok=True) - - data_store = new_fs_data_store('s3', - root=root, - storage_options=storage_options) - - data_store.write_data(cube, data_id=data_id) - cube = data_store.open_data(data_id) - self.assertEqual({'A', 'B', 'time', x_name, y_name}, - set(cube.variables)) - - with self.assertRaises(ValueError) as cm: - GridMapping.from_dataset(cube) - self.assertEqual( - ('cannot find any grid mapping in dataset',), - cm.exception.args - ) - - path = f"{root}/{data_id}" - group_store = fs.get_mapper(path, create=True) - - expected_crs_var_name = crs_var_name or 'spatial_ref' - - self.assertTrue(fs.exists(path)) - self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}")) - self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) - self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) - - kwargs = {} - if crs_var_name is not None: - kwargs.update(crs_var_name=crs_var_name) - if xy_dim_names is not None: - kwargs.update(xy_dim_names=xy_dim_names) - add_spatial_ref(group_store, crs, **kwargs) - - self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}")) - self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) - self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) - - cube = data_store.open_data(data_id) - self.assertEqual({'A', 'B', 'time', - x_name, y_name, expected_crs_var_name}, - set(cube.variables)) - self.assertEqual(expected_crs_var_name, - cube.A.attrs.get('grid_mapping')) - self.assertEqual(expected_crs_var_name, - cube.B.attrs.get('grid_mapping')) - - gm = GridMapping.from_dataset(cube) - self.assertIn("LAEA Europe", gm.crs.srs) +class GetDatasetGridMappingTest(unittest.TestCase): + def test_it(self): + pass diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 5f212a307..3bb991ea4 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -33,206 +33,218 @@ from xcube.util.assertions import assert_instance from .base import CRS_WGS84 -class GridCoords: - """ - Grid coordinates comprising x and y of - type xarray.DataArray. - """ - - def __init__(self): - self.x: Optional[xr.DataArray] = None - self.y: Optional[xr.DataArray] = None - - -class GridMappingProxy: - """ - Grid mapping comprising *crs* of type pyproj.crs.CRS, - grid coordinates, an optional name, coordinates, and a - tile size (= spatial chunk sizes). - """ +class GridMappingSchema: def __init__(self, - crs: Optional[pyproj.crs.CRS] = None, - name: Optional[str] = None, - coords: Optional[GridCoords] = None, - tile_size: Optional[Tuple[int, int]] = None): - self.crs = crs + name: str, + standard_names: Tuple[str, str], + common_names: Tuple[Tuple[str, str], ...], + default_crs: Optional[pyproj.CRS]): self.name = name - self.coords = coords - self.tile_size = tile_size - - -def get_dataset_grid_mapping_proxies( - dataset: xr.Dataset, - *, - missing_latitude_longitude_crs: pyproj.crs.CRS = None, - missing_rotated_latitude_longitude_crs: pyproj.crs.CRS = None, - missing_projected_crs: pyproj.crs.CRS = None, - emit_warnings: bool = False -) -> Dict[Union[Hashable, None], GridMappingProxy]: + self.standard_names = standard_names + self.common_var_names = common_names + self.default_crs = default_crs + + +GMS_LATITUDE_LONGITUDE = GridMappingSchema( + 'latitude_longitude', + ('longitude', 'latitude'), + (('lon', 'lat'), + ('longitude', 'latitude')), + CRS_WGS84 +) + +GMS_ROTATED_LATITUDE_LONGITUDE = GridMappingSchema( + 'rotated_latitude_longitude', + ('grid_longitude', 'grid_latitude'), + (('rlon', 'rlat'), + ('rlongitude', 'rlatitude')), + CRS_WGS84 +) + +GMS_PROJECTED = GridMappingSchema( + 'projected', + ('projection_x_coordinate', 'projection_y_coordinate'), + (('x', 'y'), + ('xc', 'yc'), + ('transformed_x', 'transformed_y')), + None +) + +GM_SCHEMAS = (GMS_LATITUDE_LONGITUDE, + GMS_ROTATED_LATITUDE_LONGITUDE, + GMS_PROJECTED) + + +class GridMappingTemplate: + def __init__(self, + var_name: str, + var: xr.DataArray, + crs: pyproj.CRS, + x_coords: Optional[xr.DataArray] = None, + y_coords: Optional[xr.DataArray] = None): + self.var_name = var_name + self.var = var + self.crs = crs + self.x_coords = x_coords + self.y_coords = y_coords + self.ref_vars: List[xr.DataArray] = [] + +def get_dataset_grid_mapping_templates(dataset: xr.Dataset) \ + -> List[GridMappingTemplate]: """ Find grid mappings encoded as described in the CF conventions [Horizontal Coordinate Reference Systems, Grid Mappings, and Projections] (http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#grid-mappings-and-projections). - :param dataset: - :param missing_latitude_longitude_crs: - :param missing_rotated_latitude_longitude_crs: - :param missing_projected_crs: - :param emit_warnings: + :param dataset: The dataset :return: """ - grid_mapping_proxies: Dict[Union[Hashable, None], - GridMappingProxy] = dict() - # Find any grid mapping variables by CF 'grid_mapping' attribute - # - for var_name, var in dataset.variables.items(): - grid_mapping_var_name = var.attrs.get('grid_mapping') - if grid_mapping_var_name \ - and grid_mapping_var_name not in grid_mapping_proxies \ - and grid_mapping_var_name in dataset: - grid_mapping_var = dataset[grid_mapping_var_name] - gmp = _parse_crs_from_attrs(grid_mapping_var.attrs) - grid_mapping_proxies[grid_mapping_var_name] = gmp - - # If no grid mapping variables found, - # try if CRS is encoded in some variable's attributes - # - if not grid_mapping_proxies: - for var_name, var in dataset.variables.items(): - gmp = _parse_crs_from_attrs(var.attrs) - if gmp is not None: - grid_mapping_proxies[var_name] = gmp - break - - # If no grid mapping variables found, - # try if CRS is encoded in dataset attributes - # - if not grid_mapping_proxies: - gmp = _parse_crs_from_attrs(dataset.attrs) - if gmp is not None: - grid_mapping_proxies[None] = gmp - - # Find coordinate variables. - # + grid_mappings = _get_raw_grid_mappings(dataset) + + if grid_mappings: + for gm in grid_mappings.values(): + if gm.ref_vars: + ref_var = gm.ref_vars[0] + y_dim, x_dim = ref_var[-2:] + ok = False + if x_dim in dataset.coords and y_dim in dataset.coords: + x_coords = dataset.coords[x_dim] + y_coords = dataset.coords[y_dim] + if x_coords.ndim == 1 and y_coords.ndim == 1: + gm.x_coords = x_coords + gm.y_coords = y_coords + ok = True + if not ok: + for gms in GM_SCHEMAS: + x_name, y_name = gms.standard_names + x_coords = None + y_coords = None + for var_name, var in dataset.coords.items(): + standard_name = var.attrs.get("standard_name") + if standard_name == x_name \ + and var.ndim == 1 \ + and var.dims[0] == x_dim: + x_coords = var + if standard_name == y_name \ + and var.ndim == 1 \ + and var.dims[0] == y_dim: + y_coords = var + if x_coords is not None and y_coords is not None: + gm.x_coords = x_coords + gm.y_coords = y_coords + ok = True + + if not ok: + + +def _get_xy_coords(dataset: xr.Dataset, + x_dim: str, + y_dim: str) \ + -> Optional[Tuple[xr.DataArray, + xr.DataArray, + Optional[GridMappingSchema]]]: + x_dims = (x_dim,) + y_dims = (y_dim,) + yx_dims = (y_dim, x_dim) + + # 1D-coordinate variables named after their dimensions + if x_dim in dataset.coords and y_dim in dataset.coords: + x_coords = dataset.coords[x_dim] + y_coords = dataset.coords[y_dim] + if x_coords.dims == x_dims and y_coords.dims == y_dims: + return x_coords, y_coords, None + + # 1D-coordinate variables identified by "standard_name" + for gms in GM_SCHEMAS: + x_name, y_name = gms.standard_names + x_coords = None + y_coords = None + for var_name, var in dataset.coords.items(): + standard_name = var.attrs.get("standard_name") + if standard_name == x_name and var.dims == x_dims: + x_coords = var + if standard_name == y_name and var.dims == y_dims: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords, gms + + # 2D-coordinate variables identified by "standard_name" + for gms in GM_SCHEMAS: + x_name, y_name = gms.standard_names + x_coords = None + y_coords = None + for var_name, var in dataset.coords.items(): + standard_name = var.attrs.get("standard_name") + if standard_name == x_name and var.dims == yx_dims: + x_coords = var + if standard_name == y_name and var.dims == yx_dims: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords, gms + + # 1D-coordinate variables identified by common variable names + for gms in GM_SCHEMAS: + for x_name, y_name in gms.common_var_names: + x_coords = dataset.get(x_name) + y_coords = dataset.get(y_name) + if x_coords is not None and y_coords is not None \ + and x_coords.dims == x_dims \ + and y_coords.dims == y_dims: + return x_coords, y_coords, gms + + # 2D-coordinate variables identified by common variable names + for gms in GM_SCHEMAS: + for x_name, y_name in gms.common_var_names: + x_coords = dataset.get(x_name) + y_coords = dataset.get(y_name) + if x_coords is not None and y_coords is not None \ + and x_coords.dims == yx_dims \ + and y_coords.dims == yx_dims: + return x_coords, y_coords, gms - latitude_longitude_coords = GridCoords() - rotated_latitude_longitude_coords = GridCoords() - projected_coords = GridCoords() - - potential_coord_vars = _find_potential_coord_vars(dataset) + return None - # Find coordinate variables that use a CF standard_name. +def _get_raw_grid_mappings(dataset): + grid_mappings = dict() + # Find any grid mappings by parsing variable attributes to CRS # - coords_standard_names = ( - (latitude_longitude_coords, - 'longitude', 'latitude'), - (rotated_latitude_longitude_coords, - 'grid_longitude', 'grid_latitude'), - (projected_coords, - 'projection_x_coordinate', 'projection_y_coordinate') - ) - for var_name in potential_coord_vars: - var = dataset[var_name] - if var.ndim not in (1, 2): + for var_name, var in dataset.data_vars.items(): + try: + crs = pyproj.crs.CRS.from_cf(var.attrs) + except pyproj.crs.CRSError: continue - standard_name = var.attrs.get('standard_name') - for coords, x_name, y_name in coords_standard_names: - if coords.x is None and standard_name == x_name: - coords.x = var - if coords.y is None and standard_name == y_name: - coords.y = var - - # Find coordinate variables by common naming convention. + grid_mappings[var_name] = GridMappingTemplate(var, crs) + # Add spatial variables to corresponding grid mapping + # by their CF 'grid_mapping' attribute # - coords_var_names = ( - (latitude_longitude_coords, - ('lon', 'longitude'), - ('lat', 'latitude')), - (rotated_latitude_longitude_coords, - ('rlon', 'rlongitude'), - ('rlat', 'rlatitude')), - (projected_coords, - ('x', 'xc', 'transformed_x'), - ('y', 'yc', 'transformed_y')) - ) - for var_name in potential_coord_vars: - var = dataset[var_name] - if var.ndim not in (1, 2): + for var_name, var in dataset.data_vars.items(): + if var_name in grid_mappings: continue - for coords, x_names, y_names in coords_var_names: - if coords.x is None and var_name in x_names: - coords.x = var - if coords.y is None and var_name in y_names: - coords.y = var - - # Assign found coordinates to grid mappings - # - for gmp in grid_mapping_proxies.values(): - if gmp.name == 'latitude_longitude': - gmp.coords = latitude_longitude_coords - elif gmp.name == 'rotated_latitude_longitude': - gmp.coords = rotated_latitude_longitude_coords + if var.ndim < 2: + continue + gm_var_name = var.attrs.get('grid_mapping') + if not gm_var_name or gm_var_name not in grid_mappings: + continue + gm = grid_mappings[gm_var_name] + gm.ref_vars.append(var) + + if len(grid_mappings) > 1: + # We have more than one grid mapping. + # Just keep the referenced grid mappings. + ref_grid_mappings = filter(lambda gm: bool(gm.ref_var_names), + grid_mappings.values()) + if ref_grid_mappings: + grid_mappings = {gm.var_name: gm for gm in ref_grid_mappings} else: - gmp.coords = projected_coords - - _complement_grid_mapping_coords(latitude_longitude_coords, - 'latitude_longitude', - missing_latitude_longitude_crs - or CRS_WGS84, - grid_mapping_proxies) - _complement_grid_mapping_coords(rotated_latitude_longitude_coords, - 'rotated_latitude_longitude', - missing_rotated_latitude_longitude_crs, - grid_mapping_proxies) - _complement_grid_mapping_coords(projected_coords, - None, - missing_projected_crs, - grid_mapping_proxies) - - # Collect complete grid mappings - complete_grid_mappings = dict() - for var_name, gmp in grid_mapping_proxies.items(): - if gmp.coords is not None \ - and gmp.coords.x is not None \ - and gmp.coords.y is not None \ - and gmp.coords.x.size >= 2 \ - and gmp.coords.y.size >= 2 \ - and gmp.coords.x.ndim == gmp.coords.y.ndim: - if gmp.coords.x.ndim == 1: - gmp.tile_size = _find_dataset_tile_size( - dataset, - gmp.coords.x.dims[0], - gmp.coords.y.dims[0] - ) - complete_grid_mappings[var_name] = gmp - elif gmp.coords.x.ndim == 2 \ - and gmp.coords.x.dims == gmp.coords.y.dims: - gmp.tile_size = _find_dataset_tile_size( - dataset, - gmp.coords.x.dims[1], - gmp.coords.x.dims[0] - ) - complete_grid_mappings[var_name] = gmp - elif emit_warnings: - warnings.warn(f'CRS "{gmp.name}": ' - f'missing x- and/or y-coordinates ' - f'(grid mapping variable "{var_name}": ' - f'grid_mapping_name="{gmp.name}")') - - return complete_grid_mappings - - -def _parse_crs_from_attrs(attrs: Dict[Hashable, Any]) \ - -> Optional[GridMappingProxy]: - # noinspection PyBroadException - try: - crs = pyproj.crs.CRS.from_cf(attrs) - except pyproj.crs.CRSError: - return None - return GridMappingProxy(crs=crs, - name=attrs.get('grid_mapping_name')) + # There are no referenced grid mappings. + # So we cannot unambiguously tell which variable uses which + # grid mappings. Therefore, we choose any: + gm = next(iter(grid_mappings.values())) + grid_mappings = {gm.var_name: gm} + + return grid_mappings def _complement_grid_mapping_coords( From 9e9ebc94017954fe0837e1b23c57e8478e203551 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Fri, 10 Mar 2023 18:37:13 +0100 Subject: [PATCH 02/17] Cont. impl. --- xcube/core/gridmapping/cfconv.py | 46 +++++++------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 3bb991ea4..b85ae7b55 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -103,45 +103,17 @@ def get_dataset_grid_mapping_templates(dataset: xr.Dataset) \ grid_mappings = _get_raw_grid_mappings(dataset) - if grid_mappings: - for gm in grid_mappings.values(): - if gm.ref_vars: - ref_var = gm.ref_vars[0] - y_dim, x_dim = ref_var[-2:] - ok = False - if x_dim in dataset.coords and y_dim in dataset.coords: - x_coords = dataset.coords[x_dim] - y_coords = dataset.coords[y_dim] - if x_coords.ndim == 1 and y_coords.ndim == 1: - gm.x_coords = x_coords - gm.y_coords = y_coords - ok = True - if not ok: - for gms in GM_SCHEMAS: - x_name, y_name = gms.standard_names - x_coords = None - y_coords = None - for var_name, var in dataset.coords.items(): - standard_name = var.attrs.get("standard_name") - if standard_name == x_name \ - and var.ndim == 1 \ - and var.dims[0] == x_dim: - x_coords = var - if standard_name == y_name \ - and var.ndim == 1 \ - and var.dims[0] == y_dim: - y_coords = var - if x_coords is not None and y_coords is not None: - gm.x_coords = x_coords - gm.y_coords = y_coords - ok = True - - if not ok: + for gm in grid_mappings.values(): + if gm.ref_vars: + ref_var = gm.ref_vars[0] + x_dim, y_dim = ref_var.dims[-1], ref_var.dims[-2], + x_coors, y_coords, gms = _get_xy_coords(dataset, x_dim, y_dim) + gm.x_coords, gm.y_coords = x_coors, y_coords def _get_xy_coords(dataset: xr.Dataset, - x_dim: str, - y_dim: str) \ + x_dim: Hashable, + y_dim: Hashable) \ -> Optional[Tuple[xr.DataArray, xr.DataArray, Optional[GridMappingSchema]]]: @@ -175,7 +147,7 @@ def _get_xy_coords(dataset: xr.Dataset, x_name, y_name = gms.standard_names x_coords = None y_coords = None - for var_name, var in dataset.coords.items(): + for var_name, var in dataset.data_vars.items(): standard_name = var.attrs.get("standard_name") if standard_name == x_name and var.dims == yx_dims: x_coords = var From a0e7ae272e318f544c67622b2a986274e57bc812 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 14 Mar 2023 10:27:48 +0100 Subject: [PATCH 03/17] cont. dev --- test/core/gridmapping/test_cfconv.py | 139 ++++++++- xcube/core/gridmapping/cfconv.py | 438 +++++++-------------------- 2 files changed, 251 insertions(+), 326 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index 742db7876..8ddb1351d 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -1,9 +1,13 @@ import unittest +import numpy as np import pyproj +import xarray as xr + +from xcube.core.gridmapping import GridMapping +from xcube.core.gridmapping.cfconv import find_grid_mapping_for_var CRS_WGS84 = pyproj.crs.CRS(4326) -CRS_CRS84 = pyproj.crs.CRS.from_string("urn:ogc:def:crs:OGC:1.3:CRS84") CRS_UTM_33N = pyproj.crs.CRS(32633) CRS_ROTATED_POLE = pyproj.crs.CRS.from_cf( @@ -12,6 +16,133 @@ grid_north_pole_longitude=170.)) -class GetDatasetGridMappingTest(unittest.TestCase): - def test_it(self): - pass +class FindGridMappingForVarTest(unittest.TestCase): + w = 20 + h = 10 + + def new_data_var(self, + shape=None, + dims=None, + attrs=None) -> xr.DataArray: + return xr.DataArray( + np.zeros(shape or (1, self.h, self.w), dtype=np.float32), + dims=dims or ("time", "y", "x"), + attrs=attrs + ) + + def new_x_coord_var(self, + dim=None, + attrs=None) -> xr.DataArray: + return xr.DataArray( + np.linspace(10000, 10000 + 10 * self.w, self.w), + dims=dim or "x", + attrs=attrs + ) + + def new_y_coord_var(self, + dim=None, + attrs=None) -> xr.DataArray: + return xr.DataArray( + np.linspace(10000, 10000 + 10 * self.h, self.h), + dims=dim or "y", + attrs=attrs + ) + + def new_crs_var(self, crs) -> xr.DataArray: + return xr.DataArray(0, attrs=crs.to_cf()) + + def test_with_grid_mapping_and_named_coords(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs: a b"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_grid_mapping_and_standard_name_lat_lon(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_WGS84) + }, + coords={ + "a": self.new_x_coord_var( + attrs={"standard_name": "longitude"} + ), + "b": self.new_y_coord_var( + attrs={"standard_name": "latitude"} + ), + "lon": self.new_x_coord_var(), + "lat": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_grid_mapping_and_standard_name_rot_lat_lon(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_ROTATED_POLE) + }, + coords={ + "a": self.new_x_coord_var( + attrs={"standard_name": "grid_longitude"} + ), + "b": self.new_y_coord_var( + attrs={"standard_name": "grid_latitude"} + ), + "lon": self.new_x_coord_var(), + "lat": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_ROTATED_POLE.to_cf(), gm.crs.to_cf()) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_grid_mapping_and_standard_name_projected(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var( + attrs={"standard_name": "projection_x_coordinate"} + ), + "b": self.new_y_coord_var( + attrs={"standard_name": "projection_y_coordinate"} + ), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index b85ae7b55..652c9c307 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -19,336 +19,130 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import warnings -from collections.abc import MutableMapping -from typing import Optional, Dict, Any, Hashable, Union, Set, List, Tuple +import functools +from typing import Optional, Union, Sequence -import numpy as np import pyproj +import pyproj.exceptions import xarray as xr -import zarr -import zarr.convenience -from xcube.core.schema import get_dataset_chunks from xcube.util.assertions import assert_instance -from .base import CRS_WGS84 - - -class GridMappingSchema: - def __init__(self, - name: str, - standard_names: Tuple[str, str], - common_names: Tuple[Tuple[str, str], ...], - default_crs: Optional[pyproj.CRS]): - self.name = name - self.standard_names = standard_names - self.common_var_names = common_names - self.default_crs = default_crs - - -GMS_LATITUDE_LONGITUDE = GridMappingSchema( - 'latitude_longitude', - ('longitude', 'latitude'), - (('lon', 'lat'), - ('longitude', 'latitude')), - CRS_WGS84 -) - -GMS_ROTATED_LATITUDE_LONGITUDE = GridMappingSchema( - 'rotated_latitude_longitude', - ('grid_longitude', 'grid_latitude'), - (('rlon', 'rlat'), - ('rlongitude', 'rlatitude')), - CRS_WGS84 -) - -GMS_PROJECTED = GridMappingSchema( - 'projected', - ('projection_x_coordinate', 'projection_y_coordinate'), - (('x', 'y'), - ('xc', 'yc'), - ('transformed_x', 'transformed_y')), - None -) - -GM_SCHEMAS = (GMS_LATITUDE_LONGITUDE, - GMS_ROTATED_LATITUDE_LONGITUDE, - GMS_PROJECTED) - - -class GridMappingTemplate: - def __init__(self, - var_name: str, - var: xr.DataArray, - crs: pyproj.CRS, - x_coords: Optional[xr.DataArray] = None, - y_coords: Optional[xr.DataArray] = None): - self.var_name = var_name - self.var = var - self.crs = crs - self.x_coords = x_coords - self.y_coords = y_coords - self.ref_vars: List[xr.DataArray] = [] - -def get_dataset_grid_mapping_templates(dataset: xr.Dataset) \ - -> List[GridMappingTemplate]: - """ - Find grid mappings encoded as described in the CF conventions - [Horizontal Coordinate Reference Systems, Grid Mappings, and Projections] - (http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#grid-mappings-and-projections). - - :param dataset: The dataset - :return: - """ - - grid_mappings = _get_raw_grid_mappings(dataset) - - for gm in grid_mappings.values(): - if gm.ref_vars: - ref_var = gm.ref_vars[0] - x_dim, y_dim = ref_var.dims[-1], ref_var.dims[-2], - x_coors, y_coords, gms = _get_xy_coords(dataset, x_dim, y_dim) - gm.x_coords, gm.y_coords = x_coors, y_coords - - -def _get_xy_coords(dataset: xr.Dataset, - x_dim: Hashable, - y_dim: Hashable) \ - -> Optional[Tuple[xr.DataArray, - xr.DataArray, - Optional[GridMappingSchema]]]: - x_dims = (x_dim,) - y_dims = (y_dim,) - yx_dims = (y_dim, x_dim) - - # 1D-coordinate variables named after their dimensions - if x_dim in dataset.coords and y_dim in dataset.coords: - x_coords = dataset.coords[x_dim] - y_coords = dataset.coords[y_dim] - if x_coords.dims == x_dims and y_coords.dims == y_dims: - return x_coords, y_coords, None - - # 1D-coordinate variables identified by "standard_name" - for gms in GM_SCHEMAS: - x_name, y_name = gms.standard_names - x_coords = None - y_coords = None - for var_name, var in dataset.coords.items(): - standard_name = var.attrs.get("standard_name") - if standard_name == x_name and var.dims == x_dims: - x_coords = var - if standard_name == y_name and var.dims == y_dims: - y_coords = var - if x_coords is not None and y_coords is not None: - return x_coords, y_coords, gms - - # 2D-coordinate variables identified by "standard_name" - for gms in GM_SCHEMAS: - x_name, y_name = gms.standard_names - x_coords = None - y_coords = None - for var_name, var in dataset.data_vars.items(): - standard_name = var.attrs.get("standard_name") - if standard_name == x_name and var.dims == yx_dims: - x_coords = var - if standard_name == y_name and var.dims == yx_dims: - y_coords = var - if x_coords is not None and y_coords is not None: - return x_coords, y_coords, gms - - # 1D-coordinate variables identified by common variable names - for gms in GM_SCHEMAS: - for x_name, y_name in gms.common_var_names: - x_coords = dataset.get(x_name) - y_coords = dataset.get(y_name) - if x_coords is not None and y_coords is not None \ - and x_coords.dims == x_dims \ - and y_coords.dims == y_dims: - return x_coords, y_coords, gms - - # 2D-coordinate variables identified by common variable names - for gms in GM_SCHEMAS: - for x_name, y_name in gms.common_var_names: - x_coords = dataset.get(x_name) - y_coords = dataset.get(y_name) - if x_coords is not None and y_coords is not None \ - and x_coords.dims == yx_dims \ - and y_coords.dims == yx_dims: - return x_coords, y_coords, gms +from xcube.util.assertions import assert_true +from .base import GridMapping +from .coords import new_grid_mapping_from_coords + + +def find_grid_mapping_for_var(dataset: xr.Dataset, + var_name: str, + strict: bool = False) -> Optional[GridMapping]: + assert_instance(dataset, xr.Dataset, "dataset") + assert_instance(var_name, str, "var_name") + assert_true(var_name in dataset, + message=f"variable {var_name!r} not found in dataset") + + var = dataset[var_name] + + gm_var_name = var.attrs.get("grid_mapping") + if isinstance(gm_var_name, str) and gm_var_name: + return _find_grid_mapping_for_var_with_gm_attr( + dataset, + var, + gm_var_name, + strict=strict + ) return None -def _get_raw_grid_mappings(dataset): - grid_mappings = dict() - # Find any grid mappings by parsing variable attributes to CRS - # - for var_name, var in dataset.data_vars.items(): - try: - crs = pyproj.crs.CRS.from_cf(var.attrs) - except pyproj.crs.CRSError: - continue - grid_mappings[var_name] = GridMappingTemplate(var, crs) - # Add spatial variables to corresponding grid mapping - # by their CF 'grid_mapping' attribute - # - for var_name, var in dataset.data_vars.items(): - if var_name in grid_mappings: - continue - if var.ndim < 2: - continue - gm_var_name = var.attrs.get('grid_mapping') - if not gm_var_name or gm_var_name not in grid_mappings: - continue - gm = grid_mappings[gm_var_name] - gm.ref_vars.append(var) - if len(grid_mappings) > 1: - # We have more than one grid mapping. - # Just keep the referenced grid mappings. - ref_grid_mappings = filter(lambda gm: bool(gm.ref_var_names), - grid_mappings.values()) - if ref_grid_mappings: - grid_mappings = {gm.var_name: gm for gm in ref_grid_mappings} +def _find_grid_mapping_for_var_with_gm_attr(dataset: xr.Dataset, + var: xr.DataArray, + gm_var_name: str, + strict: bool = False) \ + -> Optional[GridMapping]: + maybe_fail = functools.partial(_maybe_fail, strict) + + if ":" in gm_var_name: + gm_var_name, coord_var_names = gm_var_name.split(":", maxsplit=1) + coord_var_names = coord_var_names.split() + else: + coord_var_names = dataset.coords.keys() + + if gm_var_name not in dataset: + return maybe_fail(f"grid mapping variable {gm_var_name!r}" + f" not found in dataset") + + gm_var = dataset[gm_var_name] + try: + crs = pyproj.CRS.from_cf(gm_var.attrs) + except pyproj.exceptions.CRSError as e: + return maybe_fail(e) + + gm_name = gm_var.attrs.get("grid_mapping_name") + + coord_vars = [dataset[coord_var_name] + for coord_var_name in coord_var_names + if coord_var_name in dataset] + + if len(coord_vars) < len(coord_var_names): + return maybe_fail(f"not all coordinate variables" + f" of {coord_var_names!r}" + f" found in dataset") + + valid_1d_coord_vars = [ + coord_var for coord_var in coord_vars + if (coord_var is not None + and coord_var.ndim == 1 + and coord_var.dims[0] in var.dims) + ] + + if gm_name == "latitude_longitude": + x_name, y_name = "longitude", "latitude" + elif gm_name == "rotated_latitude_longitude": + x_name, y_name = "grid_longitude", "grid_latitude" + else: + x_name, y_name = "projection_x_coordinate", "projection_y_coordinate" + + x_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, x_name) + y_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, y_name) + if x_coords is not None and y_coords is not None: + return new_grid_mapping_from_coords(x_coords=x_coords, + y_coords=y_coords, + crs=crs) + + valid_2d_coord_vars = [ + coord_var for coord_var in coord_vars + if (coord_var is not None + and coord_var.ndim == 2 + and coord_var.dims[0] in var.dims + and coord_var.dims[1] in var.dims) + ] + x_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, x_name) + y_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, y_name) + if x_coords is not None and y_coords is not None \ + and x_coords.dims == y_coords.dims: + return new_grid_mapping_from_coords(x_coords=x_coords, + y_coords=y_coords, + crs=crs) + + return maybe_fail(f"cannot find coordinates in {coord_var_names!r}" + f" for grid mapping variable {gm_var_name!r}") + + +def _find_coord_var_by_standard_name( + coords: Sequence[xr.DataArray], + standard_name: str +) -> Optional[xr.DataArray]: + return next( + (var for var in coords + if var.attrs.get("standard_name") == standard_name), + None + ) + + +def _maybe_fail(strict: bool, error: Union[str, Exception]) -> None: + if strict: + if isinstance(error, str): + raise ValueError(error) else: - # There are no referenced grid mappings. - # So we cannot unambiguously tell which variable uses which - # grid mappings. Therefore, we choose any: - gm = next(iter(grid_mappings.values())) - grid_mappings = {gm.var_name: gm} - - return grid_mappings - - -def _complement_grid_mapping_coords( - coords: GridCoords, - grid_mapping_name: Optional[str], - missing_crs: Optional[pyproj.crs.CRS], - grid_mappings: Dict[Optional[str], GridMappingProxy] -): - if coords.x is not None or coords.y is not None: - grid_mapping = next((grid_mapping - for grid_mapping in grid_mappings.values() - if grid_mapping_name is None - or grid_mapping_name == grid_mapping.name), - None) - if grid_mapping is None and missing_crs is not None: - grid_mapping = GridMappingProxy(crs=missing_crs, - name=grid_mapping_name) - grid_mappings[None] = grid_mapping - - if grid_mapping is not None: - if grid_mapping.coords is None: - grid_mapping.coords = coords - # Edge case from GeoTIFF with CRS-84 with 1D - # coordinates named "x" and "y" as read by rioxarray - if grid_mapping.coords.x is None: - grid_mapping.coords.x = coords.x - if grid_mapping.coords.y is None: - grid_mapping.coords.y = coords.y - - -def _find_potential_coord_vars(dataset: xr.Dataset) -> List[Hashable]: - """ - Find potential coordinate variables. - - We need this function as we can not rely on xarray.Dataset.coords, - because 2D coordinate arrays are most likely not indicated as such - in many datasets. - """ - - # Collect bounds variables. We must exclude them. - bounds_vars = set() - for k in dataset.variables: - var = dataset[k] - - # Bounds variable as recommended through CF conventions - bounds_k = var.attrs.get('bounds') - if bounds_k is not None and bounds_k in dataset: - bounds_vars.add(bounds_k) - - # Bounds variable by naming convention, - # e.g. "lon_bnds" or "lat_bounds" - k_splits = str(k).rsplit("_", maxsplit=1) - if len(k_splits) == 2: - k_base, k_suffix = k_splits - if k_suffix in ('bnds', 'bounds') and k_base in dataset: - bounds_vars.add(k) - - potential_coord_vars = [] - - # First consider any CF global attribute "coordinates" - coordinates = dataset.attrs.get('coordinates') - if coordinates is not None: - for var_name in coordinates.split(): - if _is_potential_coord_var(dataset, bounds_vars, var_name): - potential_coord_vars.append(var_name) - - # Then consider any other 1D/2D variables - for var_name in dataset.variables: - if var_name not in potential_coord_vars \ - and _is_potential_coord_var(dataset, bounds_vars, var_name): - potential_coord_vars.append(var_name) - - return potential_coord_vars - - -def _is_potential_coord_var(dataset: xr.Dataset, - bounds_var_names: Set[str], - var_name: Hashable) -> bool: - if var_name in dataset: - var = dataset[var_name] - return var.ndim in (1, 2) and var_name not in bounds_var_names - return False - - -def _find_dataset_tile_size(dataset: xr.Dataset, - x_dim_name: Hashable, - y_dim_name: Hashable) \ - -> Optional[Tuple[int, int]]: - """Find the most likely tile size in *dataset*""" - dataset_chunks = get_dataset_chunks(dataset) - tile_width = dataset_chunks.get(x_dim_name) - tile_height = dataset_chunks.get(y_dim_name) - if tile_width is not None and tile_height is not None: - return tile_width, tile_height + raise error return None - - -def add_spatial_ref(dataset_store: zarr.convenience.StoreLike, - crs: pyproj.CRS, - crs_var_name: Optional[str] = 'spatial_ref', - xy_dim_names: Optional[Tuple[str, str]] = None): - """ - Helper function that allows adding a spatial reference to an - existing Zarr dataset. - - :param dataset_store: The dataset's existing Zarr store or path. - :param crs: The spatial coordinate reference system. - :param crs_var_name: The name of the variable that will hold the - spatial reference. Defaults to "spatial_ref". - :param xy_dim_names: The names of the x and y dimensions. - Defaults to ("x", "y"). - """ - assert_instance(dataset_store, (MutableMapping, str), name='group_store') - assert_instance(crs_var_name, str, name='crs_var_name') - x_dim_name, y_dim_name = xy_dim_names or ('x', 'y') - - spatial_attrs = crs.to_cf() - spatial_attrs['_ARRAY_DIMENSIONS'] = [] # Required by xarray - group = zarr.open(dataset_store, mode='r+') - spatial_ref = group.array(crs_var_name, - 0, - shape=(), - dtype=np.uint8, - fill_value=0) - spatial_ref.attrs.update(**spatial_attrs) - - for item_name, item in group.items(): - if item_name != crs_var_name: - dims = item.attrs.get('_ARRAY_DIMENSIONS') - if dims and len(dims) >= 2 \ - and dims[-2] == y_dim_name \ - and dims[-1] == x_dim_name: - item.attrs['grid_mapping'] = crs_var_name - - zarr.convenience.consolidate_metadata(dataset_store) From 2307d89e6fd531c9ba492f2410d61cf1eb7575dc Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 14 Mar 2023 16:11:29 +0100 Subject: [PATCH 04/17] Tests work --- xcube/core/gridmapping/cfconv.py | 216 +++++++++++++++++++++---------- 1 file changed, 149 insertions(+), 67 deletions(-) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 652c9c307..0565cbb2f 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -19,8 +19,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import functools -from typing import Optional, Union, Sequence +from typing import Optional, Union, Sequence, Tuple import pyproj import pyproj.exceptions @@ -28,13 +27,13 @@ from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true +from .base import CRS_WGS84 from .base import GridMapping from .coords import new_grid_mapping_from_coords def find_grid_mapping_for_var(dataset: xr.Dataset, - var_name: str, - strict: bool = False) -> Optional[GridMapping]: + var_name: str) -> Optional[GridMapping]: assert_instance(dataset, xr.Dataset, "dataset") assert_instance(var_name, str, "var_name") assert_true(var_name in dataset, @@ -42,57 +41,74 @@ def find_grid_mapping_for_var(dataset: xr.Dataset, var = dataset[var_name] - gm_var_name = var.attrs.get("grid_mapping") - if isinstance(gm_var_name, str) and gm_var_name: - return _find_grid_mapping_for_var_with_gm_attr( - dataset, - var, - gm_var_name, - strict=strict + gm_value = var.attrs.get("grid_mapping") + if isinstance(gm_value, str) and gm_value: + crs, gm_name, xy_coords = _find_grid_mapping_for_var_with_gm_value( + dataset, var, gm_value ) + else: + crs, gm_name, xy_coords = CRS_WGS84, "latitude_longitude", None - return None + if xy_coords is None: + xy_coords = _find_coordinates_for_crs_and_gm_name( + dataset, var, gm_name + ) + return new_grid_mapping_from_coords(x_coords=xy_coords[0], + y_coords=xy_coords[1], + crs=crs) -def _find_grid_mapping_for_var_with_gm_attr(dataset: xr.Dataset, - var: xr.DataArray, - gm_var_name: str, - strict: bool = False) \ - -> Optional[GridMapping]: - maybe_fail = functools.partial(_maybe_fail, strict) - if ":" in gm_var_name: - gm_var_name, coord_var_names = gm_var_name.split(":", maxsplit=1) +def _find_grid_mapping_for_var_with_gm_value( + dataset: xr.Dataset, + var: xr.DataArray, + gm_value: str +) -> Tuple[pyproj.CRS, + str, + Optional[Tuple[xr.DataArray, xr.DataArray]]]: + xy_coords = None + + if ":" in gm_value: + # gm_value has format ": " + gm_var_name, coord_var_names = gm_value.split(":", maxsplit=1) coord_var_names = coord_var_names.split() + if len(coord_var_names) == 2: + x_name, y_name = coord_var_names + # Check CF, whether the following is correct: + # if gm_name in ("latitude_longitude", + # "rotated_latitude_longitude"): + # x_name, y_name = y_name, x_name + x_coords = dataset.get(x_name) + y_coords = dataset.get(y_name) + if ((_is_valid_1d_coord_var(var, x_coords) + and _is_valid_1d_coord_var(var, y_coords)) + or (_is_valid_2d_coord_var(var, x_coords) + and _is_valid_2d_coord_var(var, y_coords))): + xy_coords = x_coords, y_coords + if xy_coords is None: + raise ValueError(f"invalid coordinates in" + f" grid mapping value {gm_value!r}") else: - coord_var_names = dataset.coords.keys() - - if gm_var_name not in dataset: - return maybe_fail(f"grid mapping variable {gm_var_name!r}" - f" not found in dataset") + # gm_value has format "" + gm_var_name = gm_value - gm_var = dataset[gm_var_name] - try: - crs = pyproj.CRS.from_cf(gm_var.attrs) - except pyproj.exceptions.CRSError as e: - return maybe_fail(e) + crs, gm_name = _parse_crs(dataset, gm_var_name) - gm_name = gm_var.attrs.get("grid_mapping_name") + return crs, gm_name, xy_coords - coord_vars = [dataset[coord_var_name] - for coord_var_name in coord_var_names - if coord_var_name in dataset] - if len(coord_vars) < len(coord_var_names): - return maybe_fail(f"not all coordinate variables" - f" of {coord_var_names!r}" - f" found in dataset") +def _find_coordinates_for_crs_and_gm_name( + dataset: xr.Dataset, + var: xr.DataArray, + gm_name: str +) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: + other_vars = [dataset[var_name] + for var_name in dataset.variables.keys() + if var_name != var.name] valid_1d_coord_vars = [ - coord_var for coord_var in coord_vars - if (coord_var is not None - and coord_var.ndim == 1 - and coord_var.dims[0] in var.dims) + other_var for other_var in other_vars + if _is_valid_1d_coord_var(var, other_var) ] if gm_name == "latitude_longitude": @@ -105,44 +121,110 @@ def _find_grid_mapping_for_var_with_gm_attr(dataset: xr.Dataset, x_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, x_name) y_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, y_name) if x_coords is not None and y_coords is not None: - return new_grid_mapping_from_coords(x_coords=x_coords, - y_coords=y_coords, - crs=crs) + return x_coords, y_coords valid_2d_coord_vars = [ - coord_var for coord_var in coord_vars - if (coord_var is not None - and coord_var.ndim == 2 - and coord_var.dims[0] in var.dims - and coord_var.dims[1] in var.dims) + other_var for other_var in other_vars + if _is_valid_2d_coord_var(var, other_var) ] x_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, x_name) y_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, y_name) if x_coords is not None and y_coords is not None \ and x_coords.dims == y_coords.dims: - return new_grid_mapping_from_coords(x_coords=x_coords, - y_coords=y_coords, - crs=crs) + return x_coords, y_coords + + xy_coords = _find_1d_coord_var_by_common_names( + valid_1d_coord_vars, + (("lon", "lat"), + ("longitude", "latitude"), + ("x", "y"), + ("xc", "yc")), + ) + if xy_coords is not None: + return xy_coords + + # Check: also try _find_2d_coord_var_by_common_names()? - return maybe_fail(f"cannot find coordinates in {coord_var_names!r}" - f" for grid mapping variable {gm_var_name!r}") + raise ValueError(f"cannot determine grid mapping" + f" coordinates for variable {var.name!r}") + + +def _find_1d_coord_var_by_common_names( + coords: Sequence[xr.DataArray], + common_xy_names: Sequence[Tuple[str, str]], +) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: + x_coords = None + y_coords = None + for var in coords: + if var.attrs.get("axis") == "X": + x_coords = var + if var.attrs.get("axis") == "Y": + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + x_coords = None + y_coords = None + for var in coords: + for x_name, y_name in common_xy_names: + if var.dims[0] == x_name and var.name == x_name: + x_coords = var + if var.dims[0] == y_name and var.name == y_name: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + x_coords = None + y_coords = None + for var in coords: + for x_name, y_name in common_xy_names: + if var.dims[0] == x_name: + x_coords = var + if var.dims[0] == y_name: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + return None def _find_coord_var_by_standard_name( coords: Sequence[xr.DataArray], standard_name: str ) -> Optional[xr.DataArray]: - return next( - (var for var in coords - if var.attrs.get("standard_name") == standard_name), - None - ) + for var in coords: + if var.attrs.get("standard_name") == standard_name: + return var + return None + + +def _parse_crs(dataset: xr.Dataset, + gm_var_name: str) -> Tuple[pyproj.CRS, Optional[str]]: + if gm_var_name not in dataset: + raise ValueError(f"grid mapping variable {gm_var_name!r}" + f" not found in dataset") + gm_var = dataset[gm_var_name] + try: + return (pyproj.CRS.from_cf(gm_var.attrs), + gm_var.attrs.get("grid_mapping_name")) + except pyproj.exceptions.CRSError as e: + raise ValueError(f"illegal value for" + f" grid mapping variable {gm_var_name!r}") from e + + +def _is_valid_1d_coord_var(data_var: xr.DataArray, + coord_var: Optional[xr.DataArray]) -> bool: + return (coord_var is not None + and coord_var.ndim == 1 + and coord_var.dims[0] in data_var.dims) + + +def _is_valid_2d_coord_var(data_var: xr.DataArray, + coord_var: Optional[xr.DataArray]) -> bool: + return (coord_var is not None + and coord_var.ndim == 2 + and coord_var.dims[0] in data_var.dims + and coord_var.dims[1] in data_var.dims) + -def _maybe_fail(strict: bool, error: Union[str, Exception]) -> None: - if strict: - if isinstance(error, str): - raise ValueError(error) - else: - raise error - return None From ebc1c211204be91c332a667f681a387f331dc3b0 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 14 Mar 2023 16:12:08 +0100 Subject: [PATCH 05/17] format --- xcube/core/gridmapping/cfconv.py | 24 ++++++++++++------------ xcube/core/gridmapping/regular.py | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 0565cbb2f..648f08764 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -1,23 +1,23 @@ # The MIT License (MIT) -# Copyright (c) 2021 by the xcube development team and contributors +# Copyright (c) 2023 by the xcube development team and contributors # -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal in -# the Software without restriction, including without limitation the rights to -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -# of the Software, and to permit persons to whom the Software is furnished to do -# so, subject to the following conditions: +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: # -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. from typing import Optional, Union, Sequence, Tuple diff --git a/xcube/core/gridmapping/regular.py b/xcube/core/gridmapping/regular.py index 68869a10e..2fcfa5d36 100644 --- a/xcube/core/gridmapping/regular.py +++ b/xcube/core/gridmapping/regular.py @@ -1,23 +1,23 @@ # The MIT License (MIT) -# Copyright (c) 2021 by the xcube development team and contributors +# Copyright (c) 2023 by the xcube development team and contributors # -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal in -# the Software without restriction, including without limitation the rights to -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -# of the Software, and to permit persons to whom the Software is furnished to do -# so, subject to the following conditions: +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: # -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. from typing import Tuple, Union From 1e48acbb82a55d6e1493d7a7faa896400122363d Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 15 Mar 2023 09:12:26 +0100 Subject: [PATCH 06/17] More tests --- test/core/gridmapping/test_cfconv.py | 199 ++++++++++++++++++++++++++- xcube/core/gridmapping/cfconv.py | 8 +- 2 files changed, 201 insertions(+), 6 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index 8ddb1351d..493fd030b 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -2,6 +2,7 @@ import numpy as np import pyproj +import pytest import xarray as xr from xcube.core.gridmapping import GridMapping @@ -51,7 +52,7 @@ def new_y_coord_var(self, def new_crs_var(self, crs) -> xr.DataArray: return xr.DataArray(0, attrs=crs.to_cf()) - def test_with_grid_mapping_and_named_coords(self): + def test_with_gm_var_and_named_coords(self): ds = xr.Dataset( data_vars={ "sst": self.new_data_var( @@ -72,7 +73,61 @@ def test_with_grid_mapping_and_named_coords(self): self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) - def test_with_grid_mapping_and_standard_name_lat_lon(self): + def test_with_gm_var_fails_with_invalid_coords(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs: a b"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + with pytest.raises(ValueError, + match="invalid coordinates in" + " grid mapping value 'crs: a b'"): + find_grid_mapping_for_var(ds, "sst") + + def test_with_gm_var_fails_with_invalid_crs(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": xr.DataArray(0, attrs={"bibo": 12}) + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + with pytest.raises(ValueError, + match="variable 'crs' is not" + " a valid grid mapping"): + find_grid_mapping_for_var(ds, "sst") + + def test_with_gm_var_fails_with_invalid_gm_var(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "spatial_ref": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + with pytest.raises(ValueError, + match="grid mapping variable 'crs'" + " not found in dataset"): + find_grid_mapping_for_var(ds, "sst") + + def test_with_gm_var_and_standard_name_lat_lon(self): ds = xr.Dataset( data_vars={ "sst": self.new_data_var( @@ -97,7 +152,7 @@ def test_with_grid_mapping_and_standard_name_lat_lon(self): self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) - def test_with_grid_mapping_and_standard_name_rot_lat_lon(self): + def test_with_gm_var_and_standard_name_rot_lat_lon(self): ds = xr.Dataset( data_vars={ "sst": self.new_data_var( @@ -122,7 +177,7 @@ def test_with_grid_mapping_and_standard_name_rot_lat_lon(self): self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) - def test_with_grid_mapping_and_standard_name_projected(self): + def test_with_gm_var_and_standard_name_projected(self): ds = xr.Dataset( data_vars={ "sst": self.new_data_var( @@ -146,3 +201,139 @@ def test_with_grid_mapping_and_standard_name_projected(self): self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_gm_var_and_axis(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(attrs={"axis": "X"}), + "b": self.new_y_coord_var(attrs={"axis": "Y"}), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_gm_var_and_common_dim_and_var_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("x", "y"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + dims=("time", "lat", "lon"), + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_WGS84) + }, + coords={ + "a": self.new_x_coord_var(dim="lon"), + "b": self.new_y_coord_var(dim="lat"), + "lon": self.new_x_coord_var(dim="lon"), + "lat": self.new_y_coord_var(dim="lat"), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("lon", "lat"), gm.xy_var_names) + self.assertEqual(("lon", "lat"), gm.xy_dim_names) + + def test_with_gm_var_and_common_dim_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_without_gm_var_and_common_dim_and_var_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var(), + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("x", "y"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + dims=("time", "lat", "lon"), + ), + }, + coords={ + "a": self.new_x_coord_var(dim="lon"), + "b": self.new_y_coord_var(dim="lat"), + "lon": self.new_x_coord_var(dim="lon"), + "lat": self.new_y_coord_var(dim="lat"), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("lon", "lat"), gm.xy_var_names) + self.assertEqual(("lon", "lat"), gm.xy_dim_names) + + def test_without_gm_var_and_common_dim_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var() + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 648f08764..c64d01bbe 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -153,6 +153,8 @@ def _find_1d_coord_var_by_common_names( coords: Sequence[xr.DataArray], common_xy_names: Sequence[Tuple[str, str]], ) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: + + # Priority 1: find var by "axis" attribute x_coords = None y_coords = None for var in coords: @@ -163,6 +165,7 @@ def _find_1d_coord_var_by_common_names( if x_coords is not None and y_coords is not None: return x_coords, y_coords + # Priority 2: find var where dim name == common name == var name x_coords = None y_coords = None for var in coords: @@ -174,6 +177,7 @@ def _find_1d_coord_var_by_common_names( if x_coords is not None and y_coords is not None: return x_coords, y_coords + # Priority 3: find var where dim name == common name x_coords = None y_coords = None for var in coords: @@ -208,8 +212,8 @@ def _parse_crs(dataset: xr.Dataset, return (pyproj.CRS.from_cf(gm_var.attrs), gm_var.attrs.get("grid_mapping_name")) except pyproj.exceptions.CRSError as e: - raise ValueError(f"illegal value for" - f" grid mapping variable {gm_var_name!r}") from e + raise ValueError(f"variable {gm_var_name!r}" + f" is not a valid grid mapping") from e def _is_valid_1d_coord_var(data_var: xr.DataArray, From 7c2cb2958b5216f076dd300c08148e2b2bbf1381 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 15 Mar 2023 15:35:12 +0100 Subject: [PATCH 07/17] Saving ongoing work --- test/core/gridmapping/test_cfconv.py | 30 +++++----- xcube/core/gridmapping/cfconv.py | 87 +++++++++++++++++++++++----- xcube/core/gridmapping/dataset.py | 22 +++---- 3 files changed, 94 insertions(+), 45 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index 493fd030b..ee376cf63 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -6,7 +6,7 @@ import xarray as xr from xcube.core.gridmapping import GridMapping -from xcube.core.gridmapping.cfconv import find_grid_mapping_for_var +from xcube.core.gridmapping.cfconv import find_grid_mapping_for_data_var CRS_WGS84 = pyproj.crs.CRS(4326) CRS_UTM_33N = pyproj.crs.CRS(32633) @@ -67,7 +67,7 @@ def test_with_gm_var_and_named_coords(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -89,7 +89,7 @@ def test_with_gm_var_fails_with_invalid_coords(self): with pytest.raises(ValueError, match="invalid coordinates in" " grid mapping value 'crs: a b'"): - find_grid_mapping_for_var(ds, "sst") + find_grid_mapping_for_data_var(ds, "sst") def test_with_gm_var_fails_with_invalid_crs(self): ds = xr.Dataset( @@ -107,7 +107,7 @@ def test_with_gm_var_fails_with_invalid_crs(self): with pytest.raises(ValueError, match="variable 'crs' is not" " a valid grid mapping"): - find_grid_mapping_for_var(ds, "sst") + find_grid_mapping_for_data_var(ds, "sst") def test_with_gm_var_fails_with_invalid_gm_var(self): ds = xr.Dataset( @@ -125,7 +125,7 @@ def test_with_gm_var_fails_with_invalid_gm_var(self): with pytest.raises(ValueError, match="grid mapping variable 'crs'" " not found in dataset"): - find_grid_mapping_for_var(ds, "sst") + find_grid_mapping_for_data_var(ds, "sst") def test_with_gm_var_and_standard_name_lat_lon(self): ds = xr.Dataset( @@ -146,7 +146,7 @@ def test_with_gm_var_and_standard_name_lat_lon(self): "lat": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -171,7 +171,7 @@ def test_with_gm_var_and_standard_name_rot_lat_lon(self): "lat": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_ROTATED_POLE.to_cf(), gm.crs.to_cf()) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -196,7 +196,7 @@ def test_with_gm_var_and_standard_name_projected(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -217,7 +217,7 @@ def test_with_gm_var_and_axis(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -238,7 +238,7 @@ def test_with_gm_var_and_common_dim_and_var_name(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("x", "y"), gm.xy_var_names) @@ -259,7 +259,7 @@ def test_with_gm_var_and_common_dim_and_var_name(self): "lat": self.new_y_coord_var(dim="lat"), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("lon", "lat"), gm.xy_var_names) @@ -278,7 +278,7 @@ def test_with_gm_var_and_common_dim_name(self): "b": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -296,7 +296,7 @@ def test_without_gm_var_and_common_dim_and_var_name(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("x", "y"), gm.xy_var_names) @@ -315,7 +315,7 @@ def test_without_gm_var_and_common_dim_and_var_name(self): "lat": self.new_y_coord_var(dim="lat"), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("lon", "lat"), gm.xy_var_names) @@ -331,7 +331,7 @@ def test_without_gm_var_and_common_dim_name(self): "b": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index c64d01bbe..90020a859 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -19,7 +19,7 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -from typing import Optional, Union, Sequence, Tuple +from typing import Optional, Sequence, Tuple, Dict, Hashable, List import pyproj import pyproj.exceptions @@ -28,35 +28,89 @@ from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true from .base import CRS_WGS84 -from .base import GridMapping -from .coords import new_grid_mapping_from_coords + +DimPair = Tuple[Hashable, Hashable] +CoordsPair = Tuple[xr.DataArray, xr.DataArray] +GridMappingTuple = Tuple[pyproj.CRS, str, CoordsPair] + + +def find_grid_mappings_for_data_vars(dataset: xr.Dataset) \ + -> Dict[Hashable, GridMappingTuple]: + grid_mappings = find_grid_mappings_for_dataset(dataset) + vars_to_grid_mappings: Dict[Hashable, GridMappingTuple] = {} + for var_name, var in dataset.data_vars.items(): + if var.ndim < 2: + continue + for gmt in grid_mappings: + _, _, xy_coords = gmt + x_dim, y_dim = _get_xy_dims_from_xy_coords(xy_coords) + if x_dim in var.dims and y_dim in var.dims: + vars_to_grid_mappings[var_name] = gmt + break + return vars_to_grid_mappings + + +def find_grid_mappings_for_dataset(dataset: xr.Dataset) \ + -> List[GridMappingTuple]: + dims_to_grid_mappings: Dict[DimPair, GridMappingTuple] = {} + for var_name, var in dataset.data_vars.items(): + found = False + for x_dim, y_dim in dims_to_grid_mappings.keys(): + if x_dim in var.dims and y_dim in var.dims: + found = True + break + if not found: + gmt = find_grid_mapping_for_data_var(dataset, var_name) + _, _, xy_coords = gmt + xy_dims = _get_xy_dims_from_xy_coords(xy_coords) + dims_to_grid_mappings[xy_dims] = gmt + return list(dims_to_grid_mappings.values()) + + +def _get_xy_dims_from_xy_coords(xy_coords: CoordsPair) -> DimPair: + x_coords, y_coords = xy_coords + if x_coords.ndim == 1: + # 1-D coordinates + assert y_coords.ndim == 1 + return x_coords.dims[0], y_coords.dims[0] + else: + # 2-D coordinates + assert x_coords.ndim == 2 + assert x_coords.dims == y_coords.dims + return x_coords.dims[1], x_coords.dims[0] -def find_grid_mapping_for_var(dataset: xr.Dataset, - var_name: str) -> Optional[GridMapping]: +def find_grid_mapping_for_data_var( + dataset: xr.Dataset, + var_name: Hashable +) -> Optional[GridMappingTuple]: assert_instance(dataset, xr.Dataset, "dataset") - assert_instance(var_name, str, "var_name") + assert_instance(var_name, Hashable, "var_name") assert_true(var_name in dataset, message=f"variable {var_name!r} not found in dataset") var = dataset[var_name] + if var.ndim < 2: + return None gm_value = var.attrs.get("grid_mapping") if isinstance(gm_value, str) and gm_value: crs, gm_name, xy_coords = _find_grid_mapping_for_var_with_gm_value( dataset, var, gm_value ) + force_xy_coords = True else: crs, gm_name, xy_coords = CRS_WGS84, "latitude_longitude", None + force_xy_coords = False if xy_coords is None: xy_coords = _find_coordinates_for_crs_and_gm_name( - dataset, var, gm_name + dataset, var, gm_name, force_xy_coords ) + if xy_coords is None: + return None - return new_grid_mapping_from_coords(x_coords=xy_coords[0], - y_coords=xy_coords[1], - crs=crs) + return crs, gm_name, xy_coords def _find_grid_mapping_for_var_with_gm_value( @@ -65,7 +119,7 @@ def _find_grid_mapping_for_var_with_gm_value( gm_value: str ) -> Tuple[pyproj.CRS, str, - Optional[Tuple[xr.DataArray, xr.DataArray]]]: + Optional[CoordsPair]]: xy_coords = None if ":" in gm_value: @@ -100,7 +154,8 @@ def _find_grid_mapping_for_var_with_gm_value( def _find_coordinates_for_crs_and_gm_name( dataset: xr.Dataset, var: xr.DataArray, - gm_name: str + gm_name: str, + force: bool ) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: other_vars = [dataset[var_name] for var_name in dataset.variables.keys() @@ -145,9 +200,11 @@ def _find_coordinates_for_crs_and_gm_name( # Check: also try _find_2d_coord_var_by_common_names()? - raise ValueError(f"cannot determine grid mapping" - f" coordinates for variable {var.name!r}") - + if force: + raise ValueError(f"cannot determine grid mapping" + f" coordinates for variable {var.name!r}" + f" with dimensions {var.dims!r}") + return None def _find_1d_coord_var_by_common_names( coords: Sequence[xr.DataArray], diff --git a/xcube/core/gridmapping/dataset.py b/xcube/core/gridmapping/dataset.py index 2567b1bf7..c54be83bb 100644 --- a/xcube/core/gridmapping/dataset.py +++ b/xcube/core/gridmapping/dataset.py @@ -20,14 +20,13 @@ # SOFTWARE. from typing import Optional, Union, Tuple -import warnings import pyproj import xarray as xr from .base import DEFAULT_TOLERANCE from .base import GridMapping -from .cfconv import get_dataset_grid_mapping_proxies +from .cfconv import find_grid_mappings_for_dataset from .coords import new_grid_mapping_from_coords from .helpers import _normalize_crs @@ -54,21 +53,14 @@ def new_grid_mapping_from_dataset( else: prefer_crs = crs - grid_mapping_proxies = get_dataset_grid_mapping_proxies( - dataset, - emit_warnings=emit_warnings, - missing_projected_crs=crs, - missing_rotated_latitude_longitude_crs=crs, - missing_latitude_longitude_crs=crs, - ).values() - + grid_mapping_tuples = find_grid_mappings_for_dataset(dataset) grid_mappings = [ - new_grid_mapping_from_coords(x_coords=gmp.coords.x, - y_coords=gmp.coords.y, - crs=gmp.crs, - tile_size=tile_size or gmp.tile_size, + new_grid_mapping_from_coords(x_coords=x_coords, + y_coords=y_coords, + crs=crs, + tile_size=tile_size, tolerance=tolerance) - for gmp in grid_mapping_proxies + for crs, _, (x_coords, y_coords) in grid_mapping_tuples ] if len(grid_mappings) > 1: From c035ac4e1fb49d3c8a3e0ada52299c41fc2829fa Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 22 Mar 2023 11:11:37 +0100 Subject: [PATCH 08/17] Fix --- test/core/gridmapping/test_cfconv.py | 13 +++++++++++++ xcube/core/gridmapping/cfconv.py | 7 ++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index ee376cf63..c7127e6e2 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -284,6 +284,19 @@ def test_with_gm_var_and_common_dim_name(self): self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) + def test_with_gm_var_but_without_spatial_var(self): + ds = xr.Dataset( + data_vars={ + "crs": self.new_crs_var(CRS_UTM_33N), + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_data_var(ds, "crs") + self.assertIsNone(gm) + def test_without_gm_var_and_common_dim_and_var_name(self): ds = xr.Dataset( data_vars={ diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 90020a859..4b9dcfd2e 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -61,9 +61,10 @@ def find_grid_mappings_for_dataset(dataset: xr.Dataset) \ break if not found: gmt = find_grid_mapping_for_data_var(dataset, var_name) - _, _, xy_coords = gmt - xy_dims = _get_xy_dims_from_xy_coords(xy_coords) - dims_to_grid_mappings[xy_dims] = gmt + if gmt is not None: + _, _, xy_coords = gmt + xy_dims = _get_xy_dims_from_xy_coords(xy_coords) + dims_to_grid_mappings[xy_dims] = gmt return list(dims_to_grid_mappings.values()) From ddbf361246683c9fa651e7783d7c4cf4f4e41635 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Fri, 10 Mar 2023 18:00:53 +0100 Subject: [PATCH 09/17] Started revision of grid mapping identification --- xcube/core/gridmapping/cfconv.py | 376 ++++++++++++++++--------------- 1 file changed, 194 insertions(+), 182 deletions(-) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 5f212a307..3bb991ea4 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -33,206 +33,218 @@ from xcube.util.assertions import assert_instance from .base import CRS_WGS84 -class GridCoords: - """ - Grid coordinates comprising x and y of - type xarray.DataArray. - """ - - def __init__(self): - self.x: Optional[xr.DataArray] = None - self.y: Optional[xr.DataArray] = None - - -class GridMappingProxy: - """ - Grid mapping comprising *crs* of type pyproj.crs.CRS, - grid coordinates, an optional name, coordinates, and a - tile size (= spatial chunk sizes). - """ +class GridMappingSchema: def __init__(self, - crs: Optional[pyproj.crs.CRS] = None, - name: Optional[str] = None, - coords: Optional[GridCoords] = None, - tile_size: Optional[Tuple[int, int]] = None): - self.crs = crs + name: str, + standard_names: Tuple[str, str], + common_names: Tuple[Tuple[str, str], ...], + default_crs: Optional[pyproj.CRS]): self.name = name - self.coords = coords - self.tile_size = tile_size - - -def get_dataset_grid_mapping_proxies( - dataset: xr.Dataset, - *, - missing_latitude_longitude_crs: pyproj.crs.CRS = None, - missing_rotated_latitude_longitude_crs: pyproj.crs.CRS = None, - missing_projected_crs: pyproj.crs.CRS = None, - emit_warnings: bool = False -) -> Dict[Union[Hashable, None], GridMappingProxy]: + self.standard_names = standard_names + self.common_var_names = common_names + self.default_crs = default_crs + + +GMS_LATITUDE_LONGITUDE = GridMappingSchema( + 'latitude_longitude', + ('longitude', 'latitude'), + (('lon', 'lat'), + ('longitude', 'latitude')), + CRS_WGS84 +) + +GMS_ROTATED_LATITUDE_LONGITUDE = GridMappingSchema( + 'rotated_latitude_longitude', + ('grid_longitude', 'grid_latitude'), + (('rlon', 'rlat'), + ('rlongitude', 'rlatitude')), + CRS_WGS84 +) + +GMS_PROJECTED = GridMappingSchema( + 'projected', + ('projection_x_coordinate', 'projection_y_coordinate'), + (('x', 'y'), + ('xc', 'yc'), + ('transformed_x', 'transformed_y')), + None +) + +GM_SCHEMAS = (GMS_LATITUDE_LONGITUDE, + GMS_ROTATED_LATITUDE_LONGITUDE, + GMS_PROJECTED) + + +class GridMappingTemplate: + def __init__(self, + var_name: str, + var: xr.DataArray, + crs: pyproj.CRS, + x_coords: Optional[xr.DataArray] = None, + y_coords: Optional[xr.DataArray] = None): + self.var_name = var_name + self.var = var + self.crs = crs + self.x_coords = x_coords + self.y_coords = y_coords + self.ref_vars: List[xr.DataArray] = [] + +def get_dataset_grid_mapping_templates(dataset: xr.Dataset) \ + -> List[GridMappingTemplate]: """ Find grid mappings encoded as described in the CF conventions [Horizontal Coordinate Reference Systems, Grid Mappings, and Projections] (http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#grid-mappings-and-projections). - :param dataset: - :param missing_latitude_longitude_crs: - :param missing_rotated_latitude_longitude_crs: - :param missing_projected_crs: - :param emit_warnings: + :param dataset: The dataset :return: """ - grid_mapping_proxies: Dict[Union[Hashable, None], - GridMappingProxy] = dict() - # Find any grid mapping variables by CF 'grid_mapping' attribute - # - for var_name, var in dataset.variables.items(): - grid_mapping_var_name = var.attrs.get('grid_mapping') - if grid_mapping_var_name \ - and grid_mapping_var_name not in grid_mapping_proxies \ - and grid_mapping_var_name in dataset: - grid_mapping_var = dataset[grid_mapping_var_name] - gmp = _parse_crs_from_attrs(grid_mapping_var.attrs) - grid_mapping_proxies[grid_mapping_var_name] = gmp - - # If no grid mapping variables found, - # try if CRS is encoded in some variable's attributes - # - if not grid_mapping_proxies: - for var_name, var in dataset.variables.items(): - gmp = _parse_crs_from_attrs(var.attrs) - if gmp is not None: - grid_mapping_proxies[var_name] = gmp - break - - # If no grid mapping variables found, - # try if CRS is encoded in dataset attributes - # - if not grid_mapping_proxies: - gmp = _parse_crs_from_attrs(dataset.attrs) - if gmp is not None: - grid_mapping_proxies[None] = gmp - - # Find coordinate variables. - # + grid_mappings = _get_raw_grid_mappings(dataset) + + if grid_mappings: + for gm in grid_mappings.values(): + if gm.ref_vars: + ref_var = gm.ref_vars[0] + y_dim, x_dim = ref_var[-2:] + ok = False + if x_dim in dataset.coords and y_dim in dataset.coords: + x_coords = dataset.coords[x_dim] + y_coords = dataset.coords[y_dim] + if x_coords.ndim == 1 and y_coords.ndim == 1: + gm.x_coords = x_coords + gm.y_coords = y_coords + ok = True + if not ok: + for gms in GM_SCHEMAS: + x_name, y_name = gms.standard_names + x_coords = None + y_coords = None + for var_name, var in dataset.coords.items(): + standard_name = var.attrs.get("standard_name") + if standard_name == x_name \ + and var.ndim == 1 \ + and var.dims[0] == x_dim: + x_coords = var + if standard_name == y_name \ + and var.ndim == 1 \ + and var.dims[0] == y_dim: + y_coords = var + if x_coords is not None and y_coords is not None: + gm.x_coords = x_coords + gm.y_coords = y_coords + ok = True + + if not ok: + + +def _get_xy_coords(dataset: xr.Dataset, + x_dim: str, + y_dim: str) \ + -> Optional[Tuple[xr.DataArray, + xr.DataArray, + Optional[GridMappingSchema]]]: + x_dims = (x_dim,) + y_dims = (y_dim,) + yx_dims = (y_dim, x_dim) + + # 1D-coordinate variables named after their dimensions + if x_dim in dataset.coords and y_dim in dataset.coords: + x_coords = dataset.coords[x_dim] + y_coords = dataset.coords[y_dim] + if x_coords.dims == x_dims and y_coords.dims == y_dims: + return x_coords, y_coords, None + + # 1D-coordinate variables identified by "standard_name" + for gms in GM_SCHEMAS: + x_name, y_name = gms.standard_names + x_coords = None + y_coords = None + for var_name, var in dataset.coords.items(): + standard_name = var.attrs.get("standard_name") + if standard_name == x_name and var.dims == x_dims: + x_coords = var + if standard_name == y_name and var.dims == y_dims: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords, gms + + # 2D-coordinate variables identified by "standard_name" + for gms in GM_SCHEMAS: + x_name, y_name = gms.standard_names + x_coords = None + y_coords = None + for var_name, var in dataset.coords.items(): + standard_name = var.attrs.get("standard_name") + if standard_name == x_name and var.dims == yx_dims: + x_coords = var + if standard_name == y_name and var.dims == yx_dims: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords, gms + + # 1D-coordinate variables identified by common variable names + for gms in GM_SCHEMAS: + for x_name, y_name in gms.common_var_names: + x_coords = dataset.get(x_name) + y_coords = dataset.get(y_name) + if x_coords is not None and y_coords is not None \ + and x_coords.dims == x_dims \ + and y_coords.dims == y_dims: + return x_coords, y_coords, gms + + # 2D-coordinate variables identified by common variable names + for gms in GM_SCHEMAS: + for x_name, y_name in gms.common_var_names: + x_coords = dataset.get(x_name) + y_coords = dataset.get(y_name) + if x_coords is not None and y_coords is not None \ + and x_coords.dims == yx_dims \ + and y_coords.dims == yx_dims: + return x_coords, y_coords, gms - latitude_longitude_coords = GridCoords() - rotated_latitude_longitude_coords = GridCoords() - projected_coords = GridCoords() - - potential_coord_vars = _find_potential_coord_vars(dataset) + return None - # Find coordinate variables that use a CF standard_name. +def _get_raw_grid_mappings(dataset): + grid_mappings = dict() + # Find any grid mappings by parsing variable attributes to CRS # - coords_standard_names = ( - (latitude_longitude_coords, - 'longitude', 'latitude'), - (rotated_latitude_longitude_coords, - 'grid_longitude', 'grid_latitude'), - (projected_coords, - 'projection_x_coordinate', 'projection_y_coordinate') - ) - for var_name in potential_coord_vars: - var = dataset[var_name] - if var.ndim not in (1, 2): + for var_name, var in dataset.data_vars.items(): + try: + crs = pyproj.crs.CRS.from_cf(var.attrs) + except pyproj.crs.CRSError: continue - standard_name = var.attrs.get('standard_name') - for coords, x_name, y_name in coords_standard_names: - if coords.x is None and standard_name == x_name: - coords.x = var - if coords.y is None and standard_name == y_name: - coords.y = var - - # Find coordinate variables by common naming convention. + grid_mappings[var_name] = GridMappingTemplate(var, crs) + # Add spatial variables to corresponding grid mapping + # by their CF 'grid_mapping' attribute # - coords_var_names = ( - (latitude_longitude_coords, - ('lon', 'longitude'), - ('lat', 'latitude')), - (rotated_latitude_longitude_coords, - ('rlon', 'rlongitude'), - ('rlat', 'rlatitude')), - (projected_coords, - ('x', 'xc', 'transformed_x'), - ('y', 'yc', 'transformed_y')) - ) - for var_name in potential_coord_vars: - var = dataset[var_name] - if var.ndim not in (1, 2): + for var_name, var in dataset.data_vars.items(): + if var_name in grid_mappings: continue - for coords, x_names, y_names in coords_var_names: - if coords.x is None and var_name in x_names: - coords.x = var - if coords.y is None and var_name in y_names: - coords.y = var - - # Assign found coordinates to grid mappings - # - for gmp in grid_mapping_proxies.values(): - if gmp.name == 'latitude_longitude': - gmp.coords = latitude_longitude_coords - elif gmp.name == 'rotated_latitude_longitude': - gmp.coords = rotated_latitude_longitude_coords + if var.ndim < 2: + continue + gm_var_name = var.attrs.get('grid_mapping') + if not gm_var_name or gm_var_name not in grid_mappings: + continue + gm = grid_mappings[gm_var_name] + gm.ref_vars.append(var) + + if len(grid_mappings) > 1: + # We have more than one grid mapping. + # Just keep the referenced grid mappings. + ref_grid_mappings = filter(lambda gm: bool(gm.ref_var_names), + grid_mappings.values()) + if ref_grid_mappings: + grid_mappings = {gm.var_name: gm for gm in ref_grid_mappings} else: - gmp.coords = projected_coords - - _complement_grid_mapping_coords(latitude_longitude_coords, - 'latitude_longitude', - missing_latitude_longitude_crs - or CRS_WGS84, - grid_mapping_proxies) - _complement_grid_mapping_coords(rotated_latitude_longitude_coords, - 'rotated_latitude_longitude', - missing_rotated_latitude_longitude_crs, - grid_mapping_proxies) - _complement_grid_mapping_coords(projected_coords, - None, - missing_projected_crs, - grid_mapping_proxies) - - # Collect complete grid mappings - complete_grid_mappings = dict() - for var_name, gmp in grid_mapping_proxies.items(): - if gmp.coords is not None \ - and gmp.coords.x is not None \ - and gmp.coords.y is not None \ - and gmp.coords.x.size >= 2 \ - and gmp.coords.y.size >= 2 \ - and gmp.coords.x.ndim == gmp.coords.y.ndim: - if gmp.coords.x.ndim == 1: - gmp.tile_size = _find_dataset_tile_size( - dataset, - gmp.coords.x.dims[0], - gmp.coords.y.dims[0] - ) - complete_grid_mappings[var_name] = gmp - elif gmp.coords.x.ndim == 2 \ - and gmp.coords.x.dims == gmp.coords.y.dims: - gmp.tile_size = _find_dataset_tile_size( - dataset, - gmp.coords.x.dims[1], - gmp.coords.x.dims[0] - ) - complete_grid_mappings[var_name] = gmp - elif emit_warnings: - warnings.warn(f'CRS "{gmp.name}": ' - f'missing x- and/or y-coordinates ' - f'(grid mapping variable "{var_name}": ' - f'grid_mapping_name="{gmp.name}")') - - return complete_grid_mappings - - -def _parse_crs_from_attrs(attrs: Dict[Hashable, Any]) \ - -> Optional[GridMappingProxy]: - # noinspection PyBroadException - try: - crs = pyproj.crs.CRS.from_cf(attrs) - except pyproj.crs.CRSError: - return None - return GridMappingProxy(crs=crs, - name=attrs.get('grid_mapping_name')) + # There are no referenced grid mappings. + # So we cannot unambiguously tell which variable uses which + # grid mappings. Therefore, we choose any: + gm = next(iter(grid_mappings.values())) + grid_mappings = {gm.var_name: gm} + + return grid_mappings def _complement_grid_mapping_coords( From ff68b0db5b7427b676d1e0550151d7bbedf32948 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Fri, 10 Mar 2023 18:37:13 +0100 Subject: [PATCH 10/17] Cont. impl. --- xcube/core/gridmapping/cfconv.py | 46 +++++++------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 3bb991ea4..b85ae7b55 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -103,45 +103,17 @@ def get_dataset_grid_mapping_templates(dataset: xr.Dataset) \ grid_mappings = _get_raw_grid_mappings(dataset) - if grid_mappings: - for gm in grid_mappings.values(): - if gm.ref_vars: - ref_var = gm.ref_vars[0] - y_dim, x_dim = ref_var[-2:] - ok = False - if x_dim in dataset.coords and y_dim in dataset.coords: - x_coords = dataset.coords[x_dim] - y_coords = dataset.coords[y_dim] - if x_coords.ndim == 1 and y_coords.ndim == 1: - gm.x_coords = x_coords - gm.y_coords = y_coords - ok = True - if not ok: - for gms in GM_SCHEMAS: - x_name, y_name = gms.standard_names - x_coords = None - y_coords = None - for var_name, var in dataset.coords.items(): - standard_name = var.attrs.get("standard_name") - if standard_name == x_name \ - and var.ndim == 1 \ - and var.dims[0] == x_dim: - x_coords = var - if standard_name == y_name \ - and var.ndim == 1 \ - and var.dims[0] == y_dim: - y_coords = var - if x_coords is not None and y_coords is not None: - gm.x_coords = x_coords - gm.y_coords = y_coords - ok = True - - if not ok: + for gm in grid_mappings.values(): + if gm.ref_vars: + ref_var = gm.ref_vars[0] + x_dim, y_dim = ref_var.dims[-1], ref_var.dims[-2], + x_coors, y_coords, gms = _get_xy_coords(dataset, x_dim, y_dim) + gm.x_coords, gm.y_coords = x_coors, y_coords def _get_xy_coords(dataset: xr.Dataset, - x_dim: str, - y_dim: str) \ + x_dim: Hashable, + y_dim: Hashable) \ -> Optional[Tuple[xr.DataArray, xr.DataArray, Optional[GridMappingSchema]]]: @@ -175,7 +147,7 @@ def _get_xy_coords(dataset: xr.Dataset, x_name, y_name = gms.standard_names x_coords = None y_coords = None - for var_name, var in dataset.coords.items(): + for var_name, var in dataset.data_vars.items(): standard_name = var.attrs.get("standard_name") if standard_name == x_name and var.dims == yx_dims: x_coords = var From a20d5c950aba2d8d917abe203db64c6c3dd0d563 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 14 Mar 2023 10:27:48 +0100 Subject: [PATCH 11/17] cont. dev --- test/core/gridmapping/test_cfconv.py | 430 ++++++++------------------ xcube/core/gridmapping/cfconv.py | 438 +++++++-------------------- 2 files changed, 238 insertions(+), 630 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index 703a60f8d..8ddb1351d 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -1,24 +1,13 @@ -import shutil import unittest -from typing import Set -import fsspec import numpy as np import pyproj import xarray as xr -from test.s3test import MOTO_SERVER_ENDPOINT_URL -from test.s3test import S3Test from xcube.core.gridmapping import GridMapping -from xcube.core.gridmapping.cfconv import GridCoords -from xcube.core.gridmapping.cfconv import GridMappingProxy -from xcube.core.gridmapping.cfconv import add_spatial_ref -from xcube.core.gridmapping.cfconv import get_dataset_grid_mapping_proxies -from xcube.core.new import new_cube -from xcube.core.store.fs.registry import new_fs_data_store +from xcube.core.gridmapping.cfconv import find_grid_mapping_for_var CRS_WGS84 = pyproj.crs.CRS(4326) -CRS_CRS84 = pyproj.crs.CRS.from_string("urn:ogc:def:crs:OGC:1.3:CRS84") CRS_UTM_33N = pyproj.crs.CRS(32633) CRS_ROTATED_POLE = pyproj.crs.CRS.from_cf( @@ -27,308 +16,133 @@ grid_north_pole_longitude=170.)) -class GetDatasetGridMappingsTest(unittest.TestCase): - def test_no_crs_lon_lat_common_names(self): - dataset = xr.Dataset(coords=dict(lon=xr.DataArray(np.linspace(10, 12, 11), dims='lon'), - lat=xr.DataArray(np.linspace(50, 52, 11), dims='lat'))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn(None, grid_mappings) - grid_mapping = grid_mappings.get(None) - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_WGS84, grid_mapping.crs) - self.assertEqual('latitude_longitude', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('lon', grid_mapping.coords.x.name) - self.assertEqual('lat', grid_mapping.coords.y.name) +class FindGridMappingForVarTest(unittest.TestCase): + w = 20 + h = 10 - def test_no_crs_lon_lat_standard_names(self): - dataset = xr.Dataset(coords=dict(weird_x=xr.DataArray(np.linspace(10, 12, 11), dims='i', - attrs=dict(standard_name='longitude')), - weird_y=xr.DataArray(np.linspace(50, 52, 11), dims='j', - attrs=dict(standard_name='latitude')))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn(None, grid_mappings) - grid_mapping = grid_mappings.get(None) - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_WGS84, grid_mapping.crs) - self.assertEqual('latitude_longitude', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('weird_x', grid_mapping.coords.x.name) - self.assertEqual('weird_y', grid_mapping.coords.y.name) - - def test_crs_x_y_with_common_names(self): - dataset = xr.Dataset(dict(crs=xr.DataArray(0, attrs=CRS_UTM_33N.to_cf())), - coords=dict(x=xr.DataArray(np.linspace(1000, 12000, 11), dims='x'), - y=xr.DataArray(np.linspace(5000, 52000, 11), dims='y'))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('crs', grid_mappings) - grid_mapping = grid_mappings.get('crs') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_UTM_33N, grid_mapping.crs) - self.assertEqual('transverse_mercator', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('x', grid_mapping.coords.x.name) - self.assertEqual('y', grid_mapping.coords.y.name) - - def test_crs_x_y_with_standard_names(self): - dataset = xr.Dataset(dict(crs=xr.DataArray(0, attrs=CRS_UTM_33N.to_cf())), - coords=dict(myx=xr.DataArray(np.linspace(1000, 12000, 11), dims='x', - attrs=dict(standard_name='projection_x_coordinate')), - myy=xr.DataArray(np.linspace(5000, 52000, 11), dims='y', - attrs=dict(standard_name='projection_y_coordinate')))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('crs', grid_mappings) - grid_mapping = grid_mappings.get('crs') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_UTM_33N, grid_mapping.crs) - self.assertEqual('transverse_mercator', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('myx', grid_mapping.coords.x.name) - self.assertEqual('myy', grid_mapping.coords.y.name) - - def test_latitude_longitude_with_x_y(self): - # This is what we get when opening a CRS-84 GeoTIFF using rioxarray - dataset = xr.Dataset( - dict( - band_1=xr.DataArray(np.zeros((11, 11)), dims=["y", "x"]), - spatial_ref=xr.DataArray(0, attrs=CRS_CRS84.to_cf()) - ), - coords=dict( - x=xr.DataArray(np.linspace(10, 20, 11), dims='x'), - y=xr.DataArray(np.linspace(50, 40, 11), dims='y'), - ) + def new_data_var(self, + shape=None, + dims=None, + attrs=None) -> xr.DataArray: + return xr.DataArray( + np.zeros(shape or (1, self.h, self.w), dtype=np.float32), + dims=dims or ("time", "y", "x"), + attrs=attrs ) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('spatial_ref', grid_mappings) - grid_mapping = grid_mappings.get('spatial_ref') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_CRS84, grid_mapping.crs) - self.assertEqual('latitude_longitude', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('x', grid_mapping.coords.x.name) - self.assertEqual('y', grid_mapping.coords.y.name) - def test_rotated_pole_with_common_names(self): - dataset = xr.Dataset( - dict( - rotated_pole=xr.DataArray(0, attrs=CRS_ROTATED_POLE.to_cf()) - ), - coords=dict( - rlon=xr.DataArray(np.linspace(-180, 180, 11), dims='rlon'), - rlat=xr.DataArray(np.linspace(0, 90, 11), dims='rlat') - ) + def new_x_coord_var(self, + dim=None, + attrs=None) -> xr.DataArray: + return xr.DataArray( + np.linspace(10000, 10000 + 10 * self.w, self.w), + dims=dim or "x", + attrs=attrs ) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('rotated_pole', grid_mappings) - grid_mapping = grid_mappings.get('rotated_pole') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertIn('Geographic', grid_mapping.crs.type_name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('rlon', grid_mapping.coords.x.name) - self.assertEqual('rlat', grid_mapping.coords.y.name) - def test_rotated_pole_with_standard_names(self): - dataset = xr.Dataset( - dict( - rotated_pole=xr.DataArray(0, - attrs=CRS_ROTATED_POLE.to_cf()) - ), - coords=dict( - u=xr.DataArray(np.linspace(-180, 180, 11), dims='u', - attrs=dict(standard_name='grid_longitude')), - v=xr.DataArray(np.linspace(0, 90, 11), dims='v', - attrs=dict(standard_name='grid_latitude')) - ) + def new_y_coord_var(self, + dim=None, + attrs=None) -> xr.DataArray: + return xr.DataArray( + np.linspace(10000, 10000 + 10 * self.h, self.h), + dims=dim or "y", + attrs=attrs ) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('rotated_pole', grid_mappings) - grid_mapping = grid_mappings.get('rotated_pole') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertIn('Geographic', grid_mapping.crs.type_name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('u', grid_mapping.coords.x.name) - self.assertEqual('v', grid_mapping.coords.y.name) - - -class XarrayDecodeCfTest(unittest.TestCase): - """Find out how xarray treats 1D and 2D coordinate variables when decode_cf=True or =False""" - - def test_cf_1d_coords(self): - self._write_coords(*self._gen_1d()) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=True) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=False) - - def test_cf_1d_data_vars(self): - self._write_data_vars(*self._gen_1d()) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=True) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=False) - - def test_cf_2d_coords(self): - self._write_coords(*self._gen_2d()) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=True) - self.assertVarNames({'noise', 'crs', 'lon', 'lat'}, set(), decode_cf=False) - - def test_cf_2d_data_vars(self): - self._write_data_vars(*self._gen_2d()) - self.assertVarNames({'noise', 'crs', 'lon', 'lat'}, set(), decode_cf=True) - self.assertVarNames({'noise', 'crs', 'lon', 'lat'}, set(), decode_cf=False) - - def assertVarNames(self, exp_data_vars: Set[str], exp_coords: Set[str], decode_cf: bool): - ds1 = xr.open_zarr('noise.zarr', decode_cf=decode_cf) - self.assertEqual(exp_coords, set(ds1.coords)) - self.assertEqual(exp_data_vars, set(ds1.data_vars)) - - @classmethod - def _write_coords(cls, noise, crs, lon, lat): - dataset = xr.Dataset(dict(noise=noise, crs=crs), coords=dict(lon=lon, lat=lat)) - dataset.to_zarr('noise.zarr', mode='w') - - @classmethod - def _write_data_vars(cls, noise, crs, lon, lat): - dataset = xr.Dataset(dict(noise=noise, crs=crs, lon=lon, lat=lat)) - dataset.to_zarr('noise.zarr', mode='w') - - @classmethod - def tearDownClass(cls) -> None: - shutil.rmtree('noise.zarr') - - def _gen_1d(self): - noise = xr.DataArray(np.random.random((11, 11)), dims=('lat', 'lon')) - crs = xr.DataArray(0, attrs=CRS_CRS84.to_cf()) - lon = xr.DataArray(np.linspace(10, 12, 11), dims='lon') - lat = xr.DataArray(np.linspace(50, 52, 11), dims='lat') - noise.attrs['grid_mapping'] = 'crs' - lon.attrs['standard_name'] = 'longitude' - lat.attrs['standard_name'] = 'latitude' - self.assertEqual(('lon',), lon.dims) - self.assertEqual(('lat',), lat.dims) - return noise, crs, lon, lat - - def _gen_2d(self): - noise = xr.DataArray(np.random.random((11, 11)), dims=('y', 'x')) - crs = xr.DataArray(0, attrs=CRS_CRS84.to_cf()) - lon = xr.DataArray(np.linspace(10, 12, 11), dims='x') - lat = xr.DataArray(np.linspace(50, 52, 11), dims='y') - lat, lon = xr.broadcast(lat, lon) - noise.attrs['grid_mapping'] = 'crs' - lon.attrs['standard_name'] = 'longitude' - lat.attrs['standard_name'] = 'latitude' - self.assertEqual(('y', 'x'), lon.dims) - self.assertEqual(('y', 'x'), lat.dims) - return noise, crs, lon, lat - - -class AddSpatialRefTest(S3Test, unittest.TestCase): - - def test_add_spatial_ref(self): - self.assert_add_spatial_ref_ok(None, None) - self.assert_add_spatial_ref_ok(None, ('cx', 'cy')) - self.assert_add_spatial_ref_ok('crs', None) - self.assert_add_spatial_ref_ok('crs', ('cx', 'cy')) - def assert_add_spatial_ref_ok(self, crs_var_name, xy_dim_names): - - root = 'eurodatacube-test/xcube-eea' - data_id = 'test.zarr' - crs = pyproj.CRS.from_string("EPSG:3035") - - if xy_dim_names: - x_name, y_name = xy_dim_names - else: - x_name, y_name = 'x', 'y' - - cube = new_cube(x_name=x_name, - y_name=y_name, - x_start=0, - y_start=0, - x_res=10, - y_res=10, - x_units='metres', - y_units='metres', - drop_bounds=True, - width=100, - height=100, - variables=dict(A=1.3, B=8.3)) - - storage_options = dict( - anon=False, - client_kwargs=dict( - endpoint_url=MOTO_SERVER_ENDPOINT_URL, - ) + def new_crs_var(self, crs) -> xr.DataArray: + return xr.DataArray(0, attrs=crs.to_cf()) + + def test_with_grid_mapping_and_named_coords(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs: a b"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } ) - - fs: fsspec.AbstractFileSystem = fsspec.filesystem('s3', - **storage_options) - if fs.isdir(root): - fs.rm(root, recursive=True) - fs.mkdirs(root, exist_ok=True) - - data_store = new_fs_data_store('s3', - root=root, - storage_options=storage_options) - - data_store.write_data(cube, data_id=data_id) - cube = data_store.open_data(data_id) - self.assertEqual({'A', 'B', 'time', x_name, y_name}, - set(cube.variables)) - - with self.assertRaises(ValueError) as cm: - GridMapping.from_dataset(cube) - self.assertEqual( - ('cannot find any grid mapping in dataset',), - cm.exception.args + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_grid_mapping_and_standard_name_lat_lon(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_WGS84) + }, + coords={ + "a": self.new_x_coord_var( + attrs={"standard_name": "longitude"} + ), + "b": self.new_y_coord_var( + attrs={"standard_name": "latitude"} + ), + "lon": self.new_x_coord_var(), + "lat": self.new_y_coord_var(), + } ) - - path = f"{root}/{data_id}" - group_store = fs.get_mapper(path, create=True) - - expected_crs_var_name = crs_var_name or 'spatial_ref' - - self.assertTrue(fs.exists(path)) - self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}")) - self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) - self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) - - kwargs = {} - if crs_var_name is not None: - kwargs.update(crs_var_name=crs_var_name) - if xy_dim_names is not None: - kwargs.update(xy_dim_names=xy_dim_names) - add_spatial_ref(group_store, crs, **kwargs) - - self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}")) - self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) - self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) - - cube = data_store.open_data(data_id) - self.assertEqual({'A', 'B', 'time', - x_name, y_name, expected_crs_var_name}, - set(cube.variables)) - self.assertEqual(expected_crs_var_name, - cube.A.attrs.get('grid_mapping')) - self.assertEqual(expected_crs_var_name, - cube.B.attrs.get('grid_mapping')) - - gm = GridMapping.from_dataset(cube) - self.assertIn("LAEA Europe", gm.crs.srs) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_grid_mapping_and_standard_name_rot_lat_lon(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_ROTATED_POLE) + }, + coords={ + "a": self.new_x_coord_var( + attrs={"standard_name": "grid_longitude"} + ), + "b": self.new_y_coord_var( + attrs={"standard_name": "grid_latitude"} + ), + "lon": self.new_x_coord_var(), + "lat": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_ROTATED_POLE.to_cf(), gm.crs.to_cf()) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_grid_mapping_and_standard_name_projected(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var( + attrs={"standard_name": "projection_x_coordinate"} + ), + "b": self.new_y_coord_var( + attrs={"standard_name": "projection_y_coordinate"} + ), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index b85ae7b55..652c9c307 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -19,336 +19,130 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import warnings -from collections.abc import MutableMapping -from typing import Optional, Dict, Any, Hashable, Union, Set, List, Tuple +import functools +from typing import Optional, Union, Sequence -import numpy as np import pyproj +import pyproj.exceptions import xarray as xr -import zarr -import zarr.convenience -from xcube.core.schema import get_dataset_chunks from xcube.util.assertions import assert_instance -from .base import CRS_WGS84 - - -class GridMappingSchema: - def __init__(self, - name: str, - standard_names: Tuple[str, str], - common_names: Tuple[Tuple[str, str], ...], - default_crs: Optional[pyproj.CRS]): - self.name = name - self.standard_names = standard_names - self.common_var_names = common_names - self.default_crs = default_crs - - -GMS_LATITUDE_LONGITUDE = GridMappingSchema( - 'latitude_longitude', - ('longitude', 'latitude'), - (('lon', 'lat'), - ('longitude', 'latitude')), - CRS_WGS84 -) - -GMS_ROTATED_LATITUDE_LONGITUDE = GridMappingSchema( - 'rotated_latitude_longitude', - ('grid_longitude', 'grid_latitude'), - (('rlon', 'rlat'), - ('rlongitude', 'rlatitude')), - CRS_WGS84 -) - -GMS_PROJECTED = GridMappingSchema( - 'projected', - ('projection_x_coordinate', 'projection_y_coordinate'), - (('x', 'y'), - ('xc', 'yc'), - ('transformed_x', 'transformed_y')), - None -) - -GM_SCHEMAS = (GMS_LATITUDE_LONGITUDE, - GMS_ROTATED_LATITUDE_LONGITUDE, - GMS_PROJECTED) - - -class GridMappingTemplate: - def __init__(self, - var_name: str, - var: xr.DataArray, - crs: pyproj.CRS, - x_coords: Optional[xr.DataArray] = None, - y_coords: Optional[xr.DataArray] = None): - self.var_name = var_name - self.var = var - self.crs = crs - self.x_coords = x_coords - self.y_coords = y_coords - self.ref_vars: List[xr.DataArray] = [] - -def get_dataset_grid_mapping_templates(dataset: xr.Dataset) \ - -> List[GridMappingTemplate]: - """ - Find grid mappings encoded as described in the CF conventions - [Horizontal Coordinate Reference Systems, Grid Mappings, and Projections] - (http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#grid-mappings-and-projections). - - :param dataset: The dataset - :return: - """ - - grid_mappings = _get_raw_grid_mappings(dataset) - - for gm in grid_mappings.values(): - if gm.ref_vars: - ref_var = gm.ref_vars[0] - x_dim, y_dim = ref_var.dims[-1], ref_var.dims[-2], - x_coors, y_coords, gms = _get_xy_coords(dataset, x_dim, y_dim) - gm.x_coords, gm.y_coords = x_coors, y_coords - - -def _get_xy_coords(dataset: xr.Dataset, - x_dim: Hashable, - y_dim: Hashable) \ - -> Optional[Tuple[xr.DataArray, - xr.DataArray, - Optional[GridMappingSchema]]]: - x_dims = (x_dim,) - y_dims = (y_dim,) - yx_dims = (y_dim, x_dim) - - # 1D-coordinate variables named after their dimensions - if x_dim in dataset.coords and y_dim in dataset.coords: - x_coords = dataset.coords[x_dim] - y_coords = dataset.coords[y_dim] - if x_coords.dims == x_dims and y_coords.dims == y_dims: - return x_coords, y_coords, None - - # 1D-coordinate variables identified by "standard_name" - for gms in GM_SCHEMAS: - x_name, y_name = gms.standard_names - x_coords = None - y_coords = None - for var_name, var in dataset.coords.items(): - standard_name = var.attrs.get("standard_name") - if standard_name == x_name and var.dims == x_dims: - x_coords = var - if standard_name == y_name and var.dims == y_dims: - y_coords = var - if x_coords is not None and y_coords is not None: - return x_coords, y_coords, gms - - # 2D-coordinate variables identified by "standard_name" - for gms in GM_SCHEMAS: - x_name, y_name = gms.standard_names - x_coords = None - y_coords = None - for var_name, var in dataset.data_vars.items(): - standard_name = var.attrs.get("standard_name") - if standard_name == x_name and var.dims == yx_dims: - x_coords = var - if standard_name == y_name and var.dims == yx_dims: - y_coords = var - if x_coords is not None and y_coords is not None: - return x_coords, y_coords, gms - - # 1D-coordinate variables identified by common variable names - for gms in GM_SCHEMAS: - for x_name, y_name in gms.common_var_names: - x_coords = dataset.get(x_name) - y_coords = dataset.get(y_name) - if x_coords is not None and y_coords is not None \ - and x_coords.dims == x_dims \ - and y_coords.dims == y_dims: - return x_coords, y_coords, gms - - # 2D-coordinate variables identified by common variable names - for gms in GM_SCHEMAS: - for x_name, y_name in gms.common_var_names: - x_coords = dataset.get(x_name) - y_coords = dataset.get(y_name) - if x_coords is not None and y_coords is not None \ - and x_coords.dims == yx_dims \ - and y_coords.dims == yx_dims: - return x_coords, y_coords, gms +from xcube.util.assertions import assert_true +from .base import GridMapping +from .coords import new_grid_mapping_from_coords + + +def find_grid_mapping_for_var(dataset: xr.Dataset, + var_name: str, + strict: bool = False) -> Optional[GridMapping]: + assert_instance(dataset, xr.Dataset, "dataset") + assert_instance(var_name, str, "var_name") + assert_true(var_name in dataset, + message=f"variable {var_name!r} not found in dataset") + + var = dataset[var_name] + + gm_var_name = var.attrs.get("grid_mapping") + if isinstance(gm_var_name, str) and gm_var_name: + return _find_grid_mapping_for_var_with_gm_attr( + dataset, + var, + gm_var_name, + strict=strict + ) return None -def _get_raw_grid_mappings(dataset): - grid_mappings = dict() - # Find any grid mappings by parsing variable attributes to CRS - # - for var_name, var in dataset.data_vars.items(): - try: - crs = pyproj.crs.CRS.from_cf(var.attrs) - except pyproj.crs.CRSError: - continue - grid_mappings[var_name] = GridMappingTemplate(var, crs) - # Add spatial variables to corresponding grid mapping - # by their CF 'grid_mapping' attribute - # - for var_name, var in dataset.data_vars.items(): - if var_name in grid_mappings: - continue - if var.ndim < 2: - continue - gm_var_name = var.attrs.get('grid_mapping') - if not gm_var_name or gm_var_name not in grid_mappings: - continue - gm = grid_mappings[gm_var_name] - gm.ref_vars.append(var) - if len(grid_mappings) > 1: - # We have more than one grid mapping. - # Just keep the referenced grid mappings. - ref_grid_mappings = filter(lambda gm: bool(gm.ref_var_names), - grid_mappings.values()) - if ref_grid_mappings: - grid_mappings = {gm.var_name: gm for gm in ref_grid_mappings} +def _find_grid_mapping_for_var_with_gm_attr(dataset: xr.Dataset, + var: xr.DataArray, + gm_var_name: str, + strict: bool = False) \ + -> Optional[GridMapping]: + maybe_fail = functools.partial(_maybe_fail, strict) + + if ":" in gm_var_name: + gm_var_name, coord_var_names = gm_var_name.split(":", maxsplit=1) + coord_var_names = coord_var_names.split() + else: + coord_var_names = dataset.coords.keys() + + if gm_var_name not in dataset: + return maybe_fail(f"grid mapping variable {gm_var_name!r}" + f" not found in dataset") + + gm_var = dataset[gm_var_name] + try: + crs = pyproj.CRS.from_cf(gm_var.attrs) + except pyproj.exceptions.CRSError as e: + return maybe_fail(e) + + gm_name = gm_var.attrs.get("grid_mapping_name") + + coord_vars = [dataset[coord_var_name] + for coord_var_name in coord_var_names + if coord_var_name in dataset] + + if len(coord_vars) < len(coord_var_names): + return maybe_fail(f"not all coordinate variables" + f" of {coord_var_names!r}" + f" found in dataset") + + valid_1d_coord_vars = [ + coord_var for coord_var in coord_vars + if (coord_var is not None + and coord_var.ndim == 1 + and coord_var.dims[0] in var.dims) + ] + + if gm_name == "latitude_longitude": + x_name, y_name = "longitude", "latitude" + elif gm_name == "rotated_latitude_longitude": + x_name, y_name = "grid_longitude", "grid_latitude" + else: + x_name, y_name = "projection_x_coordinate", "projection_y_coordinate" + + x_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, x_name) + y_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, y_name) + if x_coords is not None and y_coords is not None: + return new_grid_mapping_from_coords(x_coords=x_coords, + y_coords=y_coords, + crs=crs) + + valid_2d_coord_vars = [ + coord_var for coord_var in coord_vars + if (coord_var is not None + and coord_var.ndim == 2 + and coord_var.dims[0] in var.dims + and coord_var.dims[1] in var.dims) + ] + x_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, x_name) + y_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, y_name) + if x_coords is not None and y_coords is not None \ + and x_coords.dims == y_coords.dims: + return new_grid_mapping_from_coords(x_coords=x_coords, + y_coords=y_coords, + crs=crs) + + return maybe_fail(f"cannot find coordinates in {coord_var_names!r}" + f" for grid mapping variable {gm_var_name!r}") + + +def _find_coord_var_by_standard_name( + coords: Sequence[xr.DataArray], + standard_name: str +) -> Optional[xr.DataArray]: + return next( + (var for var in coords + if var.attrs.get("standard_name") == standard_name), + None + ) + + +def _maybe_fail(strict: bool, error: Union[str, Exception]) -> None: + if strict: + if isinstance(error, str): + raise ValueError(error) else: - # There are no referenced grid mappings. - # So we cannot unambiguously tell which variable uses which - # grid mappings. Therefore, we choose any: - gm = next(iter(grid_mappings.values())) - grid_mappings = {gm.var_name: gm} - - return grid_mappings - - -def _complement_grid_mapping_coords( - coords: GridCoords, - grid_mapping_name: Optional[str], - missing_crs: Optional[pyproj.crs.CRS], - grid_mappings: Dict[Optional[str], GridMappingProxy] -): - if coords.x is not None or coords.y is not None: - grid_mapping = next((grid_mapping - for grid_mapping in grid_mappings.values() - if grid_mapping_name is None - or grid_mapping_name == grid_mapping.name), - None) - if grid_mapping is None and missing_crs is not None: - grid_mapping = GridMappingProxy(crs=missing_crs, - name=grid_mapping_name) - grid_mappings[None] = grid_mapping - - if grid_mapping is not None: - if grid_mapping.coords is None: - grid_mapping.coords = coords - # Edge case from GeoTIFF with CRS-84 with 1D - # coordinates named "x" and "y" as read by rioxarray - if grid_mapping.coords.x is None: - grid_mapping.coords.x = coords.x - if grid_mapping.coords.y is None: - grid_mapping.coords.y = coords.y - - -def _find_potential_coord_vars(dataset: xr.Dataset) -> List[Hashable]: - """ - Find potential coordinate variables. - - We need this function as we can not rely on xarray.Dataset.coords, - because 2D coordinate arrays are most likely not indicated as such - in many datasets. - """ - - # Collect bounds variables. We must exclude them. - bounds_vars = set() - for k in dataset.variables: - var = dataset[k] - - # Bounds variable as recommended through CF conventions - bounds_k = var.attrs.get('bounds') - if bounds_k is not None and bounds_k in dataset: - bounds_vars.add(bounds_k) - - # Bounds variable by naming convention, - # e.g. "lon_bnds" or "lat_bounds" - k_splits = str(k).rsplit("_", maxsplit=1) - if len(k_splits) == 2: - k_base, k_suffix = k_splits - if k_suffix in ('bnds', 'bounds') and k_base in dataset: - bounds_vars.add(k) - - potential_coord_vars = [] - - # First consider any CF global attribute "coordinates" - coordinates = dataset.attrs.get('coordinates') - if coordinates is not None: - for var_name in coordinates.split(): - if _is_potential_coord_var(dataset, bounds_vars, var_name): - potential_coord_vars.append(var_name) - - # Then consider any other 1D/2D variables - for var_name in dataset.variables: - if var_name not in potential_coord_vars \ - and _is_potential_coord_var(dataset, bounds_vars, var_name): - potential_coord_vars.append(var_name) - - return potential_coord_vars - - -def _is_potential_coord_var(dataset: xr.Dataset, - bounds_var_names: Set[str], - var_name: Hashable) -> bool: - if var_name in dataset: - var = dataset[var_name] - return var.ndim in (1, 2) and var_name not in bounds_var_names - return False - - -def _find_dataset_tile_size(dataset: xr.Dataset, - x_dim_name: Hashable, - y_dim_name: Hashable) \ - -> Optional[Tuple[int, int]]: - """Find the most likely tile size in *dataset*""" - dataset_chunks = get_dataset_chunks(dataset) - tile_width = dataset_chunks.get(x_dim_name) - tile_height = dataset_chunks.get(y_dim_name) - if tile_width is not None and tile_height is not None: - return tile_width, tile_height + raise error return None - - -def add_spatial_ref(dataset_store: zarr.convenience.StoreLike, - crs: pyproj.CRS, - crs_var_name: Optional[str] = 'spatial_ref', - xy_dim_names: Optional[Tuple[str, str]] = None): - """ - Helper function that allows adding a spatial reference to an - existing Zarr dataset. - - :param dataset_store: The dataset's existing Zarr store or path. - :param crs: The spatial coordinate reference system. - :param crs_var_name: The name of the variable that will hold the - spatial reference. Defaults to "spatial_ref". - :param xy_dim_names: The names of the x and y dimensions. - Defaults to ("x", "y"). - """ - assert_instance(dataset_store, (MutableMapping, str), name='group_store') - assert_instance(crs_var_name, str, name='crs_var_name') - x_dim_name, y_dim_name = xy_dim_names or ('x', 'y') - - spatial_attrs = crs.to_cf() - spatial_attrs['_ARRAY_DIMENSIONS'] = [] # Required by xarray - group = zarr.open(dataset_store, mode='r+') - spatial_ref = group.array(crs_var_name, - 0, - shape=(), - dtype=np.uint8, - fill_value=0) - spatial_ref.attrs.update(**spatial_attrs) - - for item_name, item in group.items(): - if item_name != crs_var_name: - dims = item.attrs.get('_ARRAY_DIMENSIONS') - if dims and len(dims) >= 2 \ - and dims[-2] == y_dim_name \ - and dims[-1] == x_dim_name: - item.attrs['grid_mapping'] = crs_var_name - - zarr.convenience.consolidate_metadata(dataset_store) From 174b77c00888d4007c5c4a832c3c5aac73054c35 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 14 Mar 2023 16:11:29 +0100 Subject: [PATCH 12/17] Tests work --- xcube/core/gridmapping/cfconv.py | 216 +++++++++++++++++++++---------- 1 file changed, 149 insertions(+), 67 deletions(-) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 652c9c307..0565cbb2f 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -19,8 +19,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import functools -from typing import Optional, Union, Sequence +from typing import Optional, Union, Sequence, Tuple import pyproj import pyproj.exceptions @@ -28,13 +27,13 @@ from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true +from .base import CRS_WGS84 from .base import GridMapping from .coords import new_grid_mapping_from_coords def find_grid_mapping_for_var(dataset: xr.Dataset, - var_name: str, - strict: bool = False) -> Optional[GridMapping]: + var_name: str) -> Optional[GridMapping]: assert_instance(dataset, xr.Dataset, "dataset") assert_instance(var_name, str, "var_name") assert_true(var_name in dataset, @@ -42,57 +41,74 @@ def find_grid_mapping_for_var(dataset: xr.Dataset, var = dataset[var_name] - gm_var_name = var.attrs.get("grid_mapping") - if isinstance(gm_var_name, str) and gm_var_name: - return _find_grid_mapping_for_var_with_gm_attr( - dataset, - var, - gm_var_name, - strict=strict + gm_value = var.attrs.get("grid_mapping") + if isinstance(gm_value, str) and gm_value: + crs, gm_name, xy_coords = _find_grid_mapping_for_var_with_gm_value( + dataset, var, gm_value ) + else: + crs, gm_name, xy_coords = CRS_WGS84, "latitude_longitude", None - return None + if xy_coords is None: + xy_coords = _find_coordinates_for_crs_and_gm_name( + dataset, var, gm_name + ) + return new_grid_mapping_from_coords(x_coords=xy_coords[0], + y_coords=xy_coords[1], + crs=crs) -def _find_grid_mapping_for_var_with_gm_attr(dataset: xr.Dataset, - var: xr.DataArray, - gm_var_name: str, - strict: bool = False) \ - -> Optional[GridMapping]: - maybe_fail = functools.partial(_maybe_fail, strict) - if ":" in gm_var_name: - gm_var_name, coord_var_names = gm_var_name.split(":", maxsplit=1) +def _find_grid_mapping_for_var_with_gm_value( + dataset: xr.Dataset, + var: xr.DataArray, + gm_value: str +) -> Tuple[pyproj.CRS, + str, + Optional[Tuple[xr.DataArray, xr.DataArray]]]: + xy_coords = None + + if ":" in gm_value: + # gm_value has format ": " + gm_var_name, coord_var_names = gm_value.split(":", maxsplit=1) coord_var_names = coord_var_names.split() + if len(coord_var_names) == 2: + x_name, y_name = coord_var_names + # Check CF, whether the following is correct: + # if gm_name in ("latitude_longitude", + # "rotated_latitude_longitude"): + # x_name, y_name = y_name, x_name + x_coords = dataset.get(x_name) + y_coords = dataset.get(y_name) + if ((_is_valid_1d_coord_var(var, x_coords) + and _is_valid_1d_coord_var(var, y_coords)) + or (_is_valid_2d_coord_var(var, x_coords) + and _is_valid_2d_coord_var(var, y_coords))): + xy_coords = x_coords, y_coords + if xy_coords is None: + raise ValueError(f"invalid coordinates in" + f" grid mapping value {gm_value!r}") else: - coord_var_names = dataset.coords.keys() - - if gm_var_name not in dataset: - return maybe_fail(f"grid mapping variable {gm_var_name!r}" - f" not found in dataset") + # gm_value has format "" + gm_var_name = gm_value - gm_var = dataset[gm_var_name] - try: - crs = pyproj.CRS.from_cf(gm_var.attrs) - except pyproj.exceptions.CRSError as e: - return maybe_fail(e) + crs, gm_name = _parse_crs(dataset, gm_var_name) - gm_name = gm_var.attrs.get("grid_mapping_name") + return crs, gm_name, xy_coords - coord_vars = [dataset[coord_var_name] - for coord_var_name in coord_var_names - if coord_var_name in dataset] - if len(coord_vars) < len(coord_var_names): - return maybe_fail(f"not all coordinate variables" - f" of {coord_var_names!r}" - f" found in dataset") +def _find_coordinates_for_crs_and_gm_name( + dataset: xr.Dataset, + var: xr.DataArray, + gm_name: str +) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: + other_vars = [dataset[var_name] + for var_name in dataset.variables.keys() + if var_name != var.name] valid_1d_coord_vars = [ - coord_var for coord_var in coord_vars - if (coord_var is not None - and coord_var.ndim == 1 - and coord_var.dims[0] in var.dims) + other_var for other_var in other_vars + if _is_valid_1d_coord_var(var, other_var) ] if gm_name == "latitude_longitude": @@ -105,44 +121,110 @@ def _find_grid_mapping_for_var_with_gm_attr(dataset: xr.Dataset, x_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, x_name) y_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, y_name) if x_coords is not None and y_coords is not None: - return new_grid_mapping_from_coords(x_coords=x_coords, - y_coords=y_coords, - crs=crs) + return x_coords, y_coords valid_2d_coord_vars = [ - coord_var for coord_var in coord_vars - if (coord_var is not None - and coord_var.ndim == 2 - and coord_var.dims[0] in var.dims - and coord_var.dims[1] in var.dims) + other_var for other_var in other_vars + if _is_valid_2d_coord_var(var, other_var) ] x_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, x_name) y_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, y_name) if x_coords is not None and y_coords is not None \ and x_coords.dims == y_coords.dims: - return new_grid_mapping_from_coords(x_coords=x_coords, - y_coords=y_coords, - crs=crs) + return x_coords, y_coords + + xy_coords = _find_1d_coord_var_by_common_names( + valid_1d_coord_vars, + (("lon", "lat"), + ("longitude", "latitude"), + ("x", "y"), + ("xc", "yc")), + ) + if xy_coords is not None: + return xy_coords + + # Check: also try _find_2d_coord_var_by_common_names()? - return maybe_fail(f"cannot find coordinates in {coord_var_names!r}" - f" for grid mapping variable {gm_var_name!r}") + raise ValueError(f"cannot determine grid mapping" + f" coordinates for variable {var.name!r}") + + +def _find_1d_coord_var_by_common_names( + coords: Sequence[xr.DataArray], + common_xy_names: Sequence[Tuple[str, str]], +) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: + x_coords = None + y_coords = None + for var in coords: + if var.attrs.get("axis") == "X": + x_coords = var + if var.attrs.get("axis") == "Y": + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + x_coords = None + y_coords = None + for var in coords: + for x_name, y_name in common_xy_names: + if var.dims[0] == x_name and var.name == x_name: + x_coords = var + if var.dims[0] == y_name and var.name == y_name: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + x_coords = None + y_coords = None + for var in coords: + for x_name, y_name in common_xy_names: + if var.dims[0] == x_name: + x_coords = var + if var.dims[0] == y_name: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + return None def _find_coord_var_by_standard_name( coords: Sequence[xr.DataArray], standard_name: str ) -> Optional[xr.DataArray]: - return next( - (var for var in coords - if var.attrs.get("standard_name") == standard_name), - None - ) + for var in coords: + if var.attrs.get("standard_name") == standard_name: + return var + return None + + +def _parse_crs(dataset: xr.Dataset, + gm_var_name: str) -> Tuple[pyproj.CRS, Optional[str]]: + if gm_var_name not in dataset: + raise ValueError(f"grid mapping variable {gm_var_name!r}" + f" not found in dataset") + gm_var = dataset[gm_var_name] + try: + return (pyproj.CRS.from_cf(gm_var.attrs), + gm_var.attrs.get("grid_mapping_name")) + except pyproj.exceptions.CRSError as e: + raise ValueError(f"illegal value for" + f" grid mapping variable {gm_var_name!r}") from e + + +def _is_valid_1d_coord_var(data_var: xr.DataArray, + coord_var: Optional[xr.DataArray]) -> bool: + return (coord_var is not None + and coord_var.ndim == 1 + and coord_var.dims[0] in data_var.dims) + + +def _is_valid_2d_coord_var(data_var: xr.DataArray, + coord_var: Optional[xr.DataArray]) -> bool: + return (coord_var is not None + and coord_var.ndim == 2 + and coord_var.dims[0] in data_var.dims + and coord_var.dims[1] in data_var.dims) + -def _maybe_fail(strict: bool, error: Union[str, Exception]) -> None: - if strict: - if isinstance(error, str): - raise ValueError(error) - else: - raise error - return None From 7b4c785d605b893fdfd2ee70a82b5610c8dbf147 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 14 Mar 2023 16:12:08 +0100 Subject: [PATCH 13/17] format --- xcube/core/gridmapping/cfconv.py | 24 ++++++++++++------------ xcube/core/gridmapping/regular.py | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 0565cbb2f..648f08764 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -1,23 +1,23 @@ # The MIT License (MIT) -# Copyright (c) 2021 by the xcube development team and contributors +# Copyright (c) 2023 by the xcube development team and contributors # -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal in -# the Software without restriction, including without limitation the rights to -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -# of the Software, and to permit persons to whom the Software is furnished to do -# so, subject to the following conditions: +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: # -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. from typing import Optional, Union, Sequence, Tuple diff --git a/xcube/core/gridmapping/regular.py b/xcube/core/gridmapping/regular.py index 68869a10e..2fcfa5d36 100644 --- a/xcube/core/gridmapping/regular.py +++ b/xcube/core/gridmapping/regular.py @@ -1,23 +1,23 @@ # The MIT License (MIT) -# Copyright (c) 2021 by the xcube development team and contributors +# Copyright (c) 2023 by the xcube development team and contributors # -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal in -# the Software without restriction, including without limitation the rights to -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -# of the Software, and to permit persons to whom the Software is furnished to do -# so, subject to the following conditions: +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: # -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. from typing import Tuple, Union From 02db3aae11f8b4c50d47dcb61d9588603158a1ba Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 15 Mar 2023 09:12:26 +0100 Subject: [PATCH 14/17] More tests --- test/core/gridmapping/test_cfconv.py | 199 ++++++++++++++++++++++++++- xcube/core/gridmapping/cfconv.py | 8 +- 2 files changed, 201 insertions(+), 6 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index 8ddb1351d..493fd030b 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -2,6 +2,7 @@ import numpy as np import pyproj +import pytest import xarray as xr from xcube.core.gridmapping import GridMapping @@ -51,7 +52,7 @@ def new_y_coord_var(self, def new_crs_var(self, crs) -> xr.DataArray: return xr.DataArray(0, attrs=crs.to_cf()) - def test_with_grid_mapping_and_named_coords(self): + def test_with_gm_var_and_named_coords(self): ds = xr.Dataset( data_vars={ "sst": self.new_data_var( @@ -72,7 +73,61 @@ def test_with_grid_mapping_and_named_coords(self): self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) - def test_with_grid_mapping_and_standard_name_lat_lon(self): + def test_with_gm_var_fails_with_invalid_coords(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs: a b"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + with pytest.raises(ValueError, + match="invalid coordinates in" + " grid mapping value 'crs: a b'"): + find_grid_mapping_for_var(ds, "sst") + + def test_with_gm_var_fails_with_invalid_crs(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": xr.DataArray(0, attrs={"bibo": 12}) + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + with pytest.raises(ValueError, + match="variable 'crs' is not" + " a valid grid mapping"): + find_grid_mapping_for_var(ds, "sst") + + def test_with_gm_var_fails_with_invalid_gm_var(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "spatial_ref": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + with pytest.raises(ValueError, + match="grid mapping variable 'crs'" + " not found in dataset"): + find_grid_mapping_for_var(ds, "sst") + + def test_with_gm_var_and_standard_name_lat_lon(self): ds = xr.Dataset( data_vars={ "sst": self.new_data_var( @@ -97,7 +152,7 @@ def test_with_grid_mapping_and_standard_name_lat_lon(self): self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) - def test_with_grid_mapping_and_standard_name_rot_lat_lon(self): + def test_with_gm_var_and_standard_name_rot_lat_lon(self): ds = xr.Dataset( data_vars={ "sst": self.new_data_var( @@ -122,7 +177,7 @@ def test_with_grid_mapping_and_standard_name_rot_lat_lon(self): self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) - def test_with_grid_mapping_and_standard_name_projected(self): + def test_with_gm_var_and_standard_name_projected(self): ds = xr.Dataset( data_vars={ "sst": self.new_data_var( @@ -146,3 +201,139 @@ def test_with_grid_mapping_and_standard_name_projected(self): self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_gm_var_and_axis(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(attrs={"axis": "X"}), + "b": self.new_y_coord_var(attrs={"axis": "Y"}), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_with_gm_var_and_common_dim_and_var_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("x", "y"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + dims=("time", "lat", "lon"), + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_WGS84) + }, + coords={ + "a": self.new_x_coord_var(dim="lon"), + "b": self.new_y_coord_var(dim="lat"), + "lon": self.new_x_coord_var(dim="lon"), + "lat": self.new_y_coord_var(dim="lat"), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("lon", "lat"), gm.xy_var_names) + self.assertEqual(("lon", "lat"), gm.xy_dim_names) + + def test_with_gm_var_and_common_dim_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_UTM_33N, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + def test_without_gm_var_and_common_dim_and_var_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var(), + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("x", "y"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + dims=("time", "lat", "lon"), + ), + }, + coords={ + "a": self.new_x_coord_var(dim="lon"), + "b": self.new_y_coord_var(dim="lat"), + "lon": self.new_x_coord_var(dim="lon"), + "lat": self.new_y_coord_var(dim="lat"), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("lon", "lat"), gm.xy_var_names) + self.assertEqual(("lon", "lat"), gm.xy_dim_names) + + def test_without_gm_var_and_common_dim_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var() + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_var(ds, "sst") + self.assertIsInstance(gm, GridMapping) + self.assertEqual(CRS_WGS84, gm.crs) + self.assertEqual(("a", "b"), gm.xy_var_names) + self.assertEqual(("x", "y"), gm.xy_dim_names) + diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 648f08764..c64d01bbe 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -153,6 +153,8 @@ def _find_1d_coord_var_by_common_names( coords: Sequence[xr.DataArray], common_xy_names: Sequence[Tuple[str, str]], ) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: + + # Priority 1: find var by "axis" attribute x_coords = None y_coords = None for var in coords: @@ -163,6 +165,7 @@ def _find_1d_coord_var_by_common_names( if x_coords is not None and y_coords is not None: return x_coords, y_coords + # Priority 2: find var where dim name == common name == var name x_coords = None y_coords = None for var in coords: @@ -174,6 +177,7 @@ def _find_1d_coord_var_by_common_names( if x_coords is not None and y_coords is not None: return x_coords, y_coords + # Priority 3: find var where dim name == common name x_coords = None y_coords = None for var in coords: @@ -208,8 +212,8 @@ def _parse_crs(dataset: xr.Dataset, return (pyproj.CRS.from_cf(gm_var.attrs), gm_var.attrs.get("grid_mapping_name")) except pyproj.exceptions.CRSError as e: - raise ValueError(f"illegal value for" - f" grid mapping variable {gm_var_name!r}") from e + raise ValueError(f"variable {gm_var_name!r}" + f" is not a valid grid mapping") from e def _is_valid_1d_coord_var(data_var: xr.DataArray, From 3f1dae0b6a7008003a22e9b545b232819f0414aa Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 15 Mar 2023 15:35:12 +0100 Subject: [PATCH 15/17] Saving ongoing work --- test/core/gridmapping/test_cfconv.py | 30 +++++----- xcube/core/gridmapping/cfconv.py | 87 +++++++++++++++++++++++----- xcube/core/gridmapping/dataset.py | 22 +++---- 3 files changed, 94 insertions(+), 45 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index 493fd030b..ee376cf63 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -6,7 +6,7 @@ import xarray as xr from xcube.core.gridmapping import GridMapping -from xcube.core.gridmapping.cfconv import find_grid_mapping_for_var +from xcube.core.gridmapping.cfconv import find_grid_mapping_for_data_var CRS_WGS84 = pyproj.crs.CRS(4326) CRS_UTM_33N = pyproj.crs.CRS(32633) @@ -67,7 +67,7 @@ def test_with_gm_var_and_named_coords(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -89,7 +89,7 @@ def test_with_gm_var_fails_with_invalid_coords(self): with pytest.raises(ValueError, match="invalid coordinates in" " grid mapping value 'crs: a b'"): - find_grid_mapping_for_var(ds, "sst") + find_grid_mapping_for_data_var(ds, "sst") def test_with_gm_var_fails_with_invalid_crs(self): ds = xr.Dataset( @@ -107,7 +107,7 @@ def test_with_gm_var_fails_with_invalid_crs(self): with pytest.raises(ValueError, match="variable 'crs' is not" " a valid grid mapping"): - find_grid_mapping_for_var(ds, "sst") + find_grid_mapping_for_data_var(ds, "sst") def test_with_gm_var_fails_with_invalid_gm_var(self): ds = xr.Dataset( @@ -125,7 +125,7 @@ def test_with_gm_var_fails_with_invalid_gm_var(self): with pytest.raises(ValueError, match="grid mapping variable 'crs'" " not found in dataset"): - find_grid_mapping_for_var(ds, "sst") + find_grid_mapping_for_data_var(ds, "sst") def test_with_gm_var_and_standard_name_lat_lon(self): ds = xr.Dataset( @@ -146,7 +146,7 @@ def test_with_gm_var_and_standard_name_lat_lon(self): "lat": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -171,7 +171,7 @@ def test_with_gm_var_and_standard_name_rot_lat_lon(self): "lat": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_ROTATED_POLE.to_cf(), gm.crs.to_cf()) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -196,7 +196,7 @@ def test_with_gm_var_and_standard_name_projected(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -217,7 +217,7 @@ def test_with_gm_var_and_axis(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -238,7 +238,7 @@ def test_with_gm_var_and_common_dim_and_var_name(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("x", "y"), gm.xy_var_names) @@ -259,7 +259,7 @@ def test_with_gm_var_and_common_dim_and_var_name(self): "lat": self.new_y_coord_var(dim="lat"), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("lon", "lat"), gm.xy_var_names) @@ -278,7 +278,7 @@ def test_with_gm_var_and_common_dim_name(self): "b": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_UTM_33N, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) @@ -296,7 +296,7 @@ def test_without_gm_var_and_common_dim_and_var_name(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("x", "y"), gm.xy_var_names) @@ -315,7 +315,7 @@ def test_without_gm_var_and_common_dim_and_var_name(self): "lat": self.new_y_coord_var(dim="lat"), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("lon", "lat"), gm.xy_var_names) @@ -331,7 +331,7 @@ def test_without_gm_var_and_common_dim_name(self): "b": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_var(ds, "sst") + gm = find_grid_mapping_for_data_var(ds, "sst") self.assertIsInstance(gm, GridMapping) self.assertEqual(CRS_WGS84, gm.crs) self.assertEqual(("a", "b"), gm.xy_var_names) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index c64d01bbe..90020a859 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -19,7 +19,7 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -from typing import Optional, Union, Sequence, Tuple +from typing import Optional, Sequence, Tuple, Dict, Hashable, List import pyproj import pyproj.exceptions @@ -28,35 +28,89 @@ from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true from .base import CRS_WGS84 -from .base import GridMapping -from .coords import new_grid_mapping_from_coords + +DimPair = Tuple[Hashable, Hashable] +CoordsPair = Tuple[xr.DataArray, xr.DataArray] +GridMappingTuple = Tuple[pyproj.CRS, str, CoordsPair] + + +def find_grid_mappings_for_data_vars(dataset: xr.Dataset) \ + -> Dict[Hashable, GridMappingTuple]: + grid_mappings = find_grid_mappings_for_dataset(dataset) + vars_to_grid_mappings: Dict[Hashable, GridMappingTuple] = {} + for var_name, var in dataset.data_vars.items(): + if var.ndim < 2: + continue + for gmt in grid_mappings: + _, _, xy_coords = gmt + x_dim, y_dim = _get_xy_dims_from_xy_coords(xy_coords) + if x_dim in var.dims and y_dim in var.dims: + vars_to_grid_mappings[var_name] = gmt + break + return vars_to_grid_mappings + + +def find_grid_mappings_for_dataset(dataset: xr.Dataset) \ + -> List[GridMappingTuple]: + dims_to_grid_mappings: Dict[DimPair, GridMappingTuple] = {} + for var_name, var in dataset.data_vars.items(): + found = False + for x_dim, y_dim in dims_to_grid_mappings.keys(): + if x_dim in var.dims and y_dim in var.dims: + found = True + break + if not found: + gmt = find_grid_mapping_for_data_var(dataset, var_name) + _, _, xy_coords = gmt + xy_dims = _get_xy_dims_from_xy_coords(xy_coords) + dims_to_grid_mappings[xy_dims] = gmt + return list(dims_to_grid_mappings.values()) + + +def _get_xy_dims_from_xy_coords(xy_coords: CoordsPair) -> DimPair: + x_coords, y_coords = xy_coords + if x_coords.ndim == 1: + # 1-D coordinates + assert y_coords.ndim == 1 + return x_coords.dims[0], y_coords.dims[0] + else: + # 2-D coordinates + assert x_coords.ndim == 2 + assert x_coords.dims == y_coords.dims + return x_coords.dims[1], x_coords.dims[0] -def find_grid_mapping_for_var(dataset: xr.Dataset, - var_name: str) -> Optional[GridMapping]: +def find_grid_mapping_for_data_var( + dataset: xr.Dataset, + var_name: Hashable +) -> Optional[GridMappingTuple]: assert_instance(dataset, xr.Dataset, "dataset") - assert_instance(var_name, str, "var_name") + assert_instance(var_name, Hashable, "var_name") assert_true(var_name in dataset, message=f"variable {var_name!r} not found in dataset") var = dataset[var_name] + if var.ndim < 2: + return None gm_value = var.attrs.get("grid_mapping") if isinstance(gm_value, str) and gm_value: crs, gm_name, xy_coords = _find_grid_mapping_for_var_with_gm_value( dataset, var, gm_value ) + force_xy_coords = True else: crs, gm_name, xy_coords = CRS_WGS84, "latitude_longitude", None + force_xy_coords = False if xy_coords is None: xy_coords = _find_coordinates_for_crs_and_gm_name( - dataset, var, gm_name + dataset, var, gm_name, force_xy_coords ) + if xy_coords is None: + return None - return new_grid_mapping_from_coords(x_coords=xy_coords[0], - y_coords=xy_coords[1], - crs=crs) + return crs, gm_name, xy_coords def _find_grid_mapping_for_var_with_gm_value( @@ -65,7 +119,7 @@ def _find_grid_mapping_for_var_with_gm_value( gm_value: str ) -> Tuple[pyproj.CRS, str, - Optional[Tuple[xr.DataArray, xr.DataArray]]]: + Optional[CoordsPair]]: xy_coords = None if ":" in gm_value: @@ -100,7 +154,8 @@ def _find_grid_mapping_for_var_with_gm_value( def _find_coordinates_for_crs_and_gm_name( dataset: xr.Dataset, var: xr.DataArray, - gm_name: str + gm_name: str, + force: bool ) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: other_vars = [dataset[var_name] for var_name in dataset.variables.keys() @@ -145,9 +200,11 @@ def _find_coordinates_for_crs_and_gm_name( # Check: also try _find_2d_coord_var_by_common_names()? - raise ValueError(f"cannot determine grid mapping" - f" coordinates for variable {var.name!r}") - + if force: + raise ValueError(f"cannot determine grid mapping" + f" coordinates for variable {var.name!r}" + f" with dimensions {var.dims!r}") + return None def _find_1d_coord_var_by_common_names( coords: Sequence[xr.DataArray], diff --git a/xcube/core/gridmapping/dataset.py b/xcube/core/gridmapping/dataset.py index 2567b1bf7..c54be83bb 100644 --- a/xcube/core/gridmapping/dataset.py +++ b/xcube/core/gridmapping/dataset.py @@ -20,14 +20,13 @@ # SOFTWARE. from typing import Optional, Union, Tuple -import warnings import pyproj import xarray as xr from .base import DEFAULT_TOLERANCE from .base import GridMapping -from .cfconv import get_dataset_grid_mapping_proxies +from .cfconv import find_grid_mappings_for_dataset from .coords import new_grid_mapping_from_coords from .helpers import _normalize_crs @@ -54,21 +53,14 @@ def new_grid_mapping_from_dataset( else: prefer_crs = crs - grid_mapping_proxies = get_dataset_grid_mapping_proxies( - dataset, - emit_warnings=emit_warnings, - missing_projected_crs=crs, - missing_rotated_latitude_longitude_crs=crs, - missing_latitude_longitude_crs=crs, - ).values() - + grid_mapping_tuples = find_grid_mappings_for_dataset(dataset) grid_mappings = [ - new_grid_mapping_from_coords(x_coords=gmp.coords.x, - y_coords=gmp.coords.y, - crs=gmp.crs, - tile_size=tile_size or gmp.tile_size, + new_grid_mapping_from_coords(x_coords=x_coords, + y_coords=y_coords, + crs=crs, + tile_size=tile_size, tolerance=tolerance) - for gmp in grid_mapping_proxies + for crs, _, (x_coords, y_coords) in grid_mapping_tuples ] if len(grid_mappings) > 1: From 4f681129d5b25a6f8b9312cdd817c2c7ec49a6a4 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 22 Mar 2023 11:11:37 +0100 Subject: [PATCH 16/17] Fix --- test/core/gridmapping/test_cfconv.py | 13 +++++++++++++ xcube/core/gridmapping/cfconv.py | 7 ++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index ee376cf63..c7127e6e2 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -284,6 +284,19 @@ def test_with_gm_var_and_common_dim_name(self): self.assertEqual(("a", "b"), gm.xy_var_names) self.assertEqual(("x", "y"), gm.xy_dim_names) + def test_with_gm_var_but_without_spatial_var(self): + ds = xr.Dataset( + data_vars={ + "crs": self.new_crs_var(CRS_UTM_33N), + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gm = find_grid_mapping_for_data_var(ds, "crs") + self.assertIsNone(gm) + def test_without_gm_var_and_common_dim_and_var_name(self): ds = xr.Dataset( data_vars={ diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 90020a859..4b9dcfd2e 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -61,9 +61,10 @@ def find_grid_mappings_for_dataset(dataset: xr.Dataset) \ break if not found: gmt = find_grid_mapping_for_data_var(dataset, var_name) - _, _, xy_coords = gmt - xy_dims = _get_xy_dims_from_xy_coords(xy_coords) - dims_to_grid_mappings[xy_dims] = gmt + if gmt is not None: + _, _, xy_coords = gmt + xy_dims = _get_xy_dims_from_xy_coords(xy_coords) + dims_to_grid_mappings[xy_dims] = gmt return list(dims_to_grid_mappings.values()) From bdbda0e6570150590ac94944b2f2b5d40087586f Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 4 May 2023 18:33:19 +0200 Subject: [PATCH 17/17] fix tests --- CHANGES.md | 3 + test/core/gridmapping/test_cfconv.py | 187 ++++++++++++++++++-------- test/core/gridmapping/test_dataset.py | 11 +- xcube/core/gridmapping/cfconv.py | 34 ++++- 4 files changed, 169 insertions(+), 66 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e1aa88486..34b979098 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,8 @@ ## Changes in 1.0.6 (in development) +* Reimplemented the logic to detect a dataset's grid mapping + in module `xcube.core.gridmapping.cfconv`. + * Updated [xcube Dataset Specification](docs/source/cubespec.md). * Bundled [xcube-viewer 1.1.0-dev.1](https://github.com/dcs4cop/xcube-viewer/releases/tag/v1.1.0-dev.1). * Fixed various issues with the auto-generated Python API documentation. diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index c7127e6e2..9a088f274 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -1,11 +1,12 @@ import unittest +from typing import Tuple import numpy as np import pyproj import pytest import xarray as xr -from xcube.core.gridmapping import GridMapping +from xcube.core.gridmapping import GridMapping, CRS_CRS84 from xcube.core.gridmapping.cfconv import find_grid_mapping_for_data_var CRS_WGS84 = pyproj.crs.CRS(4326) @@ -21,6 +22,31 @@ class FindGridMappingForVarTest(unittest.TestCase): w = 20 h = 10 + def assert_grid_mapping_tuple_tuple_ok( + self, + actual_gmt, + expected_crs: pyproj.CRS, + expected_gm_name: str, + expected_xy_names: Tuple[str, str], + expected_xy_dims: Tuple[str, str], + expected_xy_sizes: Tuple[int, int] + ): + self.assertIsInstance(actual_gmt, tuple) + self.assertEqual(3, len(actual_gmt)) + crs, gm_name, xy_coords = actual_gmt + self.assertEqual(expected_crs.to_cf(), crs.to_cf()) + self.assertEqual(expected_gm_name, gm_name) + self.assertIsInstance(xy_coords, tuple) + self.assertEqual(2, len(xy_coords)) + x, y = xy_coords + self.assertIsInstance(x, xr.DataArray) + self.assertIsInstance(y, xr.DataArray) + self.assertEqual(1, x.ndim) + self.assertEqual(1, y.ndim) + self.assertEqual(expected_xy_dims, (x.dims[0], y.dims[0])) + self.assertEqual(expected_xy_names, (x.name, y.name)) + self.assertEqual(expected_xy_sizes, (x.size, y.size)) + def new_data_var(self, shape=None, dims=None, @@ -67,11 +93,15 @@ def test_with_gm_var_and_named_coords(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_UTM_33N, gm.crs) - self.assertEqual(("a", "b"), gm.xy_var_names) - self.assertEqual(("x", "y"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("a", "b"), + ("x", "y"), + (20, 10) + ) def test_with_gm_var_fails_with_invalid_coords(self): ds = xr.Dataset( @@ -146,11 +176,15 @@ def test_with_gm_var_and_standard_name_lat_lon(self): "lat": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_WGS84, gm.crs) - self.assertEqual(("a", "b"), gm.xy_var_names) - self.assertEqual(("x", "y"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("a", "b"), + ("x", "y"), + (20, 10) + ) def test_with_gm_var_and_standard_name_rot_lat_lon(self): ds = xr.Dataset( @@ -171,11 +205,15 @@ def test_with_gm_var_and_standard_name_rot_lat_lon(self): "lat": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_ROTATED_POLE.to_cf(), gm.crs.to_cf()) - self.assertEqual(("a", "b"), gm.xy_var_names) - self.assertEqual(("x", "y"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_ROTATED_POLE, + "rotated_latitude_longitude", + ("a", "b"), + ("x", "y"), + (20, 10) + ) def test_with_gm_var_and_standard_name_projected(self): ds = xr.Dataset( @@ -196,11 +234,15 @@ def test_with_gm_var_and_standard_name_projected(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_UTM_33N, gm.crs) - self.assertEqual(("a", "b"), gm.xy_var_names) - self.assertEqual(("x", "y"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("a", "b"), + ("x", "y"), + (20, 10) + ) def test_with_gm_var_and_axis(self): ds = xr.Dataset( @@ -217,11 +259,15 @@ def test_with_gm_var_and_axis(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_UTM_33N, gm.crs) - self.assertEqual(("a", "b"), gm.xy_var_names) - self.assertEqual(("x", "y"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("a", "b"), + ("x", "y"), + (20, 10) + ) def test_with_gm_var_and_common_dim_and_var_name(self): ds = xr.Dataset( @@ -238,11 +284,15 @@ def test_with_gm_var_and_common_dim_and_var_name(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_UTM_33N, gm.crs) - self.assertEqual(("x", "y"), gm.xy_var_names) - self.assertEqual(("x", "y"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("x", "y"), + ("x", "y"), + (20, 10) + ) ds = xr.Dataset( data_vars={ @@ -259,11 +309,15 @@ def test_with_gm_var_and_common_dim_and_var_name(self): "lat": self.new_y_coord_var(dim="lat"), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_WGS84, gm.crs) - self.assertEqual(("lon", "lat"), gm.xy_var_names) - self.assertEqual(("lon", "lat"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("lon", "lat"), + ("lon", "lat"), + (20, 10) + ) def test_with_gm_var_and_common_dim_name(self): ds = xr.Dataset( @@ -278,11 +332,15 @@ def test_with_gm_var_and_common_dim_name(self): "b": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_UTM_33N, gm.crs) - self.assertEqual(("a", "b"), gm.xy_var_names) - self.assertEqual(("x", "y"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("a", "b"), + ("x", "y"), + (20, 10) + ) def test_with_gm_var_but_without_spatial_var(self): ds = xr.Dataset( @@ -294,8 +352,8 @@ def test_with_gm_var_but_without_spatial_var(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "crs") - self.assertIsNone(gm) + gmt = find_grid_mapping_for_data_var(ds, "crs") + self.assertIsNone(gmt) def test_without_gm_var_and_common_dim_and_var_name(self): ds = xr.Dataset( @@ -309,11 +367,15 @@ def test_without_gm_var_and_common_dim_and_var_name(self): "y": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_WGS84, gm.crs) - self.assertEqual(("x", "y"), gm.xy_var_names) - self.assertEqual(("x", "y"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("x", "y"), + ("x", "y"), + (20, 10) + ) ds = xr.Dataset( data_vars={ @@ -328,11 +390,15 @@ def test_without_gm_var_and_common_dim_and_var_name(self): "lat": self.new_y_coord_var(dim="lat"), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_WGS84, gm.crs) - self.assertEqual(("lon", "lat"), gm.xy_var_names) - self.assertEqual(("lon", "lat"), gm.xy_dim_names) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("lon", "lat"), + ("lon", "lat"), + (20, 10) + ) def test_without_gm_var_and_common_dim_name(self): ds = xr.Dataset( @@ -344,9 +410,12 @@ def test_without_gm_var_and_common_dim_name(self): "b": self.new_y_coord_var(), } ) - gm = find_grid_mapping_for_data_var(ds, "sst") - self.assertIsInstance(gm, GridMapping) - self.assertEqual(CRS_WGS84, gm.crs) - self.assertEqual(("a", "b"), gm.xy_var_names) - self.assertEqual(("x", "y"), gm.xy_dim_names) - + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("a", "b"), + ("x", "y"), + (20, 10) + ) diff --git a/test/core/gridmapping/test_dataset.py b/test/core/gridmapping/test_dataset.py index 17ce3a7b0..40383c9a7 100644 --- a/test/core/gridmapping/test_dataset.py +++ b/test/core/gridmapping/test_dataset.py @@ -3,6 +3,7 @@ import numpy as np import pyproj +import pytest import xarray as xr import xcube.core.new @@ -40,12 +41,10 @@ def test_from_regular_cube_with_crs(self): gm1 = GridMapping.from_dataset(dataset) self.assertEqual(pyproj.CRS.from_string('epsg:25832'), gm1.crs) dataset = dataset.drop_vars('crs') - gm2 = GridMapping.from_dataset(dataset) - self.assertEqual(GEO_CRS, gm2.crs) - gm3 = GridMapping.from_dataset(dataset, crs=gm1.crs) - self.assertEqual(gm1.crs, gm3.crs) - self.assertEqual(('x', 'y'), gm3.xy_var_names) - self.assertEqual(('x', 'y'), gm3.xy_dim_names) + with pytest.raises(ValueError, + match="grid mapping variable 'crs'" + " not found in dataset"): + GridMapping.from_dataset(dataset) def test_from_regular_cube_no_chunks_and_chunks(self): dataset = xcube.core.new.new_cube(variables=dict(rad=0.5)) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 4b9dcfd2e..a470a1929 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -65,7 +65,39 @@ def find_grid_mappings_for_dataset(dataset: xr.Dataset) \ _, _, xy_coords = gmt xy_dims = _get_xy_dims_from_xy_coords(xy_coords) dims_to_grid_mappings[xy_dims] = gmt - return list(dims_to_grid_mappings.values()) + + grid_mapping_tuples = list(dims_to_grid_mappings.values()) + + lat_lon_2d_gmt = _find_lat_lon_2d_gmt( + dataset, + tuple(dims_to_grid_mappings.keys()) or (("x", "y"),) + ) + if lat_lon_2d_gmt is not None: + grid_mapping_tuples.append(lat_lon_2d_gmt) + + return grid_mapping_tuples + + +def _find_lat_lon_2d_gmt(dataset: xr.Dataset, + xy_dims: Tuple[DimPair]) \ + -> Optional[GridMappingTuple]: + for x_dim, y_dim in xy_dims: + yx_dims = y_dim, x_dim + for lon_name, lat_name in (('lon', 'lat'), + ('longitude', 'latitude')): + lon_coords = dataset.coords.get(lon_name) + lat_coords = dataset.coords.get(lat_name) + if lon_coords is None or lat_coords is None: + lon_coords = dataset.data_vars.get(lon_name) + lat_coords = dataset.data_vars.get(lat_name) + if lon_coords is not None \ + and lat_coords is not None \ + and lon_coords.dims == yx_dims \ + and lat_coords.dims == yx_dims: + return (CRS_WGS84, + "latitude_longitude", + (lon_coords, lat_coords)) + return None def _get_xy_dims_from_xy_coords(xy_coords: CoordsPair) -> DimPair: