From ce449815e03f443931efc35278448d4101774fd8 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Fri, 17 Sep 2021 15:16:50 +0200 Subject: [PATCH 01/28] work on integrating temporal resampling --- xcube/core/gen2/config.py | 15 +++++++++++ xcube/core/gen2/local/generator.py | 2 +- xcube/core/gen2/local/resamplert.py | 41 +++++++++++++++++++++++++++-- xcube/core/resampling/temporal.py | 20 +++++++++----- 4 files changed, 69 insertions(+), 9 deletions(-) diff --git a/xcube/core/gen2/config.py b/xcube/core/gen2/config.py index 7a8489198..eff74342a 100644 --- a/xcube/core/gen2/config.py +++ b/xcube/core/gen2/config.py @@ -142,6 +142,7 @@ def __init__(self, tile_size: Union[int, Tuple[int, int]] = None, time_range: Tuple[str, Optional[str]] = None, time_period: str = None, + temporal_resampling: str = None, chunks: Mapping[str, Optional[int]] = None, metadata: Mapping[str, Any] = None, variable_metadata: Mapping[str, Mapping[str, Any]] = None,): @@ -196,6 +197,11 @@ def __init__(self, assert_instance(time_period, str, 'time_period') self.time_period = time_period + self.temporal_resampling = None + if temporal_resampling is not None: + assert_instance(temporal_resampling, str, 'temporal_resampling') + self.temporal_resampling = temporal_resampling + self.chunks = None if chunks is not None: assert_instance(chunks, collections.Mapping, 'chunks') @@ -271,6 +277,15 @@ def get_schema(cls): nullable=True, pattern=r'^([1-9][0-9]*)?[DWMY]$' ), + temporal_resampling=JsonStringSchema( + nullable=True, + enum=[ + 'first', 'last', 'max', 'min', 'mean', 'median', + 'percentile_
<p>
', 'linear', 'nearest', 'nearest-up', + 'zero', 'slinear', 'quadratic', 'cubic', 'previous', + 'next' + ] + ), chunks=JsonObjectSchema( nullable=True, additional_properties=JsonIntegerSchema(nullable=True, diff --git a/xcube/core/gen2/local/generator.py b/xcube/core/gen2/local/generator.py index 367f7b0c1..2ca921780 100644 --- a/xcube/core/gen2/local/generator.py +++ b/xcube/core/gen2/local/generator.py @@ -126,7 +126,7 @@ def __generate_cube(self, request: CubeGeneratorRequest) \ subsetter = CubeSubsetter() resampler_xy = CubeResamplerXY() - resampler_t = CubeResamplerT() + resampler_t = CubeResamplerT(cube_config) combiner = CubesCombiner(cube_config) rechunker = CubeRechunker() diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index 25b573641..d94f1bcfa 100644 --- a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -19,9 +19,12 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+import pandas as pd import xarray as xr from xcube.core.gridmapping import GridMapping +from xcube.core.resampling import resample_in_time +from xcube.util.assertions import assert_instance from .transformer import CubeTransformer from .transformer import TransformedCube from ..config import CubeConfig @@ -29,9 +32,43 @@ class CubeResamplerT(CubeTransformer): + def __init__(self, + cube_config: CubeConfig): + assert_instance(cube_config, CubeConfig, 'cube_config') + self._time_range = cube_config.time_range \ + if cube_config.time_range else None + def transform_cube(self, cube: xr.Dataset, gm: GridMapping, cube_config: CubeConfig) -> TransformedCube: - # TODO (forman): implement me - return cube, gm, cube_config + + if cube_config.time_period is None: + resampled_cube = cube + else: + time_resample_params = dict() + time_resample_params['frequency'] = cube_config.time_period + time_resample_params['method'] = 'first' + if self._time_range: + start_time = pd.to_datetime(self._time_range[0]) + dataset_start_time = cube.time[0].values + time_delta = dataset_start_time - start_time + time_resample_params['offset'] = time_delta + if cube_config.temporal_resampling is not None: + if cube_config.temporal_resampling in \ + ['linear', 'nearest', 'nearest-up', 'zero', 'slinear', + 'quadratic', 'cubic', 'previous', 'next']: + time_resample_params['method'] = 'interp' + time_resample_params['interp_kind'] = \ + cube_config.temporal_resampling + else: + time_resample_params['method'] = \ + cube_config.temporal_resampling + resampled_cube = resample_in_time( + cube, + rename_variables=False, + **time_resample_params + ) + cube_config = cube_config.drop_props(['time_period']) + + return resampled_cube, gm, cube_config diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index 454d4caea..42c36d755 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -39,7 +39,8 @@ def resample_in_time(dataset: xr.Dataset, 
time_chunk_size=None, var_names: Sequence[str] = None, metadata: Dict[str, Any] = None, - cube_asserted: bool = False) -> xr.Dataset: + cube_asserted: bool = False, + rename_variables: bool = True) -> xr.Dataset: """ Resample a dataset in the time dimension. @@ -49,7 +50,9 @@ def resample_in_time(dataset: xr.Dataset, ``'first'``, ``'last'``, ``'max'``, ``'min'``, ``'mean'``, ``'median'``, ``'percentile_
<p>
'``, - ``'std'``, ``'sum'``, ``'var'``. + ``'std'``, ``'sum'``, ``'var'``, + ``'interpolate'`` + . In value ``'percentile_
<p>
'`` is a placeholder, where ``'
<p>
'`` must be replaced by an integer percentage @@ -82,6 +85,8 @@ def resample_in_time(dataset: xr.Dataset, :param metadata: Output metadata. :param cube_asserted: If False, *cube* will be verified, otherwise it is expected to be a valid cube. + :param rename_variables: Whether the dataset's variables shall be renamed by + extending the resampling method to the original name. :return: A new xcube dataset resampled in time. """ if not cube_asserted: @@ -127,9 +132,10 @@ def resample_in_time(dataset: xr.Dataset, tolerance) resampled_cube = resampling_method(*method_args, **method_kwargs) - resampled_cube = resampled_cube.rename( - {var_name: f'{var_name}_{method_postfix}' - for var_name in resampled_cube.data_vars}) + if rename_variables: + resampled_cube = resampled_cube.rename( + {var_name: f'{var_name}_{method_postfix}' + for var_name in resampled_cube.data_vars}) resampled_cubes.append(resampled_cube) if len(resampled_cubes) == 1: @@ -160,10 +166,12 @@ def get_method_kwargs(method, frequency, interp_kind, tolerance): kwargs = {'kind': interp_kind or 'linear'} elif method in {'nearest', 'bfill', 'ffill', 'pad'}: kwargs = {'tolerance': tolerance or frequency} - elif method in {'first', 'last', 'sum', + elif method in {'last', 'sum', 'min', 'max', 'mean', 'median', 'std', 'var'}: kwargs = {'dim': 'time', 'keep_attrs': True, 'skipna': True} + elif method == 'first': + kwargs = {'keep_attrs': True, 'skipna': False} elif method == 'prod': kwargs = {'dim': 'time', 'skipna': True} elif method == 'count': From 666633fa79b37a239f7de28a4255f533e4411cac Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Mon, 20 Sep 2021 17:28:29 +0200 Subject: [PATCH 02/28] raise error when temporal resampling is requested --- xcube/core/gen2/local/resamplert.py | 52 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index d94f1bcfa..1c650982f 100644 --- 
a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -28,6 +28,7 @@ from .transformer import CubeTransformer from .transformer import TransformedCube from ..config import CubeConfig +from ..error import CubeGeneratorError class CubeResamplerT(CubeTransformer): @@ -46,29 +47,32 @@ def transform_cube(self, if cube_config.time_period is None: resampled_cube = cube else: - time_resample_params = dict() - time_resample_params['frequency'] = cube_config.time_period - time_resample_params['method'] = 'first' - if self._time_range: - start_time = pd.to_datetime(self._time_range[0]) - dataset_start_time = cube.time[0].values - time_delta = dataset_start_time - start_time - time_resample_params['offset'] = time_delta - if cube_config.temporal_resampling is not None: - if cube_config.temporal_resampling in \ - ['linear', 'nearest', 'nearest-up', 'zero', 'slinear', - 'quadratic', 'cubic', 'previous', 'next']: - time_resample_params['method'] = 'interp' - time_resample_params['interp_kind'] = \ - cube_config.temporal_resampling - else: - time_resample_params['method'] = \ - cube_config.temporal_resampling - resampled_cube = resample_in_time( - cube, - rename_variables=False, - **time_resample_params - ) - cube_config = cube_config.drop_props(['time_period']) + raise CubeGeneratorError(f'Temporal resampling not yet provided. 
' + f'Do not use "time_period" parameter.', + status_code=400) + # time_resample_params = dict() + # time_resample_params['frequency'] = cube_config.time_period + # time_resample_params['method'] = 'first' + # if self._time_range: + # start_time = pd.to_datetime(self._time_range[0]) + # dataset_start_time = cube.time[0].values + # time_delta = dataset_start_time - start_time + # time_resample_params['offset'] = time_delta + # if cube_config.temporal_resampling is not None: + # if cube_config.temporal_resampling in \ + # ['linear', 'nearest', 'nearest-up', 'zero', 'slinear', + # 'quadratic', 'cubic', 'previous', 'next']: + # time_resample_params['method'] = 'interp' + # time_resample_params['interp_kind'] = \ + # cube_config.temporal_resampling + # else: + # time_resample_params['method'] = \ + # cube_config.temporal_resampling + # resampled_cube = resample_in_time( + # cube, + # rename_variables=False, + # **time_resample_params + # ) + # cube_config = cube_config.drop_props(['time_period']) return resampled_cube, gm, cube_config From 367f0c286c0cb158de5298cf9ed63902ddc62751 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Thu, 30 Sep 2021 10:44:02 +0200 Subject: [PATCH 03/28] fix --- xcube/cli/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xcube/cli/resample.py b/xcube/cli/resample.py index fa9ba6ab7..e9cf69721 100644 --- a/xcube/cli/resample.py +++ b/xcube/cli/resample.py @@ -130,7 +130,7 @@ def resample(cube, config['frequency'] = frequency if offset: config['offset'] = offset - if offset: + if base: config['base'] = base if kind: config['interp_kind'] = kind From b118f9048b0d94bc87afc4f0e59bba28a98087ad Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Mon, 4 Oct 2021 13:58:45 +0200 Subject: [PATCH 04/28] integrated temporal resampling --- test/core/gen2/local/test_resamplert.py | 75 +++++++++++++++++++ test/core/gen2/test_config.py | 3 + test/core/gen2/test_request.py | 7 +- test/core/resampling/test_temporal.py | 66 
++++++++++++----- xcube/core/gen2/local/resamplert.py | 93 ++++++++++++++++-------- xcube/core/resampling/temporal.py | 95 ++++++++++++++++++++++++- 6 files changed, 288 insertions(+), 51 deletions(-) create mode 100644 test/core/gen2/local/test_resamplert.py diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py new file mode 100644 index 000000000..1e476fbda --- /dev/null +++ b/test/core/gen2/local/test_resamplert.py @@ -0,0 +1,75 @@ +from xcube.core.new import new_cube +from xcube.core.gen2 import CubeConfig +from xcube.core.gen2.local.resamplert import CubeResamplerT +from xcube.core.gridmapping import GridMapping + +import unittest + + +class CubeResamplerTTest(unittest.TestCase): + + @staticmethod + def _get_cube(time_freq: str, time_periods: int): + + def b3(index1, index2, index3): + return index1 + index2 * 0.1 + index3 * 0.01 + + return new_cube(variables=dict(B03=b3), + time_periods=time_periods, + time_freq=time_freq, + width=10, height=5, time_start='2010-08-04') + + def test_transform_cube_no_time_period(self): + cube_config = CubeConfig(time_range=('2010-01-01', '2012-12-31')) + temporal_resampler = CubeResamplerT(cube_config) + + cube = self._get_cube(time_freq='M', time_periods=12) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertEquals(cube, resampled_cube) + + def test_transform_cube_downsample_to_years(self): + cube_config = CubeConfig(time_range=('2010-01-01', '2012-12-31'), + time_period='1Y', + temporal_resampling='min') + temporal_resampler = CubeResamplerT(cube_config) + + cube = self._get_cube(time_freq='M', time_periods=12) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertEquals(cube, resampled_cube) + + def test_transform_cube_downsample_to_months(self): + cube_config = 
CubeConfig(time_range=('2010-08-01', '2010-11-30'), + time_period='1M', + temporal_resampling='min') + temporal_resampler = CubeResamplerT(cube_config) + + cube = self._get_cube(time_freq='W', time_periods=12) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertEquals(cube, resampled_cube) + + def test_transform_cube_downsample_to_weeks(self): + cube_config = CubeConfig(time_range=('2010-08-03', '2010-09-10'), + time_period='2W', + temporal_resampling='min') + temporal_resampler = CubeResamplerT(cube_config) + + cube = self._get_cube(time_freq='D', time_periods=22) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertEquals(cube, resampled_cube) + diff --git a/test/core/gen2/test_config.py b/test/core/gen2/test_config.py index 35e015b8d..93d5aa2cc 100644 --- a/test/core/gen2/test_config.py +++ b/test/core/gen2/test_config.py @@ -56,6 +56,7 @@ def test_from_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', + temporal_resampling='slinear', metadata=dict(title='S2L2A subset'), variable_metadata=dict( B03=dict(long_name='Band 3'), @@ -69,6 +70,7 @@ def test_from_dict(self): self.assertEqual(0.05, cube_config.spatial_res) self.assertEqual(('2018-01-01', None), cube_config.time_range) self.assertEqual('4D', cube_config.time_period) + self.assertEqual('slinear', cube_config.temporal_resampling) self.assertEqual(dict(title='S2L2A subset'), cube_config.metadata) self.assertEqual( @@ -86,6 +88,7 @@ def test_to_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', + temporal_resampling='slinear', metadata=dict(title='S2L2A subset'), variable_metadata=dict( B03=dict(long_name='Band 3'), diff --git a/test/core/gen2/test_request.py b/test/core/gen2/test_request.py index c3ed5e9d8..b07c7882e 100644 --- 
a/test/core/gen2/test_request.py +++ b/test/core/gen2/test_request.py @@ -49,7 +49,8 @@ def test_from_dict(self): bbox=[12.2, 52.1, 13.9, 54.8], spatial_res=0.05, time_range=['2018-01-01', None], - time_period='4D'), + time_period='4D', + temporal_resampling='slinear'), output_config=dict(store_id='memory', data_id='CHL') ) @@ -69,6 +70,7 @@ def test_from_dict(self): self.assertEqual(0.05, gen_config.cube_config.spatial_res) self.assertEqual(('2018-01-01', None), gen_config.cube_config.time_range) self.assertEqual('4D', gen_config.cube_config.time_period) + self.assertEqual('slinear', gen_config.cube_config.temporal_resampling) def test_to_dict(self): expected_dict = dict( @@ -79,7 +81,8 @@ def test_to_dict(self): bbox=[12.2, 52.1, 13.9, 54.8], spatial_res=0.05, time_range=['2018-01-01', None], - time_period='4D'), + time_period='4D', + temporal_resampling='slinear'), output_config=dict(store_id='memory', replace=False, data_id='CHL') diff --git a/test/core/resampling/test_temporal.py b/test/core/resampling/test_temporal.py index 28d4ffb9f..e6c2ebdba 100644 --- a/test/core/resampling/test_temporal.py +++ b/test/core/resampling/test_temporal.py @@ -36,28 +36,46 @@ def test_resample_in_time_min_max(self): self.assertIn('precipitation_min', resampled_cube) self.assertIn('precipitation_max', resampled_cube) self.assertEqual(('time',), resampled_cube.time.dims) - self.assertEqual(('time', 'lat', 'lon'), resampled_cube.temperature_min.dims) - self.assertEqual(('time', 'lat', 'lon'), resampled_cube.temperature_max.dims) - self.assertEqual(('time', 'lat', 'lon'), resampled_cube.precipitation_min.dims) - self.assertEqual(('time', 'lat', 'lon'), resampled_cube.precipitation_max.dims) + self.assertEqual(('time', 'lat', 'lon'), + resampled_cube.temperature_min.dims) + self.assertEqual(('time', 'lat', 'lon'), + resampled_cube.temperature_max.dims) + self.assertEqual(('time', 'lat', 'lon'), + resampled_cube.precipitation_min.dims) + self.assertEqual(('time', 'lat', 'lon'), + 
resampled_cube.precipitation_max.dims) self.assertEqual((6,), resampled_cube.time.shape) self.assertEqual((6, 180, 360), resampled_cube.temperature_min.shape) self.assertEqual((6, 180, 360), resampled_cube.temperature_max.shape) self.assertEqual((6, 180, 360), resampled_cube.precipitation_min.shape) self.assertEqual((6, 180, 360), resampled_cube.precipitation_max.shape) - np.testing.assert_equal(resampled_cube.time.values, - np.array( - ['2017-06-25T00:00:00', '2017-07-09T00:00:00', - '2017-07-23T00:00:00', '2017-08-06T00:00:00', - '2017-08-20T00:00:00', '2017-09-03T00:00:00'], dtype=np.datetime64)) - np.testing.assert_allclose(resampled_cube.temperature_min.values[..., 0, 0], - np.array([272.0, 272.4, 273.0, 273.8, 274.4, 274.9])) - np.testing.assert_allclose(resampled_cube.temperature_max.values[..., 0, 0], - np.array([272.3, 272.9, 273.7, 274.3, 274.8, 274.9])) - np.testing.assert_allclose(resampled_cube.precipitation_min.values[..., 0, 0], - np.array([119.4, 118.2, 116.6, 115.4, 114.4, 114.2])) - np.testing.assert_allclose(resampled_cube.precipitation_max.values[..., 0, 0], - np.array([120.0, 119.2, 118.0, 116.4, 115.2, 114.2])) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2017-07-02T00:00:00', '2017-07-16T00:00:00', + '2017-07-30T00:00:00', '2017-08-13T00:00:00', + '2017-08-27T00:00:00', '2017-09-10T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2017-06-25T00:00:00', '2017-07-09T00:00:00'], + ['2017-07-09T00:00:00', '2017-07-23T00:00:00'], + ['2017-07-23T00:00:00', '2017-08-06T00:00:00'], + ['2017-08-06T00:00:00', '2017-08-20T00:00:00'], + ['2017-08-20T00:00:00', '2017-09-03T00:00:00'], + ['2017-09-03T00:00:00', '2017-09-17T00:00:00']], + dtype=np.datetime64)) + np.testing.assert_allclose( + resampled_cube.temperature_min.values[..., 0, 0], + np.array([272.0, 272.4, 273.0, 273.8, 274.4, 274.9])) + np.testing.assert_allclose( + resampled_cube.temperature_max.values[..., 0, 
0], + np.array([272.3, 272.9, 273.7, 274.3, 274.8, 274.9])) + np.testing.assert_allclose( + resampled_cube.precipitation_min.values[..., 0, 0], + np.array([119.4, 118.2, 116.6, 115.4, 114.4, 114.2])) + np.testing.assert_allclose( + resampled_cube.precipitation_max.values[..., 0, 0], + np.array([120.0, 119.2, 118.0, 116.4, 115.2, 114.2])) schema = CubeSchema.new(resampled_cube) self.assertEqual(3, schema.ndim) @@ -144,3 +162,17 @@ def test_resample_f_all(self): self.assertEqual(3, schema.ndim) self.assertEqual(('time', 'lat', 'lon'), schema.dims) self.assertEqual((1, 180, 360), schema.shape) + + def test_resample_in_time_resample_to_quarter(self): + resampled_cube = resample_in_time(self.input_cube, '1Q', ['min']) + self.assertIsNot(resampled_cube, self.input_cube) + self.assertIn('time', resampled_cube) + self.assertEqual(1, resampled_cube.time.size) + self.assertEqual(np.datetime64('2017-08-16'), + resampled_cube.time[0].values) + self.assertIn('time_bnds', resampled_cube) + self.assertEqual((1, 2), resampled_cube.time_bnds.shape) + self.assertEqual(np.datetime64('2017-07-01'), + resampled_cube.time_bnds[0, 0].values) + self.assertEqual(np.datetime64('2017-10-01'), + resampled_cube.time_bnds[0, 1].values) diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index 1c650982f..d2c3963a4 100644 --- a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -1,4 +1,3 @@ -# The MIT License (MIT) # Copyright (c) 2021 by the xcube development team and contributors # # Permission is hereby granted, free of charge, to any person obtaining a copy of @@ -43,36 +42,72 @@ def transform_cube(self, cube: xr.Dataset, gm: GridMapping, cube_config: CubeConfig) -> TransformedCube: - + to_drop = [] if cube_config.time_period is None: resampled_cube = cube else: - raise CubeGeneratorError(f'Temporal resampling not yet provided. 
' - f'Do not use "time_period" parameter.', - status_code=400) - # time_resample_params = dict() - # time_resample_params['frequency'] = cube_config.time_period - # time_resample_params['method'] = 'first' - # if self._time_range: - # start_time = pd.to_datetime(self._time_range[0]) - # dataset_start_time = cube.time[0].values - # time_delta = dataset_start_time - start_time - # time_resample_params['offset'] = time_delta - # if cube_config.temporal_resampling is not None: - # if cube_config.temporal_resampling in \ - # ['linear', 'nearest', 'nearest-up', 'zero', 'slinear', - # 'quadratic', 'cubic', 'previous', 'next']: - # time_resample_params['method'] = 'interp' - # time_resample_params['interp_kind'] = \ - # cube_config.temporal_resampling - # else: - # time_resample_params['method'] = \ - # cube_config.temporal_resampling - # resampled_cube = resample_in_time( - # cube, - # rename_variables=False, - # **time_resample_params - # ) - # cube_config = cube_config.drop_props(['time_period']) + to_drop.append('time_period') + time_resample_params = dict() + time_resample_params['frequency'] = cube_config.time_period + time_resample_params['method'] = 'first' + if self._time_range: + import re + time_unit = re.findall('[A-Z]+', cube_config.time_period)[0] + if time_unit in ['H', 'D']: + start_time = pd.to_datetime(self._time_range[0]) + dataset_start_time = pd.Timestamp(cube.time[0].values) + time_delta = _normalize_time(dataset_start_time) \ + - start_time + period_delta = pd.Timedelta(cube_config.time_period) + if time_delta > period_delta: + if time_unit == 'H': + time_resample_params['base'] = \ + time_delta.hours / period_delta.hours + elif time_unit == 'D': + time_resample_params['base'] = \ + time_delta.days / period_delta.days + if cube_config.temporal_resampling is not None: + to_drop.append('temporal_resampling') + if cube_config.temporal_resampling in \ + ['linear', 'nearest', 'nearest-up', 'zero', 'slinear', + 'quadratic', 'cubic', 'previous', 'next']: + 
time_resample_params['method'] = 'interp' + time_resample_params['interp_kind'] = \ + cube_config.temporal_resampling + else: + time_resample_params['method'] = \ + cube_config.temporal_resampling + resampled_cube = resample_in_time( + cube, + rename_variables=False, + **time_resample_params + ) + + cube_config = cube_config.drop_props(to_drop) return resampled_cube, gm, cube_config + + +def _normalize_time(time, normalize_hour=True): + if normalize_hour: + return time.replace(hour=0, minute=0, second=0, microsecond=0, + nanosecond=0) + return time.replace(minute=0, second=0, microsecond=0, nanosecond=0) + + +def _get_expected_start_time(dataset_start_time, time_unit): + if time_unit == 'H': + return _normalize_time(dataset_start_time, normalize_hour=False) + if time_unit == 'D': + return _normalize_time(dataset_start_time) + if time_unit == 'W': + delta = pd.Timedelta(-dataset_start_time.day_of_week) + return _normalize_time(dataset_start_time) - delta + if time_unit == 'M': + return _normalize_time(dataset_start_time).replace(day=1) + if time_unit == 'Q': + delta = pd.Timedelta(-(dataset_start_time.month - 1) % 3) + return _normalize_time(dataset_start_time).replace(day=1) - delta + if time_unit == 'Y': + return _normalize_time(dataset_start_time).replace(month=1, day=1) + raise CubeGeneratorError(f'Unsupported time unit "{time_unit}"') \ No newline at end of file diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index 42c36d755..bb2e7b17c 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -22,6 +22,7 @@ from typing import Dict, Any, Sequence, Union import numpy as np +import re import xarray as xr from xcube.core.schema import CubeSchema @@ -98,6 +99,11 @@ def resample_in_time(dataset: xr.Dataset, / np.timedelta64(1, 'D')) + 1) frequency = f'{days}D' + # resample to start of period + if frequency.endswith('Y') or frequency.endswith('M') or \ + frequency.endswith('Q'): + frequency = 
f'{frequency}S' + if var_names: dataset = select_variables_subset(dataset, var_names) @@ -142,10 +148,16 @@ def resample_in_time(dataset: xr.Dataset, resampled_cube = resampled_cubes[0] else: resampled_cube = xr.merge(resampled_cubes) + adjusted_times, time_bounds = _adjust_times_and_bounds( + resampled_cube.time.values, frequency) + update_vars = dict( + time=adjusted_times, + time_bnds=xr.DataArray(time_bounds, dims=['time', 'bnds']) + ) + resampled_cube = resampled_cube.assign_coords(update_vars) - # TODO: add time_bnds to resampled_ds - time_coverage_start = '%s' % dataset.time[0] - time_coverage_end = '%s' % dataset.time[-1] + time_coverage_start = '%s' % time_bounds[0][0] + time_coverage_end = '%s' % time_bounds[-1][1] resampled_cube.attrs.update(metadata or {}) # TODO: add other time_coverage_ attributes @@ -153,6 +165,9 @@ def resample_in_time(dataset: xr.Dataset, time_coverage_end=time_coverage_end) schema = CubeSchema.new(dataset) + if schema.chunks is None: + return resampled_cube + chunk_sizes = {schema.dims[i]: schema.chunks[i] for i in range(schema.ndim)} if isinstance(time_chunk_size, int) and time_chunk_size >= 0: @@ -161,6 +176,80 @@ def resample_in_time(dataset: xr.Dataset, return resampled_cube.chunk(chunk_sizes) +def _adjust_times_and_bounds(time_values, frequency): + import pandas as pd + time_unit = re.findall('[A-Z]+', frequency)[0] + time_value = int(frequency.split(time_unit)[0]) + TIMEUNIT_INCREMENTORS = dict( + YS=(1, 0, 0), + QS=(0, 3, 0), + MS=(0, 1, 0), + W=(0, 0, 7) + ) + if time_unit not in TIMEUNIT_INCREMENTORS: + if time_unit == 'D': + half_time_delta = np.timedelta64(12*time_value, 'h') + elif time_unit == 'H': + half_time_delta = np.timedelta64(30 * time_value, 'm') + else: + raise ValueError(f'Unsupported time unit "{time_unit}"') + time_values += half_time_delta + time_bounds_values = np.array([time_values - half_time_delta, + time_values + half_time_delta]).\ + transpose() + return time_values, time_bounds_values + timestamps 
= [pd.Timestamp(tv) for tv in time_values] + last_ts = timestamps[-1] + replacement = dict( + year=last_ts.year + + (TIMEUNIT_INCREMENTORS[time_unit][0] * time_value), + month=last_ts.month + + (TIMEUNIT_INCREMENTORS[time_unit][1] * time_value), + day=last_ts.day + + (TIMEUNIT_INCREMENTORS[time_unit][2] * time_value) + ) + + def days_of_month(year: int, month: int): + if month in [1, 3, 5, 7, 8, 10, 12]: + return 31 + if month in [4, 6, 9, 11]: + return 30 + if year % 4 != 0: + return 28 + if year % 400 == 0: + return 29 + if year % 100 == 0: + return 28 + return 28 + + while replacement['day'] > days_of_month(replacement['year'], + replacement['month'] % 12): + replacement['day'] -= days_of_month(replacement['year'], + replacement['month'] % 12) + replacement['month'] += 1 + if replacement['month'] > 12: + replacement['month'] -= 12 + replacement['year'] += 1 + + while replacement['month'] > 12: + replacement['month'] -= 12 + replacement['year'] += 1 + + final_ts = pd.Timestamp(last_ts.replace(**replacement)) + + timestamps.append(final_ts) + + new_timestamps = [] + new_timestamp_bounds = [] + for i, ts in enumerate(timestamps[:-1]): + import pandas as pd + next_ts = timestamps[i + 1] + delta = pd.Timedelta((next_ts - ts).delta / 2) + new_timestamps.append(np.datetime64(ts + delta)) + new_timestamp_bounds.append([np.datetime64(ts), np.datetime64(next_ts)]) + return new_timestamps, new_timestamp_bounds + + def get_method_kwargs(method, frequency, interp_kind, tolerance): if method == 'interpolate': kwargs = {'kind': interp_kind or 'linear'} From ef6a2a36398a029469918a3bd0169476d8c5b1a7 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Thu, 7 Oct 2021 11:54:45 +0200 Subject: [PATCH 05/28] edited list of supported resampling methods --- xcube/core/gen2/config.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/xcube/core/gen2/config.py b/xcube/core/gen2/config.py index eff74342a..98fc06f1a 100644 --- a/xcube/core/gen2/config.py +++ 
b/xcube/core/gen2/config.py @@ -279,12 +279,11 @@ def get_schema(cls): ), temporal_resampling=JsonStringSchema( nullable=True, - enum=[ - 'first', 'last', 'max', 'min', 'mean', 'median', - 'percentile_
<p>
', 'linear', 'nearest', 'nearest-up', - 'zero', 'slinear', 'quadratic', 'cubic', 'previous', - 'next' - ] + enum=['count', 'first', 'last', 'max', 'min', 'mean', 'sum', + 'prod', 'median', 'std', 'var', 'percentile_
<p>
', + 'asfreq', 'ffill', 'bfill', 'pad', 'linear', + 'nearest', 'nearest-up', 'zero', 'slinear', + 'quadratic', 'cubic', 'previous', 'next'] ), chunks=JsonObjectSchema( nullable=True, From 99f9551d08260920bbf54099a1c3502d7474d7dd Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Thu, 7 Oct 2021 11:57:19 +0200 Subject: [PATCH 06/28] introduced temporal resampling --- test/core/gen2/local/test_resamplert.py | 221 +++++++++++++++++++++++- xcube/core/gen2/local/resamplert.py | 56 +++++- xcube/core/resampling/temporal.py | 173 ++++++++++++++----- 3 files changed, 394 insertions(+), 56 deletions(-) diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py index 1e476fbda..68262fc29 100644 --- a/test/core/gen2/local/test_resamplert.py +++ b/test/core/gen2/local/test_resamplert.py @@ -3,13 +3,15 @@ from xcube.core.gen2.local.resamplert import CubeResamplerT from xcube.core.gridmapping import GridMapping +import cftime +import numpy as np import unittest class CubeResamplerTTest(unittest.TestCase): @staticmethod - def _get_cube(time_freq: str, time_periods: int): + def _get_cube(time_freq: str, time_periods: int, use_cftime: bool = False): def b3(index1, index2, index3): return index1 + index2 * 0.1 + index3 * 0.01 @@ -17,6 +19,8 @@ def b3(index1, index2, index3): return new_cube(variables=dict(B03=b3), time_periods=time_periods, time_freq=time_freq, + use_cftime=use_cftime, + time_dtype= 'datetime64[s]' if not use_cftime else None, width=10, height=5, time_start='2010-08-04') def test_transform_cube_no_time_period(self): @@ -32,22 +36,34 @@ def test_transform_cube_no_time_period(self): self.assertEquals(cube, resampled_cube) def test_transform_cube_downsample_to_years(self): - cube_config = CubeConfig(time_range=('2010-01-01', '2012-12-31'), - time_period='1Y', + cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), + time_period='2Y', temporal_resampling='min') temporal_resampler = CubeResamplerT(cube_config) - cube = 
self._get_cube(time_freq='M', time_periods=12) + cube = self._get_cube(time_freq='M', time_periods=24) resampled_cube, grid_mapping, cube_config = temporal_resampler.\ transform_cube(cube, GridMapping.from_dataset(cube), cube_config) - self.assertEquals(cube, resampled_cube) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2011-01-01T00:00:00', '2012-12-31T12:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2010-01-01T00:00:00', '2012-01-01T00:00:00'], + ['2012-01-01T00:00:00', '2014-01-01T00:00:00']], + dtype=np.datetime64)) + self.assertEquals((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEquals(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEquals(16.0, resampled_cube.B03[1].values.min(), 8) def test_transform_cube_downsample_to_months(self): cube_config = CubeConfig(time_range=('2010-08-01', '2010-11-30'), - time_period='1M', + time_period='2M', temporal_resampling='min') temporal_resampler = CubeResamplerT(cube_config) @@ -57,19 +73,206 @@ def test_transform_cube_downsample_to_months(self): transform_cube(cube, GridMapping.from_dataset(cube), cube_config) - self.assertEquals(cube, resampled_cube) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2010-08-31T12:00:00', '2010-10-31T12:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2010-08-01T00:00:00', '2010-10-01T00:00:00'], + ['2010-10-01T00:00:00', '2010-12-01T00:00:00']], + dtype=np.datetime64)) + self.assertEquals((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEquals(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEquals(8.0, resampled_cube.B03[1].values.min(), 8) def test_transform_cube_downsample_to_weeks(self): cube_config = CubeConfig(time_range=('2010-08-03', '2010-09-10'), time_period='2W', + 
temporal_resampling='max') + temporal_resampler = CubeResamplerT(cube_config) + + cube = self._get_cube(time_freq='D', time_periods=32) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2010-08-08T00:00:00', '2010-08-22T00:00:00', + '2010-09-05T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2010-08-01T00:00:00', '2010-08-15T00:00:00'], + ['2010-08-15T00:00:00', '2010-08-29T00:00:00'], + ['2010-08-29T00:00:00', '2010-09-12T00:00:00']], + dtype=np.datetime64)) + self.assertEquals((3, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEquals(10.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEquals(24.0, resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEquals(31.0, resampled_cube.B03[2].values.min(), 8) + + def test_transform_cube_upsample_to_months(self): + cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), + time_period='2M', + temporal_resampling='linear') + temporal_resampler = CubeResamplerT(cube_config) + + cube = self._get_cube(time_freq='Y', time_periods=2) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2011-11-01T00:00:00', '2012-01-01T00:00:00', + '2012-03-01T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2011-10-01T12:00:00', '2011-12-01T12:00:00'], + ['2011-12-01T12:00:00', '2012-01-31T00:00:00'], + ['2012-01-31T00:00:00', '2012-03-31T12:00:00']], + dtype=np.datetime64)) + self.assertEquals((3, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEquals(0.33561644, + 
resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEquals(0.50273973, + resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEquals(0.66712329, + resampled_cube.B03[2].values.min(), 8) + + def test_transform_cube_upsample_to_weeks(self): + cube_config = CubeConfig(time_range=('2010-09-01', '2010-10-10'), + time_period='4W', + temporal_resampling='nearest') + temporal_resampler = CubeResamplerT(cube_config) + + cube = self._get_cube(time_freq='M', time_periods=4) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2010-09-12T00:00:00', '2010-10-10T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2010-08-29T00:00:00', '2010-09-26T00:00:00'], + ['2010-09-26T00:00:00', '2010-10-24T00:00:00']], + dtype=np.datetime64)) + self.assertEquals((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEquals(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEquals(1.0, resampled_cube.B03[1].values.min(), 8) + + def test_transform_cube_upsample_to_days(self): + cube_config = CubeConfig(time_range=('2010-08-14', '2010-08-24'), + time_period='2D', + temporal_resampling='linear') + temporal_resampler = CubeResamplerT(cube_config) + + cube = self._get_cube(time_freq='W', time_periods=3) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2010-08-14T00:00:00', '2010-08-16T00:00:00', + '2010-08-18T00:00:00', '2010-08-20T00:00:00', + '2010-08-22T00:00:00', '2010-08-24T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2010-08-13T00:00:00', 
'2010-08-15T00:00:00'], + ['2010-08-15T00:00:00', '2010-08-17T00:00:00'], + ['2010-08-17T00:00:00', '2010-08-19T00:00:00'], + ['2010-08-19T00:00:00', '2010-08-21T00:00:00'], + ['2010-08-21T00:00:00', '2010-08-23T00:00:00'], + ['2010-08-23T00:00:00', '2010-08-25T00:00:00']], + dtype=np.datetime64)) + self.assertEquals((6, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEquals(0.21428571, + resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEquals(0.5, + resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEquals(0.78571429, + resampled_cube.B03[2].values.min(), 8) + self.assertAlmostEquals(1.07142857, + resampled_cube.B03[3].values.min(), 8) + self.assertAlmostEquals(1.35714286, + resampled_cube.B03[4].values.min(), 8) + self.assertAlmostEquals(1.64285714, + resampled_cube.B03[5].values.min(), 8) + + def test_transform_cube_downsample_to_years_cftimes(self): + cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), + time_period='2Y', temporal_resampling='min') temporal_resampler = CubeResamplerT(cube_config) - cube = self._get_cube(time_freq='D', time_periods=22) + cube = self._get_cube(time_freq='M', time_periods=24, use_cftime=True) resampled_cube, grid_mapping, cube_config = temporal_resampler.\ transform_cube(cube, GridMapping.from_dataset(cube), cube_config) - self.assertEquals(cube, resampled_cube) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal(resampled_cube.time.values, + [cftime.DatetimeProlepticGregorian(2011, 1, 1), + cftime.DatetimeProlepticGregorian(2012, 12, 31, + hour=12)]) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + [[cftime.DatetimeProlepticGregorian(2010, 1, 1), + cftime.DatetimeProlepticGregorian(2012, 1, 1)], + [cftime.DatetimeProlepticGregorian(2012, 1, 1), + cftime.DatetimeProlepticGregorian(2014, 1, 1)]]) + self.assertEquals((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEquals(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEquals(16.0, 
resampled_cube.B03[1].values.min(), 8) + + def test_transform_cube_upsample_to_months_cftimes(self): + cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), + time_period='2M', + temporal_resampling='linear') + temporal_resampler = CubeResamplerT(cube_config) + cube = self._get_cube(time_freq='Y', time_periods=2, use_cftime=True) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + [cftime.DatetimeProlepticGregorian(2011, 11, 1), + cftime.DatetimeProlepticGregorian(2012, 1, 1), + cftime.DatetimeProlepticGregorian(2012, 3, 1)]) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + [[cftime.DatetimeProlepticGregorian(2011, 10, 1, hour=12), + cftime.DatetimeProlepticGregorian(2011, 12, 1, hour=12)], + [cftime.DatetimeProlepticGregorian(2011, 12, 1, hour=12), + cftime.DatetimeProlepticGregorian(2012, 1, 31)], + [cftime.DatetimeProlepticGregorian(2012, 1, 31), + cftime.DatetimeProlepticGregorian(2012, 3, 31, hour=12)]]) + self.assertEquals((3, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEquals(0.33561644, + resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEquals(0.50273973, + resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEquals(0.66712329, + resampled_cube.B03[2].values.min(), 8) diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index d2c3963a4..dbea421d8 100644 --- a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -18,11 +18,13 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+import cftime import pandas as pd import xarray as xr from xcube.core.gridmapping import GridMapping from xcube.core.resampling import resample_in_time +from xcube.core.resampling.temporal import adjust_metadata_and_chunking from xcube.util.assertions import assert_instance from .transformer import CubeTransformer from .transformer import TransformedCube @@ -69,25 +71,75 @@ def transform_cube(self, if cube_config.temporal_resampling is not None: to_drop.append('temporal_resampling') if cube_config.temporal_resampling in \ - ['linear', 'nearest', 'nearest-up', 'zero', 'slinear', + ['linear', 'nearest-up', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', 'next']: - time_resample_params['method'] = 'interp' + time_resample_params['method'] = 'interpolate' time_resample_params['interp_kind'] = \ cube_config.temporal_resampling else: time_resample_params['method'] = \ cube_config.temporal_resampling + # we set cub_asserted to true so the resampling can deal with + # cftime data resampled_cube = resample_in_time( cube, rename_variables=False, + cube_asserted=True, **time_resample_params ) + if self._time_range: + # cut possible overlapping time steps + is_cf_time = isinstance(resampled_cube.time_bnds[0].values[0], + cftime.datetime) + if is_cf_time: + resampled_cube = _get_temporal_subset_cf(resampled_cube, + self._time_range) + else: + resampled_cube = _get_temporal_subset(resampled_cube, + self._time_range) + adjust_metadata_and_chunking(resampled_cube, time_chunk_size=1) cube_config = cube_config.drop_props(to_drop) return resampled_cube, gm, cube_config +def _get_temporal_subset_cf(resampled_cube, time_range): + try: + data_start_index = resampled_cube.time_bnds[:, 0].to_index().\ + get_loc(time_range[0], method='bfill') + if isinstance(data_start_index, slice): + data_start_index = data_start_index.start + except KeyError: + data_start_index = 0 + try: + data_end_index = resampled_cube.time_bnds[:, 1].to_index().\ + get_loc(time_range[1], method='ffill') + if 
isinstance(data_end_index, slice): + data_end_index = data_end_index.stop + except KeyError: + data_end_index = resampled_cube.time.size + return resampled_cube.isel(time=slice(data_start_index, data_end_index)) + + +def _get_temporal_subset(resampled_cube, time_range): + try: + data_start_time = resampled_cube.time_bnds[:, 0]. \ + sel(time=time_range[0], method='bfill') + if data_start_time.size < 1: + data_start_time = resampled_cube.time_bnds[0, 0] + except KeyError: + data_start_time = resampled_cube.time_bnds[0, 0] + try: + data_end_time = resampled_cube.time_bnds[:, 1]. \ + sel(time=time_range[1], method='ffill') + if data_end_time.size < 1: + data_end_time = resampled_cube.time_bnds[-1, 1] + except KeyError: + data_end_time = resampled_cube.time_bnds[-1, 1] + return resampled_cube.sel(time=slice(data_start_time, data_end_time)) + + def _normalize_time(time, normalize_hour=True): if normalize_hour: return time.replace(hour=0, minute=0, second=0, microsecond=0, diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index bb2e7b17c..d0803dedb 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -21,7 +21,9 @@ from typing import Dict, Any, Sequence, Union +import cftime import numpy as np +import pandas as pd import re import xarray as xr @@ -29,6 +31,17 @@ from xcube.core.select import select_variables_subset from xcube.core.verify import assert_cube +UPSAMPLING_METHODS = ['asfreq', 'ffill', 'bfill', 'pad', 'nearest', + 'interpolate'] +DOWNSAMPLING_METHODS = ['count', 'first', 'last', 'min', 'max', 'sum', 'prod', + 'mean', 'median', 'std', 'var'] +RESAMPLING_METHODS = UPSAMPLING_METHODS + DOWNSAMPLING_METHODS +TIMEUNIT_INCREMENTORS = dict( + YS=(1, 0, 0), + QS=(0, 3, 0), + MS=(0, 1, 0), + W=(0, 0, 7) +) def resample_in_time(dataset: xr.Dataset, frequency: str, @@ -149,43 +162,55 @@ def resample_in_time(dataset: xr.Dataset, else: resampled_cube = xr.merge(resampled_cubes) adjusted_times, 
time_bounds = _adjust_times_and_bounds( - resampled_cube.time.values, frequency) + resampled_cube.time.values, frequency, method) update_vars = dict( time=adjusted_times, time_bnds=xr.DataArray(time_bounds, dims=['time', 'bnds']) ) resampled_cube = resampled_cube.assign_coords(update_vars) - time_coverage_start = '%s' % time_bounds[0][0] - time_coverage_end = '%s' % time_bounds[-1][1] + return adjust_metadata_and_chunking(resampled_cube, + metadata=metadata, + time_chunk_size=time_chunk_size) - resampled_cube.attrs.update(metadata or {}) - # TODO: add other time_coverage_ attributes - resampled_cube.attrs.update(time_coverage_start=time_coverage_start, - time_coverage_end=time_coverage_end) - schema = CubeSchema.new(dataset) +def adjust_metadata_and_chunking(dataset, metadata=None, time_chunk_size=None): + time_coverage_start = '%s' % dataset.time_bnds[0][0] + time_coverage_end = '%s' % dataset.time_bnds[-1][1] + + dataset.attrs.update(metadata or {}) + # TODO: add other time_coverage_ attributes + dataset.attrs.update(time_coverage_start=time_coverage_start, + time_coverage_end=time_coverage_end) + try: + schema = CubeSchema.new(dataset) + except ValueError: + return _adjust_chunk_sizes_without_schema(dataset, time_chunk_size) if schema.chunks is None: - return resampled_cube + return _adjust_chunk_sizes_without_schema(dataset, time_chunk_size) chunk_sizes = {schema.dims[i]: schema.chunks[i] for i in range(schema.ndim)} if isinstance(time_chunk_size, int) and time_chunk_size >= 0: chunk_sizes['time'] = time_chunk_size - return resampled_cube.chunk(chunk_sizes) + return dataset.chunk(chunk_sizes) + + +def _adjust_chunk_sizes_without_schema(dataset, time_chunk_size = None): + chunk_sizes = dict(dataset.chunks) + if isinstance(time_chunk_size, int) and time_chunk_size >= 0: + chunk_sizes['time'] = time_chunk_size + else: + chunk_sizes['time'] = 1 + return dataset.chunk(chunk_sizes) -def _adjust_times_and_bounds(time_values, frequency): +def 
_adjust_times_and_bounds(time_values, frequency, method): import pandas as pd time_unit = re.findall('[A-Z]+', frequency)[0] time_value = int(frequency.split(time_unit)[0]) - TIMEUNIT_INCREMENTORS = dict( - YS=(1, 0, 0), - QS=(0, 3, 0), - MS=(0, 1, 0), - W=(0, 0, 7) - ) + if time_unit not in TIMEUNIT_INCREMENTORS: if time_unit == 'D': half_time_delta = np.timedelta64(12*time_value, 'h') @@ -198,7 +223,52 @@ def _adjust_times_and_bounds(time_values, frequency): time_values + half_time_delta]).\ transpose() return time_values, time_bounds_values - timestamps = [pd.Timestamp(tv) for tv in time_values] + is_cf_time = isinstance(time_values[0], cftime.datetime) + if is_cf_time: + timestamps = [pd.Timestamp(tv.isoformat()) for tv in time_values] + calendar = time_values[0].calendar + else: + timestamps = [pd.Timestamp(tv) for tv in time_values] + calendar = None + + iteration_offset = 0 + if method in UPSAMPLING_METHODS: + # we need a simulated preceding time stamp + timestamps.insert(0, _get_previous_timestamp(timestamps, + time_unit, + time_value)) + iteration_offset = 1 + + timestamps.append(_get_next_timestamp(timestamps, time_unit, time_value)) + + new_timestamps = [] + new_timestamp_bounds = [] + for i, ts in enumerate(timestamps[iteration_offset:-1]): + next_ts = timestamps[i + iteration_offset + 1] + delta_to_next = pd.Timedelta((next_ts - ts).delta / 2) + if method in DOWNSAMPLING_METHODS: + new_timestamps.append(_convert(ts + delta_to_next, calendar)) + new_timestamp_bounds.append([_convert(ts, calendar), + _convert(next_ts, calendar)]) + else: + previous_ts = timestamps[i + iteration_offset - 1] + delta_to_previous = pd.Timedelta((ts - previous_ts).delta / 2) + new_timestamps.append(_convert(ts, calendar)) + new_timestamp_bounds.append([_convert(ts - delta_to_previous, + calendar), + _convert(ts + delta_to_next, + calendar)]) + return new_timestamps, new_timestamp_bounds + + +def _convert(timestamp: pd.Timestamp, calendar: str): + if calendar is not None: + 
return cftime.DateFromJulianDay(timestamp.to_julian_date(), + calendar=calendar) + return np.datetime64(timestamp) + + +def _get_next_timestamp(timestamps, time_unit, time_value) -> pd.Timestamp: last_ts = timestamps[-1] replacement = dict( year=last_ts.year + @@ -208,23 +278,9 @@ def _adjust_times_and_bounds(time_values, frequency): day=last_ts.day + (TIMEUNIT_INCREMENTORS[time_unit][2] * time_value) ) - - def days_of_month(year: int, month: int): - if month in [1, 3, 5, 7, 8, 10, 12]: - return 31 - if month in [4, 6, 9, 11]: - return 30 - if year % 4 != 0: - return 28 - if year % 400 == 0: - return 29 - if year % 100 == 0: - return 28 - return 28 - - while replacement['day'] > days_of_month(replacement['year'], + while replacement['day'] > _days_of_month(replacement['year'], replacement['month'] % 12): - replacement['day'] -= days_of_month(replacement['year'], + replacement['day'] -= _days_of_month(replacement['year'], replacement['month'] % 12) replacement['month'] += 1 if replacement['month'] > 12: @@ -235,19 +291,46 @@ def days_of_month(year: int, month: int): replacement['month'] -= 12 replacement['year'] += 1 - final_ts = pd.Timestamp(last_ts.replace(**replacement)) + return pd.Timestamp(last_ts.replace(**replacement)) - timestamps.append(final_ts) - new_timestamps = [] - new_timestamp_bounds = [] - for i, ts in enumerate(timestamps[:-1]): - import pandas as pd - next_ts = timestamps[i + 1] - delta = pd.Timedelta((next_ts - ts).delta / 2) - new_timestamps.append(np.datetime64(ts + delta)) - new_timestamp_bounds.append([np.datetime64(ts), np.datetime64(next_ts)]) - return new_timestamps, new_timestamp_bounds +def _get_previous_timestamp(timestamps, time_unit, time_value) -> pd.Timestamp: + first_ts = timestamps[0] + replacement = dict( + year=first_ts.year - + (TIMEUNIT_INCREMENTORS[time_unit][0] * time_value), + month=first_ts.month - + (TIMEUNIT_INCREMENTORS[time_unit][1] * time_value), + day=first_ts.day - + (TIMEUNIT_INCREMENTORS[time_unit][2] * 
time_value) + ) + while replacement['day'] < 1: + replacement['month'] -= 1 + if replacement['month'] < 1: + replacement['month'] += 12 + replacement['year'] -= 1 + replacement['day'] += _days_of_month(replacement['year'], + replacement['month'] % 12) + + while replacement['month'] < 1: + replacement['month'] += 12 + replacement['year'] -= 1 + + return pd.Timestamp(first_ts.replace(**replacement)) + + +def _days_of_month(year: int, month: int): + if month in [1, 3, 5, 7, 8, 10, 12]: + return 31 + if month in [4, 6, 9, 11]: + return 30 + if year % 4 != 0: + return 28 + if year % 400 == 0: + return 29 + if year % 100 == 0: + return 28 + return 28 def get_method_kwargs(method, frequency, interp_kind, tolerance): From 6e9ca515be43c3da1ab42db3b82e2179237e21fa Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Thu, 7 Oct 2021 18:48:01 +0200 Subject: [PATCH 07/28] improved temporal subsetting --- xcube/core/gen2/local/resamplert.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index dbea421d8..716e39e5d 100644 --- a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 by the xcube development team and contributors # -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal in -# the Software without restriction, including without limitation the rights to -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -# of the Software, and to permit persons to whom the Software is furnished to do -# so, subject to the following conditions: +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# 
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. @@ -59,7 +59,7 @@ def transform_cube(self, start_time = pd.to_datetime(self._time_range[0]) dataset_start_time = pd.Timestamp(cube.time[0].values) time_delta = _normalize_time(dataset_start_time) \ - - start_time + - start_time period_delta = pd.Timedelta(cube_config.time_period) if time_delta > period_delta: if time_unit == 'H': @@ -116,7 +116,7 @@ def _get_temporal_subset_cf(resampled_cube, time_range): data_end_index = resampled_cube.time_bnds[:, 1].to_index().\ get_loc(time_range[1], method='ffill') if isinstance(data_end_index, slice): - data_end_index = data_end_index.stop + data_end_index = data_end_index.stop + 1 except KeyError: data_end_index = resampled_cube.time.size return resampled_cube.isel(time=slice(data_start_index, data_end_index)) @@ -162,4 +162,4 @@ def _get_expected_start_time(dataset_start_time, time_unit): return _normalize_time(dataset_start_time).replace(day=1) - delta if time_unit == 'Y': return _normalize_time(dataset_start_time).replace(month=1, day=1) - raise CubeGeneratorError(f'Unsupported time unit "{time_unit}"') \ No newline at end of file + raise CubeGeneratorError(f'Unsupported time unit "{time_unit}"') From 6943c2a629768e6137708a3ece2c9f0d5080e6ec Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Thu, 7 Oct 2021 18:48:53 +0200 Subject: [PATCH 08/28] improved setting of bounds --- test/core/gen2/local/test_resamplert.py | 25 +++-- xcube/core/resampling/temporal.py | 126 +++++++++++++++--------- 2 files changed, 93 insertions(+), 58 deletions(-) diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py index 68262fc29..6f5948935 100644 --- 
a/test/core/gen2/local/test_resamplert.py +++ b/test/core/gen2/local/test_resamplert.py @@ -50,7 +50,7 @@ def test_transform_cube_downsample_to_years(self): self.assertIsNotNone(resampled_cube) np.testing.assert_equal( resampled_cube.time.values, - np.array(['2011-01-01T00:00:00', '2012-12-31T12:00:00'], + np.array(['2011-01-01T00:00:00', '2013-01-01T00:00:00'], dtype=np.datetime64)) np.testing.assert_equal( resampled_cube.time_bnds.values, @@ -76,7 +76,7 @@ def test_transform_cube_downsample_to_months(self): self.assertIsNotNone(resampled_cube) np.testing.assert_equal( resampled_cube.time.values, - np.array(['2010-08-31T12:00:00', '2010-10-31T12:00:00'], + np.array(['2010-09-01T00:00:00', '2010-11-01T00:00:00'], dtype=np.datetime64)) np.testing.assert_equal( resampled_cube.time_bnds.values, @@ -136,9 +136,9 @@ def test_transform_cube_upsample_to_months(self): dtype=np.datetime64)) np.testing.assert_equal( resampled_cube.time_bnds.values, - np.array([['2011-10-01T12:00:00', '2011-12-01T12:00:00'], - ['2011-12-01T12:00:00', '2012-01-31T00:00:00'], - ['2012-01-31T00:00:00', '2012-03-31T12:00:00']], + np.array([['2011-10-01T00:00:00', '2011-12-01T00:00:00'], + ['2011-12-01T00:00:00', '2012-02-01T00:00:00'], + ['2012-02-01T00:00:00', '2012-04-01T00:00:00']], dtype=np.datetime64)) self.assertEquals((3, 5, 10), resampled_cube.B03.shape) self.assertAlmostEquals(0.33561644, @@ -231,8 +231,7 @@ def test_transform_cube_downsample_to_years_cftimes(self): self.assertIsNotNone(resampled_cube) np.testing.assert_equal(resampled_cube.time.values, [cftime.DatetimeProlepticGregorian(2011, 1, 1), - cftime.DatetimeProlepticGregorian(2012, 12, 31, - hour=12)]) + cftime.DatetimeProlepticGregorian(2013, 1, 1)]) np.testing.assert_equal( resampled_cube.time_bnds.values, [[cftime.DatetimeProlepticGregorian(2010, 1, 1), @@ -263,12 +262,12 @@ def test_transform_cube_upsample_to_months_cftimes(self): cftime.DatetimeProlepticGregorian(2012, 3, 1)]) np.testing.assert_equal( 
resampled_cube.time_bnds.values, - [[cftime.DatetimeProlepticGregorian(2011, 10, 1, hour=12), - cftime.DatetimeProlepticGregorian(2011, 12, 1, hour=12)], - [cftime.DatetimeProlepticGregorian(2011, 12, 1, hour=12), - cftime.DatetimeProlepticGregorian(2012, 1, 31)], - [cftime.DatetimeProlepticGregorian(2012, 1, 31), - cftime.DatetimeProlepticGregorian(2012, 3, 31, hour=12)]]) + [[cftime.DatetimeProlepticGregorian(2011, 10, 1), + cftime.DatetimeProlepticGregorian(2011, 12, 1)], + [cftime.DatetimeProlepticGregorian(2011, 12, 1), + cftime.DatetimeProlepticGregorian(2012, 2, 1)], + [cftime.DatetimeProlepticGregorian(2012, 2, 1), + cftime.DatetimeProlepticGregorian(2012, 4, 1)]]) self.assertEquals((3, 5, 10), resampled_cube.B03.shape) self.assertAlmostEquals(0.33561644, resampled_cube.B03[0].values.min(), 8) diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index d0803dedb..b66814105 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -19,7 +19,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from typing import Dict, Any, Sequence, Union +from typing import Dict, Any, Sequence, Union, List import cftime import numpy as np @@ -37,10 +37,14 @@ 'mean', 'median', 'std', 'var'] RESAMPLING_METHODS = UPSAMPLING_METHODS + DOWNSAMPLING_METHODS TIMEUNIT_INCREMENTORS = dict( - YS=(1, 0, 0), - QS=(0, 3, 0), - MS=(0, 1, 0), - W=(0, 0, 7) + YS=[1, 0, 0, 0], + QS=[0, 3, 0, 0], + MS=[0, 1, 0, 0], + W=[0, 0, 7, 0] +) +HALF_TIMEUNIT_INCREMENTORS = dict( + YS=[0, 6, 0, 0], + W=[0, 0, 3, 12] ) def resample_in_time(dataset: xr.Dataset, @@ -219,9 +223,9 @@ def _adjust_times_and_bounds(time_values, frequency, method): else: raise ValueError(f'Unsupported time unit "{time_unit}"') time_values += half_time_delta - time_bounds_values = np.array([time_values - half_time_delta, - time_values + half_time_delta]).\ - transpose() + time_bounds_values = \ + np.array([time_values - half_time_delta, + time_values + half_time_delta]).transpose() return time_values, time_bounds_values is_cf_time = isinstance(time_values[0], cftime.datetime) if is_cf_time: @@ -231,32 +235,28 @@ def _adjust_times_and_bounds(time_values, frequency, method): timestamps = [pd.Timestamp(tv) for tv in time_values] calendar = None - iteration_offset = 0 - if method in UPSAMPLING_METHODS: - # we need a simulated preceding time stamp - timestamps.insert(0, _get_previous_timestamp(timestamps, - time_unit, - time_value)) - iteration_offset = 1 - - timestamps.append(_get_next_timestamp(timestamps, time_unit, time_value)) + timestamps.append(_get_next_timestamp(timestamps[-1], + time_unit, + time_value, + False)) new_timestamps = [] new_timestamp_bounds = [] - for i, ts in enumerate(timestamps[iteration_offset:-1]): - next_ts = timestamps[i + iteration_offset + 1] - delta_to_next = pd.Timedelta((next_ts - ts).delta / 2) + for i, ts in enumerate(timestamps[:-1]): + next_ts = timestamps[i + 1] + half_next_ts = _get_next_timestamp(ts, time_unit, time_value, True) if method in DOWNSAMPLING_METHODS: - 
new_timestamps.append(_convert(ts + delta_to_next, calendar)) + half_next_ts = _get_next_timestamp(ts, time_unit, time_value, True) + new_timestamps.append(_convert(half_next_ts, calendar)) new_timestamp_bounds.append([_convert(ts, calendar), _convert(next_ts, calendar)]) else: - previous_ts = timestamps[i + iteration_offset - 1] - delta_to_previous = pd.Timedelta((ts - previous_ts).delta / 2) + half_previous_ts = \ + _get_previous_timestamp(ts, time_unit, time_value, True) new_timestamps.append(_convert(ts, calendar)) - new_timestamp_bounds.append([_convert(ts - delta_to_previous, + new_timestamp_bounds.append([_convert(half_previous_ts, calendar), - _convert(ts + delta_to_next, + _convert(half_next_ts, calendar)]) return new_timestamps, new_timestamp_bounds @@ -268,20 +268,22 @@ def _convert(timestamp: pd.Timestamp, calendar: str): return np.datetime64(timestamp) -def _get_next_timestamp(timestamps, time_unit, time_value) -> pd.Timestamp: - last_ts = timestamps[-1] +def _get_next_timestamp(timestamp, time_unit, time_value, half) \ + -> pd.Timestamp: + incrementors = _get_incrementors(timestamp, time_unit, time_value, half) replacement = dict( - year=last_ts.year + - (TIMEUNIT_INCREMENTORS[time_unit][0] * time_value), - month=last_ts.month + - (TIMEUNIT_INCREMENTORS[time_unit][1] * time_value), - day=last_ts.day + - (TIMEUNIT_INCREMENTORS[time_unit][2] * time_value) + year=timestamp.year + incrementors[0], + month=timestamp.month + incrementors[1], + day=timestamp.day + incrementors[2], + hour=timestamp.hour + incrementors[3] ) + while replacement['hour'] > 24: + replacement['hour'] -= 24 + replacement['day'] += 1 while replacement['day'] > _days_of_month(replacement['year'], - replacement['month'] % 12): + replacement['month']): replacement['day'] -= _days_of_month(replacement['year'], - replacement['month'] % 12) + replacement['month']) replacement['month'] += 1 if replacement['month'] > 12: replacement['month'] -= 12 @@ -291,19 +293,23 @@ def 
_get_next_timestamp(timestamps, time_unit, time_value) -> pd.Timestamp: replacement['month'] -= 12 replacement['year'] += 1 - return pd.Timestamp(last_ts.replace(**replacement)) + return pd.Timestamp(timestamp.replace(**replacement)) -def _get_previous_timestamp(timestamps, time_unit, time_value) -> pd.Timestamp: - first_ts = timestamps[0] +def _get_previous_timestamp(timestamp, time_unit, time_value, half) \ + -> pd.Timestamp: + incrementors = _get_incrementors(timestamp, time_unit, time_value, half) replacement = dict( - year=first_ts.year - - (TIMEUNIT_INCREMENTORS[time_unit][0] * time_value), - month=first_ts.month - - (TIMEUNIT_INCREMENTORS[time_unit][1] * time_value), - day=first_ts.day - - (TIMEUNIT_INCREMENTORS[time_unit][2] * time_value) + year=timestamp.year - incrementors[0], + month=timestamp.month - incrementors[1], + day=timestamp.day - incrementors[2], + hour=timestamp.hour - incrementors[3] ) + + while replacement['hour'] < 0: + replacement['hour'] += 24 + replacement['day'] -= 1 + while replacement['day'] < 1: replacement['month'] -= 1 if replacement['month'] < 1: @@ -316,7 +322,37 @@ def _get_previous_timestamp(timestamps, time_unit, time_value) -> pd.Timestamp: replacement['month'] += 12 replacement['year'] -= 1 - return pd.Timestamp(first_ts.replace(**replacement)) + return pd.Timestamp(timestamp.replace(**replacement)) + + +def _get_incrementors(timestamp, time_unit, time_value, half) -> List[int]: + if not half: + return _tune_incrementors(TIMEUNIT_INCREMENTORS[time_unit], time_value) + if time_value % 2 == 0: + time_value /= 2 + return _tune_incrementors(TIMEUNIT_INCREMENTORS[time_unit], + int(time_value)) + if time_unit in HALF_TIMEUNIT_INCREMENTORS: + return _tune_incrementors(HALF_TIMEUNIT_INCREMENTORS[time_unit], + time_value) + if time_unit == 'QS': + num_months = 3 + else: + num_months = 1 + import math + month = int(math.floor((num_months * time_value) / 2)) + days = _days_of_month(timestamp.year, month) + if days % 2 == 0: + hours = 0 
+ else: + hours = 12 + days = int(math.floor(days / 2)) + return [0, month, days, hours] + + +def _tune_incrementors(incrementors, time_value): + incrementors = [i * time_value for i in incrementors] + return incrementors def _days_of_month(year: int, month: int): From 3649eb344b0c6c8b7d343a2afefbd46bf03cb792 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Fri, 8 Oct 2021 10:50:11 +0200 Subject: [PATCH 09/28] adjust upsampled bounds with numpy timedelta correctly --- test/core/gen2/local/test_resamplert.py | 35 +++++------ xcube/core/resampling/temporal.py | 77 +++++++++++++++---------- 2 files changed, 62 insertions(+), 50 deletions(-) diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py index 6f5948935..c809a00e0 100644 --- a/test/core/gen2/local/test_resamplert.py +++ b/test/core/gen2/local/test_resamplert.py @@ -189,32 +189,29 @@ def test_transform_cube_upsample_to_days(self): self.assertIsNotNone(resampled_cube) np.testing.assert_equal( resampled_cube.time.values, - np.array(['2010-08-14T00:00:00', '2010-08-16T00:00:00', - '2010-08-18T00:00:00', '2010-08-20T00:00:00', - '2010-08-22T00:00:00', '2010-08-24T00:00:00'], + np.array(['2010-08-15T00:00:00', '2010-08-17T00:00:00', + '2010-08-19T00:00:00', '2010-08-21T00:00:00', + '2010-08-23T00:00:00'], dtype=np.datetime64)) np.testing.assert_equal( resampled_cube.time_bnds.values, - np.array([['2010-08-13T00:00:00', '2010-08-15T00:00:00'], - ['2010-08-15T00:00:00', '2010-08-17T00:00:00'], - ['2010-08-17T00:00:00', '2010-08-19T00:00:00'], - ['2010-08-19T00:00:00', '2010-08-21T00:00:00'], - ['2010-08-21T00:00:00', '2010-08-23T00:00:00'], - ['2010-08-23T00:00:00', '2010-08-25T00:00:00']], + np.array([['2010-08-14T00:00:00', '2010-08-16T00:00:00'], + ['2010-08-16T00:00:00', '2010-08-18T00:00:00'], + ['2010-08-18T00:00:00', '2010-08-20T00:00:00'], + ['2010-08-20T00:00:00', '2010-08-22T00:00:00'], + ['2010-08-22T00:00:00', '2010-08-24T00:00:00']], dtype=np.datetime64)) - 
self.assertEquals((6, 5, 10), resampled_cube.B03.shape) - self.assertAlmostEquals(0.21428571, - resampled_cube.B03[0].values.min(), 8) + self.assertEquals((5, 5, 10), resampled_cube.B03.shape) self.assertAlmostEquals(0.5, - resampled_cube.B03[1].values.min(), 8) + resampled_cube.B03[0].values.min(), 8) self.assertAlmostEquals(0.78571429, - resampled_cube.B03[2].values.min(), 8) + resampled_cube.B03[1].values.min(), 8) self.assertAlmostEquals(1.07142857, - resampled_cube.B03[3].values.min(), 8) + resampled_cube.B03[2].values.min(), 8) self.assertAlmostEquals(1.35714286, - resampled_cube.B03[4].values.min(), 8) + resampled_cube.B03[3].values.min(), 8) self.assertAlmostEquals(1.64285714, - resampled_cube.B03[5].values.min(), 8) + resampled_cube.B03[4].values.min(), 8) def test_transform_cube_downsample_to_years_cftimes(self): cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), @@ -230,8 +227,8 @@ def test_transform_cube_downsample_to_years_cftimes(self): cube_config) self.assertIsNotNone(resampled_cube) np.testing.assert_equal(resampled_cube.time.values, - [cftime.DatetimeProlepticGregorian(2011, 1, 1), - cftime.DatetimeProlepticGregorian(2013, 1, 1)]) + [cftime.DatetimeProlepticGregorian(2011, 1, 1), + cftime.DatetimeProlepticGregorian(2013, 1, 1)]) np.testing.assert_equal( resampled_cube.time_bnds.values, [[cftime.DatetimeProlepticGregorian(2010, 1, 1), diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index b66814105..d2634f2d6 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -36,17 +36,16 @@ DOWNSAMPLING_METHODS = ['count', 'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', 'median', 'std', 'var'] RESAMPLING_METHODS = UPSAMPLING_METHODS + DOWNSAMPLING_METHODS -TIMEUNIT_INCREMENTORS = dict( +TIMEUNIT_INCREMENTS = dict( YS=[1, 0, 0, 0], QS=[0, 3, 0, 0], - MS=[0, 1, 0, 0], - W=[0, 0, 7, 0] + MS=[0, 1, 0, 0] ) -HALF_TIMEUNIT_INCREMENTORS = dict( - YS=[0, 6, 0, 0], - W=[0, 0, 3, 
12] +HALF_TIMEUNIT_INCREMENTS = dict( + YS=[0, 6, 0, 0] ) + def resample_in_time(dataset: xr.Dataset, frequency: str, method: Union[str, Sequence[str]], @@ -201,7 +200,7 @@ def adjust_metadata_and_chunking(dataset, metadata=None, time_chunk_size=None): return dataset.chunk(chunk_sizes) -def _adjust_chunk_sizes_without_schema(dataset, time_chunk_size = None): +def _adjust_chunk_sizes_without_schema(dataset, time_chunk_size=None): chunk_sizes = dict(dataset.chunks) if isinstance(time_chunk_size, int) and time_chunk_size >= 0: chunk_sizes['time'] = time_chunk_size @@ -211,22 +210,26 @@ def _adjust_chunk_sizes_without_schema(dataset, time_chunk_size = None): def _adjust_times_and_bounds(time_values, frequency, method): - import pandas as pd time_unit = re.findall('[A-Z]+', frequency)[0] time_value = int(frequency.split(time_unit)[0]) - - if time_unit not in TIMEUNIT_INCREMENTORS: + if time_unit not in TIMEUNIT_INCREMENTS: if time_unit == 'D': - half_time_delta = np.timedelta64(12*time_value, 'h') + half_time_delta = np.timedelta64(12 * time_value, 'h') elif time_unit == 'H': half_time_delta = np.timedelta64(30 * time_value, 'm') + elif time_unit == 'W': + half_time_delta = np.timedelta64(84 * time_value, 'h') else: raise ValueError(f'Unsupported time unit "{time_unit}"') - time_values += half_time_delta + if method in DOWNSAMPLING_METHODS: + time_values += half_time_delta time_bounds_values = \ np.array([time_values - half_time_delta, time_values + half_time_delta]).transpose() return time_values, time_bounds_values + # time units year, month and quarter cannot be converted to + # numpy timedelta objects, so we have to convert them to pandas timestamps + # and modify these is_cf_time = isinstance(time_values[0], cftime.datetime) if is_cf_time: timestamps = [pd.Timestamp(tv.isoformat()) for tv in time_values] @@ -245,8 +248,9 @@ def _adjust_times_and_bounds(time_values, frequency, method): for i, ts in enumerate(timestamps[:-1]): next_ts = timestamps[i + 1] half_next_ts 
= _get_next_timestamp(ts, time_unit, time_value, True) + # depending on whether the data was sampled down or up, + # times need to be adjusted differently if method in DOWNSAMPLING_METHODS: - half_next_ts = _get_next_timestamp(ts, time_unit, time_value, True) new_timestamps.append(_convert(half_next_ts, calendar)) new_timestamp_bounds.append([_convert(ts, calendar), _convert(next_ts, calendar)]) @@ -270,12 +274,16 @@ def _convert(timestamp: pd.Timestamp, calendar: str): def _get_next_timestamp(timestamp, time_unit, time_value, half) \ -> pd.Timestamp: - incrementors = _get_incrementors(timestamp, time_unit, time_value, half) + # Retrieves the timestamp following the passed timestamp according to the + # given time unit and time value. + # If half is True, the timestamp halfway between the timestamp and the next + # timestamp (which is not necessarily halfway between the two) is returned + increments = _get_increments(timestamp, time_unit, time_value, half) replacement = dict( - year=timestamp.year + incrementors[0], - month=timestamp.month + incrementors[1], - day=timestamp.day + incrementors[2], - hour=timestamp.hour + incrementors[3] + year=timestamp.year + increments[0], + month=timestamp.month + increments[1], + day=timestamp.day + increments[2], + hour=timestamp.hour + increments[3] ) while replacement['hour'] > 24: replacement['hour'] -= 24 @@ -298,12 +306,17 @@ def _get_next_timestamp(timestamp, time_unit, time_value, half) \ def _get_previous_timestamp(timestamp, time_unit, time_value, half) \ -> pd.Timestamp: - incrementors = _get_incrementors(timestamp, time_unit, time_value, half) + # Retrieves the timestamp preceding the passed timestamp according to the + # given time unit and time value. 
+ # If half is True, the timestamp halfway between the timestamp and the + # previous timestamp (which is not necessarily halfway between the two) + # is returned + increments = _get_increments(timestamp, time_unit, time_value, half) replacement = dict( - year=timestamp.year - incrementors[0], - month=timestamp.month - incrementors[1], - day=timestamp.day - incrementors[2], - hour=timestamp.hour - incrementors[3] + year=timestamp.year - increments[0], + month=timestamp.month - increments[1], + day=timestamp.day - increments[2], + hour=timestamp.hour - increments[3] ) while replacement['hour'] < 0: @@ -325,16 +338,18 @@ def _get_previous_timestamp(timestamp, time_unit, time_value, half) \ return pd.Timestamp(timestamp.replace(**replacement)) -def _get_incrementors(timestamp, time_unit, time_value, half) -> List[int]: +def _get_increments(timestamp, time_unit, time_value, half) -> List[int]: + # Determines the increments for year, month, day, and hour to be applied + # to a timestamp if not half: - return _tune_incrementors(TIMEUNIT_INCREMENTORS[time_unit], time_value) + return _tune_increments(TIMEUNIT_INCREMENTS[time_unit], time_value) if time_value % 2 == 0: time_value /= 2 - return _tune_incrementors(TIMEUNIT_INCREMENTORS[time_unit], - int(time_value)) - if time_unit in HALF_TIMEUNIT_INCREMENTORS: - return _tune_incrementors(HALF_TIMEUNIT_INCREMENTORS[time_unit], - time_value) + return _tune_increments(TIMEUNIT_INCREMENTS[time_unit], + int(time_value)) + if time_unit in HALF_TIMEUNIT_INCREMENTS: + return _tune_increments(HALF_TIMEUNIT_INCREMENTS[time_unit], + time_value) if time_unit == 'QS': num_months = 3 else: @@ -350,7 +365,7 @@ def _get_incrementors(timestamp, time_unit, time_value, half) -> List[int]: return [0, month, days, hours] -def _tune_incrementors(incrementors, time_value): +def _tune_increments(incrementors, time_value): incrementors = [i * time_value for i in incrementors] return incrementors From b441a581710b53b996dbf777131ad981a046868d Mon Sep 
17 00:00:00 2001 From: Tonio Fincke Date: Fri, 8 Oct 2021 11:42:10 +0200 Subject: [PATCH 10/28] edited downsampling methods --- xcube/core/resampling/temporal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index d2634f2d6..69bdbe518 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -34,7 +34,7 @@ UPSAMPLING_METHODS = ['asfreq', 'ffill', 'bfill', 'pad', 'nearest', 'interpolate'] DOWNSAMPLING_METHODS = ['count', 'first', 'last', 'min', 'max', 'sum', 'prod', - 'mean', 'median', 'std', 'var'] + 'mean', 'median', 'std', 'var', 'quantile'] RESAMPLING_METHODS = UPSAMPLING_METHODS + DOWNSAMPLING_METHODS TIMEUNIT_INCREMENTS = dict( YS=[1, 0, 0, 0], From 00eb10f684f7f999d8d984459750322ba7b848ba Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Fri, 8 Oct 2021 11:42:19 +0200 Subject: [PATCH 11/28] edited changelog --- CHANGES.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index e5515c603..fd76337b3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,15 @@ -## Changes in 0.9.0 (in development) +## Changes in 0.9.1 (in development) + +* Cube generator `xcube gen2` allows to use temporal resampling. To use it, + a user must set the parameter `time_period` (in a pandas-interpretable + pattern, e.g., '4D') and the newly introduced parameter `temporal_resampling`. + To sample down to a broader temporal resolution, use any of `['count', + 'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', 'median', 'std', + 'var', 'percentile_
<p>
']`, to sample up to a finer resolution, use any of + `['asfreq', 'ffill', 'bfill', 'pad', 'nearest', 'nearest-up', 'zero', + 'slinear', 'quadratic', 'cubic', 'previous', 'next']`. (#523) + +## Changes in 0.9.0 ### New features From 6b76be54d74df7822b15f2692c66871382aedadc Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Wed, 13 Oct 2021 08:52:26 +0200 Subject: [PATCH 12/28] resample to center of month --- test/core/resampling/test_temporal.py | 2 +- xcube/core/resampling/temporal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/core/resampling/test_temporal.py b/test/core/resampling/test_temporal.py index e6c2ebdba..5d10b5eb7 100644 --- a/test/core/resampling/test_temporal.py +++ b/test/core/resampling/test_temporal.py @@ -168,7 +168,7 @@ def test_resample_in_time_resample_to_quarter(self): self.assertIsNot(resampled_cube, self.input_cube) self.assertIn('time', resampled_cube) self.assertEqual(1, resampled_cube.time.size) - self.assertEqual(np.datetime64('2017-08-16'), + self.assertEqual(np.datetime64('2017-08-15T12:00:00'), resampled_cube.time[0].values) self.assertIn('time_bnds', resampled_cube) self.assertEqual((1, 2), resampled_cube.time_bnds.shape) diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index 69bdbe518..4f9a432bc 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -361,7 +361,7 @@ def _get_increments(timestamp, time_unit, time_value, half) -> List[int]: hours = 0 else: hours = 12 - days = int(math.floor(days / 2)) + days = int(math.floor(days / 2)) - 1 return [0, month, days, hours] From 89e26bd8c67b2a00a0787af3acdc322bb628bc6e Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Wed, 13 Oct 2021 09:59:24 +0200 Subject: [PATCH 13/28] avoid use of outdated method --- xcube/core/resampling/temporal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index 
4f9a432bc..a21d1d107 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -267,8 +267,8 @@ def _adjust_times_and_bounds(time_values, frequency, method): def _convert(timestamp: pd.Timestamp, calendar: str): if calendar is not None: - return cftime.DateFromJulianDay(timestamp.to_julian_date(), - calendar=calendar) + return cftime.datetime.fromordinal(timestamp.to_julian_date(), + calendar=calendar) return np.datetime64(timestamp) From 3161ab238cfa0a028629f9bb50429a80fed72863 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Thu, 14 Oct 2021 19:25:00 +0200 Subject: [PATCH 14/28] pep 8 --- xcube/core/resampling/temporal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index a21d1d107..2dd5fe163 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -289,7 +289,7 @@ def _get_next_timestamp(timestamp, time_unit, time_value, half) \ replacement['hour'] -= 24 replacement['day'] += 1 while replacement['day'] > _days_of_month(replacement['year'], - replacement['month']): + replacement['month']): replacement['day'] -= _days_of_month(replacement['year'], replacement['month']) replacement['month'] += 1 @@ -329,7 +329,7 @@ def _get_previous_timestamp(timestamp, time_unit, time_value, half) \ replacement['month'] += 12 replacement['year'] -= 1 replacement['day'] += _days_of_month(replacement['year'], - replacement['month'] % 12) + replacement['month'] % 12) while replacement['month'] < 1: replacement['month'] += 12 From 4177a4dbfdb888b82b3e4dd2ce99d24c1dc2fcb9 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Thu, 14 Oct 2021 19:26:00 +0200 Subject: [PATCH 15/28] work on better considering up- and downsampling --- test/core/gen2/local/test_resamplert.py | 2 +- test/core/gen2/test_config.py | 11 ++- xcube/core/gen2/config.py | 69 ++++++++++++-- xcube/core/gen2/local/resamplert.py | 116 ++++++++++++++++++------ 4 
files changed, 155 insertions(+), 43 deletions(-) diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py index c809a00e0..d5855c6d7 100644 --- a/test/core/gen2/local/test_resamplert.py +++ b/test/core/gen2/local/test_resamplert.py @@ -38,7 +38,7 @@ def test_transform_cube_no_time_period(self): def test_transform_cube_downsample_to_years(self): cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), time_period='2Y', - temporal_resampling='min') + temporal_resampling=dict(downsampling='min')) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='M', time_periods=24) diff --git a/test/core/gen2/test_config.py b/test/core/gen2/test_config.py index 93d5aa2cc..16c0654e2 100644 --- a/test/core/gen2/test_config.py +++ b/test/core/gen2/test_config.py @@ -56,7 +56,9 @@ def test_from_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', - temporal_resampling='slinear', + temporal_resampling=dict( + upsampling=('slinear', {}) + ), metadata=dict(title='S2L2A subset'), variable_metadata=dict( B03=dict(long_name='Band 3'), @@ -70,7 +72,8 @@ def test_from_dict(self): self.assertEqual(0.05, cube_config.spatial_res) self.assertEqual(('2018-01-01', None), cube_config.time_range) self.assertEqual('4D', cube_config.time_period) - self.assertEqual('slinear', cube_config.temporal_resampling) + self.assertEqual(dict(upsampling=['slinear', {}]), + cube_config.temporal_resampling) self.assertEqual(dict(title='S2L2A subset'), cube_config.metadata) self.assertEqual( @@ -88,7 +91,9 @@ def test_to_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', - temporal_resampling='slinear', + temporal_resampling=dict( + upsampling=['slinear', {}] + ), metadata=dict(title='S2L2A subset'), variable_metadata=dict( B03=dict(long_name='Band 3'), diff --git a/xcube/core/gen2/config.py b/xcube/core/gen2/config.py index 98fc06f1a..73a198fd7 100644 --- a/xcube/core/gen2/config.py 
+++ b/xcube/core/gen2/config.py @@ -25,11 +25,14 @@ import pyproj +from xcube.core.resampling.temporal import UPSAMPLING_METHODS +from xcube.core.resampling.temporal import DOWNSAMPLING_METHODS from xcube.util.assertions import assert_given from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true from xcube.util.jsonschema import JsonArraySchema from xcube.util.jsonschema import JsonBooleanSchema +from xcube.util.jsonschema import JsonComplexSchema from xcube.util.jsonschema import JsonDateSchema from xcube.util.jsonschema import JsonIntegerSchema from xcube.util.jsonschema import JsonNumberSchema @@ -142,7 +145,9 @@ def __init__(self, tile_size: Union[int, Tuple[int, int]] = None, time_range: Tuple[str, Optional[str]] = None, time_period: str = None, - temporal_resampling: str = None, + temporal_resampling: + Mapping[str, Union[str, Tuple[str, Mapping[str, Any]]]] + = None, chunks: Mapping[str, Optional[int]] = None, metadata: Mapping[str, Any] = None, variable_metadata: Mapping[str, Mapping[str, Any]] = None,): @@ -199,8 +204,19 @@ def __init__(self, self.temporal_resampling = None if temporal_resampling is not None: - assert_instance(temporal_resampling, str, 'temporal_resampling') - self.temporal_resampling = temporal_resampling + assert_instance(temporal_resampling, collections.Mapping, + 'temporal_resampling') + for resampling_direction, resampling_method \ + in temporal_resampling.items(): + assert_instance(resampling_direction, str, + 'resampling type name') + assert_instance(resampling_method, (str, Tuple), + 'resampling method') + if isinstance(resampling_method, Tuple): + assert_instance(resampling_method[0], str) + assert_instance(resampling_method[1], collections.Mapping, + 'resampling params') + self.temporal_resampling = dict(temporal_resampling) self.chunks = None if chunks is not None: @@ -277,13 +293,48 @@ def get_schema(cls): nullable=True, pattern=r'^([1-9][0-9]*)?[DWMY]$' ), - 
temporal_resampling=JsonStringSchema( + temporal_resampling=JsonObjectSchema( nullable=True, - enum=['count', 'first', 'last', 'max', 'min', 'mean', 'sum', - 'prod', 'median', 'std', 'var', 'percentile_
<p>
', - 'asfreq', 'ffill', 'bfill', 'pad', 'linear', - 'nearest', 'nearest-up', 'zero', 'slinear', - 'quadratic', 'cubic', 'previous', 'next'] + properties=dict( + upsampling=JsonComplexSchema( + one_of=[ + JsonStringSchema(enum=UPSAMPLING_METHODS), + JsonArraySchema( + nullable=True, + items=[ + JsonStringSchema( + enum=UPSAMPLING_METHODS + ), + JsonObjectSchema( + additional_properties= + JsonObjectSchema( + additional_properties=True + ) + ) + ] + ) + ] + ), + downsampling=JsonComplexSchema( + one_of=[ + JsonStringSchema(enum=DOWNSAMPLING_METHODS), + JsonArraySchema( + nullable=True, + items=[ + JsonStringSchema( + enum=DOWNSAMPLING_METHODS + ), + JsonObjectSchema( + additional_properties= + JsonObjectSchema( + additional_properties=True + ) + ) + ] + ) + ] + ) + ) ), chunks=JsonObjectSchema( nullable=True, diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index 716e39e5d..d9800f4de 100644 --- a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -1,11 +1,12 @@ +# The MIT License (MIT) # Copyright (c) 2021 by the xcube development team and contributors # -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do +# 
so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. @@ -19,6 +20,7 @@ # SOFTWARE. import cftime +import numpy as np import pandas as pd import xarray as xr @@ -31,6 +33,14 @@ from ..config import CubeConfig from ..error import CubeGeneratorError +MIN_MAX_DELTAS = dict( + H=(1, 1, 'H'), + D=(1, 1, 'D'), + W=(7, 7, 'D'), + M=(28, 31, 'D'), + Y=(365, 366, 'D') +) + class CubeResamplerT(CubeTransformer): @@ -52,34 +62,66 @@ def transform_cube(self, time_resample_params = dict() time_resample_params['frequency'] = cube_config.time_period time_resample_params['method'] = 'first' - if self._time_range: - import re - time_unit = re.findall('[A-Z]+', cube_config.time_period)[0] - if time_unit in ['H', 'D']: - start_time = pd.to_datetime(self._time_range[0]) - dataset_start_time = pd.Timestamp(cube.time[0].values) - time_delta = _normalize_time(dataset_start_time) \ - - start_time - period_delta = pd.Timedelta(cube_config.time_period) - if time_delta > period_delta: - if time_unit == 'H': - time_resample_params['base'] = \ - time_delta.hours / period_delta.hours - elif time_unit == 'D': - time_resample_params['base'] = \ - time_delta.days / period_delta.days + import re + time_unit = re.findall('[A-Z]+', cube_config.time_period)[0] + time_frequency = int(cube_config.time_period.split(time_unit)[0]) + if self._time_range and time_unit in ['H', 'D']: + start_time = pd.to_datetime(self._time_range[0]) + dataset_start_time = pd.Timestamp(cube.time[0].values) + time_delta = _normalize_time(dataset_start_time) \ + - start_time + period_delta = pd.Timedelta(cube_config.time_period) + if time_delta > period_delta: + if time_unit == 'H': + time_resample_params['base'] = \ + time_delta.hours / period_delta.hours + elif time_unit == 'D': + time_resample_params['base'] = \ + time_delta.days / period_delta.days if cube_config.temporal_resampling is not None: 
to_drop.append('temporal_resampling') - if cube_config.temporal_resampling in \ - ['linear', 'nearest-up', 'zero', 'slinear', - 'quadratic', 'cubic', 'previous', 'next']: + min_data_delta, max_data_delta = \ + get_min_max_timedeltas_from_data(cube) + min_period_delta, max_period_delta = \ + get_min_max_timedeltas_for_time_period(time_frequency, + time_unit) + if max_data_delta < min_period_delta: + if 'downsampling' not in cube_config.temporal_resampling: + raise ValueError('Data must be sampled down to a' + 'coarser temporal resolution, ' + 'but no temporal downsampling ' + 'method is set') + method = cube_config.temporal_resampling['downsampling'][0] + elif max_period_delta < min_data_delta: + if 'upsampling' not in cube_config.temporal_resampling: + raise ValueError('Data must be sampled up to a' + 'finer temporal resolution, ' + 'but no temporal upsampling ' + 'method is set') + method = cube_config.temporal_resampling['upsampling'][0] + else: + if 'downsampling' not in cube_config.temporal_resampling \ + and 'upsampling' not in \ + cube_config.temporal_resampling: + raise ValueError('Please specify a method for temporal ' + 'resampling.') + if 'downsampling' in cube_config.temporal_resampling and \ + 'upsampling' in cube_config.temporal_resampling: + raise ValueError('Cannot determine unambiguously ' + 'whether data needs to be sampled up ' + 'or down temporally. 
Please only ' + 'specify one method for temporal ' + 'resampling.') + method = cube_config.temporal_resampling.get( + 'downsampling', + cube_config.temporal_resampling.get('upsampling')) + if method in ['linear', 'nearest-up', 'zero', 'slinear', + 'quadratic', 'cubic', 'previous', 'next']: time_resample_params['method'] = 'interpolate' - time_resample_params['interp_kind'] = \ - cube_config.temporal_resampling + time_resample_params['interp_kind'] = method else: - time_resample_params['method'] = \ - cube_config.temporal_resampling - # we set cub_asserted to true so the resampling can deal with + time_resample_params['method'] = method + # we set cube_asserted to true so the resampling can deal with # cftime data resampled_cube = resample_in_time( cube, @@ -140,6 +182,20 @@ def _get_temporal_subset(resampled_cube, time_range): return resampled_cube.sel(time=slice(data_start_time, data_end_time)) +def get_min_max_timedeltas_from_data(data: xr.Dataset): + time_diff = data['time'].diff(dim=data['time'].dims[0])\ + .values.astype(np.float64) + return pd.Timedelta(min(time_diff)), pd.Timedelta(max(time_diff)) + + +def get_min_max_timedeltas_for_time_period(time_frequency: int, time_unit: str): + min_freq = MIN_MAX_DELTAS[time_unit][0] * time_frequency + max_freq = MIN_MAX_DELTAS[time_unit][1] * time_frequency + delta_unit = MIN_MAX_DELTAS[time_unit][2] + return pd.Timedelta(f'{min_freq}{delta_unit}'), \ + pd.Timedelta(f'{max_freq}{delta_unit}') + + def _normalize_time(time, normalize_hour=True): if normalize_hour: return time.replace(hour=0, minute=0, second=0, microsecond=0, From 19907c202da531aebb7be9b04620ff65f59ed235 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Fri, 15 Oct 2021 18:29:12 +0200 Subject: [PATCH 16/28] remodeled temporal resampling method parameter --- test/core/gen2/local/test_resamplert.py | 16 +++--- test/core/gen2/test_config.py | 8 +-- test/core/gen2/test_request.py | 7 ++- xcube/core/gen2/config.py | 75 ++++++++++--------------- 
xcube/core/resampling/temporal.py | 3 +- 5 files changed, 47 insertions(+), 62 deletions(-) diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py index d5855c6d7..d9d0ea617 100644 --- a/test/core/gen2/local/test_resamplert.py +++ b/test/core/gen2/local/test_resamplert.py @@ -38,7 +38,7 @@ def test_transform_cube_no_time_period(self): def test_transform_cube_downsample_to_years(self): cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), time_period='2Y', - temporal_resampling=dict(downsampling='min')) + temporal_resampling=dict(downsampling=('min', {}))) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='M', time_periods=24) @@ -64,7 +64,7 @@ def test_transform_cube_downsample_to_years(self): def test_transform_cube_downsample_to_months(self): cube_config = CubeConfig(time_range=('2010-08-01', '2010-11-30'), time_period='2M', - temporal_resampling='min') + temporal_resampling=dict(downsampling=('min', {}))) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='W', time_periods=12) @@ -90,7 +90,7 @@ def test_transform_cube_downsample_to_months(self): def test_transform_cube_downsample_to_weeks(self): cube_config = CubeConfig(time_range=('2010-08-03', '2010-09-10'), time_period='2W', - temporal_resampling='max') + temporal_resampling=dict(downsampling=('max', {}))) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='D', time_periods=32) @@ -119,7 +119,7 @@ def test_transform_cube_downsample_to_weeks(self): def test_transform_cube_upsample_to_months(self): cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), time_period='2M', - temporal_resampling='linear') + temporal_resampling=dict(upsampling=('linear', {}))) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='Y', time_periods=2) @@ -151,7 +151,7 @@ def test_transform_cube_upsample_to_months(self): def 
test_transform_cube_upsample_to_weeks(self): cube_config = CubeConfig(time_range=('2010-09-01', '2010-10-10'), time_period='4W', - temporal_resampling='nearest') + temporal_resampling=dict(upsampling=('nearest', {}))) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='M', time_periods=4) @@ -177,7 +177,7 @@ def test_transform_cube_upsample_to_weeks(self): def test_transform_cube_upsample_to_days(self): cube_config = CubeConfig(time_range=('2010-08-14', '2010-08-24'), time_period='2D', - temporal_resampling='linear') + temporal_resampling=dict(upsampling=('linear', {}))) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='W', time_periods=3) @@ -216,7 +216,7 @@ def test_transform_cube_upsample_to_days(self): def test_transform_cube_downsample_to_years_cftimes(self): cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), time_period='2Y', - temporal_resampling='min') + temporal_resampling=dict(downsampling=('min', {}))) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='M', time_periods=24, use_cftime=True) @@ -242,7 +242,7 @@ def test_transform_cube_downsample_to_years_cftimes(self): def test_transform_cube_upsample_to_months_cftimes(self): cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), time_period='2M', - temporal_resampling='linear') + temporal_resampling=dict(upsampling=('linear', {}))) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='Y', time_periods=2, use_cftime=True) diff --git a/test/core/gen2/test_config.py b/test/core/gen2/test_config.py index 16c0654e2..c1d026c04 100644 --- a/test/core/gen2/test_config.py +++ b/test/core/gen2/test_config.py @@ -57,7 +57,7 @@ def test_from_dict(self): time_range=['2018-01-01', None], time_period='4D', temporal_resampling=dict( - upsampling=('slinear', {}) + upsampling=('slinear', {'x': 1}) ), metadata=dict(title='S2L2A subset'), variable_metadata=dict( @@ -72,7 +72,7 @@ 
def test_from_dict(self): self.assertEqual(0.05, cube_config.spatial_res) self.assertEqual(('2018-01-01', None), cube_config.time_range) self.assertEqual('4D', cube_config.time_period) - self.assertEqual(dict(upsampling=['slinear', {}]), + self.assertEqual(dict(upsampling=('slinear', {'x': 1})), cube_config.temporal_resampling) self.assertEqual(dict(title='S2L2A subset'), cube_config.metadata) @@ -91,9 +91,7 @@ def test_to_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', - temporal_resampling=dict( - upsampling=['slinear', {}] - ), + temporal_resampling=dict(upsampling=['slinear', {}]), metadata=dict(title='S2L2A subset'), variable_metadata=dict( B03=dict(long_name='Band 3'), diff --git a/test/core/gen2/test_request.py b/test/core/gen2/test_request.py index b07c7882e..0572875d2 100644 --- a/test/core/gen2/test_request.py +++ b/test/core/gen2/test_request.py @@ -50,7 +50,7 @@ def test_from_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', - temporal_resampling='slinear'), + temporal_resampling=dict(upsampling=('slinear', {}))), output_config=dict(store_id='memory', data_id='CHL') ) @@ -70,7 +70,8 @@ def test_from_dict(self): self.assertEqual(0.05, gen_config.cube_config.spatial_res) self.assertEqual(('2018-01-01', None), gen_config.cube_config.time_range) self.assertEqual('4D', gen_config.cube_config.time_period) - self.assertEqual('slinear', gen_config.cube_config.temporal_resampling) + self.assertEqual(dict(upsampling=('slinear', {})), + gen_config.cube_config.temporal_resampling) def test_to_dict(self): expected_dict = dict( @@ -82,7 +83,7 @@ def test_to_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', - temporal_resampling='slinear'), + temporal_resampling=dict(upsampling=['slinear', {}])), output_config=dict(store_id='memory', replace=False, data_id='CHL') diff --git a/xcube/core/gen2/config.py b/xcube/core/gen2/config.py index 73a198fd7..e228ae7ba 100644 --- 
a/xcube/core/gen2/config.py +++ b/xcube/core/gen2/config.py @@ -25,14 +25,11 @@ import pyproj -from xcube.core.resampling.temporal import UPSAMPLING_METHODS -from xcube.core.resampling.temporal import DOWNSAMPLING_METHODS from xcube.util.assertions import assert_given from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true from xcube.util.jsonschema import JsonArraySchema from xcube.util.jsonschema import JsonBooleanSchema -from xcube.util.jsonschema import JsonComplexSchema from xcube.util.jsonschema import JsonDateSchema from xcube.util.jsonschema import JsonIntegerSchema from xcube.util.jsonschema import JsonNumberSchema @@ -41,6 +38,12 @@ from xcube.util.jsonschema import JsonStringSchema +UPSAMPLING_METHODS = ['asfreq', 'ffill', 'bfill', 'pad', 'nearest', 'nearest-up', + 'linear', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', + 'next'] +DOWNSAMPLING_METHODS = ['count', 'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', + 'median', 'std', 'var', 'percentile_

'] + class InputConfig(JsonObject): def __init__(self, store_id: str = None, @@ -145,8 +148,7 @@ def __init__(self, tile_size: Union[int, Tuple[int, int]] = None, time_range: Tuple[str, Optional[str]] = None, time_period: str = None, - temporal_resampling: - Mapping[str, Union[str, Tuple[str, Mapping[str, Any]]]] + temporal_resampling: Mapping[str, Tuple[str, Mapping[str, Any]]] = None, chunks: Mapping[str, Optional[int]] = None, metadata: Mapping[str, Any] = None, @@ -210,13 +212,14 @@ def __init__(self, in temporal_resampling.items(): assert_instance(resampling_direction, str, 'resampling type name') - assert_instance(resampling_method, (str, Tuple), - 'resampling method') - if isinstance(resampling_method, Tuple): - assert_instance(resampling_method[0], str) - assert_instance(resampling_method[1], collections.Mapping, - 'resampling params') - self.temporal_resampling = dict(temporal_resampling) + assert_true(len(resampling_method) == 2, + 'Resampling method must consist of a method and a ' + 'dictionary with additional parameters') + assert_instance(resampling_method[0], str) + assert_instance(resampling_method[1], collections.Mapping, + 'resampling params') + temporal_resampling[resampling_direction] = tuple(resampling_method) + self.temporal_resampling = temporal_resampling self.chunks = None if chunks is not None: @@ -296,41 +299,25 @@ def get_schema(cls): temporal_resampling=JsonObjectSchema( nullable=True, properties=dict( - upsampling=JsonComplexSchema( - one_of=[ - JsonStringSchema(enum=UPSAMPLING_METHODS), - JsonArraySchema( - nullable=True, - items=[ - JsonStringSchema( - enum=UPSAMPLING_METHODS - ), - JsonObjectSchema( - additional_properties= - JsonObjectSchema( - additional_properties=True - ) - ) - ] + upsampling=JsonArraySchema( + nullable=True, + items=[ + JsonStringSchema( + enum=UPSAMPLING_METHODS + ), + JsonObjectSchema( + additional_properties=True ) ] ), - downsampling=JsonComplexSchema( - one_of=[ - 
JsonStringSchema(enum=DOWNSAMPLING_METHODS), - JsonArraySchema( - nullable=True, - items=[ - JsonStringSchema( - enum=DOWNSAMPLING_METHODS - ), - JsonObjectSchema( - additional_properties= - JsonObjectSchema( - additional_properties=True - ) - ) - ] + downsampling=JsonArraySchema( + nullable=True, + items=[ + JsonStringSchema( + enum=DOWNSAMPLING_METHODS + ), + JsonObjectSchema( + additional_properties=True ) ] ) diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index 2dd5fe163..3ed62546a 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -31,8 +31,7 @@ from xcube.core.select import select_variables_subset from xcube.core.verify import assert_cube -UPSAMPLING_METHODS = ['asfreq', 'ffill', 'bfill', 'pad', 'nearest', - 'interpolate'] +UPSAMPLING_METHODS = ['asfreq', 'ffill', 'bfill', 'pad', 'nearest', 'interpolate'] DOWNSAMPLING_METHODS = ['count', 'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', 'median', 'std', 'var', 'quantile'] RESAMPLING_METHODS = UPSAMPLING_METHODS + DOWNSAMPLING_METHODS From 9dca667885a48ab1f41545ac105dada6a22b545e Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Mon, 18 Oct 2021 10:23:22 +0200 Subject: [PATCH 17/28] pass interpolation kind and percentile threshold as additional method arguments --- test/core/gen2/local/test_resamplert.py | 37 +++++++++++++++++++------ test/core/gen2/test_config.py | 8 ++++-- test/core/gen2/test_request.py | 11 ++++++-- xcube/core/gen2/config.py | 8 ++---- xcube/core/gen2/local/resamplert.py | 22 +++++++++------ xcube/core/resampling/temporal.py | 11 ++++---- 6 files changed, 62 insertions(+), 35 deletions(-) diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py index d9d0ea617..9b70a8851 100644 --- a/test/core/gen2/local/test_resamplert.py +++ b/test/core/gen2/local/test_resamplert.py @@ -20,7 +20,7 @@ def b3(index1, index2, index3): time_periods=time_periods, time_freq=time_freq, 
use_cftime=use_cftime, - time_dtype= 'datetime64[s]' if not use_cftime else None, + time_dtype='datetime64[s]' if not use_cftime else None, width=10, height=5, time_start='2010-08-04') def test_transform_cube_no_time_period(self): @@ -38,7 +38,9 @@ def test_transform_cube_no_time_period(self): def test_transform_cube_downsample_to_years(self): cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), time_period='2Y', - temporal_resampling=dict(downsampling=('min', {}))) + temporal_resampling=dict( + downsampling=('min', {})) + ) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='M', time_periods=24) @@ -64,7 +66,9 @@ def test_transform_cube_downsample_to_years(self): def test_transform_cube_downsample_to_months(self): cube_config = CubeConfig(time_range=('2010-08-01', '2010-11-30'), time_period='2M', - temporal_resampling=dict(downsampling=('min', {}))) + temporal_resampling=dict( + downsampling=('min', {})) + ) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='W', time_periods=12) @@ -90,7 +94,9 @@ def test_transform_cube_downsample_to_months(self): def test_transform_cube_downsample_to_weeks(self): cube_config = CubeConfig(time_range=('2010-08-03', '2010-09-10'), time_period='2W', - temporal_resampling=dict(downsampling=('max', {}))) + temporal_resampling=dict( + downsampling=('max', {})) + ) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='D', time_periods=32) @@ -119,7 +125,10 @@ def test_transform_cube_downsample_to_weeks(self): def test_transform_cube_upsample_to_months(self): cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), time_period='2M', - temporal_resampling=dict(upsampling=('linear', {}))) + temporal_resampling=dict( + upsampling=('interpolate', + {'kind': 'linear'}) + )) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='Y', time_periods=2) @@ -151,7 +160,9 @@ def 
test_transform_cube_upsample_to_months(self): def test_transform_cube_upsample_to_weeks(self): cube_config = CubeConfig(time_range=('2010-09-01', '2010-10-10'), time_period='4W', - temporal_resampling=dict(upsampling=('nearest', {}))) + temporal_resampling=dict( + upsampling=('nearest', {})) + ) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='M', time_periods=4) @@ -177,7 +188,10 @@ def test_transform_cube_upsample_to_weeks(self): def test_transform_cube_upsample_to_days(self): cube_config = CubeConfig(time_range=('2010-08-14', '2010-08-24'), time_period='2D', - temporal_resampling=dict(upsampling=('linear', {}))) + temporal_resampling=dict( + upsampling=('interpolate', + {'kind': 'linear'}) + )) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='W', time_periods=3) @@ -216,7 +230,9 @@ def test_transform_cube_upsample_to_days(self): def test_transform_cube_downsample_to_years_cftimes(self): cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), time_period='2Y', - temporal_resampling=dict(downsampling=('min', {}))) + temporal_resampling=dict( + downsampling=('min', {})) + ) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='M', time_periods=24, use_cftime=True) @@ -242,7 +258,10 @@ def test_transform_cube_downsample_to_years_cftimes(self): def test_transform_cube_upsample_to_months_cftimes(self): cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), time_period='2M', - temporal_resampling=dict(upsampling=('linear', {}))) + temporal_resampling=dict( + upsampling=('interpolate', + {'kind': 'linear'}) + )) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='Y', time_periods=2, use_cftime=True) diff --git a/test/core/gen2/test_config.py b/test/core/gen2/test_config.py index c1d026c04..0835818bc 100644 --- a/test/core/gen2/test_config.py +++ b/test/core/gen2/test_config.py @@ -57,7 +57,7 @@ def test_from_dict(self): 
time_range=['2018-01-01', None], time_period='4D', temporal_resampling=dict( - upsampling=('slinear', {'x': 1}) + upsampling=('interpolate', {'kind': 'slinear'}) ), metadata=dict(title='S2L2A subset'), variable_metadata=dict( @@ -72,7 +72,7 @@ def test_from_dict(self): self.assertEqual(0.05, cube_config.spatial_res) self.assertEqual(('2018-01-01', None), cube_config.time_range) self.assertEqual('4D', cube_config.time_period) - self.assertEqual(dict(upsampling=('slinear', {'x': 1})), + self.assertEqual(dict(upsampling=('interpolate', {'kind': 'slinear'})), cube_config.temporal_resampling) self.assertEqual(dict(title='S2L2A subset'), cube_config.metadata) @@ -91,7 +91,9 @@ def test_to_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', - temporal_resampling=dict(upsampling=['slinear', {}]), + temporal_resampling=dict( + downsampling=['percentile', {'threshold': 75}] + ), metadata=dict(title='S2L2A subset'), variable_metadata=dict( B03=dict(long_name='Band 3'), diff --git a/test/core/gen2/test_request.py b/test/core/gen2/test_request.py index 0572875d2..f1484b7bc 100644 --- a/test/core/gen2/test_request.py +++ b/test/core/gen2/test_request.py @@ -50,7 +50,10 @@ def test_from_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', - temporal_resampling=dict(upsampling=('slinear', {}))), + temporal_resampling=dict( + upsampling=('interpolate', + {'kind': 'slinear'})) + ), output_config=dict(store_id='memory', data_id='CHL') ) @@ -70,7 +73,7 @@ def test_from_dict(self): self.assertEqual(0.05, gen_config.cube_config.spatial_res) self.assertEqual(('2018-01-01', None), gen_config.cube_config.time_range) self.assertEqual('4D', gen_config.cube_config.time_period) - self.assertEqual(dict(upsampling=('slinear', {})), + self.assertEqual(dict(upsampling=('interpolate', {'kind': 'slinear'})), gen_config.cube_config.temporal_resampling) def test_to_dict(self): @@ -83,7 +86,9 @@ def test_to_dict(self): spatial_res=0.05, 
time_range=['2018-01-01', None], time_period='4D', - temporal_resampling=dict(upsampling=['slinear', {}])), + temporal_resampling=dict( + downsampling=['percentile', {'threshold': 70}]) + ), output_config=dict(store_id='memory', replace=False, data_id='CHL') diff --git a/xcube/core/gen2/config.py b/xcube/core/gen2/config.py index e228ae7ba..23f5b6bc0 100644 --- a/xcube/core/gen2/config.py +++ b/xcube/core/gen2/config.py @@ -25,6 +25,8 @@ import pyproj +from xcube.core.resampling.temporal import DOWNSAMPLING_METHODS +from xcube.core.resampling.temporal import UPSAMPLING_METHODS from xcube.util.assertions import assert_given from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true @@ -38,12 +40,6 @@ from xcube.util.jsonschema import JsonStringSchema -UPSAMPLING_METHODS = ['asfreq', 'ffill', 'bfill', 'pad', 'nearest', 'nearest-up', - 'linear', 'zero', 'slinear', 'quadratic', 'cubic', 'previous', - 'next'] -DOWNSAMPLING_METHODS = ['count', 'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', - 'median', 'std', 'var', 'percentile_

'] - class InputConfig(JsonObject): def __init__(self, store_id: str = None, diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index d9800f4de..870fd25a4 100644 --- a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -91,14 +91,16 @@ def transform_cube(self, 'coarser temporal resolution, ' 'but no temporal downsampling ' 'method is set') - method = cube_config.temporal_resampling['downsampling'][0] + method, method_args = \ + cube_config.temporal_resampling['downsampling'] elif max_period_delta < min_data_delta: if 'upsampling' not in cube_config.temporal_resampling: raise ValueError('Data must be sampled up to a' 'finer temporal resolution, ' 'but no temporal upsampling ' 'method is set') - method = cube_config.temporal_resampling['upsampling'][0] + method, method_args = \ + cube_config.temporal_resampling['upsampling'] else: if 'downsampling' not in cube_config.temporal_resampling \ and 'upsampling' not in \ @@ -112,13 +114,15 @@ def transform_cube(self, 'or down temporally. 
Please only ' 'specify one method for temporal ' 'resampling.') - method = cube_config.temporal_resampling.get( - 'downsampling', - cube_config.temporal_resampling.get('upsampling')) - if method in ['linear', 'nearest-up', 'zero', 'slinear', - 'quadratic', 'cubic', 'previous', 'next']: - time_resample_params['method'] = 'interpolate' - time_resample_params['interp_kind'] = method + method, method_args = cube_config.temporal_resampling.\ + get('downsampling', + cube_config.temporal_resampling.get('upsampling')) + if method == 'interpolate': + time_resample_params['method'] = method + time_resample_params['interp_kind'] = method_args['kind'] + elif method == 'percentile': + method = f'percentile_{method_args["threshold"]}' + time_resample_params['method'] = method else: time_resample_params['method'] = method # we set cube_asserted to true so the resampling can deal with diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index 3ed62546a..7cb0058f8 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -31,10 +31,11 @@ from xcube.core.select import select_variables_subset from xcube.core.verify import assert_cube -UPSAMPLING_METHODS = ['asfreq', 'ffill', 'bfill', 'pad', 'nearest', 'interpolate'] +UPSAMPLING_METHODS = ['asfreq', 'ffill', 'bfill', 'pad', 'nearest', + 'interpolate'] DOWNSAMPLING_METHODS = ['count', 'first', 'last', 'min', 'max', 'sum', 'prod', - 'mean', 'median', 'std', 'var', 'quantile'] -RESAMPLING_METHODS = UPSAMPLING_METHODS + DOWNSAMPLING_METHODS + 'mean', 'median', 'std', 'var', 'percentile'] + TIMEUNIT_INCREMENTS = dict( YS=[1, 0, 0, 0], QS=[0, 3, 0, 0], @@ -220,7 +221,7 @@ def _adjust_times_and_bounds(time_values, frequency, method): half_time_delta = np.timedelta64(84 * time_value, 'h') else: raise ValueError(f'Unsupported time unit "{time_unit}"') - if method in DOWNSAMPLING_METHODS: + if method not in UPSAMPLING_METHODS: time_values += half_time_delta time_bounds_values = \ 
np.array([time_values - half_time_delta, @@ -249,7 +250,7 @@ def _adjust_times_and_bounds(time_values, frequency, method): half_next_ts = _get_next_timestamp(ts, time_unit, time_value, True) # depending on whether the data was sampled down or up, # times need to be adjusted differently - if method in DOWNSAMPLING_METHODS: + if method not in UPSAMPLING_METHODS: new_timestamps.append(_convert(half_next_ts, calendar)) new_timestamp_bounds.append([_convert(ts, calendar), _convert(next_ts, calendar)]) From bffe1b3c62fde89f91c9597b4af886a6eaad541c Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Mon, 18 Oct 2021 10:55:12 +0200 Subject: [PATCH 18/28] added error handling --- xcube/core/gen2/local/resamplert.py | 20 ++++++++++++++++++++ xcube/core/resampling/temporal.py | 3 +++ 2 files changed, 23 insertions(+) diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index 870fd25a4..bbe9dbfc9 100644 --- a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -27,6 +27,7 @@ from xcube.core.gridmapping import GridMapping from xcube.core.resampling import resample_in_time from xcube.core.resampling.temporal import adjust_metadata_and_chunking +from xcube.core.resampling.temporal import INTERPOLATION_KINDS from xcube.util.assertions import assert_instance from .transformer import CubeTransformer from .transformer import TransformedCube @@ -119,8 +120,27 @@ def transform_cube(self, cube_config.temporal_resampling.get('upsampling')) if method == 'interpolate': time_resample_params['method'] = method + if 'kind' not in method_args: + interpolation_kinds = \ + ', '.join(map(repr, INTERPOLATION_KINDS)) + raise ValueError(f"To use 'interpolation' as " + f"upsampling method, the " + f"interpolation kind must be set. 
" + f"Use any of the following: " + f"{interpolation_kinds}.") + if method_args['kind'] not in INTERPOLATION_KINDS: + interpolation_kinds = \ + ', '.join(map(repr, INTERPOLATION_KINDS)) + raise ValueError(f'Interpolation kind must be one of ' + f'the following: ' + f'{interpolation_kinds}. Was: ' + f'"{method_args["kind"]}".') time_resample_params['interp_kind'] = method_args['kind'] elif method == 'percentile': + if 'threshold' not in method_args: + raise ValueError(f"To use 'percentile' as " + f"downsampling method, a " + f"threshold must be set.") method = f'percentile_{method_args["threshold"]}' time_resample_params['method'] = method else: diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index 7cb0058f8..d3a678a7f 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -35,6 +35,9 @@ 'interpolate'] DOWNSAMPLING_METHODS = ['count', 'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', 'median', 'std', 'var', 'percentile'] +SPLINE_INTERPOLATION_KINDS = ['zero', 'slinear', 'quadratic', 'cubic'] +OTHER_INTERPOLATION_KINDS = ['linear', 'nearest', 'previous', 'next'] +INTERPOLATION_KINDS = SPLINE_INTERPOLATION_KINDS + OTHER_INTERPOLATION_KINDS TIMEUNIT_INCREMENTS = dict( YS=[1, 0, 0, 0], From a2d629b65ae694649ab52856a27fdb889d03dd84 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Mon, 18 Oct 2021 10:55:20 +0200 Subject: [PATCH 19/28] edited changelog --- CHANGES.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index dfc5f8879..dd9d37eab 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,9 +11,8 @@ pattern, e.g., '4D') and the newly introduced parameter `temporal_resampling`. To sample down to a broader temporal resolution, use any of `['count', 'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', 'median', 'std', - 'var', 'percentile_

']`, to sample up to a finer resolution, use any of - `['asfreq', 'ffill', 'bfill', 'pad', 'nearest', 'nearest-up', 'zero', - 'slinear', 'quadratic', 'cubic', 'previous', 'next']`. (#523) + 'var', 'percentile']`, to sample up to a finer resolution, use any of + `['asfreq', 'ffill', 'bfill', 'pad', 'nearest', 'interpolate']`. (#523) ### Other From efbe0600fe267fde711d4e45674aa8cd91bad5a7 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Mon, 18 Oct 2021 11:55:27 +0200 Subject: [PATCH 20/28] avoid use of deprecated methods --- test/core/gen2/local/test_resamplert.py | 84 ++++++++++++------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py index 9b70a8851..4786efaf0 100644 --- a/test/core/gen2/local/test_resamplert.py +++ b/test/core/gen2/local/test_resamplert.py @@ -33,7 +33,7 @@ def test_transform_cube_no_time_period(self): transform_cube(cube, GridMapping.from_dataset(cube), cube_config) - self.assertEquals(cube, resampled_cube) + self.assertEqual(cube, resampled_cube) def test_transform_cube_downsample_to_years(self): cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), time_period='2Y', - temporal_resampling=dict(downsampling=('min', {}))) + temporal_resampling=dict( + downsampling=('min', {})) + ) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='M', time_periods=24) @@ -59,9 +59,9 @@ def test_transform_cube_downsample_to_years(self): np.array([['2010-01-01T00:00:00', '2012-01-01T00:00:00'], ['2012-01-01T00:00:00', '2014-01-01T00:00:00']], dtype=np.datetime64)) - self.assertEquals((2, 5, 10), resampled_cube.B03.shape) - self.assertAlmostEquals(0.0, resampled_cube.B03[0].values.min(), 8) - self.assertAlmostEquals(16.0, resampled_cube.B03[1].values.min(), 8) + self.assertEqual((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(16.0, resampled_cube.B03[1].values.min(), 8) def test_transform_cube_downsample_to_months(self): cube_config = CubeConfig(time_range=('2010-08-01', '2010-11-30'), time_period='2M', - temporal_resampling=dict(downsampling=('min', {}))) + temporal_resampling=dict( + downsampling=('min', {})) + ) temporal_resampler = CubeResamplerT(cube_config) cube = self._get_cube(time_freq='W', time_periods=12) @@ -87,9 +87,9 @@ def test_transform_cube_downsample_to_months(self): 
np.array([['2010-08-01T00:00:00', '2010-10-01T00:00:00'], ['2010-10-01T00:00:00', '2010-12-01T00:00:00']], dtype=np.datetime64)) - self.assertEquals((2, 5, 10), resampled_cube.B03.shape) - self.assertAlmostEquals(0.0, resampled_cube.B03[0].values.min(), 8) - self.assertAlmostEquals(8.0, resampled_cube.B03[1].values.min(), 8) + self.assertEqual((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(8.0, resampled_cube.B03[1].values.min(), 8) def test_transform_cube_downsample_to_weeks(self): cube_config = CubeConfig(time_range=('2010-08-03', '2010-09-10'), @@ -117,10 +117,10 @@ def test_transform_cube_downsample_to_weeks(self): ['2010-08-15T00:00:00', '2010-08-29T00:00:00'], ['2010-08-29T00:00:00', '2010-09-12T00:00:00']], dtype=np.datetime64)) - self.assertEquals((3, 5, 10), resampled_cube.B03.shape) - self.assertAlmostEquals(10.0, resampled_cube.B03[0].values.min(), 8) - self.assertAlmostEquals(24.0, resampled_cube.B03[1].values.min(), 8) - self.assertAlmostEquals(31.0, resampled_cube.B03[2].values.min(), 8) + self.assertEqual((3, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(10.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(24.0, resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEqual(31.0, resampled_cube.B03[2].values.min(), 8) def test_transform_cube_upsample_to_months(self): cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), @@ -149,13 +149,13 @@ def test_transform_cube_upsample_to_months(self): ['2011-12-01T00:00:00', '2012-02-01T00:00:00'], ['2012-02-01T00:00:00', '2012-04-01T00:00:00']], dtype=np.datetime64)) - self.assertEquals((3, 5, 10), resampled_cube.B03.shape) - self.assertAlmostEquals(0.33561644, - resampled_cube.B03[0].values.min(), 8) - self.assertAlmostEquals(0.50273973, - resampled_cube.B03[1].values.min(), 8) - self.assertAlmostEquals(0.66712329, - resampled_cube.B03[2].values.min(), 8) + self.assertEqual((3, 5, 
10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.33561644, + resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(0.50273973, + resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEqual(0.66712329, + resampled_cube.B03[2].values.min(), 8) def test_transform_cube_upsample_to_weeks(self): cube_config = CubeConfig(time_range=('2010-09-01', '2010-10-10'), @@ -181,9 +181,9 @@ def test_transform_cube_upsample_to_weeks(self): np.array([['2010-08-29T00:00:00', '2010-09-26T00:00:00'], ['2010-09-26T00:00:00', '2010-10-24T00:00:00']], dtype=np.datetime64)) - self.assertEquals((2, 5, 10), resampled_cube.B03.shape) - self.assertAlmostEquals(0.0, resampled_cube.B03[0].values.min(), 8) - self.assertAlmostEquals(1.0, resampled_cube.B03[1].values.min(), 8) + self.assertEqual((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(1.0, resampled_cube.B03[1].values.min(), 8) def test_transform_cube_upsample_to_days(self): cube_config = CubeConfig(time_range=('2010-08-14', '2010-08-24'), @@ -215,17 +215,17 @@ def test_transform_cube_upsample_to_days(self): ['2010-08-20T00:00:00', '2010-08-22T00:00:00'], ['2010-08-22T00:00:00', '2010-08-24T00:00:00']], dtype=np.datetime64)) - self.assertEquals((5, 5, 10), resampled_cube.B03.shape) - self.assertAlmostEquals(0.5, - resampled_cube.B03[0].values.min(), 8) - self.assertAlmostEquals(0.78571429, - resampled_cube.B03[1].values.min(), 8) - self.assertAlmostEquals(1.07142857, - resampled_cube.B03[2].values.min(), 8) - self.assertAlmostEquals(1.35714286, - resampled_cube.B03[3].values.min(), 8) - self.assertAlmostEquals(1.64285714, - resampled_cube.B03[4].values.min(), 8) + self.assertEqual((5, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.5, + resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(0.78571429, + resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEqual(1.07142857, + 
resampled_cube.B03[2].values.min(), 8) + self.assertAlmostEqual(1.35714286, + resampled_cube.B03[3].values.min(), 8) + self.assertAlmostEqual(1.64285714, + resampled_cube.B03[4].values.min(), 8) def test_transform_cube_downsample_to_years_cftimes(self): cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), @@ -251,9 +251,9 @@ def test_transform_cube_downsample_to_years_cftimes(self): cftime.DatetimeProlepticGregorian(2012, 1, 1)], [cftime.DatetimeProlepticGregorian(2012, 1, 1), cftime.DatetimeProlepticGregorian(2014, 1, 1)]]) - self.assertEquals((2, 5, 10), resampled_cube.B03.shape) - self.assertAlmostEquals(0.0, resampled_cube.B03[0].values.min(), 8) - self.assertAlmostEquals(16.0, resampled_cube.B03[1].values.min(), 8) + self.assertEqual((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(16.0, resampled_cube.B03[1].values.min(), 8) def test_transform_cube_upsample_to_months_cftimes(self): cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), @@ -284,10 +284,10 @@ def test_transform_cube_upsample_to_months_cftimes(self): cftime.DatetimeProlepticGregorian(2012, 2, 1)], [cftime.DatetimeProlepticGregorian(2012, 2, 1), cftime.DatetimeProlepticGregorian(2012, 4, 1)]]) - self.assertEquals((3, 5, 10), resampled_cube.B03.shape) - self.assertAlmostEquals(0.33561644, - resampled_cube.B03[0].values.min(), 8) - self.assertAlmostEquals(0.50273973, - resampled_cube.B03[1].values.min(), 8) - self.assertAlmostEquals(0.66712329, - resampled_cube.B03[2].values.min(), 8) + self.assertEqual((3, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.33561644, + resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(0.50273973, + resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEqual(0.66712329, + resampled_cube.B03[2].values.min(), 8) From 95a33946ff6b1fef48046cdcfbc0ab7a359b8661 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Tue, 11 Jan 2022 11:34:36 
+0100 Subject: [PATCH 21/28] added tests --- test/util/test_jsonschema.py | 60 ++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/test/util/test_jsonschema.py b/test/util/test_jsonschema.py index a77587306..7dbe8c06f 100644 --- a/test/util/test_jsonschema.py +++ b/test/util/test_jsonschema.py @@ -1,5 +1,6 @@ import unittest from collections import namedtuple +from jsonschema import ValidationError from typing import Dict, Any from xcube.util.jsonschema import JsonArraySchema @@ -47,6 +48,65 @@ def test_to_dict(self): JsonComplexSchema(all_of=[JsonIntegerSchema(multiple_of=5), JsonIntegerSchema(multiple_of=3)]).to_dict()) + def test_to_instance_one_of(self): + schema = JsonComplexSchema(one_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(5, schema.to_instance(5)) + self.assertEqual(6, schema.to_instance(6)) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(7) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(15) + + def test_to_instance_any_of(self): + schema = JsonComplexSchema(any_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(5, schema.to_instance(5)) + self.assertEqual(6, schema.to_instance(6)) + self.assertEqual(15, schema.to_instance(15)) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(7) + + def test_to_instance_all_of(self): + schema = JsonComplexSchema(all_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(15, schema.to_instance(15)) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(5) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(6) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(7) + + def test_from_instance_one_of(self): + schema = JsonComplexSchema(one_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(5, 
schema.from_instance(5)) + self.assertEqual(6, schema.from_instance(6)) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(7) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(15) + + def test_from_instance_any_of(self): + schema = JsonComplexSchema(any_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(5, schema.from_instance(5)) + self.assertEqual(6, schema.from_instance(6)) + self.assertEqual(15, schema.from_instance(15)) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(7) + + def test_from_instance_all_of(self): + schema = JsonComplexSchema(all_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(15, schema.from_instance(15)) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(5) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(6) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(7) class JsonSimpleSchemaTest(unittest.TestCase): From e12b348e569f92ccb41639c028fba10bb22abcf2 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Tue, 11 Jan 2022 11:35:41 +0100 Subject: [PATCH 22/28] allow passing in method name without parameters --- test/core/gen2/test_config.py | 3 +- test/core/gen2/test_request.py | 5 ++- xcube/core/gen2/config.py | 65 +++++++++++++++++------------ xcube/core/gen2/local/resamplert.py | 27 +++++++++--- 4 files changed, 65 insertions(+), 35 deletions(-) diff --git a/test/core/gen2/test_config.py b/test/core/gen2/test_config.py index 0835818bc..8600c7349 100644 --- a/test/core/gen2/test_config.py +++ b/test/core/gen2/test_config.py @@ -92,7 +92,8 @@ def test_to_dict(self): time_range=['2018-01-01', None], time_period='4D', temporal_resampling=dict( - downsampling=['percentile', {'threshold': 75}] + downsampling=('percentile', {'threshold': 75}), + upsampling='pad' ), metadata=dict(title='S2L2A subset'), variable_metadata=dict( diff --git 
a/test/core/gen2/test_request.py b/test/core/gen2/test_request.py index f1484b7bc..520a14fe0 100644 --- a/test/core/gen2/test_request.py +++ b/test/core/gen2/test_request.py @@ -87,8 +87,9 @@ def test_to_dict(self): time_range=['2018-01-01', None], time_period='4D', temporal_resampling=dict( - downsampling=['percentile', {'threshold': 70}]) - ), + downsampling=('percentile', {'threshold': 70}), + upsampling='pad' + )), output_config=dict(store_id='memory', replace=False, data_id='CHL') diff --git a/xcube/core/gen2/config.py b/xcube/core/gen2/config.py index 23f5b6bc0..6bedc45db 100644 --- a/xcube/core/gen2/config.py +++ b/xcube/core/gen2/config.py @@ -32,6 +32,7 @@ from xcube.util.assertions import assert_true from xcube.util.jsonschema import JsonArraySchema from xcube.util.jsonschema import JsonBooleanSchema +from xcube.util.jsonschema import JsonComplexSchema from xcube.util.jsonschema import JsonDateSchema from xcube.util.jsonschema import JsonIntegerSchema from xcube.util.jsonschema import JsonNumberSchema @@ -144,8 +145,8 @@ def __init__(self, tile_size: Union[int, Tuple[int, int]] = None, time_range: Tuple[str, Optional[str]] = None, time_period: str = None, - temporal_resampling: Mapping[str, Tuple[str, Mapping[str, Any]]] - = None, + temporal_resampling: + Mapping[str, Union[str, Tuple[str, Mapping[str, Any]]]] = None, chunks: Mapping[str, Optional[int]] = None, metadata: Mapping[str, Any] = None, variable_metadata: Mapping[str, Mapping[str, Any]] = None,): @@ -205,16 +206,18 @@ def __init__(self, assert_instance(temporal_resampling, collections.Mapping, 'temporal_resampling') for resampling_direction, resampling_method \ - in temporal_resampling.items(): + in temporal_resampling.items(): assert_instance(resampling_direction, str, 'resampling type name') - assert_true(len(resampling_method) == 2, - 'Resampling method must consist of a method and a ' - 'dictionary with additional parameters') - assert_instance(resampling_method[0], str) - 
assert_instance(resampling_method[1], collections.Mapping, - 'resampling params') - temporal_resampling[resampling_direction] = tuple(resampling_method) + if not isinstance(resampling_method, str): + assert_true(len(resampling_method) == 2, + 'Resampling method must consist of a method ' + 'and a dictionary with additional parameters') + assert_instance(resampling_method[0], str) + assert_instance(resampling_method[1], collections.Mapping, + 'resampling params') + temporal_resampling[resampling_direction] = \ + tuple(resampling_method) self.temporal_resampling = temporal_resampling self.chunks = None @@ -295,26 +298,36 @@ def get_schema(cls): temporal_resampling=JsonObjectSchema( nullable=True, properties=dict( - upsampling=JsonArraySchema( - nullable=True, - items=[ - JsonStringSchema( - enum=UPSAMPLING_METHODS + upsampling=JsonComplexSchema( + one_of=[ + JsonArraySchema( + nullable=True, + items=[ + JsonStringSchema( + enum=UPSAMPLING_METHODS + ), + JsonObjectSchema( + additional_properties=True + ) + ] ), - JsonObjectSchema( - additional_properties=True - ) + JsonStringSchema(enum=UPSAMPLING_METHODS) ] ), - downsampling=JsonArraySchema( - nullable=True, - items=[ - JsonStringSchema( - enum=DOWNSAMPLING_METHODS + downsampling=JsonComplexSchema( + one_of=[ + JsonArraySchema( + nullable=True, + items=[ + JsonStringSchema( + enum=DOWNSAMPLING_METHODS + ), + JsonObjectSchema( + additional_properties=True + ) + ] ), - JsonObjectSchema( - additional_properties=True - ) + JsonStringSchema(enum=DOWNSAMPLING_METHODS) ] ) ) diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index bbe9dbfc9..e62628648 100644 --- a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -92,16 +92,24 @@ def transform_cube(self, 'coarser temporal resolution, ' 'but no temporal downsampling ' 'method is set') - method, method_args = \ - cube_config.temporal_resampling['downsampling'] + try: + method, method_args = \ + 
cube_config.temporal_resampling['downsampling'] + except ValueError: + method = cube_config.temporal_resampling['downsampling'] + method_args = {} elif max_period_delta < min_data_delta: if 'upsampling' not in cube_config.temporal_resampling: raise ValueError('Data must be sampled up to a' 'finer temporal resolution, ' 'but no temporal upsampling ' 'method is set') - method, method_args = \ - cube_config.temporal_resampling['upsampling'] + try: + method, method_args = \ + cube_config.temporal_resampling['upsampling'] + except ValueError: + method = cube_config.temporal_resampling['upsampling'] + method_args = {} else: if 'downsampling' not in cube_config.temporal_resampling \ and 'upsampling' not in \ @@ -115,9 +123,16 @@ def transform_cube(self, 'or down temporally. Please only ' 'specify one method for temporal ' 'resampling.') - method, method_args = cube_config.temporal_resampling.\ - get('downsampling', + try: + method, method_args = cube_config.temporal_resampling.\ + get('downsampling', + cube_config.temporal_resampling. 
+ get('upsampling')) + except ValueError: + method = cube_config.temporal_resampling.get( + 'downsampling', cube_config.temporal_resampling.get('upsampling')) + method_args = {} if method == 'interpolate': time_resample_params['method'] = method if 'kind' not in method_args: From e01c8930a5db8542afb3300a1d82ad678742b4c0 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Tue, 11 Jan 2022 11:37:57 +0100 Subject: [PATCH 23/28] added empty line --- test/util/test_jsonschema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/util/test_jsonschema.py b/test/util/test_jsonschema.py index 7dbe8c06f..c0c57c2cc 100644 --- a/test/util/test_jsonschema.py +++ b/test/util/test_jsonschema.py @@ -108,6 +108,7 @@ def test_from_instance_all_of(self): with self.assertRaises(ValidationError) as cm: schema.from_instance(7) + class JsonSimpleSchemaTest(unittest.TestCase): def test_base_props_validated(self): From c098f6f847a259d1e28b460f9101b66fa2b1497f Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Tue, 11 Jan 2022 14:42:18 +0100 Subject: [PATCH 24/28] do not pass cube config in constructor --- test/core/gen2/local/test_resamplert.py | 18 ++-- xcube/core/gen2/local/resamplert.py | 138 ++++++++++++++---------- 2 files changed, 93 insertions(+), 63 deletions(-) diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py index 4786efaf0..343a18582 100644 --- a/test/core/gen2/local/test_resamplert.py +++ b/test/core/gen2/local/test_resamplert.py @@ -25,7 +25,7 @@ def b3(index1, index2, index3): def test_transform_cube_no_time_period(self): cube_config = CubeConfig(time_range=('2010-01-01', '2012-12-31')) - temporal_resampler = CubeResamplerT(cube_config) + temporal_resampler = CubeResamplerT() cube = self._get_cube(time_freq='M', time_periods=12) @@ -41,7 +41,7 @@ def test_transform_cube_downsample_to_years(self): temporal_resampling=dict( downsampling=('min', {})) ) - temporal_resampler = CubeResamplerT(cube_config) + temporal_resampler = 
CubeResamplerT() cube = self._get_cube(time_freq='M', time_periods=24) @@ -69,7 +69,7 @@ def test_transform_cube_downsample_to_months(self): temporal_resampling=dict( downsampling=('min', {})) ) - temporal_resampler = CubeResamplerT(cube_config) + temporal_resampler = CubeResamplerT() cube = self._get_cube(time_freq='W', time_periods=12) @@ -97,7 +97,7 @@ def test_transform_cube_downsample_to_weeks(self): temporal_resampling=dict( downsampling=('max', {})) ) - temporal_resampler = CubeResamplerT(cube_config) + temporal_resampler = CubeResamplerT() cube = self._get_cube(time_freq='D', time_periods=32) @@ -129,7 +129,7 @@ def test_transform_cube_upsample_to_months(self): upsampling=('interpolate', {'kind': 'linear'}) )) - temporal_resampler = CubeResamplerT(cube_config) + temporal_resampler = CubeResamplerT() cube = self._get_cube(time_freq='Y', time_periods=2) @@ -163,7 +163,7 @@ def test_transform_cube_upsample_to_weeks(self): temporal_resampling=dict( upsampling=('nearest', {})) ) - temporal_resampler = CubeResamplerT(cube_config) + temporal_resampler = CubeResamplerT() cube = self._get_cube(time_freq='M', time_periods=4) @@ -192,7 +192,7 @@ def test_transform_cube_upsample_to_days(self): upsampling=('interpolate', {'kind': 'linear'}) )) - temporal_resampler = CubeResamplerT(cube_config) + temporal_resampler = CubeResamplerT() cube = self._get_cube(time_freq='W', time_periods=3) @@ -233,7 +233,7 @@ def test_transform_cube_downsample_to_years_cftimes(self): temporal_resampling=dict( downsampling=('min', {})) ) - temporal_resampler = CubeResamplerT(cube_config) + temporal_resampler = CubeResamplerT() cube = self._get_cube(time_freq='M', time_periods=24, use_cftime=True) @@ -262,7 +262,7 @@ def test_transform_cube_upsample_to_months_cftimes(self): upsampling=('interpolate', {'kind': 'linear'}) )) - temporal_resampler = CubeResamplerT(cube_config) + temporal_resampler = CubeResamplerT() cube = self._get_cube(time_freq='Y', time_periods=2, use_cftime=True) diff --git 
a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py index e62628648..57da9105e 100644 --- a/xcube/core/gen2/local/resamplert.py +++ b/xcube/core/gen2/local/resamplert.py @@ -28,7 +28,7 @@ from xcube.core.resampling import resample_in_time from xcube.core.resampling.temporal import adjust_metadata_and_chunking from xcube.core.resampling.temporal import INTERPOLATION_KINDS -from xcube.util.assertions import assert_instance +from xcube.core.timecoord import get_time_range_from_data from .transformer import CubeTransformer from .transformer import TransformedCube from ..config import CubeConfig @@ -45,17 +45,17 @@ class CubeResamplerT(CubeTransformer): - def __init__(self, - cube_config: CubeConfig): - assert_instance(cube_config, CubeConfig, 'cube_config') - self._time_range = cube_config.time_range \ - if cube_config.time_range else None - def transform_cube(self, cube: xr.Dataset, gm: GridMapping, cube_config: CubeConfig) -> TransformedCube: to_drop = [] + if cube_config.time_range is not None: + start_time, end_time = cube_config.time_range + to_drop.append('time_range') + else: + start_time, end_time = \ + get_time_range_from_data(cube, maybe_consider_metadata=False) if cube_config.time_period is None: resampled_cube = cube else: @@ -66,19 +66,25 @@ def transform_cube(self, import re time_unit = re.findall('[A-Z]+', cube_config.time_period)[0] time_frequency = int(cube_config.time_period.split(time_unit)[0]) - if self._time_range and time_unit in ['H', 'D']: - start_time = pd.to_datetime(self._time_range[0]) - dataset_start_time = pd.Timestamp(cube.time[0].values) - time_delta = _normalize_time(dataset_start_time) \ - - start_time - period_delta = pd.Timedelta(cube_config.time_period) - if time_delta > period_delta: - if time_unit == 'H': - time_resample_params['base'] = \ - time_delta.hours / period_delta.hours - elif time_unit == 'D': - time_resample_params['base'] = \ - time_delta.days / period_delta.days + if time_unit in ['H', 'D']: + 
if start_time is not None: + start_time_as_datetime = pd.to_datetime(start_time) + dataset_start_time = pd.Timestamp(cube.time[0].values) + time_delta = _normalize_time(dataset_start_time) \ + - start_time_as_datetime + _adjust_time_resample_params(time_resample_params, + cube_config.time_period, + time_delta, + time_unit) + elif end_time is not None: + end_time_as_datetime = pd.to_datetime(end_time) + dataset_end_time = pd.Timestamp(cube.time[-1].values) + time_delta = end_time_as_datetime - \ + _normalize_time(dataset_end_time) + _adjust_time_resample_params(time_resample_params, + cube_config.time_period, + time_delta, + time_unit) if cube_config.temporal_resampling is not None: to_drop.append('temporal_resampling') min_data_delta, max_data_delta = \ @@ -168,16 +174,18 @@ def transform_cube(self, cube_asserted=True, **time_resample_params ) - if self._time_range: + if start_time is not None or end_time is not None: # cut possible overlapping time steps is_cf_time = isinstance(resampled_cube.time_bnds[0].values[0], cftime.datetime) if is_cf_time: resampled_cube = _get_temporal_subset_cf(resampled_cube, - self._time_range) + start_time, + end_time) else: resampled_cube = _get_temporal_subset(resampled_cube, - self._time_range) + start_time, + end_time) adjust_metadata_and_chunking(resampled_cube, time_chunk_size=1) cube_config = cube_config.drop_props(to_drop) @@ -185,39 +193,61 @@ def transform_cube(self, return resampled_cube, gm, cube_config -def _get_temporal_subset_cf(resampled_cube, time_range): - try: - data_start_index = resampled_cube.time_bnds[:, 0].to_index().\ - get_loc(time_range[0], method='bfill') - if isinstance(data_start_index, slice): - data_start_index = data_start_index.start - except KeyError: - data_start_index = 0 - try: - data_end_index = resampled_cube.time_bnds[:, 1].to_index().\ - get_loc(time_range[1], method='ffill') - if isinstance(data_end_index, slice): - data_end_index = data_end_index.stop + 1 - except KeyError: - data_end_index 
= resampled_cube.time.size +def _adjust_time_resample_params(time_resample_params, + time_period, + time_delta, + time_unit): + period_delta = pd.Timedelta(time_period) + if time_delta > period_delta: + if time_unit == 'H': + time_resample_params['base'] = \ + time_delta.hours / period_delta.hours + elif time_unit == 'D': + time_resample_params['base'] = \ + time_delta.days / period_delta.days + + +def _get_temporal_subset_cf(resampled_cube, start_time, end_time): + data_start_index = 0 + data_end_index = resampled_cube.time.size + if start_time: + try: + data_start_index = resampled_cube.time_bnds[:, 0].to_index().\ + get_loc(start_time, method='bfill') + if isinstance(data_start_index, slice): + data_start_index = data_start_index.start + except KeyError: + pass + if end_time: + try: + data_end_index = resampled_cube.time_bnds[:, 1].to_index().\ + get_loc(end_time, method='ffill') + if isinstance(data_end_index, slice): + data_end_index = data_end_index.stop + 1 + except KeyError: + pass return resampled_cube.isel(time=slice(data_start_index, data_end_index)) -def _get_temporal_subset(resampled_cube, time_range): - try: - data_start_time = resampled_cube.time_bnds[:, 0]. \ - sel(time=time_range[0], method='bfill') - if data_start_time.size < 1: - data_start_time = resampled_cube.time_bnds[0, 0] - except KeyError: - data_start_time = resampled_cube.time_bnds[0, 0] - try: - data_end_time = resampled_cube.time_bnds[:, 1]. \ - sel(time=time_range[1], method='ffill') - if data_end_time.size < 1: - data_end_time = resampled_cube.time_bnds[-1, 1] - except KeyError: - data_end_time = resampled_cube.time_bnds[-1, 1] +def _get_temporal_subset(resampled_cube, start_time, end_time): + data_start_time = resampled_cube.time_bnds[0, 0] + data_end_time = resampled_cube.time_bnds[-1, 1] + if start_time: + try: + data_start_time = resampled_cube.time_bnds[:, 0]. 
\ + sel(time=start_time, method='bfill') + if data_start_time.size < 1: + data_start_time = resampled_cube.time_bnds[0, 0] + except KeyError: + pass + if end_time: + try: + data_end_time = resampled_cube.time_bnds[:, 1]. \ + sel(time=end_time, method='ffill') + if data_end_time.size < 1: + data_end_time = resampled_cube.time_bnds[-1, 1] + except KeyError: + pass return resampled_cube.sel(time=slice(data_start_time, data_end_time)) @@ -232,7 +262,7 @@ def get_min_max_timedeltas_for_time_period(time_frequency: int, time_unit: str): max_freq = MIN_MAX_DELTAS[time_unit][1] * time_frequency delta_unit = MIN_MAX_DELTAS[time_unit][2] return pd.Timedelta(f'{min_freq}{delta_unit}'), \ - pd.Timedelta(f'{max_freq}{delta_unit}') + pd.Timedelta(f'{max_freq}{delta_unit}') def _normalize_time(time, normalize_hour=True): From 15e6cf0da28a5b05f478b8a0c2fa0cacb8e028d5 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Fri, 14 Jan 2022 16:35:38 +0100 Subject: [PATCH 25/28] temporal resampling is done purely based on datasetresample and dataarrayresample --- xcube/core/resampling/temporal.py | 371 ++++++++++++++---------------- 1 file changed, 175 insertions(+), 196 deletions(-) diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index d3a678a7f..9601c2037 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -19,13 +19,18 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from typing import Dict, Any, Sequence, Union, List +from enum import Enum +from typing import Dict, Any, Sequence, Union import cftime +from datetime import timedelta import numpy as np -import pandas as pd -import re import xarray as xr +from xarray.coding.cftime_offsets import Day +from xarray.coding.cftime_offsets import Hour +from xarray.coding.cftime_offsets import MonthBegin +from xarray.coding.cftime_offsets import QuarterBegin +from xarray.coding.cftime_offsets import YearBegin from xcube.core.schema import CubeSchema from xcube.core.select import select_variables_subset @@ -39,14 +44,11 @@ OTHER_INTERPOLATION_KINDS = ['linear', 'nearest', 'previous', 'next'] INTERPOLATION_KINDS = SPLINE_INTERPOLATION_KINDS + OTHER_INTERPOLATION_KINDS -TIMEUNIT_INCREMENTS = dict( - YS=[1, 0, 0, 0], - QS=[0, 3, 0, 0], - MS=[0, 1, 0, 0] -) -HALF_TIMEUNIT_INCREMENTS = dict( - YS=[0, 6, 0, 0] -) + +class Offset(Enum): + PREVIOUS = 'previous' + NONE = 'none' + NEXT = 'next' def resample_in_time(dataset: xr.Dataset, @@ -118,9 +120,11 @@ def resample_in_time(dataset: xr.Dataset, / np.timedelta64(1, 'D')) + 1) frequency = f'{days}D' + frequency_is_irregular = frequency.endswith('Y') or \ + frequency.endswith('M') or \ + frequency.endswith('Q') # resample to start of period - if frequency.endswith('Y') or frequency.endswith('M') or \ - frequency.endswith('Q'): + if frequency_is_irregular: frequency = f'{frequency}S' if var_names: @@ -167,19 +171,169 @@ def resample_in_time(dataset: xr.Dataset, resampled_cube = resampled_cubes[0] else: resampled_cube = xr.merge(resampled_cubes) - adjusted_times, time_bounds = _adjust_times_and_bounds( - resampled_cube.time.values, frequency, method) - update_vars = dict( - time=adjusted_times, - time_bnds=xr.DataArray(time_bounds, dims=['time', 'bnds']) - ) - resampled_cube = resampled_cube.assign_coords(update_vars) - + if method in UPSAMPLING_METHODS: + resampled_cube = _adjust_upsampled_cube(resampled_cube, + frequency, + base, + 
frequency_is_irregular) + else: + resampled_cube = _adjust_downsampled_cube(resampled_cube, + frequency, + base, + frequency_is_irregular) return adjust_metadata_and_chunking(resampled_cube, metadata=metadata, time_chunk_size=time_chunk_size) +def _adjust_upsampled_cube(resampled_cube, frequency, base, frequency_is_irregular): + # Times of upsampled cube are correct, we need to determine time bounds + # Get times with negative offset + times = resampled_cube.time.values + previous_times = _get_resampled_times( + resampled_cube, frequency, 'time', Offset.PREVIOUS, base + ) + # Get centers between times and previous_times as start bounds + center_times = _get_centers_between_times( + previous_times, + times, + frequency_is_irregular, + resampled_cube + ) + # we need to add this as intermediate data array so we can retrieve + # resampled times from it + resampled_cube = resampled_cube.assign_coords( + intermediate_time=center_times + ) + stop_times = _get_resampled_times( + resampled_cube, frequency, 'intermediate_time', Offset.NEXT, base + ) + resampled_cube = resampled_cube.drop_vars('intermediate_time') + resampled_cube = _add_time_bounds_to_resampled_cube(center_times, + stop_times, + resampled_cube) + return resampled_cube + + +def _adjust_downsampled_cube(resampled_cube, + frequency, + base, + frequency_is_irregular): + # times of resampled_cube are actually start bounding times. 
+ # We need to determine times and end bounding times + start_times = resampled_cube.time.values + stop_times = _get_resampled_times( + resampled_cube, frequency, 'time', Offset.NEXT, base + ) + resampled_cube = _add_time_bounds_to_resampled_cube(start_times, + stop_times, + resampled_cube) + # Get centers between start and stop bounding times + center_times = _get_centers_between_times( + start_times, + stop_times, + frequency_is_irregular, + resampled_cube + ) + resampled_cube = resampled_cube.assign_coords(time=center_times) + return resampled_cube + + +def _get_resampled_times(cube: xr.Dataset, + frequency: str, + name_of_time_dim: str, + offset: Offset, + base=None): + if offset == Offset.PREVIOUS: + offset = _invert_frequency(frequency, + cube[name_of_time_dim].values[0]) + elif offset == Offset.NONE: + offset = None + elif offset == Offset.NEXT: + offset = frequency + args = dict(skipna=True, + closed='left', + label='left', + loffset=offset, + base=base) + args[name_of_time_dim] = frequency + return np.array(list(cube[name_of_time_dim].resample(**args).groups.keys())) + + +def _add_time_bounds_to_resampled_cube(start_times, stop_times, resampled_cube): + time_bounds = xr.DataArray( + np.array([start_times, stop_times]).transpose(), + dims=['time', 'bnds'] + ) + return resampled_cube.assign_coords( + time_bnds=time_bounds + ) + + +def _get_centers_between_times(earlier_times, + later_times, + frequency_is_irregular, + resampled_cube): + """ + Determines the center between two time arrays. 
+ In case the frequency is irregular and the centers are close to the + beginning of a month, the centers are snapped to it + """ + time_deltas = later_times - earlier_times + center_times = later_times - time_deltas * 0.5 + if frequency_is_irregular: + # In case of 'M', 'Q' or 'Y' frequencies, add a small time delta + # so we move a little closer to the later time + time_delta = _get_time_delta(earlier_times[0]) + center_times_plus_delta = center_times + time_delta + resampled_cube = resampled_cube.assign_coords( + intermediate_time=center_times_plus_delta + ) + # snap center times to beginnings of months when they are close + starts_of_month = _get_resampled_times( + resampled_cube, '1MS', 'intermediate_time', Offset.NONE + ) + center_time_deltas = center_times_plus_delta - starts_of_month + snapped_times = np.where(center_time_deltas < time_delta * 2, + starts_of_month, + center_times) + resampled_cube.drop_vars('intermediate_time') + return snapped_times + return center_times + + +def _get_time_delta(time_value): + if _is_cf(time_value): + return timedelta(hours=42) + return np.timedelta64(42, 'h') + + +def _invert_frequency(frequency, time_value): + if not _is_cf(time_value): + return f'-{frequency}' + if frequency.endswith('H'): + frequency_value = frequency.split('H')[0] + return Hour(-int(frequency_value)) + if frequency.endswith('D'): + frequency_value = frequency.split('D')[0] + return Day(-int(frequency_value)) + if frequency.endswith('W'): + frequency_value = frequency.split('W')[0] + return Day(-int(frequency_value) * 7) + if frequency.endswith('MS'): + frequency_value = frequency.split('MS')[0] + return MonthBegin(-int(frequency_value)) + if frequency.endswith('QS'): + frequency_value = frequency.split('QS')[0] + return QuarterBegin(-int(frequency_value)) + frequency_value = frequency.split('YS')[0] + return YearBegin(-int(frequency_value)) + + +def _is_cf(time_value): + return isinstance(time_value, cftime.datetime) + + def 
adjust_metadata_and_chunking(dataset, metadata=None, time_chunk_size=None): time_coverage_start = '%s' % dataset.time_bnds[0][0] time_coverage_end = '%s' % dataset.time_bnds[-1][1] @@ -212,181 +366,6 @@ def _adjust_chunk_sizes_without_schema(dataset, time_chunk_size=None): return dataset.chunk(chunk_sizes) -def _adjust_times_and_bounds(time_values, frequency, method): - time_unit = re.findall('[A-Z]+', frequency)[0] - time_value = int(frequency.split(time_unit)[0]) - if time_unit not in TIMEUNIT_INCREMENTS: - if time_unit == 'D': - half_time_delta = np.timedelta64(12 * time_value, 'h') - elif time_unit == 'H': - half_time_delta = np.timedelta64(30 * time_value, 'm') - elif time_unit == 'W': - half_time_delta = np.timedelta64(84 * time_value, 'h') - else: - raise ValueError(f'Unsupported time unit "{time_unit}"') - if method not in UPSAMPLING_METHODS: - time_values += half_time_delta - time_bounds_values = \ - np.array([time_values - half_time_delta, - time_values + half_time_delta]).transpose() - return time_values, time_bounds_values - # time units year, month and quarter cannot be converted to - # numpy timedelta objects, so we have to convert them to pandas timestamps - # and modify these - is_cf_time = isinstance(time_values[0], cftime.datetime) - if is_cf_time: - timestamps = [pd.Timestamp(tv.isoformat()) for tv in time_values] - calendar = time_values[0].calendar - else: - timestamps = [pd.Timestamp(tv) for tv in time_values] - calendar = None - - timestamps.append(_get_next_timestamp(timestamps[-1], - time_unit, - time_value, - False)) - - new_timestamps = [] - new_timestamp_bounds = [] - for i, ts in enumerate(timestamps[:-1]): - next_ts = timestamps[i + 1] - half_next_ts = _get_next_timestamp(ts, time_unit, time_value, True) - # depending on whether the data was sampled down or up, - # times need to be adjusted differently - if method not in UPSAMPLING_METHODS: - new_timestamps.append(_convert(half_next_ts, calendar)) - 
new_timestamp_bounds.append([_convert(ts, calendar), - _convert(next_ts, calendar)]) - else: - half_previous_ts = \ - _get_previous_timestamp(ts, time_unit, time_value, True) - new_timestamps.append(_convert(ts, calendar)) - new_timestamp_bounds.append([_convert(half_previous_ts, - calendar), - _convert(half_next_ts, - calendar)]) - return new_timestamps, new_timestamp_bounds - - -def _convert(timestamp: pd.Timestamp, calendar: str): - if calendar is not None: - return cftime.datetime.fromordinal(timestamp.to_julian_date(), - calendar=calendar) - return np.datetime64(timestamp) - - -def _get_next_timestamp(timestamp, time_unit, time_value, half) \ - -> pd.Timestamp: - # Retrieves the timestamp following the passed timestamp according to the - # given time unit and time value. - # If half is True, the timestamp halfway between the timestamp and the next - # timestamp (which is not necessarily halfway between the two) is returned - increments = _get_increments(timestamp, time_unit, time_value, half) - replacement = dict( - year=timestamp.year + increments[0], - month=timestamp.month + increments[1], - day=timestamp.day + increments[2], - hour=timestamp.hour + increments[3] - ) - while replacement['hour'] > 24: - replacement['hour'] -= 24 - replacement['day'] += 1 - while replacement['day'] > _days_of_month(replacement['year'], - replacement['month']): - replacement['day'] -= _days_of_month(replacement['year'], - replacement['month']) - replacement['month'] += 1 - if replacement['month'] > 12: - replacement['month'] -= 12 - replacement['year'] += 1 - - while replacement['month'] > 12: - replacement['month'] -= 12 - replacement['year'] += 1 - - return pd.Timestamp(timestamp.replace(**replacement)) - - -def _get_previous_timestamp(timestamp, time_unit, time_value, half) \ - -> pd.Timestamp: - # Retrieves the timestamp preceding the passed timestamp according to the - # given time unit and time value. 
- # If half is True, the timestamp halfway between the timestamp and the - # previous timestamp (which is not necessarily halfway between the two) - # is returned - increments = _get_increments(timestamp, time_unit, time_value, half) - replacement = dict( - year=timestamp.year - increments[0], - month=timestamp.month - increments[1], - day=timestamp.day - increments[2], - hour=timestamp.hour - increments[3] - ) - - while replacement['hour'] < 0: - replacement['hour'] += 24 - replacement['day'] -= 1 - - while replacement['day'] < 1: - replacement['month'] -= 1 - if replacement['month'] < 1: - replacement['month'] += 12 - replacement['year'] -= 1 - replacement['day'] += _days_of_month(replacement['year'], - replacement['month'] % 12) - - while replacement['month'] < 1: - replacement['month'] += 12 - replacement['year'] -= 1 - - return pd.Timestamp(timestamp.replace(**replacement)) - - -def _get_increments(timestamp, time_unit, time_value, half) -> List[int]: - # Determines the increments for year, month, day, and hour to be applied - # to a timestamp - if not half: - return _tune_increments(TIMEUNIT_INCREMENTS[time_unit], time_value) - if time_value % 2 == 0: - time_value /= 2 - return _tune_increments(TIMEUNIT_INCREMENTS[time_unit], - int(time_value)) - if time_unit in HALF_TIMEUNIT_INCREMENTS: - return _tune_increments(HALF_TIMEUNIT_INCREMENTS[time_unit], - time_value) - if time_unit == 'QS': - num_months = 3 - else: - num_months = 1 - import math - month = int(math.floor((num_months * time_value) / 2)) - days = _days_of_month(timestamp.year, month) - if days % 2 == 0: - hours = 0 - else: - hours = 12 - days = int(math.floor(days / 2)) - 1 - return [0, month, days, hours] - - -def _tune_increments(incrementors, time_value): - incrementors = [i * time_value for i in incrementors] - return incrementors - - -def _days_of_month(year: int, month: int): - if month in [1, 3, 5, 7, 8, 10, 12]: - return 31 - if month in [4, 6, 9, 11]: - return 30 - if year % 4 != 0: - 
return 28 - if year % 400 == 0: - return 29 - if year % 100 == 0: - return 28 - return 28 - - def get_method_kwargs(method, frequency, interp_kind, tolerance): if method == 'interpolate': kwargs = {'kind': interp_kind or 'linear'} From a551b3c8a1f14dd754cc7dc7634f5019b15fac01 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Fri, 14 Jan 2022 17:05:43 +0100 Subject: [PATCH 26/28] updated changelog --- CHANGES.md | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index fac32b8b7..3922075a6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,11 +4,24 @@ from other than NetCDF inputs, e.g. GeoTIFF. * Cube generator `xcube gen2` allows to use temporal resampling. To use it, a user must set the parameter `time_period` (in a pandas-interpretable - pattern, e.g., '4D') and the newly introduced parameter `temporal_resampling`. - To sample down to a broader temporal resolution, use any of `['count', - 'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', 'median', 'std', - 'var', 'percentile']`, to sample up to a finer resolution, use any of - `['asfreq', 'ffill', 'bfill', 'pad', 'nearest', 'interpolate']`. (#523) + pattern, e.g., '4D') and the newly introduced parameter `temporal_resampling`, + to which a dictionary with entries for upsampling and/or downsampling can be + passed. Upsampling and downsampling can be used with or without parameters, + depending on the selected method. To sample down to a broader temporal + resolution, you need to specify a downsampling method (any of `['count', + 'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', 'median', 'std', 'var', + 'percentile']`). If you also want to add parameters, you can pass a tuple + consisting of the method name and a dictionary with parameters. Analogously, + you can sample up to a finer temporal resolution using any of `['asfreq', + 'ffill', 'bfill', 'pad', 'nearest', 'interpolate']`. 
+ Example: + ```python + temporal_resampling=dict( + downsampling=('percentile', {'threshold': 75}), + upsampling='pad' + ), + ``` +(#523) ## Changes in 0.9.2 From 2e559b006c67e131078f8f45a0c4e3d3d02fb558 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Fri, 14 Jan 2022 17:39:30 +0100 Subject: [PATCH 27/28] test fix --- xcube/core/gen2/local/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xcube/core/gen2/local/generator.py b/xcube/core/gen2/local/generator.py index 2ca921780..367f7b0c1 100644 --- a/xcube/core/gen2/local/generator.py +++ b/xcube/core/gen2/local/generator.py @@ -126,7 +126,7 @@ def __generate_cube(self, request: CubeGeneratorRequest) \ subsetter = CubeSubsetter() resampler_xy = CubeResamplerXY() - resampler_t = CubeResamplerT(cube_config) + resampler_t = CubeResamplerT() combiner = CubesCombiner(cube_config) rechunker = CubeRechunker() From b45051ade1a47d74214d7c3f61d4b814cce7ce67 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Fri, 14 Jan 2022 17:39:37 +0100 Subject: [PATCH 28/28] test fix --- test/core/resampling/test_temporal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/core/resampling/test_temporal.py b/test/core/resampling/test_temporal.py index 5d10b5eb7..43b97df75 100644 --- a/test/core/resampling/test_temporal.py +++ b/test/core/resampling/test_temporal.py @@ -168,7 +168,7 @@ def test_resample_in_time_resample_to_quarter(self): self.assertIsNot(resampled_cube, self.input_cube) self.assertIn('time', resampled_cube) self.assertEqual(1, resampled_cube.time.size) - self.assertEqual(np.datetime64('2017-08-15T12:00:00'), + self.assertEqual(np.datetime64('2017-08-16T00:00:00'), resampled_cube.time[0].values) self.assertIn('time_bnds', resampled_cube) self.assertEqual((1, 2), resampled_cube.time_bnds.shape)