From 6b1b7cc431165e78c0a91a9c41ddf398ff7da75d Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 29 Jun 2023 13:51:38 -0700 Subject: [PATCH 01/91] Separated NTS backends --- data-access/nexustiles/AbstractTileService.py | 329 +++++ data-access/nexustiles/backends/__init__.py | 0 .../backends/nexusproto/__init__.py | 0 .../nexustiles/backends/nexusproto/backend.py | 566 ++++++++ .../backends/nexusproto/config/datastores.ini | 36 + .../nexusproto/config/datastores.ini.default | 39 + .../backends/nexusproto/dao/CassandraProxy.py | 317 +++++ .../backends/nexusproto/dao/DynamoProxy.py | 146 ++ .../nexusproto/dao/ElasticsearchProxy.py | 1235 +++++++++++++++++ .../backends/nexusproto/dao/S3Proxy.py | 141 ++ .../backends/nexusproto/dao/SolrProxy.py | 731 ++++++++++ .../backends/nexusproto/dao/__init__.py | 14 + .../nexustiles/backends/zarr/__init__.py | 0 data-access/nexustiles/nexustiles.py | 97 +- 14 files changed, 3564 insertions(+), 87 deletions(-) create mode 100644 data-access/nexustiles/AbstractTileService.py create mode 100644 data-access/nexustiles/backends/__init__.py create mode 100644 data-access/nexustiles/backends/nexusproto/__init__.py create mode 100644 data-access/nexustiles/backends/nexusproto/backend.py create mode 100644 data-access/nexustiles/backends/nexusproto/config/datastores.ini create mode 100644 data-access/nexustiles/backends/nexusproto/config/datastores.ini.default create mode 100644 data-access/nexustiles/backends/nexusproto/dao/CassandraProxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/DynamoProxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/ElasticsearchProxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/S3Proxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/__init__.py create mode 100644 data-access/nexustiles/backends/zarr/__init__.py diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py new file mode 100644 index 00000000..f4f4449c --- /dev/null +++ b/data-access/nexustiles/AbstractTileService.py @@ -0,0 +1,329 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
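# --- Illustrative sketch, not part of this patch ---
# With the NTS backends split into the nexustiles.backends.* packages shown in the
# diffstat above, a caller could select a concrete AbstractTileService implementation
# by name, roughly as below. This dispatcher is a hypothetical example only: this
# commit implements just the nexusproto backend, and backends/zarr is created as an
# empty placeholder package.
def _load_backend_example(backend_name, config=None):
    if backend_name == 'nexusproto':
        from nexustiles.backends.nexusproto.backend import NexusprotoTileService
        return NexusprotoTileService(config=config)
    raise ValueError(f'No tile service backend named {backend_name!r}')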
+ +import configparser +import logging +import sys +import json +from abc import ABC, abstractmethod +from datetime import datetime +from functools import reduce + +import numpy as np +import numpy.ma as ma +import pkg_resources +from pytz import timezone, UTC +from shapely.geometry import MultiPolygon, box + +from .dao import CassandraProxy +from .dao import DynamoProxy +from .dao import S3Proxy +from .dao import SolrProxy +from .dao import ElasticsearchProxy + +from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable +from nexustiles.nexustiles import NexusTileServiceException + +class AbstractTileService(ABC): + @abstractmethod + def get_dataseries_list(self, simple=False): + raise NotImplementedError() + + @abstractmethod + def find_tile_by_id(self, tile_id, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, + metrics_callback=None, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): + """ + Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding + polygon and the closest day of year. + + For example: + given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 + search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) + + Valid matches: + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 + + Invalid matches: + minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists + + :param bounding_polygon: The exact bounding polygon of tiles to search for + :param ds: The dataset name being searched + :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned + :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found + """ + raise NotImplementedError() + + @abstractmethod + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall in the given box in the Solr index + raise NotImplementedError() + + @abstractmethod + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall within the polygon in the Solr index + raise NotImplementedError() + + @abstractmethod + def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles whose metadata matches the specified metadata, start_time, end_time. 
+ :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + raise NotImplementedError() + + @abstractmethod + def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles that matches the specified metadata, start_time, end_time with tile data outside of time + range properly masked out. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + raise NotImplementedError() + + @abstractmethod + def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): + """ + The method will return tiles with the exact given bounds within the time range. It differs from + find_tiles_in_polygon in that only tiles with exactly the given bounds will be returned as opposed to + doing a polygon intersection with the given bounds. + + :param bounds: (minx, miny, maxx, maxy) bounds to search for + :param ds: Dataset name to search + :param start_time: Start time to search (seconds since epoch) + :param end_time: End time to search (seconds since epoch) + :param kwargs: fetch_data: True/False = whether or not to retrieve tile data + :return: + """ + raise NotImplementedError() + + @abstractmethod + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_tiles_bounded_by_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, + **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time=-1, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_min_max_time_by_granule(self, ds, granule_name): + raise NotImplementedError() + + @abstractmethod + def get_dataset_overall_stats(self, ds): + raise NotImplementedError() + + @abstractmethod + def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_tiles_bounded_by_polygon_at_time(self, polygon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_bounding_box(self, tile_ids): + """ + Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. + :param tile_ids: List of tile ids + :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles + """ + raise NotImplementedError() + + @abstractmethod + def get_min_time(self, tile_ids, ds=None): + """ + Get the minimum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. 
Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + @abstractmethod + def get_max_time(self, tile_ids, ds=None): + """ + Get the maximum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + @abstractmethod + def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): + """ + Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. + :param bounding_polygon: The bounding polygon of tiles to search for + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of distinct bounding boxes (as shapely polygons) for tiles in the search polygon + """ + raise NotImplementedError() + + def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): + for tile in tiles: + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. + if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def mask_tiles_to_polygon(self, bounding_polygon, tiles): + + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + + def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) + + def mask_tiles_to_time_range(self, start_time, end_time, tiles): + """ + Masks data in tiles to specified time range. 
+ :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param tiles: List of tiles + :return: A list tiles with data masked to specified time range + """ + if 0 <= start_time <= end_time: + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. + if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + @abstractmethod + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. + :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + raise NotImplementedError() + + @abstractmethod + def fetch_data_for_tiles(self, *tiles): + raise NotImplementedError() + + @abstractmethod + def open_dataset(self, dataset): + raise NotImplementedError() + + @abstractmethod + def _metadata_store_docs_to_tiles(self, *store_docs): + raise NotImplementedError() + diff --git a/data-access/nexustiles/backends/__init__.py b/data-access/nexustiles/backends/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data-access/nexustiles/backends/nexusproto/__init__.py b/data-access/nexustiles/backends/nexusproto/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py new file mode 100644 index 00000000..86d5ca6a --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -0,0 +1,566 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
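# --- Illustrative example, not part of this patch ---
# Minimal, self-contained sketch (with made-up coordinate values) of the mask
# broadcasting used by AbstractTileService.mask_tiles_to_bbox and
# mask_tiles_to_time_range above: the three 1-D coordinate masks are OR-ed together
# into a single (time, lat, lon) mask aligned with tile.data.
def _bbox_mask_example():
    import numpy as np
    import numpy.ma as ma
    times = ma.masked_outside(np.array([1000.0]), 0, 2000)           # nothing masked
    lats = ma.masked_outside(np.array([-10.0, 0.0, 10.0]), -5, 5)    # -10 and 10 masked
    lons = ma.masked_outside(np.array([100.0, 120.0]), 90, 110)      # 120 masked
    data_mask = ma.getmaskarray(times)[:, np.newaxis, np.newaxis] \
                | ma.getmaskarray(lats)[np.newaxis, :, np.newaxis] \
                | ma.getmaskarray(lons)[np.newaxis, np.newaxis, :]
    # Shape is (1, 3, 2): True wherever the latitude, longitude, or time fell
    # outside the requested bounds, False for the single in-box point.
    return data_mask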
+ +import configparser +import logging +import sys +import json +from datetime import datetime +from functools import reduce + +import numpy as np +import numpy.ma as ma +import pkg_resources +from pytz import timezone, UTC +from shapely.geometry import MultiPolygon, box + +from .dao import CassandraProxy +from .dao import DynamoProxy +from .dao import S3Proxy +from .dao import SolrProxy +from .dao import ElasticsearchProxy + +from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable +from nexustiles.nexustiles import NexusTileServiceException + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) +logger = logging.getLogger("testing") + + +class NexusprotoTileService(object): + def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): + self._datastore = None + self._metadatastore = None + + self._config = configparser.RawConfigParser() + self._config.read(NexusprotoTileService._get_config_files('config/datastores.ini')) + + if config: + self.override_config(config) + + if not skipDatastore: + datastore = self._config.get("datastore", "store") + if datastore == "cassandra": + self._datastore = CassandraProxy.CassandraProxy(self._config) + elif datastore == "s3": + self._datastore = S3Proxy.S3Proxy(self._config) + elif datastore == "dynamo": + self._datastore = DynamoProxy.DynamoProxy(self._config) + else: + raise ValueError("Error reading datastore from config file") + + if not skipMetadatastore: + metadatastore = self._config.get("metadatastore", "store", fallback='solr') + if metadatastore == "solr": + self._metadatastore = SolrProxy.SolrProxy(self._config) + elif metadatastore == "elasticsearch": + self._metadatastore = ElasticsearchProxy.ElasticsearchProxy(self._config) + + def override_config(self, config): + for section in config.sections(): + if self._config.has_section(section): # only override preexisting section, ignores the other + for option in config.options(section): + if config.get(section, option) is not None: + self._config.set(section, option, config.get(section, option)) + + def get_dataseries_list(self, simple=False): + if simple: + return self._metadatastore.get_data_series_list_simple() + else: + return self._metadatastore.get_data_series_list() + + def find_tile_by_id(self, tile_id, **kwargs): + return self._metadatastore.find_tile_by_id(tile_id) + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + return self._metadatastore.find_tiles_by_id(tile_ids, ds=ds, **kwargs) + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, + metrics_callback=None, **kwargs): + start = datetime.now() + result = self._metadatastore.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, dataset, start_time, + end_time, + **kwargs) + duration = (datetime.now() - start).total_seconds() + if metrics_callback: + metrics_callback(solr=duration) + return result + + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): + """ + Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding + polygon and the closest day of year. 
+ + For example: + given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 + search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) + + Valid matches: + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 + + Invalid matches: + minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists + + :param bounding_polygon: The exact bounding polygon of tiles to search for + :param ds: The dataset name being searched + :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned + :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found + """ + try: + tile = self._metadatastore.find_tile_by_polygon_and_most_recent_day_of_year(bounding_polygon, ds, + day_of_year) + except IndexError: + raise NexusTileServiceException("No tile found.").with_traceback(sys.exc_info()[2]) + + return tile + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + return self._metadatastore.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, + rows=5000, + **kwargs) + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): + return self._metadatastore.find_all_tiles_in_polygon_at_time(bounding_polygon, dataset, time, rows=5000, + **kwargs) + + def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall in the given box in the Solr index + if type(start_time) is datetime: + start_time = (start_time - EPOCH).total_seconds() + if type(end_time) is datetime: + end_time = (end_time - EPOCH).total_seconds() + return self._metadatastore.find_all_tiles_in_box_sorttimeasc(min_lat, max_lat, min_lon, max_lon, ds, start_time, + end_time, **kwargs) + + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall within the polygon in the Solr index + if 'sort' in list(kwargs.keys()): + tiles = self._metadatastore.find_all_tiles_in_polygon(bounding_polygon, ds, start_time, end_time, **kwargs) + else: + tiles = self._metadatastore.find_all_tiles_in_polygon_sorttimeasc(bounding_polygon, ds, start_time, + end_time, + **kwargs) + return tiles + + def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles whose metadata matches the specified metadata, start_time, end_time. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + tiles = self._metadatastore.find_all_tiles_by_metadata(metadata, ds, start_time, end_time, **kwargs) + + return tiles + + def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles that matches the specified metadata, start_time, end_time with tile data outside of time + range properly masked out. 
+ :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + tiles = self.find_tiles_by_metadata(metadata, ds, start_time, end_time, **kwargs) + tiles = self.mask_tiles_to_time_range(start_time, end_time, tiles) + + return tiles + + def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): + """ + The method will return tiles with the exact given bounds within the time range. It differs from + find_tiles_in_polygon in that only tiles with exactly the given bounds will be returned as opposed to + doing a polygon intersection with the given bounds. + + :param bounds: (minx, miny, maxx, maxy) bounds to search for + :param ds: Dataset name to search + :param start_time: Start time to search (seconds since epoch) + :param end_time: End time to search (seconds since epoch) + :param kwargs: fetch_data: True/False = whether or not to retrieve tile data + :return: + """ + tiles = self._metadatastore.find_tiles_by_exact_bounds(bounds[0], bounds[1], bounds[2], bounds[3], ds, + start_time, + end_time) + return tiles + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + return self._metadatastore.find_all_boundary_tiles_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, + rows=5000, + **kwargs) + + def get_tiles_bounded_by_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, + **kwargs): + tiles = self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs) + tiles = self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + if 0 <= start_time <= end_time: + tiles = self.mask_tiles_to_time_range(start_time, end_time, tiles) + + return tiles + + def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time=-1, **kwargs): + tiles = self.find_tiles_in_polygon(polygon, ds, start_time, end_time, + **kwargs) + tiles = self.mask_tiles_to_polygon(polygon, tiles) + if 0 <= start_time <= end_time: + tiles = self.mask_tiles_to_time_range(start_time, end_time, tiles) + + return tiles + + def get_min_max_time_by_granule(self, ds, granule_name): + start_time, end_time = self._metadatastore.find_min_max_date_from_granule(ds, granule_name) + + return start_time, end_time + + def get_dataset_overall_stats(self, ds): + return self._metadatastore.get_data_series_stats(ds) + + def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + tiles = self.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs) + tiles = self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, time, time, tiles) + + return tiles + + def get_tiles_bounded_by_polygon_at_time(self, polygon, dataset, time, **kwargs): + tiles = self.find_all_tiles_in_polygon_at_time(polygon, dataset, time, **kwargs) + tiles = self.mask_tiles_to_polygon_and_time(polygon, time, time, tiles) + + return tiles + + def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + tiles = self.find_all_boundary_tiles_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs) + tiles = self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, time, time, tiles) + + return tiles + + def get_stats_within_box_at_time(self, 
min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + tiles = self._metadatastore.find_all_tiles_within_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, + **kwargs) + + return tiles + + def get_bounding_box(self, tile_ids): + """ + Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. + :param tile_ids: List of tile ids + :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles + """ + tiles = self.find_tiles_by_id(tile_ids, fl=['tile_min_lat', 'tile_max_lat', 'tile_min_lon', 'tile_max_lon'], + fetch_data=False, rows=len(tile_ids)) + polys = [] + for tile in tiles: + polys.append(box(tile.bbox.min_lon, tile.bbox.min_lat, tile.bbox.max_lon, tile.bbox.max_lat)) + return box(*MultiPolygon(polys).bounds) + + def get_min_time(self, tile_ids, ds=None): + """ + Get the minimum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + min_time = self._metadatastore.find_min_date_from_tiles(tile_ids, ds=ds) + return int((min_time - EPOCH).total_seconds()) + + def get_max_time(self, tile_ids, ds=None): + """ + Get the maximum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + max_time = self._metadatastore.find_max_date_from_tiles(tile_ids, ds=ds) + return int((max_time - EPOCH).total_seconds()) + + def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): + """ + Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. + :param bounding_polygon: The bounding polygon of tiles to search for + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of distinct bounding boxes (as shapely polygons) for tiles in the search polygon + """ + bounds = self._metadatastore.find_distinct_bounding_boxes_in_polygon(bounding_polygon, ds, start_time, end_time) + return [box(*b) for b in bounds] + + def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): + + for tile in tiles: + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. 
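            # For a multi-variable tile, tile.data is stacked as
            # (num_vars, time, lat, lon), so the (time, lat, lon) mask built above
            # is repeated along a new leading axis to cover every variable before
            # it is applied.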
+ if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def mask_tiles_to_polygon(self, bounding_polygon, tiles): + + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + + def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) + + def mask_tiles_to_time_range(self, start_time, end_time, tiles): + """ + Masks data in tiles to specified time range. + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param tiles: List of tiles + :return: A list tiles with data masked to specified time range + """ + if 0 <= start_time <= end_time: + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. + if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + return self._metadatastore.get_tile_count(ds, bounding_polygon, start_time, end_time, metadata, **kwargs) + + def fetch_data_for_tiles(self, *tiles): + + nexus_tile_ids = set([tile.tile_id for tile in tiles]) + matched_tile_data = self._datastore.fetch_nexus_tiles(*nexus_tile_ids) + + tile_data_by_id = {str(a_tile_data.tile_id): a_tile_data for a_tile_data in matched_tile_data} + + missing_data = nexus_tile_ids.difference(list(tile_data_by_id.keys())) + if len(missing_data) > 0: + raise Exception("Missing data for tile_id(s) %s." % missing_data) + + for a_tile in tiles: + lats, lons, times, data, meta, is_multi_var = tile_data_by_id[a_tile.tile_id].get_lat_lon_time_data_meta() + + a_tile.latitudes = lats + a_tile.longitudes = lons + a_tile.times = times + a_tile.data = data + a_tile.meta_data = meta + a_tile.is_multi = is_multi_var + + del (tile_data_by_id[a_tile.tile_id]) + + return tiles + + def _metadata_store_docs_to_tiles(self, *store_docs): + + tiles = [] + for store_doc in store_docs: + tile = Tile() + try: + tile.tile_id = store_doc['id'] + except KeyError: + pass + + try: + min_lat = store_doc['tile_min_lat'] + min_lon = store_doc['tile_min_lon'] + max_lat = store_doc['tile_max_lat'] + max_lon = store_doc['tile_max_lon'] + + if isinstance(min_lat, list): + min_lat = min_lat[0] + if isinstance(min_lon, list): + min_lon = min_lon[0] + if isinstance(max_lat, list): + max_lat = max_lat[0] + if isinstance(max_lon, list): + max_lon = max_lon[0] + + tile.bbox = BBox(min_lat, max_lat, min_lon, max_lon) + except KeyError: + pass + + try: + tile.dataset = store_doc['dataset_s'] + except KeyError: + pass + + try: + tile.dataset_id = store_doc['dataset_id_s'] + except KeyError: + pass + + try: + tile.granule = store_doc['granule_s'] + except KeyError: + pass + + try: + tile.min_time = datetime.strptime(store_doc['tile_min_time_dt'], "%Y-%m-%dT%H:%M:%SZ").replace( + tzinfo=UTC) + except KeyError: + pass + + try: + tile.max_time = datetime.strptime(store_doc['tile_max_time_dt'], "%Y-%m-%dT%H:%M:%SZ").replace( + tzinfo=UTC) + except KeyError: + pass + + try: + tile.section_spec = store_doc['sectionSpec_s'] + except KeyError: + pass + + try: + tile.tile_stats = TileStats( + store_doc['tile_min_val_d'], store_doc['tile_max_val_d'], + store_doc['tile_avg_val_d'], store_doc['tile_count_i'] + ) + except KeyError: + pass + + try: + # Ensure backwards compatibility by working with old + # tile_var_name_s and tile_standard_name_s fields to + + # will be overwritten if tile_var_name_ss is present + # as well. 
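                # For example (variable names illustrative), a legacy document may
                # store tile_var_name_s='analysed_sst' as a plain string, or
                # tile_var_name_s='["analysed_sst", "analysis_error"]' as a
                # JSON-encoded list; the '[' check below distinguishes the two
                # before json.loads is applied.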
+ if '[' in store_doc['tile_var_name_s']: + var_names = json.loads(store_doc['tile_var_name_s']) + else: + var_names = [store_doc['tile_var_name_s']] + + standard_name = store_doc.get( + 'tile_standard_name_s', + json.dumps([None] * len(var_names)) + ) + if '[' in standard_name: + standard_names = json.loads(standard_name) + else: + standard_names = [standard_name] + + tile.variables = [] + for var_name, standard_name in zip(var_names, standard_names): + tile.variables.append(TileVariable( + variable_name=var_name, + standard_name=standard_name + )) + except KeyError: + pass + + if 'tile_var_name_ss' in store_doc: + tile.variables = [] + for var_name in store_doc['tile_var_name_ss']: + standard_name_key = f'{var_name}.tile_standard_name_s' + standard_name = store_doc.get(standard_name_key) + tile.variables.append(TileVariable( + variable_name=var_name, + standard_name=standard_name + )) + + tiles.append(tile) + + return tiles + + def pingSolr(self): + status = self._metadatastore.ping() + if status and status["status"] == "OK": + return True + else: + return False + + @staticmethod + def _get_config_files(filename): + log = logging.getLogger(__name__) + candidates = [] + extensions = ['.default', ''] + for extension in extensions: + try: + candidate = pkg_resources.resource_filename(__name__, filename + extension) + log.info('use config file {}'.format(filename + extension)) + candidates.append(candidate) + except KeyError as ke: + log.warning('configuration file {} not found'.format(filename + extension)) + + return candidates diff --git a/data-access/nexustiles/backends/nexusproto/config/datastores.ini b/data-access/nexustiles/backends/nexusproto/config/datastores.ini new file mode 100644 index 00000000..f3facb95 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/config/datastores.ini @@ -0,0 +1,36 @@ +[cassandra] +host=localhost +port=9042 +keyspace=nexustiles +local_datacenter=datacenter1 +protocol_version=3 +dc_policy=WhiteListRoundRobinPolicy +username=cassandra +password=cassandra + +[dynamo] +table=nexus-jpl-table +region=us-west-2 + +[solr] +host=http://localhost:8983 +core=nexustiles + +[s3] +bucket=cdms-dev-zarr +#key=MUR_aggregate/ +#key=MUR_1wk_7_100_100/ +#key=MUR_1wk_7_1500_2500/ +#key=MUR_2017_9dy_7_1500_2500/ +#key=MUR_2017_9dy_7_120_240/ +key=MUR_2017_2yr_30_120_240/ +#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_7_120_240.zarr/ +#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_1_240_240.zarr/ +#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_90_120_240.zarr/ +public=false +region=us-west-2 +profile=saml-pub + +[datastore] +store=cassandra +#store=zarrS3 diff --git a/data-access/nexustiles/backends/nexusproto/config/datastores.ini.default b/data-access/nexustiles/backends/nexusproto/config/datastores.ini.default new file mode 100644 index 00000000..d8db1902 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/config/datastores.ini.default @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[cassandra] +host=localhost +port=9042 +keyspace=nexustiles +local_datacenter=datacenter1 +protocol_version=3 +dc_policy=DCAwareRoundRobinPolicy +username= +password= + +[s3] +bucket=nexus-jpl +region=us-west-2 + +[dynamo] +table=nexus-jpl-table +region=us-west-2 + +[solr] +host=http://localhost:8983 +core=nexustiles + +[datastore] +store=cassandra diff --git a/data-access/nexustiles/backends/nexusproto/dao/CassandraProxy.py b/data-access/nexustiles/backends/nexusproto/dao/CassandraProxy.py new file mode 100644 index 00000000..96f7c4c6 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/CassandraProxy.py @@ -0,0 +1,317 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import uuid +from configparser import NoOptionError + +import nexusproto.DataTile_pb2 as nexusproto +import numpy as np +from cassandra.auth import PlainTextAuthProvider +from cassandra.cqlengine import columns, connection, CQLEngineException +from cassandra.cluster import NoHostAvailable +from cassandra.cqlengine.models import Model +from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy, WhiteListRoundRobinPolicy +from multiprocessing.synchronize import Lock +from nexusproto.serialization import from_shaped_array + +INIT_LOCK = Lock(ctx=None) + +logger = logging.getLogger(__name__) + +class NexusTileData(Model): + __table_name__ = 'sea_surface_temp' + tile_id = columns.UUID(primary_key=True) + tile_blob = columns.Blob() + + __nexus_tile = None + + def _get_nexus_tile(self): + if self.__nexus_tile is None: + self.__nexus_tile = nexusproto.TileData.FromString(self.tile_blob) + + return self.__nexus_tile + + def get_raw_data_array(self): + + nexus_tile = self._get_nexus_tile() + the_tile_type = nexus_tile.tile.WhichOneof("tile_type") + + the_tile_data = getattr(nexus_tile.tile, the_tile_type) + + return from_shaped_array(the_tile_data.variable_data) + + def get_lat_lon_time_data_meta(self): + """ + Retrieve data from data store and metadata from metadata store + for this tile. For gridded tiles, the tile shape of the data + will match the input shape. For example, if the input was a + 30x30 tile, all variables will also be 30x30. However, if the + tile is a swath tile, the data will be transformed along the + diagonal of the data matrix. For example, a 30x30 tile would + become 900x900 where the 900 points are along the diagonal. 
+ + Multi-variable tile will also include an extra dimension in the + data array. For example, a 30 x 30 x 30 array would be + transformed to N x 30 x 30 x 30 where N is the number of + variables in this tile. + + latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data, is_multi_var + + :return: latitude data + :return: longitude data + :return: time data + :return: data + :return: meta data dictionary + :return: boolean flag, True if this tile has more than one variable + """ + is_multi_var = False + + if self._get_nexus_tile().HasField('grid_tile'): + grid_tile = self._get_nexus_tile().grid_tile + + grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) + latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) + + if len(grid_tile_data.shape) == 2: + grid_tile_data = grid_tile_data[np.newaxis, :] + + # Extract the meta data + meta_data = {} + for meta_data_obj in grid_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + if len(meta_array.shape) == 2: + meta_array = meta_array[np.newaxis, :] + meta_data[name] = meta_array + + return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data, is_multi_var + elif self._get_nexus_tile().HasField('swath_tile'): + swath_tile = self._get_nexus_tile().swath_tile + + latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) + longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) + time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) + + # Simplify the tile if the time dimension is the same value repeated + if np.all(time_data == np.min(time_data)): + time_data = np.array([np.min(time_data)]) + + swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) + + tile_data = self._to_standard_index(swath_tile_data, + (len(time_data), len(latitude_data), len(longitude_data))) + + # Extract the meta data + meta_data = {} + for meta_data_obj in swath_tile.meta_data: + name = meta_data_obj.name + actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var + elif self._get_nexus_tile().HasField('time_series_tile'): + time_series_tile = self._get_nexus_tile().time_series_tile + + time_series_tile_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.variable_data)) + time_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.time)).reshape(-1) + latitude_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.longitude)) + + reshaped_array = np.ma.masked_all((len(time_data), len(latitude_data), len(longitude_data))) + idx = np.arange(len(latitude_data)) + reshaped_array[:, idx, idx] = time_series_tile_data + tile_data = reshaped_array + # Extract the meta data + meta_data = {} + for meta_data_obj in time_series_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + + reshaped_meta_array = np.ma.masked_all((len(time_data), len(latitude_data), len(longitude_data))) + idx = 
np.arange(len(latitude_data)) + reshaped_meta_array[:, idx, idx] = meta_array + + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var + elif self._get_nexus_tile().HasField('swath_multi_variable_tile'): + swath_tile = self._get_nexus_tile().swath_multi_variable_tile + is_multi_var = True + + latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) + longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) + time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) + + # Simplify the tile if the time dimension is the same value repeated + if np.all(time_data == np.min(time_data)): + time_data = np.array([np.min(time_data)]) + + swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) + + desired_shape = ( + len(time_data), + len(latitude_data), + len(longitude_data), + ) + tile_data = self._to_standard_index(swath_tile_data, desired_shape, is_multi_var=True) + + # Extract the meta data + meta_data = {} + for meta_data_obj in swath_tile.meta_data: + name = meta_data_obj.name + actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var + elif self._get_nexus_tile().HasField('grid_multi_variable_tile'): + grid_multi_variable_tile = self._get_nexus_tile().grid_multi_variable_tile + is_multi_var = True + + grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.variable_data)) + latitude_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.longitude)) + + # If there are 3 dimensions, that means the time dimension + # was squeezed. Add back in + if len(grid_tile_data.shape) == 3: + grid_tile_data = np.expand_dims(grid_tile_data, axis=1) + # If there are 4 dimensions, that means the time dimension + # is present. Move the multivar dimension. 
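            # np.moveaxis(grid_tile_data, -1, 0) brings the trailing variable axis
            # to the front, e.g. (time, lat, lon, num_vars) becomes
            # (num_vars, time, lat, lon), matching the N x time x lat x lon layout
            # described in the docstring above.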
+ if len(grid_tile_data.shape) == 4: + grid_tile_data = np.moveaxis(grid_tile_data, -1, 0) + + # Extract the meta data + meta_data = {} + for meta_data_obj in grid_multi_variable_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + if len(meta_array.shape) == 2: + meta_array = meta_array[np.newaxis, :] + meta_data[name] = meta_array + + return latitude_data, longitude_data, np.array([grid_multi_variable_tile.time]), grid_tile_data, meta_data, is_multi_var + else: + raise NotImplementedError("Only supports grid_tile, swath_tile, swath_multi_variable_tile, and time_series_tile") + + @staticmethod + def _to_standard_index(data_array, desired_shape, is_multi_var=False): + """ + Transform swath data to a standard format where data runs along + diagonal of ND matrix and the non-diagonal data points are + masked + + :param data_array: The data array to be transformed + :param desired_shape: The desired shape of the resulting array + :param is_multi_var: True if this is a multi-variable tile + :type data_array: np.array + :type desired_shape: tuple + :type is_multi_var: bool + :return: Reshaped array + :rtype: np.array + """ + + reshaped_array = [] + if is_multi_var: + reshaped_data_array = np.moveaxis(data_array, -1, 0) + else: + reshaped_data_array = [data_array] + + for variable_data_array in reshaped_data_array: + if desired_shape[0] == 1: + variable_reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) + else: + variable_reshaped_array = np.ma.masked_all(desired_shape) + + row, col = np.indices(variable_data_array.shape) + + variable_reshaped_array[ + np.diag_indices(desired_shape[1], len(variable_reshaped_array.shape))] = \ + variable_data_array[ + row.flat, col.flat] + variable_reshaped_array.mask[ + np.diag_indices(desired_shape[1], len(variable_reshaped_array.shape))] = \ + variable_data_array.mask[ + row.flat, col.flat] + + if desired_shape[0] == 1: + reshaped_array.append(variable_reshaped_array[np.newaxis, :]) + else: + reshaped_array.append(variable_reshaped_array) + + if not is_multi_var: + # If single var, squeeze extra dim out of array + reshaped_array = reshaped_array[0] + + return reshaped_array + + +class CassandraProxy(object): + def __init__(self, config): + self.config = config + self.__cass_url = config.get("cassandra", "host") + self.__cass_username = config.get("cassandra", "username") + self.__cass_password = config.get("cassandra", "password") + self.__cass_keyspace = config.get("cassandra", "keyspace") + self.__cass_local_DC = config.get("cassandra", "local_datacenter") + self.__cass_protocol_version = config.getint("cassandra", "protocol_version") + self.__cass_dc_policy = config.get("cassandra", "dc_policy") + + try: + self.__cass_port = config.getint("cassandra", "port") + except NoOptionError: + self.__cass_port = 9042 + + with INIT_LOCK: + try: + connection.get_cluster() + except CQLEngineException: + self.__open() + + def __open(self): + if self.__cass_dc_policy == 'DCAwareRoundRobinPolicy': + dc_policy = DCAwareRoundRobinPolicy(self.__cass_local_DC) + token_policy = TokenAwarePolicy(dc_policy) + elif self.__cass_dc_policy == 'WhiteListRoundRobinPolicy': + token_policy = WhiteListRoundRobinPolicy([self.__cass_url]) + + if self.__cass_username and self.__cass_password: + auth_provider = PlainTextAuthProvider(username=self.__cass_username, password=self.__cass_password) + else: + auth_provider = None + try: + connection.setup( + [host for host in self.__cass_url.split(',')], 
self.__cass_keyspace, + protocol_version=self.__cass_protocol_version, load_balancing_policy=token_policy, + port=self.__cass_port, + auth_provider=auth_provider + ) + except NoHostAvailable as e: + logger.error("Cassandra is not accessible, SDAP will not server local datasets", e) + + def fetch_nexus_tiles(self, *tile_ids): + tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if + (isinstance(tile_id, str) or isinstance(tile_id, str))] + + res = [] + for tile_id in tile_ids: + filterResults = NexusTileData.objects.filter(tile_id=tile_id) + if len(filterResults) > 0: + res.append(filterResults[0]) + + return res diff --git a/data-access/nexustiles/backends/nexusproto/dao/DynamoProxy.py b/data-access/nexustiles/backends/nexusproto/dao/DynamoProxy.py new file mode 100644 index 00000000..1ee70ac1 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/DynamoProxy.py @@ -0,0 +1,146 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import uuid +import nexusproto.DataTile_pb2 as nexusproto +from nexusproto.serialization import from_shaped_array +import numpy as np +import boto3 + +class NexusTileData(object): + __nexus_tile = None + __data = None + tile_id = None + + def __init__(self, data, _tile_id): + if self.__data is None: + self.__data = data + if self.tile_id is None: + self.tile_id = _tile_id + + def _get_nexus_tile(self): + if self.__nexus_tile is None: + self.__nexus_tile = nexusproto.TileData.FromString(self.__data) + + return self.__nexus_tile + + def get_raw_data_array(self): + + nexus_tile = self._get_nexus_tile() + the_tile_type = nexus_tile.tile.WhichOneof("tile_type") + + the_tile_data = getattr(nexus_tile.tile, the_tile_type) + + return from_shaped_array(the_tile_data.variable_data) + + def get_lat_lon_time_data_meta(self): + if self._get_nexus_tile().HasField('grid_tile'): + grid_tile = self._get_nexus_tile().grid_tile + + grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) + latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) + + if len(grid_tile_data.shape) == 2: + grid_tile_data = grid_tile_data[np.newaxis, :] + + # Extract the meta data + meta_data = {} + for meta_data_obj in grid_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + if len(meta_array.shape) == 2: + meta_array = meta_array[np.newaxis, :] + meta_data[name] = meta_array + + return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data + elif self._get_nexus_tile().HasField('swath_tile'): + swath_tile = self._get_nexus_tile().swath_tile + + latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) 
+ longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) + time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) + + # Simplify the tile if the time dimension is the same value repeated + if np.all(time_data == np.min(time_data)): + time_data = np.array([np.min(time_data)]) + + swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) + + tile_data = self._to_standard_index(swath_tile_data, + (len(time_data), len(latitude_data), len(longitude_data))) + + # Extract the meta data + meta_data = {} + for meta_data_obj in swath_tile.meta_data: + name = meta_data_obj.name + actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data + else: + raise NotImplementedError("Only supports grid_tile and swath_tile") + + @staticmethod + def _to_standard_index(data_array, desired_shape): + + if desired_shape[0] == 1: + reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) + row, col = np.indices(data_array.shape) + + reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ + row.flat, col.flat] + reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ + row.flat, col.flat] + reshaped_array = reshaped_array[np.newaxis, :] + else: + reshaped_array = np.ma.masked_all(desired_shape) + row, col = np.indices(data_array.shape) + + reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ + row.flat, col.flat] + reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ + row.flat, col.flat] + + return reshaped_array + + +class DynamoProxy(object): + def __init__(self, config): + self.config = config + self.__dynamo_tablename = config.get("dynamo", "table") + self.__dynamo_region = config.get("dynamo", "region") + self.__dynamo = boto3.resource('dynamodb', region_name=self.__dynamo_region) + self.__dynamo_table = self.__dynamo.Table(self.__dynamo_tablename) + self.__nexus_tile = None + + def fetch_nexus_tiles(self, *tile_ids): + + tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if + (isinstance(tile_id, str) or isinstance(tile_id, str))] + res = [] + for tile_id in tile_ids: + response = self.__dynamo_table.get_item( + Key = { + 'tile_id': str(tile_id) + } + ) + item = response['Item'] + data = item['data'].__str__() + nexus_tile = NexusTileData(data, str(tile_id)) + res.append(nexus_tile) + + return res \ No newline at end of file diff --git a/data-access/nexustiles/backends/nexusproto/dao/ElasticsearchProxy.py b/data-access/nexustiles/backends/nexusproto/dao/ElasticsearchProxy.py new file mode 100644 index 00000000..157630f6 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/ElasticsearchProxy.py @@ -0,0 +1,1235 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import logging +import threading +import time +import re +from datetime import datetime +from pytz import timezone, UTC + +import requests +import pysolr +from shapely import wkt +from elasticsearch import Elasticsearch + +ELASTICSEARCH_CON_LOCK = threading.Lock() +thread_local = threading.local() + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) +ELASTICSEARCH_FORMAT = '%Y-%m-%dT%H:%M:%SZ' +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' + + +class ElasticsearchProxy(object): + def __init__(self, config): + self.elasticsearchHosts = config.get("elasticsearch", "host").split(',') + self.elasticsearchIndex = config.get("elasticsearch", "index") + self.elasticsearchUsername = config.get("elasticsearch", "username") + self.elasticsearchPassword = config.get("elasticsearch", "password") + self.logger = logging.getLogger(__name__) + + with ELASTICSEARCH_CON_LOCK: + elasticsearchcon = getattr(thread_local, 'elasticsearchcon', None) + if elasticsearchcon is None: + elasticsearchcon = Elasticsearch(hosts=self.elasticsearchHosts, http_auth=(self.elasticsearchUsername, self.elasticsearchPassword)) + thread_local.elasticsearchcon = elasticsearchcon + + self.elasticsearchcon = elasticsearchcon + + def find_tile_by_id(self, tile_id): + + params = { + "size": 1, + "query": { + "term": { + "id": { + "value": tile_id + } + } + } + } + + results, _, hits = self.do_query(*(None, None, None, True, None), **params) + assert hits == 1, f"Found {hits} results, expected exactly 1" + return [results[0]["_source"]] + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + + params = { + "query": { + "bool": { + "filter": [], + "should": [], + "minimum_should_match": 1 + } + } + } + + for tile_id in tile_ids: + params['query']['bool']['should'].append({"term": {"id": {"value": tile_id}}}) + + if ds is not None: + params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) + + self._merge_kwargs(params, **kwargs) + + results = self.do_query_all(*(None, None, None, False, None), **params) + assert len(results) == len(tile_ids), "Found %s results, expected exactly %s" % (len(results), len(tile_ids)) + return results + + def find_min_date_from_tiles(self, tile_ids, ds=None, **kwargs): + params = { + "size": 0, + "query": { + "bool": { + "filter": [], + "should": [] + } + }, + "aggs": { + "min_date_agg": { + "min": { + "field": "tile_min_time_dt" + } + } + } + } + + for tile_id in tile_ids: + params['query']['bool']['should'].append({"term": {"id": {"value": tile_id}}}) + if ds is not None: + params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) + + aggregations = self.do_aggregation(*(None, None, None, True, None), **params) + return self.convert_iso_to_datetime(aggregations['min_date_agg']["value_as_string"]) + + def find_max_date_from_tiles(self, tile_ids, ds=None, **kwargs): + + params = { + "size": 0, + "query": { + "bool": { + "filter": [], + "should": [] + } + }, + "aggs": { + "max_date_agg": { + "max": { + "field": "tile_max_time_dt" + } + } + } + } + + for tile_id in tile_ids: + params['query']['bool']['should'].append({"term": {"id": {"value": 
tile_id}}}) + if ds is not None: + params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) + + aggregations = self.do_aggregation(*(None, None, None, True, None), **params) + return self.convert_iso_to_datetime(aggregations['max_date_agg']["value_as_string"]) + + + def find_min_max_date_from_granule(self, ds, granule_name, **kwargs): + + params = { + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "term": { + "granule_s": { + "value": granule_name + } + } + } + ] + } + }, + "aggs": { + "min_date_agg": { + "max": { + "field": "tile_min_time_dt" + } + }, + "max_date_agg": { + "max": { + "field": "tile_max_time_dt" + } + } + } + } + + self._merge_kwargs(params, **kwargs) + + aggregations = self.do_aggregation(*(None, None, None, False, None), **params) + start_time = self.convert_iso_to_datetime(aggregations['min_date_agg']["value_as_string"]) + end_time = self.convert_iso_to_datetime(aggregations['max_date_agg']["value_as_string"]) + + return start_time, end_time + + def get_data_series_list(self): + + datasets = self.get_data_series_list_simple() + + for dataset in datasets: + min_date = self.find_min_date_from_tiles([], ds=dataset['title']) + max_date = self.find_max_date_from_tiles([], ds=dataset['title']) + dataset['start'] = (min_date - EPOCH).total_seconds() + dataset['end'] = (max_date - EPOCH).total_seconds() + dataset['iso_start'] = min_date.strftime(ISO_8601) + dataset['iso_end'] = max_date.strftime(ISO_8601) + + return datasets + + def get_data_series_list_simple(self): + + params = { + 'size': 0, + "aggs": { + "dataset_list_agg": { + "composite": { + "size":100, + "sources": [ + { + "dataset_s": { + "terms": { + "field": "dataset_s" + } + } + } + ] + } + } + } + } + + aggregations = self.do_aggregation_all(params, 'dataset_list_agg') + l = [] + + for dataset in aggregations: + l.append({ + "shortName": dataset['key']['dataset_s'], + "title": dataset['key']['dataset_s'], + "tileCount": dataset["doc_count"] + }) + + l = sorted(l, key=lambda entry: entry["title"]) + return l + + def get_data_series_stats(self, ds): + + params = { + "size": 0, + "query": { + "term":{ + "dataset_s": { + "value": ds + } + } + }, + "aggs": { + "available_dates": { + "composite": { + "size": 100, + "sources": [ + {"terms_tile_max_time_dt": {"terms": {"field": "tile_max_time_dt"}}} + ] + } + } + } + } + + aggregations = self.do_aggregation_all(params, 'available_dates') + stats = {} + stats['available_dates'] = [] + + for dt in aggregations: + stats['available_dates'].append(dt['key']['terms_tile_max_time_dt'] / 1000) + + stats['available_dates'] = sorted(stats['available_dates']) + + params = { + "size": 0, + "query": { + "term":{ + "dataset_s": { + "value": ds + } + } + }, + "aggs": { + "min_tile_min_val_d": { + "min": { + "field": "tile_min_val_d" + } + }, + "min_tile_max_time_dt": { + "min": { + "field": "tile_max_time_dt" + } + }, + "max_tile_max_time_dt": { + "max": { + "field": "tile_max_time_dt" + } + }, + "max_tile_max_val_d": { + "max": { + "field": "tile_max_val_d" + } + } + } + } + + aggregations = self.do_aggregation(*(None, None, None, False, None), **params) + stats["start"] = int(aggregations["min_tile_max_time_dt"]["value"]) / 1000 + stats["end"] = int(aggregations["max_tile_max_time_dt"]["value"]) / 1000 + stats["minValue"] = aggregations["min_tile_min_val_d"]["value"] + stats["maxValue"] = aggregations["max_tile_max_val_d"]["value"] + + return stats + + # day_of_year_i added (SDAP-347) + def 
find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year): + + max_lat = bounding_polygon.bounds[3] + min_lon = bounding_polygon.bounds[0] + min_lat = bounding_polygon.bounds[1] + max_lon = bounding_polygon.bounds[2] + + params = { + "size": "1", + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + }, + { + "range": { + "day_of_year_i": { + "lte": day_of_year + } + } + } + ] + } + } + } + result, _, _ = self.do_query(*(None, None, None, True, 'day_of_year_i desc'), **params) + + return [result[0]] + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): + + search_start_s = datetime.utcfromtimestamp(start_time).strftime(ELASTICSEARCH_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(ELASTICSEARCH_FORMAT) + + params = { + "size": "0", + "_source": "tile_min_time_dt", + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "range": { + "tile_min_time_dt": { + "gte": search_start_s, + "lte": search_end_s + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "intersects" + } + } + } + ] + } + }, + "aggs": { + "days_range_agg": { + "composite": { + "size":100, + "sources": [ + { + "tile_min_time_dt": { + "terms": { + "field": "tile_min_time_dt" + } + } + } + ] + } + } + } + } + + aggregations = self.do_aggregation_all(params, 'days_range_agg') + results = [res['key']['tile_min_time_dt'] for res in aggregations] + daysinrangeasc = sorted([(res / 1000) for res in results]) + return daysinrangeasc + + def find_all_tiles_in_box_sorttimeasc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, + end_time=-1, **kwargs): + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + } + ] + } + } + } + + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, 'tile_min_time_dt asc,tile_max_time_dt asc'), **params) + + def find_all_tiles_in_polygon_sorttimeasc(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + nums = re.findall(r'\d+(?:\.\d*)?', bounding_polygon.wkt.rpartition(',')[0]) + polygon_coordinates = list(zip(*[iter(nums)] * 2)) + + max_lat = bounding_polygon.bounds[3] + min_lon = bounding_polygon.bounds[0] + min_lat = bounding_polygon.bounds[1] + max_lon = bounding_polygon.bounds[2] + + params = { + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + }, + "relation": "intersects" + } + } + } + ] + } + } + } + + try: + if 'fl' in list(kwargs.keys()): + params["_source"] = 
kwargs["fl"].split(',') + except KeyError: + pass + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + return self.do_query_all(*(None, None, None, False, 'tile_min_time_dt asc,tile_max_time_dt asc'), **params) + + def find_all_tiles_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + nums = re.findall(r'\d+(?:\.\d*)?', bounding_polygon.wkt.rpartition(',')[0]) + polygon_coordinates = list(zip(*[iter(nums)] * 2)) + + max_lat = bounding_polygon.bounds[3] + min_lon = bounding_polygon.bounds[0] + min_lat = bounding_polygon.bounds[1] + max_lon = bounding_polygon.bounds[2] + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + } + ] + } + } + } + + try: + if 'fl' in list(kwargs.keys()): + params["_source"] = kwargs["fl"].split(',') + except KeyError: + pass + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + def find_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + tile_max_lat = bounding_polygon.bounds[3] + tile_min_lon = bounding_polygon.bounds[0] + tile_min_lat = bounding_polygon.bounds[1] + tile_max_lon = bounding_polygon.bounds[2] + + params = { + "size": 0, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[tile_min_lon, tile_max_lat], [tile_max_lon, tile_min_lat]] + }, + "relation": "intersects" + } + } + } + ] + } + }, + "aggs": { + "distinct_bounding_boxes": { + "composite": { + "size": 100, + "sources": [ + { + "bounding_box": { + "terms": { + "script": { + "source": "String.valueOf(doc['tile_min_lon'].value) + ', ' + String.valueOf(doc['tile_max_lon'].value) + ', ' + String.valueOf(doc['tile_min_lat'].value) + ', ' + String.valueOf(doc['tile_max_lat'].value)", + "lang": "painless" + } + } + } + } + ] + } + } + } + } + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + aggregations = self.do_aggregation_all(params, 'distinct_bounding_boxes') + distinct_bounds = [] + for agg in aggregations: + coords = agg['key']['bounding_box'].split(',') + min_lon = round(float(coords[0]), 2) + max_lon = round(float(coords[1]), 2) + min_lat = round(float(coords[2]), 2) + max_lat = round(float(coords[3]), 2) + polygon = 'POLYGON((%s %s, %s %s, %s %s, %s %s, %s %s))' % (min_lon, max_lat, min_lon, min_lat, max_lon, min_lat, max_lon, max_lat, min_lon, max_lat) + distinct_bounds.append(wkt.loads(polygon).bounds) + + return distinct_bounds + + def find_tiles_by_exact_bounds(self, minx, miny, maxx, maxy, ds, start_time=0, end_time=-1, **kwargs): + + params = { + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + 
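+ # exact-match term filters on the four tile bounding-box corner fields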
{ + "term": { + "tile_min_lon": { + "value": minx + } + } + }, + { + "term": { + "tile_min_lat": { + "value": miny + } + } + }, + { + "term": { + "tile_max_lon": { + "value": maxx + } + } + }, + { + "term": { + "tile_max_lat": { + "value": maxy + } + } + } + ] + } + }} + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, search_time, **kwargs): + + the_time = datetime.utcfromtimestamp(search_time).strftime(ELASTICSEARCH_FORMAT) + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_min_time_dt": { + "lte": the_time + } + } + }, + { + "range": { + "tile_max_time_dt": { + "gte": the_time + } + } + } + ] + } + } + } + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, ds, search_time, **kwargs): + + the_time = datetime.utcfromtimestamp(search_time).strftime(ELASTICSEARCH_FORMAT) + + max_lat = bounding_polygon.bounds[3] + min_lon = bounding_polygon.bounds[0] + min_lat = bounding_polygon.bounds[1] + max_lon = bounding_polygon.bounds[2] + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { "range": { + "tile_min_time_dt": { + "lte": the_time + } + } }, + { "range": { + "tile_max_time_dt": { + "gte": the_time + } + } } + ] + } + } + } + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + + def find_all_tiles_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): + + the_time = datetime.utcfromtimestamp(time).strftime(ELASTICSEARCH_FORMAT) + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "within" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + }, + { + "range": { + "tile_min_time_dt": { + "lte": the_time + } + } + }, + { + "range": { + "tile_max_time_dt": { + "gte": the_time + } + } + } + ] + } + } + } + + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, "product(tile_avg_val_d, tile_count_i),*", None, False, None), **params) + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): + + the_time = datetime.utcfromtimestamp(time).strftime(ELASTICSEARCH_FORMAT) + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "multilinestring", + "coordinates": [[[min_lon, max_lat], [max_lon, max_lat], [min_lon, max_lat], 
[min_lon, min_lat], [max_lon, max_lat], [max_lon, min_lat], [min_lon, min_lat], [max_lon, min_lat]]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + }, + { + "range": { + "tile_min_time_dt": { + "lte": the_time + } + } + }, + { + "range": { + "tile_max_time_dt": { + "gte": the_time + } + } + } + ], + "must_not" : { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + }, + "relation": "within" + } + } + } + } + } + } + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + def find_all_tiles_by_metadata(self, metadata, ds, start_time=0, end_time=-1, **kwargs): + """ + Get a list of tile metadata that matches the specified metadata, start_time, end_time. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tile metadata + """ + + params = { + "query": { + "bool": { + "must": [ + { + "term": { + "dataset_s": {"value": ds} + } + } + ] + } + } + } + + if len(metadata) > 0: + for key_value in metadata: + key = key_value.split(':')[0] + value = key_value.split(':')[1] + params['query']['bool']['must'].append({"match": {key: value}}) + + if 0 < start_time <= end_time: + params['query']['bool']['should'] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + return self.do_query_all(*(None, None, None, False, None), **params) + + def get_formatted_time_clause(self, start_time, end_time): + search_start_s = datetime.utcfromtimestamp(start_time).strftime(ELASTICSEARCH_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(ELASTICSEARCH_FORMAT) + + time_clause = [ + { + "range": { + "tile_min_time_dt": { + "lte": search_end_s, + "gte": search_start_s + } + } + }, + { + "range": { + "tile_max_time_dt": { + "lte": search_end_s, + "gte": search_start_s + } + } + }, + { + "bool": { + "must": [ + { + "range": { + "tile_min_time_dt": { + "gte": search_start_s + } + } + }, + { + "range": { + "tile_max_time_dt": { + "lte": search_end_s + } + } + } + ] + } + } + ] + + return time_clause + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + + params = { + "size": 0, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + } + ] + } + } + } + + if bounding_polygon: + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + geo_clause = { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + } + } + } + } + + params['query']['bool']['filter'].append(geo_clause) + + if 0 < start_time <= end_time: + params['query']['bool']['should'] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + if len(metadata) > 0: + for key_value in metadata: + key = key_value.split(':')[0] + value = key_value.split(':')[1] + params['query']['bool']['filter'].append({"term": {key: {"value": value}}}) + + self._merge_kwargs(params, **kwargs) + _, _, found = self.do_query(*(None, None, None, True, None), **params) + + return found + + def do_aggregation(self, *args, **params): + # Gets raw aggregations + + response = self.do_query_raw(*args, **params) + aggregations = response.get('aggregations', None) + return aggregations + + def do_aggregation_all(self, params, agg_name): + # Used for pagination when results can exceed ES max size (use of after_key) + + with ELASTICSEARCH_CON_LOCK: + response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) + all_buckets = [] + + try: + aggregations = response.get('aggregations', None) + current_buckets = aggregations.get(agg_name, None) + buckets = current_buckets.get('buckets', None) + all_buckets += buckets + after_bucket = current_buckets.get('after_key', None) + + while after_bucket is not None: + for agg in params['aggs']: + params['aggs'][agg]['composite']['after'] = {} + for source in params['aggs'][agg]['composite']['sources']: + key_name = next(iter(source)) + params['aggs'][agg]['composite']['after'][key_name] = after_bucket[key_name] + with ELASTICSEARCH_CON_LOCK: + response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) + + aggregations = response.get('aggregations', None) + current_buckets = aggregations.get(agg_name, None) + buckets = current_buckets.get('buckets', None) + all_buckets += buckets + after_bucket = current_buckets.get('after_key', None) + + except AttributeError as e: + self.logger.error('Error when accessing aggregation buckets - ' + str(e)) + + return all_buckets + + def do_query(self, *args, **params): + response = self.do_query_raw(*args, **params) + return response['hits']['hits'], None, response['hits']['total']['value'] + + def do_query_raw(self, *args, **params): + + if args[4]: + + sort_fields = args[4].split(",") + + if 'sort' not in list(params.keys()): + params["sort"] = [] + + for field in sort_fields: + field_order = field.split(' ') + sort_instruction = {field_order[0]: field_order[1]} + if sort_instruction not in params['sort']: + params["sort"].append(sort_instruction) + with ELASTICSEARCH_CON_LOCK: + response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) + + return response + + def 
do_query_all(self, *args, **params): + # Used to paginate with search_after. + # The method calling this might already have a sort clause, + # so we merge both sort clauses inside do_query_raw + + results = [] + + search = None + + # Add track option to not be blocked at 10000 hits per worker + if 'track_total_hits' not in params.keys(): + params['track_total_hits'] = True + + # Add sort instruction order to paginate the results : + params["sort"] = [ + { "tile_min_time_dt": "asc"}, + { "_id": "asc" } + ] + + response = self.do_query_raw(*args, **params) + results.extend([r["_source"] for r in response["hits"]["hits"]]) + + total_hits = response["hits"]["total"]["value"] + + try: + search_after = [] + for sort_param in response["hits"]["hits"][-1]["sort"]: + search_after.append(str(sort_param)) + except (KeyError, IndexError): + search_after = [] + + try: + while len(results) < total_hits: + params["search_after"] = search_after + response = self.do_query_raw(*args, **params) + results.extend([r["_source"] for r in response["hits"]["hits"]]) + + search_after = [] + for sort_param in response["hits"]["hits"][-1]["sort"]: + search_after.append(str(sort_param)) + + except (KeyError, IndexError): + pass + + return results + + def convert_iso_to_datetime(self, date): + return datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=UTC) + + def convert_iso_to_timestamp(self, date): + return (self.convert_iso_to_datetime(date) - EPOCH).total_seconds() + + @staticmethod + def _merge_kwargs(params, **kwargs): + # Only Solr-specific kwargs are parsed + # And the special 'limit' + try: + params['limit'] = kwargs['limit'] + except KeyError: + pass + + try: + params['_route_'] = kwargs['_route_'] + except KeyError: + pass + + try: + params['size'] = kwargs['size'] + except KeyError: + pass + + try: + params['start'] = kwargs['start'] + except KeyError: + pass + + try: + s = kwargs['sort'] if isinstance(kwargs['sort'], list) else [kwargs['sort']] + except KeyError: + s = None + + try: + params['sort'].extend(s) + except KeyError: + if s is not None: + params['sort'] = s diff --git a/data-access/nexustiles/backends/nexusproto/dao/S3Proxy.py b/data-access/nexustiles/backends/nexusproto/dao/S3Proxy.py new file mode 100644 index 00000000..c8d3adfe --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/S3Proxy.py @@ -0,0 +1,141 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
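+ # S3-backed tile data store: each tile's serialized protobuf is stored as an S3 object keyed by the tile UUID and deserialized into NexusTileData on fetch.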
+ +import uuid + +import boto3 +import nexusproto.DataTile_pb2 as nexusproto +import numpy as np +from nexusproto.serialization import from_shaped_array + + +class NexusTileData(object): + __nexus_tile = None + __data = None + tile_id = None + + def __init__(self, data, _tile_id): + if self.__data is None: + self.__data = data + if self.tile_id is None: + self.tile_id = _tile_id + + def _get_nexus_tile(self): + if self.__nexus_tile is None: + self.__nexus_tile = nexusproto.TileData.FromString(self.__data) + + return self.__nexus_tile + + def get_raw_data_array(self): + + nexus_tile = self._get_nexus_tile() + the_tile_type = nexus_tile.tile.WhichOneof("tile_type") + + the_tile_data = getattr(nexus_tile.tile, the_tile_type) + + return from_shaped_array(the_tile_data.variable_data) + + def get_lat_lon_time_data_meta(self): + if self._get_nexus_tile().HasField('grid_tile'): + grid_tile = self._get_nexus_tile().grid_tile + + grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) + latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) + + if len(grid_tile_data.shape) == 2: + grid_tile_data = grid_tile_data[np.newaxis, :] + + # Extract the meta data + meta_data = {} + for meta_data_obj in grid_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + if len(meta_array.shape) == 2: + meta_array = meta_array[np.newaxis, :] + meta_data[name] = meta_array + + return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data + elif self._get_nexus_tile().HasField('swath_tile'): + swath_tile = self._get_nexus_tile().swath_tile + + latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) + longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) + time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) + + # Simplify the tile if the time dimension is the same value repeated + if np.all(time_data == np.min(time_data)): + time_data = np.array([np.min(time_data)]) + + swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) + + tile_data = self._to_standard_index(swath_tile_data, + (len(time_data), len(latitude_data), len(longitude_data))) + + # Extract the meta data + meta_data = {} + for meta_data_obj in swath_tile.meta_data: + name = meta_data_obj.name + actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data + else: + raise NotImplementedError("Only supports grid_tile and swath_tile") + + @staticmethod + def _to_standard_index(data_array, desired_shape): + + if desired_shape[0] == 1: + reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) + row, col = np.indices(data_array.shape) + + reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ + row.flat, col.flat] + reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ + row.flat, col.flat] + reshaped_array = reshaped_array[np.newaxis, :] + else: + reshaped_array = np.ma.masked_all(desired_shape) + row, col = np.indices(data_array.shape) + + reshaped_array[np.diag_indices(desired_shape[1], 
len(reshaped_array.shape))] = data_array[ + row.flat, col.flat] + reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ + row.flat, col.flat] + + return reshaped_array + + +class S3Proxy(object): + def __init__(self, config): + self.config = config + self.__s3_bucketname = config.get("s3", "bucket") + self.__s3_region = config.get("s3", "region") + self.__s3 = boto3.resource('s3') + self.__nexus_tile = None + + def fetch_nexus_tiles(self, *tile_ids): + tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if + (isinstance(tile_id, str) or isinstance(tile_id, str))] + res = [] + for tile_id in tile_ids: + obj = self.__s3.Object(self.__s3_bucketname, str(tile_id)) + data = obj.get()['Body'].read() + nexus_tile = NexusTileData(data, str(tile_id)) + res.append(nexus_tile) + + return res diff --git a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py new file mode 100644 index 00000000..9b16533d --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py @@ -0,0 +1,731 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
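+ # Solr-backed metadata store: tile metadata is queried through pysolr, results are paginated in do_query_all, and dataset summaries and statistics are computed with Solr facets.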
+ +import json +import logging +import threading +import time +from datetime import datetime +from pytz import timezone, UTC + +import requests +import pysolr +from shapely import wkt + +SOLR_CON_LOCK = threading.Lock() +thread_local = threading.local() + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) +SOLR_FORMAT = '%Y-%m-%dT%H:%M:%SZ' +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' + + +class SolrProxy(object): + def __init__(self, config): + self.solrUrl = config.get("solr", "host") + self.solrCore = config.get("solr", "core") + solr_kargs = {} + if config.has_option("solr", "time_out"): + solr_kargs["timeout"] = config.get("solr", "time_out") + self.logger = logging.getLogger('nexus') + + with SOLR_CON_LOCK: + solrcon = getattr(thread_local, 'solrcon', None) + if solrcon is None: + solr_url = '%s/solr/%s' % (self.solrUrl, self.solrCore) + self.logger.info("connect to solr, url {} with option(s) = {}".format(solr_url, solr_kargs)) + solrcon = pysolr.Solr(solr_url, **solr_kargs) + thread_local.solrcon = solrcon + + self.solrcon = solrcon + + def find_tile_by_id(self, tile_id): + + search = 'id:%s' % tile_id + + params = { + 'rows': 1 + } + + results, start, found = self.do_query(*(search, None, None, True, None), **params) + + assert len(results) == 1, "Found %s results, expected exactly 1" % len(results) + return [results[0]] + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + + if ds is not None: + search = 'dataset_s:%s' % ds + else: + search = '*:*' + + additionalparams = { + 'fq': [ + "{!terms f=id}%s" % ','.join(tile_ids) + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results = self.do_query_all(*(search, None, None, False, None), **additionalparams) + + assert len(results) == len(tile_ids), "Found %s results, expected exactly %s" % (len(results), len(tile_ids)) + return results + + def find_min_date_from_tiles(self, tile_ids, ds=None, **kwargs): + + if ds is not None: + search = 'dataset_s:%s' % ds + else: + search = '*:*' + + kwargs['rows'] = 1 + kwargs['fl'] = 'tile_min_time_dt' + kwargs['sort'] = ['tile_min_time_dt asc'] + additionalparams = { + 'fq': [ + "{!terms f=id}%s" % ','.join(tile_ids) if len(tile_ids) > 0 else '' + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return self.convert_iso_to_datetime(results[0]['tile_min_time_dt']) + + def find_max_date_from_tiles(self, tile_ids, ds=None, **kwargs): + + if ds is not None: + search = 'dataset_s:%s' % ds + else: + search = '*:*' + + kwargs['rows'] = 1 + kwargs['fl'] = 'tile_max_time_dt' + kwargs['sort'] = ['tile_max_time_dt desc'] + additionalparams = { + 'fq': [ + "{!terms f=id}%s" % ','.join(tile_ids) if len(tile_ids) > 0 else '' + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return self.convert_iso_to_datetime(results[0]['tile_max_time_dt']) + + def find_min_max_date_from_granule(self, ds, granule_name, **kwargs): + search = 'dataset_s:%s' % ds + + kwargs['rows'] = 1 + kwargs['fl'] = 'tile_min_time_dt' + kwargs['sort'] = ['tile_min_time_dt asc'] + additionalparams = { + 'fq': [ + "granule_s:%s" % granule_name + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + results, start, found = self.do_query(*(search, None, None, False, None), **additionalparams) + start_time = self.convert_iso_to_datetime(results[0]['tile_min_time_dt']) + + kwargs['fl'] = 'tile_max_time_dt' + 
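+ # Second query (end of range): take the newest tile_max_time_dt for this granule.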
kwargs['sort'] = ['tile_max_time_dt desc'] + additionalparams = { + 'fq': [ + "granule_s:%s" % granule_name + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + results, start, found = self.do_query(*(search, None, None, False, None), **additionalparams) + end_time = self.convert_iso_to_datetime(results[0]['tile_max_time_dt']) + + return start_time, end_time + + def get_data_series_list(self): + + datasets = self.get_data_series_list_simple() + + for dataset in datasets: + min_date = self.find_min_date_from_tiles([], ds=dataset['title']) + max_date = self.find_max_date_from_tiles([], ds=dataset['title']) + dataset['start'] = (min_date - EPOCH).total_seconds() + dataset['end'] = (max_date - EPOCH).total_seconds() + dataset['iso_start'] = min_date.strftime(ISO_8601) + dataset['iso_end'] = max_date.strftime(ISO_8601) + + return datasets + + def get_data_series_list_simple(self): + search = "*:*" + params = { + 'rows': 0, + "facet": "true", + "facet.field": "dataset_s", + "facet.mincount": "1", + "facet.limit": "-1" + } + + + response = self.do_query_raw(*(search, None, None, False, None), **params) + l = [] + for g, v in zip(*[iter(response.facets["facet_fields"]["dataset_s"])]*2): + l.append({ + "shortName": g, + "title": g, + "tileCount": v + }) + l = sorted(l, key=lambda entry: entry["title"]) + return l + + def get_data_series_stats(self, ds): + search = "dataset_s:%s" % ds + params = { + "facet": "true", + "facet.field": ["dataset_s", "tile_max_time_dt"], + "facet.limit": "-1", + "facet.mincount": "1", + "facet.pivot": "{!stats=piv1}dataset_s", + "stats": "on", + "stats.field": ["{!tag=piv1 min=true max=true sum=false}tile_max_time_dt","{!tag=piv1 min=true max=false sum=false}tile_min_val_d","{!tag=piv1 min=false max=true sum=false}tile_max_val_d"] + } + + response = self.do_query_raw(*(search, None, None, False, None), **params) + + stats = {} + + for g in response.facets["facet_pivot"]["dataset_s"]: + if g["value"] == ds: + stats["start"] = self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["min"]) + stats["end"] = self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["max"]) + stats["minValue"] = g["stats"]["stats_fields"]["tile_min_val_d"]["min"] + stats["maxValue"] = g["stats"]["stats_fields"]["tile_max_val_d"]["max"] + + + stats["availableDates"] = [] + for dt in response.facets["facet_fields"]["tile_max_time_dt"][::2]: + stats["availableDates"].append(self.convert_iso_to_timestamp(dt)) + + stats["availableDates"] = sorted(stats["availableDates"]) + + return stats + + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year): + + search = 'dataset_s:%s' % ds + + params = { + 'fq': [ + "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]", + "day_of_year_i:[* TO %s]" % day_of_year + ], + 'rows': 1 + } + + results, start, found = self.do_query( + *(search, None, None, True, ('day_of_year_i desc',)), **params) + + return [results[0]] + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): + + search = 'dataset_s:%s' % ds + + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + additionalparams = { + 'fq': [ + "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), + "{!frange l=0 u=0}ms(tile_min_time_dt,tile_max_time_dt)", + "tile_count_i:[1 TO *]", + "tile_min_time_dt:[%s TO %s] " % (search_start_s, 
search_end_s) + ], + 'rows': 0, + 'facet': 'true', + 'facet.field': 'tile_min_time_dt', + 'facet.mincount': '1', + 'facet.limit': '-1' + } + + self._merge_kwargs(additionalparams, **kwargs) + + response = self.do_query_raw(*(search, None, None, False, None), **additionalparams) + + daysinrangeasc = sorted( + [(datetime.strptime(a_date, SOLR_FORMAT) - datetime.utcfromtimestamp(0)).total_seconds() for a_date + in response.facets['facet_fields']['tile_min_time_dt'][::2]]) + + return daysinrangeasc + + def find_all_tiles_in_box_sorttimeasc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, + end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), + "tile_count_i:[1 TO *]" + ] + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, 'tile_min_time_dt asc, tile_max_time_dt asc'), + **additionalparams) + + def find_all_tiles_in_polygon_sorttimeasc(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]" + ] + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, 'tile_min_time_dt asc, tile_max_time_dt asc'), + **additionalparams) + + def find_all_tiles_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]" + ] + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, None), + **additionalparams) + + def find_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "{!field 
f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]" + ], + 'rows': 0, + 'facet': 'true', + 'facet.field': 'geo_s', + 'facet.limit': -1, + 'facet.mincount': 1 + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + response = self.do_query_raw(*(search, None, None, False, None), **additionalparams) + + distinct_bounds = [wkt.loads(key).bounds for key in response.facets["facet_fields"]["geo_s"][::2]] + + return distinct_bounds + + def find_tiles_by_exact_bounds(self, minx, miny, maxx, maxy, ds, start_time=0, end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "tile_min_lon:\"%s\"" % minx, + "tile_min_lat:\"%s\"" % miny, + "tile_max_lon:\"%s\"" % maxx, + "tile_max_lat:\"%s\"" % maxy, + "tile_count_i:[1 TO *]" + ] + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, None), + **additionalparams) + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, search_time, **kwargs): + search = 'dataset_s:%s' % ds + + the_time = datetime.utcfromtimestamp(search_time).strftime(SOLR_FORMAT) + time_clause = "(" \ + "tile_min_time_dt:[* TO %s] " \ + "AND tile_max_time_dt:[%s TO *] " \ + ")" % ( + the_time, the_time + ) + + additionalparams = { + 'fq': [ + "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), + "tile_count_i:[1 TO *]", + time_clause + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all(*(search, None, None, False, None), **additionalparams) + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, ds, search_time, **kwargs): + search = 'dataset_s:%s' % ds + + the_time = datetime.utcfromtimestamp(search_time).strftime(SOLR_FORMAT) + time_clause = "(" \ + "tile_min_time_dt:[* TO %s] " \ + "AND tile_max_time_dt:[%s TO *] " \ + ")" % ( + the_time, the_time + ) + + additionalparams = { + 'fq': [ + "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]", + time_clause + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all(*(search, None, None, False, None), **additionalparams) + + def find_all_tiles_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): + search = 'dataset_s:%s' % ds + + the_time = datetime.utcfromtimestamp(time).strftime(SOLR_FORMAT) + time_clause = "(" \ + "tile_min_time_dt:[* TO %s] " \ + "AND tile_max_time_dt:[%s TO *] " \ + ")" % ( + the_time, the_time + ) + + additionalparams = { + 'fq': [ 
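+ # Solr ENVELOPE corners are ordered (minX, maxX, maxY, minY), hence (min_lon, max_lon, max_lat, min_lat) below.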
+ "geo:\"Within(ENVELOPE(%s,%s,%s,%s))\"" % (min_lon, max_lon, max_lat, min_lat), + "tile_count_i:[1 TO *]", + time_clause + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all(*(search, "product(tile_avg_val_d, tile_count_i),*", None, False, None), + **additionalparams) + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): + search = 'dataset_s:%s' % ds + + the_time = datetime.utcfromtimestamp(time).strftime(SOLR_FORMAT) + time_clause = "(" \ + "tile_min_time_dt:[* TO %s] " \ + "AND tile_max_time_dt:[%s TO *] " \ + ")" % ( + the_time, the_time + ) + + additionalparams = { + 'fq': [ + "geo:\"Intersects(MultiLineString((%s %s, %s %s),(%s %s, %s %s),(%s %s, %s %s),(%s %s, %s %s)))\"" % ( + min_lon, max_lat, max_lon, max_lat, min_lon, max_lat, min_lon, min_lat, max_lon, max_lat, max_lon, + min_lat, min_lon, min_lat, max_lon, min_lat), + "-geo:\"Within(ENVELOPE(%s,%s,%s,%s))\"" % (min_lon, max_lon, max_lat, min_lat), + "tile_count_i:[1 TO *]", + time_clause + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all(*(search, None, None, False, None), **additionalparams) + + def find_all_tiles_by_metadata(self, metadata, ds, start_time=0, end_time=-1, **kwargs): + """ + Get a list of tile metadata that matches the specified metadata, start_time, end_time. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tile metadata + """ + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': metadata + } + + if 0 <= start_time <= end_time: + additionalparams['fq'].append(self.get_formatted_time_clause(start_time, end_time)) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, None), + **additionalparams) + + def get_formatted_time_clause(self, start_time, end_time): + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + return time_clause + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "tile_count_i:[1 TO *]" + ], + 'rows': 0 + } + + if bounding_polygon: + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + additionalparams['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) + + if 0 <= start_time <= end_time: + additionalparams['fq'].append(self.get_formatted_time_clause(start_time, end_time)) + + if metadata: + additionalparams['fq'].extend(metadata) + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return found + + def do_query(self, *args, **params): + + response = self.do_query_raw(*args, **params) + + return response.docs, response.raw_response['response']['start'], response.hits + + def do_query_raw(self, *args, **params): + + if 'fl' not in list(params.keys()) and args[1]: + params['fl'] = args[1] + + if 'sort' not in list(params.keys()) and args[4]: + params['sort'] = args[4] + + # If dataset_s is specified as the search term, + # add the _route_ parameter to limit the search to the correct shard + if 'dataset_s:' in args[0]: + ds = args[0].split(':')[-1] + params['shard_keys'] = ds + '!' + + with SOLR_CON_LOCK: + response = self.solrcon.search(args[0], **params) + + return response + + + def do_query_all(self, *args, **params): + + results = [] + + response = self.do_query_raw(*args, **params) + results.extend(response.docs) + + limit = min(params.get('limit', float('inf')), response.hits) + + while len(results) < limit: + params['start'] = len(results) + response = self.do_query_raw(*args, **params) + results.extend(response.docs) + + assert len(results) == limit + + return results + + def convert_iso_to_datetime(self, date): + return datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC) + + def convert_iso_to_timestamp(self, date): + return (self.convert_iso_to_datetime(date) - EPOCH).total_seconds() + + def ping(self): + solrAdminPing = '%s/solr/%s/admin/ping' % (self.solrUrl, self.solrCore) + try: + r = requests.get(solrAdminPing, params={'wt': 'json'}) + results = json.loads(r.text) + return results + except: + return None + + @staticmethod + def _merge_kwargs(additionalparams, **kwargs): + # Only Solr-specific kwargs are parsed + # And the special 'limit' + try: + additionalparams['limit'] = kwargs['limit'] + except KeyError: + pass + + try: + additionalparams['_route_'] = kwargs['_route_'] + except KeyError: + pass + + try: + additionalparams['rows'] = kwargs['rows'] + except KeyError: + pass + + try: + additionalparams['start'] = kwargs['start'] + except KeyError: + pass + + try: + kwfq = kwargs['fq'] if isinstance(kwargs['fq'], list) else list(kwargs['fq']) + except KeyError: + kwfq = [] + + try: + additionalparams['fq'].extend(kwfq) + except KeyError: + additionalparams['fq'] = kwfq + + try: + kwfl = kwargs['fl'] if isinstance(kwargs['fl'], list) else [kwargs['fl']] + except KeyError: + kwfl = [] + + try: + additionalparams['fl'].extend(kwfl) + except KeyError: + additionalparams['fl'] = kwfl + + try: + s = kwargs['sort'] if isinstance(kwargs['sort'], list) else 
[kwargs['sort']] + except KeyError: + s = None + + try: + additionalparams['sort'].extend(s) + except KeyError: + if s is not None: + additionalparams['sort'] = s diff --git a/data-access/nexustiles/backends/nexusproto/dao/__init__.py b/data-access/nexustiles/backends/nexusproto/dao/__init__.py new file mode 100644 index 00000000..6acb5d12 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/__init__.py @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/data-access/nexustiles/backends/zarr/__init__.py b/data-access/nexustiles/backends/zarr/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index a3aa61e9..333b0c55 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -32,6 +32,13 @@ from .dao import SolrProxy from .dao import ElasticsearchProxy +from .backends.nexusproto.backend import NexusprotoTileService + + +from abc import ABC, abstractmethod + +from .AbstractTileService import AbstractTileService + from .model.nexusmodel import Tile, BBox, TileStats, TileVariable EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -78,7 +85,9 @@ class NexusTileServiceException(Exception): pass -class NexusTileService(object): +class NexusTileService(AbstractTileService): + backends = {} + def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): self._datastore = None self._metadatastore = None @@ -352,92 +361,6 @@ def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_tim bounds = self._metadatastore.find_distinct_bounding_boxes_in_polygon(bounding_polygon, ds, start_time, end_time) return [box(*b) for b in bounds] - def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): - - for tile in tiles: - tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) - tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - # If this is multi-var, need to mask each variable separately. 
- if tile.is_multi: - # Combine space/time mask with existing mask on data - data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) - - num_vars = len(tile.data) - multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) - tile.data = ma.masked_where(multi_data_mask, tile.data) - else: - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - - def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): - for tile in tiles: - tile.times = ma.masked_outside(tile.times, start_time, end_time) - tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) - tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - - def mask_tiles_to_polygon(self, bounding_polygon, tiles): - - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - - return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) - - def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - - return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) - - def mask_tiles_to_time_range(self, start_time, end_time, tiles): - """ - Masks data in tiles to specified time range. - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :param tiles: List of tiles - :return: A list tiles with data masked to specified time range - """ - if 0 <= start_time <= end_time: - for tile in tiles: - tile.times = ma.masked_outside(tile.times, start_time, end_time) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - # If this is multi-var, need to mask each variable separately. - if tile.is_multi: - # Combine space/time mask with existing mask on data - data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) - - num_vars = len(tile.data) - multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) - tile.data = ma.masked_where(multi_data_mask, tile.data) - else: - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): """ Return number of tiles that match search criteria. 
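For context, here is a minimal sketch of the per-dataset dispatch that the backends registry on NexusTileService enables. The BackendRegistry class and get_backend helper below are illustrative assumptions, not part of these patches; at this point only the nexusproto backend is actually wired up.

    class BackendRegistry:
        # Maps dataset names to AbstractTileService implementations; the None key holds the default.
        def __init__(self, default_backend):
            self._backends = {None: default_backend}

        def register(self, dataset, backend):
            self._backends[dataset] = backend

        def get_backend(self, dataset=None):
            # Unknown datasets fall back to the default (nexusproto) backend.
            return self._backends.get(dataset, self._backends[None])

    # Usage (names illustrative):
    #   registry = BackendRegistry(NexusprotoTileService(False, False, None))
    #   registry.get_backend('MY_DATASET').find_tile_by_id(tile_id)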
From 4f3f6112f0156f5d928f8549ed0a58d6d8f64e9e Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 5 Jul 2023 13:09:11 -0700 Subject: [PATCH 02/91] n/a --- data-access/nexustiles/AbstractTileService.py | 5 ++ data-access/nexustiles/config/datasets.ini | 18 +++++ .../nexustiles/config/datasets.ini.default | 18 +++++ data-access/nexustiles/nexustiles.py | 78 +++++++++++++------ 4 files changed, 96 insertions(+), 23 deletions(-) create mode 100644 data-access/nexustiles/config/datasets.ini create mode 100644 data-access/nexustiles/config/datasets.ini.default diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py index f4f4449c..307a2c15 100644 --- a/data-access/nexustiles/AbstractTileService.py +++ b/data-access/nexustiles/AbstractTileService.py @@ -37,6 +37,11 @@ from nexustiles.nexustiles import NexusTileServiceException class AbstractTileService(ABC): + @staticmethod + @abstractmethod + def open_dataset(dataset_s, **kwargs): + pass + @abstractmethod def get_dataseries_list(self, simple=False): raise NotImplementedError() diff --git a/data-access/nexustiles/config/datasets.ini b/data-access/nexustiles/config/datasets.ini new file mode 100644 index 00000000..9f586cf2 --- /dev/null +++ b/data-access/nexustiles/config/datasets.ini @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[solr] +host=http://localhost:8983 +core=nexusdatasets diff --git a/data-access/nexustiles/config/datasets.ini.default b/data-access/nexustiles/config/datasets.ini.default new file mode 100644 index 00000000..9f586cf2 --- /dev/null +++ b/data-access/nexustiles/config/datasets.ini.default @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
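
(Annotation, not part of the file: the [solr] block that follows points the tile service at a Solr collection describing ingested datasets. A rough sketch of how such settings could be read and queried with pysolr, using only the host and core values shown in this config; the field names are the ones this patch expects:)

    import configparser
    import pysolr

    config = configparser.RawConfigParser()
    config.read('datasets.ini')

    solr_host = config.get('solr', 'host')   # e.g. http://localhost:8983
    solr_core = config.get('solr', 'core')   # e.g. nexusdatasets
    solr = pysolr.Solr(f'{solr_host}/solr/{solr_core}', timeout=10)

    # Each dataset document is expected to carry dataset_s and store_type_s.
    for doc in solr.search('*:*').docs:
        print(doc['dataset_s'], doc.get('store_type_s', 'nexusproto'))
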
+ +[solr] +host=http://localhost:8983 +core=nexusdatasets diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 333b0c55..622792eb 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -18,14 +18,16 @@ import sys import json from datetime import datetime -from functools import wraps, reduce +from functools import wraps, reduce, partial import numpy as np import numpy.ma as ma import pkg_resources from pytz import timezone, UTC from shapely.geometry import MultiPolygon, box +import pysolr +import threading from .dao import CassandraProxy from .dao import DynamoProxy from .dao import S3Proxy @@ -41,6 +43,8 @@ from .model.nexusmodel import Tile, BBox, TileStats, TileVariable +from webservice.webmodel import DatasetNotFoundException + EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) logging.basicConfig( @@ -85,36 +89,64 @@ class NexusTileServiceException(Exception): pass -class NexusTileService(AbstractTileService): - backends = {} +SOLR_LOCK = threading.Lock() +thread_local = threading.local() + - def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): - self._datastore = None - self._metadatastore = None +class NexusTileService(AbstractTileService): + backends = {} # relate ds names to factory func objects + + def __init__(self, config=None): self._config = configparser.RawConfigParser() - self._config.read(NexusTileService._get_config_files('config/datastores.ini')) + self._config.read(NexusTileService._get_config_files('config/datasets.ini')) + + self._alg_config = config if config: self.override_config(config) - if not skipDatastore: - datastore = self._config.get("datastore", "store") - if datastore == "cassandra": - self._datastore = CassandraProxy.CassandraProxy(self._config) - elif datastore == "s3": - self._datastore = S3Proxy.S3Proxy(self._config) - elif datastore == "dynamo": - self._datastore = DynamoProxy.DynamoProxy(self._config) + NexusTileService.backends[None] = NexusprotoTileService(False, False, config) + NexusTileService.backends['__nexusproto__'] = NexusTileService.backends[None] + + + + def _get_ingested_datasets(self): + solr_url = self._config.get("solr", "host") + solr_core = self._config.get("solr", "core") + solr_kwargs = {} + + if self._config.has_option("solr", "time_out"): + solr_kwargs["timeout"] = self._config.get("solr", "time_out") + + with SOLR_LOCK: + solrcon = getattr(thread_local, 'solrcon', None) + if solrcon is None: + solr_url = '%s/solr/%s' % (solr_url, solr_core) + solrcon = pysolr.Solr(solr_url, **solr_kwargs) + thread_local.solrcon = solrcon + + solrcon = solrcon + + response = solrcon.search('*:*') + + for dataset in response.docs: + d_id = dataset['dataset_s'] + store_type = dataset.get('store_type_s', 'nexusproto') + + if store_type == 'nexus_proto': + NexusTileService.backends[d_id] = NexusTileService.backends[None] else: - raise ValueError("Error reading datastore from config file") - - if not skipMetadatastore: - metadatastore = self._config.get("metadatastore", "store", fallback='solr') - if metadatastore == "solr": - self._metadatastore = SolrProxy.SolrProxy(self._config) - elif metadatastore == "elasticsearch": - self._metadatastore = ElasticsearchProxy.ElasticsearchProxy(self._config) + ds_config = dataset['config'] + # NexusTileService.backends[d_id] = + + + + + def get_tileservice_factory(self, dataset=None): + pass + + def override_config(self, config): for section in config.sections(): From 
e32d5addd4488ced41bc895a744c7c3de70f4301 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 6 Jul 2023 14:30:58 -0700 Subject: [PATCH 03/91] More nts backend stuff --- data-access/nexustiles/AbstractTileService.py | 45 +---- .../nexustiles/backends/nexusproto/backend.py | 4 +- .../nexustiles/backends/zarr/backend.py | 45 +++++ data-access/nexustiles/nexustiles.py | 181 +++++++++--------- 4 files changed, 143 insertions(+), 132 deletions(-) create mode 100644 data-access/nexustiles/backends/zarr/backend.py diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py index 307a2c15..6426295b 100644 --- a/data-access/nexustiles/AbstractTileService.py +++ b/data-access/nexustiles/AbstractTileService.py @@ -36,11 +36,16 @@ from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable from nexustiles.nexustiles import NexusTileServiceException + class AbstractTileService(ABC): - @staticmethod + # @staticmethod + # @abstractmethod + # def open_dataset(dataset_s, **kwargs): + # pass + @abstractmethod - def open_dataset(dataset_s, **kwargs): - pass + def try_connect(self) -> bool: + raise NotImplementedError() @abstractmethod def get_dataseries_list(self, simple=False): @@ -115,19 +120,6 @@ def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, * """ raise NotImplementedError() - @abstractmethod - def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): - """ - Return list of tiles that matches the specified metadata, start_time, end_time with tile data outside of time - range properly masked out. - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :param ds: The dataset name to search - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :return: A list of tiles - """ - raise NotImplementedError() - @abstractmethod def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): """ @@ -148,15 +140,6 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): raise NotImplementedError() - @abstractmethod - def get_tiles_bounded_by_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, - **kwargs): - raise NotImplementedError() - - @abstractmethod - def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time=-1, **kwargs): - raise NotImplementedError() - @abstractmethod def get_min_max_time_by_granule(self, ds, granule_name): raise NotImplementedError() @@ -165,18 +148,6 @@ def get_min_max_time_by_granule(self, ds, granule_name): def get_dataset_overall_stats(self, ds): raise NotImplementedError() - @abstractmethod - def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - raise NotImplementedError() - - @abstractmethod - def get_tiles_bounded_by_polygon_at_time(self, polygon, dataset, time, **kwargs): - raise NotImplementedError() - - @abstractmethod - def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - raise NotImplementedError() - @abstractmethod def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): raise NotImplementedError() diff --git a/data-access/nexustiles/backends/nexusproto/backend.py 
b/data-access/nexustiles/backends/nexusproto/backend.py index 86d5ca6a..aa0ab290 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -34,6 +34,7 @@ from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.AbstractTileService import AbstractTileService EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -44,8 +45,9 @@ logger = logging.getLogger("testing") -class NexusprotoTileService(object): +class NexusprotoTileService(AbstractTileService): def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): + AbstractTileService.__init__(self) self._datastore = None self._metadatastore = None diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py new file mode 100644 index 00000000..019cd753 --- /dev/null +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
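
(Annotation, not part of the patch: with NexusprotoTileService now inheriting from AbstractTileService, any additional store only needs to subclass the same interface. A deliberately tiny, hypothetical stub for orientation; a real backend must implement every remaining abstract method:)

    from nexustiles.AbstractTileService import AbstractTileService

    class InMemoryBackend(AbstractTileService):
        """Hypothetical backend used only to illustrate the interface."""

        def __init__(self, config=None):
            self._config = config or {}

        def try_connect(self) -> bool:
            # A real backend would check Solr/Cassandra/S3/Zarr connectivity here.
            return True

        def get_dataseries_list(self, simple=False):
            return []

        # The other abstract methods (find_tiles_in_box, fetch_data_for_tiles,
        # etc.) are omitted here and would still need concrete implementations.
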
+ +import configparser +import logging +import sys +import json +from datetime import datetime +from functools import reduce + +import numpy as np +import numpy.ma as ma +import pkg_resources +from pytz import timezone, UTC +from shapely.geometry import MultiPolygon, box + +from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable +from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.AbstractTileService import AbstractTileService + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) +logger = logging.getLogger("testing") + + +class ZarrBackend(AbstractTileService): + def __init__(self, config): + AbstractTileService.__init__(self) + self.__config = config diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 622792eb..fde0a5f3 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -28,13 +28,10 @@ import pysolr import threading -from .dao import CassandraProxy -from .dao import DynamoProxy -from .dao import S3Proxy -from .dao import SolrProxy -from .dao import ElasticsearchProxy +from time import sleep from .backends.nexusproto.backend import NexusprotoTileService +from .backends.zarr.backend import ZarrBackend from abc import ABC, abstractmethod @@ -42,8 +39,9 @@ from .AbstractTileService import AbstractTileService from .model.nexusmodel import Tile, BBox, TileStats, TileVariable +from typing import Dict, Union -from webservice.webmodel import DatasetNotFoundException +from webservice.webmodel import DatasetNotFoundException, NexusProcessingException EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -90,12 +88,13 @@ class NexusTileServiceException(Exception): SOLR_LOCK = threading.Lock() +DS_LOCK = threading.Lock() thread_local = threading.local() class NexusTileService(AbstractTileService): - backends = {} # relate ds names to factory func objects + backends: Dict[Union[None, str], Dict[str, Union[AbstractTileService, bool]]] = {} def __init__(self, config=None): self._config = configparser.RawConfigParser() @@ -106,12 +105,37 @@ def __init__(self, config=None): if config: self.override_config(config) - NexusTileService.backends[None] = NexusprotoTileService(False, False, config) + NexusTileService.backends[None] = {"backend": NexusprotoTileService(False, False, config), 'up': True} NexusTileService.backends['__nexusproto__'] = NexusTileService.backends[None] + def __update_datasets(): + while True: + with DS_LOCK: + self._update_datasets() + sleep(3600) + threading.Thread(target=__update_datasets, name='dataset_update', daemon=False).start() - def _get_ingested_datasets(self): + + + @staticmethod + def __get_backend(dataset_s) -> AbstractTileService: + if dataset_s not in NexusTileService.backends: + raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') + + b = NexusTileService.backends[dataset_s] + + if not b['up']: + success = b['backend'].try_connect() + + if not success: + raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') + else: + NexusTileService.backends[dataset_s]['up'] = True + + return b['backend'] + + def _update_datasets(self): solr_url = self._config.get("solr", "host") solr_core = self._config.get("solr", "core") solr_kwargs = {} @@ -130,23 +154,34 @@ def _get_ingested_datasets(self): response = 
solrcon.search('*:*') + present_datasets = set() + for dataset in response.docs: d_id = dataset['dataset_s'] store_type = dataset.get('store_type_s', 'nexusproto') - if store_type == 'nexus_proto': - NexusTileService.backends[d_id] = NexusTileService.backends[None] - else: - ds_config = dataset['config'] - # NexusTileService.backends[d_id] = - + present_datasets.add(d_id) + if d_id in NexusTileService.backends: + continue + # is_up = NexusTileService.backends[d_id]['backend'].try_connect() + if store_type == 'nexus_proto' or store_type == 'nexusproto': + NexusTileService.backends[d_id] = NexusTileService.backends[None] + elif store_type == 'zarr': + ds_config = json.loads(dataset['config'][0]) + NexusTileService.backends[d_id] = { + 'backend': ZarrBackend(ds_config), + 'up': True + } + else: + logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') - def get_tileservice_factory(self, dataset=None): - pass - + removed_datasets = set(NexusTileService.backends.keys()).difference(present_datasets) + for dataset in removed_datasets: + logger.info(f"Removing dataset {dataset}") + del NexusTileService.backends[dataset] def override_config(self, config): for section in config.sections(): @@ -163,65 +198,35 @@ def get_dataseries_list(self, simple=False): @tile_data() def find_tile_by_id(self, tile_id, **kwargs): - return self._metadatastore.find_tile_by_id(tile_id) + return NexusTileService.__get_backend('__nexusproto__').find_tile_by_id(tile_id) @tile_data() def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - return self._metadatastore.find_tiles_by_id(tile_ids, ds=ds, **kwargs) + return NexusTileService.__get_backend('__nexusproto__').find_tiles_by_id(tile_ids, ds=ds, **kwargs) def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): - start = datetime.now() - result = self._metadatastore.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, dataset, start_time, - end_time, - **kwargs) - duration = (datetime.now() - start).total_seconds() - if metrics_callback: - metrics_callback(solr=duration) - return result + return NexusTileService.__get_backend(dataset).find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, + dataset, start_time, end_time, + metrics_callback, **kwargs) @tile_data() def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): - """ - Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding - polygon and the closest day of year. 
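
(Annotation, not part of the patch: the delegation pattern used above, a class-level registry keyed by dataset name with a lazy reconnect before each use, reduces to roughly the following simplified sketch; the exception types and names here are placeholders:)

    # dataset name -> {'backend': AbstractTileService instance, 'up': bool}
    backends = {}

    def get_backend(dataset):
        if dataset not in backends:
            raise KeyError(f'Dataset {dataset} is not currently loaded/ingested')
        entry = backends[dataset]
        if not entry['up']:
            if not entry['backend'].try_connect():
                raise RuntimeError(f'Dataset {dataset} is currently unavailable')
            entry['up'] = True
        return entry['backend']

    # Every public find_*/get_* method on NexusTileService then forwards to
    # get_backend(ds).<same method>(...) for the requested dataset.
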
- - For example: - given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 - search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) - - Valid matches: - minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 - minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 - - Invalid matches: - minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 - minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 - minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists - - :param bounding_polygon: The exact bounding polygon of tiles to search for - :param ds: The dataset name being searched - :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned - :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found - """ - try: - tile = self._metadatastore.find_tile_by_polygon_and_most_recent_day_of_year(bounding_polygon, ds, - day_of_year) - except IndexError: - raise NexusTileServiceException("No tile found.").with_traceback(sys.exc_info()[2]) - - return tile + return NexusTileService.__get_backend(ds).find_tile_by_polygon_and_most_recent_day_of_year( + bounding_polygon, ds, day_of_year, **kwargs + ) @tile_data() def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - return self._metadatastore.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, - rows=5000, - **kwargs) + return NexusTileService.__get_backend(dataset).find_all_tiles_in_box_at_time( + min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs + ) @tile_data() def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): - return self._metadatastore.find_all_tiles_in_polygon_at_time(bounding_polygon, dataset, time, rows=5000, - **kwargs) + return NexusTileService.__get_backend(dataset).find_all_tiles_in_polygon_at_time( + bounding_polygon, dataset, time, **kwargs + ) @tile_data() def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): @@ -230,33 +235,22 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t start_time = (start_time - EPOCH).total_seconds() if type(end_time) is datetime: end_time = (end_time - EPOCH).total_seconds() - return self._metadatastore.find_all_tiles_in_box_sorttimeasc(min_lat, max_lat, min_lon, max_lon, ds, start_time, - end_time, **kwargs) + + return NexusTileService.__get_backend(ds).find_tiles_in_box( + min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs + ) @tile_data() def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): - # Find tiles that fall within the polygon in the Solr index - if 'sort' in list(kwargs.keys()): - tiles = self._metadatastore.find_all_tiles_in_polygon(bounding_polygon, ds, start_time, end_time, **kwargs) - else: - tiles = self._metadatastore.find_all_tiles_in_polygon_sorttimeasc(bounding_polygon, ds, start_time, - end_time, - **kwargs) - return tiles + return NexusTileService.__get_backend(ds).find_tiles_in_polygon( + bounding_polygon, ds, start_time, end_time, **kwargs + ) @tile_data() def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): - """ - Return list of tiles whose metadata 
matches the specified metadata, start_time, end_time. - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :param ds: The dataset name to search - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :return: A list of tiles - """ - tiles = self._metadatastore.find_all_tiles_by_metadata(metadata, ds, start_time, end_time, **kwargs) - - return tiles + return NexusTileService.__get_backend(ds).find_tiles_by_metadata( + metadata, ds, start_time, end_time, **kwargs + ) def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): """ @@ -287,16 +281,15 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) :param kwargs: fetch_data: True/False = whether or not to retrieve tile data :return: """ - tiles = self._metadatastore.find_tiles_by_exact_bounds(bounds[0], bounds[1], bounds[2], bounds[3], ds, - start_time, - end_time) - return tiles + return NexusTileService.__get_backend(ds).find_tiles_by_exact_bounds( + bounds, ds, start_time, end_time, **kwargs + ) @tile_data() def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - return self._metadatastore.find_all_boundary_tiles_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, - rows=5000, - **kwargs) + return NexusTileService.__get_backend(dataset).find_all_boundary_tiles_at_time( + min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs + ) def get_tiles_bounded_by_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): @@ -317,12 +310,12 @@ def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time= return tiles def get_min_max_time_by_granule(self, ds, granule_name): - start_time, end_time = self._metadatastore.find_min_max_date_from_granule(ds, granule_name) - - return start_time, end_time + return NexusTileService.__get_backend(ds).get_min_max_time_by_granule( + ds, granule_name + ) def get_dataset_overall_stats(self, ds): - return self._metadatastore.get_data_series_stats(ds) + return NexusTileService.__get_backend(ds).get_dataset_overall_stats(ds) def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): tiles = self.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs) From ccc0de4e56122e570a1acc8dbbf6f9443dfebc23 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jul 2023 15:26:22 -0700 Subject: [PATCH 04/91] Working(?) 
np backend --- .../algorithms/DailyDifferenceAverage.py | 3 +- .../algorithms/StandardDeviationSearch.py | 2 +- .../app_builders/NexusAppBuilder.py | 2 +- data-access/nexustiles/AbstractTileService.py | 112 +- .../nexustiles/backends/nexusproto/backend.py | 2 +- .../nexustiles/backends/zarr/backend.py | 2 +- data-access/nexustiles/dao/CassandraProxy.py | 317 ----- data-access/nexustiles/dao/DynamoProxy.py | 146 -- .../nexustiles/dao/ElasticsearchProxy.py | 1235 ----------------- data-access/nexustiles/dao/S3Proxy.py | 141 -- data-access/nexustiles/dao/SolrProxy.py | 731 ---------- data-access/nexustiles/dao/__init__.py | 14 - data-access/nexustiles/exception.py | 2 + data-access/nexustiles/nexustiles.py | 286 ++-- data-access/setup.py | 18 +- 15 files changed, 205 insertions(+), 2808 deletions(-) delete mode 100644 data-access/nexustiles/dao/CassandraProxy.py delete mode 100644 data-access/nexustiles/dao/DynamoProxy.py delete mode 100644 data-access/nexustiles/dao/ElasticsearchProxy.py delete mode 100644 data-access/nexustiles/dao/S3Proxy.py delete mode 100644 data-access/nexustiles/dao/SolrProxy.py delete mode 100644 data-access/nexustiles/dao/__init__.py create mode 100644 data-access/nexustiles/exception.py diff --git a/analysis/webservice/algorithms/DailyDifferenceAverage.py b/analysis/webservice/algorithms/DailyDifferenceAverage.py index 05274fc2..c6c84951 100644 --- a/analysis/webservice/algorithms/DailyDifferenceAverage.py +++ b/analysis/webservice/algorithms/DailyDifferenceAverage.py @@ -21,7 +21,8 @@ import numpy as np import pytz -from nexustiles.nexustiles import NexusTileService, NexusTileServiceException +from nexustiles.nexustiles import NexusTileService +from nexustiles.exception import NexusTileServiceException from shapely.geometry import box from webservice.NexusHandler import nexus_handler diff --git a/analysis/webservice/algorithms/StandardDeviationSearch.py b/analysis/webservice/algorithms/StandardDeviationSearch.py index ae0566f1..26451cb1 100644 --- a/analysis/webservice/algorithms/StandardDeviationSearch.py +++ b/analysis/webservice/algorithms/StandardDeviationSearch.py @@ -19,7 +19,7 @@ from datetime import datetime from functools import partial -from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.exception import NexusTileServiceException from pytz import timezone from webservice.NexusHandler import nexus_handler diff --git a/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py b/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py index afe7d690..01798583 100644 --- a/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py +++ b/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py @@ -53,7 +53,7 @@ def set_modules(self, module_dir, algorithm_config, remote_collections=None, max NexusHandler.executeInitializers(algorithm_config) self.log.info("Initializing request ThreadPool to %s" % max_request_threads) - tile_service_factory = partial(NexusTileService, False, False, algorithm_config) + tile_service_factory = partial(NexusTileService, algorithm_config) handler_args_builder = HandlerArgsBuilder( max_request_threads, tile_service_factory, diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py index 6426295b..6e5b4640 100644 --- a/data-access/nexustiles/AbstractTileService.py +++ b/data-access/nexustiles/AbstractTileService.py @@ -13,28 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the 
License. -import configparser -import logging -import sys -import json from abc import ABC, abstractmethod -from datetime import datetime from functools import reduce import numpy as np import numpy.ma as ma -import pkg_resources -from pytz import timezone, UTC -from shapely.geometry import MultiPolygon, box - -from .dao import CassandraProxy -from .dao import DynamoProxy -from .dao import S3Proxy -from .dao import SolrProxy -from .dao import ElasticsearchProxy - -from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable -from nexustiles.nexustiles import NexusTileServiceException class AbstractTileService(ABC): @@ -43,9 +26,9 @@ class AbstractTileService(ABC): # def open_dataset(dataset_s, **kwargs): # pass - @abstractmethod - def try_connect(self) -> bool: - raise NotImplementedError() + # @abstractmethod + # def try_connect(self) -> bool: + # raise NotImplementedError() @abstractmethod def get_dataseries_list(self, simple=False): @@ -193,91 +176,6 @@ def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_tim """ raise NotImplementedError() - def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): - for tile in tiles: - tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) - tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - # If this is multi-var, need to mask each variable separately. - if tile.is_multi: - # Combine space/time mask with existing mask on data - data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) - - num_vars = len(tile.data) - multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) - tile.data = ma.masked_where(multi_data_mask, tile.data) - else: - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - - def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): - for tile in tiles: - tile.times = ma.masked_outside(tile.times, start_time, end_time) - tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) - tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - - def mask_tiles_to_polygon(self, bounding_polygon, tiles): - - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - - return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) - - def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - - return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) - - def mask_tiles_to_time_range(self, start_time, end_time, tiles): - """ - Masks data in tiles to specified time range. 
- :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :param tiles: List of tiles - :return: A list tiles with data masked to specified time range - """ - if 0 <= start_time <= end_time: - for tile in tiles: - tile.times = ma.masked_outside(tile.times, start_time, end_time) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - # If this is multi-var, need to mask each variable separately. - if tile.is_multi: - # Combine space/time mask with existing mask on data - data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) - - num_vars = len(tile.data) - multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) - tile.data = ma.masked_where(multi_data_mask, tile.data) - else: - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - @abstractmethod def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): """ @@ -295,10 +193,6 @@ def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, m def fetch_data_for_tiles(self, *tiles): raise NotImplementedError() - @abstractmethod - def open_dataset(self, dataset): - raise NotImplementedError() - @abstractmethod def _metadata_store_docs_to_tiles(self, *store_docs): raise NotImplementedError() diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py index aa0ab290..6aa63644 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -33,7 +33,7 @@ from .dao import ElasticsearchProxy from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable -from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.exception import NexusTileServiceException from nexustiles.AbstractTileService import AbstractTileService EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 019cd753..93963166 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -27,7 +27,7 @@ from shapely.geometry import MultiPolygon, box from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable -from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.exception import NexusTileServiceException from nexustiles.AbstractTileService import AbstractTileService EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) diff --git a/data-access/nexustiles/dao/CassandraProxy.py b/data-access/nexustiles/dao/CassandraProxy.py deleted file mode 100644 index 96f7c4c6..00000000 --- a/data-access/nexustiles/dao/CassandraProxy.py +++ /dev/null @@ -1,317 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import uuid -from configparser import NoOptionError - -import nexusproto.DataTile_pb2 as nexusproto -import numpy as np -from cassandra.auth import PlainTextAuthProvider -from cassandra.cqlengine import columns, connection, CQLEngineException -from cassandra.cluster import NoHostAvailable -from cassandra.cqlengine.models import Model -from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy, WhiteListRoundRobinPolicy -from multiprocessing.synchronize import Lock -from nexusproto.serialization import from_shaped_array - -INIT_LOCK = Lock(ctx=None) - -logger = logging.getLogger(__name__) - -class NexusTileData(Model): - __table_name__ = 'sea_surface_temp' - tile_id = columns.UUID(primary_key=True) - tile_blob = columns.Blob() - - __nexus_tile = None - - def _get_nexus_tile(self): - if self.__nexus_tile is None: - self.__nexus_tile = nexusproto.TileData.FromString(self.tile_blob) - - return self.__nexus_tile - - def get_raw_data_array(self): - - nexus_tile = self._get_nexus_tile() - the_tile_type = nexus_tile.tile.WhichOneof("tile_type") - - the_tile_data = getattr(nexus_tile.tile, the_tile_type) - - return from_shaped_array(the_tile_data.variable_data) - - def get_lat_lon_time_data_meta(self): - """ - Retrieve data from data store and metadata from metadata store - for this tile. For gridded tiles, the tile shape of the data - will match the input shape. For example, if the input was a - 30x30 tile, all variables will also be 30x30. However, if the - tile is a swath tile, the data will be transformed along the - diagonal of the data matrix. For example, a 30x30 tile would - become 900x900 where the 900 points are along the diagonal. - - Multi-variable tile will also include an extra dimension in the - data array. For example, a 30 x 30 x 30 array would be - transformed to N x 30 x 30 x 30 where N is the number of - variables in this tile. 
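
(Annotation, not part of the proxy code: the "diagonal" layout described just above can be pictured with a toy example; shapes are made up:)

    import numpy as np

    # A 2x3 swath has 6 observations; in the standard index each one gets its
    # own (lat, lon) position along the diagonal of a 6x6 masked array.
    swath = np.arange(6.0).reshape(2, 3)
    flat = swath.reshape(-1)

    standard = np.ma.masked_all((flat.size, flat.size))
    standard[np.diag_indices(flat.size)] = flat

    print(standard.count())  # 6 unmasked values, all on the diagonal
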
- - latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data, is_multi_var - - :return: latitude data - :return: longitude data - :return: time data - :return: data - :return: meta data dictionary - :return: boolean flag, True if this tile has more than one variable - """ - is_multi_var = False - - if self._get_nexus_tile().HasField('grid_tile'): - grid_tile = self._get_nexus_tile().grid_tile - - grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) - latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) - - if len(grid_tile_data.shape) == 2: - grid_tile_data = grid_tile_data[np.newaxis, :] - - # Extract the meta data - meta_data = {} - for meta_data_obj in grid_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - if len(meta_array.shape) == 2: - meta_array = meta_array[np.newaxis, :] - meta_data[name] = meta_array - - return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data, is_multi_var - elif self._get_nexus_tile().HasField('swath_tile'): - swath_tile = self._get_nexus_tile().swath_tile - - latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) - longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) - time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) - - # Simplify the tile if the time dimension is the same value repeated - if np.all(time_data == np.min(time_data)): - time_data = np.array([np.min(time_data)]) - - swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) - - tile_data = self._to_standard_index(swath_tile_data, - (len(time_data), len(latitude_data), len(longitude_data))) - - # Extract the meta data - meta_data = {} - for meta_data_obj in swath_tile.meta_data: - name = meta_data_obj.name - actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var - elif self._get_nexus_tile().HasField('time_series_tile'): - time_series_tile = self._get_nexus_tile().time_series_tile - - time_series_tile_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.variable_data)) - time_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.time)).reshape(-1) - latitude_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.longitude)) - - reshaped_array = np.ma.masked_all((len(time_data), len(latitude_data), len(longitude_data))) - idx = np.arange(len(latitude_data)) - reshaped_array[:, idx, idx] = time_series_tile_data - tile_data = reshaped_array - # Extract the meta data - meta_data = {} - for meta_data_obj in time_series_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - - reshaped_meta_array = np.ma.masked_all((len(time_data), len(latitude_data), len(longitude_data))) - idx = np.arange(len(latitude_data)) - reshaped_meta_array[:, idx, idx] = meta_array - - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var - elif 
self._get_nexus_tile().HasField('swath_multi_variable_tile'): - swath_tile = self._get_nexus_tile().swath_multi_variable_tile - is_multi_var = True - - latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) - longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) - time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) - - # Simplify the tile if the time dimension is the same value repeated - if np.all(time_data == np.min(time_data)): - time_data = np.array([np.min(time_data)]) - - swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) - - desired_shape = ( - len(time_data), - len(latitude_data), - len(longitude_data), - ) - tile_data = self._to_standard_index(swath_tile_data, desired_shape, is_multi_var=True) - - # Extract the meta data - meta_data = {} - for meta_data_obj in swath_tile.meta_data: - name = meta_data_obj.name - actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var - elif self._get_nexus_tile().HasField('grid_multi_variable_tile'): - grid_multi_variable_tile = self._get_nexus_tile().grid_multi_variable_tile - is_multi_var = True - - grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.variable_data)) - latitude_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.longitude)) - - # If there are 3 dimensions, that means the time dimension - # was squeezed. Add back in - if len(grid_tile_data.shape) == 3: - grid_tile_data = np.expand_dims(grid_tile_data, axis=1) - # If there are 4 dimensions, that means the time dimension - # is present. Move the multivar dimension. 
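
(Annotation, not part of the proxy code: the axis juggling referred to above relies on two generic numpy operations, shown here with made-up shapes; np.expand_dims reinserts a squeezed axis and np.moveaxis reorders axes so the variable axis leads:)

    import numpy as np

    # A multi-variable grid tile stored as (time, lat, lon, variable)...
    tile = np.zeros((1, 30, 30, 2))
    print(np.moveaxis(tile, -1, 0).shape)    # (2, 1, 30, 30): variables lead

    # ...and one whose time axis was squeezed away gets it reinserted first.
    squeezed = np.zeros((30, 30, 2))
    print(np.moveaxis(np.expand_dims(squeezed, 0), -1, 0).shape)  # (2, 1, 30, 30)
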
- if len(grid_tile_data.shape) == 4: - grid_tile_data = np.moveaxis(grid_tile_data, -1, 0) - - # Extract the meta data - meta_data = {} - for meta_data_obj in grid_multi_variable_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - if len(meta_array.shape) == 2: - meta_array = meta_array[np.newaxis, :] - meta_data[name] = meta_array - - return latitude_data, longitude_data, np.array([grid_multi_variable_tile.time]), grid_tile_data, meta_data, is_multi_var - else: - raise NotImplementedError("Only supports grid_tile, swath_tile, swath_multi_variable_tile, and time_series_tile") - - @staticmethod - def _to_standard_index(data_array, desired_shape, is_multi_var=False): - """ - Transform swath data to a standard format where data runs along - diagonal of ND matrix and the non-diagonal data points are - masked - - :param data_array: The data array to be transformed - :param desired_shape: The desired shape of the resulting array - :param is_multi_var: True if this is a multi-variable tile - :type data_array: np.array - :type desired_shape: tuple - :type is_multi_var: bool - :return: Reshaped array - :rtype: np.array - """ - - reshaped_array = [] - if is_multi_var: - reshaped_data_array = np.moveaxis(data_array, -1, 0) - else: - reshaped_data_array = [data_array] - - for variable_data_array in reshaped_data_array: - if desired_shape[0] == 1: - variable_reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) - else: - variable_reshaped_array = np.ma.masked_all(desired_shape) - - row, col = np.indices(variable_data_array.shape) - - variable_reshaped_array[ - np.diag_indices(desired_shape[1], len(variable_reshaped_array.shape))] = \ - variable_data_array[ - row.flat, col.flat] - variable_reshaped_array.mask[ - np.diag_indices(desired_shape[1], len(variable_reshaped_array.shape))] = \ - variable_data_array.mask[ - row.flat, col.flat] - - if desired_shape[0] == 1: - reshaped_array.append(variable_reshaped_array[np.newaxis, :]) - else: - reshaped_array.append(variable_reshaped_array) - - if not is_multi_var: - # If single var, squeeze extra dim out of array - reshaped_array = reshaped_array[0] - - return reshaped_array - - -class CassandraProxy(object): - def __init__(self, config): - self.config = config - self.__cass_url = config.get("cassandra", "host") - self.__cass_username = config.get("cassandra", "username") - self.__cass_password = config.get("cassandra", "password") - self.__cass_keyspace = config.get("cassandra", "keyspace") - self.__cass_local_DC = config.get("cassandra", "local_datacenter") - self.__cass_protocol_version = config.getint("cassandra", "protocol_version") - self.__cass_dc_policy = config.get("cassandra", "dc_policy") - - try: - self.__cass_port = config.getint("cassandra", "port") - except NoOptionError: - self.__cass_port = 9042 - - with INIT_LOCK: - try: - connection.get_cluster() - except CQLEngineException: - self.__open() - - def __open(self): - if self.__cass_dc_policy == 'DCAwareRoundRobinPolicy': - dc_policy = DCAwareRoundRobinPolicy(self.__cass_local_DC) - token_policy = TokenAwarePolicy(dc_policy) - elif self.__cass_dc_policy == 'WhiteListRoundRobinPolicy': - token_policy = WhiteListRoundRobinPolicy([self.__cass_url]) - - if self.__cass_username and self.__cass_password: - auth_provider = PlainTextAuthProvider(username=self.__cass_username, password=self.__cass_password) - else: - auth_provider = None - try: - connection.setup( - [host for host in self.__cass_url.split(',')], 
self.__cass_keyspace, - protocol_version=self.__cass_protocol_version, load_balancing_policy=token_policy, - port=self.__cass_port, - auth_provider=auth_provider - ) - except NoHostAvailable as e: - logger.error("Cassandra is not accessible, SDAP will not server local datasets", e) - - def fetch_nexus_tiles(self, *tile_ids): - tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if - (isinstance(tile_id, str) or isinstance(tile_id, str))] - - res = [] - for tile_id in tile_ids: - filterResults = NexusTileData.objects.filter(tile_id=tile_id) - if len(filterResults) > 0: - res.append(filterResults[0]) - - return res diff --git a/data-access/nexustiles/dao/DynamoProxy.py b/data-access/nexustiles/dao/DynamoProxy.py deleted file mode 100644 index 1ee70ac1..00000000 --- a/data-access/nexustiles/dao/DynamoProxy.py +++ /dev/null @@ -1,146 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import uuid -import nexusproto.DataTile_pb2 as nexusproto -from nexusproto.serialization import from_shaped_array -import numpy as np -import boto3 - -class NexusTileData(object): - __nexus_tile = None - __data = None - tile_id = None - - def __init__(self, data, _tile_id): - if self.__data is None: - self.__data = data - if self.tile_id is None: - self.tile_id = _tile_id - - def _get_nexus_tile(self): - if self.__nexus_tile is None: - self.__nexus_tile = nexusproto.TileData.FromString(self.__data) - - return self.__nexus_tile - - def get_raw_data_array(self): - - nexus_tile = self._get_nexus_tile() - the_tile_type = nexus_tile.tile.WhichOneof("tile_type") - - the_tile_data = getattr(nexus_tile.tile, the_tile_type) - - return from_shaped_array(the_tile_data.variable_data) - - def get_lat_lon_time_data_meta(self): - if self._get_nexus_tile().HasField('grid_tile'): - grid_tile = self._get_nexus_tile().grid_tile - - grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) - latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) - - if len(grid_tile_data.shape) == 2: - grid_tile_data = grid_tile_data[np.newaxis, :] - - # Extract the meta data - meta_data = {} - for meta_data_obj in grid_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - if len(meta_array.shape) == 2: - meta_array = meta_array[np.newaxis, :] - meta_data[name] = meta_array - - return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data - elif self._get_nexus_tile().HasField('swath_tile'): - swath_tile = self._get_nexus_tile().swath_tile - - latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) - longitude_data = 
np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) - time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) - - # Simplify the tile if the time dimension is the same value repeated - if np.all(time_data == np.min(time_data)): - time_data = np.array([np.min(time_data)]) - - swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) - - tile_data = self._to_standard_index(swath_tile_data, - (len(time_data), len(latitude_data), len(longitude_data))) - - # Extract the meta data - meta_data = {} - for meta_data_obj in swath_tile.meta_data: - name = meta_data_obj.name - actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data - else: - raise NotImplementedError("Only supports grid_tile and swath_tile") - - @staticmethod - def _to_standard_index(data_array, desired_shape): - - if desired_shape[0] == 1: - reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) - row, col = np.indices(data_array.shape) - - reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ - row.flat, col.flat] - reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ - row.flat, col.flat] - reshaped_array = reshaped_array[np.newaxis, :] - else: - reshaped_array = np.ma.masked_all(desired_shape) - row, col = np.indices(data_array.shape) - - reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ - row.flat, col.flat] - reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ - row.flat, col.flat] - - return reshaped_array - - -class DynamoProxy(object): - def __init__(self, config): - self.config = config - self.__dynamo_tablename = config.get("dynamo", "table") - self.__dynamo_region = config.get("dynamo", "region") - self.__dynamo = boto3.resource('dynamodb', region_name=self.__dynamo_region) - self.__dynamo_table = self.__dynamo.Table(self.__dynamo_tablename) - self.__nexus_tile = None - - def fetch_nexus_tiles(self, *tile_ids): - - tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if - (isinstance(tile_id, str) or isinstance(tile_id, str))] - res = [] - for tile_id in tile_ids: - response = self.__dynamo_table.get_item( - Key = { - 'tile_id': str(tile_id) - } - ) - item = response['Item'] - data = item['data'].__str__() - nexus_tile = NexusTileData(data, str(tile_id)) - res.append(nexus_tile) - - return res \ No newline at end of file diff --git a/data-access/nexustiles/dao/ElasticsearchProxy.py b/data-access/nexustiles/dao/ElasticsearchProxy.py deleted file mode 100644 index 157630f6..00000000 --- a/data-access/nexustiles/dao/ElasticsearchProxy.py +++ /dev/null @@ -1,1235 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import logging -import threading -import time -import re -from datetime import datetime -from pytz import timezone, UTC - -import requests -import pysolr -from shapely import wkt -from elasticsearch import Elasticsearch - -ELASTICSEARCH_CON_LOCK = threading.Lock() -thread_local = threading.local() - -EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) -ELASTICSEARCH_FORMAT = '%Y-%m-%dT%H:%M:%SZ' -ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' - - -class ElasticsearchProxy(object): - def __init__(self, config): - self.elasticsearchHosts = config.get("elasticsearch", "host").split(',') - self.elasticsearchIndex = config.get("elasticsearch", "index") - self.elasticsearchUsername = config.get("elasticsearch", "username") - self.elasticsearchPassword = config.get("elasticsearch", "password") - self.logger = logging.getLogger(__name__) - - with ELASTICSEARCH_CON_LOCK: - elasticsearchcon = getattr(thread_local, 'elasticsearchcon', None) - if elasticsearchcon is None: - elasticsearchcon = Elasticsearch(hosts=self.elasticsearchHosts, http_auth=(self.elasticsearchUsername, self.elasticsearchPassword)) - thread_local.elasticsearchcon = elasticsearchcon - - self.elasticsearchcon = elasticsearchcon - - def find_tile_by_id(self, tile_id): - - params = { - "size": 1, - "query": { - "term": { - "id": { - "value": tile_id - } - } - } - } - - results, _, hits = self.do_query(*(None, None, None, True, None), **params) - assert hits == 1, f"Found {hits} results, expected exactly 1" - return [results[0]["_source"]] - - def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - - params = { - "query": { - "bool": { - "filter": [], - "should": [], - "minimum_should_match": 1 - } - } - } - - for tile_id in tile_ids: - params['query']['bool']['should'].append({"term": {"id": {"value": tile_id}}}) - - if ds is not None: - params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) - - self._merge_kwargs(params, **kwargs) - - results = self.do_query_all(*(None, None, None, False, None), **params) - assert len(results) == len(tile_ids), "Found %s results, expected exactly %s" % (len(results), len(tile_ids)) - return results - - def find_min_date_from_tiles(self, tile_ids, ds=None, **kwargs): - params = { - "size": 0, - "query": { - "bool": { - "filter": [], - "should": [] - } - }, - "aggs": { - "min_date_agg": { - "min": { - "field": "tile_min_time_dt" - } - } - } - } - - for tile_id in tile_ids: - params['query']['bool']['should'].append({"term": {"id": {"value": tile_id}}}) - if ds is not None: - params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) - - aggregations = self.do_aggregation(*(None, None, None, True, None), **params) - return self.convert_iso_to_datetime(aggregations['min_date_agg']["value_as_string"]) - - def find_max_date_from_tiles(self, tile_ids, ds=None, **kwargs): - - params = { - "size": 0, - "query": { - "bool": { - "filter": [], - "should": [] - } - }, - "aggs": { - "max_date_agg": { - "max": { - "field": "tile_max_time_dt" - } - } - } - } - - for tile_id in tile_ids: - params['query']['bool']['should'].append({"term": {"id": {"value": 
tile_id}}}) - if ds is not None: - params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) - - aggregations = self.do_aggregation(*(None, None, None, True, None), **params) - return self.convert_iso_to_datetime(aggregations['max_date_agg']["value_as_string"]) - - - def find_min_max_date_from_granule(self, ds, granule_name, **kwargs): - - params = { - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "term": { - "granule_s": { - "value": granule_name - } - } - } - ] - } - }, - "aggs": { - "min_date_agg": { - "max": { - "field": "tile_min_time_dt" - } - }, - "max_date_agg": { - "max": { - "field": "tile_max_time_dt" - } - } - } - } - - self._merge_kwargs(params, **kwargs) - - aggregations = self.do_aggregation(*(None, None, None, False, None), **params) - start_time = self.convert_iso_to_datetime(aggregations['min_date_agg']["value_as_string"]) - end_time = self.convert_iso_to_datetime(aggregations['max_date_agg']["value_as_string"]) - - return start_time, end_time - - def get_data_series_list(self): - - datasets = self.get_data_series_list_simple() - - for dataset in datasets: - min_date = self.find_min_date_from_tiles([], ds=dataset['title']) - max_date = self.find_max_date_from_tiles([], ds=dataset['title']) - dataset['start'] = (min_date - EPOCH).total_seconds() - dataset['end'] = (max_date - EPOCH).total_seconds() - dataset['iso_start'] = min_date.strftime(ISO_8601) - dataset['iso_end'] = max_date.strftime(ISO_8601) - - return datasets - - def get_data_series_list_simple(self): - - params = { - 'size': 0, - "aggs": { - "dataset_list_agg": { - "composite": { - "size":100, - "sources": [ - { - "dataset_s": { - "terms": { - "field": "dataset_s" - } - } - } - ] - } - } - } - } - - aggregations = self.do_aggregation_all(params, 'dataset_list_agg') - l = [] - - for dataset in aggregations: - l.append({ - "shortName": dataset['key']['dataset_s'], - "title": dataset['key']['dataset_s'], - "tileCount": dataset["doc_count"] - }) - - l = sorted(l, key=lambda entry: entry["title"]) - return l - - def get_data_series_stats(self, ds): - - params = { - "size": 0, - "query": { - "term":{ - "dataset_s": { - "value": ds - } - } - }, - "aggs": { - "available_dates": { - "composite": { - "size": 100, - "sources": [ - {"terms_tile_max_time_dt": {"terms": {"field": "tile_max_time_dt"}}} - ] - } - } - } - } - - aggregations = self.do_aggregation_all(params, 'available_dates') - stats = {} - stats['available_dates'] = [] - - for dt in aggregations: - stats['available_dates'].append(dt['key']['terms_tile_max_time_dt'] / 1000) - - stats['available_dates'] = sorted(stats['available_dates']) - - params = { - "size": 0, - "query": { - "term":{ - "dataset_s": { - "value": ds - } - } - }, - "aggs": { - "min_tile_min_val_d": { - "min": { - "field": "tile_min_val_d" - } - }, - "min_tile_max_time_dt": { - "min": { - "field": "tile_max_time_dt" - } - }, - "max_tile_max_time_dt": { - "max": { - "field": "tile_max_time_dt" - } - }, - "max_tile_max_val_d": { - "max": { - "field": "tile_max_val_d" - } - } - } - } - - aggregations = self.do_aggregation(*(None, None, None, False, None), **params) - stats["start"] = int(aggregations["min_tile_max_time_dt"]["value"]) / 1000 - stats["end"] = int(aggregations["max_tile_max_time_dt"]["value"]) / 1000 - stats["minValue"] = aggregations["min_tile_min_val_d"]["value"] - stats["maxValue"] = aggregations["max_tile_max_val_d"]["value"] - - return stats - - # day_of_year_i added (SDAP-347) - def 
find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year): - - max_lat = bounding_polygon.bounds[3] - min_lon = bounding_polygon.bounds[0] - min_lat = bounding_polygon.bounds[1] - max_lon = bounding_polygon.bounds[2] - - params = { - "size": "1", - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - }, - { - "range": { - "day_of_year_i": { - "lte": day_of_year - } - } - } - ] - } - } - } - result, _, _ = self.do_query(*(None, None, None, True, 'day_of_year_i desc'), **params) - - return [result[0]] - - def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): - - search_start_s = datetime.utcfromtimestamp(start_time).strftime(ELASTICSEARCH_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(ELASTICSEARCH_FORMAT) - - params = { - "size": "0", - "_source": "tile_min_time_dt", - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "range": { - "tile_min_time_dt": { - "gte": search_start_s, - "lte": search_end_s - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "intersects" - } - } - } - ] - } - }, - "aggs": { - "days_range_agg": { - "composite": { - "size":100, - "sources": [ - { - "tile_min_time_dt": { - "terms": { - "field": "tile_min_time_dt" - } - } - } - ] - } - } - } - } - - aggregations = self.do_aggregation_all(params, 'days_range_agg') - results = [res['key']['tile_min_time_dt'] for res in aggregations] - daysinrangeasc = sorted([(res / 1000) for res in results]) - return daysinrangeasc - - def find_all_tiles_in_box_sorttimeasc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, - end_time=-1, **kwargs): - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - } - ] - } - } - } - - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, 'tile_min_time_dt asc,tile_max_time_dt asc'), **params) - - def find_all_tiles_in_polygon_sorttimeasc(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - nums = re.findall(r'\d+(?:\.\d*)?', bounding_polygon.wkt.rpartition(',')[0]) - polygon_coordinates = list(zip(*[iter(nums)] * 2)) - - max_lat = bounding_polygon.bounds[3] - min_lon = bounding_polygon.bounds[0] - min_lat = bounding_polygon.bounds[1] - max_lon = bounding_polygon.bounds[2] - - params = { - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - }, - "relation": "intersects" - } - } - } - ] - } - } - } - - try: - if 'fl' in list(kwargs.keys()): - params["_source"] = 
kwargs["fl"].split(',') - except KeyError: - pass - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - return self.do_query_all(*(None, None, None, False, 'tile_min_time_dt asc,tile_max_time_dt asc'), **params) - - def find_all_tiles_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - nums = re.findall(r'\d+(?:\.\d*)?', bounding_polygon.wkt.rpartition(',')[0]) - polygon_coordinates = list(zip(*[iter(nums)] * 2)) - - max_lat = bounding_polygon.bounds[3] - min_lon = bounding_polygon.bounds[0] - min_lat = bounding_polygon.bounds[1] - max_lon = bounding_polygon.bounds[2] - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - } - ] - } - } - } - - try: - if 'fl' in list(kwargs.keys()): - params["_source"] = kwargs["fl"].split(',') - except KeyError: - pass - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - def find_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - tile_max_lat = bounding_polygon.bounds[3] - tile_min_lon = bounding_polygon.bounds[0] - tile_min_lat = bounding_polygon.bounds[1] - tile_max_lon = bounding_polygon.bounds[2] - - params = { - "size": 0, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[tile_min_lon, tile_max_lat], [tile_max_lon, tile_min_lat]] - }, - "relation": "intersects" - } - } - } - ] - } - }, - "aggs": { - "distinct_bounding_boxes": { - "composite": { - "size": 100, - "sources": [ - { - "bounding_box": { - "terms": { - "script": { - "source": "String.valueOf(doc['tile_min_lon'].value) + ', ' + String.valueOf(doc['tile_max_lon'].value) + ', ' + String.valueOf(doc['tile_min_lat'].value) + ', ' + String.valueOf(doc['tile_max_lat'].value)", - "lang": "painless" - } - } - } - } - ] - } - } - } - } - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - aggregations = self.do_aggregation_all(params, 'distinct_bounding_boxes') - distinct_bounds = [] - for agg in aggregations: - coords = agg['key']['bounding_box'].split(',') - min_lon = round(float(coords[0]), 2) - max_lon = round(float(coords[1]), 2) - min_lat = round(float(coords[2]), 2) - max_lat = round(float(coords[3]), 2) - polygon = 'POLYGON((%s %s, %s %s, %s %s, %s %s, %s %s))' % (min_lon, max_lat, min_lon, min_lat, max_lon, min_lat, max_lon, max_lat, min_lon, max_lat) - distinct_bounds.append(wkt.loads(polygon).bounds) - - return distinct_bounds - - def find_tiles_by_exact_bounds(self, minx, miny, maxx, maxy, ds, start_time=0, end_time=-1, **kwargs): - - params = { - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - 
{ - "term": { - "tile_min_lon": { - "value": minx - } - } - }, - { - "term": { - "tile_min_lat": { - "value": miny - } - } - }, - { - "term": { - "tile_max_lon": { - "value": maxx - } - } - }, - { - "term": { - "tile_max_lat": { - "value": maxy - } - } - } - ] - } - }} - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, search_time, **kwargs): - - the_time = datetime.utcfromtimestamp(search_time).strftime(ELASTICSEARCH_FORMAT) - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_min_time_dt": { - "lte": the_time - } - } - }, - { - "range": { - "tile_max_time_dt": { - "gte": the_time - } - } - } - ] - } - } - } - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - def find_all_tiles_in_polygon_at_time(self, bounding_polygon, ds, search_time, **kwargs): - - the_time = datetime.utcfromtimestamp(search_time).strftime(ELASTICSEARCH_FORMAT) - - max_lat = bounding_polygon.bounds[3] - min_lon = bounding_polygon.bounds[0] - min_lat = bounding_polygon.bounds[1] - max_lon = bounding_polygon.bounds[2] - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { "range": { - "tile_min_time_dt": { - "lte": the_time - } - } }, - { "range": { - "tile_max_time_dt": { - "gte": the_time - } - } } - ] - } - } - } - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - - def find_all_tiles_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): - - the_time = datetime.utcfromtimestamp(time).strftime(ELASTICSEARCH_FORMAT) - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "within" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - }, - { - "range": { - "tile_min_time_dt": { - "lte": the_time - } - } - }, - { - "range": { - "tile_max_time_dt": { - "gte": the_time - } - } - } - ] - } - } - } - - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, "product(tile_avg_val_d, tile_count_i),*", None, False, None), **params) - - def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): - - the_time = datetime.utcfromtimestamp(time).strftime(ELASTICSEARCH_FORMAT) - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "multilinestring", - "coordinates": [[[min_lon, max_lat], [max_lon, max_lat], [min_lon, max_lat], 
[min_lon, min_lat], [max_lon, max_lat], [max_lon, min_lat], [min_lon, min_lat], [max_lon, min_lat]]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - }, - { - "range": { - "tile_min_time_dt": { - "lte": the_time - } - } - }, - { - "range": { - "tile_max_time_dt": { - "gte": the_time - } - } - } - ], - "must_not" : { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - }, - "relation": "within" - } - } - } - } - } - } - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - def find_all_tiles_by_metadata(self, metadata, ds, start_time=0, end_time=-1, **kwargs): - """ - Get a list of tile metadata that matches the specified metadata, start_time, end_time. - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :param ds: The dataset name to search - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :return: A list of tile metadata - """ - - params = { - "query": { - "bool": { - "must": [ - { - "term": { - "dataset_s": {"value": ds} - } - } - ] - } - } - } - - if len(metadata) > 0: - for key_value in metadata: - key = key_value.split(':')[0] - value = key_value.split(':')[1] - params['query']['bool']['must'].append({"match": {key: value}}) - - if 0 < start_time <= end_time: - params['query']['bool']['should'] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - return self.do_query_all(*(None, None, None, False, None), **params) - - def get_formatted_time_clause(self, start_time, end_time): - search_start_s = datetime.utcfromtimestamp(start_time).strftime(ELASTICSEARCH_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(ELASTICSEARCH_FORMAT) - - time_clause = [ - { - "range": { - "tile_min_time_dt": { - "lte": search_end_s, - "gte": search_start_s - } - } - }, - { - "range": { - "tile_max_time_dt": { - "lte": search_end_s, - "gte": search_start_s - } - } - }, - { - "bool": { - "must": [ - { - "range": { - "tile_min_time_dt": { - "gte": search_start_s - } - } - }, - { - "range": { - "tile_max_time_dt": { - "lte": search_end_s - } - } - } - ] - } - } - ] - - return time_clause - - def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): - """ - Return number of tiles that match search criteria. 
- :param ds: The dataset name to search - :param bounding_polygon: The polygon to search for tiles - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :return: number of tiles that match search criteria - """ - - params = { - "size": 0, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - } - ] - } - } - } - - if bounding_polygon: - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - geo_clause = { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - } - } - } - } - - params['query']['bool']['filter'].append(geo_clause) - - if 0 < start_time <= end_time: - params['query']['bool']['should'] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - if len(metadata) > 0: - for key_value in metadata: - key = key_value.split(':')[0] - value = key_value.split(':')[1] - params['query']['bool']['filter'].append({"term": {key: {"value": value}}}) - - self._merge_kwargs(params, **kwargs) - _, _, found = self.do_query(*(None, None, None, True, None), **params) - - return found - - def do_aggregation(self, *args, **params): - # Gets raw aggregations - - response = self.do_query_raw(*args, **params) - aggregations = response.get('aggregations', None) - return aggregations - - def do_aggregation_all(self, params, agg_name): - # Used for pagination when results can exceed ES max size (use of after_key) - - with ELASTICSEARCH_CON_LOCK: - response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) - all_buckets = [] - - try: - aggregations = response.get('aggregations', None) - current_buckets = aggregations.get(agg_name, None) - buckets = current_buckets.get('buckets', None) - all_buckets += buckets - after_bucket = current_buckets.get('after_key', None) - - while after_bucket is not None: - for agg in params['aggs']: - params['aggs'][agg]['composite']['after'] = {} - for source in params['aggs'][agg]['composite']['sources']: - key_name = next(iter(source)) - params['aggs'][agg]['composite']['after'][key_name] = after_bucket[key_name] - with ELASTICSEARCH_CON_LOCK: - response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) - - aggregations = response.get('aggregations', None) - current_buckets = aggregations.get(agg_name, None) - buckets = current_buckets.get('buckets', None) - all_buckets += buckets - after_bucket = current_buckets.get('after_key', None) - - except AttributeError as e: - self.logger.error('Error when accessing aggregation buckets - ' + str(e)) - - return all_buckets - - def do_query(self, *args, **params): - response = self.do_query_raw(*args, **params) - return response['hits']['hits'], None, response['hits']['total']['value'] - - def do_query_raw(self, *args, **params): - - if args[4]: - - sort_fields = args[4].split(",") - - if 'sort' not in list(params.keys()): - params["sort"] = [] - - for field in sort_fields: - field_order = field.split(' ') - sort_instruction = {field_order[0]: field_order[1]} - if sort_instruction not in params['sort']: - params["sort"].append(sort_instruction) - with ELASTICSEARCH_CON_LOCK: - response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) - - return response - - def 
do_query_all(self, *args, **params): - # Used to paginate with search_after. - # The method calling this might already have a sort clause, - # so we merge both sort clauses inside do_query_raw - - results = [] - - search = None - - # Add track option to not be blocked at 10000 hits per worker - if 'track_total_hits' not in params.keys(): - params['track_total_hits'] = True - - # Add sort instruction order to paginate the results : - params["sort"] = [ - { "tile_min_time_dt": "asc"}, - { "_id": "asc" } - ] - - response = self.do_query_raw(*args, **params) - results.extend([r["_source"] for r in response["hits"]["hits"]]) - - total_hits = response["hits"]["total"]["value"] - - try: - search_after = [] - for sort_param in response["hits"]["hits"][-1]["sort"]: - search_after.append(str(sort_param)) - except (KeyError, IndexError): - search_after = [] - - try: - while len(results) < total_hits: - params["search_after"] = search_after - response = self.do_query_raw(*args, **params) - results.extend([r["_source"] for r in response["hits"]["hits"]]) - - search_after = [] - for sort_param in response["hits"]["hits"][-1]["sort"]: - search_after.append(str(sort_param)) - - except (KeyError, IndexError): - pass - - return results - - def convert_iso_to_datetime(self, date): - return datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=UTC) - - def convert_iso_to_timestamp(self, date): - return (self.convert_iso_to_datetime(date) - EPOCH).total_seconds() - - @staticmethod - def _merge_kwargs(params, **kwargs): - # Only Solr-specific kwargs are parsed - # And the special 'limit' - try: - params['limit'] = kwargs['limit'] - except KeyError: - pass - - try: - params['_route_'] = kwargs['_route_'] - except KeyError: - pass - - try: - params['size'] = kwargs['size'] - except KeyError: - pass - - try: - params['start'] = kwargs['start'] - except KeyError: - pass - - try: - s = kwargs['sort'] if isinstance(kwargs['sort'], list) else [kwargs['sort']] - except KeyError: - s = None - - try: - params['sort'].extend(s) - except KeyError: - if s is not None: - params['sort'] = s diff --git a/data-access/nexustiles/dao/S3Proxy.py b/data-access/nexustiles/dao/S3Proxy.py deleted file mode 100644 index c8d3adfe..00000000 --- a/data-access/nexustiles/dao/S3Proxy.py +++ /dev/null @@ -1,141 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
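A minimal sketch, assuming a placeholder index name, host and sort fields, of the search_after paging that do_query_all above relies on; it is illustrative only and not part of the patch:

from elasticsearch import Elasticsearch

es = Elasticsearch(hosts=['localhost:9200'])

params = {
    'size': 1000,
    'track_total_hits': True,  # lift the default 10,000-hit counting cap
    'query': {'match_all': {}},
    # a deterministic sort with a tiebreaker is required for search_after paging
    'sort': [{'tile_min_time_dt': 'asc'}, {'_id': 'asc'}]
}

docs = []
while True:
    page = es.search(index='nexustiles', body=params)
    hits = page['hits']['hits']
    if not hits:
        break
    docs.extend(hit['_source'] for hit in hits)
    # resume the next page after the last sort key seen
    params['search_after'] = hits[-1]['sort']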
- -import uuid - -import boto3 -import nexusproto.DataTile_pb2 as nexusproto -import numpy as np -from nexusproto.serialization import from_shaped_array - - -class NexusTileData(object): - __nexus_tile = None - __data = None - tile_id = None - - def __init__(self, data, _tile_id): - if self.__data is None: - self.__data = data - if self.tile_id is None: - self.tile_id = _tile_id - - def _get_nexus_tile(self): - if self.__nexus_tile is None: - self.__nexus_tile = nexusproto.TileData.FromString(self.__data) - - return self.__nexus_tile - - def get_raw_data_array(self): - - nexus_tile = self._get_nexus_tile() - the_tile_type = nexus_tile.tile.WhichOneof("tile_type") - - the_tile_data = getattr(nexus_tile.tile, the_tile_type) - - return from_shaped_array(the_tile_data.variable_data) - - def get_lat_lon_time_data_meta(self): - if self._get_nexus_tile().HasField('grid_tile'): - grid_tile = self._get_nexus_tile().grid_tile - - grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) - latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) - - if len(grid_tile_data.shape) == 2: - grid_tile_data = grid_tile_data[np.newaxis, :] - - # Extract the meta data - meta_data = {} - for meta_data_obj in grid_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - if len(meta_array.shape) == 2: - meta_array = meta_array[np.newaxis, :] - meta_data[name] = meta_array - - return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data - elif self._get_nexus_tile().HasField('swath_tile'): - swath_tile = self._get_nexus_tile().swath_tile - - latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) - longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) - time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) - - # Simplify the tile if the time dimension is the same value repeated - if np.all(time_data == np.min(time_data)): - time_data = np.array([np.min(time_data)]) - - swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) - - tile_data = self._to_standard_index(swath_tile_data, - (len(time_data), len(latitude_data), len(longitude_data))) - - # Extract the meta data - meta_data = {} - for meta_data_obj in swath_tile.meta_data: - name = meta_data_obj.name - actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data - else: - raise NotImplementedError("Only supports grid_tile and swath_tile") - - @staticmethod - def _to_standard_index(data_array, desired_shape): - - if desired_shape[0] == 1: - reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) - row, col = np.indices(data_array.shape) - - reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ - row.flat, col.flat] - reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ - row.flat, col.flat] - reshaped_array = reshaped_array[np.newaxis, :] - else: - reshaped_array = np.ma.masked_all(desired_shape) - row, col = np.indices(data_array.shape) - - reshaped_array[np.diag_indices(desired_shape[1], 
len(reshaped_array.shape))] = data_array[ - row.flat, col.flat] - reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ - row.flat, col.flat] - - return reshaped_array - - -class S3Proxy(object): - def __init__(self, config): - self.config = config - self.__s3_bucketname = config.get("s3", "bucket") - self.__s3_region = config.get("s3", "region") - self.__s3 = boto3.resource('s3') - self.__nexus_tile = None - - def fetch_nexus_tiles(self, *tile_ids): - tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if - (isinstance(tile_id, str) or isinstance(tile_id, str))] - res = [] - for tile_id in tile_ids: - obj = self.__s3.Object(self.__s3_bucketname, str(tile_id)) - data = obj.get()['Body'].read() - nexus_tile = NexusTileData(data, str(tile_id)) - res.append(nexus_tile) - - return res diff --git a/data-access/nexustiles/dao/SolrProxy.py b/data-access/nexustiles/dao/SolrProxy.py deleted file mode 100644 index 9b16533d..00000000 --- a/data-access/nexustiles/dao/SolrProxy.py +++ /dev/null @@ -1,731 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
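A minimal sketch, assuming a placeholder bucket name, of the object-per-tile layout that S3Proxy.fetch_nexus_tiles above relies on: each tile is stored as a serialized nexusproto TileData blob keyed by its UUID. Illustrative only, not part of the patch:

import boto3
import nexusproto.DataTile_pb2 as nexusproto

s3 = boto3.resource('s3')

def read_tile(bucket_name, tile_id):
    # Download the raw protobuf bytes for one tile and parse them back into a TileData message
    blob = s3.Object(bucket_name, str(tile_id)).get()['Body'].read()
    return nexusproto.TileData.FromString(blob)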
- -import json -import logging -import threading -import time -from datetime import datetime -from pytz import timezone, UTC - -import requests -import pysolr -from shapely import wkt - -SOLR_CON_LOCK = threading.Lock() -thread_local = threading.local() - -EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) -SOLR_FORMAT = '%Y-%m-%dT%H:%M:%SZ' -ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' - - -class SolrProxy(object): - def __init__(self, config): - self.solrUrl = config.get("solr", "host") - self.solrCore = config.get("solr", "core") - solr_kargs = {} - if config.has_option("solr", "time_out"): - solr_kargs["timeout"] = config.get("solr", "time_out") - self.logger = logging.getLogger('nexus') - - with SOLR_CON_LOCK: - solrcon = getattr(thread_local, 'solrcon', None) - if solrcon is None: - solr_url = '%s/solr/%s' % (self.solrUrl, self.solrCore) - self.logger.info("connect to solr, url {} with option(s) = {}".format(solr_url, solr_kargs)) - solrcon = pysolr.Solr(solr_url, **solr_kargs) - thread_local.solrcon = solrcon - - self.solrcon = solrcon - - def find_tile_by_id(self, tile_id): - - search = 'id:%s' % tile_id - - params = { - 'rows': 1 - } - - results, start, found = self.do_query(*(search, None, None, True, None), **params) - - assert len(results) == 1, "Found %s results, expected exactly 1" % len(results) - return [results[0]] - - def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - - if ds is not None: - search = 'dataset_s:%s' % ds - else: - search = '*:*' - - additionalparams = { - 'fq': [ - "{!terms f=id}%s" % ','.join(tile_ids) - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - results = self.do_query_all(*(search, None, None, False, None), **additionalparams) - - assert len(results) == len(tile_ids), "Found %s results, expected exactly %s" % (len(results), len(tile_ids)) - return results - - def find_min_date_from_tiles(self, tile_ids, ds=None, **kwargs): - - if ds is not None: - search = 'dataset_s:%s' % ds - else: - search = '*:*' - - kwargs['rows'] = 1 - kwargs['fl'] = 'tile_min_time_dt' - kwargs['sort'] = ['tile_min_time_dt asc'] - additionalparams = { - 'fq': [ - "{!terms f=id}%s" % ','.join(tile_ids) if len(tile_ids) > 0 else '' - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) - - return self.convert_iso_to_datetime(results[0]['tile_min_time_dt']) - - def find_max_date_from_tiles(self, tile_ids, ds=None, **kwargs): - - if ds is not None: - search = 'dataset_s:%s' % ds - else: - search = '*:*' - - kwargs['rows'] = 1 - kwargs['fl'] = 'tile_max_time_dt' - kwargs['sort'] = ['tile_max_time_dt desc'] - additionalparams = { - 'fq': [ - "{!terms f=id}%s" % ','.join(tile_ids) if len(tile_ids) > 0 else '' - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) - - return self.convert_iso_to_datetime(results[0]['tile_max_time_dt']) - - def find_min_max_date_from_granule(self, ds, granule_name, **kwargs): - search = 'dataset_s:%s' % ds - - kwargs['rows'] = 1 - kwargs['fl'] = 'tile_min_time_dt' - kwargs['sort'] = ['tile_min_time_dt asc'] - additionalparams = { - 'fq': [ - "granule_s:%s" % granule_name - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - results, start, found = self.do_query(*(search, None, None, False, None), **additionalparams) - start_time = self.convert_iso_to_datetime(results[0]['tile_min_time_dt']) - - kwargs['fl'] = 'tile_max_time_dt' - 
kwargs['sort'] = ['tile_max_time_dt desc'] - additionalparams = { - 'fq': [ - "granule_s:%s" % granule_name - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - results, start, found = self.do_query(*(search, None, None, False, None), **additionalparams) - end_time = self.convert_iso_to_datetime(results[0]['tile_max_time_dt']) - - return start_time, end_time - - def get_data_series_list(self): - - datasets = self.get_data_series_list_simple() - - for dataset in datasets: - min_date = self.find_min_date_from_tiles([], ds=dataset['title']) - max_date = self.find_max_date_from_tiles([], ds=dataset['title']) - dataset['start'] = (min_date - EPOCH).total_seconds() - dataset['end'] = (max_date - EPOCH).total_seconds() - dataset['iso_start'] = min_date.strftime(ISO_8601) - dataset['iso_end'] = max_date.strftime(ISO_8601) - - return datasets - - def get_data_series_list_simple(self): - search = "*:*" - params = { - 'rows': 0, - "facet": "true", - "facet.field": "dataset_s", - "facet.mincount": "1", - "facet.limit": "-1" - } - - - response = self.do_query_raw(*(search, None, None, False, None), **params) - l = [] - for g, v in zip(*[iter(response.facets["facet_fields"]["dataset_s"])]*2): - l.append({ - "shortName": g, - "title": g, - "tileCount": v - }) - l = sorted(l, key=lambda entry: entry["title"]) - return l - - def get_data_series_stats(self, ds): - search = "dataset_s:%s" % ds - params = { - "facet": "true", - "facet.field": ["dataset_s", "tile_max_time_dt"], - "facet.limit": "-1", - "facet.mincount": "1", - "facet.pivot": "{!stats=piv1}dataset_s", - "stats": "on", - "stats.field": ["{!tag=piv1 min=true max=true sum=false}tile_max_time_dt","{!tag=piv1 min=true max=false sum=false}tile_min_val_d","{!tag=piv1 min=false max=true sum=false}tile_max_val_d"] - } - - response = self.do_query_raw(*(search, None, None, False, None), **params) - - stats = {} - - for g in response.facets["facet_pivot"]["dataset_s"]: - if g["value"] == ds: - stats["start"] = self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["min"]) - stats["end"] = self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["max"]) - stats["minValue"] = g["stats"]["stats_fields"]["tile_min_val_d"]["min"] - stats["maxValue"] = g["stats"]["stats_fields"]["tile_max_val_d"]["max"] - - - stats["availableDates"] = [] - for dt in response.facets["facet_fields"]["tile_max_time_dt"][::2]: - stats["availableDates"].append(self.convert_iso_to_timestamp(dt)) - - stats["availableDates"] = sorted(stats["availableDates"]) - - return stats - - def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year): - - search = 'dataset_s:%s' % ds - - params = { - 'fq': [ - "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]", - "day_of_year_i:[* TO %s]" % day_of_year - ], - 'rows': 1 - } - - results, start, found = self.do_query( - *(search, None, None, True, ('day_of_year_i desc',)), **params) - - return [results[0]] - - def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): - - search = 'dataset_s:%s' % ds - - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - additionalparams = { - 'fq': [ - "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), - "{!frange l=0 u=0}ms(tile_min_time_dt,tile_max_time_dt)", - "tile_count_i:[1 TO *]", - "tile_min_time_dt:[%s TO %s] " % (search_start_s, 
search_end_s) - ], - 'rows': 0, - 'facet': 'true', - 'facet.field': 'tile_min_time_dt', - 'facet.mincount': '1', - 'facet.limit': '-1' - } - - self._merge_kwargs(additionalparams, **kwargs) - - response = self.do_query_raw(*(search, None, None, False, None), **additionalparams) - - daysinrangeasc = sorted( - [(datetime.strptime(a_date, SOLR_FORMAT) - datetime.utcfromtimestamp(0)).total_seconds() for a_date - in response.facets['facet_fields']['tile_min_time_dt'][::2]]) - - return daysinrangeasc - - def find_all_tiles_in_box_sorttimeasc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, - end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), - "tile_count_i:[1 TO *]" - ] - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, 'tile_min_time_dt asc, tile_max_time_dt asc'), - **additionalparams) - - def find_all_tiles_in_polygon_sorttimeasc(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]" - ] - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, 'tile_min_time_dt asc, tile_max_time_dt asc'), - **additionalparams) - - def find_all_tiles_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]" - ] - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, None), - **additionalparams) - - def find_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "{!field 
f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]" - ], - 'rows': 0, - 'facet': 'true', - 'facet.field': 'geo_s', - 'facet.limit': -1, - 'facet.mincount': 1 - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - response = self.do_query_raw(*(search, None, None, False, None), **additionalparams) - - distinct_bounds = [wkt.loads(key).bounds for key in response.facets["facet_fields"]["geo_s"][::2]] - - return distinct_bounds - - def find_tiles_by_exact_bounds(self, minx, miny, maxx, maxy, ds, start_time=0, end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "tile_min_lon:\"%s\"" % minx, - "tile_min_lat:\"%s\"" % miny, - "tile_max_lon:\"%s\"" % maxx, - "tile_max_lat:\"%s\"" % maxy, - "tile_count_i:[1 TO *]" - ] - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, None), - **additionalparams) - - def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, search_time, **kwargs): - search = 'dataset_s:%s' % ds - - the_time = datetime.utcfromtimestamp(search_time).strftime(SOLR_FORMAT) - time_clause = "(" \ - "tile_min_time_dt:[* TO %s] " \ - "AND tile_max_time_dt:[%s TO *] " \ - ")" % ( - the_time, the_time - ) - - additionalparams = { - 'fq': [ - "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), - "tile_count_i:[1 TO *]", - time_clause - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all(*(search, None, None, False, None), **additionalparams) - - def find_all_tiles_in_polygon_at_time(self, bounding_polygon, ds, search_time, **kwargs): - search = 'dataset_s:%s' % ds - - the_time = datetime.utcfromtimestamp(search_time).strftime(SOLR_FORMAT) - time_clause = "(" \ - "tile_min_time_dt:[* TO %s] " \ - "AND tile_max_time_dt:[%s TO *] " \ - ")" % ( - the_time, the_time - ) - - additionalparams = { - 'fq': [ - "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]", - time_clause - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all(*(search, None, None, False, None), **additionalparams) - - def find_all_tiles_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): - search = 'dataset_s:%s' % ds - - the_time = datetime.utcfromtimestamp(time).strftime(SOLR_FORMAT) - time_clause = "(" \ - "tile_min_time_dt:[* TO %s] " \ - "AND tile_max_time_dt:[%s TO *] " \ - ")" % ( - the_time, the_time - ) - - additionalparams = { - 'fq': [ 
- "geo:\"Within(ENVELOPE(%s,%s,%s,%s))\"" % (min_lon, max_lon, max_lat, min_lat), - "tile_count_i:[1 TO *]", - time_clause - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all(*(search, "product(tile_avg_val_d, tile_count_i),*", None, False, None), - **additionalparams) - - def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): - search = 'dataset_s:%s' % ds - - the_time = datetime.utcfromtimestamp(time).strftime(SOLR_FORMAT) - time_clause = "(" \ - "tile_min_time_dt:[* TO %s] " \ - "AND tile_max_time_dt:[%s TO *] " \ - ")" % ( - the_time, the_time - ) - - additionalparams = { - 'fq': [ - "geo:\"Intersects(MultiLineString((%s %s, %s %s),(%s %s, %s %s),(%s %s, %s %s),(%s %s, %s %s)))\"" % ( - min_lon, max_lat, max_lon, max_lat, min_lon, max_lat, min_lon, min_lat, max_lon, max_lat, max_lon, - min_lat, min_lon, min_lat, max_lon, min_lat), - "-geo:\"Within(ENVELOPE(%s,%s,%s,%s))\"" % (min_lon, max_lon, max_lat, min_lat), - "tile_count_i:[1 TO *]", - time_clause - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all(*(search, None, None, False, None), **additionalparams) - - def find_all_tiles_by_metadata(self, metadata, ds, start_time=0, end_time=-1, **kwargs): - """ - Get a list of tile metadata that matches the specified metadata, start_time, end_time. - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :param ds: The dataset name to search - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :return: A list of tile metadata - """ - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': metadata - } - - if 0 <= start_time <= end_time: - additionalparams['fq'].append(self.get_formatted_time_clause(start_time, end_time)) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, None), - **additionalparams) - - def get_formatted_time_clause(self, start_time, end_time): - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - return time_clause - - def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): - """ - Return number of tiles that match search criteria. 
- :param ds: The dataset name to search - :param bounding_polygon: The polygon to search for tiles - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :return: number of tiles that match search criteria - """ - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "tile_count_i:[1 TO *]" - ], - 'rows': 0 - } - - if bounding_polygon: - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - additionalparams['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) - - if 0 <= start_time <= end_time: - additionalparams['fq'].append(self.get_formatted_time_clause(start_time, end_time)) - - if metadata: - additionalparams['fq'].extend(metadata) - - self._merge_kwargs(additionalparams, **kwargs) - - results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) - - return found - - def do_query(self, *args, **params): - - response = self.do_query_raw(*args, **params) - - return response.docs, response.raw_response['response']['start'], response.hits - - def do_query_raw(self, *args, **params): - - if 'fl' not in list(params.keys()) and args[1]: - params['fl'] = args[1] - - if 'sort' not in list(params.keys()) and args[4]: - params['sort'] = args[4] - - # If dataset_s is specified as the search term, - # add the _route_ parameter to limit the search to the correct shard - if 'dataset_s:' in args[0]: - ds = args[0].split(':')[-1] - params['shard_keys'] = ds + '!' - - with SOLR_CON_LOCK: - response = self.solrcon.search(args[0], **params) - - return response - - - def do_query_all(self, *args, **params): - - results = [] - - response = self.do_query_raw(*args, **params) - results.extend(response.docs) - - limit = min(params.get('limit', float('inf')), response.hits) - - while len(results) < limit: - params['start'] = len(results) - response = self.do_query_raw(*args, **params) - results.extend(response.docs) - - assert len(results) == limit - - return results - - def convert_iso_to_datetime(self, date): - return datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC) - - def convert_iso_to_timestamp(self, date): - return (self.convert_iso_to_datetime(date) - EPOCH).total_seconds() - - def ping(self): - solrAdminPing = '%s/solr/%s/admin/ping' % (self.solrUrl, self.solrCore) - try: - r = requests.get(solrAdminPing, params={'wt': 'json'}) - results = json.loads(r.text) - return results - except: - return None - - @staticmethod - def _merge_kwargs(additionalparams, **kwargs): - # Only Solr-specific kwargs are parsed - # And the special 'limit' - try: - additionalparams['limit'] = kwargs['limit'] - except KeyError: - pass - - try: - additionalparams['_route_'] = kwargs['_route_'] - except KeyError: - pass - - try: - additionalparams['rows'] = kwargs['rows'] - except KeyError: - pass - - try: - additionalparams['start'] = kwargs['start'] - except KeyError: - pass - - try: - kwfq = kwargs['fq'] if isinstance(kwargs['fq'], list) else list(kwargs['fq']) - except KeyError: - kwfq = [] - - try: - additionalparams['fq'].extend(kwfq) - except KeyError: - additionalparams['fq'] = kwfq - - try: - kwfl = kwargs['fl'] if isinstance(kwargs['fl'], list) else [kwargs['fl']] - except KeyError: - kwfl = [] - - try: - additionalparams['fl'].extend(kwfl) - except KeyError: - additionalparams['fl'] = kwfl - - try: - s = kwargs['sort'] if isinstance(kwargs['sort'], list) else 
[kwargs['sort']] - except KeyError: - s = None - - try: - additionalparams['sort'].extend(s) - except KeyError: - if s is not None: - additionalparams['sort'] = s diff --git a/data-access/nexustiles/dao/__init__.py b/data-access/nexustiles/dao/__init__.py deleted file mode 100644 index 6acb5d12..00000000 --- a/data-access/nexustiles/dao/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/data-access/nexustiles/exception.py b/data-access/nexustiles/exception.py new file mode 100644 index 00000000..33ab5296 --- /dev/null +++ b/data-access/nexustiles/exception.py @@ -0,0 +1,2 @@ +class NexusTileServiceException(Exception): + pass diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index fde0a5f3..d09c3aa6 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -14,34 +14,27 @@ # limitations under the License. import configparser +import json import logging import sys -import json +import threading from datetime import datetime -from functools import wraps, reduce, partial +from functools import reduce, wraps +from time import sleep +from typing import Dict, Union import numpy as np import numpy.ma as ma import pkg_resources -from pytz import timezone, UTC -from shapely.geometry import MultiPolygon, box import pysolr +from pytz import timezone, UTC +from shapely.geometry import box +from webservice.webmodel import DatasetNotFoundException, NexusProcessingException -import threading -from time import sleep - +from .AbstractTileService import AbstractTileService from .backends.nexusproto.backend import NexusprotoTileService from .backends.zarr.backend import ZarrBackend - - -from abc import ABC, abstractmethod - -from .AbstractTileService import AbstractTileService - from .model.nexusmodel import Tile, BBox, TileStats, TileVariable -from typing import Dict, Union - -from webservice.webmodel import DatasetNotFoundException, NexusProcessingException EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -49,7 +42,7 @@ level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) -logger = logging.getLogger("testing") +logger = logging.getLogger("nexus-tile-svc") def tile_data(default_fetch=True): @@ -83,19 +76,25 @@ def fetch_data_for_func(*args, **kwargs): return tile_data_decorator -class NexusTileServiceException(Exception): - pass - - SOLR_LOCK = threading.Lock() DS_LOCK = threading.Lock() thread_local = threading.local() - -class NexusTileService(AbstractTileService): +class NexusTileService: backends: Dict[Union[None, str], Dict[str, Union[AbstractTileService, bool]]] = {} + ds_config = None + + __update_thread = None + + @staticmethod + def __update_datasets(): + 
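+        # Background refresh loop (started once from __init__): re-scan Solr for
+        # datasets every hour so newly ingested or removed datasets are picked up
+        # without restarting the service.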
while True: + with DS_LOCK: + NexusTileService._update_datasets() + sleep(3600) + def __init__(self, config=None): self._config = configparser.RawConfigParser() self._config.read(NexusTileService._get_config_files('config/datasets.ini')) @@ -105,43 +104,54 @@ def __init__(self, config=None): if config: self.override_config(config) - NexusTileService.backends[None] = {"backend": NexusprotoTileService(False, False, config), 'up': True} - NexusTileService.backends['__nexusproto__'] = NexusTileService.backends[None] + if not NexusTileService.backends: + NexusTileService.ds_config = configparser.RawConfigParser() + NexusTileService.ds_config.read(NexusTileService._get_config_files('config/datasets.ini')) - def __update_datasets(): - while True: - with DS_LOCK: - self._update_datasets() - sleep(3600) + default_backend = {"backend": NexusprotoTileService(False, False, config), 'up': True} + + NexusTileService.backends[None] = default_backend + NexusTileService.backends['__nexusproto__'] = default_backend - threading.Thread(target=__update_datasets, name='dataset_update', daemon=False).start() + if not NexusTileService.__update_thread: + NexusTileService.__update_thread = threading.Thread( + target=NexusTileService.__update_datasets, + name='dataset_update', + daemon=False + ) + logger.info('Starting dataset refresh thread') + NexusTileService.__update_thread.start() @staticmethod def __get_backend(dataset_s) -> AbstractTileService: - if dataset_s not in NexusTileService.backends: - raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') + with DS_LOCK: + if dataset_s not in NexusTileService.backends: + raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') - b = NexusTileService.backends[dataset_s] + b = NexusTileService.backends[dataset_s] - if not b['up']: - success = b['backend'].try_connect() + if not b['up']: + success = b['backend'].try_connect() - if not success: - raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') - else: - NexusTileService.backends[dataset_s]['up'] = True + if not success: + raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') + else: + NexusTileService.backends[dataset_s]['up'] = True - return b['backend'] + return b['backend'] - def _update_datasets(self): - solr_url = self._config.get("solr", "host") - solr_core = self._config.get("solr", "core") + @staticmethod + def _update_datasets(): + solr_url = NexusTileService.ds_config.get("solr", "host") + solr_core = NexusTileService.ds_config.get("solr", "core") solr_kwargs = {} - if self._config.has_option("solr", "time_out"): - solr_kwargs["timeout"] = self._config.get("solr", "time_out") + update_logger = logging.getLogger("nexus-tile-svc.backends") + + if NexusTileService.ds_config.has_option("solr", "time_out"): + solr_kwargs["timeout"] = NexusTileService.ds_config.get("solr", "time_out") with SOLR_LOCK: solrcon = getattr(thread_local, 'solrcon', None) @@ -152,33 +162,53 @@ def _update_datasets(self): solrcon = solrcon - response = solrcon.search('*:*') + update_logger.info('Executing update query to check for new datasets') - present_datasets = set() + present_datasets = {None, '__nexusproto__'} + next_cursor_mark = '*' - for dataset in response.docs: - d_id = dataset['dataset_s'] - store_type = dataset.get('store_type_s', 'nexusproto') + while True: + response = solrcon.search('*:*', cursorMark=next_cursor_mark, sort='id asc') - present_datasets.add(d_id) + try: + 
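+                # Responses that carry no nextCursorMark attribute cannot be paged
+                # any further, so treat that as the end of the result set.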
                response_cursor_mark = response.nextCursorMark
+            except AttributeError:
+                break
-            if d_id in NexusTileService.backends:
-                continue
-            # is_up = NexusTileService.backends[d_id]['backend'].try_connect()
+            if response_cursor_mark == next_cursor_mark:
+                break
+            else:
+                next_cursor_mark = response_cursor_mark
-            if store_type == 'nexus_proto' or store_type == 'nexusproto':
-                NexusTileService.backends[d_id] = NexusTileService.backends[None]
-            elif store_type == 'zarr':
-                ds_config = json.loads(dataset['config'][0])
-                NexusTileService.backends[d_id] = {
-                    'backend': ZarrBackend(ds_config),
-                    'up': True
-                }
-            else:
-                logger.warning(f'Unsupported backend {store_type} for dataset {d_id}')
+            for dataset in response.docs:
+                d_id = dataset['dataset_s']
+                store_type = dataset.get('store_type_s', 'nexusproto')
+
+                present_datasets.add(d_id)
+
+                if d_id in NexusTileService.backends:
+                    continue
+                # is_up = NexusTileService.backends[d_id]['backend'].try_connect()
+
+                if store_type == 'nexus_proto' or store_type == 'nexusproto':
+                    update_logger.info(f"Detected new nexusproto dataset {d_id}, using default nexusproto backend")
+                    NexusTileService.backends[d_id] = NexusTileService.backends[None]
+                elif store_type == 'zarr':
+                    update_logger.info(f"Detected new zarr dataset {d_id}, opening new zarr backend")
+
+                    ds_config = json.loads(dataset['config'][0])
+                    NexusTileService.backends[d_id] = {
+                        'backend': ZarrBackend(ds_config),
+                        'up': True
+                    }
+                else:
+                    logger.warning(f'Unsupported backend {store_type} for dataset {d_id}')
 
         removed_datasets = set(NexusTileService.backends.keys()).difference(present_datasets)
 
+        if len(removed_datasets) > 0:
+            logger.info(f'{len(removed_datasets)} marked for removal')
+
         for dataset in removed_datasets:
             logger.info(f"Removing dataset {dataset}")
             del NexusTileService.backends[dataset]
@@ -336,23 +366,17 @@ def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset
         return tiles
 
     def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs):
-        tiles = self._metadatastore.find_all_tiles_within_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time,
-                                                                      **kwargs)
-
-        return tiles
+        return NexusTileService.__get_backend(dataset).get_stats_within_box_at_time(
+            min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs
+        )
 
-    def get_bounding_box(self, tile_ids):
+    def get_bounding_box(self, tile_ids, ds=None):
         """
         Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids.
         :param tile_ids: List of tile ids
         :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles
         """
-        tiles = self.find_tiles_by_id(tile_ids, fl=['tile_min_lat', 'tile_max_lat', 'tile_min_lon', 'tile_max_lon'],
-                                      fetch_data=False, rows=len(tile_ids))
-        polys = []
-        for tile in tiles:
-            polys.append(box(tile.bbox.min_lon, tile.bbox.min_lat, tile.bbox.max_lon, tile.bbox.max_lat))
-        return box(*MultiPolygon(polys).bounds)
+        return NexusTileService.__get_backend(ds).get_bounding_box(tile_ids, ds)
 
     def get_min_time(self, tile_ids, ds=None):
         """
@@ -361,8 +385,7 @@ def get_min_time(self, tile_ids, ds=None):
         :param tile_ids: List of tile ids
         :param ds: Filter by a specific dataset.
Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - min_time = self._metadatastore.find_min_date_from_tiles(tile_ids, ds=ds) - return int((min_time - EPOCH).total_seconds()) + return NexusTileService.__get_backend(ds).get_min_time(tile_ids, ds) def get_max_time(self, tile_ids, ds=None): """ @@ -371,8 +394,7 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - max_time = self._metadatastore.find_max_date_from_tiles(tile_ids, ds=ds) - return int((max_time - EPOCH).total_seconds()) + return int(NexusTileService.__get_backend(ds).get_max_time(tile_ids)) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -398,33 +420,95 @@ def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, m """ return self._metadatastore.get_tile_count(ds, bounding_polygon, start_time, end_time, metadata, **kwargs) - def fetch_data_for_tiles(self, *tiles): + def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): + for tile in tiles: + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. + if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) - nexus_tile_ids = set([tile.tile_id for tile in tiles]) - matched_tile_data = self._datastore.fetch_nexus_tiles(*nexus_tile_ids) + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - tile_data_by_id = {str(a_tile_data.tile_id): a_tile_data for a_tile_data in matched_tile_data} + return tiles - missing_data = nexus_tile_ids.difference(list(tile_data_by_id.keys())) - if len(missing_data) > 0: - raise Exception("Missing data for tile_id(s) %s." 
% missing_data) + def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - for a_tile in tiles: - lats, lons, times, data, meta, is_multi_var = tile_data_by_id[a_tile.tile_id].get_lat_lon_time_data_meta() + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - a_tile.latitudes = lats - a_tile.longitudes = lons - a_tile.times = times - a_tile.data = data - a_tile.meta_data = meta - a_tile.is_multi = is_multi_var + tile.data = ma.masked_where(data_mask, tile.data) - del (tile_data_by_id[a_tile.tile_id]) + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] return tiles - def _metadata_store_docs_to_tiles(self, *store_docs): + def mask_tiles_to_polygon(self, bounding_polygon, tiles): + + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) + + def mask_tiles_to_time_range(self, start_time, end_time, tiles): + """ + Masks data in tiles to specified time range. + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param tiles: List of tiles + :return: A list tiles with data masked to specified time range + """ + if 0 <= start_time <= end_time: + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. 
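# Aside: the same mask broadcast shown standalone with made-up sizes (3 times, 4 lats,
# 5 lons). Each 1-D coordinate mask is pushed into its own axis so the | operator
# broadcasts them into a single (time, lat, lon) mask; the multi-variable branch below
# then repeats that mask once per variable.
import numpy as np
import numpy.ma as ma

times = ma.masked_outside(ma.array([0, 10, 20]), 5, 25)
lats = ma.masked_outside(ma.array([-1.0, 0.0, 1.0, 2.0]), 0.0, 90.0)
lons = ma.masked_outside(ma.array(np.arange(5.0)), 1.0, 3.0)

data_mask = ma.getmaskarray(times)[:, np.newaxis, np.newaxis] \
            | ma.getmaskarray(lats)[np.newaxis, :, np.newaxis] \
            | ma.getmaskarray(lons)[np.newaxis, np.newaxis, :]
assert data_mask.shape == (3, 4, 5)

multi_data_mask = np.repeat(data_mask[np.newaxis, ...], 2, axis=0)  # 2 variables
assert multi_data_mask.shape == (2, 3, 4, 5)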
+ if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def fetch_data_for_tiles(self, *tiles, dataset=None): + return NexusTileService.__get_backend(dataset).fetch_data_for_tiles(*tiles) + + def _metadata_store_docs_to_tiles(self, *store_docs): tiles = [] for store_doc in store_docs: tile = Tile() @@ -521,7 +605,6 @@ def _metadata_store_docs_to_tiles(self, *store_docs): except KeyError: pass - if 'tile_var_name_ss' in store_doc: tile.variables = [] for var_name in store_doc['tile_var_name_ss']: @@ -536,13 +619,6 @@ def _metadata_store_docs_to_tiles(self, *store_docs): return tiles - def pingSolr(self): - status = self._metadatastore.ping() - if status and status["status"] == "OK": - return True - else: - return False - @staticmethod def _get_config_files(filename): log = logging.getLogger(__name__) diff --git a/data-access/setup.py b/data-access/setup.py index ab0248f0..e539e1e0 100644 --- a/data-access/setup.py +++ b/data-access/setup.py @@ -12,11 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import setuptools from setuptools import setup -with open('../VERSION.txt', 'r') as f: - __version__ = f.read() +try: + with open('../VERSION.txt', 'r') as f: + __version__ = f.read() +except: + __version__ = None with open('requirements.txt') as f: @@ -32,8 +35,13 @@ description="NEXUS API.", long_description=open('README.md').read(), - packages=['nexustiles', 'nexustiles.model', 'nexustiles.dao'], - package_data={'nexustiles': ['config/datastores.ini.default', 'config/datastores.ini']}, + packages=setuptools.find_packages(), # ['nexustiles', 'nexustiles.model', 'nexustiles.dao'], + package_data={ + 'nexustiles': + ['config/datasets.ini.default', 'config/datasets.ini'], + 'nexustiles.backends.nexusproto': + ['config/datastores.ini.default', 'config/datastores.ini'] + }, platforms='any', python_requires='~=3.8', install_requires=pip_requirements, From b77aa11f65a8eb486509b4e0e7e5c5a149fdbcc0 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jul 2023 16:33:52 -0700 Subject: [PATCH 05/91] Working(?) 
np backend --- .../nexustiles/backends/zarr/backend.py | 150 +++++++++++++++++- data-access/nexustiles/nexustiles.py | 39 ++++- data-access/requirements.txt | 2 + 3 files changed, 182 insertions(+), 9 deletions(-) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 93963166..13622453 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -40,6 +40,154 @@ class ZarrBackend(AbstractTileService): - def __init__(self, config): + def __init__(self, path, config): AbstractTileService.__init__(self) self.__config = config + + def get_dataseries_list(self, simple=False): + raise NotImplementedError() + + def find_tile_by_id(self, tile_id, **kwargs): + raise NotImplementedError() + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + raise NotImplementedError() + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, + metrics_callback=None, **kwargs): + raise NotImplementedError() + + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): + """ + Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding + polygon and the closest day of year. + + For example: + given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 + search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) + + Valid matches: + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 + + Invalid matches: + minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists + + :param bounding_polygon: The exact bounding polygon of tiles to search for + :param ds: The dataset name being searched + :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned + :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found + """ + raise NotImplementedError() + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): + raise NotImplementedError() + + def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall in the given box in the Solr index + raise NotImplementedError() + + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall within the polygon in the Solr index + raise NotImplementedError() + + def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles whose metadata matches the specified metadata, start_time, end_time. 
+ :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + raise NotImplementedError() + + def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): + """ + The method will return tiles with the exact given bounds within the time range. It differs from + find_tiles_in_polygon in that only tiles with exactly the given bounds will be returned as opposed to + doing a polygon intersection with the given bounds. + + :param bounds: (minx, miny, maxx, maxy) bounds to search for + :param ds: Dataset name to search + :param start_time: Start time to search (seconds since epoch) + :param end_time: End time to search (seconds since epoch) + :param kwargs: fetch_data: True/False = whether or not to retrieve tile data + :return: + """ + raise NotImplementedError() + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + def get_min_max_time_by_granule(self, ds, granule_name): + raise NotImplementedError() + + def get_dataset_overall_stats(self, ds): + raise NotImplementedError() + + def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + def get_bounding_box(self, tile_ids): + """ + Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. + :param tile_ids: List of tile ids + :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles + """ + raise NotImplementedError() + + def get_min_time(self, tile_ids, ds=None): + """ + Get the minimum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + def get_max_time(self, tile_ids, ds=None): + """ + Get the maximum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): + """ + Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. + :param bounding_polygon: The bounding polygon of tiles to search for + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of distinct bounding boxes (as shapely polygons) for tiles in the search polygon + """ + raise NotImplementedError() + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + raise NotImplementedError() + + def fetch_data_for_tiles(self, *tiles): + raise NotImplementedError() + + def _metadata_store_docs_to_tiles(self, *store_docs): + raise NotImplementedError() + + + diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index d09c3aa6..405b5b70 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -30,6 +30,7 @@ from pytz import timezone, UTC from shapely.geometry import box from webservice.webmodel import DatasetNotFoundException, NexusProcessingException +from webservice.NexusHandler import nexus_initializer from .AbstractTileService import AbstractTileService from .backends.nexusproto.backend import NexusprotoTileService @@ -81,6 +82,16 @@ def fetch_data_for_func(*args, **kwargs): thread_local = threading.local() +@nexus_initializer +class NTSInitializer: + def __init__(self): + self._log = logger.getChild('init') + + def init(self, config): + self._log.info('*** RUNNING NTS INITIALIZATION ***') + NexusTileService(config) + + class NexusTileService: backends: Dict[Union[None, str], Dict[str, Union[AbstractTileService, bool]]] = {} @@ -89,7 +100,7 @@ class NexusTileService: __update_thread = None @staticmethod - def __update_datasets(): + def __update_datasets_loop(): while True: with DS_LOCK: NexusTileService._update_datasets() @@ -115,7 +126,7 @@ def __init__(self, config=None): if not NexusTileService.__update_thread: NexusTileService.__update_thread = threading.Thread( - target=NexusTileService.__update_datasets, + target=NexusTileService.__update_datasets_loop, name='dataset_update', daemon=False ) @@ -128,7 +139,11 @@ def __init__(self, config=None): def __get_backend(dataset_s) -> AbstractTileService: with DS_LOCK: if dataset_s not in NexusTileService.backends: - raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') + logger.warning(f'Dataset {dataset_s} not currently loaded. 
Checking to see if it was recently' + f'added') + NexusTileService._update_datasets() + if dataset_s not in NexusTileService.backends: + raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') b = NexusTileService.backends[dataset_s] @@ -162,11 +177,13 @@ def _update_datasets(): solrcon = solrcon - update_logger.info('Executing update query to check for new datasets') + update_logger.info('Executing Solr query to check for new datasets') present_datasets = {None, '__nexusproto__'} next_cursor_mark = '*' + added_datasets = 0 + while True: response = solrcon.search('*:*', cursorMark=next_cursor_mark, sort='id asc') @@ -190,6 +207,8 @@ def _update_datasets(): continue # is_up = NexusTileService.backends[d_id]['backend'].try_connect() + added_datasets += 1 + if store_type == 'nexus_proto' or store_type == 'nexusproto': update_logger.info(f"Detected new nexusproto dataset {d_id}, using default nexusproto backend") NexusTileService.backends[d_id] = NexusTileService.backends[None] @@ -198,21 +217,25 @@ def _update_datasets(): ds_config = json.loads(dataset['config'][0]) NexusTileService.backends[d_id] = { - 'backend': ZarrBackend(ds_config), + 'backend': ZarrBackend(**ds_config), 'up': True } else: - logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') + update_logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') + added_datasets -= 1 removed_datasets = set(NexusTileService.backends.keys()).difference(present_datasets) if len(removed_datasets) > 0: - logger.info(f'{len(removed_datasets)} marked for removal') + update_logger.info(f'{len(removed_datasets)} old datasets marked for removal') for dataset in removed_datasets: - logger.info(f"Removing dataset {dataset}") + update_logger.info(f"Removing dataset {dataset}") del NexusTileService.backends[dataset] + update_logger.info(f'Finished dataset update: {added_datasets} added, {len(removed_datasets)} removed, ' + f'{len(NexusTileService.backends) - 2} total') + def override_config(self, config): for section in config.sections(): if self._config.has_section(section): # only override preexisting section, ignores the other diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 51270182..7d33cced 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -20,3 +20,5 @@ urllib3==1.26.2 requests nexusproto Shapely +s3fs +fsspec \ No newline at end of file From 4ccec2e5bc4fae53feca1426127fe801236ff067 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jul 2023 16:41:13 -0700 Subject: [PATCH 06/91] gitignore ini --- .gitignore | 3 +- .../nexustiles/config/datastores.ini.default | 39 ------------------- 2 files changed, 2 insertions(+), 40 deletions(-) delete mode 100644 data-access/nexustiles/config/datastores.ini.default diff --git a/.gitignore b/.gitignore index 12ab2d61..23f84355 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,6 @@ *.idea *.DS_Store analysis/webservice/algorithms/doms/domsconfig.ini -data-access/nexustiles/config/datastores.ini +data-access/nexustiles/backends/nexusproto/config/datastores.ini +data-access/nexustiles/config/datasets.ini venv/ diff --git a/data-access/nexustiles/config/datastores.ini.default b/data-access/nexustiles/config/datastores.ini.default deleted file mode 100644 index d8db1902..00000000 --- a/data-access/nexustiles/config/datastores.ini.default +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[cassandra] -host=localhost -port=9042 -keyspace=nexustiles -local_datacenter=datacenter1 -protocol_version=3 -dc_policy=DCAwareRoundRobinPolicy -username= -password= - -[s3] -bucket=nexus-jpl -region=us-west-2 - -[dynamo] -table=nexus-jpl-table -region=us-west-2 - -[solr] -host=http://localhost:8983 -core=nexustiles - -[datastore] -store=cassandra From 736a44e8740f601eddda87e4bc23eb92c270a32a Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jul 2023 16:43:47 -0700 Subject: [PATCH 07/91] ASF headers --- data-access/nexustiles/backends/__init__.py | 15 +++++++++++++++ .../nexustiles/backends/nexusproto/__init__.py | 15 +++++++++++++++ data-access/nexustiles/backends/zarr/__init__.py | 15 +++++++++++++++ data-access/nexustiles/exception.py | 15 +++++++++++++++ 4 files changed, 60 insertions(+) diff --git a/data-access/nexustiles/backends/__init__.py b/data-access/nexustiles/backends/__init__.py index e69de29b..8afd240a 100644 --- a/data-access/nexustiles/backends/__init__.py +++ b/data-access/nexustiles/backends/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/data-access/nexustiles/backends/nexusproto/__init__.py b/data-access/nexustiles/backends/nexusproto/__init__.py index e69de29b..8afd240a 100644 --- a/data-access/nexustiles/backends/nexusproto/__init__.py +++ b/data-access/nexustiles/backends/nexusproto/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/data-access/nexustiles/backends/zarr/__init__.py b/data-access/nexustiles/backends/zarr/__init__.py index e69de29b..8afd240a 100644 --- a/data-access/nexustiles/backends/zarr/__init__.py +++ b/data-access/nexustiles/backends/zarr/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/data-access/nexustiles/exception.py b/data-access/nexustiles/exception.py index 33ab5296..77850a2f 100644 --- a/data-access/nexustiles/exception.py +++ b/data-access/nexustiles/exception.py @@ -1,2 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ class NexusTileServiceException(Exception): pass From 70bdab12f4dfd80a59f572376e496ddff8145d37 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 11 Jul 2023 15:17:50 -0700 Subject: [PATCH 08/91] First functioning test of 2 simultaneous backends --- data-access/nexustiles/AbstractTileService.py | 3 + .../nexustiles/backends/nexusproto/backend.py | 8 +- .../nexustiles/backends/zarr/backend.py | 195 +++++++++++++++++- data-access/nexustiles/nexustiles.py | 73 ++++--- data-access/requirements.txt | 3 +- 5 files changed, 238 insertions(+), 44 deletions(-) diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py index 6e5b4640..20467784 100644 --- a/data-access/nexustiles/AbstractTileService.py +++ b/data-access/nexustiles/AbstractTileService.py @@ -30,6 +30,9 @@ class AbstractTileService(ABC): # def try_connect(self) -> bool: # raise NotImplementedError() + def __init__(self, dataset_name): + self._name = dataset_name + @abstractmethod def get_dataseries_list(self, simple=False): raise NotImplementedError() diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py index 6aa63644..8cca5813 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -38,16 +38,12 @@ EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) -logger = logging.getLogger("testing") +logger = logging.getLogger(__name__) class NexusprotoTileService(AbstractTileService): def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): - AbstractTileService.__init__(self) + AbstractTileService.__init__(self, None) self._datastore = None self._metadatastore = None diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 13622453..fe5a49dd 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -30,20 +30,72 @@ from nexustiles.exception import NexusTileServiceException from nexustiles.AbstractTileService import AbstractTileService +from yarl import URL + +import xarray as xr +import s3fs +from urllib.parse import urlparse + EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) -logger = logging.getLogger("testing") +logger = logging.getLogger() class ZarrBackend(AbstractTileService): - def __init__(self, path, config): - AbstractTileService.__init__(self) - self.__config = config - + def __init__(self, dataset_name, path, config=None): + AbstractTileService.__init__(self, dataset_name) + self.__config = config if config is not None else {} + + logger.info(f'Opening zarr backend at {path} for dataset {self._name}') + + url = urlparse(path) + + self.__url = path + + self.__store_type = url.scheme + self.__host = url.netloc + self.__path = url.path + + if 'variable' in config: + data_vars = config['variable'] + elif 'variables' in config: + data_vars = config['variables'] + else: + raise KeyError('Data variables not provided in config') + + if isinstance(data_vars, str): + self.__variables = [data_vars] + elif isinstance(data_vars, list): + self.__variables = data_vars + else: + raise TypeError(f'Improper 
type for variables config: {type(data_vars)}') + + self.__longitude = config['coords']['longitude'] + self.__latitude = config['coords']['latitude'] + self.__time = config['coords']['time'] + + self.__depth = config['coords'].get('depth') + + if self.__store_type in ['', 'file']: + store = self.__path + elif self.__store_type == 's3': + aws_cfg = self.__config['aws'] + + if aws_cfg['public']: + region = aws_cfg.get('region', 'us-west-2') + store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + else: + s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) + store = s3fs.S3Map(root=path, s3=s3, check=False) + else: + raise ValueError(self.__store_type) + + self.__ds: xr.Dataset = xr.open_zarr(store, consolidated=True) + def get_dataseries_list(self, simple=False): raise NotImplementedError() @@ -89,10 +141,31 @@ def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **k raise NotImplementedError() def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): - # Find tiles that fall in the given box in the Solr index - raise NotImplementedError() - - def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): + if type(start_time) is datetime: + start_time = (start_time - EPOCH).total_seconds() + if type(end_time) is datetime: + end_time = (end_time - EPOCH).total_seconds() + + params = { + 'min_lat': min_lat, + 'max_lat': max_lat, + 'min_lon': min_lon, + 'max_lon': max_lon + } + + if 0 <= start_time <= end_time: + params['min_time'] = start_time + params['max_time'] = end_time + + if 'depth' in kwargs: + params['depth'] = kwargs['depth'] + elif 'min_depth' in kwargs or 'max_depth' in kwargs: + params['min_depth'] = kwargs.get('min_depth') + params['max_depth'] = kwargs.get('max_depth') + + return [ZarrBackend.__to_url(self._name, **params)] + + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): # Find tiles that fall within the polygon in the Solr index raise NotImplementedError() @@ -184,10 +257,110 @@ def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, m raise NotImplementedError() def fetch_data_for_tiles(self, *tiles): - raise NotImplementedError() + for tile in tiles: + self.__fetch_data_for_tile(tile) + + return tiles + + def __fetch_data_for_tile(self, tile: Tile): + bbox: BBox = tile.bbox + + min_lat = None + min_lon = None + max_lat = None + max_lon = None + + min_time = float(tile.min_time) + max_time = float(tile.max_time) + + if min_time: + min_time = datetime.fromtimestamp(min_time) + + if max_time: + max_time = datetime.fromtimestamp(max_time) + + if bbox: + min_lat = bbox.min_lat + min_lon = bbox.min_lon + max_lat = bbox.max_lat + max_lon = bbox.max_lon + + sel = { + self.__latitude: slice(min_lat, max_lat), + self.__longitude: slice(min_lon, max_lon), + self.__time: slice(min_time, max_time) + } + + tile.variables = [ + TileVariable(v, v) for v in self.__variables + ] + + matched = self.__ds.sel(sel) + + tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) + tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) + + times = matched[self.__time].to_numpy() + + if np.issubdtype(times.dtype, np.datetime64): + times = ((times - np.datetime64(EPOCH)) / 1e9).astype(int) + + tile.times = ma.masked_invalid(times) + + tile.data = ma.masked_invalid( + [matched[var].to_numpy() for var in self.__variables] + ) 
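# Aside: the datetime64 -> seconds-since-epoch conversion used a few lines above, shown
# standalone on nanosecond-resolution timestamps (the dates are arbitrary examples).
# Dividing the timedelta by one second is equivalent to the / 1e9 above when the
# underlying resolution is nanoseconds.
import numpy as np

times = np.array(['2017-01-01T00:00:00', '2017-01-02T06:00:00'], dtype='datetime64[ns]')
epoch = np.datetime64('1970-01-01T00:00:00', 'ns')

seconds = ((times - epoch) / np.timedelta64(1, 's')).astype(int)
print(seconds)  # -> [1483228800 1483336800]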
+ + tile.is_multi = True def _metadata_store_docs_to_tiles(self, *store_docs): - raise NotImplementedError() + return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] + + @staticmethod + def __nts_url_to_tile(nts_url): + tile = Tile() + + url = URL(nts_url) + + tile.tile_id = nts_url + + try: + min_lat = float(url.query['min_lat']) + min_lon = float(url.query['min_lon']) + max_lat = float(url.query['max_lat']) + max_lon = float(url.query['max_lon']) + + tile.bbox = BBox(min_lat, max_lat, min_lon, max_lon) + except KeyError: + pass + + tile.dataset = url.host + + try: + tile.min_time = int(url.query['min_time']) + except KeyError: + pass + + try: + tile.max_time = int(url.query['max_time']) + except KeyError: + pass + + return tile + + @staticmethod + def __to_url(dataset, **kwargs): + if 'dataset' in kwargs: + del kwargs['dataset'] + + if 'ds' in kwargs: + del kwargs['ds'] + return str(URL.build( + scheme='nts', + host=dataset, + path='/', + query=kwargs + )) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 405b5b70..78fe23d4 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -37,6 +37,8 @@ from .backends.zarr.backend import ZarrBackend from .model.nexusmodel import Tile, BBox, TileStats, TileVariable +from requests.structures import CaseInsensitiveDict + EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) logging.basicConfig( @@ -53,13 +55,27 @@ def fetch_data_for_func(*args, **kwargs): metadatastore_start = datetime.now() metadatastore_docs = func(*args, **kwargs) metadatastore_duration = (datetime.now() - metadatastore_start).total_seconds() - tiles = args[0]._metadata_store_docs_to_tiles(*metadatastore_docs) + + # Try to determine source dataset to route calls to proper backend + guessed_dataset = None + + if 'ds' in kwargs: + guessed_dataset = kwargs['ds'] + elif 'dataset' in kwargs: + guessed_dataset = kwargs['dataset'] + else: + for arg in args: + if arg is not None and arg in NexusTileService.backends: + guessed_dataset = arg + break + + tiles = NexusTileService._get_backend(guessed_dataset)._metadata_store_docs_to_tiles(*metadatastore_docs) cassandra_duration = 0 if ('fetch_data' in kwargs and kwargs['fetch_data']) or ('fetch_data' not in kwargs and default_fetch): if len(tiles) > 0: cassandra_start = datetime.now() - args[0].fetch_data_for_tiles(*tiles) + NexusTileService._get_backend(guessed_dataset).fetch_data_for_tiles(*tiles) cassandra_duration += (datetime.now() - cassandra_start).total_seconds() if 'metrics_callback' in kwargs and kwargs['metrics_callback'] is not None: @@ -128,7 +144,7 @@ def __init__(self, config=None): NexusTileService.__update_thread = threading.Thread( target=NexusTileService.__update_datasets_loop, name='dataset_update', - daemon=False + daemon=True ) logger.info('Starting dataset refresh thread') @@ -136,7 +152,10 @@ def __init__(self, config=None): NexusTileService.__update_thread.start() @staticmethod - def __get_backend(dataset_s) -> AbstractTileService: + def _get_backend(dataset_s) -> AbstractTileService: + if dataset_s is not None: + dataset_s = dataset_s.lower() + with DS_LOCK: if dataset_s not in NexusTileService.backends: logger.warning(f'Dataset {dataset_s} not currently loaded. 
Checking to see if it was recently' @@ -198,7 +217,7 @@ def _update_datasets(): next_cursor_mark = response_cursor_mark for dataset in response.docs: - d_id = dataset['dataset_s'] + d_id = dataset['dataset_s'].lower() store_type = dataset.get('store_type_s', 'nexusproto') present_datasets.add(d_id) @@ -217,7 +236,7 @@ def _update_datasets(): ds_config = json.loads(dataset['config'][0]) NexusTileService.backends[d_id] = { - 'backend': ZarrBackend(**ds_config), + 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), 'up': True } else: @@ -251,33 +270,33 @@ def get_dataseries_list(self, simple=False): @tile_data() def find_tile_by_id(self, tile_id, **kwargs): - return NexusTileService.__get_backend('__nexusproto__').find_tile_by_id(tile_id) + return NexusTileService._get_backend('__nexusproto__').find_tile_by_id(tile_id) @tile_data() def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - return NexusTileService.__get_backend('__nexusproto__').find_tiles_by_id(tile_ids, ds=ds, **kwargs) + return NexusTileService._get_backend('__nexusproto__').find_tiles_by_id(tile_ids, ds=ds, **kwargs) def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): - return NexusTileService.__get_backend(dataset).find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, - dataset, start_time, end_time, - metrics_callback, **kwargs) + return NexusTileService._get_backend(dataset).find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, + dataset, start_time, end_time, + metrics_callback, **kwargs) @tile_data() def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): - return NexusTileService.__get_backend(ds).find_tile_by_polygon_and_most_recent_day_of_year( + return NexusTileService._get_backend(ds).find_tile_by_polygon_and_most_recent_day_of_year( bounding_polygon, ds, day_of_year, **kwargs ) @tile_data() def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - return NexusTileService.__get_backend(dataset).find_all_tiles_in_box_at_time( + return NexusTileService._get_backend(dataset).find_all_tiles_in_box_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs ) @tile_data() def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): - return NexusTileService.__get_backend(dataset).find_all_tiles_in_polygon_at_time( + return NexusTileService._get_backend(dataset).find_all_tiles_in_polygon_at_time( bounding_polygon, dataset, time, **kwargs ) @@ -289,19 +308,19 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t if type(end_time) is datetime: end_time = (end_time - EPOCH).total_seconds() - return NexusTileService.__get_backend(ds).find_tiles_in_box( + return NexusTileService._get_backend(ds).find_tiles_in_box( min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs ) @tile_data() def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): - return NexusTileService.__get_backend(ds).find_tiles_in_polygon( + return NexusTileService._get_backend(ds).find_tiles_in_polygon( bounding_polygon, ds, start_time, end_time, **kwargs ) @tile_data() def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): - return NexusTileService.__get_backend(ds).find_tiles_by_metadata( + return NexusTileService._get_backend(ds).find_tiles_by_metadata( metadata, ds, start_time, end_time, **kwargs 
) @@ -334,13 +353,13 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) :param kwargs: fetch_data: True/False = whether or not to retrieve tile data :return: """ - return NexusTileService.__get_backend(ds).find_tiles_by_exact_bounds( + return NexusTileService._get_backend(ds).find_tiles_by_exact_bounds( bounds, ds, start_time, end_time, **kwargs ) @tile_data() def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - return NexusTileService.__get_backend(dataset).find_all_boundary_tiles_at_time( + return NexusTileService._get_backend(dataset).find_all_boundary_tiles_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs ) @@ -363,12 +382,12 @@ def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time= return tiles def get_min_max_time_by_granule(self, ds, granule_name): - return NexusTileService.__get_backend(ds).get_min_max_time_by_granule( + return NexusTileService._get_backend(ds).get_min_max_time_by_granule( ds, granule_name ) def get_dataset_overall_stats(self, ds): - return NexusTileService.__get_backend(ds).get_dataset_overall_stats(ds) + return NexusTileService._get_backend(ds).get_dataset_overall_stats(ds) def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): tiles = self.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs) @@ -399,7 +418,7 @@ def get_bounding_box(self, tile_ids, ds=None): :param tile_ids: List of tile ids :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles """ - return NexusTileService.__get_backend(ds).get_bounding_box(tile_ids, ds) + return NexusTileService._get_backend(ds).get_bounding_box(tile_ids, ds) def get_min_time(self, tile_ids, ds=None): """ @@ -408,7 +427,7 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - return NexusTileService.__get_backend(ds).get_min_time(tile_ids, ds) + return NexusTileService._get_backend(ds).get_min_time(tile_ids, ds) def get_max_time(self, tile_ids, ds=None): """ @@ -417,7 +436,7 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. 
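# Aside: an illustrative-only sketch of the dispatch pattern the methods above follow --
# one registry keyed by dataset name (None holding the default nexusproto backend) and a
# single lookup helper that every public call goes through. The classes and names below
# are stand-ins, not the real backends.
class FakeBackend:
    def __init__(self, label):
        self.label = label

    def get_min_time(self, tile_ids, ds=None):
        return 0  # a real backend would compute this from its metadata store


backends = {
    None: FakeBackend('nexusproto-default'),
    'MY_ZARR_DS': FakeBackend('zarr'),
}


def get_backend(dataset):
    try:
        return backends[dataset]
    except KeyError:
        raise KeyError(f'Dataset {dataset} is not currently loaded/ingested')


print(get_backend('MY_ZARR_DS').label)  # -> zarr
print(get_backend(None).label)          # -> nexusproto-default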
Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - return int(NexusTileService.__get_backend(ds).get_max_time(tile_ids)) + return int(NexusTileService._get_backend(ds).get_max_time(tile_ids)) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -528,8 +547,10 @@ def mask_tiles_to_time_range(self, start_time, end_time, tiles): return tiles - def fetch_data_for_tiles(self, *tiles, dataset=None): - return NexusTileService.__get_backend(dataset).fetch_data_for_tiles(*tiles) + def fetch_data_for_tiles(self, *tiles): + dataset = tiles[0].dataset + + return NexusTileService._get_backend(dataset).fetch_data_for_tiles(*tiles) def _metadata_store_docs_to_tiles(self, *store_docs): tiles = [] diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 7d33cced..ab96e2af 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -21,4 +21,5 @@ requests nexusproto Shapely s3fs -fsspec \ No newline at end of file +fsspec +xarray~=2022.3.0 \ No newline at end of file From f3981cd8735b146206fe87955cc26cd4d25ad034 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 13:38:12 -0700 Subject: [PATCH 09/91] Removed accidentally committed ini files --- .../backends/nexusproto/config/datastores.ini | 36 ------------------- data-access/nexustiles/config/datasets.ini | 18 ---------- 2 files changed, 54 deletions(-) delete mode 100644 data-access/nexustiles/backends/nexusproto/config/datastores.ini delete mode 100644 data-access/nexustiles/config/datasets.ini diff --git a/data-access/nexustiles/backends/nexusproto/config/datastores.ini b/data-access/nexustiles/backends/nexusproto/config/datastores.ini deleted file mode 100644 index f3facb95..00000000 --- a/data-access/nexustiles/backends/nexusproto/config/datastores.ini +++ /dev/null @@ -1,36 +0,0 @@ -[cassandra] -host=localhost -port=9042 -keyspace=nexustiles -local_datacenter=datacenter1 -protocol_version=3 -dc_policy=WhiteListRoundRobinPolicy -username=cassandra -password=cassandra - -[dynamo] -table=nexus-jpl-table -region=us-west-2 - -[solr] -host=http://localhost:8983 -core=nexustiles - -[s3] -bucket=cdms-dev-zarr -#key=MUR_aggregate/ -#key=MUR_1wk_7_100_100/ -#key=MUR_1wk_7_1500_2500/ -#key=MUR_2017_9dy_7_1500_2500/ -#key=MUR_2017_9dy_7_120_240/ -key=MUR_2017_2yr_30_120_240/ -#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_7_120_240.zarr/ -#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_1_240_240.zarr/ -#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_90_120_240.zarr/ -public=false -region=us-west-2 -profile=saml-pub - -[datastore] -store=cassandra -#store=zarrS3 diff --git a/data-access/nexustiles/config/datasets.ini b/data-access/nexustiles/config/datasets.ini deleted file mode 100644 index 9f586cf2..00000000 --- a/data-access/nexustiles/config/datasets.ini +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[solr] -host=http://localhost:8983 -core=nexusdatasets From 26f6220f2f6a8aaa3787ac6b80effc0b4e236837 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 14:14:31 -0700 Subject: [PATCH 10/91] Working zarr backend ds list + datasets are no longer case sensitive + handling for failed zarr ds opens (bad path, bad creds, &c) --- .../backends/nexusproto/dao/SolrProxy.py | 3 +- .../nexustiles/backends/zarr/backend.py | 75 +++++++++++++++---- data-access/nexustiles/exception.py | 3 +- data-access/nexustiles/nexustiles.py | 37 ++++++--- 4 files changed, 90 insertions(+), 28 deletions(-) diff --git a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py index 9b16533d..c9435a2b 100644 --- a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py +++ b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py @@ -189,7 +189,8 @@ def get_data_series_list_simple(self): l.append({ "shortName": g, "title": g, - "tileCount": v + "tileCount": v, + "type": 'nexusproto' }) l = sorted(l, key=lambda entry: entry["title"]) return l diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index fe5a49dd..de1d86ba 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -37,12 +37,13 @@ from urllib.parse import urlparse EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) -logger = logging.getLogger() +logger = logging.getLogger(__name__) class ZarrBackend(AbstractTileService): @@ -83,21 +84,43 @@ def __init__(self, dataset_name, path, config=None): if self.__store_type in ['', 'file']: store = self.__path elif self.__store_type == 's3': - aws_cfg = self.__config['aws'] - - if aws_cfg['public']: - region = aws_cfg.get('region', 'us-west-2') - store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' - else: - s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) - store = s3fs.S3Map(root=path, s3=s3, check=False) + try: + aws_cfg = self.__config['aws'] + + if aws_cfg['public']: + region = aws_cfg.get('region', 'us-west-2') + store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + else: + s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) + store = s3fs.S3Map(root=path, s3=s3, check=False) + except Exception as e: + logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. Cause: {e}') + raise NexusTileServiceException(f'Cannot open S3 dataset ({e})') else: raise ValueError(self.__store_type) - self.__ds: xr.Dataset = xr.open_zarr(store, consolidated=True) + try: + self.__ds: xr.Dataset = xr.open_zarr(store, consolidated=True) + except Exception as e: + logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. 
Cause: {e}') + raise NexusTileServiceException(f'Cannot open dataset ({e})') def get_dataseries_list(self, simple=False): - raise NotImplementedError() + ds = { + "shortName": self._name, + "title": self._name, + "type": "zarr" + } + + if not simple: + min_date = self.get_min_time([]) + max_date = self.get_max_time([]) + ds['start'] = min_date + ds['end'] = max_date + ds['iso_start'] = datetime.fromtimestamp(min_date).strftime(ISO_8601) + ds['iso_end'] = datetime.fromtimestamp(max_date).strftime(ISO_8601) + + return [ds] def find_tile_by_id(self, tile_id, **kwargs): raise NotImplementedError() @@ -215,6 +238,18 @@ def get_bounding_box(self, tile_ids): """ raise NotImplementedError() + def __get_ds_min_max_date(self): + min_date = self.__ds[self.__time].min().to_numpy() + max_date = self.__ds[self.__time].max().to_numpy() + + if np.issubdtype(min_date.dtype, np.datetime64): + min_date = ((min_date - np.datetime64(EPOCH)) / 1e9).astype(int).item() + + if np.issubdtype(max_date.dtype, np.datetime64): + max_date = ((max_date - np.datetime64(EPOCH)) / 1e9).astype(int).item() + + return min_date, max_date + def get_min_time(self, tile_ids, ds=None): """ Get the minimum tile date from the list of tile ids @@ -222,7 +257,11 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - raise NotImplementedError() + if len(tile_ids) == 0: + min_date, max_date = self.__get_ds_min_max_date() + return min_date + else: + raise NotImplementedError() def get_max_time(self, tile_ids, ds=None): """ @@ -231,7 +270,11 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - raise NotImplementedError() + if len(tile_ids) == 0: + min_date, max_date = self.__get_ds_min_max_date() + return max_date + else: + raise NotImplementedError() def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -334,7 +377,7 @@ def __nts_url_to_tile(nts_url): except KeyError: pass - tile.dataset = url.host + tile.dataset = url.path try: tile.min_time = int(url.query['min_time']) @@ -358,8 +401,8 @@ def __to_url(dataset, **kwargs): return str(URL.build( scheme='nts', - host=dataset, - path='/', + host='', + path=dataset, query=kwargs )) diff --git a/data-access/nexustiles/exception.py b/data-access/nexustiles/exception.py index 77850a2f..d6ed2c64 100644 --- a/data-access/nexustiles/exception.py +++ b/data-access/nexustiles/exception.py @@ -14,4 +14,5 @@ # limitations under the License. 
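# Aside: how an epoch-seconds value turns into the iso_start/iso_end strings built above
# when the timestamp is formatted as UTC (the value is an arbitrary example). Note that
# %z only produces an offset for timezone-aware datetimes; a naive fromtimestamp() call
# formats it as an empty string.
from datetime import datetime, timezone

ISO_8601 = '%Y-%m-%dT%H:%M:%S%z'
print(datetime.fromtimestamp(1483228800, tz=timezone.utc).strftime(ISO_8601))
# -> 2017-01-01T00:00:00+0000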
class NexusTileServiceException(Exception): - pass + def __init__(self, reason): + Exception.__init__(self, reason) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 78fe23d4..1b58f156 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -37,6 +37,8 @@ from .backends.zarr.backend import ZarrBackend from .model.nexusmodel import Tile, BBox, TileStats, TileVariable +from .exception import NexusTileServiceException + from requests.structures import CaseInsensitiveDict EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -93,6 +95,16 @@ def fetch_data_for_func(*args, **kwargs): return tile_data_decorator +def catch_not_implemented(func): + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except NotImplementedError: + raise NexusTileServiceException('Action unsupported by backend') + + return wrapper + + SOLR_LOCK = threading.Lock() DS_LOCK = threading.Lock() thread_local = threading.local() @@ -154,7 +166,7 @@ def __init__(self, config=None): @staticmethod def _get_backend(dataset_s) -> AbstractTileService: if dataset_s is not None: - dataset_s = dataset_s.lower() + dataset_s = dataset_s with DS_LOCK: if dataset_s not in NexusTileService.backends: @@ -217,7 +229,7 @@ def _update_datasets(): next_cursor_mark = response_cursor_mark for dataset in response.docs: - d_id = dataset['dataset_s'].lower() + d_id = dataset['dataset_s'] store_type = dataset.get('store_type_s', 'nexusproto') present_datasets.add(d_id) @@ -235,10 +247,13 @@ def _update_datasets(): update_logger.info(f"Detected new zarr dataset {d_id}, opening new zarr backend") ds_config = json.loads(dataset['config'][0]) - NexusTileService.backends[d_id] = { - 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), - 'up': True - } + try: + NexusTileService.backends[d_id] = { + 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), + 'up': True + } + except NexusTileServiceException: + added_datasets -= 1 else: update_logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') added_datasets -= 1 @@ -263,10 +278,12 @@ def override_config(self, config): self._config.set(section, option, config.get(section, option)) def get_dataseries_list(self, simple=False): - if simple: - return self._metadatastore.get_data_series_list_simple() - else: - return self._metadatastore.get_data_series_list() + datasets = [] + for backend in set([b['backend'] for b in NexusTileService.backends.values() if b['up']]): + datasets.extend(backend.get_dataseries_list(simple)) + + return datasets + @tile_data() def find_tile_by_id(self, tile_id, **kwargs): From 91de6efef7b15480bde3993f9ae47aee8401e5ed Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 16:06:18 -0700 Subject: [PATCH 11/91] Capture and handle NTS requests routed to backend that doesn't (yet) support them --- data-access/nexustiles/nexustiles.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 1b58f156..b8165a1d 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -286,13 +286,16 @@ def get_dataseries_list(self, simple=False): @tile_data() + @catch_not_implemented def find_tile_by_id(self, tile_id, **kwargs): return NexusTileService._get_backend('__nexusproto__').find_tile_by_id(tile_id) @tile_data() + @catch_not_implemented def find_tiles_by_id(self, tile_ids, ds=None, 
**kwargs): - return NexusTileService._get_backend('__nexusproto__').find_tiles_by_id(tile_ids, ds=ds, **kwargs) + return NexusTileService._get_backend(ds).find_tiles_by_id(tile_ids, ds=ds, **kwargs) + @catch_not_implemented def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): return NexusTileService._get_backend(dataset).find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, @@ -300,24 +303,28 @@ def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, st metrics_callback, **kwargs) @tile_data() + @catch_not_implemented def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): return NexusTileService._get_backend(ds).find_tile_by_polygon_and_most_recent_day_of_year( bounding_polygon, ds, day_of_year, **kwargs ) @tile_data() + @catch_not_implemented def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): return NexusTileService._get_backend(dataset).find_all_tiles_in_box_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs ) @tile_data() + @catch_not_implemented def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): return NexusTileService._get_backend(dataset).find_all_tiles_in_polygon_at_time( bounding_polygon, dataset, time, **kwargs ) @tile_data() + @catch_not_implemented def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): # Find tiles that fall in the given box in the Solr index if type(start_time) is datetime: @@ -330,12 +337,14 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t ) @tile_data() + @catch_not_implemented def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): return NexusTileService._get_backend(ds).find_tiles_in_polygon( bounding_polygon, ds, start_time, end_time, **kwargs ) @tile_data() + @catch_not_implemented def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): return NexusTileService._get_backend(ds).find_tiles_by_metadata( metadata, ds, start_time, end_time, **kwargs @@ -357,6 +366,7 @@ def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, ** return tiles @tile_data() + @catch_not_implemented def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): """ The method will return tiles with the exact given bounds within the time range. 
It differs from @@ -375,6 +385,7 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) ) @tile_data() + @catch_not_implemented def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): return NexusTileService._get_backend(dataset).find_all_boundary_tiles_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs @@ -398,11 +409,13 @@ def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time= return tiles + @catch_not_implemented def get_min_max_time_by_granule(self, ds, granule_name): return NexusTileService._get_backend(ds).get_min_max_time_by_granule( ds, granule_name ) + @catch_not_implemented def get_dataset_overall_stats(self, ds): return NexusTileService._get_backend(ds).get_dataset_overall_stats(ds) @@ -424,6 +437,7 @@ def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset return tiles + @catch_not_implemented def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): return NexusTileService.get_stats_within_box_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs @@ -435,7 +449,7 @@ def get_bounding_box(self, tile_ids, ds=None): :param tile_ids: List of tile ids :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles """ - return NexusTileService._get_backend(ds).get_bounding_box(tile_ids, ds) + return NexusTileService._get_backend(ds).get_bounding_box(tile_ids) def get_min_time(self, tile_ids, ds=None): """ From df23919bc7466d9df12fda9893b581af61d10f80 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 16:06:59 -0700 Subject: [PATCH 12/91] analysis setup fails to find VERSION.txt when building locally --- analysis/setup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/analysis/setup.py b/analysis/setup.py index 99cd707c..6472621d 100644 --- a/analysis/setup.py +++ b/analysis/setup.py @@ -17,8 +17,11 @@ import setuptools from subprocess import check_call, CalledProcessError -with open('../VERSION.txt', 'r') as f: - __version__ = f.read() +try: + with open('../VERSION.txt', 'r') as f: + __version__ = f.read() +except: + __version__ = None try: From 07404f063dc9f2b0ae9c2941caef445c7aae26c2 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 16:07:35 -0700 Subject: [PATCH 13/91] Implemented more NTS functions in zarr backend --- .../nexustiles/backends/zarr/backend.py | 89 ++++++++++++++++--- 1 file changed, 75 insertions(+), 14 deletions(-) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index de1d86ba..1f46a95e 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -123,14 +123,38 @@ def get_dataseries_list(self, simple=False): return [ds] def find_tile_by_id(self, tile_id, **kwargs): - raise NotImplementedError() + return tile_id def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - raise NotImplementedError() + return tile_ids def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): - raise NotImplementedError() + start = datetime.now() + + if not isinstance(start_time, datetime): + start_time = datetime.fromtimestamp(start_time) + + if not isinstance(end_time, datetime): + end_time = datetime.fromtimestamp(end_time) + + sel = { + self.__latitude: slice(min_lat, max_lat), + 
self.__longitude: slice(min_lon, max_lon), + self.__time: slice(start_time, end_time) + } + + times = self.__ds.sel(sel)[self.__time].to_numpy() + + if np.issubdtype(times.dtype, np.datetime64): + times = ((times - np.datetime64(EPOCH)) / 1e9).astype(int) + + times = sorted(list(times)) + + if metrics_callback: + metrics_callback(backend=(datetime.now() - start).total_seconds()) + + return times def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): """ @@ -158,10 +182,10 @@ def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, raise NotImplementedError() def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - raise NotImplementedError() + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, dataset, time, time, **kwargs) def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): - raise NotImplementedError() + return self.find_tiles_in_polygon(bounding_polygon, dataset, time, time, **kwargs) def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): if type(start_time) is datetime: @@ -190,7 +214,14 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): # Find tiles that fall within the polygon in the Solr index - raise NotImplementedError() + bounds = bounding_polygon.bounds + + min_lon = bounds[0] + min_lat = bounds[1] + max_lon = bounds[2] + max_lat = bounds[3] + + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs) def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): """ @@ -216,10 +247,17 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) :param kwargs: fetch_data: True/False = whether or not to retrieve tile data :return: """ - raise NotImplementedError() + min_lon = bounds[0] + min_lat = bounds[1] + max_lon = bounds[2] + max_lat = bounds[3] + + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs) def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - raise NotImplementedError() + # Due to the precise nature of gridded Zarr's subsetting, it doesn't make sense to have a boundary region like + # this + return [] def get_min_max_time_by_granule(self, ds, granule_name): raise NotImplementedError() @@ -236,7 +274,20 @@ def get_bounding_box(self, tile_ids): :param tile_ids: List of tile ids :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles """ - raise NotImplementedError() + + bounds = [ + ( + float(URL(u).query['min_lon']), + float(URL(u).query['min_lat']), + float(URL(u).query['max_lon']), + float(URL(u).query['max_lat']) + ) + for u in tile_ids + ] + + poly = MultiPolygon([box(*b) for b in bounds]) + + return box(*poly.bounds) def __get_ds_min_max_date(self): min_date = self.__ds[self.__time].min().to_numpy() @@ -257,11 +308,13 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. 
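For orientation: the zarr backend has no physical tiles, so the tile ids handled above are synthetic nts:// URLs whose query string carries the subset bounds and an epoch-second time range; get_bounding_box(), get_min_time() and get_max_time() simply parse those parameters back out with yarl's URL. Schematically (field values illustrative only):

    nts://<dataset name>?min_lat=<float>&max_lat=<float>&min_lon=<float>&max_lon=<float>&min_time=<epoch seconds>&max_time=<epoch seconds>
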
Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - if len(tile_ids) == 0: + times = list(filter(lambda x: x is not None, [int(URL(tid).query['min_time']) for tid in tile_ids])) + + if len(times) == 0: min_date, max_date = self.__get_ds_min_max_date() return min_date else: - raise NotImplementedError() + return min(times) def get_max_time(self, tile_ids, ds=None): """ @@ -270,11 +323,13 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ + times = list(filter(lambda x: x is not None, [int(URL(tid).query['max_time']) for tid in tile_ids])) + if len(tile_ids) == 0: min_date, max_date = self.__get_ds_min_max_date() return max_date else: - raise NotImplementedError() + max(times) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -331,14 +386,20 @@ def __fetch_data_for_tile(self, tile: Tile): sel = { self.__latitude: slice(min_lat, max_lat), self.__longitude: slice(min_lon, max_lon), - self.__time: slice(min_time, max_time) } + if min_time == max_time: + sel[self.__time] = min_time + method = 'nearest' + else: + sel[self.__time] = slice(min_time, max_time) + method = None + tile.variables = [ TileVariable(v, v) for v in self.__variables ] - matched = self.__ds.sel(sel) + matched = self.__ds.sel(sel, method=method) tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) From 72888aa9f83f846dd9535835101e21aef93a8410 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 16:11:22 -0700 Subject: [PATCH 14/91] Added misc backend time metrics record field in NCSH --- analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py b/analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py index 4499773a..e0334676 100644 --- a/analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py +++ b/analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py @@ -362,6 +362,9 @@ def _create_metrics_record(self): SparkAccumulatorMetricsField(key='solr', description='Cumulative time to fetch data from Solr', accumulator=self._sc.accumulator(0)), + SparkAccumulatorMetricsField(key='backend', + description='Cumulative time to fetch data from external backend(s)', + accumulator=self._sc.accumulator(0)), SparkAccumulatorMetricsField(key='calculation', description='Cumulative time to do calculations', accumulator=self._sc.accumulator(0)), From 1c4a0e492485be2650c5756541cbfb9376b0a2bf Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 13 Jul 2023 13:55:38 -0700 Subject: [PATCH 15/91] fixes --- .../nexustiles/backends/nexusproto/backend.py | 3 +++ .../nexustiles/backends/zarr/backend.py | 21 +++++++------------ data-access/nexustiles/nexustiles.py | 16 +++++++------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py index 8cca5813..690b109c 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -269,6 +269,9 @@ def get_bounding_box(self, tile_ids): """ tiles = self.find_tiles_by_id(tile_ids, fl=['tile_min_lat', 'tile_max_lat', 'tile_min_lon', 'tile_max_lon'], fetch_data=False, rows=len(tile_ids)) + + tiles 
= self._metadata_store_docs_to_tiles(*tiles) + polys = [] for tile in tiles: polys.append(box(tile.bbox.min_lon, tile.bbox.min_lat, tile.bbox.max_lon, tile.bbox.max_lat)) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 1f46a95e..f4f92c56 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -13,29 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import configparser import logging import sys -import json from datetime import datetime -from functools import reduce +from urllib.parse import urlparse import numpy as np import numpy.ma as ma -import pkg_resources -from pytz import timezone, UTC -from shapely.geometry import MultiPolygon, box - -from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable -from nexustiles.exception import NexusTileServiceException +import s3fs +import xarray as xr from nexustiles.AbstractTileService import AbstractTileService - +from nexustiles.exception import NexusTileServiceException +from nexustiles.model.nexusmodel import Tile, BBox, TileVariable +from pytz import timezone +from shapely.geometry import MultiPolygon, box from yarl import URL -import xarray as xr -import s3fs -from urllib.parse import urlparse - EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index b8165a1d..fb8c0f33 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -67,7 +67,7 @@ def fetch_data_for_func(*args, **kwargs): guessed_dataset = kwargs['dataset'] else: for arg in args: - if arg is not None and arg in NexusTileService.backends: + if isinstance(arg, str) and arg in NexusTileService.backends: guessed_dataset = arg break @@ -178,13 +178,13 @@ def _get_backend(dataset_s) -> AbstractTileService: b = NexusTileService.backends[dataset_s] - if not b['up']: - success = b['backend'].try_connect() - - if not success: - raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') - else: - NexusTileService.backends[dataset_s]['up'] = True + # if not b['up']: + # success = b['backend'].try_connect() + # + # if not success: + # raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') + # else: + # NexusTileService.backends[dataset_s]['up'] = True return b['backend'] From 0a7cd7f3f55340107ff1d0e7f924f1dfd1cdfe26 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 17 Jul 2023 16:28:08 -0700 Subject: [PATCH 16/91] Dynamic dataset management --- analysis/webservice/config/web.ini | 2 +- analysis/webservice/management/Datasets.py | 78 ++++++++++++ analysis/webservice/management/__init__.py | 16 +++ .../request/handlers/NexusRequestHandler.py | 29 +++++ .../webservice/webmodel/NexusRequestObject.py | 6 + data-access/nexustiles/nexustiles.py | 113 +++++++++++------- 6 files changed, 197 insertions(+), 47 deletions(-) create mode 100644 analysis/webservice/management/Datasets.py create mode 100644 analysis/webservice/management/__init__.py diff --git a/analysis/webservice/config/web.ini b/analysis/webservice/config/web.ini index 85849758..a9e3dda8 100644 --- a/analysis/webservice/config/web.ini +++ b/analysis/webservice/config/web.ini @@ -29,4 +29,4 @@ static_enabled=true static_dir=static [modules] 
-module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms \ No newline at end of file +module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms,webservice.management \ No newline at end of file diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py new file mode 100644 index 00000000..195ca38e --- /dev/null +++ b/analysis/webservice/management/Datasets.py @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from yaml import load +import json +from webservice.NexusHandler import nexus_handler +from nexustiles.nexustiles import NexusTileService +from webservice.webmodel import NexusRequestObject, NexusProcessingException +try: + from yaml import CLoader as Loader +except ImportError: + from yaml import Loader + + +class DatasetManagement: + @classmethod + def validate(cls): + pass + + @staticmethod + def parse_config(request: NexusRequestObject): + content_type = request.get_headers()['Content-Type'] + + if content_type in ['application/json', 'application/x-json']: + return json.loads(request.get_request_body()) + elif content_type == 'application/yaml': + return load(request.get_request_body(), Loader=Loader) + else: + raise NexusProcessingException(reason='Invalid Content-Type header', code=400) + + +@nexus_handler +class DatasetAdd(DatasetManagement): + name = 'Add dataset' + path = '/datasets/add' + description = "Add new dataset to running SDAP instance" + + def __init__(self, **args): + pass + + def calc(self, request: NexusRequestObject, **args): + # print('CALC') + try: + config = DatasetManagement.parse_config(request) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=400 + ) + + name = request.get_argument('name') + + if name is None: + raise NexusProcessingException( + reason='Name argument must be provided', + code=400 + ) + + try: + NexusTileService.user_ds_add(name, config) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=500 + ) + diff --git a/analysis/webservice/management/__init__.py b/analysis/webservice/management/__init__.py new file mode 100644 index 00000000..7c9f5ef4 --- /dev/null +++ b/analysis/webservice/management/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
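Once webservice.management is added to module_dirs above, the new handler is reachable at /datasets/add. A purely illustrative request follows (host, port, dataset name and the YAML body are examples, not values from this patch; later commits in this series add a required path argument and schema validation of the body):

    curl -X POST \
         -H 'Content-Type: application/yaml' \
         --data-binary @my_dataset.yaml \
         'http://localhost:8083/datasets/add?name=MY_ZARR_DATASET'

    # my_dataset.yaml, shaped the way the zarr backend expects its config:
    variable: analysed_sst
    coords:
      latitude: lat
      longitude: lon
      time: time
    aws:
      public: true
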
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from webservice.management.Datasets import DatasetAdd \ No newline at end of file diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py index 26455746..1c7e936c 100644 --- a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py @@ -63,6 +63,35 @@ def get(self): except Exception as e: self.async_onerror_callback(str(e), 500) + @tornado.gen.coroutine + def post(self): + self.logger.info("Received POST %s" % self._request_summary()) + + request = NexusRequestObject(self) + + # create NexusCalcHandler which will process the request + instance = self.__clazz(**self._clazz_init_args) + + try: + # process the request asynchronously on a different thread, + # the current tornado handler is still available to get other user requests + results = yield tornado.ioloop.IOLoop.current().run_in_executor(self.executor, instance.calc, request) + + if results: + try: + self.set_status(results.status_code) + except AttributeError: + pass + + renderer = NexusRendererFactory.get_renderer("JSON") + renderer.render(self, results) + + except NexusProcessingException as e: + self.async_onerror_callback(e.reason, e.code) + + except Exception as e: + self.async_onerror_callback(str(e), 500) + def async_onerror_callback(self, reason, code=500): self.logger.error("Error processing request", exc_info=True) diff --git a/analysis/webservice/webmodel/NexusRequestObject.py b/analysis/webservice/webmodel/NexusRequestObject.py index bbd28280..18962364 100644 --- a/analysis/webservice/webmodel/NexusRequestObject.py +++ b/analysis/webservice/webmodel/NexusRequestObject.py @@ -35,6 +35,12 @@ def __init__(self, reqHandler): self.requestHandler = reqHandler StatsComputeOptions.__init__(self) + def get_headers(self): + return self.requestHandler.request.headers + + def get_request_body(self): + return self.requestHandler.request.body + def get_argument(self, name, default=None): return self.requestHandler.get_argument(name, default=default) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index fb8c0f33..eaecf941 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -188,14 +188,13 @@ def _get_backend(dataset_s) -> AbstractTileService: return b['backend'] + @staticmethod - def _update_datasets(): + def _get_datasets_store(): solr_url = NexusTileService.ds_config.get("solr", "host") solr_core = NexusTileService.ds_config.get("solr", "core") solr_kwargs = {} - update_logger = logging.getLogger("nexus-tile-svc.backends") - if NexusTileService.ds_config.has_option("solr", "time_out"): solr_kwargs["timeout"] = NexusTileService.ds_config.get("solr", "time_out") @@ -208,55 +207,62 @@ def _update_datasets(): solrcon = solrcon - update_logger.info('Executing Solr query to check for new datasets') + return solrcon + + @staticmethod + def _update_datasets(): + update_logger = logging.getLogger("nexus-tile-svc.backends") + solrcon = 
NexusTileService._get_datasets_store() + + update_logger.info('Executing Solr query to check for new datasets') - present_datasets = {None, '__nexusproto__'} - next_cursor_mark = '*' + present_datasets = {None, '__nexusproto__'} + next_cursor_mark = '*' - added_datasets = 0 + added_datasets = 0 - while True: - response = solrcon.search('*:*', cursorMark=next_cursor_mark, sort='id asc') + while True: + response = solrcon.search('*:*', cursorMark=next_cursor_mark, sort='id asc') - try: - response_cursor_mark = response.nextCursorMark - except AttributeError: - break + try: + response_cursor_mark = response.nextCursorMark + except AttributeError: + break - if response_cursor_mark == next_cursor_mark: - break - else: - next_cursor_mark = response_cursor_mark - - for dataset in response.docs: - d_id = dataset['dataset_s'] - store_type = dataset.get('store_type_s', 'nexusproto') - - present_datasets.add(d_id) - - if d_id in NexusTileService.backends: - continue - # is_up = NexusTileService.backends[d_id]['backend'].try_connect() - - added_datasets += 1 - - if store_type == 'nexus_proto' or store_type == 'nexusproto': - update_logger.info(f"Detected new nexusproto dataset {d_id}, using default nexusproto backend") - NexusTileService.backends[d_id] = NexusTileService.backends[None] - elif store_type == 'zarr': - update_logger.info(f"Detected new zarr dataset {d_id}, opening new zarr backend") - - ds_config = json.loads(dataset['config'][0]) - try: - NexusTileService.backends[d_id] = { - 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), - 'up': True - } - except NexusTileServiceException: - added_datasets -= 1 - else: - update_logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') + if response_cursor_mark == next_cursor_mark: + break + else: + next_cursor_mark = response_cursor_mark + + for dataset in response.docs: + d_id = dataset['dataset_s'] + store_type = dataset.get('store_type_s', 'nexusproto') + + present_datasets.add(d_id) + + if d_id in NexusTileService.backends: + continue + # is_up = NexusTileService.backends[d_id]['backend'].try_connect() + + added_datasets += 1 + + if store_type == 'nexus_proto' or store_type == 'nexusproto': + update_logger.info(f"Detected new nexusproto dataset {d_id}, using default nexusproto backend") + NexusTileService.backends[d_id] = NexusTileService.backends[None] + elif store_type == 'zarr': + update_logger.info(f"Detected new zarr dataset {d_id}, opening new zarr backend") + + ds_config = json.loads(dataset['config'][0]) + try: + NexusTileService.backends[d_id] = { + 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), + 'up': True + } + except NexusTileServiceException: added_datasets -= 1 + else: + update_logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') + added_datasets -= 1 removed_datasets = set(NexusTileService.backends.keys()).difference(present_datasets) @@ -270,6 +276,21 @@ def _update_datasets(): update_logger.info(f'Finished dataset update: {added_datasets} added, {len(removed_datasets)} removed, ' f'{len(NexusTileService.backends) - 2} total') + # Update cfg (ie, creds) of dataset + @staticmethod + def user_ds_update(): + pass + + # Add dataset + backend + @staticmethod + def user_ds_add(name, config): + pass + + # Delete dataset backend (error if it's a hardcoded one) + @staticmethod + def user_ds_delete(): + pass + def override_config(self, config): for section in config.sections(): if self._config.has_section(section): # only override preexisting section, ignores 
the other From c8e7dbb5e178bf9d88928659dea1792132760179 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 18 Jul 2023 16:12:29 -0700 Subject: [PATCH 17/91] Dynamic dataset management --- analysis/conda-requirements.txt | 2 +- analysis/webservice/management/Datasets.py | 58 ++++++++++++++++++++-- data-access/nexustiles/nexustiles.py | 2 +- data-access/requirements.txt | 3 +- 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt index e27bdeae..902d5114 100644 --- a/analysis/conda-requirements.txt +++ b/analysis/conda-requirements.txt @@ -33,4 +33,4 @@ gdal==3.2.1 mock==4.0.3 importlib_metadata==4.11.4 #singledispatch==3.4.0.3 - +schema diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py index 195ca38e..0f8df06d 100644 --- a/analysis/webservice/management/Datasets.py +++ b/analysis/webservice/management/Datasets.py @@ -18,12 +18,34 @@ from webservice.NexusHandler import nexus_handler from nexustiles.nexustiles import NexusTileService from webservice.webmodel import NexusRequestObject, NexusProcessingException + +from schema import Schema, Or, SchemaError +from schema import Optional as Opt + +from urllib.parse import urlparse try: from yaml import CLoader as Loader except ImportError: from yaml import Loader +CONFIG_SCHEMA = Schema({ + Or('variable', 'variables'): Or(str, [str]), + 'coords': { + 'latitude': str, + 'longitude': str, + 'time': str, + Opt('depth'): str + }, + Opt('aws'): { + 'accessKeyID': Or(str, None), + 'secretAccessKey': Or(str, None), + 'public': bool, + Opt('region'): str + } +}) + + class DatasetManagement: @classmethod def validate(cls): @@ -34,12 +56,22 @@ def parse_config(request: NexusRequestObject): content_type = request.get_headers()['Content-Type'] if content_type in ['application/json', 'application/x-json']: - return json.loads(request.get_request_body()) + config_dict = json.loads(request.get_request_body()) elif content_type == 'application/yaml': - return load(request.get_request_body(), Loader=Loader) + config_dict = load(request.get_request_body(), Loader=Loader) else: raise NexusProcessingException(reason='Invalid Content-Type header', code=400) + try: + CONFIG_SCHEMA.validate(config_dict) + except SchemaError as e: + raise NexusProcessingException( + reason=str(e), + code=400 + ) + + return config_dict + @nexus_handler class DatasetAdd(DatasetManagement): @@ -51,7 +83,6 @@ def __init__(self, **args): pass def calc(self, request: NexusRequestObject, **args): - # print('CALC') try: config = DatasetManagement.parse_config(request) except Exception as e: @@ -68,8 +99,27 @@ def calc(self, request: NexusRequestObject, **args): code=400 ) + path = request.get_argument('path') + + if path is None: + raise NexusProcessingException( + reason='Path argument must be provided', + code=400 + ) + + try: + if urlparse(path).scheme not in ['file','','s3']: + raise NexusProcessingException( + reason='Dataset URL must be for a local file or S3 URL', + code=400 + ) + except ValueError: + raise NexusProcessingException( + reason='Could not parse path URL', code=400 + ) + try: - NexusTileService.user_ds_add(name, config) + NexusTileService.user_ds_add(name, path, config) except Exception as e: raise NexusProcessingException( reason=repr(e), diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index eaecf941..68a2a584 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -283,7 
+283,7 @@ def user_ds_update(): # Add dataset + backend @staticmethod - def user_ds_add(name, config): + def user_ds_add(name, path, config): pass # Delete dataset backend (error if it's a hardcoded one) diff --git a/data-access/requirements.txt b/data-access/requirements.txt index ab96e2af..c732bede 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -22,4 +22,5 @@ nexusproto Shapely s3fs fsspec -xarray~=2022.3.0 \ No newline at end of file +xarray~=2022.3.0 +numpy==1.24.3 \ No newline at end of file From e78f7ade3422c97008ff6532593d39c2f863e475 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 20 Jul 2023 12:29:27 -0700 Subject: [PATCH 18/91] Dataset management --- analysis/webservice/management/Datasets.py | 70 +++++++++++++++++ .../request/handlers/NexusRequestHandler.py | 4 +- data-access/nexustiles/nexustiles.py | 76 +++++++++++++++++-- 3 files changed, 142 insertions(+), 8 deletions(-) diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py index 0f8df06d..48071f7c 100644 --- a/analysis/webservice/management/Datasets.py +++ b/analysis/webservice/management/Datasets.py @@ -73,6 +73,14 @@ def parse_config(request: NexusRequestObject): return config_dict +class Response: + def __init__(self, response): + self.response = response if response is not None else {} + + def toJson(self): + return json.dumps(self.response) + + @nexus_handler class DatasetAdd(DatasetManagement): name = 'Add dataset' @@ -126,3 +134,65 @@ def calc(self, request: NexusRequestObject, **args): code=500 ) + +@nexus_handler +class DatasetUpdate(DatasetManagement): + name = 'Update dynamically added dataset' + path = '/datasets/update' + description = "Update dataset in running SDAP instance" + + def __init__(self, **args): + pass + + def calc(self, request: NexusRequestObject, **args): + try: + config = DatasetManagement.parse_config(request) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=400 + ) + + name = request.get_argument('name') + + if name is None: + raise NexusProcessingException( + reason='Name argument must be provided', + code=400 + ) + + try: + return Response(NexusTileService.user_ds_update(name, config)) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=500 + ) + + +@nexus_handler +class DatasetDelete(DatasetManagement): + name = 'Remove dataset' + path = '/datasets/remove' + description = "Remove dataset from running SDAP instance" + + def __init__(self, **args): + pass + + def calc(self, request: NexusRequestObject, **args): + name = request.get_argument('name') + + if name is None: + raise NexusProcessingException( + reason='Name argument must be provided', + code=400 + ) + + try: + return Response(NexusTileService.user_ds_delete(name)) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=500 + ) + diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py index 1c7e936c..6392f105 100644 --- a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py @@ -65,7 +65,7 @@ def get(self): @tornado.gen.coroutine def post(self): - self.logger.info("Received POST %s" % self._request_summary()) + self.logger.info("Received %s" % self._request_summary()) request = NexusRequestObject(self) @@ -83,7 +83,7 @@ def post(self): except 
AttributeError: pass - renderer = NexusRendererFactory.get_renderer("JSON") + renderer = NexusRendererFactory.get_renderer(request) renderer.render(self, results) except NexusProcessingException as e: diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 68a2a584..a5abd241 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -278,18 +278,82 @@ def _update_datasets(): # Update cfg (ie, creds) of dataset @staticmethod - def user_ds_update(): - pass + def user_ds_update(name, config): + solr = NexusTileService._get_datasets_store() + + docs = solr.search(f'dataset_s:{name}').docs + + if len(docs) != 1: + raise ValueError(f'Given name must match exactly one existing dataset; matched {len(docs)}') + + ds = docs[0] + + if 'source_s' not in ds or ds['source_s'] == 'collection_config': + raise ValueError('Provided dataset is source_s in collection config and cannot be deleted') + + config_dict = json.loads(ds['config'][0]) + + config_dict['config'] = config + + solr.delete(id=ds['id']) + solr.add([{ + 'id': name, + 'dataset_s': name, + 'latest_update_l': int(datetime.now().timestamp()), + 'store_type_s': ds['store_type_s'], + 'config': json.dumps(config_dict), + 'source_s': 'user_added' + }]) + solr.commit() + + return {'success': True} # Add dataset + backend @staticmethod - def user_ds_add(name, path, config): - pass + def user_ds_add(name, path, config, type='zarr'): + solr = NexusTileService._get_datasets_store() + + docs = solr.search(f'dataset_s:{name}').docs + + if len(docs) > 0: + raise ValueError(f'Dataset {name} already exists') + + config_dict = { + 'path': path, + 'config': config + } + + solr.add([{ + 'id': name, + 'dataset_s': name, + 'latest_update_l': int(datetime.now().timestamp()), + 'store_type_s': type, + 'config': json.dumps(config_dict), + 'source_s': 'user_added' + }]) + solr.commit() + + return {'success': True} # Delete dataset backend (error if it's a hardcoded one) @staticmethod - def user_ds_delete(): - pass + def user_ds_delete(name): + solr = NexusTileService._get_datasets_store() + + docs = solr.search(f'dataset_s:{name}').docs + + if len(docs) != 1: + raise ValueError(f'Given name must match exactly one existing dataset; matched {len(docs)}') + + ds = docs[0] + + if 'source_s' not in ds or ds['source_s'] == 'collection_config': + raise ValueError('Provided dataset is source_s in collection config and cannot be deleted') + + solr.delete(id=ds['id']) + solr.commit() + + return {'success': True} def override_config(self, config): for section in config.sections(): From a84d77e569fcf224597c73c17d1fa109f36a2a5b Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 27 Jul 2023 10:05:20 -0700 Subject: [PATCH 19/91] Timeseriesspark support --- .../algorithms_spark/TimeSeriesSpark.py | 5 +- .../nexustiles/backends/zarr/backend.py | 47 +++++++++++++------ 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py index faeaa0b1..6a353cf4 100644 --- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py +++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py @@ -488,8 +488,9 @@ def calc_average_on_day(tile_service_factory, metrics_callback, normalize_dates, timestamps[0], timestamps[-1], rows=5000, - metrics_callback=metrics_callback) - + metrics_callback=metrics_callback, + distinct=True) + calculation_start = datetime.now() tile_dict = {} diff --git 
a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index f4f92c56..9aab3cff 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -110,8 +110,8 @@ def get_dataseries_list(self, simple=False): max_date = self.get_max_time([]) ds['start'] = min_date ds['end'] = max_date - ds['iso_start'] = datetime.fromtimestamp(min_date).strftime(ISO_8601) - ds['iso_end'] = datetime.fromtimestamp(max_date).strftime(ISO_8601) + ds['iso_start'] = datetime.utcfromtimestamp(min_date).strftime(ISO_8601) + ds['iso_end'] = datetime.utcfromtimestamp(max_date).strftime(ISO_8601) return [ds] @@ -126,10 +126,10 @@ def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, st start = datetime.now() if not isinstance(start_time, datetime): - start_time = datetime.fromtimestamp(start_time) + start_time = datetime.utcfromtimestamp(start_time) if not isinstance(end_time, datetime): - end_time = datetime.fromtimestamp(end_time) + end_time = datetime.utcfromtimestamp(end_time) sel = { self.__latitude: slice(min_lat, max_lat), @@ -142,7 +142,7 @@ def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, st if np.issubdtype(times.dtype, np.datetime64): times = ((times - np.datetime64(EPOCH)) / 1e9).astype(int) - times = sorted(list(times)) + times = sorted(times.tolist()) if metrics_callback: metrics_callback(backend=(datetime.now() - start).total_seconds()) @@ -193,9 +193,14 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t 'max_lon': max_lon } + times = None + if 0 <= start_time <= end_time: - params['min_time'] = start_time - params['max_time'] = end_time + if kwargs.get('distinct', False): + times_asc = self.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time) + times = [(t, t) for t in times_asc] + else: + times = [(start_time, end_time)] if 'depth' in kwargs: params['depth'] = kwargs['depth'] @@ -203,7 +208,10 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t params['min_depth'] = kwargs.get('min_depth') params['max_depth'] = kwargs.get('max_depth') - return [ZarrBackend.__to_url(self._name, **params)] + if times: + return [ZarrBackend.__to_url(self._name, min_time=t[0], max_time=t[1], **params) for t in times] + else: + return [ZarrBackend.__to_url(self._name, **params)] def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): # Find tiles that fall within the polygon in the Solr index @@ -365,10 +373,10 @@ def __fetch_data_for_tile(self, tile: Tile): max_time = float(tile.max_time) if min_time: - min_time = datetime.fromtimestamp(min_time) + min_time = datetime.utcfromtimestamp(min_time) if max_time: - max_time = datetime.fromtimestamp(max_time) + max_time = datetime.utcfromtimestamp(max_time) if bbox: min_lat = bbox.min_lat @@ -376,23 +384,25 @@ def __fetch_data_for_tile(self, tile: Tile): max_lat = bbox.max_lat max_lon = bbox.max_lon - sel = { + sel_g = { self.__latitude: slice(min_lat, max_lat), self.__longitude: slice(min_lon, max_lon), } + sel_t = {} + if min_time == max_time: - sel[self.__time] = min_time + sel_t[self.__time] = [min_time] # List, otherwise self.__time dim will be dropped method = 'nearest' else: - sel[self.__time] = slice(min_time, max_time) + sel_t[self.__time] = slice(min_time, max_time) method = None tile.variables = [ TileVariable(v, v) for v in self.__variables ] - matched = self.__ds.sel(sel, 
method=method) + matched = self.__ds.sel(sel_g).sel(sel_t, method=method) tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) @@ -453,6 +463,15 @@ def __to_url(dataset, **kwargs): if 'ds' in kwargs: del kwargs['ds'] + # If any params are numpy dtypes, extract them to base python types + for kw in kwargs: + v = kwargs[kw] + + if isinstance(v, np.generic): + v = v.item() + + kwargs[kw] = v + return str(URL.build( scheme='nts', host='', From 53190e2834e47b547bffefb3f5ffc407efa5165c Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 31 Jul 2023 07:54:13 -0700 Subject: [PATCH 20/91] Update backend dict on dataset mgmt query --- data-access/nexustiles/nexustiles.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index a5abd241..772f6f4f 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -306,6 +306,11 @@ def user_ds_update(name, config): }]) solr.commit() + logger.info(f'Updated dataset {name} in Solr. Updating backends') + + with DS_LOCK: + NexusTileService._update_datasets() + return {'success': True} # Add dataset + backend @@ -333,6 +338,11 @@ def user_ds_add(name, path, config, type='zarr'): }]) solr.commit() + logger.info(f'Added dataset {name} to Solr. Updating backends') + + with DS_LOCK: + NexusTileService._update_datasets() + return {'success': True} # Delete dataset backend (error if it's a hardcoded one) @@ -353,6 +363,11 @@ def user_ds_delete(name): solr.delete(id=ds['id']) solr.commit() + logger.info(f'Removed dataset {name} from Solr. Updating backends') + + with DS_LOCK: + NexusTileService._update_datasets() + return {'success': True} def override_config(self, config): From 2e7a0dcc280d7f447e9c09e723ddb3a5215d4460 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 31 Jul 2023 12:36:13 -0700 Subject: [PATCH 21/91] Fixes and improvements --- analysis/webservice/management/Datasets.py | 12 ++++++++-- .../nexustiles/backends/zarr/backend.py | 22 ++++++++++++++----- data-access/nexustiles/nexustiles.py | 10 ++++++++- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py index 48071f7c..ded1e8a2 100644 --- a/analysis/webservice/management/Datasets.py +++ b/analysis/webservice/management/Datasets.py @@ -38,8 +38,8 @@ Opt('depth'): str }, Opt('aws'): { - 'accessKeyID': Or(str, None), - 'secretAccessKey': Or(str, None), + Opt('accessKeyID'): str, + Opt('secretAccessKey'): str, 'public': bool, Opt('region'): str } @@ -64,6 +64,14 @@ def parse_config(request: NexusRequestObject): try: CONFIG_SCHEMA.validate(config_dict) + + if 'aws' in config_dict: + if not config_dict['aws']['public']: + if 'accessKeyID' not in config_dict['aws'] or 'secretAccessKey' not in config_dict['aws']: + raise NexusProcessingException( + reason='Must provide AWS creds for non-public bucket', + code=400 + ) except SchemaError as e: raise NexusProcessingException( reason=str(e), diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 9aab3cff..214a991b 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -81,8 +81,10 @@ def __init__(self, dataset_name, path, config=None): aws_cfg = self.__config['aws'] if aws_cfg['public']: - region = aws_cfg.get('region', 'us-west-2') - 
store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + # region = aws_cfg.get('region', 'us-west-2') + # store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + s3 = s3fs.S3FileSystem(True) + store = s3fs.S3Map(root=path, s3=s3, check=False) else: s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) store = s3fs.S3Map(root=path, s3=s3, check=False) @@ -116,7 +118,7 @@ def get_dataseries_list(self, simple=False): return [ds] def find_tile_by_id(self, tile_id, **kwargs): - return tile_id + return [tile_id] def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): return tile_ids @@ -330,7 +332,7 @@ def get_max_time(self, tile_ids, ds=None): min_date, max_date = self.__get_ds_min_max_date() return max_date else: - max(times) + return max(times) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -442,6 +444,7 @@ def __nts_url_to_tile(nts_url): pass tile.dataset = url.path + tile.dataset_id = url.path try: tile.min_time = int(url.query['min_time']) @@ -453,6 +456,8 @@ def __nts_url_to_tile(nts_url): except KeyError: pass + tile.meta_data = {} + return tile @staticmethod @@ -463,20 +468,25 @@ def __to_url(dataset, **kwargs): if 'ds' in kwargs: del kwargs['ds'] + params = {} + # If any params are numpy dtypes, extract them to base python types for kw in kwargs: v = kwargs[kw] + if v is None: + continue + if isinstance(v, np.generic): v = v.item() - kwargs[kw] = v + params[kw] = v return str(URL.build( scheme='nts', host='', path=dataset, - query=kwargs + query=params )) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 772f6f4f..ed526c55 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -31,6 +31,7 @@ from shapely.geometry import box from webservice.webmodel import DatasetNotFoundException, NexusProcessingException from webservice.NexusHandler import nexus_initializer +from yarl import URL from .AbstractTileService import AbstractTileService from .backends.nexusproto.backend import NexusprotoTileService @@ -388,11 +389,18 @@ def get_dataseries_list(self, simple=False): @tile_data() @catch_not_implemented def find_tile_by_id(self, tile_id, **kwargs): - return NexusTileService._get_backend('__nexusproto__').find_tile_by_id(tile_id) + tile = URL(tile_id) + + if tile.scheme == 'nts': + return NexusTileService._get_backend(tile.path).find_tile_by_id(tile_id) + else: + return NexusTileService._get_backend('__nexusproto__').find_tile_by_id(tile_id) @tile_data() @catch_not_implemented def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + if ds is None: + return [self.find_tile_by_id(tid, **kwargs, fetch_data=False) for tid in tile_ids] return NexusTileService._get_backend(ds).find_tiles_by_id(tile_ids, ds=ds, **kwargs) @catch_not_implemented From 08693754a542d655069c19c0503adbd41a401a7c Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 31 Jul 2023 12:38:01 -0700 Subject: [PATCH 22/91] Adapted matchup to work with zarr backends --- analysis/webservice/algorithms_spark/Matchup.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index f27612a5..7f84063e 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -777,9 +777,9 @@ def match_satellite_to_insitu(tile_ids, primary_b, secondary_b, 
parameter_b, tt_ tile_service = tile_service_factory() # Determine the spatial temporal extents of this partition of tiles - tiles_bbox = tile_service.get_bounding_box(tile_ids) - tiles_min_time = tile_service.get_min_time(tile_ids) - tiles_max_time = tile_service.get_max_time(tile_ids) + tiles_bbox = tile_service.get_bounding_box(tile_ids, ds=primary_b.value) + tiles_min_time = tile_service.get_min_time(tile_ids, ds=primary_b.value) + tiles_max_time = tile_service.get_max_time(tile_ids, ds=primary_b.value) # Increase spatial extents by the radius tolerance matchup_min_lon, matchup_min_lat = add_meters_to_lon_lat(tiles_bbox.bounds[0], tiles_bbox.bounds[1], @@ -858,7 +858,7 @@ def match_satellite_to_insitu(tile_ids, primary_b, secondary_b, parameter_b, tt_ edge_results = [] for tile in matchup_tiles: # Retrieve tile data and convert to lat/lon projection - tiles = tile_service.find_tile_by_id(tile.tile_id, fetch_data=True) + tiles = tile_service.find_tile_by_id(tile.tile_id, fetch_data=True, ds=secondary_b.value) tile = tiles[0] valid_indices = tile.get_indices() @@ -884,14 +884,14 @@ def match_satellite_to_insitu(tile_ids, primary_b, secondary_b, parameter_b, tt_ # The actual matching happens in the generator. This is so that we only load 1 tile into memory at a time match_generators = [match_tile_to_point_generator(tile_service, tile_id, m_tree, edge_results, bounding_wkt_b.value, - parameter_b.value, rt_b.value, aeqd_proj) for tile_id - in tile_ids] + parameter_b.value, rt_b.value, aeqd_proj, primary_b.value) + for tile_id in tile_ids] return chain(*match_generators) def match_tile_to_point_generator(tile_service, tile_id, m_tree, edge_results, search_domain_bounding_wkt, - search_parameter, radius_tolerance, aeqd_proj): + search_parameter, radius_tolerance, aeqd_proj, primary_ds): from nexustiles.model.nexusmodel import NexusPoint from webservice.algorithms_spark.Matchup import DomsPoint # Must import DomsPoint or Spark complains @@ -899,7 +899,7 @@ def match_tile_to_point_generator(tile_service, tile_id, m_tree, edge_results, s try: the_time = datetime.now() tile = tile_service.mask_tiles_to_polygon(wkt.loads(search_domain_bounding_wkt), - tile_service.find_tile_by_id(tile_id))[0] + tile_service.find_tile_by_id(tile_id, ds=primary_ds))[0] print("%s Time to load tile %s" % (str(datetime.now() - the_time), tile_id)) except IndexError: # This should only happen if all measurements in a tile become masked after applying the bounding polygon From 1eb680bb794914f8a9e3d9b8fae3ebd3a0b970cb Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 1 Aug 2023 14:45:25 -0700 Subject: [PATCH 23/91] Zarr support - Distinct slices of time is now default - No longer assuming+shaping as multivar tiles unless needed --- .../webservice/algorithms_spark/HofMoellerSpark.py | 8 ++++---- data-access/nexustiles/backends/zarr/backend.py | 14 +++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/analysis/webservice/algorithms_spark/HofMoellerSpark.py b/analysis/webservice/algorithms_spark/HofMoellerSpark.py index 6231bdb1..90ca87c0 100644 --- a/analysis/webservice/algorithms_spark/HofMoellerSpark.py +++ b/analysis/webservice/algorithms_spark/HofMoellerSpark.py @@ -44,12 +44,12 @@ class HofMoellerCalculator(object): def hofmoeller_stats(tile_service_factory, metrics_callback, tile_in_spark): (latlon, tile_id, index, - min_lat, max_lat, min_lon, max_lon) = tile_in_spark + min_lat, max_lat, min_lon, max_lon, dataset) = tile_in_spark tile_service = tile_service_factory() try: # Load the dataset 
tile - tile = tile_service.find_tile_by_id(tile_id, metrics_callback=metrics_callback)[0] + tile = tile_service.find_tile_by_id(tile_id, metrics_callback=metrics_callback, ds=dataset)[0] calculation_start = datetime.now() # Mask it to the search domain tile = tile_service.mask_tiles_to_bbox(min_lat, max_lat, @@ -352,7 +352,7 @@ def calc(self, compute_options, **args): min_lon, min_lat, max_lon, max_lat = bbox.bounds - nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon) for x, tile in + nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon, tile.dataset) for x, tile in enumerate(self._get_tile_service().find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, metrics_callback=metrics_record.record_metrics, @@ -408,7 +408,7 @@ def calc(self, compute_options, **args): min_lon, min_lat, max_lon, max_lat = bbox.bounds - nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon) for x, tile in + nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon, tile.dataset) for x, tile in enumerate(self._get_tile_service().find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, metrics_callback=metrics_record.record_metrics, diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 214a991b..29099d28 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -198,7 +198,7 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t times = None if 0 <= start_time <= end_time: - if kwargs.get('distinct', False): + if kwargs.get('distinct', True): times_asc = self.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time) times = [(t, t) for t in times_asc] else: @@ -416,11 +416,15 @@ def __fetch_data_for_tile(self, tile: Tile): tile.times = ma.masked_invalid(times) - tile.data = ma.masked_invalid( - [matched[var].to_numpy() for var in self.__variables] - ) + var_data = [matched[var].to_numpy() for var in self.__variables] + + if len(self.__variables) > 1: + tile.data = ma.masked_invalid(var_data) + tile.is_multi = True + else: + tile.data = ma.masked_invalid(var_data[0]) + tile.is_multi = False - tile.is_multi = True def _metadata_store_docs_to_tiles(self, *store_docs): return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] From 0aef0f13d0b2178724eb6aa9e198fb4260066920 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 2 Aug 2023 14:34:15 -0700 Subject: [PATCH 24/91] DDAS adjustments --- .../algorithms_spark/DailyDifferenceAverageSpark.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/analysis/webservice/algorithms_spark/DailyDifferenceAverageSpark.py b/analysis/webservice/algorithms_spark/DailyDifferenceAverageSpark.py index b4245783..12f7deec 100644 --- a/analysis/webservice/algorithms_spark/DailyDifferenceAverageSpark.py +++ b/analysis/webservice/algorithms_spark/DailyDifferenceAverageSpark.py @@ -324,7 +324,7 @@ def calculate_diff(tile_service_factory, tile_ids, bounding_wkt, dataset, climat for tile_id in tile_ids: # Get the dataset tile try: - dataset_tile = get_dataset_tile(tile_service, wkt.loads(bounding_wkt.value), tile_id) + dataset_tile = get_dataset_tile(tile_service, wkt.loads(bounding_wkt.value), tile_id, dataset.value) except NoDatasetTile: # This should only happen if all measurements in a tile become masked after 
applying the bounding polygon continue @@ -348,12 +348,12 @@ def calculate_diff(tile_service_factory, tile_ids, bounding_wkt, dataset, climat return chain(*diff_generators) -def get_dataset_tile(tile_service, search_bounding_shape, tile_id): +def get_dataset_tile(tile_service, search_bounding_shape, tile_id, dataset): the_time = datetime.now() try: # Load the dataset tile - dataset_tile = tile_service.find_tile_by_id(tile_id)[0] + dataset_tile = tile_service.find_tile_by_id(tile_id, ds=dataset)[0] # Mask it to the search domain dataset_tile = tile_service.mask_tiles_to_polygon(search_bounding_shape, [dataset_tile])[0] except IndexError: From 42b912ebec6e445ef4d163f7774c50aa1c422339 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 3 Aug 2023 14:33:26 -0700 Subject: [PATCH 25/91] find_tile_by_polygon_and_most_recent_day_of_year impl --- .../nexustiles/backends/zarr/backend.py | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 29099d28..d592954c 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -174,7 +174,24 @@ def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found """ - raise NotImplementedError() + + times = self.__ds[self.__time].to_numpy() + + to_doy = lambda dt: datetime.utcfromtimestamp(int(dt)).timetuple().tm_yday + + vfunc = np.vectorize(to_doy) + days_of_year = vfunc(times.astype(datetime) / 1e9) + + try: + time = times[np.where(days_of_year <= day_of_year)[0][-1]].astype(datetime) / 1e9 + except IndexError: + raise NexusTileServiceException(reason='No tiles matched') + + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.find_tiles_in_box( + min_lat, max_lat, min_lon, max_lon, ds, time, time + ) def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, dataset, time, time, **kwargs) @@ -371,14 +388,14 @@ def __fetch_data_for_tile(self, tile: Tile): max_lat = None max_lon = None - min_time = float(tile.min_time) - max_time = float(tile.max_time) - - if min_time: - min_time = datetime.utcfromtimestamp(min_time) + min_time = tile.min_time + max_time = tile.max_time - if max_time: - max_time = datetime.utcfromtimestamp(max_time) + # if min_time: + # min_time = datetime.utcfromtimestamp(min_time) + # + # if max_time: + # max_time = datetime.utcfromtimestamp(max_time) if bbox: min_lat = bbox.min_lat @@ -451,12 +468,14 @@ def __nts_url_to_tile(nts_url): tile.dataset_id = url.path try: - tile.min_time = int(url.query['min_time']) + # tile.min_time = int(url.query['min_time']) + tile.min_time = datetime.utcfromtimestamp(int(url.query['min_time'])) except KeyError: pass try: - tile.max_time = int(url.query['max_time']) + # tile.max_time = int(url.query['max_time']) + tile.max_time = datetime.utcfromtimestamp(int(url.query['max_time'])) except KeyError: pass From 1559fbafee08a73e0ab8c44e063e30870592a078 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 8 Aug 2023 15:35:05 -0700 Subject: [PATCH 26/91] Don't sel by time if neither max nor min time are given --- 
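A small worked example of the day-of-year selection implemented above (time axis values assumed, daily data):

    import numpy as np
    from datetime import datetime

    times = np.array(['2015-01-30', '2015-02-01', '2015-02-03'], dtype='datetime64[s]')
    doy = np.array([datetime.utcfromtimestamp(int(t)).timetuple().tm_yday
                    for t in times.astype(int)])              # [30, 32, 34]
    day_of_year = 32
    match = times[np.where(doy <= day_of_year)[0][-1]]        # 2015-02-01
    # day_of_year=31 would fall back to 2015-01-30; day_of_year=29 leaves the index
    # array empty, which the backend catches and reports as 'No tiles matched'.
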
data-access/nexustiles/backends/zarr/backend.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index d592954c..c8fd0fe1 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -410,7 +410,10 @@ def __fetch_data_for_tile(self, tile: Tile): sel_t = {} - if min_time == max_time: + if min_time is None and max_time is None: + sel_t = None + method = None + elif min_time == max_time: sel_t[self.__time] = [min_time] # List, otherwise self.__time dim will be dropped method = 'nearest' else: @@ -421,7 +424,10 @@ def __fetch_data_for_tile(self, tile: Tile): TileVariable(v, v) for v in self.__variables ] - matched = self.__ds.sel(sel_g).sel(sel_t, method=method) + matched = self.__ds.sel(sel_g) #.sel(sel_t, method=method) + + if sel_t is not None: + matched = matched.sel(sel_t, method=method) tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) From 2bb52afb0925e89921f3576138228defea966c47 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 15 Aug 2023 13:15:29 -0700 Subject: [PATCH 27/91] Fix not calling partial when needed --- analysis/webservice/algorithms_spark/CorrMapSpark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/webservice/algorithms_spark/CorrMapSpark.py b/analysis/webservice/algorithms_spark/CorrMapSpark.py index fe1954df..7336993a 100644 --- a/analysis/webservice/algorithms_spark/CorrMapSpark.py +++ b/analysis/webservice/algorithms_spark/CorrMapSpark.py @@ -57,7 +57,7 @@ def _map(tile_service_factory, tile_in): # print 'days_at_a_time = ', days_at_a_time t_incr = 86400 * days_at_a_time - tile_service = tile_service_factory + tile_service = tile_service_factory() # Compute the intermediate summations needed for the Pearson # Correlation Coefficient. 
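The one-character CorrMapSpark fix above matters because what Spark ships to the workers is a zero-argument factory, not a tile service instance. A sketch of the distinction (the partial construction shown is an assumption, not taken from this patch):

    from functools import partial

    tile_service_factory = partial(NexusTileService, config)   # built on the driver (assumed)
    tile_service = tile_service_factory      # bug: still the factory callable
    tile_service = tile_service_factory()    # fix: instantiate the service on the worker
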
We use a one-pass online algorithm From f9dc2aebd77c1739a24823eaf2f529bba220ee4e Mon Sep 17 00:00:00 2001 From: rileykk Date: Fri, 18 Aug 2023 09:32:18 -0700 Subject: [PATCH 28/91] Pinned s3fs and fsspec versions --- data-access/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-access/requirements.txt b/data-access/requirements.txt index c732bede..db1bf2cf 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -20,7 +20,7 @@ urllib3==1.26.2 requests nexusproto Shapely -s3fs -fsspec +s3fs==2022.5.0 +fsspec==2022.5.0 xarray~=2022.3.0 numpy==1.24.3 \ No newline at end of file From a6f602d63705bb753ccfaaced202de366c0dd462 Mon Sep 17 00:00:00 2001 From: rileykk Date: Fri, 18 Aug 2023 11:44:10 -0700 Subject: [PATCH 29/91] Fixed some dependencies to ensure image builds properly + s3fs works --- analysis/conda-requirements.txt | 3 ++- data-access/requirements.txt | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt index 902d5114..22dff066 100644 --- a/analysis/conda-requirements.txt +++ b/analysis/conda-requirements.txt @@ -22,7 +22,8 @@ pytz==2021.1 utm==0.6.0 shapely==1.7.1 backports.functools_lru_cache==1.6.1 -boto3==1.16.63 +boto3>=1.16.63 +botocore==1.24.21 pillow==8.1.0 mpld3=0.5.1 tornado==6.1 diff --git a/data-access/requirements.txt b/data-access/requirements.txt index db1bf2cf..48a1fc6a 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -22,5 +22,9 @@ nexusproto Shapely s3fs==2022.5.0 fsspec==2022.5.0 +botocore==1.24.21 +aiohttp==3.8.1 xarray~=2022.3.0 -numpy==1.24.3 \ No newline at end of file +numpy==1.24.3 +pandas<2.1.0rc0 # Temporary restriction because 2.1.0rc0 fails to build + From 1a451eba314f17d9fabcbf152b4214ab9819da4a Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 21 Aug 2023 07:39:23 -0700 Subject: [PATCH 30/91] Config override for backends --- data-access/nexustiles/nexustiles.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index ed526c55..b4fd6bba 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -141,9 +141,6 @@ def __init__(self, config=None): self._alg_config = config - if config: - self.override_config(config) - if not NexusTileService.backends: NexusTileService.ds_config = configparser.RawConfigParser() NexusTileService.ds_config.read(NexusTileService._get_config_files('config/datasets.ini')) @@ -153,6 +150,9 @@ def __init__(self, config=None): NexusTileService.backends[None] = default_backend NexusTileService.backends['__nexusproto__'] = default_backend + if config: + self.override_config(config) + if not NexusTileService.__update_thread: NexusTileService.__update_thread = threading.Thread( target=NexusTileService.__update_datasets_loop, @@ -377,6 +377,10 @@ def override_config(self, config): for option in config.options(section): if config.get(section, option) is not None: self._config.set(section, option, config.get(section, option)) + if NexusTileService.ds_config.has_section(section): # only override preexisting section, ignores the other + for option in config.options(section): + if config.get(section, option) is not None: + NexusTileService.ds_config.set(section, option, config.get(section, option)) def get_dataseries_list(self, simple=False): datasets = [] From 6f8f7b10f60c316b1a8dd6ed39984b81a8e19294 Mon Sep 17 00:00:00 2001 
From: rileykk Date: Mon, 21 Aug 2023 07:40:51 -0700 Subject: [PATCH 31/91] Deps update --- data-access/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 48a1fc6a..9001ed34 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -25,6 +25,7 @@ fsspec==2022.5.0 botocore==1.24.21 aiohttp==3.8.1 xarray~=2022.3.0 +zarr>=2.11.3 numpy==1.24.3 pandas<2.1.0rc0 # Temporary restriction because 2.1.0rc0 fails to build From 483ad9f07d277cba13c18d946821a806995f1afd Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 31 Aug 2023 15:57:57 -0700 Subject: [PATCH 32/91] Add metadata from Zarr collection to /list --- data-access/nexustiles/backends/zarr/backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index c8fd0fe1..818d4b07 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -115,6 +115,8 @@ def get_dataseries_list(self, simple=False): ds['iso_start'] = datetime.utcfromtimestamp(min_date).strftime(ISO_8601) ds['iso_end'] = datetime.utcfromtimestamp(max_date).strftime(ISO_8601) + ds['metadata'] = dict(self.__ds.attrs) + return [ds] def find_tile_by_id(self, tile_id, **kwargs): From e1dec656534e3fe8ea8cadd33f96f965051feed7 Mon Sep 17 00:00:00 2001 From: rileykk Date: Fri, 1 Sep 2023 16:26:49 -0700 Subject: [PATCH 33/91] CoG backend start --- data-access/nexustiles/backends/__init__.py | 3 + .../nexustiles/backends/backend.py.template | 284 ++++++++++++++++++ .../nexustiles/backends/cog/__init__.py | 15 + .../nexustiles/backends/cog/backend.py | 278 +++++++++++++++++ data-access/nexustiles/nexustiles.py | 6 +- 5 files changed, 584 insertions(+), 2 deletions(-) create mode 100644 data-access/nexustiles/backends/backend.py.template create mode 100644 data-access/nexustiles/backends/cog/__init__.py create mode 100644 data-access/nexustiles/backends/cog/backend.py diff --git a/data-access/nexustiles/backends/__init__.py b/data-access/nexustiles/backends/__init__.py index 8afd240a..1bee2ee8 100644 --- a/data-access/nexustiles/backends/__init__.py +++ b/data-access/nexustiles/backends/__init__.py @@ -13,3 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. +from nexustiles.backends.cog.backend import CoGBackend +from nexustiles.backends.zarr.backend import ZarrBackend +from nexustiles.backends.nexusproto.backend import NexusprotoTileService \ No newline at end of file diff --git a/data-access/nexustiles/backends/backend.py.template b/data-access/nexustiles/backends/backend.py.template new file mode 100644 index 00000000..84cffa28 --- /dev/null +++ b/data-access/nexustiles/backends/backend.py.template @@ -0,0 +1,284 @@ +############################################################################ +### ### +### THIS IS A TEMPLATE FOR STARTING A NEW NTS BACKEND IMPLEMENTATION ### +### ### +############################################################################ + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import sys +from datetime import datetime +from urllib.parse import urlparse + +import numpy as np +import numpy.ma as ma +import s3fs +import xarray as xr +from nexustiles.AbstractTileService import AbstractTileService +from nexustiles.exception import NexusTileServiceException +from nexustiles.model.nexusmodel import Tile, BBox, TileVariable +from pytz import timezone +from shapely.geometry import MultiPolygon, box +from yarl import URL + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) +logger = logging.getLogger(__name__) + + +class ZarrBackend(AbstractTileService): + def __init__(self, dataset_name, path, config=None): + AbstractTileService.__init__(self, dataset_name) + self.__config = config if config is not None else {} + + logger.info(f'Opening zarr backend at {path} for dataset {self._name}') + + url = urlparse(path) + + self.__url = path + + self.__store_type = url.scheme + self.__host = url.netloc + self.__path = url.path + + if 'variable' in config: + data_vars = config['variable'] + elif 'variables' in config: + data_vars = config['variables'] + else: + raise KeyError('Data variables not provided in config') + + if isinstance(data_vars, str): + self.__variables = [data_vars] + elif isinstance(data_vars, list): + self.__variables = data_vars + else: + raise TypeError(f'Improper type for variables config: {type(data_vars)}') + + self.__longitude = config['coords']['longitude'] + self.__latitude = config['coords']['latitude'] + self.__time = config['coords']['time'] + + self.__depth = config['coords'].get('depth') + + if self.__store_type in ['', 'file']: + store = self.__path + elif self.__store_type == 's3': + try: + aws_cfg = self.__config['aws'] + + if aws_cfg['public']: + # region = aws_cfg.get('region', 'us-west-2') + # store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + s3 = s3fs.S3FileSystem(True) + store = s3fs.S3Map(root=path, s3=s3, check=False) + else: + s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) + store = s3fs.S3Map(root=path, s3=s3, check=False) + except Exception as e: + logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. Cause: {e}') + raise NexusTileServiceException(f'Cannot open S3 dataset ({e})') + else: + raise ValueError(self.__store_type) + + try: + self.__ds: xr.Dataset = xr.open_zarr(store, consolidated=True) + except Exception as e: + logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. 
Cause: {e}') + raise NexusTileServiceException(f'Cannot open dataset ({e})') + + def get_dataseries_list(self, simple=False): + raise NotImplementedError() + + def find_tile_by_id(self, tile_id, **kwargs): + return [tile_id] + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + return tile_ids + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, + metrics_callback=None, **kwargs): + raise NotImplementedError() + + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): + """ + Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding + polygon and the closest day of year. + + For example: + given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 + search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) + + Valid matches: + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 + + Invalid matches: + minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists + + :param bounding_polygon: The exact bounding polygon of tiles to search for + :param ds: The dataset name being searched + :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned + :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found + """ + + raise NotImplementedError() + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, dataset, time, time, **kwargs) + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): + return self.find_tiles_in_polygon(bounding_polygon, dataset, time, time, **kwargs) + + def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): + raise NotImplementedError() + + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): + # Find tiles that fall within the polygon in the Solr index + raise NotImplementedError() + + def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles whose metadata matches the specified metadata, start_time, end_time. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + raise NotImplementedError() + + def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): + """ + The method will return tiles with the exact given bounds within the time range. It differs from + find_tiles_in_polygon in that only tiles with exactly the given bounds will be returned as opposed to + doing a polygon intersection with the given bounds. 
+ + :param bounds: (minx, miny, maxx, maxy) bounds to search for + :param ds: Dataset name to search + :param start_time: Start time to search (seconds since epoch) + :param end_time: End time to search (seconds since epoch) + :param kwargs: fetch_data: True/False = whether or not to retrieve tile data + :return: + """ + raise NotImplementedError() + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + # Due to the precise nature of gridded Zarr's subsetting, it doesn't make sense to have a boundary region like + # this + raise NotImplementedError() + + def get_min_max_time_by_granule(self, ds, granule_name): + raise NotImplementedError() + + def get_dataset_overall_stats(self, ds): + raise NotImplementedError() + + def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + def get_bounding_box(self, tile_ids): + """ + Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. + :param tile_ids: List of tile ids + :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles + """ + + raise NotImplementedError() + + # def __get_ds_min_max_date(self): + # min_date = self.__ds[self.__time].min().to_numpy() + # max_date = self.__ds[self.__time].max().to_numpy() + # + # if np.issubdtype(min_date.dtype, np.datetime64): + # min_date = ((min_date - np.datetime64(EPOCH)) / 1e9).astype(int).item() + # + # if np.issubdtype(max_date.dtype, np.datetime64): + # max_date = ((max_date - np.datetime64(EPOCH)) / 1e9).astype(int).item() + # + # return min_date, max_date + + def get_min_time(self, tile_ids, ds=None): + """ + Get the minimum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + def get_max_time(self, tile_ids, ds=None): + """ + Get the maximum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): + """ + Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. + :param bounding_polygon: The bounding polygon of tiles to search for + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of distinct bounding boxes (as shapely polygons) for tiles in the search polygon + """ + raise NotImplementedError() + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + raise NotImplementedError() + + def fetch_data_for_tiles(self, *tiles): + for tile in tiles: + self.__fetch_data_for_tile(tile) + + return tiles + + def __fetch_data_for_tile(self, tile: Tile): + raise NotImplementedError() + + + def _metadata_store_docs_to_tiles(self, *store_docs): + return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] + + @staticmethod + def __nts_url_to_tile(nts_url): + raise NotImplementedError() + + @staticmethod + def __to_url(dataset, **kwargs): + raise NotImplementedError() + + diff --git a/data-access/nexustiles/backends/cog/__init__.py b/data-access/nexustiles/backends/cog/__init__.py new file mode 100644 index 00000000..8afd240a --- /dev/null +++ b/data-access/nexustiles/backends/cog/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py new file mode 100644 index 00000000..2b0aecbc --- /dev/null +++ b/data-access/nexustiles/backends/cog/backend.py @@ -0,0 +1,278 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
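The template's __to_url and __nts_url_to_tile stubs, and the concrete Zarr and CoG backends that fill them in, identify tiles by URL-style ids rather than metadata-store documents: the dataset name goes in the URL path and the spatial/temporal bounds travel in the query string. A rough round-trip sketch follows; the dataset name, bounds, scheme choice and epoch times are placeholders, and real ids are always produced by each backend's own __to_url.

    from yarl import URL

    # Hypothetical tile id; the Zarr backend uses scheme 'zarr', the CoG backend 'cog'.
    tile_id = str(URL.build(
        scheme='zarr',
        host='',
        path='MY_DATASET',
        query={
            'min_lat': '-10.0', 'max_lat': '10.0',
            'min_lon': '100.0', 'max_lon': '120.0',
            'min_time': '1388534400', 'max_time': '1388620800',
        },
    ))

    # Unpacking, in the spirit of __nts_url_to_tile:
    url = URL(tile_id)
    dataset = url.path
    min_lat, max_lat = float(url.query['min_lat']), float(url.query['max_lat'])
    min_time = int(url.query['min_time'])
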
+ +import logging +import sys +from datetime import datetime +from urllib.parse import urlparse + +import numpy as np +import numpy.ma as ma +import s3fs +import xarray as xr +from nexustiles.AbstractTileService import AbstractTileService +from nexustiles.exception import NexusTileServiceException +from nexustiles.model.nexusmodel import Tile, BBox, TileVariable +from pytz import timezone +from shapely.geometry import MultiPolygon, box +from yarl import URL + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) +logger = logging.getLogger(__name__) + + +class CoGBackend(AbstractTileService): + def __init__(self, dataset_name, path, config=None): + AbstractTileService.__init__(self, dataset_name) + self.__config = config if config is not None else {} + + logger.info(f'Opening zarr backend at {path} for dataset {self._name}') + + url = urlparse(path) + + self.__url = path + + self.__store_type = url.scheme + self.__host = url.netloc + self.__path = url.path + + if 'variable' in config: + data_vars = config['variable'] + elif 'variables' in config: + data_vars = config['variables'] + else: + raise KeyError('Data variables not provided in config') + + if isinstance(data_vars, str): + self.__variables = [data_vars] + elif isinstance(data_vars, list): + self.__variables = data_vars + else: + raise TypeError(f'Improper type for variables config: {type(data_vars)}') + + self.__longitude = config['coords']['longitude'] + self.__latitude = config['coords']['latitude'] + self.__time = config['coords']['time'] + + self.__depth = config['coords'].get('depth') + + if self.__store_type in ['', 'file']: + store = self.__path + elif self.__store_type == 's3': + try: + aws_cfg = self.__config['aws'] + + if aws_cfg['public']: + # region = aws_cfg.get('region', 'us-west-2') + # store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + s3 = s3fs.S3FileSystem(True) + store = s3fs.S3Map(root=path, s3=s3, check=False) + else: + s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) + store = s3fs.S3Map(root=path, s3=s3, check=False) + except Exception as e: + logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. Cause: {e}') + raise NexusTileServiceException(f'Cannot open S3 dataset ({e})') + else: + raise ValueError(self.__store_type) + + try: + self.__ds: xr.Dataset = xr.open_zarr(store, consolidated=True) + except Exception as e: + logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. Cause: {e}') + raise NexusTileServiceException(f'Cannot open dataset ({e})') + + def get_dataseries_list(self, simple=False): + raise NotImplementedError() + + def find_tile_by_id(self, tile_id, **kwargs): + return [tile_id] + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + return tile_ids + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, + metrics_callback=None, **kwargs): + raise NotImplementedError() + + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): + """ + Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding + polygon and the closest day of year. 
+ + For example: + given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 + search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) + + Valid matches: + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 + + Invalid matches: + minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists + + :param bounding_polygon: The exact bounding polygon of tiles to search for + :param ds: The dataset name being searched + :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned + :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found + """ + + raise NotImplementedError() + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, dataset, time, time, **kwargs) + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): + return self.find_tiles_in_polygon(bounding_polygon, dataset, time, time, **kwargs) + + def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): + raise NotImplementedError() + + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): + # Find tiles that fall within the polygon in the Solr index + raise NotImplementedError() + + def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles whose metadata matches the specified metadata, start_time, end_time. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + raise NotImplementedError() + + def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): + """ + The method will return tiles with the exact given bounds within the time range. It differs from + find_tiles_in_polygon in that only tiles with exactly the given bounds will be returned as opposed to + doing a polygon intersection with the given bounds. 
+ + :param bounds: (minx, miny, maxx, maxy) bounds to search for + :param ds: Dataset name to search + :param start_time: Start time to search (seconds since epoch) + :param end_time: End time to search (seconds since epoch) + :param kwargs: fetch_data: True/False = whether or not to retrieve tile data + :return: + """ + raise NotImplementedError() + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + # Due to the precise nature of gridded Zarr's subsetting, it doesn't make sense to have a boundary region like + # this + raise NotImplementedError() + + def get_min_max_time_by_granule(self, ds, granule_name): + raise NotImplementedError() + + def get_dataset_overall_stats(self, ds): + raise NotImplementedError() + + def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + def get_bounding_box(self, tile_ids): + """ + Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. + :param tile_ids: List of tile ids + :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles + """ + + raise NotImplementedError() + + # def __get_ds_min_max_date(self): + # min_date = self.__ds[self.__time].min().to_numpy() + # max_date = self.__ds[self.__time].max().to_numpy() + # + # if np.issubdtype(min_date.dtype, np.datetime64): + # min_date = ((min_date - np.datetime64(EPOCH)) / 1e9).astype(int).item() + # + # if np.issubdtype(max_date.dtype, np.datetime64): + # max_date = ((max_date - np.datetime64(EPOCH)) / 1e9).astype(int).item() + # + # return min_date, max_date + + def get_min_time(self, tile_ids, ds=None): + """ + Get the minimum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + def get_max_time(self, tile_ids, ds=None): + """ + Get the maximum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): + """ + Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. + :param bounding_polygon: The bounding polygon of tiles to search for + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of distinct bounding boxes (as shapely polygons) for tiles in the search polygon + """ + raise NotImplementedError() + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + raise NotImplementedError() + + def fetch_data_for_tiles(self, *tiles): + for tile in tiles: + self.__fetch_data_for_tile(tile) + + return tiles + + def __fetch_data_for_tile(self, tile: Tile): + raise NotImplementedError() + + + def _metadata_store_docs_to_tiles(self, *store_docs): + return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] + + @staticmethod + def __nts_url_to_tile(nts_url): + raise NotImplementedError() + + @staticmethod + def __to_url(dataset, **kwargs): + raise NotImplementedError() + + diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index b4fd6bba..0138d6a2 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -34,10 +34,12 @@ from yarl import URL from .AbstractTileService import AbstractTileService -from .backends.nexusproto.backend import NexusprotoTileService -from .backends.zarr.backend import ZarrBackend +# from .backends.nexusproto.backend import NexusprotoTileService +# from .backends.zarr.backend import ZarrBackend from .model.nexusmodel import Tile, BBox, TileStats, TileVariable +from nexustiles.backends import * + from .exception import NexusTileServiceException from requests.structures import CaseInsensitiveDict From bb2fa00d63d5acddcce8cff88ab341369b1a8df3 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 5 Sep 2023 15:19:17 -0700 Subject: [PATCH 34/91] Start of CoG work Solr proxy for granules collection + some methods for CoG work DS list impl for CoG backend --- .../nexustiles/backends/cog/SolrProxy.py | 106 ++++++++++++++++++ .../nexustiles/backends/cog/__init__.py | 1 + .../nexustiles/backends/cog/backend.py | 86 +++++--------- data-access/nexustiles/nexustiles.py | 25 +++++ 4 files changed, 159 insertions(+), 59 deletions(-) create mode 100644 data-access/nexustiles/backends/cog/SolrProxy.py diff --git a/data-access/nexustiles/backends/cog/SolrProxy.py b/data-access/nexustiles/backends/cog/SolrProxy.py new file mode 100644 index 00000000..053c6b97 --- /dev/null +++ b/data-access/nexustiles/backends/cog/SolrProxy.py @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
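The granule Solr proxy added below filters by time overlap: a granule matches when its [min_time_dt, max_time_dt] span intersects the requested window, expressed in Solr as three range clauses joined with OR. Restated in plain Python (purely illustrative, not part of the patch), the predicate the filter encodes is:

    # A granule [g_min, g_max] matches a query window [q_start, q_end] when its
    # start falls in the window, its end falls in the window, or it spans the
    # whole window.
    def overlaps(g_min, g_max, q_start, q_end) -> bool:
        return (q_start <= g_min <= q_end) \
            or (q_start <= g_max <= q_end) \
            or (g_min <= q_start and g_max >= q_end)
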
+ +import logging +from nexustiles.backends.nexusproto.dao.SolrProxy import SolrProxy as SolrProxyBase +from datetime import datetime + + +SOLR_FORMAT = '%Y-%m-%dT%H:%M:%SZ' + + +class SolrProxy(SolrProxyBase): + def __init__(self, config): + super(self, config) + self.logger = logging.getLogger(__name__) + + def find_tiffs_in_date_range(self, dataset, start, end, **kwargs): + search = f'dataset_s:{dataset}' + + time_clause = "(" \ + "min_time_dt:[%s TO %s] " \ + "OR max_time_dt:[%s TO %s] " \ + "OR (min_time_dt:[* TO %s] AND max_time_dt:[%s TO *])" \ + ")" % ( + start, end, + start, end, + start, end + ) + + params = { + 'fq': [time_clause], + 'fl': 'path_s, granule_s' + } + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all( + *(search, None, None, False, None), + **params + ) + + def date_range_for_dataset(self, dataset, **kwargs): + search = f'dataset_s:{dataset}' + + kwargs['rows'] = 1 + kwargs['sort'] = ['max_time_dt desc'] + kwargs['fl'] = 'min_time_dt, max_time_dt' + + params = {} + + self._merge_kwargs(params, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **params) + + max_time = self.convert_iso_to_datetime(results[0]['max_time_dt']) + + params['sort'] = ['min_time_dt asc'] + + results, start, found = self.do_query(*(search, None, None, True, None), **params) + + min_time = self.convert_iso_to_datetime(results[0]['min_time_dt']) + + return min_time, max_time + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): + + search = 'dataset_s:%s' % ds + + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + additionalparams = { + 'fq': [ + "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), + "{!frange l=0 u=0}ms(min_time_dt,max_time_dt)", + "tile_min_time_dt:[%s TO %s] " % (search_start_s, search_end_s) + ], + 'rows': 0, + 'facet': 'true', + 'facet.field': 'min_time_dt', + 'facet.mincount': '1', + 'facet.limit': '-1' + } + + self._merge_kwargs(additionalparams, **kwargs) + + response = self.do_query_raw(*(search, None, None, False, None), **additionalparams) + + daysinrangeasc = sorted( + [(datetime.strptime(a_date, SOLR_FORMAT) - datetime.utcfromtimestamp(0)).total_seconds() for a_date + in response.facets['facet_fields']['min_time_dt'][::2]]) + + return daysinrangeasc + diff --git a/data-access/nexustiles/backends/cog/__init__.py b/data-access/nexustiles/backends/cog/__init__.py index 8afd240a..67331447 100644 --- a/data-access/nexustiles/backends/cog/__init__.py +++ b/data-access/nexustiles/backends/cog/__init__.py @@ -13,3 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from nexustiles.backends.cog.SolrProxy import SolrProxy diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index 2b0aecbc..473aa75f 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -21,6 +21,7 @@ import numpy as np import numpy.ma as ma import s3fs +import rioxarray import xarray as xr from nexustiles.AbstractTileService import AbstractTileService from nexustiles.exception import NexusTileServiceException @@ -29,6 +30,8 @@ from shapely.geometry import MultiPolygon, box from yarl import URL +from nexustiles.backends.cog import SolrProxy + EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' @@ -40,68 +43,34 @@ class CoGBackend(AbstractTileService): - def __init__(self, dataset_name, path, config=None): + def __init__(self, dataset_name, bands, solr_config, config=None): AbstractTileService.__init__(self, dataset_name) self.__config = config if config is not None else {} - logger.info(f'Opening zarr backend at {path} for dataset {self._name}') - - url = urlparse(path) - - self.__url = path - - self.__store_type = url.scheme - self.__host = url.netloc - self.__path = url.path - - if 'variable' in config: - data_vars = config['variable'] - elif 'variables' in config: - data_vars = config['variables'] - else: - raise KeyError('Data variables not provided in config') - - if isinstance(data_vars, str): - self.__variables = [data_vars] - elif isinstance(data_vars, list): - self.__variables = data_vars - else: - raise TypeError(f'Improper type for variables config: {type(data_vars)}') - - self.__longitude = config['coords']['longitude'] - self.__latitude = config['coords']['latitude'] - self.__time = config['coords']['time'] - - self.__depth = config['coords'].get('depth') - - if self.__store_type in ['', 'file']: - store = self.__path - elif self.__store_type == 's3': - try: - aws_cfg = self.__config['aws'] - - if aws_cfg['public']: - # region = aws_cfg.get('region', 'us-west-2') - # store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' - s3 = s3fs.S3FileSystem(True) - store = s3fs.S3Map(root=path, s3=s3, check=False) - else: - s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) - store = s3fs.S3Map(root=path, s3=s3, check=False) - except Exception as e: - logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. Cause: {e}') - raise NexusTileServiceException(f'Cannot open S3 dataset ({e})') - else: - raise ValueError(self.__store_type) - - try: - self.__ds: xr.Dataset = xr.open_zarr(store, consolidated=True) - except Exception as e: - logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. 
Cause: {e}') - raise NexusTileServiceException(f'Cannot open dataset ({e})') + logger.info(f'Opening CoG backend at for dataset {self._name}') + + self.__bands = bands + + self.__longitude = 'longitude' + self.__latitude = 'latitude' + self.__time = 'time' + + # self.__depth = config['coords'].get('depth') + + self.__solr = SolrProxy(solr_config) def get_dataseries_list(self, simple=False): - raise NotImplementedError() + ds = dict( + shortName=self._name, + title=self._name, + type='Cloud Optimized GeoTIFF' + ) + + if not simple: + min_date, max_date = self.__solr.date_range_for_dataset(self._name) + + ds['iso_start'] = datetime.utcfromtimestamp(min_date).strftime(ISO_8601) + ds['iso_end'] = datetime.utcfromtimestamp(max_date).strftime(ISO_8601) def find_tile_by_id(self, tile_id, **kwargs): return [tile_id] @@ -263,9 +232,8 @@ def fetch_data_for_tiles(self, *tiles): def __fetch_data_for_tile(self, tile: Tile): raise NotImplementedError() - def _metadata_store_docs_to_tiles(self, *store_docs): - return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] + return [CoGBackend.__nts_url_to_tile(d) for d in store_docs] @staticmethod def __nts_url_to_tile(nts_url): diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 0138d6a2..d643bf73 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -14,6 +14,7 @@ # limitations under the License. import configparser +import io import json import logging import sys @@ -263,6 +264,30 @@ def _update_datasets(): } except NexusTileServiceException: added_datasets -= 1 + elif store_type.lower() in ['cog', 'cloud_optimized_geotiff']: + update_logger.info(f'Detected new CoG dataset {d_id}, opening new CoG backend') + + ds_config = json.loads(dataset['config'][0]) + + solr_config_str = io.StringIO() + NexusTileService.ds_config.write(solr_config_str) + + solr_config_str.seek(0) + solr_config = configparser.ConfigParser() + solr_config.read(solr_config_str) + solr_config.set('solr', 'core', 'nexusgranules') + + try: + NexusTileService.backends[d_id] = { + 'backend': CoGBackend( + dataset_name=dataset['dataset_s'], + solr_config=solr_config, + **ds_config + ), + 'up': True + } + except NexusTileServiceException: + added_datasets -= 1 else: update_logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') added_datasets -= 1 From d57a531d4f30d0c70272f748112dd04d8014dd31 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 5 Sep 2023 15:19:28 -0700 Subject: [PATCH 35/91] rioxarray dep --- data-access/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 05b78947..6a43957a 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -26,6 +26,7 @@ fsspec==2022.5.0 botocore==1.24.21 aiohttp==3.8.1 xarray~=2022.3.0 +rioxarray zarr>=2.11.3 pandas<2.1.0rc0 # Temporary restriction because 2.1.0rc0 fails to build From 3ad5530dffe96365fcbe435f9edd225802734899 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 6 Sep 2023 12:13:09 -0700 Subject: [PATCH 36/91] CoG URL work and some Solr queries --- .../nexustiles/backends/cog/SolrProxy.py | 31 +++- .../nexustiles/backends/cog/backend.py | 135 +++++++++++++++++- .../nexustiles/backends/zarr/backend.py | 2 +- 3 files changed, 158 insertions(+), 10 deletions(-) diff --git a/data-access/nexustiles/backends/cog/SolrProxy.py b/data-access/nexustiles/backends/cog/SolrProxy.py index 053c6b97..69ffd694 100644 --- 
a/data-access/nexustiles/backends/cog/SolrProxy.py +++ b/data-access/nexustiles/backends/cog/SolrProxy.py @@ -16,6 +16,8 @@ import logging from nexustiles.backends.nexusproto.dao.SolrProxy import SolrProxy as SolrProxyBase from datetime import datetime +from shapely import Polygon +from typing import Union, Optional, Dict SOLR_FORMAT = '%Y-%m-%dT%H:%M:%SZ' @@ -23,10 +25,17 @@ class SolrProxy(SolrProxyBase): def __init__(self, config): - super(self, config) + SolrProxyBase.__init__(self, config) self.logger = logging.getLogger(__name__) - def find_tiffs_in_date_range(self, dataset, start, end, **kwargs): + def find_tiffs_in_bounds( + self, + dataset, + start, + end, + bounds: Optional[Union[Dict[str, float], str, Polygon]] = None, + **kwargs + ): search = f'dataset_s:{dataset}' time_clause = "(" \ @@ -41,9 +50,23 @@ def find_tiffs_in_date_range(self, dataset, start, end, **kwargs): params = { 'fq': [time_clause], - 'fl': 'path_s, granule_s' + # 'fl': ['path_s', 'granule_s'] } + if bounds is not None: + if type(bounds) in [dict, str]: + if isinstance(bounds, dict): + max_lat = bounds['max_lat'] + max_lon = bounds['max_lon'] + min_lat = bounds['min_lat'] + min_lon = bounds['min_lon'] + else: + min_lon, min_lat, max_lon, max_lat = tuple([float(p) for p in bounds.split(',')]) + + params['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) + elif isinstance(bounds, Polygon): + params['fq'].append('{!field f=geo}Intersects(%s)' % bounds.wkt) + self._merge_kwargs(params, **kwargs) return self.do_query_all( @@ -56,7 +79,7 @@ def date_range_for_dataset(self, dataset, **kwargs): kwargs['rows'] = 1 kwargs['sort'] = ['max_time_dt desc'] - kwargs['fl'] = 'min_time_dt, max_time_dt' + # kwargs['fl'] = ['min_time_dt', 'max_time_dt'] params = {} diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index 473aa75f..cde40ce2 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -29,7 +29,7 @@ from pytz import timezone from shapely.geometry import MultiPolygon, box from yarl import URL - +from rioxarray.exceptions import MissingCRS from nexustiles.backends.cog import SolrProxy EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -229,18 +229,143 @@ def fetch_data_for_tiles(self, *tiles): return tiles + + @staticmethod + def __open_granule_at_url(url, time: np.datetime64, bands, **kwargs): + url = urlparse(url) + + if url.scheme in ['file', '']: + tiff = rioxarray.open_rasterio(url.path, mask_and_scale=True) + else: + raise NotImplementedError(f'Support not yet added for tiffs with {url.scheme} URLs') + + try: + tiff.rio.reproject(dst_crs='EPSG:4326', nodata=np.nan) + except MissingCRS: + tiff.rio.write_crs('EPSG:4326').rio.reproject(dst_crs='EPSG:4326', nodata=np.nan) + + rename = dict(x='longitude', y='latitude') + + for band in bands: + band_num = bands[band] + + rename[f'band_{band_num}'] = band + + tiff.rename(rename) + + tiff.expand_dims({"time": 1}) + tiff = tiff.assign_coords({"time": [time]}) + + return tiff + + def __fetch_data_for_tile(self, tile: Tile): - raise NotImplementedError() + bbox: BBox = tile.bbox + + min_lat = None + min_lon = None + max_lat = None + max_lon = None + + if tile.min_time: + min_time = tile.min_time + else: + min_time = None + + if tile.max_time: + max_time = tile.max_time + else: + max_time = None + + # if min_time: + # min_time = datetime.utcfromtimestamp(min_time) + # + # if max_time: + # max_time = 
datetime.utcfromtimestamp(max_time) + + if bbox: + min_lat = bbox.min_lat + min_lon = bbox.min_lon + max_lat = bbox.max_lat + max_lon = bbox.max_lon + + granule = tile.granule + + ds = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat()), self.__bands) def _metadata_store_docs_to_tiles(self, *store_docs): return [CoGBackend.__nts_url_to_tile(d) for d in store_docs] @staticmethod def __nts_url_to_tile(nts_url): - raise NotImplementedError() + tile = Tile() + + url = URL(nts_url) + + tile.tile_id = nts_url + + try: + min_lat = float(url.query['min_lat']) + min_lon = float(url.query['min_lon']) + max_lat = float(url.query['max_lat']) + max_lon = float(url.query['max_lon']) + + tile.bbox = BBox(min_lat, max_lat, min_lon, max_lon) + except KeyError: + pass + + tile.dataset = url.path + tile.dataset_id = url.path + tile.granule = url.query['path'] + + try: + # tile.min_time = int(url.query['min_time']) + tile.min_time = datetime.utcfromtimestamp(int(url.query['min_time'])) + except KeyError: + pass + + try: + # tile.max_time = int(url.query['max_time']) + tile.max_time = datetime.utcfromtimestamp(int(url.query['max_time'])) + except KeyError: + pass + + tile.meta_data = {} + + return tile @staticmethod - def __to_url(dataset, **kwargs): - raise NotImplementedError() + def __to_url(dataset, tiff, **kwargs): + if 'dataset' in kwargs: + del kwargs['dataset'] + + if 'ds' in kwargs: + del kwargs['ds'] + + if 'path' in kwargs: + del kwargs['path'] + + params = {} + + # If any params are numpy dtypes, extract them to base python types + for kw in kwargs: + v = kwargs[kw] + + if v is None: + continue + + if isinstance(v, np.generic): + v = v.item() + + params[kw] = v + + params['path'] = tiff + + return str(URL.build( + scheme='cog', + host='', + path=dataset, + query=params + )) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index c8fd0fe1..f437f524 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -512,7 +512,7 @@ def __to_url(dataset, **kwargs): params[kw] = v return str(URL.build( - scheme='nts', + scheme='zarr', host='', path=dataset, query=params From aee843b93e81493db2423a64e58afebddcba93fa Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 6 Sep 2023 17:14:15 -0700 Subject: [PATCH 37/91] More functions implemented --- .../nexustiles/backends/cog/SolrProxy.py | 36 ++++ .../nexustiles/backends/cog/backend.py | 160 ++++++++++++++++-- .../nexustiles/backends/zarr/backend.py | 1 - 3 files changed, 179 insertions(+), 18 deletions(-) diff --git a/data-access/nexustiles/backends/cog/SolrProxy.py b/data-access/nexustiles/backends/cog/SolrProxy.py index 69ffd694..e43c31a7 100644 --- a/data-access/nexustiles/backends/cog/SolrProxy.py +++ b/data-access/nexustiles/backends/cog/SolrProxy.py @@ -97,6 +97,42 @@ def date_range_for_dataset(self, dataset, **kwargs): return min_time, max_time + def find_min_date_from_tiffs(self, paths, ds, **kwargs): + search = f'dataset_s:{ds}' + + kwargs['rows'] = 1 + kwargs['fl'] = 'min_time_dt' + kwargs['sort'] = ['min_time_dt asc'] + additionalparams = { + 'fq': [ + "{!terms f=path_s}%s" % ','.join(paths) if len(paths) > 0 else '' + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return self.convert_iso_to_datetime(results[0]['min_time_dt']) + + def find_max_date_from_tiffs(self, paths, ds, **kwargs): + search = 
f'dataset_s:{ds}' + + kwargs['rows'] = 1 + kwargs['fl'] = 'max_time_dt' + kwargs['sort'] = ['max_time_dt desc'] + additionalparams = { + 'fq': [ + "{!terms f=path_s}%s" % ','.join(paths) if len(paths) > 0 else '' + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return self.convert_iso_to_datetime(results[0]['max_time_dt']) + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): search = 'dataset_s:%s' % ds diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index cde40ce2..7b9a68d0 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -57,7 +57,7 @@ def __init__(self, dataset_name, bands, solr_config, config=None): # self.__depth = config['coords'].get('depth') - self.__solr = SolrProxy(solr_config) + self.__solr: SolrProxy = SolrProxy(solr_config) def get_dataseries_list(self, simple=False): ds = dict( @@ -80,7 +80,16 @@ def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): - raise NotImplementedError() + return self.__solr.find_days_in_range_asc( + min_lat, + max_lat, + min_lon, + max_lon, + dataset, + start_time, + end_time, + **kwargs + ) def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): """ @@ -115,11 +124,64 @@ def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **k return self.find_tiles_in_polygon(bounding_polygon, dataset, time, time, **kwargs) def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): - raise NotImplementedError() + tiffs = self.__solr.find_tiffs_in_bounds( + ds, + start_time, + end_time, + { + 'min_lat': min_lat, + 'max_lat': max_lat, + 'min_lon': min_lon, + 'max_lon': max_lon + } + ) + + params = { + 'min_lat': min_lat, + 'max_lat': max_lat, + 'min_lon': min_lon, + 'max_lon': max_lon + } + + if 'depth' in kwargs: + params['depth'] = kwargs['depth'] + elif 'min_depth' in kwargs or 'max_depth' in kwargs: + params['min_depth'] = kwargs.get('min_depth') + params['max_depth'] = kwargs.get('max_depth') + + return[CoGBackend.__to_url( + self._name, + tiff['path_s'], + **params) for tiff in tiffs] def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): # Find tiles that fall within the polygon in the Solr index - raise NotImplementedError() + tiffs = self.__solr.find_tiffs_in_bounds( + ds, + start_time, + end_time, + bounding_polygon + ) + + bounds = bounding_polygon.bounds + + params = { + 'min_lat': bounds[1], + 'max_lat': bounds[3], + 'min_lon': bounds[0], + 'max_lon': bounds[2] + } + + if 'depth' in kwargs: + params['depth'] = kwargs['depth'] + elif 'min_depth' in kwargs or 'max_depth' in kwargs: + params['min_depth'] = kwargs.get('min_depth') + params['max_depth'] = kwargs.get('max_depth') + + return [CoGBackend.__to_url( + self._name, + tiff['path_s'], + **params) for tiff in tiffs] def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): """ @@ -145,7 +207,12 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) :param kwargs: fetch_data: True/False = whether or not to retrieve tile data :return: """ - raise 
NotImplementedError() + min_lon = bounds[0] + min_lat = bounds[1] + max_lon = bounds[2] + max_lat = bounds[3] + + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs) def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): # Due to the precise nature of gridded Zarr's subsetting, it doesn't make sense to have a boundary region like @@ -189,7 +256,10 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - raise NotImplementedError() + paths = [URL(t).query['path'] for t in tile_ids] + + min_time = self.__solr.find_min_date_from_tiles(paths, self._name) + return int((min_time - EPOCH).total_seconds()) def get_max_time(self, tile_ids, ds=None): """ @@ -198,7 +268,10 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - raise NotImplementedError() + paths = [URL(t).query['path'] for t in tile_ids] + + max_time = self.__solr.find_max_date_from_tiles(paths, self._name) + return int((max_time - EPOCH).total_seconds()) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -221,7 +294,12 @@ def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, m :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] :return: number of tiles that match search criteria """ - raise NotImplementedError() + return len(self.__solr.find_tiffs_in_bounds( + ds, + start_time, + end_time, + bounds=bounding_polygon + )) def fetch_data_for_tiles(self, *tiles): for tile in tiles: @@ -229,7 +307,6 @@ def fetch_data_for_tiles(self, *tiles): return tiles - @staticmethod def __open_granule_at_url(url, time: np.datetime64, bands, **kwargs): url = urlparse(url) @@ -246,18 +323,23 @@ def __open_granule_at_url(url, time: np.datetime64, bands, **kwargs): rename = dict(x='longitude', y='latitude') + drop = set(tiff.data_vars) + for band in bands: band_num = bands[band] - rename[f'band_{band_num}'] = band + key = f'band_{band_num}' - tiff.rename(rename) + rename[key] = band + drop.discard(key) - tiff.expand_dims({"time": 1}) - tiff = tiff.assign_coords({"time": [time]}) + drop.discard('spatial_ref') - return tiff + tiff = tiff.rename(rename).drop_vars(drop, errors='ignore') + tiff = tiff.expand_dims({"time": 1}).assign_coords({"time": [time]}) + + return tiff def __fetch_data_for_tile(self, tile: Tile): bbox: BBox = tile.bbox @@ -291,7 +373,53 @@ def __fetch_data_for_tile(self, tile: Tile): granule = tile.granule - ds = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat()), self.__bands) + ds: xr.Dataset = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat()), self.__bands) + variables = list(ds.data_vars) + + sel_g = { + self.__latitude: slice(min_lat, max_lat), + self.__longitude: slice(min_lon, max_lon), + } + + sel_t = {} + + if min_time is None and max_time is None: + sel_t = None + method = None + elif min_time == max_time: + sel_t[self.__time] = [min_time] # List, otherwise self.__time dim will be dropped + method = 'nearest' + else: + sel_t[self.__time] = slice(min_time, max_time) + method = None + + tile.variables = [ + TileVariable(v, v) for v in variables + ] + + matched = self.__ds.sel(sel_g) + + if sel_t is not None: + matched 
= matched.sel(sel_t, method=method) + + tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) + tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) + + times = matched[self.__time].to_numpy() + + if np.issubdtype(times.dtype, np.datetime64): + times = ((times - np.datetime64(EPOCH)) / 1e9).astype(int) + + tile.times = ma.masked_invalid(times) + + var_data = [matched[var].to_numpy() for var in variables] + + if len(variables) > 1: + tile.data = ma.masked_invalid(var_data) + tile.is_multi = True + else: + tile.data = ma.masked_invalid(var_data[0]) + tile.is_multi = False def _metadata_store_docs_to_tiles(self, *store_docs): return [CoGBackend.__nts_url_to_tile(d) for d in store_docs] @@ -367,5 +495,3 @@ def __to_url(dataset, tiff, **kwargs): path=dataset, query=params )) - - diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index f437f524..5cfa4c72 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -448,7 +448,6 @@ def __fetch_data_for_tile(self, tile: Tile): tile.data = ma.masked_invalid(var_data[0]) tile.is_multi = False - def _metadata_store_docs_to_tiles(self, *store_docs): return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] From c7b938da828d74d2e885406b017a3504d4477eaa Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 6 Sep 2023 17:14:15 -0700 Subject: [PATCH 38/91] More functions implemented --- .../nexustiles/backends/cog/SolrProxy.py | 36 ++++ .../nexustiles/backends/cog/backend.py | 161 ++++++++++++++++-- .../nexustiles/backends/zarr/backend.py | 1 - 3 files changed, 179 insertions(+), 19 deletions(-) diff --git a/data-access/nexustiles/backends/cog/SolrProxy.py b/data-access/nexustiles/backends/cog/SolrProxy.py index 69ffd694..e43c31a7 100644 --- a/data-access/nexustiles/backends/cog/SolrProxy.py +++ b/data-access/nexustiles/backends/cog/SolrProxy.py @@ -97,6 +97,42 @@ def date_range_for_dataset(self, dataset, **kwargs): return min_time, max_time + def find_min_date_from_tiffs(self, paths, ds, **kwargs): + search = f'dataset_s:{ds}' + + kwargs['rows'] = 1 + kwargs['fl'] = 'min_time_dt' + kwargs['sort'] = ['min_time_dt asc'] + additionalparams = { + 'fq': [ + "{!terms f=path_s}%s" % ','.join(paths) if len(paths) > 0 else '' + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return self.convert_iso_to_datetime(results[0]['min_time_dt']) + + def find_max_date_from_tiffs(self, paths, ds, **kwargs): + search = f'dataset_s:{ds}' + + kwargs['rows'] = 1 + kwargs['fl'] = 'max_time_dt' + kwargs['sort'] = ['max_time_dt desc'] + additionalparams = { + 'fq': [ + "{!terms f=path_s}%s" % ','.join(paths) if len(paths) > 0 else '' + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return self.convert_iso_to_datetime(results[0]['max_time_dt']) + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): search = 'dataset_s:%s' % ds diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index cde40ce2..e6722997 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -20,7 +20,6 @@ import numpy as np import numpy.ma as ma -import s3fs import rioxarray 
import xarray as xr from nexustiles.AbstractTileService import AbstractTileService @@ -57,7 +56,7 @@ def __init__(self, dataset_name, bands, solr_config, config=None): # self.__depth = config['coords'].get('depth') - self.__solr = SolrProxy(solr_config) + self.__solr: SolrProxy = SolrProxy(solr_config) def get_dataseries_list(self, simple=False): ds = dict( @@ -80,7 +79,16 @@ def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): - raise NotImplementedError() + return self.__solr.find_days_in_range_asc( + min_lat, + max_lat, + min_lon, + max_lon, + dataset, + start_time, + end_time, + **kwargs + ) def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): """ @@ -115,11 +123,64 @@ def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **k return self.find_tiles_in_polygon(bounding_polygon, dataset, time, time, **kwargs) def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): - raise NotImplementedError() + tiffs = self.__solr.find_tiffs_in_bounds( + ds, + start_time, + end_time, + { + 'min_lat': min_lat, + 'max_lat': max_lat, + 'min_lon': min_lon, + 'max_lon': max_lon + } + ) + + params = { + 'min_lat': min_lat, + 'max_lat': max_lat, + 'min_lon': min_lon, + 'max_lon': max_lon + } + + if 'depth' in kwargs: + params['depth'] = kwargs['depth'] + elif 'min_depth' in kwargs or 'max_depth' in kwargs: + params['min_depth'] = kwargs.get('min_depth') + params['max_depth'] = kwargs.get('max_depth') + + return[CoGBackend.__to_url( + self._name, + tiff['path_s'], + **params) for tiff in tiffs] def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): # Find tiles that fall within the polygon in the Solr index - raise NotImplementedError() + tiffs = self.__solr.find_tiffs_in_bounds( + ds, + start_time, + end_time, + bounding_polygon + ) + + bounds = bounding_polygon.bounds + + params = { + 'min_lat': bounds[1], + 'max_lat': bounds[3], + 'min_lon': bounds[0], + 'max_lon': bounds[2] + } + + if 'depth' in kwargs: + params['depth'] = kwargs['depth'] + elif 'min_depth' in kwargs or 'max_depth' in kwargs: + params['min_depth'] = kwargs.get('min_depth') + params['max_depth'] = kwargs.get('max_depth') + + return [CoGBackend.__to_url( + self._name, + tiff['path_s'], + **params) for tiff in tiffs] def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): """ @@ -145,7 +206,12 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) :param kwargs: fetch_data: True/False = whether or not to retrieve tile data :return: """ - raise NotImplementedError() + min_lon = bounds[0] + min_lat = bounds[1] + max_lon = bounds[2] + max_lat = bounds[3] + + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs) def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): # Due to the precise nature of gridded Zarr's subsetting, it doesn't make sense to have a boundary region like @@ -189,7 +255,10 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. 
Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - raise NotImplementedError() + paths = [URL(t).query['path'] for t in tile_ids] + + min_time = self.__solr.find_min_date_from_tiles(paths, self._name) + return int((min_time - EPOCH).total_seconds()) def get_max_time(self, tile_ids, ds=None): """ @@ -198,7 +267,10 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - raise NotImplementedError() + paths = [URL(t).query['path'] for t in tile_ids] + + max_time = self.__solr.find_max_date_from_tiles(paths, self._name) + return int((max_time - EPOCH).total_seconds()) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -221,7 +293,12 @@ def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, m :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] :return: number of tiles that match search criteria """ - raise NotImplementedError() + return len(self.__solr.find_tiffs_in_bounds( + ds, + start_time, + end_time, + bounds=bounding_polygon + )) def fetch_data_for_tiles(self, *tiles): for tile in tiles: @@ -229,7 +306,6 @@ def fetch_data_for_tiles(self, *tiles): return tiles - @staticmethod def __open_granule_at_url(url, time: np.datetime64, bands, **kwargs): url = urlparse(url) @@ -246,18 +322,23 @@ def __open_granule_at_url(url, time: np.datetime64, bands, **kwargs): rename = dict(x='longitude', y='latitude') + drop = set(tiff.data_vars) + for band in bands: band_num = bands[band] - rename[f'band_{band_num}'] = band + key = f'band_{band_num}' - tiff.rename(rename) + rename[key] = band + drop.discard(key) - tiff.expand_dims({"time": 1}) - tiff = tiff.assign_coords({"time": [time]}) + drop.discard('spatial_ref') - return tiff + tiff = tiff.rename(rename).drop_vars(drop, errors='ignore') + tiff = tiff.expand_dims({"time": 1}).assign_coords({"time": [time]}) + + return tiff def __fetch_data_for_tile(self, tile: Tile): bbox: BBox = tile.bbox @@ -291,7 +372,53 @@ def __fetch_data_for_tile(self, tile: Tile): granule = tile.granule - ds = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat()), self.__bands) + ds: xr.Dataset = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat()), self.__bands) + variables = list(ds.data_vars) + + sel_g = { + self.__latitude: slice(min_lat, max_lat), + self.__longitude: slice(min_lon, max_lon), + } + + sel_t = {} + + if min_time is None and max_time is None: + sel_t = None + method = None + elif min_time == max_time: + sel_t[self.__time] = [min_time] # List, otherwise self.__time dim will be dropped + method = 'nearest' + else: + sel_t[self.__time] = slice(min_time, max_time) + method = None + + tile.variables = [ + TileVariable(v, v) for v in variables + ] + + matched = self.__ds.sel(sel_g) + + if sel_t is not None: + matched = matched.sel(sel_t, method=method) + + tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) + tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) + + times = matched[self.__time].to_numpy() + + if np.issubdtype(times.dtype, np.datetime64): + times = ((times - np.datetime64(EPOCH)) / 1e9).astype(int) + + tile.times = ma.masked_invalid(times) + + var_data = [matched[var].to_numpy() for var in variables] + + if len(variables) > 1: + tile.data = ma.masked_invalid(var_data) + tile.is_multi = 
True + else: + tile.data = ma.masked_invalid(var_data[0]) + tile.is_multi = False def _metadata_store_docs_to_tiles(self, *store_docs): return [CoGBackend.__nts_url_to_tile(d) for d in store_docs] @@ -367,5 +494,3 @@ def __to_url(dataset, tiff, **kwargs): path=dataset, query=params )) - - diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index f437f524..5cfa4c72 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -448,7 +448,6 @@ def __fetch_data_for_tile(self, tile: Tile): tile.data = ma.masked_invalid(var_data[0]) tile.is_multi = False - def _metadata_store_docs_to_tiles(self, *store_docs): return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] From d51cf638c4f55dbad04dea4908346164f3873c4d Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 11 Sep 2023 11:28:56 -0700 Subject: [PATCH 39/91] More CoG stuff --- analysis/webservice/webapp.py | 8 ++ .../nexustiles/backends/cog/SolrProxy.py | 2 +- .../nexustiles/backends/cog/backend.py | 8 +- .../backends/nexusproto/dao/SolrProxy.py | 2 +- data-access/nexustiles/nexustiles.py | 73 +++++++++++++++++-- 5 files changed, 82 insertions(+), 11 deletions(-) diff --git a/analysis/webservice/webapp.py b/analysis/webservice/webapp.py index e060d3c3..7967c3ab 100644 --- a/analysis/webservice/webapp.py +++ b/analysis/webservice/webapp.py @@ -27,6 +27,8 @@ from webservice.nexus_tornado.app_builders import NexusAppBuilder from webservice.nexus_tornado.app_builders import RedirectAppBuilder +from nexustiles.nexustiles import NexusTileService + def inject_args_in_config(args, config): """ @@ -114,6 +116,12 @@ def main(): log.info("Starting web server in debug mode: %s" % options.debug) server = tornado.web.HTTPServer(router) server.listen(options.port) + log.info('Waiting for dataset backends to come up...') + + with NexusTileService.DS_LOCK: + if not NexusTileService.is_update_tread_alive(): + raise Exception('Backend thread crashed') + log.info("Starting HTTP listener...") tornado.ioloop.IOLoop.current().start() diff --git a/data-access/nexustiles/backends/cog/SolrProxy.py b/data-access/nexustiles/backends/cog/SolrProxy.py index e43c31a7..182a1b3f 100644 --- a/data-access/nexustiles/backends/cog/SolrProxy.py +++ b/data-access/nexustiles/backends/cog/SolrProxy.py @@ -16,7 +16,7 @@ import logging from nexustiles.backends.nexusproto.dao.SolrProxy import SolrProxy as SolrProxyBase from datetime import datetime -from shapely import Polygon +from shapely.geometry import Polygon from typing import Union, Optional, Dict diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index e6722997..5556f3d8 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -68,8 +68,12 @@ def get_dataseries_list(self, simple=False): if not simple: min_date, max_date = self.__solr.date_range_for_dataset(self._name) - ds['iso_start'] = datetime.utcfromtimestamp(min_date).strftime(ISO_8601) - ds['iso_end'] = datetime.utcfromtimestamp(max_date).strftime(ISO_8601) + ds['start'] = (min_date - EPOCH).total_seconds() + ds['end'] = (max_date - EPOCH).total_seconds() + ds['iso_start'] = min_date.strftime(ISO_8601) + ds['iso_end'] = max_date.strftime(ISO_8601) + + return [ds] def find_tile_by_id(self, tile_id, **kwargs): return [tile_id] diff --git a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py 
b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py index c9435a2b..76fa918f 100644 --- a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py +++ b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py @@ -38,7 +38,7 @@ def __init__(self, config): self.solrCore = config.get("solr", "core") solr_kargs = {} if config.has_option("solr", "time_out"): - solr_kargs["timeout"] = config.get("solr", "time_out") + solr_kargs["timeout"] = float(config.get("solr", "time_out")) self.logger = logging.getLogger('nexus') with SOLR_CON_LOCK: diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index d643bf73..34fb26fc 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -110,7 +110,6 @@ def wrapper(*args, **kwargs): SOLR_LOCK = threading.Lock() -DS_LOCK = threading.Lock() thread_local = threading.local() @@ -129,12 +128,14 @@ class NexusTileService: ds_config = None + DS_LOCK = threading.Lock() + __update_thread = None @staticmethod def __update_datasets_loop(): while True: - with DS_LOCK: + with NexusTileService.DS_LOCK: NexusTileService._update_datasets() sleep(3600) @@ -167,12 +168,16 @@ def __init__(self, config=None): NexusTileService.__update_thread.start() + @staticmethod + def is_update_tread_alive(): + return NexusTileService.__update_thread is not None and NexusTileService.__update_thread.is_alive() + @staticmethod def _get_backend(dataset_s) -> AbstractTileService: if dataset_s is not None: dataset_s = dataset_s - with DS_LOCK: + with NexusTileService.DS_LOCK: if dataset_s not in NexusTileService.backends: logger.warning(f'Dataset {dataset_s} not currently loaded. Checking to see if it was recently' f'added') @@ -270,11 +275,17 @@ def _update_datasets(): ds_config = json.loads(dataset['config'][0]) solr_config_str = io.StringIO() + + print({section: dict(NexusTileService.ds_config[section]) for section in NexusTileService.ds_config.sections()}) + NexusTileService.ds_config.write(solr_config_str) solr_config_str.seek(0) solr_config = configparser.ConfigParser() - solr_config.read(solr_config_str) + solr_config.read_file(solr_config_str) + + print({section: dict(solr_config[section]) for section in solr_config.sections()}) + solr_config.set('solr', 'core', 'nexusgranules') try: @@ -336,7 +347,7 @@ def user_ds_update(name, config): logger.info(f'Updated dataset {name} in Solr. Updating backends') - with DS_LOCK: + with NexusTileService.DS_LOCK: NexusTileService._update_datasets() return {'success': True} @@ -368,7 +379,7 @@ def user_ds_add(name, path, config, type='zarr'): logger.info(f'Added dataset {name} to Solr. Updating backends') - with DS_LOCK: + with NexusTileService.DS_LOCK: NexusTileService._update_datasets() return {'success': True} @@ -393,7 +404,7 @@ def user_ds_delete(name): logger.info(f'Removed dataset {name} from Solr. Updating backends') - with DS_LOCK: + with NexusTileService.DS_LOCK: NexusTileService._update_datasets() return {'success': True} @@ -717,6 +728,54 @@ def mask_tiles_to_time_range(self, start_time, end_time, tiles): return tiles + def mask_tiles_to_elevation(self, min_e, max_e, tiles): + """ + Masks data in tiles to specified time range. 
+ :param min_e: The minimum elevation of data to keep
+ :param max_e: The maximum elevation of data to keep
+ :param tiles: List of tiles
+ :return: A list of tiles with data masked to the specified elevation range
+ """
+ for tile in tiles:
+ tile.elevation = ma.masked_outside(tile.elevation, min_e, max_e)
+
+ # Or together the masks of the individual arrays to create the new mask
+ data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \
+ | ma.getmaskarray(tile.elevation)[np.newaxis, :, :] \
+
+ # If this is multi-var, need to mask each variable separately.
+ if tile.is_multi:
+ # Combine space/time mask with existing mask on data
+ data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask])
+
+ num_vars = len(tile.data)
+ multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0)
+ multi_data_mask = np.broadcast_to(multi_data_mask, tile.data.shape)
+
+ tile.data = ma.masked_where(multi_data_mask, tile.data)
+ else:
+ print(data_mask.shape)
+ print(tile.data.shape)
+
+ data_mask = np.broadcast_to(data_mask, tile.data.shape)
+ tile.data = ma.masked_where(data_mask, tile.data)
+
+ tiles[:] = [tile for tile in tiles if not tile.data.mask.all()]
+
+ return tiles
+
+ def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs):
+ """
+ Return number of tiles that match search criteria.
+ :param ds: The dataset name to search
+ :param bounding_polygon: The polygon to search for tiles
+ :param start_time: The start time to search for tiles
+ :param end_time: The end time to search for tiles
+ :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"]
+ :return: number of tiles that match search criteria
+ """
+ return self._metadatastore.get_tile_count(ds, bounding_polygon, start_time, end_time, metadata, **kwargs)
+
 def fetch_data_for_tiles(self, *tiles):
 dataset = tiles[0].dataset

From f5750c32eafaef0fb5f31795037c30cec1e1c325 Mon Sep 17 00:00:00 2001
From: rileykk
Date: Thu, 14 Sep 2023 14:07:16 -0700
Subject: [PATCH 40/91] Zarr: Probe lat order and flip if necessary

---
 data-access/nexustiles/backends/zarr/backend.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py
index 818d4b07..01559000 100644
--- a/data-access/nexustiles/backends/zarr/backend.py
+++ b/data-access/nexustiles/backends/zarr/backend.py
@@ -100,6 +100,13 @@ def __init__(self, dataset_name, path, config=None):
 logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. Cause: {e}')
 raise NexusTileServiceException(f'Cannot open dataset ({e})')

+ lats = self.__ds[self.__latitude].to_numpy()
+ delta = lats[1] - lats[0]
+
+ if delta < 0:
+ logger.warning(f'Latitude coordinate for {self._name} is in descending order. 
Flipping it to ascending') + self.__ds = self.__ds.isel({self.__latitude: slice(None, None, -1)}) + def get_dataseries_list(self, simple=False): ds = { "shortName": self._name, From b97677e9cc814659e28e2ae410cce5bd14d9113e Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 14 Sep 2023 14:07:16 -0700 Subject: [PATCH 41/91] Zarr: Probe lat order and flip if necessary --- data-access/nexustiles/backends/zarr/backend.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 5cfa4c72..03c7f330 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -100,6 +100,13 @@ def __init__(self, dataset_name, path, config=None): logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. Cause: {e}') raise NexusTileServiceException(f'Cannot open dataset ({e})') + lats = self.__ds[self.__latitude].to_numpy() + delta = lats[1] - lats[0] + + if delta < 0: + logger.warning(f'Latitude coordinate for {self._name} is in descending order. Flipping it to ascending') + self.__ds = self.__ds.isel({self.__latitude: slice(None, None, -1)}) + def get_dataseries_list(self, simple=False): ds = { "shortName": self._name, From 4cdb485171552c5df76a635e0a3a5eb70f43f684 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 18 Sep 2023 16:50:23 -0700 Subject: [PATCH 42/91] Fixes for subsetting --- .../nexustiles/backends/cog/SolrProxy.py | 35 ++++++++++--------- .../nexustiles/backends/cog/backend.py | 18 ++++++---- data-access/nexustiles/nexustiles.py | 4 --- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/data-access/nexustiles/backends/cog/SolrProxy.py b/data-access/nexustiles/backends/cog/SolrProxy.py index 182a1b3f..901fc121 100644 --- a/data-access/nexustiles/backends/cog/SolrProxy.py +++ b/data-access/nexustiles/backends/cog/SolrProxy.py @@ -38,14 +38,17 @@ def find_tiffs_in_bounds( ): search = f'dataset_s:{dataset}' + search_start_s = datetime.utcfromtimestamp(start).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end).strftime(SOLR_FORMAT) + time_clause = "(" \ "min_time_dt:[%s TO %s] " \ "OR max_time_dt:[%s TO %s] " \ "OR (min_time_dt:[* TO %s] AND max_time_dt:[%s TO *])" \ ")" % ( - start, end, - start, end, - start, end + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s ) params = { @@ -53,19 +56,19 @@ def find_tiffs_in_bounds( # 'fl': ['path_s', 'granule_s'] } - if bounds is not None: - if type(bounds) in [dict, str]: - if isinstance(bounds, dict): - max_lat = bounds['max_lat'] - max_lon = bounds['max_lon'] - min_lat = bounds['min_lat'] - min_lon = bounds['min_lon'] - else: - min_lon, min_lat, max_lon, max_lat = tuple([float(p) for p in bounds.split(',')]) - - params['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) - elif isinstance(bounds, Polygon): - params['fq'].append('{!field f=geo}Intersects(%s)' % bounds.wkt) + # if bounds is not None: + # if type(bounds) in [dict, str]: + # if isinstance(bounds, dict): + # max_lat = bounds['max_lat'] + # max_lon = bounds['max_lon'] + # min_lat = bounds['min_lat'] + # min_lon = bounds['min_lon'] + # else: + # min_lon, min_lat, max_lon, max_lat = tuple([float(p) for p in bounds.split(',')]) + # + # params['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) + # elif isinstance(bounds, Polygon): + # params['fq'].append('{!field f=geo}Intersects(%s)' % bounds.wkt) 
self._merge_kwargs(params, **kwargs) diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index 5556f3d8..804fad02 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -155,6 +155,8 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t return[CoGBackend.__to_url( self._name, tiff['path_s'], + min_time=tiff.get('min_time_dt'), + max_time=tiff.get('max_time_dt'), **params) for tiff in tiffs] def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): @@ -184,6 +186,8 @@ def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_ return [CoGBackend.__to_url( self._name, tiff['path_s'], + min_time=tiff.get('min_time_dt'), + max_time=tiff.get('max_time_dt'), **params) for tiff in tiffs] def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): @@ -315,7 +319,7 @@ def __open_granule_at_url(url, time: np.datetime64, bands, **kwargs): url = urlparse(url) if url.scheme in ['file', '']: - tiff = rioxarray.open_rasterio(url.path, mask_and_scale=True) + tiff = rioxarray.open_rasterio(url.path, mask_and_scale=True).to_dataset('band') else: raise NotImplementedError(f'Support not yet added for tiffs with {url.scheme} URLs') @@ -331,10 +335,8 @@ def __open_granule_at_url(url, time: np.datetime64, bands, **kwargs): for band in bands: band_num = bands[band] - key = f'band_{band_num}' - - rename[key] = band - drop.discard(key) + rename[band_num] = band + drop.discard(band_num) drop.discard('spatial_ref') @@ -400,7 +402,7 @@ def __fetch_data_for_tile(self, tile: Tile): TileVariable(v, v) for v in variables ] - matched = self.__ds.sel(sel_g) + matched = ds.sel(sel_g) if sel_t is not None: matched = matched.sel(sel_t, method=method) @@ -452,12 +454,16 @@ def __nts_url_to_tile(nts_url): try: # tile.min_time = int(url.query['min_time']) tile.min_time = datetime.utcfromtimestamp(int(url.query['min_time'])) + except ValueError: + tile.min_time = datetime.strptime(url.query['min_time'], '%Y-%m-%dT%H:%M:%SZ') except KeyError: pass try: # tile.max_time = int(url.query['max_time']) tile.max_time = datetime.utcfromtimestamp(int(url.query['max_time'])) + except ValueError: + tile.max_time = datetime.strptime(url.query['max_time'], '%Y-%m-%dT%H:%M:%SZ') except KeyError: pass diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 34fb26fc..b1a0841e 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -276,16 +276,12 @@ def _update_datasets(): solr_config_str = io.StringIO() - print({section: dict(NexusTileService.ds_config[section]) for section in NexusTileService.ds_config.sections()}) - NexusTileService.ds_config.write(solr_config_str) solr_config_str.seek(0) solr_config = configparser.ConfigParser() solr_config.read_file(solr_config_str) - print({section: dict(solr_config[section]) for section in solr_config.sections()}) - solr_config.set('solr', 'core', 'nexusgranules') try: From 1ee25c27d9eaab28a0c58799023438e0c1c7c19b Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 20 Sep 2023 09:12:56 -0700 Subject: [PATCH 43/91] Warnings for geo subsetting --- .../nexustiles/backends/cog/SolrProxy.py | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/data-access/nexustiles/backends/cog/SolrProxy.py b/data-access/nexustiles/backends/cog/SolrProxy.py index 
901fc121..0d957fbd 100644 --- a/data-access/nexustiles/backends/cog/SolrProxy.py +++ b/data-access/nexustiles/backends/cog/SolrProxy.py @@ -56,19 +56,21 @@ def find_tiffs_in_bounds( # 'fl': ['path_s', 'granule_s'] } - # if bounds is not None: - # if type(bounds) in [dict, str]: - # if isinstance(bounds, dict): - # max_lat = bounds['max_lat'] - # max_lon = bounds['max_lon'] - # min_lat = bounds['min_lat'] - # min_lon = bounds['min_lon'] - # else: - # min_lon, min_lat, max_lon, max_lat = tuple([float(p) for p in bounds.split(',')]) - # - # params['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) - # elif isinstance(bounds, Polygon): - # params['fq'].append('{!field f=geo}Intersects(%s)' % bounds.wkt) + if bounds is not None: + self.logger.warning('Subsetting GeoTIFF granule by bbox not yet implemented') + + # if type(bounds) in [dict, str]: + # if isinstance(bounds, dict): + # max_lat = bounds['max_lat'] + # max_lon = bounds['max_lon'] + # min_lat = bounds['min_lat'] + # min_lon = bounds['min_lon'] + # else: + # min_lon, min_lat, max_lon, max_lat = tuple([float(p) for p in bounds.split(',')]) + # + # params['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) + # elif isinstance(bounds, Polygon): + # params['fq'].append('{!field f=geo}Intersects(%s)' % bounds.wkt) self._merge_kwargs(params, **kwargs) @@ -143,11 +145,23 @@ def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_t search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + self.logger.warning('CoG backend does not yet support geo subsetting for TIFF selection') + + time_clause = "(" \ + "min_time_dt:[%s TO %s] " \ + "OR max_time_dt:[%s TO %s] " \ + "OR (min_time_dt:[* TO %s] AND max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams = { 'fq': [ - "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), + # "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), "{!frange l=0 u=0}ms(min_time_dt,max_time_dt)", - "tile_min_time_dt:[%s TO %s] " % (search_start_s, search_end_s) + time_clause ], 'rows': 0, 'facet': 'true', From 7fc260ae53b2b97f0140e12c1a18f9ba9d6e7b4e Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 20 Sep 2023 09:14:00 -0700 Subject: [PATCH 44/91] Strip quotes from variable names CM can sometimes publish with extra quotes resulting in KeyErrors --- data-access/nexustiles/backends/zarr/backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 01559000..e1d0a0c1 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -68,6 +68,8 @@ def __init__(self, dataset_name, path, config=None): else: raise TypeError(f'Improper type for variables config: {type(data_vars)}') + self.__variables = [v.strip('\"\'') for v in self.__variables] + self.__longitude = config['coords']['longitude'] self.__latitude = config['coords']['latitude'] self.__time = config['coords']['time'] From 62564f7414c25c05b2089c900675fc04e699e487 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 20 Sep 2023 09:14:00 -0700 Subject: [PATCH 45/91] Strip quotes from variable names CM can sometimes publish with extra quotes resulting in KeyErrors --- data-access/nexustiles/backends/zarr/backend.py | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 03c7f330..3800bf8e 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -68,6 +68,8 @@ def __init__(self, dataset_name, path, config=None): else: raise TypeError(f'Improper type for variables config: {type(data_vars)}') + self.__variables = [v.strip('\"\'') for v in self.__variables] + self.__longitude = config['coords']['longitude'] self.__latitude = config['coords']['latitude'] self.__time = config['coords']['time'] From b5a223b9b2c84ae3ff5f2a2056f839d1f233a4ef Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 21 Sep 2023 08:50:40 -0700 Subject: [PATCH 46/91] Fixed find_tile_by_id not routing to correct backend Changed tile url schema to reflect backend type; didn't make that change here --- data-access/nexustiles/nexustiles.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index b1a0841e..3d8510fb 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -176,6 +176,10 @@ def is_update_tread_alive(): def _get_backend(dataset_s) -> AbstractTileService: if dataset_s is not None: dataset_s = dataset_s + else: + logger.warning('_get_backend called with dataset_s=None') + + print(f'Getting backend for {dataset_s}') with NexusTileService.DS_LOCK: if dataset_s not in NexusTileService.backends: @@ -429,7 +433,7 @@ def get_dataseries_list(self, simple=False): def find_tile_by_id(self, tile_id, **kwargs): tile = URL(tile_id) - if tile.scheme == 'nts': + if tile.scheme != '': return NexusTileService._get_backend(tile.path).find_tile_by_id(tile_id) else: return NexusTileService._get_backend('__nexusproto__').find_tile_by_id(tile_id) From 06504e0f4b65bfbf174301123bc0a71f2a5dbaca Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 21 Sep 2023 11:10:35 -0700 Subject: [PATCH 47/91] Ensure geotiffs are sorted by dt --- data-access/nexustiles/backends/cog/SolrProxy.py | 2 +- data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py | 3 ++- data-access/nexustiles/nexustiles.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/data-access/nexustiles/backends/cog/SolrProxy.py b/data-access/nexustiles/backends/cog/SolrProxy.py index 0d957fbd..c78540ca 100644 --- a/data-access/nexustiles/backends/cog/SolrProxy.py +++ b/data-access/nexustiles/backends/cog/SolrProxy.py @@ -53,7 +53,7 @@ def find_tiffs_in_bounds( params = { 'fq': [time_clause], - # 'fl': ['path_s', 'granule_s'] + 'sort': ['min_time_dt asc'] } if bounds is not None: diff --git a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py index 76fa918f..51f0d93c 100644 --- a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py +++ b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py @@ -726,7 +726,8 @@ def _merge_kwargs(additionalparams, **kwargs): s = None try: - additionalparams['sort'].extend(s) + if s is not None: + additionalparams['sort'].extend(s) except KeyError: if s is not None: additionalparams['sort'] = s diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 3d8510fb..60dc4a6e 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -179,7 +179,7 @@ def _get_backend(dataset_s) -> AbstractTileService: else: 
logger.warning('_get_backend called with dataset_s=None') - print(f'Getting backend for {dataset_s}') + logger.debug(f'Getting backend for {dataset_s}') with NexusTileService.DS_LOCK: if dataset_s not in NexusTileService.backends: From b5df944ec6dcc3bb038c02f5849fd300c0a219c6 Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 25 Sep 2023 13:37:09 -0700 Subject: [PATCH 48/91] removed resultSizeLimit param from matchup --- analysis/webservice/algorithms_spark/Matchup.py | 17 +++-------------- analysis/webservice/apidocs/openapi.yml | 13 ------------- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index a55f61d1..77ecc346 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -137,14 +137,6 @@ class Matchup(NexusCalcSparkTornadoHandler): + "If true, only the nearest point will be returned for each primary point. " + "If false, all points within the tolerances will be returned for each primary point. Default: False" }, - "resultSizeLimit": { - "name": "Result Size Limit", - "type": "int", - "description": "Optional integer value that limits the number of results returned from the matchup. " - "If the number of primary matches is greater than this limit, the service will respond with " - "(HTTP 202: Accepted) and an empty response body. A value of 0 means return all results. " - "Default: 500" - }, "prioritizeDistance": { "name": "Prioritize distance", "type": "boolean", @@ -223,8 +215,6 @@ def parse_arguments(self, request): match_once = request.get_boolean_arg("matchOnce", default=False) - result_size_limit = request.get_int_arg("resultSizeLimit", default=500) - start_seconds_from_epoch = int((start_time - EPOCH).total_seconds()) end_seconds_from_epoch = int((end_time - EPOCH).total_seconds()) @@ -234,7 +224,7 @@ def parse_arguments(self, request): return bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \ start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \ depth_min, depth_max, time_tolerance, radius_tolerance, \ - platforms, match_once, result_size_limit, prioritize_distance + platforms, match_once, prioritize_distance def get_job_pool(self, tile_ids): if len(tile_ids) > LARGE_JOB_THRESHOLD: @@ -244,7 +234,7 @@ def get_job_pool(self, tile_ids): def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, start_time, end_time, depth_min, depth_max, time_tolerance, radius_tolerance, platforms, match_once, - result_size_limit, start, prioritize_distance): + start, prioritize_distance): # Call spark_matchup self.log.debug("Calling Spark Driver") @@ -310,7 +300,7 @@ def calc(self, request, tornado_io_loop, **args): bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \ start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \ depth_min, depth_max, time_tolerance, radius_tolerance, \ - platforms, match_once, result_size_limit, prioritize_distance = self.parse_arguments(request) + platforms, match_once, prioritize_distance = self.parse_arguments(request) args = { "primary": primary_ds_name, @@ -380,7 +370,6 @@ def calc(self, request, tornado_io_loop, **args): radius_tolerance=radius_tolerance, platforms=platforms, match_once=match_once, - result_size_limit=result_size_limit, start=start, prioritize_distance=prioritize_distance )) diff --git a/analysis/webservice/apidocs/openapi.yml 
b/analysis/webservice/apidocs/openapi.yml index ea9b16ba..dc6fdb4a 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -154,19 +154,6 @@ paths: type: boolean default: false example: false - - in: query - name: resultSizeLimit - description: | - Optional integer value that limits the number of results - returned from the matchup. If the number of primary matches - is greater than this limit, the service will respond with - (HTTP 202 Accepted) and an empty response body. A value of - 0 means return all results. - required: false - schema: - type: integer - default: 500 - example: 500 - in: query name: prioritizeDistance description: | From 5e0fbb2521cc8ce2fa33281707aeb28950384a6b Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 25 Sep 2023 15:45:41 -0700 Subject: [PATCH 49/91] Add # of primaries/avergae secondaries to job output --- .../webservice/algorithms/doms/ExecutionStatus.py | 12 +++++++++++- .../webservice/algorithms/doms/ResultsStorage.py | 4 ++-- .../webservice/webmodel/NexusExecutionResults.py | 13 +++++++++++-- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py index 1bae4556..2add7b1f 100644 --- a/analysis/webservice/algorithms/doms/ExecutionStatus.py +++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py @@ -53,6 +53,14 @@ def calc(self, request, **args): code=404 ) + # Get execution stats. This call will raise an exception if the + # execution is not done. + with ResultsRetrieval(self.config) as retrieval: + try: + execution_stats = retrieval.retrieveStats(execution_id) + except NexusProcessingException: + execution_stats = {} + job_status = NexusExecutionResults.ExecutionStatus(execution_details['status']) host = f'{request.requestHandler.request.protocol}://{request.requestHandler.request.host}' @@ -63,5 +71,7 @@ def calc(self, request, **args): execution_id=execution_id, message=execution_details['message'], params=execution_params, - host=host + host=host, + num_primary_matched=execution_stats.get('numPrimaryMatched'), + num_secondary_matched=execution_stats.get('numSecondaryMatched') ) diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 39db27b3..99e3c6b7 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -286,7 +286,7 @@ def retrieveResults(self, execution_id, trim_data=False, page_num=1, page_size=1 execution_id = uuid.UUID(execution_id) params = self.retrieveParams(execution_id) - stats = self.__retrieveStats(execution_id) + stats = self.retrieveStats(execution_id) data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size) return params, stats, data @@ -357,7 +357,7 @@ def __rowToDataEntry(self, row, trim_data=False): return entry - def __retrieveStats(self, id): + def retrieveStats(self, id): cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete FROM doms_execution_stats where execution_id = %s limit 1" rows = self._session.execute(cql, (id,)) for row in rows: diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index d5c12046..7cf9abb1 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -44,7 +44,8 @@ def 
construct_job_status(job_state, created, updated, execution_id, params, host } -def construct_done(status, created, completed, execution_id, params, host): +def construct_done(status, created, completed, execution_id, params, host, + num_primary_matched, num_secondary_matched): job_body = construct_job_status( status, created, @@ -53,6 +54,9 @@ def construct_done(status, created, completed, execution_id, params, host): params, host ) + # Add stats to body + job_body['totalPrimaryMatched'] = num_primary_matched + job_body['averageSecondaryMatched'] = round(num_secondary_matched/num_primary_matched) # Construct urls formats = [ @@ -112,7 +116,8 @@ def construct_cancelled(status, created, completed, execution_id, params, host): class NexusExecutionResults: def __init__(self, status=None, created=None, completed=None, execution_id=None, message='', - params=None, host=None, status_code=200): + params=None, host=None, status_code=200, num_primary_matched=None, + num_secondary_matched=None): self.status_code = status_code self.status = status self.created = created @@ -121,6 +126,8 @@ def __init__(self, status=None, created=None, completed=None, execution_id=None, self.message = message self.execution_params = params self.host = host + self.num_primary_matched = num_primary_matched + self.num_secondary_matched = num_secondary_matched def toJson(self): params = { @@ -132,6 +139,8 @@ def toJson(self): } if self.status == ExecutionStatus.SUCCESS: params['completed'] = self.completed + params['num_primary_matched'] = self.num_primary_matched + params['num_secondary_matched'] = self.num_secondary_matched construct = construct_done elif self.status == ExecutionStatus.RUNNING: construct = construct_running From fbad6b72bf649709cdb51152d49f65bf0a7c4cac Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 25 Sep 2023 15:48:01 -0700 Subject: [PATCH 50/91] rename to executionId --- analysis/webservice/apidocs/openapi.yml | 4 ++-- analysis/webservice/webmodel/NexusExecutionResults.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index dc6fdb4a..f5c57a3e 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -684,7 +684,7 @@ paths: - in: query name: id description: | - The job execution ID + The execution ID required: true schema: type: string @@ -702,7 +702,7 @@ paths: - in: query name: id description: | - The job execution ID + The execution ID required: true schema: type: string diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index 7cf9abb1..c80914dd 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -40,7 +40,7 @@ def construct_job_status(job_state, created, updated, execution_id, params, host 'rel': 'self' }], 'params': params, - 'jobID': execution_id + 'executionID': execution_id } From e0a5999792b466b502c65d7d042d911a5214a4ba Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 25 Sep 2023 15:50:20 -0700 Subject: [PATCH 51/91] update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11789189..b8ed55b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,9 +24,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-482: Updated Saildrone in situ endpoint in config file - SDAP-485: Improved behavior for 
retrying failed Cassandra inserts when saving matchup results. - SDAP-487: Improved result fetch speed for large matchup results by tweaking `doms.doms_data` schema to support querying by primary value id. +- SDAP-493: + - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint + - Updated /job endpoint with details about number of primary and secondary tiles. ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. +- SDAP-493: + - Removed `resultSizeLimit` from /match_spark endpoint ### Fixed - SDAP-474: Fixed bug in CSV attributes where secondary dataset would be rendered as comma separated characters - SDAP-475: Bug fixes for `/timeSeriesSpark` and `/timeAvgMapSpark` From 8942afc55d7f438b35a2df7392b09496c19813c9 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 29 Sep 2023 10:59:32 -0700 Subject: [PATCH 52/91] add totalSecondaryMatched field to /job output --- analysis/webservice/webmodel/NexusExecutionResults.py | 1 + 1 file changed, 1 insertion(+) diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index c80914dd..961fd198 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -56,6 +56,7 @@ def construct_done(status, created, completed, execution_id, params, host, ) # Add stats to body job_body['totalPrimaryMatched'] = num_primary_matched + job_body['totalSecondaryMatched'] = num_secondary_matched job_body['averageSecondaryMatched'] = round(num_secondary_matched/num_primary_matched) # Construct urls From dd73036307a313eaba57203f208196da2f6a3ab0 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 29 Sep 2023 14:25:21 -0700 Subject: [PATCH 53/91] num unique secondaries addition --- .../algorithms/doms/DomsInitialization.py | 5 +++-- .../algorithms/doms/ExecutionStatus.py | 3 ++- .../algorithms/doms/ResultsStorage.py | 20 ++++++++++--------- .../webservice/algorithms_spark/Matchup.py | 8 +++++--- .../webmodel/NexusExecutionResults.py | 7 +++++-- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/analysis/webservice/algorithms/doms/DomsInitialization.py b/analysis/webservice/algorithms/doms/DomsInitialization.py index 43627b14..a10a7e70 100644 --- a/analysis/webservice/algorithms/doms/DomsInitialization.py +++ b/analysis/webservice/algorithms/doms/DomsInitialization.py @@ -173,7 +173,7 @@ def createDomsDataTable(self, session): def createDomsExecutionStatsTable(self, session): log = logging.getLogger(__name__) - log.info("Verifying doms_execuction_stats table") + log.info("Verifying doms_execution_stats table") cql = """ CREATE TABLE IF NOT EXISTS doms_execution_stats ( execution_id uuid PRIMARY KEY, @@ -181,7 +181,8 @@ def createDomsExecutionStatsTable(self, session): num_gridded_checked int, num_insitu_matched int, num_insitu_checked int, - time_to_complete int + time_to_complete int, + num_unique_secondaries int ); """ session.execute(cql) diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py index 2add7b1f..eafdbbbf 100644 --- a/analysis/webservice/algorithms/doms/ExecutionStatus.py +++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py @@ -73,5 +73,6 @@ def calc(self, request, **args): params=execution_params, host=host, num_primary_matched=execution_stats.get('numPrimaryMatched'), - num_secondary_matched=execution_stats.get('numSecondaryMatched') + 
num_secondary_matched=execution_stats.get('numSecondaryMatched'), + num_unique_secondaries=execution_stats.get('numUniqueSecondaries') ) diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 99e3c6b7..48b2122d 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -166,17 +166,18 @@ def __insertParams(self, execution_id, params): def __insertStats(self, execution_id, stats): cql = """ INSERT INTO doms_execution_stats - (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete) + (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete, num_unique_secondaries) VALUES - (%s, %s, %s, %s, %s, %s) + (%s, %s, %s, %s, %s, %s, %s) """ self._session.execute(cql, ( execution_id, - stats["numPrimaryMatched"], + stats['numPrimaryMatched'], None, - stats["numSecondaryMatched"], + stats['numSecondaryMatched'], None, - stats["timeToComplete"] + stats['timeToComplete'], + stats['numUniqueSecondaries'] )) def __insertResults(self, execution_id, results): @@ -358,13 +359,14 @@ def __rowToDataEntry(self, row, trim_data=False): return entry def retrieveStats(self, id): - cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete FROM doms_execution_stats where execution_id = %s limit 1" + cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete, num_unique_secondaries FROM doms_execution_stats where execution_id = %s limit 1" rows = self._session.execute(cql, (id,)) for row in rows: stats = { - "timeToComplete": row.time_to_complete, - "numSecondaryMatched": row.num_insitu_matched, - "numPrimaryMatched": row.num_gridded_matched, + 'timeToComplete': row.time_to_complete, + 'numSecondaryMatched': row.num_insitu_matched, + 'numPrimaryMatched': row.num_gridded_matched, + 'numUniqueSecondaries': row.num_unique_secondaries } return stats diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index 77ecc346..46d1d89d 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -276,10 +276,12 @@ def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name, total_keys = len(list(spark_result.keys())) total_values = sum(len(v) for v in spark_result.values()) + unique_values = len(set([point.data_id for point in spark_result.values()])) details = { - "timeToComplete": int((end - start).total_seconds()), - "numSecondaryMatched": total_values, - "numPrimaryMatched": total_keys + 'timeToComplete': int((end - start).total_seconds()), + 'numSecondaryMatched': total_values, + 'numPrimaryMatched': total_keys, + 'numUniqueSecondaries': unique_values } matches = Matchup.convert_to_matches(spark_result) diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index 961fd198..2b0007ac 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -45,7 +45,7 @@ def construct_job_status(job_state, created, updated, execution_id, params, host def construct_done(status, created, completed, execution_id, params, host, - num_primary_matched, num_secondary_matched): + num_primary_matched, num_secondary_matched, num_unique_secondaries): job_body = construct_job_status( status, created, @@ 
-58,6 +58,7 @@ def construct_done(status, created, completed, execution_id, params, host, job_body['totalPrimaryMatched'] = num_primary_matched job_body['totalSecondaryMatched'] = num_secondary_matched job_body['averageSecondaryMatched'] = round(num_secondary_matched/num_primary_matched) + job_body['totalUniqueSecondaryMatched'] = num_unique_secondaries # Construct urls formats = [ @@ -118,7 +119,7 @@ def construct_cancelled(status, created, completed, execution_id, params, host): class NexusExecutionResults: def __init__(self, status=None, created=None, completed=None, execution_id=None, message='', params=None, host=None, status_code=200, num_primary_matched=None, - num_secondary_matched=None): + num_secondary_matched=None, num_unique_secondaries=None): self.status_code = status_code self.status = status self.created = created @@ -129,6 +130,7 @@ def __init__(self, status=None, created=None, completed=None, execution_id=None, self.host = host self.num_primary_matched = num_primary_matched self.num_secondary_matched = num_secondary_matched + self.num_unique_secondaries = num_unique_secondaries def toJson(self): params = { @@ -142,6 +144,7 @@ def toJson(self): params['completed'] = self.completed params['num_primary_matched'] = self.num_primary_matched params['num_secondary_matched'] = self.num_secondary_matched + params['num_unique_secondaries'] = self.num_unique_secondaries construct = construct_done elif self.status == ExecutionStatus.RUNNING: construct = construct_running From a7beb85129e7567fc898f3cf0b0bf6369c4259f3 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 3 Oct 2023 11:50:49 -0700 Subject: [PATCH 54/91] Reenabled TIFF geo subsetting --- .../nexustiles/backends/cog/SolrProxy.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/data-access/nexustiles/backends/cog/SolrProxy.py b/data-access/nexustiles/backends/cog/SolrProxy.py index c78540ca..92a96cda 100644 --- a/data-access/nexustiles/backends/cog/SolrProxy.py +++ b/data-access/nexustiles/backends/cog/SolrProxy.py @@ -57,20 +57,18 @@ def find_tiffs_in_bounds( } if bounds is not None: - self.logger.warning('Subsetting GeoTIFF granule by bbox not yet implemented') - - # if type(bounds) in [dict, str]: - # if isinstance(bounds, dict): - # max_lat = bounds['max_lat'] - # max_lon = bounds['max_lon'] - # min_lat = bounds['min_lat'] - # min_lon = bounds['min_lon'] - # else: - # min_lon, min_lat, max_lon, max_lat = tuple([float(p) for p in bounds.split(',')]) - # - # params['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) - # elif isinstance(bounds, Polygon): - # params['fq'].append('{!field f=geo}Intersects(%s)' % bounds.wkt) + if type(bounds) in [dict, str]: + if isinstance(bounds, dict): + max_lat = bounds['max_lat'] + max_lon = bounds['max_lon'] + min_lat = bounds['min_lat'] + min_lon = bounds['min_lon'] + else: + min_lon, min_lat, max_lon, max_lat = tuple([float(p) for p in bounds.split(',')]) + + params['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) + elif isinstance(bounds, Polygon): + params['fq'].append('{!field f=geo}Intersects(%s)' % bounds.wkt) self._merge_kwargs(params, **kwargs) @@ -159,7 +157,7 @@ def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_t additionalparams = { 'fq': [ - # "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), + "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), "{!frange l=0 u=0}ms(min_time_dt,max_time_dt)", time_clause ], From 
741d4b6af0e5f7180b25b3643eb89558352fd2ee Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 3 Oct 2023 11:51:06 -0700 Subject: [PATCH 55/91] Ensure latitudes are in ascending order --- data-access/nexustiles/backends/cog/backend.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index 804fad02..e77bbd2c 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -381,6 +381,13 @@ def __fetch_data_for_tile(self, tile: Tile): ds: xr.Dataset = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat()), self.__bands) variables = list(ds.data_vars) + lats = ds[self.__latitude].to_numpy() + delta = lats[1] - lats[0] + + if delta < 0: + logger.warning(f'Latitude coordinate for {self._name} is in descending order. Flipping it to ascending') + ds = ds.isel({self.__latitude: slice(None, None, -1)}) + sel_g = { self.__latitude: slice(min_lat, max_lat), self.__longitude: slice(min_lon, max_lon), From 50034201c47cf7256e07c9307578f7828b228139 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 9 Oct 2023 16:22:06 -0700 Subject: [PATCH 56/91] Catch and report algs that are unsupported by ds backend --- .../request/handlers/NexusRequestHandler.py | 24 +++++++++++++++++++ data-access/nexustiles/exception.py | 6 +++++ data-access/nexustiles/nexustiles.py | 14 ++++++----- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py index 7a559679..7552dd44 100644 --- a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py @@ -22,6 +22,9 @@ from webservice.webmodel import NexusRequestObjectTornadoFree, NexusRequestObject, NexusProcessingException from webservice.algorithms_spark.NexusCalcSparkTornadoHandler import NexusCalcSparkTornadoHandler +from nexustiles.exception import AlgorithmUnsupportedForDatasetException + +from py4j.protocol import Py4JJavaError class NexusRequestHandler(tornado.web.RequestHandler): def initialize(self, thread_pool, clazz=None, **kargs): @@ -72,7 +75,28 @@ def get(self): except NexusProcessingException as e: self.async_onerror_callback(e.reason, e.code) + # except pyspark + + except AlgorithmUnsupportedForDatasetException as e: + self.logger.exception(e) + self.async_onerror_callback( + reason='Algorithm unsupported for dataset (backend has yet to implement functionality)', + code=400 + ) + + except Py4JJavaError as e: + self.logger.exception(e) + + if 'AlgorithmUnsupportedForDatasetException' in str(e): + self.async_onerror_callback( + reason='Algorithm unsupported for dataset (backend has yet to implement functionality)', + code=400 + ) + else: + self.async_onerror_callback(str(e), 500) + except Exception as e: + print(type(e)) self.async_onerror_callback(str(e), 500) @tornado.gen.coroutine diff --git a/data-access/nexustiles/exception.py b/data-access/nexustiles/exception.py index d6ed2c64..c99abebc 100644 --- a/data-access/nexustiles/exception.py +++ b/data-access/nexustiles/exception.py @@ -16,3 +16,9 @@ class NexusTileServiceException(Exception): def __init__(self, reason): Exception.__init__(self, reason) + + +class AlgorithmUnsupportedForDatasetException(Exception): + def __init__(self, reason): + Exception.__init__(self, reason) + diff --git 
a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 60dc4a6e..66c07252 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -41,7 +41,7 @@ from nexustiles.backends import * -from .exception import NexusTileServiceException +from .exception import NexusTileServiceException, AlgorithmUnsupportedForDatasetException from requests.structures import CaseInsensitiveDict @@ -104,7 +104,7 @@ def wrapper(*args, **kwargs): try: return func(*args, **kwargs) except NotImplementedError: - raise NexusTileServiceException('Action unsupported by backend') + raise AlgorithmUnsupportedForDatasetException('Action unsupported by backend') return wrapper @@ -593,6 +593,7 @@ def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, datas min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs ) + @catch_not_implemented def get_bounding_box(self, tile_ids, ds=None): """ Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. @@ -601,6 +602,7 @@ def get_bounding_box(self, tile_ids, ds=None): """ return NexusTileService._get_backend(ds).get_bounding_box(tile_ids) + @catch_not_implemented def get_min_time(self, tile_ids, ds=None): """ Get the minimum tile date from the list of tile ids @@ -608,8 +610,9 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - return NexusTileService._get_backend(ds).get_min_time(tile_ids, ds) + return int(NexusTileService._get_backend(ds).get_min_time(tile_ids, ds)) + @catch_not_implemented def get_max_time(self, tile_ids, ds=None): """ Get the maximum tile date from the list of tile ids @@ -619,6 +622,7 @@ def get_max_time(self, tile_ids, ds=None): """ return int(NexusTileService._get_backend(ds).get_max_time(tile_ids)) + @catch_not_implemented def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. @@ -631,6 +635,7 @@ def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_tim bounds = self._metadatastore.find_distinct_bounding_boxes_in_polygon(bounding_polygon, ds, start_time, end_time) return [box(*b) for b in bounds] + @catch_not_implemented def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): """ Return number of tiles that match search criteria. 
@@ -754,9 +759,6 @@ def mask_tiles_to_elevation(self, min_e, max_e, tiles): tile.data = ma.masked_where(multi_data_mask, tile.data) else: - print(data_mask.shape) - print(tile.data.shape) - data_mask = np.broadcast_to(data_mask, tile.data.shape) tile.data = ma.masked_where(data_mask, tile.data) From 15fce2e00765726bae5e1a77bf45a82bde05eee4 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 9 Oct 2023 16:22:06 -0700 Subject: [PATCH 57/91] Catch and report algs that are unsupported by ds backend --- data-access/nexustiles/exception.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/data-access/nexustiles/exception.py b/data-access/nexustiles/exception.py index c99abebc..1f2fd839 100644 --- a/data-access/nexustiles/exception.py +++ b/data-access/nexustiles/exception.py @@ -22,3 +22,9 @@ class AlgorithmUnsupportedForDatasetException(Exception): def __init__(self, reason): Exception.__init__(self, reason) + + +class AlgorithmUnsupportedForDatasetException(Exception): + def __init__(self, reason): + Exception.__init__(self, reason) + From d0742addc16ffa029116c45bfc4e25a5980a71b3 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 12 Oct 2023 15:41:36 -0700 Subject: [PATCH 58/91] Code for opening tiffs in S3 UNTESTED --- .../nexustiles/backends/cog/backend.py | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index e77bbd2c..66433c96 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -21,6 +21,9 @@ import numpy as np import numpy.ma as ma import rioxarray +import rasterio as rio +from rasterio.session import AWSSession +import boto3 import xarray as xr from nexustiles.AbstractTileService import AbstractTileService from nexustiles.exception import NexusTileServiceException @@ -315,18 +318,48 @@ def fetch_data_for_tiles(self, *tiles): return tiles @staticmethod - def __open_granule_at_url(url, time: np.datetime64, bands, **kwargs): + def __open_granule_at_url(url, time: np.datetime64, bands, config, **kwargs): url = urlparse(url) if url.scheme in ['file', '']: tiff = rioxarray.open_rasterio(url.path, mask_and_scale=True).to_dataset('band') + elif url.scheme == 's3': + try: + aws_cfg = config['aws'] + + key_id = aws_cfg['accessKeyID'] + secret = aws_cfg['secretAccessKey'] + except KeyError: + raise NexusTileServiceException(f'AWS config not provided for dataset {url.path}') + + session = boto3.Session( + aws_access_key_id=key_id, + aws_secret_access_key=secret, + region_name=aws_cfg.get('region', 'us-west-2') + ) + + with rio.Env( + AWSSession(session), + GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR' + ): + tiff = rioxarray.open_rasterio(url, mask_and_scale=True) + + ##### + # NOTE: This will likely be inefficient so leaving it disabled for now. I don't know how it will + # handle accessing data when the rio.Env context exits so maybe we want to pull it into memory before + # it does??? 
+ # + # tiff = tiff.load() + + tiff = tiff.to_dataset('band') else: raise NotImplementedError(f'Support not yet added for tiffs with {url.scheme} URLs') try: - tiff.rio.reproject(dst_crs='EPSG:4326', nodata=np.nan) + tiff = tiff.rio.reproject(dst_crs='EPSG:4326', nodata=np.nan) except MissingCRS: - tiff.rio.write_crs('EPSG:4326').rio.reproject(dst_crs='EPSG:4326', nodata=np.nan) + # tiff.rio.write_crs('EPSG:4326').rio.reproject(dst_crs='EPSG:4326', nodata=np.nan) + pass rename = dict(x='longitude', y='latitude') @@ -378,7 +411,7 @@ def __fetch_data_for_tile(self, tile: Tile): granule = tile.granule - ds: xr.Dataset = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat()), self.__bands) + ds: xr.Dataset = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat()), self.__bands, self.__config) variables = list(ds.data_vars) lats = ds[self.__latitude].to_numpy() From db68d4fa8f1feb4682500b9947c875d286d191ff Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 13 Oct 2023 10:12:01 -0700 Subject: [PATCH 59/91] updated docs to use correct sea_water_temperature param name --- analysis/webservice/apidocs/openapi.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index f5c57a3e..b719ad85 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -139,8 +139,7 @@ paths: required: false schema: type: string - default: sea_surface_temperature - example: sea_surface_temperature + example: sea_water_temperature - in: query name: matchOnce description: | From a8be9b8c599f949ebe00a595c879973132956b4f Mon Sep 17 00:00:00 2001 From: skorper Date: Wed, 1 Nov 2023 15:28:43 -0700 Subject: [PATCH 60/91] bugfix --- analysis/webservice/algorithms_spark/Matchup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index 46d1d89d..8955d95c 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -276,7 +276,7 @@ def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name, total_keys = len(list(spark_result.keys())) total_values = sum(len(v) for v in spark_result.values()) - unique_values = len(set([point.data_id for point in spark_result.values()])) + unique_values = len(set([point.data_id for v in spark_result.values() for point in v])) details = { 'timeToComplete': int((end - start).total_seconds()), 'numSecondaryMatched': total_values, From 62de86772600c94110c748b3a9358712094cab8f Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 6 Nov 2023 13:39:02 -0800 Subject: [PATCH 61/91] fix division by zero bug --- analysis/webservice/webmodel/NexusExecutionResults.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index 2b0007ac..7dd7af99 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -57,7 +57,8 @@ def construct_done(status, created, completed, execution_id, params, host, # Add stats to body job_body['totalPrimaryMatched'] = num_primary_matched job_body['totalSecondaryMatched'] = num_secondary_matched - job_body['averageSecondaryMatched'] = round(num_secondary_matched/num_primary_matched) + job_body['averageSecondaryMatched'] = 
round(num_secondary_matched/num_primary_matched) \ + if num_primary_matched > 0 else 0 job_body['totalUniqueSecondaryMatched'] = num_unique_secondaries # Construct urls From 972f3ddf076af8c51311a62e8e23a84a9f926d91 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 8 Nov 2023 14:58:11 -0800 Subject: [PATCH 62/91] add params to dataset management handler classes --- analysis/webservice/management/Datasets.py | 42 ++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py index ded1e8a2..40b267fd 100644 --- a/analysis/webservice/management/Datasets.py +++ b/analysis/webservice/management/Datasets.py @@ -93,7 +93,24 @@ def toJson(self): class DatasetAdd(DatasetManagement): name = 'Add dataset' path = '/datasets/add' - description = "Add new dataset to running SDAP instance" + description = "Add new Zarr dataset to running SDAP instance" + params = { + "name": { + "name": "Dataset name", + "type": "string", + "description": "Name of new dataset to add" + }, + "path": { + "name": "Path or URL", + "type": "string", + "description": "Path/URL of Zarr group" + }, + "body": { + "name": "Request body", + "type": "application/json OR application/yaml", + "description": "POST request body. Config options for Zarr (variable, coords, aws (if applicable))" + } + } def __init__(self, **args): pass @@ -147,7 +164,19 @@ def calc(self, request: NexusRequestObject, **args): class DatasetUpdate(DatasetManagement): name = 'Update dynamically added dataset' path = '/datasets/update' - description = "Update dataset in running SDAP instance" + description = "Update Zarr dataset in running SDAP instance" + params = { + "name": { + "name": "Dataset name", + "type": "string", + "description": "Name of dataset to update" + }, + "body": { + "name": "Request body", + "type": "application/json OR application/yaml", + "description": "POST request body. Config options for Zarr (variable, coords, aws (if applicable))" + } + } def __init__(self, **args): pass @@ -182,7 +211,14 @@ def calc(self, request: NexusRequestObject, **args): class DatasetDelete(DatasetManagement): name = 'Remove dataset' path = '/datasets/remove' - description = "Remove dataset from running SDAP instance" + description = "Remove Zarr dataset from running SDAP instance" + params = { + "name": { + "name": "Dataset name", + "type": "string", + "description": "Name of dataset to remove" + } + } def __init__(self, **args): pass From 831ca37b4d4b524cfd6804edcc3aa5d874a39271 Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 16 Nov 2023 09:23:30 -0800 Subject: [PATCH 63/91] add page number to default filename for matchup output --- CHANGELOG.md | 1 + analysis/webservice/algorithms/doms/BaseDomsHandler.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e0a4e98..64e65c95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-493: - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint - Updated /job endpoint with details about number of primary and secondary tiles. +- SDAP-499: Added page number to default filename for matchup output ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. 
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index 84c91633..d4dcd512 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -114,7 +114,7 @@ def toNetCDF(self): return DomsNetCDFFormatter.create(self.__executionId, self.results(), self.__args, self.__details) def filename(self): - return f'CDMS_{self.__executionId}' + return f'CDMS_{self.__executionId}_page{self.__details["pageNum"]}' class DomsCSVFormatter: From 4ab2f9b4a4f72a34b922cb496cedb87e684335b7 Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 16 Nov 2023 13:22:09 -0800 Subject: [PATCH 64/91] pagination improvements --- .../algorithms/doms/ExecutionStatus.py | 5 ++- .../algorithms/doms/ResultsRetrieval.py | 2 + .../webservice/algorithms_spark/Matchup.py | 10 ++--- analysis/webservice/apidocs/openapi.yml | 14 +++++++ .../webmodel/NexusExecutionResults.py | 40 ++++++++++++------- 5 files changed, 50 insertions(+), 21 deletions(-) diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py index 17c6ca95..63cf423b 100644 --- a/analysis/webservice/algorithms/doms/ExecutionStatus.py +++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py @@ -42,6 +42,8 @@ def calc(self, request, **args): except ValueError: raise NexusProcessingException(reason='"id" argument must be a valid uuid', code=400) + filename = request.get_argument('filename', None) + # Check if the job is done with ResultsRetrieval(self.config) as retrieval: try: @@ -74,5 +76,6 @@ def calc(self, request, **args): host=host, num_primary_matched=execution_stats.get('numPrimaryMatched'), num_secondary_matched=execution_stats.get('numSecondaryMatched'), - num_unique_secondaries=execution_stats.get('numUniqueSecondaries') + num_unique_secondaries=execution_stats.get('numUniqueSecondaries'), + filename=filename ) diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py index f03c1caa..cdec9294 100644 --- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py +++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py @@ -45,6 +45,8 @@ def calc(self, computeOptions, **args): simple_results = computeOptions.get_boolean_arg("simpleResults", default=False) + filename = computeOptions.get_argument("filename", default=None) + with ResultsStorage.ResultsRetrieval(self.config) as storage: params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index 8955d95c..7c7f551b 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -219,12 +219,13 @@ def parse_arguments(self, request): end_seconds_from_epoch = int((end_time - EPOCH).total_seconds()) prioritize_distance = request.get_boolean_arg("prioritizeDistance", default=True) + filename = request.get_argument('filename', default=None) return bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \ start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \ depth_min, depth_max, time_tolerance, radius_tolerance, \ - platforms, match_once, prioritize_distance + platforms, match_once, prioritize_distance, filename def get_job_pool(self, tile_ids): if len(tile_ids) > 
LARGE_JOB_THRESHOLD: @@ -302,7 +303,7 @@ def calc(self, request, tornado_io_loop, **args): bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \ start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \ depth_min, depth_max, time_tolerance, radius_tolerance, \ - platforms, match_once, prioritize_distance = self.parse_arguments(request) + platforms, match_once, prioritize_distance, filename = self.parse_arguments(request) args = { "primary": primary_ds_name, @@ -375,9 +376,8 @@ def calc(self, request, tornado_io_loop, **args): start=start, prioritize_distance=prioritize_distance )) - - request.requestHandler.redirect(f'/job?id={execution_id}') - + filename_param = f'&filename={filename}' if filename else '' + request.requestHandler.redirect(f'/job?id={execution_id}{filename_param}') @classmethod def convert_to_matches(cls, spark_result): diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index b719ad85..8c6efdc9 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -166,6 +166,13 @@ paths: type: boolean default: true example: true + - in: query + name: filename + description: | + Optional filename. Will be passed into /job and results links + required: false + schema: + type: string responses: '200': description: Successful operation @@ -689,6 +696,13 @@ paths: type: string format: uuid example: c864a51b-3d87-4872-9070-632820b1cae2 + - in: query + name: filename + description: | + Optional filename. Will be passed into /job results links + required: false + schema: + type: string /job/cancel: get: summary: | diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index 7dd7af99..47a891a9 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -27,15 +27,17 @@ class ExecutionStatus(Enum): CANCELLED = 'cancelled' -def construct_job_status(job_state, created, updated, execution_id, params, host, message=''): +def construct_job_status(job_state, created, updated, execution_id, params, host, message='', + filename=None): + filename_param = f'&filename={filename}' if filename else '' return { 'status': job_state.value, 'message': message, 'createdAt': created, 'updatedAt': updated, 'links': [{ - 'href': f'{host}/job?id={execution_id}', - 'title': 'The current page', + 'href': f'{host}/job?id={execution_id}{filename_param}', + 'title': 'Get job status - the current page', 'type': 'application/json', 'rel': 'self' }], @@ -45,14 +47,15 @@ def construct_job_status(job_state, created, updated, execution_id, params, host def construct_done(status, created, completed, execution_id, params, host, - num_primary_matched, num_secondary_matched, num_unique_secondaries): + num_primary_matched, num_secondary_matched, num_unique_secondaries, filename): job_body = construct_job_status( status, created, completed, execution_id, params, - host + host, + filename=filename ) # Add stats to body job_body['totalPrimaryMatched'] = num_primary_matched @@ -61,6 +64,8 @@ def construct_done(status, created, completed, execution_id, params, host, if num_primary_matched > 0 else 0 job_body['totalUniqueSecondaryMatched'] = num_unique_secondaries + filename_param = f'&filename={filename}' if filename else '' + # Construct urls formats = [ ('CSV', 'text/csv'), @@ -68,8 +73,8 @@ def construct_done(status, created, completed, execution_id, params, host, ('NETCDF', 
'binary/octet-stream') ] data_links = [{ - 'href': f'{host}/cdmsresults?id={execution_id}&output={output_format}', - 'title': 'Download results', + 'href': f'{host}/cdmsresults?id={execution_id}&output={output_format}{filename_param}', + 'title': f'Download {output_format} results', 'type': mime, 'rel': 'data' } for output_format, mime in formats] @@ -77,14 +82,15 @@ def construct_done(status, created, completed, execution_id, params, host, return job_body -def construct_running(status, created, execution_id, params, host): +def construct_running(status, created, execution_id, params, host, filename): job_body = construct_job_status( status, created, None, execution_id, params, - host + host, + filename=filename ) job_body['links'].append({ 'href': f'{host}/job/cancel?id={execution_id}', @@ -94,7 +100,7 @@ def construct_running(status, created, execution_id, params, host): return job_body -def construct_error(status, created, completed, execution_id, message, params, host): +def construct_error(status, created, completed, execution_id, message, params, host, filename): return construct_job_status( status, created, @@ -102,25 +108,27 @@ def construct_error(status, created, completed, execution_id, message, params, h execution_id, params, host, - message + message, + filename=filename ) -def construct_cancelled(status, created, completed, execution_id, params, host): +def construct_cancelled(status, created, completed, execution_id, params, host, filename): return construct_job_status( status, created, completed, execution_id, params, - host + host, + filename=filename ) class NexusExecutionResults: def __init__(self, status=None, created=None, completed=None, execution_id=None, message='', params=None, host=None, status_code=200, num_primary_matched=None, - num_secondary_matched=None, num_unique_secondaries=None): + num_secondary_matched=None, num_unique_secondaries=None, filename=None): self.status_code = status_code self.status = status self.created = created @@ -132,6 +140,7 @@ def __init__(self, status=None, created=None, completed=None, execution_id=None, self.num_primary_matched = num_primary_matched self.num_secondary_matched = num_secondary_matched self.num_unique_secondaries = num_unique_secondaries + self.filename = filename def toJson(self): params = { @@ -139,7 +148,8 @@ def toJson(self): 'created': self.created, 'execution_id': self.execution_id, 'params': self.execution_params, - 'host': self.host + 'host': self.host, + 'filename': self.filename } if self.status == ExecutionStatus.SUCCESS: params['completed'] = self.completed From 3677c11db7b9346933a185f5129fd670234cd4ca Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 16 Nov 2023 13:26:48 -0800 Subject: [PATCH 65/91] removed debugging line --- analysis/webservice/algorithms/doms/ResultsRetrieval.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py index cdec9294..f03c1caa 100644 --- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py +++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py @@ -45,8 +45,6 @@ def calc(self, computeOptions, **args): simple_results = computeOptions.get_boolean_arg("simpleResults", default=False) - filename = computeOptions.get_argument("filename", default=None) - with ResultsStorage.ResultsRetrieval(self.config) as storage: params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size) From 
86f1348d6d283eb688b1b619f95dde2f8e635218 Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 16 Nov 2023 13:27:43 -0800 Subject: [PATCH 66/91] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e0a4e98..6ffff5dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-493: - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint - Updated /job endpoint with details about number of primary and secondary tiles. +- SDAP-500: Improvements to SDAP Asynchronous Jobs ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. From 1e8cc4e9d31d295e172c0db4bba61a5776642bea Mon Sep 17 00:00:00 2001 From: Riley Kuttruff <72955101+RKuttruff@users.noreply.github.com> Date: Mon, 27 Nov 2023 15:44:38 -0800 Subject: [PATCH 67/91] Update helm cassandra dependency (#289) * Update helm cassandra dependency * Bump default cassandra PV to 4 * Bump default cassandra PV to 4 in tools * Changelog * Fixed small documentation issue --------- Co-authored-by: rileykk --- CHANGELOG.md | 1 + analysis/webservice/algorithms/doms/ResultsStorage.py | 3 +++ analysis/webservice/algorithms/doms/domsconfig.ini.default | 2 +- data-access/nexustiles/config/datastores.ini.default | 2 +- helm/requirements.yaml | 2 +- helm/values.yaml | 3 +-- tools/deletebyquery/deletebyquery.py | 2 +- tools/doms-data-tools/update_doms_data_pk.py | 2 +- tools/domspurge/README.md | 2 +- tools/domspurge/purge.py | 2 +- 10 files changed, 12 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ffff5dc..5e36c0a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Status code for results endpoint if execution id is not found fixed to be `404` instead of `500`. - Ensured links in the `/job` endpoint are https - SDAP-488: Workaround to build issue on Apple Silicon (M1/M2). Image build installs nexusproto through PyPI instead of building from source. A build arg `BUILD_NEXUSPROTO` was defined to allow building from source if desired +- SDAP-496: Fix `solr-cloud-init` image failing to run. 
### Security ## [1.1.0] - 2023-04-26 diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 48b2122d..1dea1610 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -65,6 +65,9 @@ def __enter__(self): dc_policy = DCAwareRoundRobinPolicy(cassDatacenter) token_policy = TokenAwarePolicy(dc_policy) + logger.info(f'Connecting to Cassandra cluster @ {[host for host in cassHost.split(",")]}; datacenter: ' + f'{cassDatacenter}; protocol version: {cassVersion}') + self._cluster = Cluster([host for host in cassHost.split(',')], load_balancing_policy=token_policy, protocol_version=cassVersion, auth_provider=auth_provider) diff --git a/analysis/webservice/algorithms/doms/domsconfig.ini.default b/analysis/webservice/algorithms/doms/domsconfig.ini.default index 55f9b16c..f4e44960 100644 --- a/analysis/webservice/algorithms/doms/domsconfig.ini.default +++ b/analysis/webservice/algorithms/doms/domsconfig.ini.default @@ -18,7 +18,7 @@ host=localhost port=9042 keyspace=doms local_datacenter=datacenter1 -protocol_version=3 +protocol_version=4 dc_policy=DCAwareRoundRobinPolicy username= password= diff --git a/data-access/nexustiles/config/datastores.ini.default b/data-access/nexustiles/config/datastores.ini.default index d8db1902..51455a38 100644 --- a/data-access/nexustiles/config/datastores.ini.default +++ b/data-access/nexustiles/config/datastores.ini.default @@ -18,7 +18,7 @@ host=localhost port=9042 keyspace=nexustiles local_datacenter=datacenter1 -protocol_version=3 +protocol_version=4 dc_policy=DCAwareRoundRobinPolicy username= password= diff --git a/helm/requirements.yaml b/helm/requirements.yaml index a9996586..1de8cf0f 100644 --- a/helm/requirements.yaml +++ b/helm/requirements.yaml @@ -12,7 +12,7 @@ dependencies: repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami condition: solr.enabled - name: cassandra - version: 5.5.3 + version: 9.1.7 repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami condition: cassandra.enabled diff --git a/helm/values.yaml b/helm/values.yaml index 4105362e..fe2481ef 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -195,8 +195,7 @@ cassandra: dbUser: user: cassandra password: cassandra - cluster: - replicaCount: 1 + replicaCount: 1 persistence: storageClass: hostpath size: 8Gi diff --git a/tools/deletebyquery/deletebyquery.py b/tools/deletebyquery/deletebyquery.py index 4fb7bd66..8b98111a 100644 --- a/tools/deletebyquery/deletebyquery.py +++ b/tools/deletebyquery/deletebyquery.py @@ -262,7 +262,7 @@ def parse_args(): help='The version of the Cassandra protocol the driver should use.', required=False, choices=['1', '2', '3', '4', '5'], - default='3') + default='4') parser.add_argument('--solr-rows', help='Number of rows to fetch with each Solr query to build the list of tiles to delete', diff --git a/tools/doms-data-tools/update_doms_data_pk.py b/tools/doms-data-tools/update_doms_data_pk.py index ed8dbe5e..749995da 100644 --- a/tools/doms-data-tools/update_doms_data_pk.py +++ b/tools/doms-data-tools/update_doms_data_pk.py @@ -114,7 +114,7 @@ def main(): request_timeout=60.0, ) }, - protocol_version=3, + protocol_version=4, auth_provider=auth_provider) as cluster: session = cluster.connect('doms') diff --git a/tools/domspurge/README.md b/tools/domspurge/README.md index 92f7cfb1..e88b62f5 100644 --- a/tools/domspurge/README.md +++ 
b/tools/domspurge/README.md @@ -33,5 +33,5 @@ You can build an image for this script to run it in a Kubernetes CronJob. ```shell cd /incubator-sdap-nexus -docker build . -f Dockerfile -t sdap-local/DomsPurge: +docker build . -f tools/domspurge/Dockerfile -t sdap-local/DomsPurge: ``` diff --git a/tools/domspurge/purge.py b/tools/domspurge/purge.py index 4fb2fc37..d4bb15a8 100644 --- a/tools/domspurge/purge.py +++ b/tools/domspurge/purge.py @@ -270,7 +270,7 @@ def parse_args(): required=False, dest='pv', choices=['1', '2', '3', '4', '5'], - default='3') + default='4') time_before = purge_options.add_mutually_exclusive_group(required=True) From 32ca3d709237d324decada84fe92a4a4044b6521 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 5 Jan 2024 15:33:49 -0800 Subject: [PATCH 68/91] stac catalog --- .../algorithms/doms/ResultsStorage.py | 15 +- .../webservice/algorithms/doms/StacCatalog.py | 166 ++++++++++++++++++ .../webservice/algorithms/doms/__init__.py | 1 + .../webmodel/NexusExecutionResults.py | 6 + 4 files changed, 180 insertions(+), 8 deletions(-) create mode 100644 analysis/webservice/algorithms/doms/StacCatalog.py diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 39db27b3..6b4cc1c2 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -286,7 +286,7 @@ def retrieveResults(self, execution_id, trim_data=False, page_num=1, page_size=1 execution_id = uuid.UUID(execution_id) params = self.retrieveParams(execution_id) - stats = self.__retrieveStats(execution_id) + stats = self.retrieveStats(execution_id) data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size) return params, stats, data @@ -357,19 +357,18 @@ def __rowToDataEntry(self, row, trim_data=False): return entry - def __retrieveStats(self, id): - cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete FROM doms_execution_stats where execution_id = %s limit 1" + def retrieveStats(self, id): + cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete, num_unique_secondaries FROM doms_execution_stats where execution_id = %s limit 1" rows = self._session.execute(cql, (id,)) for row in rows: stats = { - "timeToComplete": row.time_to_complete, - "numSecondaryMatched": row.num_insitu_matched, - "numPrimaryMatched": row.num_gridded_matched, + 'timeToComplete': row.time_to_complete, + 'numSecondaryMatched': row.num_insitu_matched, + 'numPrimaryMatched': row.num_gridded_matched, + 'numUniqueSecondaries': row.num_unique_secondaries } return stats - raise NexusProcessingException(reason=f'No stats found for id {str(id)}', code=404) - def retrieveParams(self, id): cql = "SELECT * FROM doms_params where execution_id = %s limit 1" rows = self._session.execute(cql, (id,)) diff --git a/analysis/webservice/algorithms/doms/StacCatalog.py b/analysis/webservice/algorithms/doms/StacCatalog.py new file mode 100644 index 00000000..2c1aa125 --- /dev/null +++ b/analysis/webservice/algorithms/doms/StacCatalog.py @@ -0,0 +1,166 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the 'License'); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import re +import uuid +from typing import List + +from webservice.NexusHandler import nexus_handler +from webservice.algorithms.doms.ResultsStorage import ResultsRetrieval +from webservice.webmodel import NexusProcessingException +from webservice.webmodel import NexusResults + +from . import BaseDomsHandler + + +class StacResults(NexusResults): + def __init__(self, contents): + NexusResults.__init__(self) + self.contents = contents + + def toJson(self): + return json.dumps(self.contents, indent=4) + + +@nexus_handler +class StacCatalog(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = 'STAC Catalog Handler' + path = '^/cdmscatalog/?.*$' + description = '' + params = {} + singleton = True + + def __init__(self, tile_service_factory, config=None): + BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self, tile_service_factory) + self.config = config + + def construct_catalog(self, execution_id: str): + return { + 'stac_version': '1.0.0', + 'type': 'Catalog', + 'id': str(execution_id), + 'description': 'STAC Catalog for CDMS output', + 'links': [ + { + 'rel': 'collection', + 'href': f'https://{self.host}/cdmscatalog/{execution_id}/{output_format}', + 'title': f'Collection of pages for {execution_id} {output_format} output' + } + for output_format in ['CSV', 'JSON', 'NETCDF'] + ] + } + + def construct_collection(self, execution_id: str, output_format: str, + num_primary_matched: int, page_size: int, start_time: str, + end_time: str, bbox: List[float]): + links = [ + { + 'rel': 'self', + 'href': f'https://{self.host}/cdmscatalog/{execution_id}/{output_format}', + 'title': 'The current page', + 'type': 'application/json' + }, + { + 'rel': 'root', + 'href': f'https://{self.host}/cdmscatalog/{execution_id}', + 'title': f'Root catalog for {execution_id}', + } + ] + + url = f'https://{self.host}/cdmsresults?id={execution_id}&output={output_format}' + for page_num in range(1, num_primary_matched, page_size): + links.append({ + 'rel': 'data', + 'href': f'{url}&pageNum={page_num}&pageSize={page_size}' + }) + + return { + 'stac_version': '1.0.0', + 'type': 'Collection', + 'license': 'not-provided', + 'id': f'{execution_id}.{output_format}', + 'description': 'Collection of results for CDMS execution and result format', + 'extent': { + 'spatial': { + 'bbox': bbox + }, + 'temporal': { + 'interval': [start_time, end_time] + } + }, + 'links': links, + } + + def calc(self, request, **args): + page_size = request.get_int_arg('pageSize', default=1000) + url_path_regex = '^\/cdmscatalog\/?(?P<id>[a-zA-Z0-9-]*)\/?(?P<format>[a-zA-Z0-9]*)' + match = re.search(url_path_regex, request.requestHandler.request.path) + + execution_id = match.group('id') + output_format = match.group('format') + + self.host = request.requestHandler.request.host + + if not execution_id: + raise NexusProcessingException( + reason=f'Execution ID path param must be provided.', + code=400 + ) + + if execution_id: + try: + execution_id = uuid.UUID(execution_id) + except ValueError: + raise NexusProcessingException( + reason=f'"{execution_id}" is not a valid uuid', + code=400 + ) + + if output_format and output_format.upper() not in 
['CSV', 'JSON', 'NETCDF']: + raise NexusProcessingException( + reason=f'"{output_format}" is not a valid format. Should be CSV, JSON, or NETCDF.', + code=400 + ) + + if execution_id and not output_format: + # Route to STAC catalog for execution + stac_output = self.construct_catalog(execution_id) + elif execution_id and output_format: + # Route to STAC collection for execution+format + + with ResultsRetrieval(self.config) as retrieval: + try: + execution_stats = retrieval.retrieveStats(execution_id) + execution_params = retrieval.retrieveParams(execution_id) + except NexusProcessingException: + execution_stats = {} + + num_primary_matched = execution_stats.get('numPrimaryMatched', 0) + start_time = execution_params['startTime'].isoformat() + end_time = execution_params['endTime'].isoformat() + bbox = list(map(float, execution_params['bbox'].split(','))) + + stac_output = self.construct_collection( + execution_id, output_format, num_primary_matched, page_size, + start_time, end_time, bbox + ) + else: + raise NexusProcessingException( + reason=f'Invalid path parameters were provided', + code=400 + ) + + return StacResults(stac_output) diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py index bc568f83..7e5715f4 100644 --- a/analysis/webservice/algorithms/doms/__init__.py +++ b/analysis/webservice/algorithms/doms/__init__.py @@ -20,6 +20,7 @@ from . import DatasetListQuery from . import DomsInitialization from . import MatchupQuery +from . import StacCatalog from . import MetadataQuery from . import ResultsPlotQuery from . import ResultsRetrieval diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index d5c12046..be9d332a 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -60,6 +60,12 @@ def construct_done(status, created, completed, execution_id, params, host): ('JSON', 'application/json'), ('NETCDF', 'binary/octet-stream') ] + job_body['links'].append({ + 'href': f'{host}/cdmscatalog/{execution_id}', + 'title': 'STAC Catalog for execution results', + 'type': 'application/json', + 'rel': 'stac' + }) data_links = [{ 'href': f'{host}/cdmsresults?id={execution_id}&output={output_format}', 'title': 'Download results', From 3563ae9820f3c8699e15f348f61b94d7f5aa65b5 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 5 Jan 2024 16:03:33 -0800 Subject: [PATCH 69/91] Updated openapi spec --- analysis/webservice/apidocs/openapi.yml | 52 +++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index ea9b16ba..0420bf9d 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -721,6 +721,58 @@ paths: type: string format: uuid example: c864a51b-3d87-4872-9070-632820b1cae2 + /cdmscatalog/{executionId}: + get: + summary: | + Get STAC Catalog for execution + operationId: cdmscatalog + tags: + - Analytics + description: "Get STAC catalog by execution id" + parameters: + - in: path + name: executionId + description: | + The job execution ID + required: true + schema: + type: string + format: uuid + example: c864a51b-3d87-4872-9070-632820b1cae2 + /cdmscatalog/{executionId}/{format}: + get: + summary: | + Get STAC Catalog format catalog for execution + operationId: cdmscatalogcollection + tags: + - Analytics + description: "Get STAC catalog by execution id" + parameters: 
+ - in: path + name: executionId + description: | + The job execution ID + required: true + schema: + type: string + format: uuid + example: c864a51b-3d87-4872-9070-632820b1cae2 + - in: path + name: format + description: | + CDMS results format + required: true + schema: + type: string + enum: [JSON,CSV,NETCDF] + example: JSON + - in: query + name: pageSize + description: | + How many primary matches on each page of CDMS results + required: false + schema: + type: integer externalDocs: description: Documentation url: https://incubator-sdap-nexus.readthedocs.io/en/latest/index.html From 0691d87932ec0a7c54ef77f1839100681848df37 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 5 Jan 2024 16:15:40 -0800 Subject: [PATCH 70/91] move stac endpoints to matchup tag in openapi spec --- analysis/webservice/apidocs/openapi.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index 0420bf9d..3bb5103a 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -727,7 +727,7 @@ paths: Get STAC Catalog for execution operationId: cdmscatalog tags: - - Analytics + - Matchup description: "Get STAC catalog by execution id" parameters: - in: path @@ -745,7 +745,7 @@ paths: Get STAC Catalog format catalog for execution operationId: cdmscatalogcollection tags: - - Analytics + - Matchup description: "Get STAC catalog by execution id" parameters: - in: path From e02fc78f64b76e0974cd9bbc6f09b01cd9593447 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 11 Jan 2024 10:49:19 -0800 Subject: [PATCH 71/91] SDAP-507 - Changes to remove geos sub-dependency --- analysis/webservice/algorithms/doms/BaseDomsHandler.py | 7 ------- analysis/webservice/algorithms/doms/ResultsPlotQuery.py | 2 +- analysis/webservice/algorithms/doms/__init__.py | 2 +- .../nexus_tornado/app_builders/HandlerArgsBuilder.py | 7 ++++--- docker/nexus-webapp/Dockerfile | 4 ++-- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index 84c91633..faa384f7 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -35,13 +35,6 @@ EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' -try: - from osgeo import gdal - from osgeo.gdalnumeric import * -except ImportError: - import gdal - from gdalnumeric import * - from netCDF4 import Dataset import netCDF4 import tempfile diff --git a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py index 950c7964..864cdc3b 100644 --- a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py +++ b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py @@ -26,7 +26,7 @@ class PlotTypes: HISTOGRAM = "histogram" -@nexus_handler +# @nexus_handler class DomsResultsPlotHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): name = "DOMS Results Plotting" path = "/domsplot" diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py index bc568f83..8a94798e 100644 --- a/analysis/webservice/algorithms/doms/__init__.py +++ b/analysis/webservice/algorithms/doms/__init__.py @@ -21,7 +21,7 @@ from . import DomsInitialization from . import MatchupQuery from . import MetadataQuery -from . import ResultsPlotQuery +# from . import ResultsPlotQuery from . 
import ResultsRetrieval from . import ResultsStorage from . import StatsQuery diff --git a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py index 2a84ae7e..3b8b480f 100644 --- a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py +++ b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py @@ -37,9 +37,10 @@ def handler_needs_algorithm_config(class_wrapper): class_wrapper == webservice.algorithms_spark.Matchup.Matchup or class_wrapper == webservice.algorithms_spark.MatchupDoms.MatchupDoms or issubclass(class_wrapper, webservice.algorithms.doms.BaseDomsHandler.BaseDomsQueryCalcHandler) - or issubclass(class_wrapper, webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) + or issubclass(class_wrapper, + webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) or class_wrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler - or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler + # or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler ) @staticmethod @@ -50,7 +51,7 @@ def get_args(self, clazz_wrapper): args = dict( clazz=clazz_wrapper, tile_service_factory=self.tile_service_factory, - thread_pool=self. request_thread_pool + thread_pool=self.request_thread_pool ) if issubclass(clazz_wrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler): diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/Dockerfile index 515d6ab0..6f13f9f4 100644 --- a/docker/nexus-webapp/Dockerfile +++ b/docker/nexus-webapp/Dockerfile @@ -95,10 +95,10 @@ RUN python3 setup.py install clean WORKDIR /incubator-sdap-nexus/analysis RUN python3 setup.py install clean && mamba clean -afy +RUN pip install shapely==1.7.1 WORKDIR /incubator-sdap-nexus/tools/deletebyquery -RUN pip3 install cassandra-driver==3.20.1 -RUN pip3 install pyspark py4j +RUN pip3 install cassandra-driver==3.20.1 pyspark py4j RUN pip3 install -r requirements.txt RUN pip3 install cython RUN rm requirements.txt From 51231cad14ba5242bbee1814ba1141a366d33b61 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 11 Jan 2024 10:54:14 -0800 Subject: [PATCH 72/91] SDAP-507 - Changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01d62724..793c6017 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-482: Updated Saildrone in situ endpoint in config file - SDAP-485: Improved behavior for retrying failed Cassandra inserts when saving matchup results. - SDAP-487: Improved result fetch speed for large matchup results by tweaking `doms.doms_data` schema to support querying by primary value id. +- SDAP-507: Changes to remove `geos` sub-dependency from core image build: + - Removed `gdal` and `basemap` as core dependencies + - Moved `shapely` installation in docker build from conda install to pip install + - Disabled `/domsplot` endpoint & commented out references to its source file as it depends on `basemap` and raises `ImportError`s at startup ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. 
From 5c755736517ac33c526e6f00ab3331fd02c48411 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 11 Jan 2024 10:49:19 -0800 Subject: [PATCH 73/91] SDAP-507 - Changes to remove geos sub-dependency --- analysis/webservice/algorithms/doms/BaseDomsHandler.py | 7 ------- analysis/webservice/algorithms/doms/ResultsPlotQuery.py | 2 +- analysis/webservice/algorithms/doms/__init__.py | 2 +- .../nexus_tornado/app_builders/HandlerArgsBuilder.py | 7 ++++--- docker/nexus-webapp/Dockerfile | 4 ++-- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index 84c91633..faa384f7 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -35,13 +35,6 @@ EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' -try: - from osgeo import gdal - from osgeo.gdalnumeric import * -except ImportError: - import gdal - from gdalnumeric import * - from netCDF4 import Dataset import netCDF4 import tempfile diff --git a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py index 950c7964..864cdc3b 100644 --- a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py +++ b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py @@ -26,7 +26,7 @@ class PlotTypes: HISTOGRAM = "histogram" -@nexus_handler +# @nexus_handler class DomsResultsPlotHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): name = "DOMS Results Plotting" path = "/domsplot" diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py index bc568f83..8a94798e 100644 --- a/analysis/webservice/algorithms/doms/__init__.py +++ b/analysis/webservice/algorithms/doms/__init__.py @@ -21,7 +21,7 @@ from . import DomsInitialization from . import MatchupQuery from . import MetadataQuery -from . import ResultsPlotQuery +# from . import ResultsPlotQuery from . import ResultsRetrieval from . import ResultsStorage from . import StatsQuery diff --git a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py index 2a84ae7e..3b8b480f 100644 --- a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py +++ b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py @@ -37,9 +37,10 @@ def handler_needs_algorithm_config(class_wrapper): class_wrapper == webservice.algorithms_spark.Matchup.Matchup or class_wrapper == webservice.algorithms_spark.MatchupDoms.MatchupDoms or issubclass(class_wrapper, webservice.algorithms.doms.BaseDomsHandler.BaseDomsQueryCalcHandler) - or issubclass(class_wrapper, webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) + or issubclass(class_wrapper, + webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) or class_wrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler - or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler + # or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler ) @staticmethod @@ -50,7 +51,7 @@ def get_args(self, clazz_wrapper): args = dict( clazz=clazz_wrapper, tile_service_factory=self.tile_service_factory, - thread_pool=self. 
request_thread_pool + thread_pool=self.request_thread_pool ) if issubclass(clazz_wrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler): diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/Dockerfile index 6aaadda8..c38c5f29 100644 --- a/docker/nexus-webapp/Dockerfile +++ b/docker/nexus-webapp/Dockerfile @@ -95,11 +95,11 @@ RUN python3 setup.py install clean WORKDIR /incubator-sdap-nexus/analysis RUN python3 setup.py install clean && mamba clean -afy +RUN pip install shapely==1.7.1 WORKDIR /incubator-sdap-nexus/tools/deletebyquery ARG CASS_DRIVER_BUILD_CONCURRENCY=8 -RUN pip3 install cassandra-driver==3.20.1 -RUN pip3 install pyspark py4j +RUN pip3 install cassandra-driver==3.20.1 pyspark py4j RUN pip3 install -r requirements.txt RUN pip3 install cython From 7f717c0fcf1f31701cff258a19145327592562ff Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 11 Jan 2024 10:54:14 -0800 Subject: [PATCH 74/91] SDAP-507 - Changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5396fdde..55c5bc6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for deploying on k8s version 1.25: - Upgraded Cassandra Helm chart dependency version - Bumped default Cassandra protocol version 3 -> 4 in webapp and tools +- SDAP-507: Changes to remove `geos` sub-dependency from core image build: + - Removed `gdal` and `basemap` as core dependencies + - Moved `shapely` installation in docker build from conda install to pip install + - Disabled `/domsplot` endpoint & commented out references to its source file as it depends on `basemap` and raises `ImportError`s at startup ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. From 9779f409bd8b5aff9d9d17b244ab6cfbddedfb28 Mon Sep 17 00:00:00 2001 From: rileykk Date: Fri, 19 Jan 2024 09:15:01 -0800 Subject: [PATCH 75/91] delete instead of comment out --- analysis/webservice/algorithms/doms/__init__.py | 1 - .../webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py | 1 - 2 files changed, 2 deletions(-) diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py index 8a94798e..8bddad9e 100644 --- a/analysis/webservice/algorithms/doms/__init__.py +++ b/analysis/webservice/algorithms/doms/__init__.py @@ -21,7 +21,6 @@ from . import DomsInitialization from . import MatchupQuery from . import MetadataQuery -# from . import ResultsPlotQuery from . import ResultsRetrieval from . import ResultsStorage from . 
import StatsQuery diff --git a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py index 3b8b480f..f2d6f1b4 100644 --- a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py +++ b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py @@ -40,7 +40,6 @@ def handler_needs_algorithm_config(class_wrapper): or issubclass(class_wrapper, webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) or class_wrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler - # or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler ) @staticmethod From 937876031c63d7d1eac279eb7ba34c1a9a62355c Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 19 Jan 2024 09:45:40 -0800 Subject: [PATCH 76/91] Revert "Update helm cassandra dependency (#289)" This reverts commit 1e8cc4e9d31d295e172c0db4bba61a5776642bea. --- CHANGELOG.md | 1 - analysis/webservice/algorithms/doms/ResultsStorage.py | 3 --- analysis/webservice/algorithms/doms/domsconfig.ini.default | 2 +- data-access/nexustiles/config/datastores.ini.default | 2 +- helm/requirements.yaml | 2 +- helm/values.yaml | 3 ++- tools/deletebyquery/deletebyquery.py | 2 +- tools/doms-data-tools/update_doms_data_pk.py | 2 +- tools/domspurge/README.md | 2 +- tools/domspurge/purge.py | 2 +- 10 files changed, 9 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e686e60..ed72f245 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,7 +42,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Status code for results endpoint if execution id is not found fixed to be `404` instead of `500`. - Ensured links in the `/job` endpoint are https - SDAP-488: Workaround to build issue on Apple Silicon (M1/M2). Image build installs nexusproto through PyPI instead of building from source. A build arg `BUILD_NEXUSPROTO` was defined to allow building from source if desired -- SDAP-496: Fix `solr-cloud-init` image failing to run. 
### Security ## [1.1.0] - 2023-04-26 diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 1dea1610..48b2122d 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -65,9 +65,6 @@ def __enter__(self): dc_policy = DCAwareRoundRobinPolicy(cassDatacenter) token_policy = TokenAwarePolicy(dc_policy) - logger.info(f'Connecting to Cassandra cluster @ {[host for host in cassHost.split(",")]}; datacenter: ' - f'{cassDatacenter}; protocol version: {cassVersion}') - self._cluster = Cluster([host for host in cassHost.split(',')], load_balancing_policy=token_policy, protocol_version=cassVersion, auth_provider=auth_provider) diff --git a/analysis/webservice/algorithms/doms/domsconfig.ini.default b/analysis/webservice/algorithms/doms/domsconfig.ini.default index f4e44960..55f9b16c 100644 --- a/analysis/webservice/algorithms/doms/domsconfig.ini.default +++ b/analysis/webservice/algorithms/doms/domsconfig.ini.default @@ -18,7 +18,7 @@ host=localhost port=9042 keyspace=doms local_datacenter=datacenter1 -protocol_version=4 +protocol_version=3 dc_policy=DCAwareRoundRobinPolicy username= password= diff --git a/data-access/nexustiles/config/datastores.ini.default b/data-access/nexustiles/config/datastores.ini.default index 51455a38..d8db1902 100644 --- a/data-access/nexustiles/config/datastores.ini.default +++ b/data-access/nexustiles/config/datastores.ini.default @@ -18,7 +18,7 @@ host=localhost port=9042 keyspace=nexustiles local_datacenter=datacenter1 -protocol_version=4 +protocol_version=3 dc_policy=DCAwareRoundRobinPolicy username= password= diff --git a/helm/requirements.yaml b/helm/requirements.yaml index 1de8cf0f..a9996586 100644 --- a/helm/requirements.yaml +++ b/helm/requirements.yaml @@ -12,7 +12,7 @@ dependencies: repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami condition: solr.enabled - name: cassandra - version: 9.1.7 + version: 5.5.3 repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami condition: cassandra.enabled diff --git a/helm/values.yaml b/helm/values.yaml index fe2481ef..4105362e 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -195,7 +195,8 @@ cassandra: dbUser: user: cassandra password: cassandra - replicaCount: 1 + cluster: + replicaCount: 1 persistence: storageClass: hostpath size: 8Gi diff --git a/tools/deletebyquery/deletebyquery.py b/tools/deletebyquery/deletebyquery.py index 8b98111a..4fb7bd66 100644 --- a/tools/deletebyquery/deletebyquery.py +++ b/tools/deletebyquery/deletebyquery.py @@ -262,7 +262,7 @@ def parse_args(): help='The version of the Cassandra protocol the driver should use.', required=False, choices=['1', '2', '3', '4', '5'], - default='4') + default='3') parser.add_argument('--solr-rows', help='Number of rows to fetch with each Solr query to build the list of tiles to delete', diff --git a/tools/doms-data-tools/update_doms_data_pk.py b/tools/doms-data-tools/update_doms_data_pk.py index 749995da..ed8dbe5e 100644 --- a/tools/doms-data-tools/update_doms_data_pk.py +++ b/tools/doms-data-tools/update_doms_data_pk.py @@ -114,7 +114,7 @@ def main(): request_timeout=60.0, ) }, - protocol_version=4, + protocol_version=3, auth_provider=auth_provider) as cluster: session = cluster.connect('doms') diff --git a/tools/domspurge/README.md b/tools/domspurge/README.md index e88b62f5..92f7cfb1 100644 --- a/tools/domspurge/README.md +++ 
b/tools/domspurge/README.md @@ -33,5 +33,5 @@ You can build an image for this script to run it in a Kubernetes CronJob. ```shell cd /incubator-sdap-nexus -docker build . -f tools/domspurge/Dockerfile -t sdap-local/DomsPurge: +docker build . -f Dockerfile -t sdap-local/DomsPurge: ``` diff --git a/tools/domspurge/purge.py b/tools/domspurge/purge.py index d4bb15a8..4fb2fc37 100644 --- a/tools/domspurge/purge.py +++ b/tools/domspurge/purge.py @@ -270,7 +270,7 @@ def parse_args(): required=False, dest='pv', choices=['1', '2', '3', '4', '5'], - default='4') + default='3') time_before = purge_options.add_mutually_exclusive_group(required=True) From 530314652bd1e74907962a9b03ceddfdc302c3ff Mon Sep 17 00:00:00 2001 From: rileykk Date: Fri, 19 Jan 2024 10:20:44 -0800 Subject: [PATCH 77/91] deleted disabled endpoint files --- .../algorithms/doms/ResultsPlotQuery.py | 56 ------ .../webservice/algorithms/doms/mapplot.py | 174 ------------------ 2 files changed, 230 deletions(-) delete mode 100644 analysis/webservice/algorithms/doms/ResultsPlotQuery.py delete mode 100644 analysis/webservice/algorithms/doms/mapplot.py diff --git a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py deleted file mode 100644 index 864cdc3b..00000000 --- a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py +++ /dev/null @@ -1,56 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from . import BaseDomsHandler -from . import histogramplot -from . import mapplot -from . import scatterplot -from webservice.NexusHandler import nexus_handler - - -class PlotTypes: - SCATTER = "scatter" - MAP = "map" - HISTOGRAM = "histogram" - - -# @nexus_handler -class DomsResultsPlotHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): - name = "DOMS Results Plotting" - path = "/domsplot" - description = "" - params = {} - singleton = True - - def __init__(self, tile_service_factory, config=None): - BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self, tile_service_factory) - self.config = config - - def calc(self, computeOptions, **args): - id = computeOptions.get_argument("id", None) - parameter = computeOptions.get_argument('parameter', 'sst') - - plotType = computeOptions.get_argument("type", PlotTypes.SCATTER) - - normAndCurve = computeOptions.get_boolean_arg("normandcurve", False) - - if plotType == PlotTypes.SCATTER: - return scatterplot.createScatterPlot(id, parameter, config=self.config) - elif plotType == PlotTypes.MAP: - return mapplot.createMapPlot(id, parameter, config=self.config) - elif plotType == PlotTypes.HISTOGRAM: - return histogramplot.createHistogramPlot(id, parameter, normAndCurve, config=self.config) - else: - raise Exception("Unsupported plot type '%s' specified." 
% plotType) diff --git a/analysis/webservice/algorithms/doms/mapplot.py b/analysis/webservice/algorithms/doms/mapplot.py deleted file mode 100644 index 8b93d3c6..00000000 --- a/analysis/webservice/algorithms/doms/mapplot.py +++ /dev/null @@ -1,174 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import io -from multiprocessing import Process, Manager - -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -from mpl_toolkits.basemap import Basemap - -from . import BaseDomsHandler -from . import ResultsStorage - -if not matplotlib.get_backend(): - matplotlib.use('Agg') - -PARAMETER_TO_FIELD = { - "sst": "sea_water_temperature", - "sss": "sea_water_salinity" -} - -PARAMETER_TO_UNITS = { - "sst": "($^\circ$ C)", - "sss": "(g/L)" -} - - -def __square(minLon, maxLon, minLat, maxLat): - if maxLat - minLat > maxLon - minLon: - a = ((maxLat - minLat) - (maxLon - minLon)) / 2.0 - minLon -= a - maxLon += a - elif maxLon - minLon > maxLat - minLat: - a = ((maxLon - minLon) - (maxLat - minLat)) / 2.0 - minLat -= a - maxLat += a - - return minLon, maxLon, minLat, maxLat - - -def render(d, lats, lons, z, primary, secondary, parameter): - fig = plt.figure() - ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) - - ax.set_title(f'{primary} vs. 
{secondary}') - # ax.set_ylabel('Latitude') - # ax.set_xlabel('Longitude') - - minLatA = np.min(lats) - maxLatA = np.max(lats) - minLonA = np.min(lons) - maxLonA = np.max(lons) - - minLat = minLatA - (abs(maxLatA - minLatA) * 0.1) - maxLat = maxLatA + (abs(maxLatA - minLatA) * 0.1) - - minLon = minLonA - (abs(maxLonA - minLonA) * 0.1) - maxLon = maxLonA + (abs(maxLonA - minLonA) * 0.1) - - minLon, maxLon, minLat, maxLat = __square(minLon, maxLon, minLat, maxLat) - - # m = Basemap(projection='mill', llcrnrlon=-180,llcrnrlat=-80,urcrnrlon=180,urcrnrlat=80,resolution='l') - m = Basemap(projection='mill', llcrnrlon=minLon, llcrnrlat=minLat, urcrnrlon=maxLon, urcrnrlat=maxLat, - resolution='l') - - m.drawparallels(np.arange(minLat, maxLat, (maxLat - minLat) / 5.0), labels=[1, 0, 0, 0], fontsize=10) - m.drawmeridians(np.arange(minLon, maxLon, (maxLon - minLon) / 5.0), labels=[0, 0, 0, 1], fontsize=10) - - m.drawcoastlines() - m.drawmapboundary(fill_color='#99ffff') - m.fillcontinents(color='#cc9966', lake_color='#99ffff') - - # lats, lons = np.meshgrid(lats, lons) - - masked_array = np.ma.array(z, mask=np.isnan(z)) - z = masked_array - - values = np.zeros(len(z)) - for i in range(0, len(z)): - values[i] = ((z[i] - np.min(z)) / (np.max(z) - np.min(z)) * 20.0) + 10 - - x, y = m(lons, lats) - - im1 = m.scatter(x, y, values) - - im1.set_array(z) - cb = m.colorbar(im1) - - units = PARAMETER_TO_UNITS[parameter] if parameter in PARAMETER_TO_UNITS else PARAMETER_TO_UNITS["sst"] - cb.set_label("Difference %s" % units) - - buf = io.BytesIO() - plt.savefig(buf, format='png') - plot = buf.getvalue() - if d is not None: - d['plot'] = plot - return plot - - -class DomsMapPlotQueryResults(BaseDomsHandler.DomsQueryResults): - def __init__(self, lats, lons, z, parameter, primary, secondary, args=None, bounds=None, count=None, details=None, - computeOptions=None, executionId=None, plot=None): - BaseDomsHandler.DomsQueryResults.__init__(self, results={"lats": lats, "lons": lons, "values": z}, args=args, - details=details, bounds=bounds, count=count, - computeOptions=computeOptions, executionId=executionId) - self.__lats = lats - self.__lons = lons - self.__z = np.array(z) - self.__parameter = parameter - self.__primary = primary - self.__secondary = secondary - self.__plot = plot - - def toImage(self): - return self.__plot - - -def renderAsync(x, y, z, primary, secondary, parameter): - manager = Manager() - d = manager.dict() - p = Process(target=render, args=(d, x, y, z, primary, secondary, parameter)) - p.start() - p.join() - return d['plot'] - - -def createMapPlot(id, parameter, config=None): - with ResultsStorage.ResultsRetrieval(config) as storage: - params, stats, data = storage.retrieveResults(id) - - primary = params["primary"] - secondary = params["matchup"][0] - - lats = [] - lons = [] - z = [] - - field = PARAMETER_TO_FIELD[parameter] if parameter in PARAMETER_TO_FIELD else PARAMETER_TO_FIELD["sst"] - - for entry in data: - for match in entry["matches"]: - if match["source"] == secondary: - - if field in entry and field in match: - a = entry[field] - b = match[field] - z.append((a - b)) - z.append((a - b)) - else: - z.append(1.0) - z.append(1.0) - lats.append(entry["y"]) - lons.append(entry["x"]) - lats.append(match["y"]) - lons.append(match["x"]) - - plot = renderAsync(lats, lons, z, primary, secondary, parameter) - r = DomsMapPlotQueryResults(lats=lats, lons=lons, z=z, parameter=parameter, primary=primary, secondary=secondary, - args=params, - details=stats, bounds=None, count=None, 
computeOptions=None, executionId=id, plot=plot) - return r From ee5e5c8da244af7b0e0a2864dd894459b0706350 Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 25 Jan 2024 12:14:25 -0800 Subject: [PATCH 78/91] fix bug where still-running jobs failed /job endpoint due to missing metadata --- analysis/webservice/algorithms/doms/ExecutionStatus.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py index 63cf423b..9817b070 100644 --- a/analysis/webservice/algorithms/doms/ExecutionStatus.py +++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py @@ -63,6 +63,9 @@ def calc(self, request, **args): except NexusProcessingException: execution_stats = {} + if execution_stats is None: + execution_stats = {} + job_status = NexusExecutionResults.ExecutionStatus(execution_details['status']) host = f'https://{request.requestHandler.request.host}' From 639d7d773b1074af04ed6621c1465d2e4e3132d6 Mon Sep 17 00:00:00 2001 From: Riley Kuttruff <72955101+RKuttruff@users.noreply.github.com> Date: Thu, 1 Feb 2024 13:59:23 -0800 Subject: [PATCH 79/91] Update .asf.yaml (#293) Co-authored-by: rileykk --- .asf.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.asf.yaml b/.asf.yaml index 7574d148..035f24d2 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -16,3 +16,17 @@ github: autolink_jira: SDAP + protected_branches: + master: + strict: true # Require branches be up to date + required_pull_request_reviews: + dismiss_stale_reviews: true + require_code_owner_reviews: true + required_approving_review_count: 1 + develop: + strict: true # Require branches be up to date + required_pull_request_reviews: + dismiss_stale_reviews: true + require_code_owner_reviews: true + required_approving_review_count: 1 + del_branch_on_merge: true From 3875f2dfc51daf489062290d16f93d78239cca3c Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 1 Feb 2024 15:30:09 -0800 Subject: [PATCH 80/91] Moved changelog entries --- CHANGELOG.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ba371e0..94e2fa60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,11 +41,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-482: Updated Saildrone in situ endpoint in config file - SDAP-485: Improved behavior for retrying failed Cassandra inserts when saving matchup results. - SDAP-487: Improved result fetch speed for large matchup results by tweaking `doms.doms_data` schema to support querying by primary value id. -- SDAP-493: - - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint - - Updated /job endpoint with details about number of primary and secondary tiles. -- SDAP-500: Improvements to SDAP Asynchronous Jobs -- SDAP-499: Added page number to default filename for matchup output - Support for deploying on k8s version 1.25: - Upgraded Cassandra Helm chart dependency version - Bumped default Cassandra protocol version 3 -> 4 in webapp and tools @@ -57,8 +52,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - SDAP-465: Removed `climatology` directory. 
- SDAP-501: Updated dependencies to remove `chardet` -- SDAP-493: - - Removed `resultSizeLimit` from /match_spark endpoint ### Fixed - SDAP-474: Fixed bug in CSV attributes where secondary dataset would be rendered as comma separated characters - SDAP-475: Bug fixes for `/timeSeriesSpark` and `/timeAvgMapSpark` From d989c660d01719b1ae0b6280f79ae75e32ff687b Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 1 Feb 2024 15:33:05 -0800 Subject: [PATCH 81/91] SDAP-472 changelog entries --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94e2fa60..84eced44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - SDAP-506: - Added STAC Catalog endpoint for matchup outputs +- SDAP-472: + - Support for Zarr backend (gridded data only) + - Dataset management endpoints for Zarr datasets ### Changed - SDAP-493: - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint - Updated /job endpoint with details about number of primary and secondary tiles. - SDAP-500: Improvements to SDAP Asynchronous Jobs - SDAP-499: Added page number to default filename for matchup output +- SDAP-472: Overhauled `data-access` to support multiple backends for simultaneous support of multiple ARD formats ### Deprecated ### Removed - SDAP-493: From ec2ed11958ca9965144290754afe63a85aaef5b6 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 12 Feb 2024 10:16:40 -0800 Subject: [PATCH 82/91] pyproj requirement --- analysis/conda-requirements.txt | 5 +---- data-access/requirements.txt | 3 ++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt index 22dff066..0d459902 100644 --- a/analysis/conda-requirements.txt +++ b/analysis/conda-requirements.txt @@ -15,7 +15,6 @@ netcdf4==1.5.5.1 -basemap==1.2.2 scipy==1.6.0 pyspark==3.2.1 pytz==2021.1 @@ -27,10 +26,8 @@ botocore==1.24.21 pillow==8.1.0 mpld3=0.5.1 tornado==6.1 -pyproj==2.6.1.post1 +pyproj==3.3.1 pyyaml==6.0 -geos==3.8.1 -gdal==3.2.1 mock==4.0.3 importlib_metadata==4.11.4 #singledispatch==3.4.0.3 diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 6a43957a..6e08f967 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -26,7 +26,8 @@ fsspec==2022.5.0 botocore==1.24.21 aiohttp==3.8.1 xarray~=2022.3.0 -rioxarray +rioxarray==0.15.0 +pyproj==3.3.1 zarr>=2.11.3 pandas<2.1.0rc0 # Temporary restriction because 2.1.0rc0 fails to build From 1392983b38b54e00219a48902f212e26d1b1c7fd Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 12 Feb 2024 16:04:45 -0800 Subject: [PATCH 83/91] CoG: some minor fixes for docker build &c --- analysis/webservice/algorithms/DataInBoundsSearch.py | 4 ++++ data-access/nexustiles/backends/cog/backend.py | 11 ++++++----- data-access/requirements.txt | 3 ++- docker/nexus-webapp/Dockerfile | 9 +++++---- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/analysis/webservice/algorithms/DataInBoundsSearch.py b/analysis/webservice/algorithms/DataInBoundsSearch.py index 2df061fb..195dfcf0 100644 --- a/analysis/webservice/algorithms/DataInBoundsSearch.py +++ b/analysis/webservice/algorithms/DataInBoundsSearch.py @@ -16,6 +16,7 @@ import json import numpy +import logging from datetime import datetime from pytz import timezone @@ -26,6 +27,7 @@ EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' 
+logger = logging.getLogger(__name__) @nexus_handler @@ -133,6 +135,8 @@ def calc(self, computeOptions, **args): else: tiles = self._get_tile_service().get_tiles_by_metadata(metadata_filter, ds, start_time, end_time) + logger.info(f'Matched {len(tiles)} tiles') + data = [] for tile in tiles: for nexus_point in tile.nexus_point_generator(): diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index 66433c96..c878adf8 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -318,8 +318,10 @@ def fetch_data_for_tiles(self, *tiles): return tiles @staticmethod - def __open_granule_at_url(url, time: np.datetime64, bands, config, **kwargs): - url = urlparse(url) + def __open_granule_at_url(url_s, time: np.datetime64, bands, config, **kwargs): + url = urlparse(url_s) + + logger.debug(f'Opening cog at {url_s}') if url.scheme in ['file', '']: tiff = rioxarray.open_rasterio(url.path, mask_and_scale=True).to_dataset('band') @@ -342,7 +344,7 @@ def __open_granule_at_url(url, time: np.datetime64, bands, config, **kwargs): AWSSession(session), GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR' ): - tiff = rioxarray.open_rasterio(url, mask_and_scale=True) + tiff = rioxarray.open_rasterio(url_s, mask_and_scale=True) ##### # NOTE: This will likely be inefficient so leaving it disabled for now. I don't know how it will @@ -358,7 +360,6 @@ def __open_granule_at_url(url, time: np.datetime64, bands, config, **kwargs): try: tiff = tiff.rio.reproject(dst_crs='EPSG:4326', nodata=np.nan) except MissingCRS: - # tiff.rio.write_crs('EPSG:4326').rio.reproject(dst_crs='EPSG:4326', nodata=np.nan) pass rename = dict(x='longitude', y='latitude') @@ -411,7 +412,7 @@ def __fetch_data_for_tile(self, tile: Tile): granule = tile.granule - ds: xr.Dataset = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat()), self.__bands, self.__config) + ds: xr.Dataset = CoGBackend.__open_granule_at_url(granule, np.datetime64(min_time.isoformat(), 'ns'), self.__bands, self.__config) variables = list(ds.data_vars) lats = ds[self.__latitude].to_numpy() diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 6e08f967..9ef7f212 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -14,7 +14,7 @@ # limitations under the License. 
numpy==1.24.3 -cassandra-driver==3.24.0 +cassandra-driver==3.29.0 pysolr==3.9.0 elasticsearch==8.3.1 urllib3==1.26.2 @@ -27,6 +27,7 @@ botocore==1.24.21 aiohttp==3.8.1 xarray~=2022.3.0 rioxarray==0.15.0 +rasterio==1.2.8 pyproj==3.3.1 zarr>=2.11.3 pandas<2.1.0rc0 # Temporary restriction because 2.1.0rc0 fails to build diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/Dockerfile index c38c5f29..28fb7c81 100644 --- a/docker/nexus-webapp/Dockerfile +++ b/docker/nexus-webapp/Dockerfile @@ -26,12 +26,12 @@ ARG CONDA_DIR="/opt/conda" ENV \ PYTHONPATH=/opt/conda/share/py4j/py4j0.10.9.2.jar \ NEXUS_SRC=/tmp/incubator-sdap-nexus \ - PROJ_LIB=/opt/conda/lib/python3.8/site-packages/pyproj/data \ + PROJ_LIB=/opt/conda/share/proj \ PATH="$CONDA_DIR/bin:$PATH" \ PYTHONDONTWRITEBYTECODE=1 \ SPARK_HOME=/opt/spark \ - PYSPARK_DRIVER_PYTHON=/opt/conda/bin/python3.8 \ - PYSPARK_PYTHON=/opt/conda/bin/python3.8 \ + PYSPARK_DRIVER_PYTHON=/opt/conda/bin/python3.9 \ + PYSPARK_PYTHON=/opt/conda/bin/python3.9 \ LD_LIBRARY_PATH=/usr/lib \ REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt @@ -65,6 +65,7 @@ COPY docker/nexus-webapp/install_conda.sh ./install_conda.sh RUN /tmp/install_conda.sh RUN conda install -c conda-forge python=3.8.15=h257c98d_0_cpython tqdm=4.64.1=py38h06a4308_0 mamba && conda clean -afy +RUN mamba install -c conda-forge python=3.9.7 && mamba clean -afy RUN cd /usr/lib && ln -s libcom_err.so.2 libcom_err.so.3 && \ cd /opt/conda/lib && \ @@ -99,7 +100,7 @@ RUN pip install shapely==1.7.1 WORKDIR /incubator-sdap-nexus/tools/deletebyquery ARG CASS_DRIVER_BUILD_CONCURRENCY=8 -RUN pip3 install cassandra-driver==3.20.1 pyspark py4j +RUN pip3 install cassandra-driver==3.29.0 pyspark py4j RUN pip3 install -r requirements.txt RUN pip3 install cython From 168ae734919f3361e68a4e95559b4c443f2c9c1d Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 1 Apr 2024 14:21:12 -0700 Subject: [PATCH 84/91] Dependencies update to poetry --- poetry.lock | 163 ++++++++++++++++++++++++++++++++++++++++++++++--- pyproject.toml | 2 + 2 files changed, 156 insertions(+), 9 deletions(-) diff --git a/poetry.lock b/poetry.lock index 82d44a32..9fe26285 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,20 @@ # This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +[[package]] +name = "affine" +version = "2.4.0" +description = "Matrices describing affine transformation of the plane" +optional = false +python-versions = ">=3.7" +files = [ + {file = "affine-2.4.0-py3-none-any.whl", hash = "sha256:8a3df80e2b2378aef598a83c1392efd47967afec4242021a0b06b4c7cbc61a92"}, + {file = "affine-2.4.0.tar.gz", hash = "sha256:a24d818d6a836c131976d22f8c27b8d3ca32d0af64c1d8d29deb7bafa4da1eea"}, +] + +[package.extras] +dev = ["coveralls", "flake8", "pydocstyle"] +test = ["pytest (>=4.6)", "pytest-cov"] + [[package]] name = "aiobotocore" version = "2.3.4" @@ -402,6 +417,40 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "click-plugins" +version = "1.1.1" +description = "An extension module for click to enable registering CLI commands via setuptools entry-points." 
+optional = false +python-versions = "*" +files = [ + {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"}, + {file = "click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8"}, +] + +[package.dependencies] +click = ">=4.0" + +[package.extras] +dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] + +[[package]] +name = "cligj" +version = "0.7.2" +description = "Click params for commmand line interfaces to GeoJSON" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4" +files = [ + {file = "cligj-0.7.2-py3-none-any.whl", hash = "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df"}, + {file = "cligj-0.7.2.tar.gz", hash = "sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27"}, +] + +[package.dependencies] +click = ">=4.0" + +[package.extras] +test = ["pytest-cov"] + [[package]] name = "colorama" version = "0.4.6" @@ -656,13 +705,13 @@ profile = ["gprof2dot (>=2022.7.29)"] [[package]] name = "elastic-transport" -version = "8.12.0" +version = "8.13.0" description = "Transport classes and utilities shared among Python Elastic client libraries" optional = false python-versions = ">=3.7" files = [ - {file = "elastic-transport-8.12.0.tar.gz", hash = "sha256:48839b942fcce199eece1558ecea6272e116c58da87ca8d495ef12eb61effaf7"}, - {file = "elastic_transport-8.12.0-py3-none-any.whl", hash = "sha256:87d9dc9dee64a05235e7624ed7e6ab6e5ca16619aa7a6d22e853273b9f1cfbee"}, + {file = "elastic-transport-8.13.0.tar.gz", hash = "sha256:2410ec1ff51221e8b3a01c0afa9f0d0498e1386a269283801f5c12f98e42dc45"}, + {file = "elastic_transport-8.13.0-py3-none-any.whl", hash = "sha256:aec890afdddd057762b27ff3553b0be8fa4673ec1a4fd922dfbd00325874bb3d"}, ] [package.dependencies] @@ -670,7 +719,7 @@ certifi = "*" urllib3 = ">=1.26.2,<3" [package.extras] -develop = ["aiohttp", "furo", "mock", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"] +develop = ["aiohttp", "furo", "httpx", "mock", "opentelemetry-api", "opentelemetry-sdk", "orjson", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "respx", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"] [[package]] name = "elasticsearch" @@ -958,13 +1007,13 @@ testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs [[package]] name = "importlib-resources" -version = "6.3.2" +version = "6.4.0" description = "Read resources from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_resources-6.3.2-py3-none-any.whl", hash = "sha256:f41f4098b16cd140a97d256137cfd943d958219007990b2afb00439fc623f580"}, - {file = "importlib_resources-6.3.2.tar.gz", hash = "sha256:963eb79649252b0160c1afcfe5a1d3fe3ad66edd0a8b114beacffb70c0674223"}, + {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"}, + {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"}, ] [package.dependencies] @@ -972,7 +1021,7 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["jaraco.collections", 
"pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] +testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] [[package]] name = "iniconfig" @@ -2027,6 +2076,43 @@ files = [ {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, ] +[[package]] +name = "rasterio" +version = "1.2.8" +description = "Fast and direct raster I/O for use with Numpy and SciPy" +optional = false +python-versions = ">=3.6" +files = [ + {file = "rasterio-1.2.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca9bb81518d369cbeae57aff3608538f61dcf8e12cf4bfe92dd931bc263ade"}, + {file = "rasterio-1.2.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:8b13e5f7c57512885bfede605b021bd025684b507dac7b5ac8aee5430930114c"}, + {file = "rasterio-1.2.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:84cea32a0675f1b22924664169f0da4be406e790072501d505aa771da9662bca"}, + {file = "rasterio-1.2.8-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:722f76a09091d22fd26200ff5a7820eeb263f1cd22151c77e25a3cc473d5af72"}, + {file = "rasterio-1.2.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:74e80044767f41c84121fef373926f0d3e7411b944982429328d11442a649c0a"}, + {file = "rasterio-1.2.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c30caf0823811553ec45865697afc5e4a1fc4c6f30f8dada9067061cd8507af6"}, + {file = "rasterio-1.2.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2d41b39e41a7011d1dba0cb8983336de69ad68fb17857ccbd11cde0fcfdb2e3"}, + {file = "rasterio-1.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:f248a64f57a0db5b12eb50d9ac1ccd45a6085d979c3e73b406f9e6b5a8355080"}, + {file = "rasterio-1.2.8.tar.gz", hash = "sha256:8196b7a71fea3c1573dd48d9ab0e78955e1d26e81848cce318c4930bd96782fe"}, +] + +[package.dependencies] +affine = "*" +attrs = "*" +certifi = "*" +click = ">=4.0" +click-plugins = "*" +cligj = ">=0.5" +numpy = "*" +setuptools = "*" +snuggs = ">=1.4.1" + +[package.extras] +all = ["boto3 (>=1.2.4)", "ghp-import", "hypothesis", "ipython (>=2.0)", "matplotlib", "numpydoc", "packaging", "pytest (>=2.8.2)", "pytest-cov (>=2.2.0)", "shapely", "sphinx", "sphinx-rtd-theme"] +docs = ["ghp-import", "numpydoc", "sphinx", "sphinx-rtd-theme"] +ipython = ["ipython (>=2.0)"] +plot = ["matplotlib"] +s3 = ["boto3 (>=1.2.4)"] +test = ["boto3 (>=1.2.4)", "hypothesis", "packaging", "pytest (>=2.8.2)", "pytest-cov (>=2.2.0)", "shapely"] + [[package]] name = "requests" version = "2.31.0" @@ -2048,6 +2134,31 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "rioxarray" +version = "0.13.4" +description = "geospatial xarray extension powered by rasterio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "rioxarray-0.13.4-py3-none-any.whl", hash = "sha256:56eef711d9817d3c729c1a267c940e7dff66bfc874a0b24ed3604ea2f958dfb2"}, + {file = "rioxarray-0.13.4.tar.gz", hash = "sha256:0cad24ad2c3c5ee181a0cfad2b8c2152a609b7eb118a3430034aec171e9cf14f"}, +] + +[package.dependencies] +numpy = ">=1.21" +packaging = "*" +pyproj = ">=2.2" +rasterio = ">=1.1.1" +xarray = ">=0.17" + +[package.extras] +all = ["dask", "mypy", "nbsphinx", "netcdf4", "pre-commit", "pylint", "pytest (>=3.6)", "pytest-cov", "pytest-timeout", "scipy", "sphinx-click", 
"sphinx-rtd-theme"] +dev = ["dask", "mypy", "nbsphinx", "netcdf4", "pre-commit", "pylint", "pytest (>=3.6)", "pytest-cov", "pytest-timeout", "scipy", "sphinx-click", "sphinx-rtd-theme"] +doc = ["nbsphinx", "sphinx-click", "sphinx-rtd-theme"] +interp = ["scipy"] +test = ["dask", "netcdf4", "pytest (>=3.6)", "pytest-cov", "pytest-timeout"] + [[package]] name = "s3fs" version = "2022.5.0" @@ -2130,6 +2241,22 @@ files = [ [package.dependencies] numpy = ">=1.16.5" +[[package]] +name = "setuptools" +version = "69.2.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-69.2.0-py3-none-any.whl", hash = "sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c"}, + {file = "setuptools-69.2.0.tar.gz", hash = "sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "shapely" version = "1.7.1" @@ -2178,6 +2305,24 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "snuggs" +version = "1.4.7" +description = "Snuggs are s-expressions for Numpy" +optional = false +python-versions = "*" +files = [ + {file = "snuggs-1.4.7-py3-none-any.whl", hash = "sha256:988dde5d4db88e9d71c99457404773dabcc7a1c45971bfbe81900999942d9f07"}, + {file = "snuggs-1.4.7.tar.gz", hash = "sha256:501cf113fe3892e14e2fee76da5cd0606b7e149c411c271898e6259ebde2617b"}, +] + +[package.dependencies] +numpy = "*" +pyparsing = ">=2.1.6" + +[package.extras] +test = ["hypothesis", "pytest"] + [[package]] name = "tomli" version = "2.0.1" @@ -2542,4 +2687,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "~=3.8" -content-hash = "d19daaa356c410276a8cb00f1ab395b777986d146f61abd345fc1e62a0b8747b" +content-hash = "0f8054c5991ada6a82c25cd2a010820ab2a21bdda86cd94324e77eec9109ac9d" diff --git a/pyproject.toml b/pyproject.toml index 03b02e08..5c05925a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,8 @@ aiohttp = "3.8.1" xarray = "^2022.3.0" zarr = "2.11.3" pandas = "<2.1.0rc0" +rasterio = "1.2.8" +rioxarray = "*" [tool.poetry.dev-dependencies] From cefa8abb78478c36c63870a0136280c638358969 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 2 Apr 2024 16:23:51 -0700 Subject: [PATCH 85/91] Handling of GeoTIFFs with non-float dtypes --- 
data-access/nexustiles/backends/cog/backend.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index c878adf8..54f6378f 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -324,7 +324,7 @@ def __open_granule_at_url(url_s, time: np.datetime64, bands, config, **kwargs): logger.debug(f'Opening cog at {url_s}') if url.scheme in ['file', '']: - tiff = rioxarray.open_rasterio(url.path, mask_and_scale=True).to_dataset('band') + tiff = rioxarray.open_rasterio(url.path, mask_and_scale=False).to_dataset('band') elif url.scheme == 's3': try: aws_cfg = config['aws'] @@ -344,7 +344,7 @@ def __open_granule_at_url(url_s, time: np.datetime64, bands, config, **kwargs): AWSSession(session), GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR' ): - tiff = rioxarray.open_rasterio(url_s, mask_and_scale=True) + tiff = rioxarray.open_rasterio(url_s, mask_and_scale=False) ##### # NOTE: This will likely be inefficient so leaving it disabled for now. I don't know how it will @@ -357,8 +357,10 @@ def __open_granule_at_url(url_s, time: np.datetime64, bands, config, **kwargs): else: raise NotImplementedError(f'Support not yet added for tiffs with {url.scheme} URLs') + tiff = xr.decode_cf(tiff) + try: - tiff = tiff.rio.reproject(dst_crs='EPSG:4326', nodata=np.nan) + tiff = tiff.rio.reproject(dst_crs='EPSG:4326') except MissingCRS: pass From 2800569dbf43320b80f7f448d8604169821ffc89 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 16 Apr 2024 16:06:44 -0700 Subject: [PATCH 86/91] Improved opening of GeoTIFFs to avoid mask & scale issues with dtype --- data-access/nexustiles/backends/cog/backend.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index 54f6378f..e50c04c4 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -357,8 +357,20 @@ def __open_granule_at_url(url_s, time: np.datetime64, bands, config, **kwargs): else: raise NotImplementedError(f'Support not yet added for tiffs with {url.scheme} URLs') + # Broadcast the dataset attributes to the vars for decoding + for var in tiff.data_vars: + tiff[var].attrs.update(tiff.attrs) + + # Save the dtypes of the vars cause they will be lost to float32 on decode + var_dtypes = {var: tiff[var].dtype for var in tiff.data_vars} + + # Decode tiff = xr.decode_cf(tiff) + # Cast variables back to original dtypes + for var in tiff.data_vars: + tiff[var] = tiff[var].astype(var_dtypes[var]) + try: tiff = tiff.rio.reproject(dst_crs='EPSG:4326') except MissingCRS: From 18bbfeaee454fbef44909605bd97a8667a2241b0 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jun 2024 07:47:11 -0700 Subject: [PATCH 87/91] Poetry re-lock --- poetry.lock | 441 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 257 insertions(+), 184 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9fe26285..c763016d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "affine" @@ -314,13 +314,13 @@ graph = ["gremlinpython (==3.4.6)"] [[package]] name = "certifi" -version = "2024.2.2" +version = "2024.6.2" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, - {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, + {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, + {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, ] [[package]] @@ -358,32 +358,40 @@ numpy = ">1.13.3" [[package]] name = "cftime" -version = "1.6.3" +version = "1.6.4" description = "Time-handling functionality from netcdf4-python" optional = false python-versions = ">=3.8" files = [ - {file = "cftime-1.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b62d42546fa5c914dfea5b15a9aaed2087ea1211cc36d08c374502ef95892038"}, - {file = "cftime-1.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb6dd70b2ccabfe1a14b7fbb0bbdce0418e71697094373c0d573c880790fa291"}, - {file = "cftime-1.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9878bfd8c1c3f24184ecbd528f739ba46ebaceaf1c8a24d348d7befb117a285"}, - {file = "cftime-1.6.3-cp310-cp310-win_amd64.whl", hash = "sha256:3cf6e216a4c06f9a628cdf8e9c9d5e8097fb3eb02dd087dd14ab3b18478a7271"}, - {file = "cftime-1.6.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d2c01456d9d7b46aa710a41d1c711a50d5ea259aff4a987d0e973d1093bc922"}, - {file = "cftime-1.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:80eb1170ce1639016f55760847f4aadd04b0312496c5bac2797e930914bba48d"}, - {file = "cftime-1.6.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d87dadd0824262bdd7493babd2a44447da0a22175ded8ae9e060a3aebec7c5d7"}, - {file = "cftime-1.6.3-cp311-cp311-win_amd64.whl", hash = "sha256:0a38eb9f5c733a23e1714bd3ef2762ed5acee34f127670f8fb4ad6464946f6b3"}, - {file = "cftime-1.6.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2d113a01ab924445e61d65c26bbd95bc08e4a22878d3b947064bba056c884c4a"}, - {file = "cftime-1.6.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f11685663a6af97418908060492a07663c16d42519c139ca03c2ffb1377fd25"}, - {file = "cftime-1.6.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a98abb1d46d118e52b0611ce668a0b714b407be26177ef0581ecf5e95f894725"}, - {file = "cftime-1.6.3-cp312-cp312-win_amd64.whl", hash = "sha256:4d6fbd5f41b322cfa7b0ac3aaadeceb4450100a164b5bccbbb9e7c5048489a88"}, - {file = "cftime-1.6.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bedb577bc8b8f3f10f5336c0792e5dae88605781890f50f36b45bb46907968e8"}, - {file = "cftime-1.6.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:022dabf1610cdd04a693e730fa8f71d307059717f29dba921e7486e553412bb4"}, - {file = "cftime-1.6.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbf782ab4ac0605bdec2b941952c897595613203942b7f8c2fccd17efa5147df"}, - {file = "cftime-1.6.3-cp38-cp38-win_amd64.whl", hash = "sha256:9eb177a02db7cd84aa6962278e4bd2d3106a545de82e6aacd9404f1e153661db"}, - {file = "cftime-1.6.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3b86be8c2f254147be4ba88f12099466dde457a4a3a21de6c69d52a7224c13ae"}, - {file = 
"cftime-1.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:523b9a6bf03f5e36407979e248381d0fcab2d225b915bbde77d00c6dde192b90"}, - {file = "cftime-1.6.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a14d2c7d22fd2a6dfa6ad563283b6d6679f1df95e0ed8d14b8f284dad402887"}, - {file = "cftime-1.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:d9b00c2844c7a1701d8ede5336b6321dfee256ceab81a34a1aff0483d56891a6"}, - {file = "cftime-1.6.3.tar.gz", hash = "sha256:d0a6b29f72a13f08e008b9becff247cc75c84acb213332ede18879c5b6aa4dfd"}, + {file = "cftime-1.6.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ee70074df4bae0d9ee98f201cf5f11fd302791cf1cdeb73c34f685d6b632e17d"}, + {file = "cftime-1.6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e5456fd58d4cc6b8d7b4932b749617ee142b62a52bc5d8e3c282ce69ce3a20ba"}, + {file = "cftime-1.6.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1289e08617be350a6b26c6e4352a0cb088625ac33d25e95110df549c26d6ab8e"}, + {file = "cftime-1.6.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18b132d9225b4a109929866200846c72302316db9069e2de3ec8d8ec377f567f"}, + {file = "cftime-1.6.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ca1a264570e68fbb611bba251641b8efd0cf88c0ad2dcab5fa784df264232b75"}, + {file = "cftime-1.6.4-cp310-cp310-win_amd64.whl", hash = "sha256:6fc82928cbf477bebf233f41914e64bff7b9e894c7f0c34170784a48250f8da7"}, + {file = "cftime-1.6.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c1558d9b477bd29626cd8bfc89e736635f72887d1a993e2834ab579bba7abb8c"}, + {file = "cftime-1.6.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:03494e7b66a2fbb6b04e364ab67185130dee0ced660abac5c1559070571d143d"}, + {file = "cftime-1.6.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4dcb2a01d4e614437582af33b36db4fb441b7666758482864827a1f037d2b639"}, + {file = "cftime-1.6.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b47bf25195fb3889bbae34df0e80957eb69c48f66902f5d538c7a8ec34253f6"}, + {file = "cftime-1.6.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d4f2cc0d5c6ffba9c5b0fd1ecd0c7c1c426d0be7b8de1480e2a9fb857c1905e9"}, + {file = "cftime-1.6.4-cp311-cp311-win_amd64.whl", hash = "sha256:76b8f1e5d1e424accdf760a43e0a1793a7b640bab83cb067273d5c9dbb336c44"}, + {file = "cftime-1.6.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c349a91fa7ac9ec50118b04a8746bdea967bd2fc525d87c776003040b8d3392"}, + {file = "cftime-1.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:588d073400798adc24ece759cd1cb24ef730f55d1f70e31a898e7686f9d763d8"}, + {file = "cftime-1.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e07b91b488570573bbeb6f815656a8974d13d15b2279c82de2927f4f692bbcd"}, + {file = "cftime-1.6.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f92f2e405eeda47b30ab6231d8b7d136a55f21034d394f93ade322d356948654"}, + {file = "cftime-1.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:567574df94d0de1101bb5da76e7fbc6eabfddeeb2eb8cf83286b3599a136bbf7"}, + {file = "cftime-1.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:5b5ad7559a16bedadb66af8e417b6805f758acb57aa38d2730844dfc63a1e667"}, + {file = "cftime-1.6.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c072fe9e09925af66a9473edf5752ca1890ba752e7c1935d1f0245ad48f0977c"}, + {file = "cftime-1.6.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c05a71389f53d6340cb365b60f028c08268c72401660b9ef76108dee9f1cb5b2"}, + {file 
= "cftime-1.6.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0edeb1cb019d8155b2513cffb96749c0d7d459370e69bdf03077e0bee214aed8"}, + {file = "cftime-1.6.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8f05d5d6bb4137f9783fa61ad330030fcea8dcc6946dea69a27774edbe480e7"}, + {file = "cftime-1.6.4-cp38-cp38-win_amd64.whl", hash = "sha256:b32ac1278a2a111b066d5a1e6e5ce6f38c4c505993a6a3130873b56f99d7b56f"}, + {file = "cftime-1.6.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c20f03e12af39534c3450bba376272803bfb850b5ce6433c839bfaa99f8d835a"}, + {file = "cftime-1.6.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:90609b3c1a31a756a68ecdbc961a4ce0b22c1620f166c8dabfa3a4c337ac8b9e"}, + {file = "cftime-1.6.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbe11ad73b2a0ddc79995da21459fc2a3fd6b1593ca73f00a60e4d81c3e230f3"}, + {file = "cftime-1.6.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25f043703e785de0bd7cd8222c0a53317e9aeb3dfc062588b05e6f3ebb007468"}, + {file = "cftime-1.6.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f9acc272df1022f24fe7dbe9de43fa5d8271985161df14549e4d8d28c90dc9ea"}, + {file = "cftime-1.6.4-cp39-cp39-win_amd64.whl", hash = "sha256:e8467b6fbf8dbfe0be8c04d61180765fdd3b9ab0fe51313a0bbf87e63634a3d8"}, ] [package.dependencies] @@ -451,6 +459,17 @@ click = ">=4.0" [package.extras] test = ["pytest-cov"] +[[package]] +name = "cloudpickle" +version = "3.0.0" +description = "Pickler class to extend the standard pickle.Pickler functionality" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cloudpickle-3.0.0-py3-none-any.whl", hash = "sha256:246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7"}, + {file = "cloudpickle-3.0.0.tar.gz", hash = "sha256:996d9a482c6fb4f33c1a35335cf8afd065d2a56e973270364840712d9131a882"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -462,17 +481,6 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -[[package]] -name = "contextlib2" -version = "21.6.0" -description = "Backports and enhancements for the contextlib module" -optional = false -python-versions = ">=3.6" -files = [ - {file = "contextlib2-21.6.0-py2.py3-none-any.whl", hash = "sha256:3fbdb64466afd23abaf6c977627b75b6139a5a3e8ce38405c5b413aed7a0471f"}, - {file = "contextlib2-21.6.0.tar.gz", hash = "sha256:ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869"}, -] - [[package]] name = "contourpy" version = "1.1.0" @@ -608,63 +616,63 @@ test-no-images = ["pytest", "pytest-cov", "wurlitzer"] [[package]] name = "coverage" -version = "7.4.4" +version = "7.5.3" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0be5efd5127542ef31f165de269f77560d6cdef525fffa446de6f7e9186cfb2"}, - {file = "coverage-7.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ccd341521be3d1b3daeb41960ae94a5e87abe2f46f17224ba5d6f2b8398016cf"}, - {file = "coverage-7.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fa497a8ab37784fbb20ab699c246053ac294d13fc7eb40ec007a5043ec91f8"}, - {file = "coverage-7.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1a93009cb80730c9bca5d6d4665494b725b6e8e157c1cb7f2db5b4b122ea562"}, - {file = 
"coverage-7.4.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:690db6517f09336559dc0b5f55342df62370a48f5469fabf502db2c6d1cffcd2"}, - {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:09c3255458533cb76ef55da8cc49ffab9e33f083739c8bd4f58e79fecfe288f7"}, - {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8ce1415194b4a6bd0cdcc3a1dfbf58b63f910dcb7330fe15bdff542c56949f87"}, - {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b91cbc4b195444e7e258ba27ac33769c41b94967919f10037e6355e998af255c"}, - {file = "coverage-7.4.4-cp310-cp310-win32.whl", hash = "sha256:598825b51b81c808cb6f078dcb972f96af96b078faa47af7dfcdf282835baa8d"}, - {file = "coverage-7.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:09ef9199ed6653989ebbcaacc9b62b514bb63ea2f90256e71fea3ed74bd8ff6f"}, - {file = "coverage-7.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f9f50e7ef2a71e2fae92774c99170eb8304e3fdf9c8c3c7ae9bab3e7229c5cf"}, - {file = "coverage-7.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:623512f8ba53c422fcfb2ce68362c97945095b864cda94a92edbaf5994201083"}, - {file = "coverage-7.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0513b9508b93da4e1716744ef6ebc507aff016ba115ffe8ecff744d1322a7b63"}, - {file = "coverage-7.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40209e141059b9370a2657c9b15607815359ab3ef9918f0196b6fccce8d3230f"}, - {file = "coverage-7.4.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a2b2b78c78293782fd3767d53e6474582f62443d0504b1554370bde86cc8227"}, - {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:73bfb9c09951125d06ee473bed216e2c3742f530fc5acc1383883125de76d9cd"}, - {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1f384c3cc76aeedce208643697fb3e8437604b512255de6d18dae3f27655a384"}, - {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:54eb8d1bf7cacfbf2a3186019bcf01d11c666bd495ed18717162f7eb1e9dd00b"}, - {file = "coverage-7.4.4-cp311-cp311-win32.whl", hash = "sha256:cac99918c7bba15302a2d81f0312c08054a3359eaa1929c7e4b26ebe41e9b286"}, - {file = "coverage-7.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:b14706df8b2de49869ae03a5ccbc211f4041750cd4a66f698df89d44f4bd30ec"}, - {file = "coverage-7.4.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:201bef2eea65e0e9c56343115ba3814e896afe6d36ffd37bab783261db430f76"}, - {file = "coverage-7.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41c9c5f3de16b903b610d09650e5e27adbfa7f500302718c9ffd1c12cf9d6818"}, - {file = "coverage-7.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d898fe162d26929b5960e4e138651f7427048e72c853607f2b200909794ed978"}, - {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ea79bb50e805cd6ac058dfa3b5c8f6c040cb87fe83de10845857f5535d1db70"}, - {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce4b94265ca988c3f8e479e741693d143026632672e3ff924f25fab50518dd51"}, - {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:00838a35b882694afda09f85e469c96367daa3f3f2b097d846a7216993d37f4c"}, - {file = 
"coverage-7.4.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fdfafb32984684eb03c2d83e1e51f64f0906b11e64482df3c5db936ce3839d48"}, - {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:69eb372f7e2ece89f14751fbcbe470295d73ed41ecd37ca36ed2eb47512a6ab9"}, - {file = "coverage-7.4.4-cp312-cp312-win32.whl", hash = "sha256:137eb07173141545e07403cca94ab625cc1cc6bc4c1e97b6e3846270e7e1fea0"}, - {file = "coverage-7.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d71eec7d83298f1af3326ce0ff1d0ea83c7cb98f72b577097f9083b20bdaf05e"}, - {file = "coverage-7.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d5ae728ff3b5401cc320d792866987e7e7e880e6ebd24433b70a33b643bb0384"}, - {file = "coverage-7.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc4f1358cb0c78edef3ed237ef2c86056206bb8d9140e73b6b89fbcfcbdd40e1"}, - {file = "coverage-7.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8130a2aa2acb8788e0b56938786c33c7c98562697bf9f4c7d6e8e5e3a0501e4a"}, - {file = "coverage-7.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf271892d13e43bc2b51e6908ec9a6a5094a4df1d8af0bfc360088ee6c684409"}, - {file = "coverage-7.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4cdc86d54b5da0df6d3d3a2f0b710949286094c3a6700c21e9015932b81447e"}, - {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ae71e7ddb7a413dd60052e90528f2f65270aad4b509563af6d03d53e979feafd"}, - {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:38dd60d7bf242c4ed5b38e094baf6401faa114fc09e9e6632374388a404f98e7"}, - {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aa5b1c1bfc28384f1f53b69a023d789f72b2e0ab1b3787aae16992a7ca21056c"}, - {file = "coverage-7.4.4-cp38-cp38-win32.whl", hash = "sha256:dfa8fe35a0bb90382837b238fff375de15f0dcdb9ae68ff85f7a63649c98527e"}, - {file = "coverage-7.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:b2991665420a803495e0b90a79233c1433d6ed77ef282e8e152a324bbbc5e0c8"}, - {file = "coverage-7.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3b799445b9f7ee8bf299cfaed6f5b226c0037b74886a4e11515e569b36fe310d"}, - {file = "coverage-7.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b4d33f418f46362995f1e9d4f3a35a1b6322cb959c31d88ae56b0298e1c22357"}, - {file = "coverage-7.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aadacf9a2f407a4688d700e4ebab33a7e2e408f2ca04dbf4aef17585389eff3e"}, - {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c95949560050d04d46b919301826525597f07b33beba6187d04fa64d47ac82e"}, - {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff7687ca3d7028d8a5f0ebae95a6e4827c5616b31a4ee1192bdfde697db110d4"}, - {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5fc1de20b2d4a061b3df27ab9b7c7111e9a710f10dc2b84d33a4ab25065994ec"}, - {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c74880fc64d4958159fbd537a091d2a585448a8f8508bf248d72112723974cbd"}, - {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:742a76a12aa45b44d236815d282b03cfb1de3b4323f3e4ec933acfae08e54ade"}, - {file = "coverage-7.4.4-cp39-cp39-win32.whl", hash = "sha256:d89d7b2974cae412400e88f35d86af72208e1ede1a541954af5d944a8ba46c57"}, - 
{file = "coverage-7.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:9ca28a302acb19b6af89e90f33ee3e1906961f94b54ea37de6737b7ca9d8827c"}, - {file = "coverage-7.4.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:b2c5edc4ac10a7ef6605a966c58929ec6c1bd0917fb8c15cb3363f65aa40e677"}, - {file = "coverage-7.4.4.tar.gz", hash = "sha256:c901df83d097649e257e803be22592aedfd5182f07b3cc87d640bbb9afd50f49"}, + {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"}, + {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"}, + {file = "coverage-7.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:923b7b1c717bd0f0f92d862d1ff51d9b2b55dbbd133e05680204465f454bb286"}, + {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62bda40da1e68898186f274f832ef3e759ce929da9a9fd9fcf265956de269dbc"}, + {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8b7339180d00de83e930358223c617cc343dd08e1aa5ec7b06c3a121aec4e1d"}, + {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:25a5caf742c6195e08002d3b6c2dd6947e50efc5fc2c2205f61ecb47592d2d83"}, + {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:05ac5f60faa0c704c0f7e6a5cbfd6f02101ed05e0aee4d2822637a9e672c998d"}, + {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:239a4e75e09c2b12ea478d28815acf83334d32e722e7433471fbf641c606344c"}, + {file = "coverage-7.5.3-cp310-cp310-win32.whl", hash = "sha256:a5812840d1d00eafae6585aba38021f90a705a25b8216ec7f66aebe5b619fb84"}, + {file = "coverage-7.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:33ca90a0eb29225f195e30684ba4a6db05dbef03c2ccd50b9077714c48153cac"}, + {file = "coverage-7.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f81bc26d609bf0fbc622c7122ba6307993c83c795d2d6f6f6fd8c000a770d974"}, + {file = "coverage-7.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cec2af81f9e7569280822be68bd57e51b86d42e59ea30d10ebdbb22d2cb7232"}, + {file = "coverage-7.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55f689f846661e3f26efa535071775d0483388a1ccfab899df72924805e9e7cd"}, + {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50084d3516aa263791198913a17354bd1dc627d3c1639209640b9cac3fef5807"}, + {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341dd8f61c26337c37988345ca5c8ccabeff33093a26953a1ac72e7d0103c4fb"}, + {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ab0b028165eea880af12f66086694768f2c3139b2c31ad5e032c8edbafca6ffc"}, + {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5bc5a8c87714b0c67cfeb4c7caa82b2d71e8864d1a46aa990b5588fa953673b8"}, + {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38a3b98dae8a7c9057bd91fbf3415c05e700a5114c5f1b5b0ea5f8f429ba6614"}, + {file = "coverage-7.5.3-cp311-cp311-win32.whl", hash = "sha256:fcf7d1d6f5da887ca04302db8e0e0cf56ce9a5e05f202720e49b3e8157ddb9a9"}, + {file = "coverage-7.5.3-cp311-cp311-win_amd64.whl", hash = 
"sha256:8c836309931839cca658a78a888dab9676b5c988d0dd34ca247f5f3e679f4e7a"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"}, + {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = "sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"}, + {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"}, + {file = "coverage-7.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f78300789a708ac1f17e134593f577407d52d0417305435b134805c4fb135adb"}, + {file = "coverage-7.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b368e1aee1b9b75757942d44d7598dcd22a9dbb126affcbba82d15917f0cc155"}, + {file = "coverage-7.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f836c174c3a7f639bded48ec913f348c4761cbf49de4a20a956d3431a7c9cb24"}, + {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:244f509f126dc71369393ce5fea17c0592c40ee44e607b6d855e9c4ac57aac98"}, + {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c2872b3c91f9baa836147ca33650dc5c172e9273c808c3c3199c75490e709d"}, + {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dd4b3355b01273a56b20c219e74e7549e14370b31a4ffe42706a8cda91f19f6d"}, + {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f542287b1489c7a860d43a7d8883e27ca62ab84ca53c965d11dac1d3a1fab7ce"}, + {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:75e3f4e86804023e991096b29e147e635f5e2568f77883a1e6eed74512659ab0"}, + {file = "coverage-7.5.3-cp38-cp38-win32.whl", hash = "sha256:c59d2ad092dc0551d9f79d9d44d005c945ba95832a6798f98f9216ede3d5f485"}, + {file = "coverage-7.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:fa21a04112c59ad54f69d80e376f7f9d0f5f9123ab87ecd18fbb9ec3a2beed56"}, + {file = "coverage-7.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5102a92855d518b0996eb197772f5ac2a527c0ec617124ad5242a3af5e25f85"}, + {file = "coverage-7.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d1da0a2e3b37b745a2b2a678a4c796462cf753aebf94edcc87dcc6b8641eae31"}, + {file = 
"coverage-7.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8383a6c8cefba1b7cecc0149415046b6fc38836295bc4c84e820872eb5478b3d"}, + {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aad68c3f2566dfae84bf46295a79e79d904e1c21ccfc66de88cd446f8686341"}, + {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e079c9ec772fedbade9d7ebc36202a1d9ef7291bc9b3a024ca395c4d52853d7"}, + {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bde997cac85fcac227b27d4fb2c7608a2c5f6558469b0eb704c5726ae49e1c52"}, + {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:990fb20b32990b2ce2c5f974c3e738c9358b2735bc05075d50a6f36721b8f303"}, + {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3d5a67f0da401e105753d474369ab034c7bae51a4c31c77d94030d59e41df5bd"}, + {file = "coverage-7.5.3-cp39-cp39-win32.whl", hash = "sha256:e08c470c2eb01977d221fd87495b44867a56d4d594f43739a8028f8646a51e0d"}, + {file = "coverage-7.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:1d2a830ade66d3563bb61d1e3c77c8def97b30ed91e166c67d0632c018f380f0"}, + {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"}, + {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"}, ] [package.dependencies] @@ -688,6 +696,34 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "dask" +version = "2023.3.1" +description = "Parallel PyData with Task Scheduling" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dask-2023.3.1-py3-none-any.whl", hash = "sha256:4a83c05760aedb7deeee8c16d24479292635a1ded6c3f803bf6c3d94ec9e7d20"}, + {file = "dask-2023.3.1.tar.gz", hash = "sha256:62d334012d7cd814186931ea83ebf1a6231c2af4260ad204dc78080a55947c17"}, +] + +[package.dependencies] +click = ">=7.0" +cloudpickle = ">=1.1.1" +fsspec = ">=0.6.0" +packaging = ">=20.0" +partd = ">=1.2.0" +pyyaml = ">=5.3.1" +toolz = ">=0.8.2" + +[package.extras] +array = ["numpy (>=1.21)"] +complete = ["bokeh (>=2.4.2,<3)", "distributed (==2023.3.1)", "jinja2 (>=2.10.3)", "lz4 (>=4.3.2)", "numpy (>=1.21)", "pandas (>=1.3)", "pyarrow (>=7.0)"] +dataframe = ["numpy (>=1.21)", "pandas (>=1.3)"] +diagnostics = ["bokeh (>=2.4.2,<3)", "jinja2 (>=2.10.3)"] +distributed = ["distributed (==2023.3.1)"] +test = ["pandas[test]", "pre-commit", "pytest", "pytest-rerunfailures", "pytest-xdist"] + [[package]] name = "dill" version = "0.3.8" @@ -705,13 +741,13 @@ profile = ["gprof2dot (>=2022.7.29)"] [[package]] name = "elastic-transport" -version = "8.13.0" +version = "8.13.1" description = "Transport classes and utilities shared among Python Elastic client libraries" optional = false python-versions = ">=3.7" files = [ - {file = "elastic-transport-8.13.0.tar.gz", hash = "sha256:2410ec1ff51221e8b3a01c0afa9f0d0498e1386a269283801f5c12f98e42dc45"}, - {file = "elastic_transport-8.13.0-py3-none-any.whl", hash = "sha256:aec890afdddd057762b27ff3553b0be8fa4673ec1a4fd922dfbd00325874bb3d"}, + {file = "elastic_transport-8.13.1-py3-none-any.whl", hash = "sha256:5d4bb6b8e9d74a9c16de274e91a5caf65a3a8d12876f1e99152975e15b2746fe"}, + {file = "elastic_transport-8.13.1.tar.gz", hash = 
"sha256:16339d392b4bbe86ad00b4bdeecff10edf516d32bc6c16053846625f2c6ea250"}, ] [package.dependencies] @@ -741,13 +777,13 @@ requests = ["requests (>=2.4.0,<3.0.0)"] [[package]] name = "exceptiongroup" -version = "1.2.0" +version = "1.2.1" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, - {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, ] [package.extras] @@ -766,53 +802,53 @@ files = [ [[package]] name = "fonttools" -version = "4.50.0" +version = "4.53.0" description = "Tools to manipulate font files" optional = false python-versions = ">=3.8" files = [ - {file = "fonttools-4.50.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effd303fb422f8ce06543a36ca69148471144c534cc25f30e5be752bc4f46736"}, - {file = "fonttools-4.50.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7913992ab836f621d06aabac118fc258b9947a775a607e1a737eb3a91c360335"}, - {file = "fonttools-4.50.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e0a1c5bd2f63da4043b63888534b52c5a1fd7ae187c8ffc64cbb7ae475b9dab"}, - {file = "fonttools-4.50.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d40fc98540fa5360e7ecf2c56ddf3c6e7dd04929543618fd7b5cc76e66390562"}, - {file = "fonttools-4.50.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fff65fbb7afe137bac3113827855e0204482727bddd00a806034ab0d3951d0d"}, - {file = "fonttools-4.50.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1aeae3dd2ee719074a9372c89ad94f7c581903306d76befdaca2a559f802472"}, - {file = "fonttools-4.50.0-cp310-cp310-win32.whl", hash = "sha256:e9623afa319405da33b43c85cceb0585a6f5d3a1d7c604daf4f7e1dd55c03d1f"}, - {file = "fonttools-4.50.0-cp310-cp310-win_amd64.whl", hash = "sha256:778c5f43e7e654ef7fe0605e80894930bc3a7772e2f496238e57218610140f54"}, - {file = "fonttools-4.50.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3dfb102e7f63b78c832e4539969167ffcc0375b013080e6472350965a5fe8048"}, - {file = "fonttools-4.50.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e58fe34cb379ba3d01d5d319d67dd3ce7ca9a47ad044ea2b22635cd2d1247fc"}, - {file = "fonttools-4.50.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c673ab40d15a442a4e6eb09bf007c1dda47c84ac1e2eecbdf359adacb799c24"}, - {file = "fonttools-4.50.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b3ac35cdcd1a4c90c23a5200212c1bb74fa05833cc7c14291d7043a52ca2aaa"}, - {file = "fonttools-4.50.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8844e7a2c5f7ecf977e82eb6b3014f025c8b454e046d941ece05b768be5847ae"}, - {file = "fonttools-4.50.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f849bd3c5c2249b49c98eca5aaebb920d2bfd92b3c69e84ca9bddf133e9f83f0"}, - {file = "fonttools-4.50.0-cp311-cp311-win32.whl", hash = "sha256:39293ff231b36b035575e81c14626dfc14407a20de5262f9596c2cbb199c3625"}, - {file = "fonttools-4.50.0-cp311-cp311-win_amd64.whl", hash = "sha256:c33d5023523b44d3481624f840c8646656a1def7630ca562f222eb3ead16c438"}, - {file = 
"fonttools-4.50.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b4a886a6dbe60100ba1cd24de962f8cd18139bd32808da80de1fa9f9f27bf1dc"}, - {file = "fonttools-4.50.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b2ca1837bfbe5eafa11313dbc7edada79052709a1fffa10cea691210af4aa1fa"}, - {file = "fonttools-4.50.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0493dd97ac8977e48ffc1476b932b37c847cbb87fd68673dee5182004906828"}, - {file = "fonttools-4.50.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77844e2f1b0889120b6c222fc49b2b75c3d88b930615e98893b899b9352a27ea"}, - {file = "fonttools-4.50.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3566bfb8c55ed9100afe1ba6f0f12265cd63a1387b9661eb6031a1578a28bad1"}, - {file = "fonttools-4.50.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:35e10ddbc129cf61775d58a14f2d44121178d89874d32cae1eac722e687d9019"}, - {file = "fonttools-4.50.0-cp312-cp312-win32.whl", hash = "sha256:cc8140baf9fa8f9b903f2b393a6c413a220fa990264b215bf48484f3d0bf8710"}, - {file = "fonttools-4.50.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ccc85fd96373ab73c59833b824d7a73846670a0cb1f3afbaee2b2c426a8f931"}, - {file = "fonttools-4.50.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e270a406219af37581d96c810172001ec536e29e5593aa40d4c01cca3e145aa6"}, - {file = "fonttools-4.50.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac2463de667233372e9e1c7e9de3d914b708437ef52a3199fdbf5a60184f190c"}, - {file = "fonttools-4.50.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47abd6669195abe87c22750dbcd366dc3a0648f1b7c93c2baa97429c4dc1506e"}, - {file = "fonttools-4.50.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:074841375e2e3d559aecc86e1224caf78e8b8417bb391e7d2506412538f21adc"}, - {file = "fonttools-4.50.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0743fd2191ad7ab43d78cd747215b12033ddee24fa1e088605a3efe80d6984de"}, - {file = "fonttools-4.50.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3d7080cce7be5ed65bee3496f09f79a82865a514863197ff4d4d177389e981b0"}, - {file = "fonttools-4.50.0-cp38-cp38-win32.whl", hash = "sha256:a467ba4e2eadc1d5cc1a11d355abb945f680473fbe30d15617e104c81f483045"}, - {file = "fonttools-4.50.0-cp38-cp38-win_amd64.whl", hash = "sha256:f77e048f805e00870659d6318fd89ef28ca4ee16a22b4c5e1905b735495fc422"}, - {file = "fonttools-4.50.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6245eafd553c4e9a0708e93be51392bd2288c773523892fbd616d33fd2fda59"}, - {file = "fonttools-4.50.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a4062cc7e8de26f1603323ef3ae2171c9d29c8a9f5e067d555a2813cd5c7a7e0"}, - {file = "fonttools-4.50.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34692850dfd64ba06af61e5791a441f664cb7d21e7b544e8f385718430e8f8e4"}, - {file = "fonttools-4.50.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:678dd95f26a67e02c50dcb5bf250f95231d455642afbc65a3b0bcdacd4e4dd38"}, - {file = "fonttools-4.50.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4f2ce7b0b295fe64ac0a85aef46a0f2614995774bd7bc643b85679c0283287f9"}, - {file = "fonttools-4.50.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d346f4dc2221bfb7ab652d1e37d327578434ce559baf7113b0f55768437fe6a0"}, - {file = "fonttools-4.50.0-cp39-cp39-win32.whl", hash = "sha256:a51eeaf52ba3afd70bf489be20e52fdfafe6c03d652b02477c6ce23c995222f4"}, - {file = 
"fonttools-4.50.0-cp39-cp39-win_amd64.whl", hash = "sha256:8639be40d583e5d9da67795aa3eeeda0488fb577a1d42ae11a5036f18fb16d93"}, - {file = "fonttools-4.50.0-py3-none-any.whl", hash = "sha256:48fa36da06247aa8282766cfd63efff1bb24e55f020f29a335939ed3844d20d3"}, - {file = "fonttools-4.50.0.tar.gz", hash = "sha256:fa5cf61058c7dbb104c2ac4e782bf1b2016a8cf2f69de6e4dd6a865d2c969bb5"}, + {file = "fonttools-4.53.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:52a6e0a7a0bf611c19bc8ec8f7592bdae79c8296c70eb05917fd831354699b20"}, + {file = "fonttools-4.53.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:099634631b9dd271d4a835d2b2a9e042ccc94ecdf7e2dd9f7f34f7daf333358d"}, + {file = "fonttools-4.53.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e40013572bfb843d6794a3ce076c29ef4efd15937ab833f520117f8eccc84fd6"}, + {file = "fonttools-4.53.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:715b41c3e231f7334cbe79dfc698213dcb7211520ec7a3bc2ba20c8515e8a3b5"}, + {file = "fonttools-4.53.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74ae2441731a05b44d5988d3ac2cf784d3ee0a535dbed257cbfff4be8bb49eb9"}, + {file = "fonttools-4.53.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:95db0c6581a54b47c30860d013977b8a14febc206c8b5ff562f9fe32738a8aca"}, + {file = "fonttools-4.53.0-cp310-cp310-win32.whl", hash = "sha256:9cd7a6beec6495d1dffb1033d50a3f82dfece23e9eb3c20cd3c2444d27514068"}, + {file = "fonttools-4.53.0-cp310-cp310-win_amd64.whl", hash = "sha256:daaef7390e632283051e3cf3e16aff2b68b247e99aea916f64e578c0449c9c68"}, + {file = "fonttools-4.53.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a209d2e624ba492df4f3bfad5996d1f76f03069c6133c60cd04f9a9e715595ec"}, + {file = "fonttools-4.53.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4f520d9ac5b938e6494f58a25c77564beca7d0199ecf726e1bd3d56872c59749"}, + {file = "fonttools-4.53.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eceef49f457253000e6a2d0f7bd08ff4e9fe96ec4ffce2dbcb32e34d9c1b8161"}, + {file = "fonttools-4.53.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa1f3e34373aa16045484b4d9d352d4c6b5f9f77ac77a178252ccbc851e8b2ee"}, + {file = "fonttools-4.53.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:28d072169fe8275fb1a0d35e3233f6df36a7e8474e56cb790a7258ad822b6fd6"}, + {file = "fonttools-4.53.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4a2a6ba400d386e904fd05db81f73bee0008af37799a7586deaa4aef8cd5971e"}, + {file = "fonttools-4.53.0-cp311-cp311-win32.whl", hash = "sha256:bb7273789f69b565d88e97e9e1da602b4ee7ba733caf35a6c2affd4334d4f005"}, + {file = "fonttools-4.53.0-cp311-cp311-win_amd64.whl", hash = "sha256:9fe9096a60113e1d755e9e6bda15ef7e03391ee0554d22829aa506cdf946f796"}, + {file = "fonttools-4.53.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d8f191a17369bd53a5557a5ee4bab91d5330ca3aefcdf17fab9a497b0e7cff7a"}, + {file = "fonttools-4.53.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:93156dd7f90ae0a1b0e8871032a07ef3178f553f0c70c386025a808f3a63b1f4"}, + {file = "fonttools-4.53.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bff98816cb144fb7b85e4b5ba3888a33b56ecef075b0e95b95bcd0a5fbf20f06"}, + {file = "fonttools-4.53.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:973d030180eca8255b1bce6ffc09ef38a05dcec0e8320cc9b7bcaa65346f341d"}, + {file = 
"fonttools-4.53.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4ee5a24e281fbd8261c6ab29faa7fd9a87a12e8c0eed485b705236c65999109"}, + {file = "fonttools-4.53.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd5bc124fae781a4422f61b98d1d7faa47985f663a64770b78f13d2c072410c2"}, + {file = "fonttools-4.53.0-cp312-cp312-win32.whl", hash = "sha256:a239afa1126b6a619130909c8404070e2b473dd2b7fc4aacacd2e763f8597fea"}, + {file = "fonttools-4.53.0-cp312-cp312-win_amd64.whl", hash = "sha256:45b4afb069039f0366a43a5d454bc54eea942bfb66b3fc3e9a2c07ef4d617380"}, + {file = "fonttools-4.53.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:93bc9e5aaa06ff928d751dc6be889ff3e7d2aa393ab873bc7f6396a99f6fbb12"}, + {file = "fonttools-4.53.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2367d47816cc9783a28645bc1dac07f8ffc93e0f015e8c9fc674a5b76a6da6e4"}, + {file = "fonttools-4.53.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:907fa0b662dd8fc1d7c661b90782ce81afb510fc4b7aa6ae7304d6c094b27bce"}, + {file = "fonttools-4.53.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e0ad3c6ea4bd6a289d958a1eb922767233f00982cf0fe42b177657c86c80a8f"}, + {file = "fonttools-4.53.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:73121a9b7ff93ada888aaee3985a88495489cc027894458cb1a736660bdfb206"}, + {file = "fonttools-4.53.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ee595d7ba9bba130b2bec555a40aafa60c26ce68ed0cf509983e0f12d88674fd"}, + {file = "fonttools-4.53.0-cp38-cp38-win32.whl", hash = "sha256:fca66d9ff2ac89b03f5aa17e0b21a97c21f3491c46b583bb131eb32c7bab33af"}, + {file = "fonttools-4.53.0-cp38-cp38-win_amd64.whl", hash = "sha256:31f0e3147375002aae30696dd1dc596636abbd22fca09d2e730ecde0baad1d6b"}, + {file = "fonttools-4.53.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7d6166192dcd925c78a91d599b48960e0a46fe565391c79fe6de481ac44d20ac"}, + {file = "fonttools-4.53.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef50ec31649fbc3acf6afd261ed89d09eb909b97cc289d80476166df8438524d"}, + {file = "fonttools-4.53.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f193f060391a455920d61684a70017ef5284ccbe6023bb056e15e5ac3de11d1"}, + {file = "fonttools-4.53.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba9f09ff17f947392a855e3455a846f9855f6cf6bec33e9a427d3c1d254c712f"}, + {file = "fonttools-4.53.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0c555e039d268445172b909b1b6bdcba42ada1cf4a60e367d68702e3f87e5f64"}, + {file = "fonttools-4.53.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5a4788036201c908079e89ae3f5399b33bf45b9ea4514913f4dbbe4fac08efe0"}, + {file = "fonttools-4.53.0-cp39-cp39-win32.whl", hash = "sha256:d1a24f51a3305362b94681120c508758a88f207fa0a681c16b5a4172e9e6c7a9"}, + {file = "fonttools-4.53.0-cp39-cp39-win_amd64.whl", hash = "sha256:1e677bfb2b4bd0e5e99e0f7283e65e47a9814b0486cb64a41adf9ef110e078f2"}, + {file = "fonttools-4.53.0-py3-none-any.whl", hash = "sha256:6b4f04b1fbc01a3569d63359f2227c89ab294550de277fd09d8fca6185669fa4"}, + {file = "fonttools-4.53.0.tar.gz", hash = "sha256:c93ed66d32de1559b6fc348838c7572d5c0ac1e4a258e76763a5caddd8944002"}, ] [package.extras] @@ -977,13 +1013,13 @@ six = "*" [[package]] name = "idna" -version = "3.6" +version = "3.7" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" files = [ - {file = "idna-3.6-py3-none-any.whl", hash = 
"sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, - {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] [[package]] @@ -1050,13 +1086,13 @@ colors = ["colorama (>=0.4.6)"] [[package]] name = "jinja2" -version = "3.1.3" +version = "3.1.4" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" files = [ - {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, - {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, + {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, + {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, ] [package.dependencies] @@ -1235,6 +1271,17 @@ files = [ {file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"}, ] +[[package]] +name = "locket" +version = "1.0.0" +description = "File-based locks for Python on Linux and Windows" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "locket-1.0.0-py2.py3-none-any.whl", hash = "sha256:b6c819a722f7b6bd955b80781788e4a66a55628b858d347536b7e81325a3a5e3"}, + {file = "locket-1.0.0.tar.gz", hash = "sha256:5c0d4c052a8bbbf750e056a8e65ccd309086f4f0f18a2eac306a8dfa4112a632"}, +] + [[package]] name = "markupsafe" version = "2.1.5" @@ -1636,13 +1683,13 @@ files = [ [[package]] name = "packaging" -version = "24.0" +version = "24.1" description = "Core utilities for Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, - {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] [[package]] @@ -1712,6 +1759,24 @@ sql-other = ["SQLAlchemy (>=1.4.16)"] test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.6.3)"] +[[package]] +name = "partd" +version = "1.4.1" +description = "Appendable key-value storage" +optional = false +python-versions = ">=3.7" +files = [ + {file = "partd-1.4.1-py3-none-any.whl", hash = "sha256:27e766663d36c161e2827aa3e28541c992f0b9527d3cca047e13fb3acdb989e6"}, + {file = "partd-1.4.1.tar.gz", hash = "sha256:56c25dd49e6fea5727e731203c466c6e092f308d8f0024e199d02f6aa2167f67"}, +] + +[package.dependencies] +locket = "*" +toolz = "*" + +[package.extras] +complete = ["blosc", "numpy (>=1.9.0)", "pandas (>=0.19.0)", "pyzmq"] + [[package]] name = "pillow" version = "8.1.0" @@ -1755,28 +1820,29 @@ files = [ [[package]] name = "platformdirs" -version = "4.2.0" -description = "A small Python package for determining appropriate 
platform-specific dirs, e.g. a \"user data dir\"." +version = "4.2.2" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, - {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, + {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, + {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, ] [package.extras] docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] [[package]] name = "pluggy" -version = "1.4.0" +version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" files = [ - {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, - {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, ] [package.extras] @@ -2115,13 +2181,13 @@ test = ["boto3 (>=1.2.4)", "hypothesis", "packaging", "pytest (>=2.8.2)", "pytes [[package]] name = "requests" -version = "2.31.0" +version = "2.32.3" description = "Python HTTP for Humans." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, - {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] [package.dependencies] @@ -2198,18 +2264,15 @@ crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] [[package]] name = "schema" -version = "0.7.5" +version = "0.7.7" description = "Simple data validation library" optional = false python-versions = "*" files = [ - {file = "schema-0.7.5-py2.py3-none-any.whl", hash = "sha256:f3ffdeeada09ec34bf40d7d79996d9f7175db93b7a5065de0faa7f41083c1e6c"}, - {file = "schema-0.7.5.tar.gz", hash = "sha256:f06717112c61895cabc4707752b88716e8420a8819d71404501e114f91043197"}, + {file = "schema-0.7.7-py2.py3-none-any.whl", hash = "sha256:5d976a5b50f36e74e2157b47097b60002bd4d42e65425fcc9c9befadb4255dde"}, + {file = "schema-0.7.7.tar.gz", hash = "sha256:7da553abd2958a19dc2547c388cde53398b39196175a9be59ea1caf5ab0a1807"}, ] -[package.dependencies] -contextlib2 = ">=0.5.5" - [[package]] name = "scipy" version = "1.6.0" @@ -2243,19 +2306,18 @@ numpy = ">=1.16.5" [[package]] name = "setuptools" -version = "69.2.0" +version = "70.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-69.2.0-py3-none-any.whl", hash = "sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c"}, - {file = "setuptools-69.2.0.tar.gz", hash = "sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e"}, + {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"}, + {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", 
"sphinxcontrib-towncrier"] +testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "shapely" @@ -2336,13 +2398,24 @@ files = [ [[package]] name = "tomlkit" -version = "0.12.4" +version = "0.12.5" description = "Style preserving TOML library" optional = false python-versions = ">=3.7" files = [ - {file = "tomlkit-0.12.4-py3-none-any.whl", hash = "sha256:5cd82d48a3dd89dee1f9d64420aa20ae65cfbd00668d6f094d7578a78efbb77b"}, - {file = "tomlkit-0.12.4.tar.gz", hash = "sha256:7ca1cfc12232806517a8515047ba66a19369e71edf2439d0f5824f91032b6cc3"}, + {file = "tomlkit-0.12.5-py3-none-any.whl", hash = "sha256:af914f5a9c59ed9d0762c7b64d3b5d5df007448eb9cd2edc8a46b1eafead172f"}, + {file = "tomlkit-0.12.5.tar.gz", hash = "sha256:eef34fba39834d4d6b73c9ba7f3e4d1c417a4e56f89a7e96e090dd0d24b8fb3c"}, +] + +[[package]] +name = "toolz" +version = "0.12.1" +description = "List processing tools and functional utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "toolz-0.12.1-py3-none-any.whl", hash = "sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85"}, + {file = "toolz-0.12.1.tar.gz", hash = "sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d"}, ] [[package]] @@ -2397,13 +2470,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.10.0" +version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, - {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] [[package]] @@ -2671,20 +2744,20 @@ jupyter = ["ipytree", "notebook"] [[package]] name = "zipp" -version = "3.18.1" +version = "3.19.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" files = [ - {file = "zipp-3.18.1-py3-none-any.whl", hash = "sha256:206f5a15f2af3dbaee80769fb7dc6f249695e940acca08dfb2a4769fe61e538b"}, - {file = "zipp-3.18.1.tar.gz", hash = "sha256:2884ed22e7d8961de1c9a05142eb69a247f120291bc0206a00a7642f09b5b715"}, + {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, + {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", 
"pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] [metadata] lock-version = "2.0" python-versions = "~=3.8" -content-hash = "0f8054c5991ada6a82c25cd2a010820ab2a21bdda86cd94324e77eec9109ac9d" +content-hash = "06c58e556b59874647eaf39875e35ec3095bf945000542eedbf3cf7847e33bb8" From 9a11badd23065773e095ae94a359f4dd9a6f703c Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jun 2024 07:50:10 -0700 Subject: [PATCH 88/91] Fix bad merge --- analysis/webservice/webapp.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/analysis/webservice/webapp.py b/analysis/webservice/webapp.py index bade1b4b..9b11e6ae 100644 --- a/analysis/webservice/webapp.py +++ b/analysis/webservice/webapp.py @@ -144,12 +144,6 @@ def main(): server.listen(options.port) log.info('Waiting for dataset backends to come up...') - with NexusTileService.DS_LOCK: - if not NexusTileService.is_update_tread_alive(): - raise Exception('Backend thread crashed') - - log.info('Waiting for dataset backends to come up...') - with NexusTileService.DS_LOCK: if not NexusTileService.is_update_thread_alive(): log.critical('A fatal error occurred when loading the datasets') From c7e6e8256a63f6a0a504da8400e967eb097116fa Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 10 Jul 2024 13:59:30 -0700 Subject: [PATCH 89/91] poetry lock --- poetry.lock | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 690e672b..cda7bccb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,20 @@ # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +[[package]] +name = "affine" +version = "2.4.0" +description = "Matrices describing affine transformation of the plane" +optional = false +python-versions = ">=3.7" +files = [ + {file = "affine-2.4.0-py3-none-any.whl", hash = "sha256:8a3df80e2b2378aef598a83c1392efd47967afec4242021a0b06b4c7cbc61a92"}, + {file = "affine-2.4.0.tar.gz", hash = "sha256:a24d818d6a836c131976d22f8c27b8d3ca32d0af64c1d8d29deb7bafa4da1eea"}, +] + +[package.extras] +dev = ["coveralls", "flake8", "pydocstyle"] +test = ["pytest (>=4.6)", "pytest-cov"] + [[package]] name = "aiobotocore" version = "2.3.4" @@ -410,6 +425,40 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "click-plugins" +version = "1.1.1" +description = "An extension module for click to enable registering CLI commands via setuptools entry-points." 
+optional = false +python-versions = "*" +files = [ + {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"}, + {file = "click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8"}, +] + +[package.dependencies] +click = ">=4.0" + +[package.extras] +dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] + +[[package]] +name = "cligj" +version = "0.7.2" +description = "Click params for commmand line interfaces to GeoJSON" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4" +files = [ + {file = "cligj-0.7.2-py3-none-any.whl", hash = "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df"}, + {file = "cligj-0.7.2.tar.gz", hash = "sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27"}, +] + +[package.dependencies] +click = ">=4.0" + +[package.extras] +test = ["pytest-cov"] + [[package]] name = "cloudpickle" version = "3.0.0" @@ -2122,6 +2171,43 @@ files = [ {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, ] +[[package]] +name = "rasterio" +version = "1.2.8" +description = "Fast and direct raster I/O for use with Numpy and SciPy" +optional = false +python-versions = ">=3.6" +files = [ + {file = "rasterio-1.2.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca9bb81518d369cbeae57aff3608538f61dcf8e12cf4bfe92dd931bc263ade"}, + {file = "rasterio-1.2.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:8b13e5f7c57512885bfede605b021bd025684b507dac7b5ac8aee5430930114c"}, + {file = "rasterio-1.2.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:84cea32a0675f1b22924664169f0da4be406e790072501d505aa771da9662bca"}, + {file = "rasterio-1.2.8-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:722f76a09091d22fd26200ff5a7820eeb263f1cd22151c77e25a3cc473d5af72"}, + {file = "rasterio-1.2.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:74e80044767f41c84121fef373926f0d3e7411b944982429328d11442a649c0a"}, + {file = "rasterio-1.2.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c30caf0823811553ec45865697afc5e4a1fc4c6f30f8dada9067061cd8507af6"}, + {file = "rasterio-1.2.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2d41b39e41a7011d1dba0cb8983336de69ad68fb17857ccbd11cde0fcfdb2e3"}, + {file = "rasterio-1.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:f248a64f57a0db5b12eb50d9ac1ccd45a6085d979c3e73b406f9e6b5a8355080"}, + {file = "rasterio-1.2.8.tar.gz", hash = "sha256:8196b7a71fea3c1573dd48d9ab0e78955e1d26e81848cce318c4930bd96782fe"}, +] + +[package.dependencies] +affine = "*" +attrs = "*" +certifi = "*" +click = ">=4.0" +click-plugins = "*" +cligj = ">=0.5" +numpy = "*" +setuptools = "*" +snuggs = ">=1.4.1" + +[package.extras] +all = ["boto3 (>=1.2.4)", "ghp-import", "hypothesis", "ipython (>=2.0)", "matplotlib", "numpydoc", "packaging", "pytest (>=2.8.2)", "pytest-cov (>=2.2.0)", "shapely", "sphinx", "sphinx-rtd-theme"] +docs = ["ghp-import", "numpydoc", "sphinx", "sphinx-rtd-theme"] +ipython = ["ipython (>=2.0)"] +plot = ["matplotlib"] +s3 = ["boto3 (>=1.2.4)"] +test = ["boto3 (>=1.2.4)", "hypothesis", "packaging", "pytest (>=2.8.2)", "pytest-cov (>=2.2.0)", "shapely"] + [[package]] name = "requests" version = "2.32.3" @@ -2143,6 +2229,31 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "rioxarray" 
+version = "0.13.4" +description = "geospatial xarray extension powered by rasterio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "rioxarray-0.13.4-py3-none-any.whl", hash = "sha256:56eef711d9817d3c729c1a267c940e7dff66bfc874a0b24ed3604ea2f958dfb2"}, + {file = "rioxarray-0.13.4.tar.gz", hash = "sha256:0cad24ad2c3c5ee181a0cfad2b8c2152a609b7eb118a3430034aec171e9cf14f"}, +] + +[package.dependencies] +numpy = ">=1.21" +packaging = "*" +pyproj = ">=2.2" +rasterio = ">=1.1.1" +xarray = ">=0.17" + +[package.extras] +all = ["dask", "mypy", "nbsphinx", "netcdf4", "pre-commit", "pylint", "pytest (>=3.6)", "pytest-cov", "pytest-timeout", "scipy", "sphinx-click", "sphinx-rtd-theme"] +dev = ["dask", "mypy", "nbsphinx", "netcdf4", "pre-commit", "pylint", "pytest (>=3.6)", "pytest-cov", "pytest-timeout", "scipy", "sphinx-click", "sphinx-rtd-theme"] +doc = ["nbsphinx", "sphinx-click", "sphinx-rtd-theme"] +interp = ["scipy"] +test = ["dask", "netcdf4", "pytest (>=3.6)", "pytest-cov", "pytest-timeout"] + [[package]] name = "s3fs" version = "2022.5.0" @@ -2222,6 +2333,21 @@ files = [ [package.dependencies] numpy = ">=1.16.5" +[[package]] +name = "setuptools" +version = "70.3.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-70.3.0-py3-none-any.whl", hash = "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc"}, + {file = "setuptools-70.3.0.tar.gz", hash = "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5"}, +] + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.10.0)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "shapely" version = "1.7.1" @@ -2270,6 +2396,24 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "snuggs" +version = "1.4.7" +description = "Snuggs are s-expressions for Numpy" +optional = false +python-versions = "*" +files = [ + {file = "snuggs-1.4.7-py3-none-any.whl", hash = "sha256:988dde5d4db88e9d71c99457404773dabcc7a1c45971bfbe81900999942d9f07"}, + {file = "snuggs-1.4.7.tar.gz", hash = "sha256:501cf113fe3892e14e2fee76da5cd0606b7e149c411c271898e6259ebde2617b"}, +] + +[package.dependencies] +numpy = "*" +pyparsing = ">=2.1.6" + +[package.extras] +test = ["hypothesis", "pytest"] + [[package]] name = "tomli" version = "2.0.1" @@ -2283,13 +2427,13 @@ files = [ [[package]] name = "tomlkit" -version = "0.12.5" +version = "0.13.0" description = "Style preserving TOML library" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "tomlkit-0.12.5-py3-none-any.whl", hash = 
"sha256:af914f5a9c59ed9d0762c7b64d3b5d5df007448eb9cd2edc8a46b1eafead172f"}, - {file = "tomlkit-0.12.5.tar.gz", hash = "sha256:eef34fba39834d4d6b73c9ba7f3e4d1c417a4e56f89a7e96e090dd0d24b8fb3c"}, + {file = "tomlkit-0.13.0-py3-none-any.whl", hash = "sha256:7075d3042d03b80f603482d69bf0c8f345c2b30e41699fd8883227f89972b264"}, + {file = "tomlkit-0.13.0.tar.gz", hash = "sha256:08ad192699734149f5b97b45f1f18dad7eb1b6d16bc72ad0c2335772650d7b72"}, ] [[package]] @@ -2645,4 +2789,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "~=3.8" -content-hash = "d5724c5c4ddd9411f2faf5519c3b3584bc9b8259e84d5c692911322ff67f4042" +content-hash = "04bda1af8a15475675de6547f3adea6b4d53e30f7049d594f8d7669f4450767c" From f2e5cf184974a5c9d96c48f58a08a1194ead795f Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 8 Oct 2024 07:27:21 -0700 Subject: [PATCH 90/91] Poetry re-lock --- poetry.lock | 571 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 414 insertions(+), 157 deletions(-) diff --git a/poetry.lock b/poetry.lock index de1ef5d5..731304cd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,20 @@ # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +[[package]] +name = "affine" +version = "2.4.0" +description = "Matrices describing affine transformation of the plane" +optional = false +python-versions = ">=3.7" +files = [ + {file = "affine-2.4.0-py3-none-any.whl", hash = "sha256:8a3df80e2b2378aef598a83c1392efd47967afec4242021a0b06b4c7cbc61a92"}, + {file = "affine-2.4.0.tar.gz", hash = "sha256:a24d818d6a836c131976d22f8c27b8d3ca32d0af64c1d8d29deb7bafa4da1eea"}, +] + +[package.extras] +dev = ["coveralls", "flake8", "pydocstyle"] +test = ["pytest (>=4.6)", "pytest-cov"] + [[package]] name = "aiobotocore" version = "2.3.4" @@ -414,6 +429,40 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "click-plugins" +version = "1.1.1" +description = "An extension module for click to enable registering CLI commands via setuptools entry-points." 
+optional = false +python-versions = "*" +files = [ + {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"}, + {file = "click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8"}, +] + +[package.dependencies] +click = ">=4.0" + +[package.extras] +dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] + +[[package]] +name = "cligj" +version = "0.7.2" +description = "Click params for commmand line interfaces to GeoJSON" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4" +files = [ + {file = "cligj-0.7.2-py3-none-any.whl", hash = "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df"}, + {file = "cligj-0.7.2.tar.gz", hash = "sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27"}, +] + +[package.dependencies] +click = ">=4.0" + +[package.extras] +test = ["pytest-cov"] + [[package]] name = "cloudpickle" version = "3.0.0" @@ -701,13 +750,13 @@ test = ["pandas[test]", "pre-commit", "pytest", "pytest-rerunfailures", "pytest- [[package]] name = "dill" -version = "0.3.8" +version = "0.3.9" description = "serialize all of Python" optional = false python-versions = ">=3.8" files = [ - {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, - {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, + {file = "dill-0.3.9-py3-none-any.whl", hash = "sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a"}, + {file = "dill-0.3.9.tar.gz", hash = "sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c"}, ] [package.extras] @@ -777,59 +826,59 @@ files = [ [[package]] name = "fonttools" -version = "4.54.0" +version = "4.54.1" description = "Tools to manipulate font files" optional = false python-versions = ">=3.8" files = [ - {file = "fonttools-4.54.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b2957597455a21fc55849cf5094507028b241035e9bf2d98daa006c152553640"}, - {file = "fonttools-4.54.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:18a043a029994c28638bd40cf0d7dbe8edfbacb6b60f6a5ccdfcc4db98eaa4e4"}, - {file = "fonttools-4.54.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb1dd36e8612b31f30ae8fa264fdddf1a0c22bab0990c5f97542b86cbf0b77ec"}, - {file = "fonttools-4.54.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2703efc48b6e88b58249fb6316373e15e5b2e5835a58114954b290faebbd89da"}, - {file = "fonttools-4.54.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:21a209d7ff42ab567e449ba8f86af7bc5e93e2463bd07cbfae7284057d1552ac"}, - {file = "fonttools-4.54.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:812d04179b6a99bff3241153c928e1b3db98c76113375ce6b561e93dc749da3f"}, - {file = "fonttools-4.54.0-cp310-cp310-win32.whl", hash = "sha256:0d15664cbdc059ca1a32ff2a5cb5428ffd47f2e739430d9d11b0b6e2a97f2b8b"}, - {file = "fonttools-4.54.0-cp310-cp310-win_amd64.whl", hash = "sha256:abc5acdfdb01e2af1de55153f3720376edf4df8bcad84bdc54c08abda2089fd4"}, - {file = "fonttools-4.54.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:96e7a37190a20063dc6f301665180148ec7671f9b6ef089dba2280a8434adacc"}, - {file = "fonttools-4.54.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a42e0500944de3abf8723a439c7c94678d14b702808a593d7bfcece4a3ff4479"}, - {file = 
"fonttools-4.54.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24160f6df15e01d0edfb64729373950c2869871a611924d50c2e676162dcc42d"}, - {file = "fonttools-4.54.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3c556e69f66de64b2604d6875d5d1913484f89336d782a4bb89b772648436a3"}, - {file = "fonttools-4.54.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2ee6664fe61a932f52c499d2e8d72e6c7c6207449e2ec12928ebf80f2580ea31"}, - {file = "fonttools-4.54.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:79bb6834403cbb0f851df7173e8e9adbcfe3bb2e09a472de4c2e8a2667257b47"}, - {file = "fonttools-4.54.0-cp311-cp311-win32.whl", hash = "sha256:6679b471655f4f6bcdacb2b05bb059fc8d10983870e1a039d101da50562b90ec"}, - {file = "fonttools-4.54.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d328d8d7414d7a70186a0d5c6fe9eea04b8b019ae070964b0555acfa763bba"}, - {file = "fonttools-4.54.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:34758e8481a5054e7e203c5e15c41dc3ec67716407bd1f00ebf014fe94f934e3"}, - {file = "fonttools-4.54.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:49124ff0efd6ded3e320912409527c9f3dae34acf34dcca141961a0c2dee484e"}, - {file = "fonttools-4.54.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:105b4dbf35bd8aad2c79b8b12ca911a00d7e445a251383a523497e0fb06c4242"}, - {file = "fonttools-4.54.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b6b613894d8e90093326ab6014c202a7a503e34dfb4a632b2ec78078f406c43"}, - {file = "fonttools-4.54.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6587da0a397c9ae36b8c7e3febfca8c4563d287f7339d805cd4a9a356a39f6bf"}, - {file = "fonttools-4.54.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:801bdd3496ec6df3920ae5cf43567208246c944288d2a508985491c9126f4dd9"}, - {file = "fonttools-4.54.0-cp312-cp312-win32.whl", hash = "sha256:e299ecc34635621b792bf42dcc3be50810dd74c888474e09b47596853a08db56"}, - {file = "fonttools-4.54.0-cp312-cp312-win_amd64.whl", hash = "sha256:f7b2e35b912235290b5e8df0cab17e3365be887c88588fdd9589e7635e665460"}, - {file = "fonttools-4.54.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:948fafa5035cf22ed35040c07b7a4ebe9c9d3436401d4d4a4fea19a24bee8fd5"}, - {file = "fonttools-4.54.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ef61d49d1f724dd8f1bf99328cfbc5e64900f451be0eacfcd15a1e00493779be"}, - {file = "fonttools-4.54.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d037c0b7d93408584065f5d30cd3a1c533a195d96669de116df3b594f6753b6"}, - {file = "fonttools-4.54.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dbb7646fd6f6fdf754015cbb50db10cd53770432e56bd6b2e6411fb54a1b83b2"}, - {file = "fonttools-4.54.0-cp313-cp313-win32.whl", hash = "sha256:66143c6607d85647ef5097c7d3005118288ef6d7607487c10b04549f830eee01"}, - {file = "fonttools-4.54.0-cp313-cp313-win_amd64.whl", hash = "sha256:f66a6e29a201a4e0ff8a1f33dc90781f018e0dd8caa29d33311110952bdf8285"}, - {file = "fonttools-4.54.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eb871afe7bd480d233c0c29a694cbc553743e8af9c8daa9c70284862b35c5e80"}, - {file = "fonttools-4.54.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4f864d49effec5877c1ea559e2cb01bf6162f066c9013b78e1b31c13c120bee4"}, - {file = "fonttools-4.54.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e56abc2aad22298bd62f1314940b22f613eb4e9a50c5d9450d50c4c42e4617bf"}, - {file = "fonttools-4.54.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:633bd642239412115a4d203728980bf57993f1bcd22299c71f0c2ea669b75604"}, - {file = "fonttools-4.54.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:1170ed2208ace22ebe3bd119cec42fec9d393a133c204d6c7a28f28820c1eced"}, - {file = "fonttools-4.54.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:59ed3b6fcdfc29e4ffb75d300710bef50379caa639cd8e1b83415f7f1462d6ec"}, - {file = "fonttools-4.54.0-cp38-cp38-win32.whl", hash = "sha256:c6db5c17464f50ccd1b6d362e54d5e5930e551382c79f36f5f73b2bfd20fc340"}, - {file = "fonttools-4.54.0-cp38-cp38-win_amd64.whl", hash = "sha256:c4392e878e8e8d14ab7963a5accf25802eb6a9499c40e698c9bf571816026daf"}, - {file = "fonttools-4.54.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a05cb4ebb638147a11b15eb2fffbe71bbf2df7ec6d6655430a07d97164dddb0"}, - {file = "fonttools-4.54.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7b80c2e5ce6e69291fe73f7a71f26ae767e53e8c2e4b08826d7c9524ef0ebaad"}, - {file = "fonttools-4.54.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:627c0e59883fb97be4ec46cb0561f521214f3d8a10ad7f2a4030d3cd38a0a0ab"}, - {file = "fonttools-4.54.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc4e10d9c7e9ec55431f49f7425bc5c0472f0b25ff56ad57a66d7e503d36e83e"}, - {file = "fonttools-4.54.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:370a2018eeaeba47742103ac4e3877acfa7819ea64725aa7646f16e1cbab6223"}, - {file = "fonttools-4.54.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4dc1e6ebff17e2f012d5084058fd89fd66c7fd02ac9960380fab236114a977fb"}, - {file = "fonttools-4.54.0-cp39-cp39-win32.whl", hash = "sha256:fff3ff4a7e864b98502a15b04f3b9eedd26f8ff3f60be325cd715b9af8e54d05"}, - {file = "fonttools-4.54.0-cp39-cp39-win_amd64.whl", hash = "sha256:e7e1c173b21d00f336ab0d4edf2ea64e7a8530863bae789d97cd52a4363fbd6f"}, - {file = "fonttools-4.54.0-py3-none-any.whl", hash = "sha256:351058cd623af4c45490c744e2bbc5671fc78dce95866e92122c9ba6c28ea8b6"}, - {file = "fonttools-4.54.0.tar.gz", hash = "sha256:9f3482ff1189668fa9f8eafe8ff8541fb154b6f0170f8477889c028eb893c6ee"}, + {file = "fonttools-4.54.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7ed7ee041ff7b34cc62f07545e55e1468808691dddfd315d51dd82a6b37ddef2"}, + {file = "fonttools-4.54.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41bb0b250c8132b2fcac148e2e9198e62ff06f3cc472065dff839327945c5882"}, + {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7965af9b67dd546e52afcf2e38641b5be956d68c425bef2158e95af11d229f10"}, + {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:278913a168f90d53378c20c23b80f4e599dca62fbffae4cc620c8eed476b723e"}, + {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0e88e3018ac809b9662615072dcd6b84dca4c2d991c6d66e1970a112503bba7e"}, + {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4aa4817f0031206e637d1e685251ac61be64d1adef111060df84fdcbc6ab6c44"}, + {file = "fonttools-4.54.1-cp310-cp310-win32.whl", hash = "sha256:7e3b7d44e18c085fd8c16dcc6f1ad6c61b71ff463636fcb13df7b1b818bd0c02"}, + {file = "fonttools-4.54.1-cp310-cp310-win_amd64.whl", hash = "sha256:dd9cc95b8d6e27d01e1e1f1fae8559ef3c02c76317da650a19047f249acd519d"}, + {file = 
"fonttools-4.54.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5419771b64248484299fa77689d4f3aeed643ea6630b2ea750eeab219588ba20"}, + {file = "fonttools-4.54.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:301540e89cf4ce89d462eb23a89464fef50915255ece765d10eee8b2bf9d75b2"}, + {file = "fonttools-4.54.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76ae5091547e74e7efecc3cbf8e75200bc92daaeb88e5433c5e3e95ea8ce5aa7"}, + {file = "fonttools-4.54.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82834962b3d7c5ca98cb56001c33cf20eb110ecf442725dc5fdf36d16ed1ab07"}, + {file = "fonttools-4.54.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d26732ae002cc3d2ecab04897bb02ae3f11f06dd7575d1df46acd2f7c012a8d8"}, + {file = "fonttools-4.54.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58974b4987b2a71ee08ade1e7f47f410c367cdfc5a94fabd599c88165f56213a"}, + {file = "fonttools-4.54.1-cp311-cp311-win32.whl", hash = "sha256:ab774fa225238986218a463f3fe151e04d8c25d7de09df7f0f5fce27b1243dbc"}, + {file = "fonttools-4.54.1-cp311-cp311-win_amd64.whl", hash = "sha256:07e005dc454eee1cc60105d6a29593459a06321c21897f769a281ff2d08939f6"}, + {file = "fonttools-4.54.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:54471032f7cb5fca694b5f1a0aaeba4af6e10ae989df408e0216f7fd6cdc405d"}, + {file = "fonttools-4.54.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fa92cb248e573daab8d032919623cc309c005086d743afb014c836636166f08"}, + {file = "fonttools-4.54.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a911591200114969befa7f2cb74ac148bce5a91df5645443371aba6d222e263"}, + {file = "fonttools-4.54.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93d458c8a6a354dc8b48fc78d66d2a8a90b941f7fec30e94c7ad9982b1fa6bab"}, + {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5eb2474a7c5be8a5331146758debb2669bf5635c021aee00fd7c353558fc659d"}, + {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c9c563351ddc230725c4bdf7d9e1e92cbe6ae8553942bd1fb2b2ff0884e8b714"}, + {file = "fonttools-4.54.1-cp312-cp312-win32.whl", hash = "sha256:fdb062893fd6d47b527d39346e0c5578b7957dcea6d6a3b6794569370013d9ac"}, + {file = "fonttools-4.54.1-cp312-cp312-win_amd64.whl", hash = "sha256:e4564cf40cebcb53f3dc825e85910bf54835e8a8b6880d59e5159f0f325e637e"}, + {file = "fonttools-4.54.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6e37561751b017cf5c40fce0d90fd9e8274716de327ec4ffb0df957160be3bff"}, + {file = "fonttools-4.54.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:357cacb988a18aace66e5e55fe1247f2ee706e01debc4b1a20d77400354cddeb"}, + {file = "fonttools-4.54.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8e953cc0bddc2beaf3a3c3b5dd9ab7554677da72dfaf46951e193c9653e515a"}, + {file = "fonttools-4.54.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58d29b9a294573d8319f16f2f79e42428ba9b6480442fa1836e4eb89c4d9d61c"}, + {file = "fonttools-4.54.1-cp313-cp313-win32.whl", hash = "sha256:9ef1b167e22709b46bf8168368b7b5d3efeaaa746c6d39661c1b4405b6352e58"}, + {file = "fonttools-4.54.1-cp313-cp313-win_amd64.whl", hash = "sha256:262705b1663f18c04250bd1242b0515d3bbae177bee7752be67c979b7d47f43d"}, + {file = "fonttools-4.54.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ed2f80ca07025551636c555dec2b755dd005e2ea8fbeb99fc5cdff319b70b23b"}, 
+ {file = "fonttools-4.54.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9dc080e5a1c3b2656caff2ac2633d009b3a9ff7b5e93d0452f40cd76d3da3b3c"}, + {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d152d1be65652fc65e695e5619e0aa0982295a95a9b29b52b85775243c06556"}, + {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8583e563df41fdecef31b793b4dd3af8a9caa03397be648945ad32717a92885b"}, + {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:0d1d353ef198c422515a3e974a1e8d5b304cd54a4c2eebcae708e37cd9eeffb1"}, + {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fda582236fee135d4daeca056c8c88ec5f6f6d88a004a79b84a02547c8f57386"}, + {file = "fonttools-4.54.1-cp38-cp38-win32.whl", hash = "sha256:e7d82b9e56716ed32574ee106cabca80992e6bbdcf25a88d97d21f73a0aae664"}, + {file = "fonttools-4.54.1-cp38-cp38-win_amd64.whl", hash = "sha256:ada215fd079e23e060157aab12eba0d66704316547f334eee9ff26f8c0d7b8ab"}, + {file = "fonttools-4.54.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f5b8a096e649768c2f4233f947cf9737f8dbf8728b90e2771e2497c6e3d21d13"}, + {file = "fonttools-4.54.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4e10d2e0a12e18f4e2dd031e1bf7c3d7017be5c8dbe524d07706179f355c5dac"}, + {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31c32d7d4b0958600eac75eaf524b7b7cb68d3a8c196635252b7a2c30d80e986"}, + {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c39287f5c8f4a0c5a55daf9eaf9ccd223ea59eed3f6d467133cc727d7b943a55"}, + {file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a7a310c6e0471602fe3bf8efaf193d396ea561486aeaa7adc1f132e02d30c4b9"}, + {file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d3b659d1029946f4ff9b6183984578041b520ce0f8fb7078bb37ec7445806b33"}, + {file = "fonttools-4.54.1-cp39-cp39-win32.whl", hash = "sha256:e96bc94c8cda58f577277d4a71f51c8e2129b8b36fd05adece6320dd3d57de8a"}, + {file = "fonttools-4.54.1-cp39-cp39-win_amd64.whl", hash = "sha256:e8a4b261c1ef91e7188a30571be6ad98d1c6d9fa2427244c545e2fa0a2494dd7"}, + {file = "fonttools-4.54.1-py3-none-any.whl", hash = "sha256:37cddd62d83dc4f72f7c3f3c2bcf2697e89a30efb152079896544a93907733bd"}, + {file = "fonttools-4.54.1.tar.gz", hash = "sha256:957f669d4922f92c171ba01bef7f29410668db09f6c02111e22b2bce446f3285"}, ] [package.extras] @@ -1852,6 +1901,113 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "propcache" +version = "0.2.0" +description = "Accelerated property cache" +optional = false +python-versions = ">=3.8" +files = [ + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33ac8f098df0585c0b53009f039dfd913b38c1d2edafed0cedcc0c32a05aa110"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e48e8875e6c13909c800fa344cd54cc4b2b0db1d5f911f840458a500fde2c2"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:388f3217649d6d59292b722d940d4d2e1e6a7003259eb835724092a1cca0203a"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f571aea50ba5623c308aa146eb650eebf7dbe0fd8c5d946e28343cb3b5aad577"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3dfafb44f7bb35c0c06eda6b2ab4bfd58f02729e7c4045e179f9a861b07c9850"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3ebe9a75be7ab0b7da2464a77bb27febcb4fab46a34f9288f39d74833db7f61"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2f0d0f976985f85dfb5f3d685697ef769faa6b71993b46b295cdbbd6be8cc37"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a3dc1a4b165283bd865e8f8cb5f0c64c05001e0718ed06250d8cac9bec115b48"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e0f07b42d2a50c7dd2d8675d50f7343d998c64008f1da5fef888396b7f84630"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e63e3e1e0271f374ed489ff5ee73d4b6e7c60710e1f76af5f0e1a6117cd26394"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:56bb5c98f058a41bb58eead194b4db8c05b088c93d94d5161728515bd52b052b"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7665f04d0c7f26ff8bb534e1c65068409bf4687aa2534faf7104d7182debb336"}, + {file = "propcache-0.2.0-cp310-cp310-win32.whl", hash = "sha256:7cf18abf9764746b9c8704774d8b06714bcb0a63641518a3a89c7f85cc02c2ad"}, + {file = "propcache-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:cfac69017ef97db2438efb854edf24f5a29fd09a536ff3a992b75990720cdc99"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:63f13bf09cc3336eb04a837490b8f332e0db41da66995c9fd1ba04552e516354"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608cce1da6f2672a56b24a015b42db4ac612ee709f3d29f27a00c943d9e851de"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:466c219deee4536fbc83c08d09115249db301550625c7fef1c5563a584c9bc87"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6ed8db0a556343d566a5c124ee483ae113acc9a557a807d439bcecc44e7dfbb"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91997d9cb4a325b60d4e3f20967f8eb08dfcb32b22554d5ef78e6fd1dda743a2"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c7dde9e533c0a49d802b4f3f218fa9ad0a1ce21f2c2eb80d5216565202acab4"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:97a58a28bcf63284e8b4d7b460cbee1edaab24634e82059c7b8c09e65284f178"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:945db8ee295d3af9dbdbb698cce9bbc5c59b5c3fe328bbc4387f59a8a35f998d"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:39e104da444a34830751715f45ef9fc537475ba21b7f1f5b0f4d71a3b60d7fe2"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c5ecca8f9bab618340c8e848d340baf68bcd8ad90a8ecd7a4524a81c1764b3db"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c436130cc779806bdf5d5fae0d848713105472b8566b75ff70048c47d3961c5b"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:191db28dc6dcd29d1a3e063c3be0b40688ed76434622c53a284e5427565bbd9b"}, + {file = "propcache-0.2.0-cp311-cp311-win32.whl", hash = "sha256:5f2564ec89058ee7c7989a7b719115bdfe2a2fb8e7a4543b8d1c0cc4cf6478c1"}, + {file = "propcache-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e2e54267980349b723cff366d1e29b138b9a60fa376664a157a342689553f71"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ee7606193fb267be4b2e3b32714f2d58cad27217638db98a60f9efb5efeccc2"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:91ee8fc02ca52e24bcb77b234f22afc03288e1dafbb1f88fe24db308910c4ac7"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e900bad2a8456d00a113cad8c13343f3b1f327534e3589acc2219729237a2e8"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f52a68c21363c45297aca15561812d542f8fc683c85201df0bebe209e349f793"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e41d67757ff4fbc8ef2af99b338bfb955010444b92929e9e55a6d4dcc3c4f09"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a64e32f8bd94c105cc27f42d3b658902b5bcc947ece3c8fe7bc1b05982f60e89"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55346705687dbd7ef0d77883ab4f6fabc48232f587925bdaf95219bae072491e"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00181262b17e517df2cd85656fcd6b4e70946fe62cd625b9d74ac9977b64d8d9"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6994984550eaf25dd7fc7bd1b700ff45c894149341725bb4edc67f0ffa94efa4"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:56295eb1e5f3aecd516d91b00cfd8bf3a13991de5a479df9e27dd569ea23959c"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:439e76255daa0f8151d3cb325f6dd4a3e93043e6403e6491813bcaaaa8733887"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f6475a1b2ecb310c98c28d271a30df74f9dd436ee46d09236a6b750a7599ce57"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3444cdba6628accf384e349014084b1cacd866fbb88433cd9d279d90a54e0b23"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4a9d9b4d0a9b38d1c391bb4ad24aa65f306c6f01b512e10a8a34a2dc5675d348"}, + {file = "propcache-0.2.0-cp312-cp312-win32.whl", hash = "sha256:69d3a98eebae99a420d4b28756c8ce6ea5a29291baf2dc9ff9414b42676f61d5"}, + {file = "propcache-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad9c9b99b05f163109466638bd30ada1722abb01bbb85c739c50b6dc11f92dc3"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecddc221a077a8132cf7c747d5352a15ed763b674c0448d811f408bf803d9ad7"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:0e53cb83fdd61cbd67202735e6a6687a7b491c8742dfc39c9e01e80354956763"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92fe151145a990c22cbccf9ae15cae8ae9eddabfc949a219c9f667877e40853d"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a21ef516d36909931a2967621eecb256018aeb11fc48656e3257e73e2e247a"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f88a4095e913f98988f5b338c1d4d5d07dbb0b6bad19892fd447484e483ba6b"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a5b3bb545ead161be780ee85a2b54fdf7092815995661947812dde94a40f6fb"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67aeb72e0f482709991aa91345a831d0b707d16b0257e8ef88a2ad246a7280bf"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c997f8c44ec9b9b0bcbf2d422cc00a1d9b9c681f56efa6ca149a941e5560da2"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a66df3d4992bc1d725b9aa803e8c5a66c010c65c741ad901e260ece77f58d2f"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:3ebbcf2a07621f29638799828b8d8668c421bfb94c6cb04269130d8de4fb7136"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1235c01ddaa80da8235741e80815ce381c5267f96cc49b1477fdcf8c047ef325"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3947483a381259c06921612550867b37d22e1df6d6d7e8361264b6d037595f44"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d5bed7f9805cc29c780f3aee05de3262ee7ce1f47083cfe9f77471e9d6777e83"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4a91d44379f45f5e540971d41e4626dacd7f01004826a18cb048e7da7e96544"}, + {file = "propcache-0.2.0-cp313-cp313-win32.whl", hash = "sha256:f902804113e032e2cdf8c71015651c97af6418363bea8d78dc0911d56c335032"}, + {file = "propcache-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:8f188cfcc64fb1266f4684206c9de0e80f54622c3f22a910cbd200478aeae61e"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:53d1bd3f979ed529f0805dd35ddaca330f80a9a6d90bc0121d2ff398f8ed8861"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:83928404adf8fb3d26793665633ea79b7361efa0287dfbd372a7e74311d51ee6"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77a86c261679ea5f3896ec060be9dc8e365788248cc1e049632a1be682442063"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:218db2a3c297a3768c11a34812e63b3ac1c3234c3a086def9c0fee50d35add1f"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7735e82e3498c27bcb2d17cb65d62c14f1100b71723b68362872bca7d0913d90"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20a617c776f520c3875cf4511e0d1db847a076d720714ae35ffe0df3e440be68"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b69535c870670c9f9b14a75d28baa32221d06f6b6fa6f77a0a13c5a7b0a5b9"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:4569158070180c3855e9c0791c56be3ceeb192defa2cdf6a3f39e54319e56b89"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:db47514ffdbd91ccdc7e6f8407aac4ee94cc871b15b577c1c324236b013ddd04"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:2a60ad3e2553a74168d275a0ef35e8c0a965448ffbc3b300ab3a5bb9956c2162"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:662dd62358bdeaca0aee5761de8727cfd6861432e3bb828dc2a693aa0471a563"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:25a1f88b471b3bc911d18b935ecb7115dff3a192b6fef46f0bfaf71ff4f12418"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:f60f0ac7005b9f5a6091009b09a419ace1610e163fa5deaba5ce3484341840e7"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:74acd6e291f885678631b7ebc85d2d4aec458dd849b8c841b57ef04047833bed"}, + {file = "propcache-0.2.0-cp38-cp38-win32.whl", hash = "sha256:d9b6ddac6408194e934002a69bcaadbc88c10b5f38fb9307779d1c629181815d"}, + {file = "propcache-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:676135dcf3262c9c5081cc8f19ad55c8a64e3f7282a21266d05544450bffc3a5"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:25c8d773a62ce0451b020c7b29a35cfbc05de8b291163a7a0f3b7904f27253e6"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:375a12d7556d462dc64d70475a9ee5982465fbb3d2b364f16b86ba9135793638"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ec43d76b9677637a89d6ab86e1fef70d739217fefa208c65352ecf0282be957"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f45eec587dafd4b2d41ac189c2156461ebd0c1082d2fe7013571598abb8505d1"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc092ba439d91df90aea38168e11f75c655880c12782facf5cf9c00f3d42b562"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa1076244f54bb76e65e22cb6910365779d5c3d71d1f18b275f1dfc7b0d71b4d"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:682a7c79a2fbf40f5dbb1eb6bfe2cd865376deeac65acf9beb607505dced9e12"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e40876731f99b6f3c897b66b803c9e1c07a989b366c6b5b475fafd1f7ba3fb8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:363ea8cd3c5cb6679f1c2f5f1f9669587361c062e4899fce56758efa928728f8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:140fbf08ab3588b3468932974a9331aff43c0ab8a2ec2c608b6d7d1756dbb6cb"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e70fac33e8b4ac63dfc4c956fd7d85a0b1139adcfc0d964ce288b7c527537fea"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b33d7a286c0dc1a15f5fc864cc48ae92a846df287ceac2dd499926c3801054a6"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f6d5749fdd33d90e34c2efb174c7e236829147a2713334d708746e94c4bde40d"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22aa8f2272d81d9317ff5756bb108021a056805ce63dd3630e27d042c8092798"}, + {file = "propcache-0.2.0-cp39-cp39-win32.whl", hash = "sha256:73e4b40ea0eda421b115248d7e79b59214411109a5bc47d0d48e4c73e3b8fcf9"}, 
+ {file = "propcache-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:9517d5e9e0731957468c29dbfd0f976736a0e55afaea843726e887f36fe017df"}, + {file = "propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036"}, + {file = "propcache-0.2.0.tar.gz", hash = "sha256:df81779732feb9d01e5d513fad0122efb3d53bbc75f61b2a4f29a020bc985e70"}, +] + [[package]] name = "psutil" version = "6.0.0" @@ -2174,6 +2330,43 @@ files = [ {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, ] +[[package]] +name = "rasterio" +version = "1.2.8" +description = "Fast and direct raster I/O for use with Numpy and SciPy" +optional = false +python-versions = ">=3.6" +files = [ + {file = "rasterio-1.2.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca9bb81518d369cbeae57aff3608538f61dcf8e12cf4bfe92dd931bc263ade"}, + {file = "rasterio-1.2.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:8b13e5f7c57512885bfede605b021bd025684b507dac7b5ac8aee5430930114c"}, + {file = "rasterio-1.2.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:84cea32a0675f1b22924664169f0da4be406e790072501d505aa771da9662bca"}, + {file = "rasterio-1.2.8-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:722f76a09091d22fd26200ff5a7820eeb263f1cd22151c77e25a3cc473d5af72"}, + {file = "rasterio-1.2.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:74e80044767f41c84121fef373926f0d3e7411b944982429328d11442a649c0a"}, + {file = "rasterio-1.2.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c30caf0823811553ec45865697afc5e4a1fc4c6f30f8dada9067061cd8507af6"}, + {file = "rasterio-1.2.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2d41b39e41a7011d1dba0cb8983336de69ad68fb17857ccbd11cde0fcfdb2e3"}, + {file = "rasterio-1.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:f248a64f57a0db5b12eb50d9ac1ccd45a6085d979c3e73b406f9e6b5a8355080"}, + {file = "rasterio-1.2.8.tar.gz", hash = "sha256:8196b7a71fea3c1573dd48d9ab0e78955e1d26e81848cce318c4930bd96782fe"}, +] + +[package.dependencies] +affine = "*" +attrs = "*" +certifi = "*" +click = ">=4.0" +click-plugins = "*" +cligj = ">=0.5" +numpy = "*" +setuptools = "*" +snuggs = ">=1.4.1" + +[package.extras] +all = ["boto3 (>=1.2.4)", "ghp-import", "hypothesis", "ipython (>=2.0)", "matplotlib", "numpydoc", "packaging", "pytest (>=2.8.2)", "pytest-cov (>=2.2.0)", "shapely", "sphinx", "sphinx-rtd-theme"] +docs = ["ghp-import", "numpydoc", "sphinx", "sphinx-rtd-theme"] +ipython = ["ipython (>=2.0)"] +plot = ["matplotlib"] +s3 = ["boto3 (>=1.2.4)"] +test = ["boto3 (>=1.2.4)", "hypothesis", "packaging", "pytest (>=2.8.2)", "pytest-cov (>=2.2.0)", "shapely"] + [[package]] name = "requests" version = "2.32.3" @@ -2195,6 +2388,31 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "rioxarray" +version = "0.13.4" +description = "geospatial xarray extension powered by rasterio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "rioxarray-0.13.4-py3-none-any.whl", hash = "sha256:56eef711d9817d3c729c1a267c940e7dff66bfc874a0b24ed3604ea2f958dfb2"}, + {file = "rioxarray-0.13.4.tar.gz", hash = "sha256:0cad24ad2c3c5ee181a0cfad2b8c2152a609b7eb118a3430034aec171e9cf14f"}, +] + +[package.dependencies] +numpy = ">=1.21" +packaging = "*" +pyproj = ">=2.2" +rasterio = ">=1.1.1" +xarray = ">=0.17" + +[package.extras] +all = ["dask", "mypy", "nbsphinx", "netcdf4", "pre-commit", 
"pylint", "pytest (>=3.6)", "pytest-cov", "pytest-timeout", "scipy", "sphinx-click", "sphinx-rtd-theme"] +dev = ["dask", "mypy", "nbsphinx", "netcdf4", "pre-commit", "pylint", "pytest (>=3.6)", "pytest-cov", "pytest-timeout", "scipy", "sphinx-click", "sphinx-rtd-theme"] +doc = ["nbsphinx", "sphinx-click", "sphinx-rtd-theme"] +interp = ["scipy"] +test = ["dask", "netcdf4", "pytest (>=3.6)", "pytest-cov", "pytest-timeout"] + [[package]] name = "s3fs" version = "2022.5.0" @@ -2274,6 +2492,26 @@ files = [ [package.dependencies] numpy = ">=1.16.5" +[[package]] +name = "setuptools" +version = "75.1.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-75.1.0-py3-none-any.whl", hash = "sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2"}, + {file = "setuptools-75.1.0.tar.gz", hash = "sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] + [[package]] name = "shapely" version = "1.7.1" @@ -2322,15 +2560,33 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "snuggs" +version = "1.4.7" +description = "Snuggs are s-expressions for Numpy" +optional = false +python-versions = "*" +files = [ + {file = "snuggs-1.4.7-py3-none-any.whl", hash = "sha256:988dde5d4db88e9d71c99457404773dabcc7a1c45971bfbe81900999942d9f07"}, + {file = "snuggs-1.4.7.tar.gz", hash = "sha256:501cf113fe3892e14e2fee76da5cd0606b7e149c411c271898e6259ebde2617b"}, +] + +[package.dependencies] +numpy = "*" +pyparsing = ">=2.1.6" + +[package.extras] +test = ["hypothesis", "pytest"] + [[package]] name = "tomli" -version = "2.0.1" +version = "2.0.2" description = "A lil' TOML parser" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, + {file = "tomli-2.0.2-py3-none-any.whl", hash = 
"sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, + {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, ] [[package]] @@ -2346,13 +2602,13 @@ files = [ [[package]] name = "toolz" -version = "0.12.1" +version = "1.0.0" description = "List processing tools and functional utilities" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "toolz-0.12.1-py3-none-any.whl", hash = "sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85"}, - {file = "toolz-0.12.1.tar.gz", hash = "sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d"}, + {file = "toolz-1.0.0-py3-none-any.whl", hash = "sha256:292c8f1c4e7516bf9086f8850935c799a874039c8bcf959d47b600e4c44a6236"}, + {file = "toolz-1.0.0.tar.gz", hash = "sha256:2c86e3d9a04798ac556793bced838816296a2f085017664e4995cb40a1047a02"}, ] [[package]] @@ -2418,13 +2674,13 @@ files = [ [[package]] name = "tzdata" -version = "2024.1" +version = "2024.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" files = [ - {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, - {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, + {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, + {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, ] [[package]] @@ -2558,108 +2814,109 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"] [[package]] name = "yarl" -version = "1.11.1" +version = "1.14.0" description = "Yet another URL library" optional = false python-versions = ">=3.8" files = [ - {file = "yarl-1.11.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:400cd42185f92de559d29eeb529e71d80dfbd2f45c36844914a4a34297ca6f00"}, - {file = "yarl-1.11.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8258c86f47e080a258993eed877d579c71da7bda26af86ce6c2d2d072c11320d"}, - {file = "yarl-1.11.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2164cd9725092761fed26f299e3f276bb4b537ca58e6ff6b252eae9631b5c96e"}, - {file = "yarl-1.11.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08ea567c16f140af8ddc7cb58e27e9138a1386e3e6e53982abaa6f2377b38cc"}, - {file = "yarl-1.11.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:768ecc550096b028754ea28bf90fde071c379c62c43afa574edc6f33ee5daaec"}, - {file = "yarl-1.11.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2909fa3a7d249ef64eeb2faa04b7957e34fefb6ec9966506312349ed8a7e77bf"}, - {file = "yarl-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01a8697ec24f17c349c4f655763c4db70eebc56a5f82995e5e26e837c6eb0e49"}, - {file = "yarl-1.11.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e286580b6511aac7c3268a78cdb861ec739d3e5a2a53b4809faef6b49778eaff"}, - {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4179522dc0305c3fc9782549175c8e8849252fefeb077c92a73889ccbcd508ad"}, - {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:27fcb271a41b746bd0e2a92182df507e1c204759f460ff784ca614e12dd85145"}, - {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:f61db3b7e870914dbd9434b560075e0366771eecbe6d2b5561f5bc7485f39efd"}, - {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:c92261eb2ad367629dc437536463dc934030c9e7caca861cc51990fe6c565f26"}, - {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d95b52fbef190ca87d8c42f49e314eace4fc52070f3dfa5f87a6594b0c1c6e46"}, - {file = "yarl-1.11.1-cp310-cp310-win32.whl", hash = "sha256:489fa8bde4f1244ad6c5f6d11bb33e09cf0d1d0367edb197619c3e3fc06f3d91"}, - {file = "yarl-1.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:476e20c433b356e16e9a141449f25161e6b69984fb4cdbd7cd4bd54c17844998"}, - {file = "yarl-1.11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:946eedc12895873891aaceb39bceb484b4977f70373e0122da483f6c38faaa68"}, - {file = "yarl-1.11.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:21a7c12321436b066c11ec19c7e3cb9aec18884fe0d5b25d03d756a9e654edfe"}, - {file = "yarl-1.11.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c35f493b867912f6fda721a59cc7c4766d382040bdf1ddaeeaa7fa4d072f4675"}, - {file = "yarl-1.11.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25861303e0be76b60fddc1250ec5986c42f0a5c0c50ff57cc30b1be199c00e63"}, - {file = "yarl-1.11.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4b53f73077e839b3f89c992223f15b1d2ab314bdbdf502afdc7bb18e95eae27"}, - {file = "yarl-1.11.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:327c724b01b8641a1bf1ab3b232fb638706e50f76c0b5bf16051ab65c868fac5"}, - {file = "yarl-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4307d9a3417eea87715c9736d050c83e8c1904e9b7aada6ce61b46361b733d92"}, - {file = "yarl-1.11.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48a28bed68ab8fb7e380775f0029a079f08a17799cb3387a65d14ace16c12e2b"}, - {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:067b961853c8e62725ff2893226fef3d0da060656a9827f3f520fb1d19b2b68a"}, - {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8215f6f21394d1f46e222abeb06316e77ef328d628f593502d8fc2a9117bde83"}, - {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:498442e3af2a860a663baa14fbf23fb04b0dd758039c0e7c8f91cb9279799bff"}, - {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:69721b8effdb588cb055cc22f7c5105ca6fdaa5aeb3ea09021d517882c4a904c"}, - {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1e969fa4c1e0b1a391f3fcbcb9ec31e84440253325b534519be0d28f4b6b533e"}, - {file = "yarl-1.11.1-cp311-cp311-win32.whl", hash = "sha256:7d51324a04fc4b0e097ff8a153e9276c2593106a811704025bbc1d6916f45ca6"}, - {file = "yarl-1.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:15061ce6584ece023457fb8b7a7a69ec40bf7114d781a8c4f5dcd68e28b5c53b"}, - {file = "yarl-1.11.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a4264515f9117be204935cd230fb2a052dd3792789cc94c101c535d349b3dab0"}, - {file = "yarl-1.11.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f41fa79114a1d2eddb5eea7b912d6160508f57440bd302ce96eaa384914cd265"}, - {file = "yarl-1.11.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:02da8759b47d964f9173c8675710720b468aa1c1693be0c9c64abb9d8d9a4867"}, - {file = "yarl-1.11.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9361628f28f48dcf8b2f528420d4d68102f593f9c2e592bfc842f5fb337e44fd"}, - {file = 
"yarl-1.11.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b91044952da03b6f95fdba398d7993dd983b64d3c31c358a4c89e3c19b6f7aef"}, - {file = "yarl-1.11.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:74db2ef03b442276d25951749a803ddb6e270d02dda1d1c556f6ae595a0d76a8"}, - {file = "yarl-1.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e975a2211952a8a083d1b9d9ba26472981ae338e720b419eb50535de3c02870"}, - {file = "yarl-1.11.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8aef97ba1dd2138112890ef848e17d8526fe80b21f743b4ee65947ea184f07a2"}, - {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a7915ea49b0c113641dc4d9338efa9bd66b6a9a485ffe75b9907e8573ca94b84"}, - {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:504cf0d4c5e4579a51261d6091267f9fd997ef58558c4ffa7a3e1460bd2336fa"}, - {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3de5292f9f0ee285e6bd168b2a77b2a00d74cbcfa420ed078456d3023d2f6dff"}, - {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a34e1e30f1774fa35d37202bbeae62423e9a79d78d0874e5556a593479fdf239"}, - {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:66b63c504d2ca43bf7221a1f72fbe981ff56ecb39004c70a94485d13e37ebf45"}, - {file = "yarl-1.11.1-cp312-cp312-win32.whl", hash = "sha256:a28b70c9e2213de425d9cba5ab2e7f7a1c8ca23a99c4b5159bf77b9c31251447"}, - {file = "yarl-1.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:17b5a386d0d36fb828e2fb3ef08c8829c1ebf977eef88e5367d1c8c94b454639"}, - {file = "yarl-1.11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1fa2e7a406fbd45b61b4433e3aa254a2c3e14c4b3186f6e952d08a730807fa0c"}, - {file = "yarl-1.11.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:750f656832d7d3cb0c76be137ee79405cc17e792f31e0a01eee390e383b2936e"}, - {file = "yarl-1.11.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b8486f322d8f6a38539136a22c55f94d269addb24db5cb6f61adc61eabc9d93"}, - {file = "yarl-1.11.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fce4da3703ee6048ad4138fe74619c50874afe98b1ad87b2698ef95bf92c96d"}, - {file = "yarl-1.11.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ed653638ef669e0efc6fe2acb792275cb419bf9cb5c5049399f3556995f23c7"}, - {file = "yarl-1.11.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18ac56c9dd70941ecad42b5a906820824ca72ff84ad6fa18db33c2537ae2e089"}, - {file = "yarl-1.11.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:688654f8507464745ab563b041d1fb7dab5d9912ca6b06e61d1c4708366832f5"}, - {file = "yarl-1.11.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4973eac1e2ff63cf187073cd4e1f1148dcd119314ab79b88e1b3fad74a18c9d5"}, - {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:964a428132227edff96d6f3cf261573cb0f1a60c9a764ce28cda9525f18f7786"}, - {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6d23754b9939cbab02c63434776df1170e43b09c6a517585c7ce2b3d449b7318"}, - {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c2dc4250fe94d8cd864d66018f8344d4af50e3758e9d725e94fecfa27588ff82"}, - {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:09696438cb43ea6f9492ef237761b043f9179f455f405279e609f2bc9100212a"}, - {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:999bfee0a5b7385a0af5ffb606393509cfde70ecca4f01c36985be6d33e336da"}, - {file = "yarl-1.11.1-cp313-cp313-win32.whl", hash = "sha256:ce928c9c6409c79e10f39604a7e214b3cb69552952fbda8d836c052832e6a979"}, - {file = "yarl-1.11.1-cp313-cp313-win_amd64.whl", hash = "sha256:501c503eed2bb306638ccb60c174f856cc3246c861829ff40eaa80e2f0330367"}, - {file = "yarl-1.11.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dae7bd0daeb33aa3e79e72877d3d51052e8b19c9025ecf0374f542ea8ec120e4"}, - {file = "yarl-1.11.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3ff6b1617aa39279fe18a76c8d165469c48b159931d9b48239065767ee455b2b"}, - {file = "yarl-1.11.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3257978c870728a52dcce8c2902bf01f6c53b65094b457bf87b2644ee6238ddc"}, - {file = "yarl-1.11.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f351fa31234699d6084ff98283cb1e852270fe9e250a3b3bf7804eb493bd937"}, - {file = "yarl-1.11.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8aef1b64da41d18026632d99a06b3fefe1d08e85dd81d849fa7c96301ed22f1b"}, - {file = "yarl-1.11.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7175a87ab8f7fbde37160a15e58e138ba3b2b0e05492d7351314a250d61b1591"}, - {file = "yarl-1.11.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba444bdd4caa2a94456ef67a2f383710928820dd0117aae6650a4d17029fa25e"}, - {file = "yarl-1.11.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ea9682124fc062e3d931c6911934a678cb28453f957ddccf51f568c2f2b5e05"}, - {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8418c053aeb236b20b0ab8fa6bacfc2feaaf7d4683dd96528610989c99723d5f"}, - {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:61a5f2c14d0a1adfdd82258f756b23a550c13ba4c86c84106be4c111a3a4e413"}, - {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f3a6d90cab0bdf07df8f176eae3a07127daafcf7457b997b2bf46776da2c7eb7"}, - {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:077da604852be488c9a05a524068cdae1e972b7dc02438161c32420fb4ec5e14"}, - {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:15439f3c5c72686b6c3ff235279630d08936ace67d0fe5c8d5bbc3ef06f5a420"}, - {file = "yarl-1.11.1-cp38-cp38-win32.whl", hash = "sha256:238a21849dd7554cb4d25a14ffbfa0ef380bb7ba201f45b144a14454a72ffa5a"}, - {file = "yarl-1.11.1-cp38-cp38-win_amd64.whl", hash = "sha256:67459cf8cf31da0e2cbdb4b040507e535d25cfbb1604ca76396a3a66b8ba37a6"}, - {file = "yarl-1.11.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:884eab2ce97cbaf89f264372eae58388862c33c4f551c15680dd80f53c89a269"}, - {file = "yarl-1.11.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a336eaa7ee7e87cdece3cedb395c9657d227bfceb6781295cf56abcd3386a26"}, - {file = "yarl-1.11.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87f020d010ba80a247c4abc335fc13421037800ca20b42af5ae40e5fd75e7909"}, - {file = "yarl-1.11.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:637c7ddb585a62d4469f843dac221f23eec3cbad31693b23abbc2c366ad41ff4"}, - {file = "yarl-1.11.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:48dfd117ab93f0129084577a07287376cc69c08138694396f305636e229caa1a"}, - {file = 
"yarl-1.11.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75e0ae31fb5ccab6eda09ba1494e87eb226dcbd2372dae96b87800e1dcc98804"}, - {file = "yarl-1.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f46f81501160c28d0c0b7333b4f7be8983dbbc161983b6fb814024d1b4952f79"}, - {file = "yarl-1.11.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:04293941646647b3bfb1719d1d11ff1028e9c30199509a844da3c0f5919dc520"}, - {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:250e888fa62d73e721f3041e3a9abf427788a1934b426b45e1b92f62c1f68366"}, - {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e8f63904df26d1a66aabc141bfd258bf738b9bc7bc6bdef22713b4f5ef789a4c"}, - {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:aac44097d838dda26526cffb63bdd8737a2dbdf5f2c68efb72ad83aec6673c7e"}, - {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:267b24f891e74eccbdff42241c5fb4f974de2d6271dcc7d7e0c9ae1079a560d9"}, - {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6907daa4b9d7a688063ed098c472f96e8181733c525e03e866fb5db480a424df"}, - {file = "yarl-1.11.1-cp39-cp39-win32.whl", hash = "sha256:14438dfc5015661f75f85bc5adad0743678eefee266ff0c9a8e32969d5d69f74"}, - {file = "yarl-1.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:94d0caaa912bfcdc702a4204cd5e2bb01eb917fc4f5ea2315aa23962549561b0"}, - {file = "yarl-1.11.1-py3-none-any.whl", hash = "sha256:72bf26f66456baa0584eff63e44545c9f0eaed9b73cb6601b647c91f14c11f38"}, - {file = "yarl-1.11.1.tar.gz", hash = "sha256:1bb2d9e212fb7449b8fb73bc461b51eaa17cc8430b4a87d87be7b25052d92f53"}, + {file = "yarl-1.14.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1bfc25aa6a7c99cf86564210f79a0b7d4484159c67e01232b116e445b3036547"}, + {file = "yarl-1.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0cf21f46a15d445417de8fc89f2568852cf57fe8ca1ab3d19ddb24d45c0383ae"}, + {file = "yarl-1.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1dda53508df0de87b6e6b0a52d6718ff6c62a5aca8f5552748404963df639269"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:587c3cc59bc148a9b1c07a019346eda2549bc9f468acd2f9824d185749acf0a6"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3007a5b75cb50140708420fe688c393e71139324df599434633019314ceb8b59"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06ff23462398333c78b6f4f8d3d70410d657a471c2c5bbe6086133be43fc8f1a"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:689a99a42ee4583fcb0d3a67a0204664aa1539684aed72bdafcbd505197a91c4"}, + {file = "yarl-1.14.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0547ab1e9345dc468cac8368d88ea4c5bd473ebc1d8d755347d7401982b5dd8"}, + {file = "yarl-1.14.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:742aef0a99844faaac200564ea6f5e08facb285d37ea18bd1a5acf2771f3255a"}, + {file = "yarl-1.14.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:176110bff341b6730f64a1eb3a7070e12b373cf1c910a9337e7c3240497db76f"}, + {file = "yarl-1.14.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:46a9772a1efa93f9cd170ad33101c1817c77e0e9914d4fe33e2da299d7cf0f9b"}, + {file = "yarl-1.14.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:ee2c68e4f2dd1b1c15b849ba1c96fac105fca6ffdb7c1e8be51da6fabbdeafb9"}, + {file = "yarl-1.14.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:047b258e00b99091b6f90355521f026238c63bd76dcf996d93527bb13320eefd"}, + {file = "yarl-1.14.0-cp310-cp310-win32.whl", hash = "sha256:0aa92e3e30a04f9462a25077db689c4ac5ea9ab6cc68a2e563881b987d42f16d"}, + {file = "yarl-1.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:d9baec588f015d0ee564057aa7574313c53a530662ffad930b7886becc85abdf"}, + {file = "yarl-1.14.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:07f9eaf57719d6721ab15805d85f4b01a5b509a0868d7320134371bcb652152d"}, + {file = "yarl-1.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c14b504a74e58e2deb0378b3eca10f3d076635c100f45b113c18c770b4a47a50"}, + {file = "yarl-1.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:16a682a127930f3fc4e42583becca6049e1d7214bcad23520c590edd741d2114"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73bedd2be05f48af19f0f2e9e1353921ce0c83f4a1c9e8556ecdcf1f1eae4892"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f3ab950f8814f3b7b5e3eebc117986f817ec933676f68f0a6c5b2137dd7c9c69"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b693c63e7e64b524f54aa4888403c680342d1ad0d97be1707c531584d6aeeb4f"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85cb3e40eaa98489f1e2e8b29f5ad02ee1ee40d6ce6b88d50cf0f205de1d9d2c"}, + {file = "yarl-1.14.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f24f08b6c9b9818fd80612c97857d28f9779f0d1211653ece9844fc7b414df2"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:29a84a46ec3ebae7a1c024c055612b11e9363a8a23238b3e905552d77a2bc51b"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5cd5dad8366e0168e0fd23d10705a603790484a6dbb9eb272b33673b8f2cce72"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a152751af7ef7b5d5fa6d215756e508dd05eb07d0cf2ba51f3e740076aa74373"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:3d569f877ed9a708e4c71a2d13d2940cb0791da309f70bd970ac1a5c088a0a92"}, + {file = "yarl-1.14.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6a615cad11ec3428020fb3c5a88d85ce1b5c69fd66e9fcb91a7daa5e855325dd"}, + {file = "yarl-1.14.0-cp311-cp311-win32.whl", hash = "sha256:bab03192091681d54e8225c53f270b0517637915d9297028409a2a5114ff4634"}, + {file = "yarl-1.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:985623575e5c4ea763056ffe0e2d63836f771a8c294b3de06d09480538316b13"}, + {file = "yarl-1.14.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fc2c80bc87fba076e6cbb926216c27fba274dae7100a7b9a0983b53132dd99f2"}, + {file = "yarl-1.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:55c144d363ad4626ca744556c049c94e2b95096041ac87098bb363dcc8635e8d"}, + {file = "yarl-1.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b03384eed107dbeb5f625a99dc3a7de8be04fc8480c9ad42fccbc73434170b20"}, + {file = "yarl-1.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f72a0d746d38cb299b79ce3d4d60ba0892c84bbc905d0d49c13df5bace1b65f8"}, + {file = "yarl-1.14.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8648180b34faaea4aa5b5ca7e871d9eb1277033fa439693855cf0ea9195f85f1"}, + {file = 
"yarl-1.14.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9557c9322aaa33174d285b0c1961fb32499d65ad1866155b7845edc876c3c835"}, + {file = "yarl-1.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f50eb3837012a937a2b649ec872b66ba9541ad9d6f103ddcafb8231cfcafd22"}, + {file = "yarl-1.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8892fa575ac9b1b25fae7b221bc4792a273877b9b56a99ee2d8d03eeb3dbb1d2"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e6a2c5c5bb2556dfbfffffc2bcfb9c235fd2b566d5006dfb2a37afc7e3278a07"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ab3abc0b78a5dfaa4795a6afbe7b282b6aa88d81cf8c1bb5e394993d7cae3457"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:47eede5d11d669ab3759b63afb70d28d5328c14744b8edba3323e27dc52d298d"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fe4d2536c827f508348d7b40c08767e8c7071614250927233bf0c92170451c0a"}, + {file = "yarl-1.14.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0fd7b941dd1b00b5f0acb97455fea2c4b7aac2dd31ea43fb9d155e9bc7b78664"}, + {file = "yarl-1.14.0-cp312-cp312-win32.whl", hash = "sha256:99ff3744f5fe48288be6bc402533b38e89749623a43208e1d57091fc96b783b9"}, + {file = "yarl-1.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:1ca3894e9e9f72da93544f64988d9c052254a338a9f855165f37f51edb6591de"}, + {file = "yarl-1.14.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5d02d700705d67e09e1f57681f758f0b9d4412eeb70b2eb8d96ca6200b486db3"}, + {file = "yarl-1.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:30600ba5db60f7c0820ef38a2568bb7379e1418ecc947a0f76fd8b2ff4257a97"}, + {file = "yarl-1.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e85d86527baebb41a214cc3b45c17177177d900a2ad5783dbe6f291642d4906f"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37001e5d4621cef710c8dc1429ca04e189e572f128ab12312eab4e04cf007132"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4f4547944d4f5cfcdc03f3f097d6f05bbbc915eaaf80a2ee120d0e756de377d"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75ff4c819757f9bdb35de049a509814d6ce851fe26f06eb95a392a5640052482"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68ac1a09392ed6e3fd14be880d39b951d7b981fd135416db7d18a6208c536561"}, + {file = "yarl-1.14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96952f642ac69075e44c7d0284528938fdff39422a1d90d3e45ce40b72e5e2d9"}, + {file = "yarl-1.14.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a56fbe3d7f3bce1d060ea18d2413a2ca9ca814eea7cedc4d247b5f338d54844e"}, + {file = "yarl-1.14.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7e2637d75e92763d1322cb5041573279ec43a80c0f7fbbd2d64f5aee98447b17"}, + {file = "yarl-1.14.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9abe80ae2c9d37c17599557b712e6515f4100a80efb2cda15f5f070306477cd2"}, + {file = "yarl-1.14.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:217a782020b875538eebf3948fac3a7f9bbbd0fd9bf8538f7c2ad7489e80f4e8"}, + {file = "yarl-1.14.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9cfef3f14f75bf6aba73a76caf61f9d00865912a04a4393c468a7ce0981b519"}, + {file = 
"yarl-1.14.0-cp313-cp313-win32.whl", hash = "sha256:d8361c7d04e6a264481f0b802e395f647cd3f8bbe27acfa7c12049efea675bd1"}, + {file = "yarl-1.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:bc24f968b82455f336b79bf37dbb243b7d76cd40897489888d663d4e028f5069"}, + {file = "yarl-1.14.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:91d875f75fabf76b3018c5f196bf3d308ed2b49ddcb46c1576d6b075754a1393"}, + {file = "yarl-1.14.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4009def9be3a7e5175db20aa2d7307ecd00bbf50f7f0f989300710eee1d0b0b9"}, + {file = "yarl-1.14.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:582cedde49603f139be572252a318b30dc41039bc0b8165f070f279e5d12187f"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbd9ff43a04f8ffe8a959a944c2dca10d22f5f99fc6a459f49c3ebfb409309d9"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b9f805e37ed16cc212fdc538a608422d7517e7faf539bedea4fe69425bc55d76"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:95e16e9eaa2d7f5d87421b8fe694dd71606aa61d74b824c8d17fc85cc51983d1"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:816d24f584edefcc5ca63428f0b38fee00b39fe64e3c5e558f895a18983efe96"}, + {file = "yarl-1.14.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd2660c01367eb3ef081b8fa0a5da7fe767f9427aa82023a961a5f28f0d4af6c"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:94b2bb9bcfd5be9d27004ea4398fb640373dd0c1a9e219084f42c08f77a720ab"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:c2089a9afef887664115f7fa6d3c0edd6454adaca5488dba836ca91f60401075"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2192f718db4a8509f63dd6d950f143279211fa7e6a2c612edc17d85bf043d36e"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:8385ab36bf812e9d37cf7613999a87715f27ef67a53f0687d28c44b819df7cb0"}, + {file = "yarl-1.14.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b4c1ecba93e7826dc71ddba75fb7740cdb52e7bd0be9f03136b83f54e6a1f511"}, + {file = "yarl-1.14.0-cp38-cp38-win32.whl", hash = "sha256:e749af6c912a7bb441d105c50c1a3da720474e8acb91c89350080dd600228f0e"}, + {file = "yarl-1.14.0-cp38-cp38-win_amd64.whl", hash = "sha256:147e36331f6f63e08a14640acf12369e041e0751bb70d9362df68c2d9dcf0c87"}, + {file = "yarl-1.14.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a9f917966d27f7ce30039fe8d900f913c5304134096554fd9bea0774bcda6d1"}, + {file = "yarl-1.14.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a2f8fb7f944bcdfecd4e8d855f84c703804a594da5123dd206f75036e536d4d"}, + {file = "yarl-1.14.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f4e475f29a9122f908d0f1f706e1f2fc3656536ffd21014ff8a6f2e1b14d1d8"}, + {file = "yarl-1.14.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8089d4634d8fa2b1806ce44fefa4979b1ab2c12c0bc7ef3dfa45c8a374811348"}, + {file = "yarl-1.14.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b16f6c75cffc2dc0616ea295abb0e1967601bd1fb1e0af6a1de1c6c887f3439"}, + {file = "yarl-1.14.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:498b3c55087b9d762636bca9b45f60d37e51d24341786dc01b81253f9552a607"}, + {file = "yarl-1.14.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e3f8bfc1db82589ef965ed234b87de30d140db8b6dc50ada9e33951ccd8ec07a"}, + {file = "yarl-1.14.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:625f207b1799e95e7c823f42f473c1e9dbfb6192bd56bba8695656d92be4535f"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:781e2495e408a81e4eaeedeb41ba32b63b1980dddf8b60dbbeff6036bcd35049"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:659603d26d40dd4463200df9bfbc339fbfaed3fe32e5c432fe1dc2b5d4aa94b4"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4e0d45ebf975634468682c8bec021618b3ad52c37619e5c938f8f831fa1ac5c0"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:a2e4725a08cb2b4794db09e350c86dee18202bb8286527210e13a1514dc9a59a"}, + {file = "yarl-1.14.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:19268b4fec1d7760134f2de46ef2608c2920134fb1fa61e451f679e41356dc55"}, + {file = "yarl-1.14.0-cp39-cp39-win32.whl", hash = "sha256:337912bcdcf193ade64b9aae5a4017a0a1950caf8ca140362e361543c6773f21"}, + {file = "yarl-1.14.0-cp39-cp39-win_amd64.whl", hash = "sha256:b6d0147574ce2e7b812c989e50fa72bbc5338045411a836bd066ce5fc8ac0bce"}, + {file = "yarl-1.14.0-py3-none-any.whl", hash = "sha256:c8ed4034f0765f8861620c1f2f2364d2e58520ea288497084dae880424fc0d9f"}, + {file = "yarl-1.14.0.tar.gz", hash = "sha256:88c7d9d58aab0724b979ab5617330acb1c7030b79379c8138c1c8c94e121d1b3"}, ] [package.dependencies] idna = ">=2.0" multidict = ">=4.0" +propcache = ">=0.2.0" [[package]] name = "zarr" @@ -2703,4 +2960,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "~=3.8" -content-hash = "2bdf3a4ef2a7f9b982ad43dd9806fcc8fa3c450c994fa48c8d776c3a963af56b" +content-hash = "092a8232d65317934fe8aef1cd75b46977932d787222dce8540855ca47625e51" From 0cb9c83260984df3c87c01a9bdb714da41905a0d Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 8 Oct 2024 07:54:46 -0700 Subject: [PATCH 91/91] Update backend for new NTS method --- data-access/nexustiles/backends/cog/backend.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/data-access/nexustiles/backends/cog/backend.py b/data-access/nexustiles/backends/cog/backend.py index e50c04c4..016cd203 100644 --- a/data-access/nexustiles/backends/cog/backend.py +++ b/data-access/nexustiles/backends/cog/backend.py @@ -229,6 +229,10 @@ def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, da # this raise NotImplementedError() + def find_tiles_along_line(self, start_point, end_point, ds=None, start_time=0, end_time=-1, **kwargs): + return self.__solr.find_tiles_along_line(start_point, end_point, ds, start_time, + end_time, **kwargs) + def get_min_max_time_by_granule(self, ds, granule_name): raise NotImplementedError()