From cbf5b54d7b412282975b457c7beb1400e840b7a8 Mon Sep 17 00:00:00 2001 From: Peter Onyisi Date: Mon, 17 Nov 2025 11:52:00 -0600 Subject: [PATCH 1/8] Dataset cache purge cronjob --- docs/deployment/reference.md | 9 ++- .../templates/dataset-lifecycle/cronjob.yaml | 24 ++++++++ helm/servicex/values.yaml | 12 ++++ .../resources/datasets/delete_dataset.py | 28 ++++++---- .../resources/datasets/get_all.py | 24 +++++--- .../internal/dataset_lifecycle_ops.py | 55 +++++++++++++++++++ servicex_app/servicex_app/routes.py | 5 +- 7 files changed, 133 insertions(+), 24 deletions(-) create mode 100644 helm/servicex/templates/dataset-lifecycle/cronjob.yaml create mode 100644 servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py diff --git a/docs/deployment/reference.md b/docs/deployment/reference.md index fd4ce6669..f1db2aa6b 100644 --- a/docs/deployment/reference.md +++ b/docs/deployment/reference.md @@ -111,8 +111,13 @@ parameters for the [rabbitMQ](https://github.com/bitnami/charts/tree/master/bitn | `transformer.persistence.existingClaim` | Existing persistent volume claim | nil | | `transformer.subdir` | Subdirectory of the mount to write transformer results to (should end with trailing /) | nil | | `dataLifecycle.enabled` | Enable deployment of data lifecycle jobs | false | -| `dataLifecycle.image` | Default image for data lifecycle job | `sslhep/servicex_minio_cleanup` | -| `dataLifecycle.tag` | Data lifecycle job image tag | | +| `dataLifecycle.image` | Default image for data lifecycle job | `python` | +| `dataLifecycle.tag` | Data lifecycle job image tag | `3.10` | | `dataLifecycle.pullPolicy` | Data lifecycle image pull policy | `Always` | | `dataLifecycle.schedule` | Schedule for minioCleanup cronjob. See [reference](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax) for details on fields | `* */8 * * *` (every 8 hours) | | `dataLifecycle.retention` | We will archive any transforms older than this. Use the gnu date command --date argument. See [date command](https://www.geeksforgeeks.org/date-command-linux-examples/#4-how-to-display-past-dates)for examples | 7 days ago | +| `datasetLifecycle.image` | Default image for dataset cache lifecycle job | `curlimages/curl` | +| `datasetLifecycle.tag` | Dataset cache lifecycle job image tag | `8.17.0` | +| `datasetLifecycle.pullPolicy` | Dataset cache lifecycle image pull policy | `Always` | +| `datasetLifecycle.schedule` | Schedule for dataset cache cleanup cronjob. See [reference](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax) for details on fields | `0 * * * *` (top of every hour) | +| `datasetLifecycle.cacheLifetime` | Lifetime of dataset cache, in hours | 24 | diff --git a/helm/servicex/templates/dataset-lifecycle/cronjob.yaml b/helm/servicex/templates/dataset-lifecycle/cronjob.yaml new file mode 100644 index 000000000..93fddf154 --- /dev/null +++ b/helm/servicex/templates/dataset-lifecycle/cronjob.yaml @@ -0,0 +1,24 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ .Release.Name }}-dataset-lifecycle-job +spec: + schedule: {{ .Values.datasetLifecycle.schedule | default "0 * * * *" | quote }} + concurrencyPolicy: "Forbid" + jobTemplate: + spec: + template: + metadata: + labels: + app: {{ .Release.Name }}-dataset-lifecycle-job + spec: + containers: + - name: {{ .Release.Name }}-dataset-lifecycle-job + image: {{ .Values.datasetLifecycle.image }}:{{ .Values.datasetLifecycle.tag }} + imagePullPolicy: {{ .Values.datasetLifecycle.pullPolicy }} + env: + - name: LIFETIME + value: {{ .Values.datasetLifecycle.cacheLifetime }} + command: + - curl --request POST "http://{{ .Release.Name }}-servicex-app:8000/servicex/internal/dataset-lifecycle?age=$LIFETIME" + restartPolicy: OnFailure \ No newline at end of file diff --git a/helm/servicex/values.yaml b/helm/servicex/values.yaml index 22209e739..be4a4da58 100644 --- a/helm/servicex/values.yaml +++ b/helm/servicex/values.yaml @@ -261,3 +261,15 @@ dataLifecycle: # See https://www.geeksforgeeks.org/date-command-linux-examples/#4-how-to-display-past-dates # for examples retention: "7 days ago" + +# This obsoletes the dataset cache for datasets older than "cacheLifetime" hours. +# Does not touch transforms or output files. +datasetLifecycle: + # image should support curl + image: curlimages/curl + tag: "8.17.0" + pullPolicy: Always + schedule: "0 * * * *" + + # How long to keep datasets in the cache, in hours + age: 24 diff --git a/servicex_app/servicex_app/resources/datasets/delete_dataset.py b/servicex_app/servicex_app/resources/datasets/delete_dataset.py index b056df187..f9f2a0028 100644 --- a/servicex_app/servicex_app/resources/datasets/delete_dataset.py +++ b/servicex_app/servicex_app/resources/datasets/delete_dataset.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, IRIS-HEP +# Copyright (c) 2024-25, IRIS-HEP # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -30,18 +30,22 @@ from servicex_app.resources.servicex_resource import ServiceXResource -class DeleteDataset(ServiceXResource): - @auth_required - def delete(self, dataset_id): - dataset = Dataset.find_by_id(dataset_id) +def delete_dataset(dataset_id): + dataset = Dataset.find_by_id(dataset_id) + + if not dataset: + return {"message": f"Dataset {dataset_id} not found"}, 404 - if not dataset: - return {"message": f"Dataset {dataset_id} not found"}, 404 + if dataset.stale: + return {"message": f"Dataset {dataset_id} has already been deleted"}, 400 - if dataset.stale: - return {"message": f"Dataset {dataset_id} has already been deleted"}, 400 + dataset.stale = True + dataset.save_to_db() - dataset.stale = True - dataset.save_to_db() + return {"dataset-id": dataset_id, "stale": True}, 200 - return {"dataset-id": dataset_id, "stale": True} + +class DeleteDataset(ServiceXResource): + @auth_required + def delete(self, dataset_id): + return delete_dataset(dataset_id) diff --git a/servicex_app/servicex_app/resources/datasets/get_all.py b/servicex_app/servicex_app/resources/datasets/get_all.py index 722f8536c..6f0472857 100644 --- a/servicex_app/servicex_app/resources/datasets/get_all.py +++ b/servicex_app/servicex_app/resources/datasets/get_all.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, IRIS-HEP +# Copyright (c) 2024-25, IRIS-HEP # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -31,20 +31,26 @@ from servicex_app.models import Dataset from servicex_app.resources.servicex_resource import ServiceXResource +from typing import List + parser = reqparse.RequestParser() parser.add_argument("did-finder", type=str, location="args", required=False) parser.add_argument("show-deleted", type=bool, location="args", required=False) +def get_all_datasets(args={}) -> List[Dataset]: + show_deleted = args["show-deleted"] if "show-deleted" in args else False + if "did-finder" in args and args["did-finder"]: + did_finder = args["did-finder"] + datasets = Dataset.get_by_did_finder(did_finder, show_deleted) + else: + datasets = Dataset.get_all(show_deleted) + + return datasets + + class AllDatasets(ServiceXResource): @auth_required def get(self): args = parser.parse_args() - show_deleted = args["show-deleted"] if "show-deleted" in args else False - if "did-finder" in args and args["did-finder"]: - did_finder = args["did-finder"] - datasets = Dataset.get_by_did_finder(did_finder, show_deleted) - else: - datasets = Dataset.get_all(show_deleted) - - return {"datasets": [dataset.to_json() for dataset in datasets]} + return {"datasets": [dataset.to_json() for dataset in get_all_datasets(args)]} diff --git a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py new file mode 100644 index 000000000..e567fe959 --- /dev/null +++ b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py @@ -0,0 +1,55 @@ +# Copyright (c) 2025, IRIS-HEP +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from datetime import datetime, timedelta + +from flask import request, current_app + +from servicex_app.resources.servicex_resource import ServiceXResource +from ..datasets.get_all import get_all_datasets +from ..datasets.delete_dataset import delete_dataset + + +class DatasetLifecycleOps(ServiceXResource): + def post(self): + """ + Obsolete cached datasets older than N hours + """ + now = datetime.now() + try: + age = float(request.args.get('age', 24)) + except Exception: + return {"message": "Invalid age parameter"}, 422 + delta = timedelta(hours=age) + datasets = get_all_datasets() # by default this will only give non-stale datasets + todelete = [_.id for _ in datasets if (now-_.last_updated) > delta] + current_app.logger.info(f"Obsoletion called for datasets older than {delta}. " + f"Obsoleting {len(todelete)} datasets.") + for dataset_id in todelete: + delete_dataset(dataset_id) + + return {"message": "Success"}, 200 diff --git a/servicex_app/servicex_app/routes.py b/servicex_app/servicex_app/routes.py index 148946dbd..2799af8c1 100644 --- a/servicex_app/servicex_app/routes.py +++ b/servicex_app/servicex_app/routes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, IRIS-HEP +# Copyright (c) 2019-2025, IRIS-HEP # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -32,6 +32,7 @@ from servicex_app.resources.datasets.get_one import OneDataset from servicex_app.resources.transformation.delete import DeleteTransform from servicex_app.resources.internal.data_lifecycle_ops import DataLifecycleOps +from servicex_app.resources.internal.dataset_lifecycle_ops import DatasetLifecycleOps def add_routes( @@ -198,3 +199,5 @@ def add_routes( DataLifecycleOps.make_api(object_store) api.add_resource(DataLifecycleOps, "/servicex/internal/data-lifecycle") + + api.add_resource(DatasetLifecycleOps, "/servicex/internal/dataset-lifecycle") From 4c7d8aeb10cca7e8b6f77e3f37df1c7d3d2353bc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 18:42:31 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/deployment/reference.md | 2 +- .../templates/dataset-lifecycle/cronjob.yaml | 2 +- helm/servicex/values.yaml | 3 +-- .../resources/internal/dataset_lifecycle_ops.py | 14 +++++++++----- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/deployment/reference.md b/docs/deployment/reference.md index 083ea2ea0..3a88d5a2b 100644 --- a/docs/deployment/reference.md +++ b/docs/deployment/reference.md @@ -121,4 +121,4 @@ parameters for the [rabbitMQ](https://github.com/bitnami/charts/tree/master/bitn | `datasetLifecycle.tag` | Dataset cache lifecycle job image tag | `8.17.0` | | `datasetLifecycle.pullPolicy` | Dataset cache lifecycle image pull policy | `Always` | | `datasetLifecycle.schedule` | Schedule for dataset cache cleanup cronjob. See [reference](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax) for details on fields | `0 * * * *` (top of every hour) | -| `datasetLifecycle.cacheLifetime` | Lifetime of dataset cache, in hours | 24 | \ No newline at end of file +| `datasetLifecycle.cacheLifetime` | Lifetime of dataset cache, in hours | 24 | diff --git a/helm/servicex/templates/dataset-lifecycle/cronjob.yaml b/helm/servicex/templates/dataset-lifecycle/cronjob.yaml index 93fddf154..ed02b6489 100644 --- a/helm/servicex/templates/dataset-lifecycle/cronjob.yaml +++ b/helm/servicex/templates/dataset-lifecycle/cronjob.yaml @@ -21,4 +21,4 @@ spec: value: {{ .Values.datasetLifecycle.cacheLifetime }} command: - curl --request POST "http://{{ .Release.Name }}-servicex-app:8000/servicex/internal/dataset-lifecycle?age=$LIFETIME" - restartPolicy: OnFailure \ No newline at end of file + restartPolicy: OnFailure diff --git a/helm/servicex/values.yaml b/helm/servicex/values.yaml index 77fdd8236..1dfba67e4 100644 --- a/helm/servicex/values.yaml +++ b/helm/servicex/values.yaml @@ -261,7 +261,7 @@ dataLifecycle: # See https://www.geeksforgeeks.org/date-command-linux-examples/#4-how-to-display-past-dates # for examples retention: "7 days ago" - + # The cleanup service will go beyond the retention date to keep the server side cache size below # this threshold. Specify this as a string with Mb, Gb, Tb or Pb units in the string maxDesiredCacheSize: "1Tb" @@ -277,4 +277,3 @@ datasetLifecycle: # How long to keep datasets in the cache, in hours age: 24 - diff --git a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py index e567fe959..99f8c3d1e 100644 --- a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py +++ b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py @@ -41,14 +41,18 @@ def post(self): """ now = datetime.now() try: - age = float(request.args.get('age', 24)) + age = float(request.args.get("age", 24)) except Exception: return {"message": "Invalid age parameter"}, 422 delta = timedelta(hours=age) - datasets = get_all_datasets() # by default this will only give non-stale datasets - todelete = [_.id for _ in datasets if (now-_.last_updated) > delta] - current_app.logger.info(f"Obsoletion called for datasets older than {delta}. " - f"Obsoleting {len(todelete)} datasets.") + datasets = ( + get_all_datasets() + ) # by default this will only give non-stale datasets + todelete = [_.id for _ in datasets if (now - _.last_updated) > delta] + current_app.logger.info( + f"Obsoletion called for datasets older than {delta}. " + f"Obsoleting {len(todelete)} datasets." + ) for dataset_id in todelete: delete_dataset(dataset_id) From 946cca3bac9f92bc01a8ddaa5eef432a317de7e5 Mon Sep 17 00:00:00 2001 From: Peter Onyisi Date: Mon, 17 Nov 2025 12:42:54 -0600 Subject: [PATCH 3/8] Bad variable name fix --- helm/servicex/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/servicex/values.yaml b/helm/servicex/values.yaml index 1dfba67e4..7aeb80968 100644 --- a/helm/servicex/values.yaml +++ b/helm/servicex/values.yaml @@ -276,4 +276,4 @@ datasetLifecycle: schedule: "0 * * * *" # How long to keep datasets in the cache, in hours - age: 24 + cacheLifetime: 24 From 628488cce5960b727853b594aa68bd385bcdcc1f Mon Sep 17 00:00:00 2001 From: Peter Onyisi Date: Mon, 17 Nov 2025 13:40:03 -0600 Subject: [PATCH 4/8] Add tests, fix bug --- .../internal/dataset_lifecycle_ops.py | 2 +- .../internal/test_dataset_lifecycle_ops.py | 87 +++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 servicex_app/servicex_app_test/resources/internal/test_dataset_lifecycle_ops.py diff --git a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py index 99f8c3d1e..cbd15e16f 100644 --- a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py +++ b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py @@ -41,7 +41,7 @@ def post(self): """ now = datetime.now() try: - age = float(request.args.get("age", 24)) + age = float(request.get_json().get("age", 24)) except Exception: return {"message": "Invalid age parameter"}, 422 delta = timedelta(hours=age) diff --git a/servicex_app/servicex_app_test/resources/internal/test_dataset_lifecycle_ops.py b/servicex_app/servicex_app_test/resources/internal/test_dataset_lifecycle_ops.py new file mode 100644 index 000000000..800a6b4db --- /dev/null +++ b/servicex_app/servicex_app_test/resources/internal/test_dataset_lifecycle_ops.py @@ -0,0 +1,87 @@ +# Copyright (c) 2025, IRIS-HEP +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from datetime import datetime +from unittest.mock import patch + +from pytest import fixture + +from servicex_app.models import Dataset + +from servicex_app_test.resource_test_base import ResourceTestBase + + +class TestDatasetLifecycle(ResourceTestBase): + @fixture + def fake_dataset_list(self): + with patch( + "servicex_app.resources.internal.dataset_lifecycle_ops.get_all_datasets" + ) as dsfunc: + dsfunc.return_value = [ + Dataset( + last_used=datetime(2022, 1, 1), + last_updated=datetime(2022, 1, 1), + id=1, + name="not-orphaned", + events=100, + size=1000, + n_files=1, + lookup_status="complete", + did_finder="rucio", + ), + Dataset( + last_used=datetime.now(), + last_updated=datetime.now(), + id=2, + name="orphaned", + events=100, + size=1000, + n_files=1, + lookup_status="complete", + did_finder="rucio", + ), + ] + yield dsfunc + + def test_fail_on_bad_param(self, client): + with client.application.app_context(): + response = client.post( + "/servicex/internal/dataset-lifecycle", json={"age": "string"} + ) + assert response.status_code == 422 + + def test_deletion(self, fake_dataset_list, client): + with client.application.app_context(): + with patch( + "servicex_app.resources.internal.dataset_lifecycle_ops.delete_dataset" + ) as deletion_obj: + response = client.post( + "/servicex/internal/dataset-lifecycle", json={"age": 24} + ) + fake_dataset_list.assert_called_once() + deletion_obj.assert_called_once() + assert response.status_code == 200 From 97a7d5522952d4dded50787f57185c38252299ed Mon Sep 17 00:00:00 2001 From: Peter Onyisi Date: Mon, 17 Nov 2025 13:54:46 -0600 Subject: [PATCH 5/8] Null check Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../servicex_app/resources/internal/dataset_lifecycle_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py index cbd15e16f..20fd0cdc7 100644 --- a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py +++ b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py @@ -48,7 +48,7 @@ def post(self): datasets = ( get_all_datasets() ) # by default this will only give non-stale datasets - todelete = [_.id for _ in datasets if (now - _.last_updated) > delta] + todelete = [_.id for _ in datasets if _.last_updated and (now - _.last_updated) > delta] current_app.logger.info( f"Obsoletion called for datasets older than {delta}. " f"Obsoleting {len(todelete)} datasets." From 4d609d921decdf1cca5b257df4863ea03e9f72fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:54:57 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../servicex_app/resources/internal/dataset_lifecycle_ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py index 20fd0cdc7..d984162c9 100644 --- a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py +++ b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py @@ -48,7 +48,9 @@ def post(self): datasets = ( get_all_datasets() ) # by default this will only give non-stale datasets - todelete = [_.id for _ in datasets if _.last_updated and (now - _.last_updated) > delta] + todelete = [ + _.id for _ in datasets if _.last_updated and (now - _.last_updated) > delta + ] current_app.logger.info( f"Obsoletion called for datasets older than {delta}. " f"Obsoleting {len(todelete)} datasets." From 25d022e0d9464c104bea9271aea1c6464c29714c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 20:03:40 +0000 Subject: [PATCH 7/8] Use timezone-aware datetime for dataset lifecycle ops Co-authored-by: ponyisi <4177101+ponyisi@users.noreply.github.com> --- .../resources/internal/dataset_lifecycle_ops.py | 4 ++-- .../resources/internal/test_dataset_lifecycle_ops.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py index d984162c9..a3dffe5d4 100644 --- a/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py +++ b/servicex_app/servicex_app/resources/internal/dataset_lifecycle_ops.py @@ -25,7 +25,7 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from flask import request, current_app @@ -39,7 +39,7 @@ def post(self): """ Obsolete cached datasets older than N hours """ - now = datetime.now() + now = datetime.now(timezone.utc) try: age = float(request.get_json().get("age", 24)) except Exception: diff --git a/servicex_app/servicex_app_test/resources/internal/test_dataset_lifecycle_ops.py b/servicex_app/servicex_app_test/resources/internal/test_dataset_lifecycle_ops.py index 800a6b4db..b08ca7554 100644 --- a/servicex_app/servicex_app_test/resources/internal/test_dataset_lifecycle_ops.py +++ b/servicex_app/servicex_app_test/resources/internal/test_dataset_lifecycle_ops.py @@ -25,7 +25,7 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from datetime import datetime +from datetime import datetime, timezone from unittest.mock import patch from pytest import fixture @@ -43,8 +43,8 @@ def fake_dataset_list(self): ) as dsfunc: dsfunc.return_value = [ Dataset( - last_used=datetime(2022, 1, 1), - last_updated=datetime(2022, 1, 1), + last_used=datetime(2022, 1, 1, tzinfo=timezone.utc), + last_updated=datetime(2022, 1, 1, tzinfo=timezone.utc), id=1, name="not-orphaned", events=100, @@ -54,8 +54,8 @@ def fake_dataset_list(self): did_finder="rucio", ), Dataset( - last_used=datetime.now(), - last_updated=datetime.now(), + last_used=datetime.now(timezone.utc), + last_updated=datetime.now(timezone.utc), id=2, name="orphaned", events=100, From 30ccd81fc6fbce458cd6695bd1b55a345fb71cb5 Mon Sep 17 00:00:00 2001 From: Peter Onyisi Date: Mon, 17 Nov 2025 14:30:50 -0600 Subject: [PATCH 8/8] Use args --- helm/servicex/templates/dataset-lifecycle/cronjob.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/servicex/templates/dataset-lifecycle/cronjob.yaml b/helm/servicex/templates/dataset-lifecycle/cronjob.yaml index ed02b6489..98b9aa83f 100644 --- a/helm/servicex/templates/dataset-lifecycle/cronjob.yaml +++ b/helm/servicex/templates/dataset-lifecycle/cronjob.yaml @@ -19,6 +19,6 @@ spec: env: - name: LIFETIME value: {{ .Values.datasetLifecycle.cacheLifetime }} - command: - - curl --request POST "http://{{ .Release.Name }}-servicex-app:8000/servicex/internal/dataset-lifecycle?age=$LIFETIME" + args: + - --request POST "http://{{ .Release.Name }}-servicex-app:8000/servicex/internal/dataset-lifecycle?age=$(LIFETIME)" restartPolicy: OnFailure