From 4cc65c2ad519646a2cdf1abb1d865ce30c1f1dfc Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 20 Dec 2024 15:16:24 -0500 Subject: [PATCH 01/18] Simplify: a single log.debug invocation for any case which leads to sleep --- dandi/download.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index e08afd72c..71c79a1c5 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1088,7 +1088,7 @@ def _check_if_more_attempts_allowed( downloaded_in_attempt: int, ) -> int | None: """Check if we should retry the download, return potentially adjusted 'attempts_allowed'""" - sleep_amount = random.random() * 5 * attempt + sleep_amount: float = -1.0 if os.environ.get("DANDI_DOWNLOAD_AGGRESSIVE_RETRY"): # in such a case if we downloaded a little more -- # consider it a successful attempt @@ -1143,17 +1143,9 @@ def _check_if_more_attempts_allowed( sleep_amount, exc, ) - else: - lgr.debug( - "%s - download failed on attempt #%d: %s, will sleep a bit and retry", - path, - attempt, - exc, - ) - # if is_access_denied(exc) or attempt >= 2: - # raise - # sleep a little and retry - else: + if sleep_amount < 0: + # it was not Retry-after set, so we come up with random duration to sleep + sleep_amount = random.random() * 5 * attempt lgr.debug( "%s - download failed on attempt #%d: %s, will sleep a bit and retry", path, From 88f39016ae9b530609e45d9ef9c60d7483a31627 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 20 Dec 2024 16:49:28 -0500 Subject: [PATCH 02/18] Retry-After is provided by 429 (too many) so retry on that too --- dandi/consts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/consts.py b/dandi/consts.py index 0b7bc1b76..6e3191c7f 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -141,7 +141,7 @@ def urls(self) -> Iterator[str]: "linc-staging", "https://staging.lincbrain.org", "https://staging-api.lincbrain.org/api", - ) + ), } # to map back url: name known_instances_rev = { @@ -160,7 +160,7 @@ def urls(self) -> Iterator[str]: #: HTTP response status codes that should always be retried (until we run out #: of retries) -RETRY_STATUSES = (500, 502, 503, 504) +RETRY_STATUSES = (429, 500, 502, 503, 504) VIDEO_FILE_EXTENSIONS = [".mp4", ".avi", ".wmv", ".mov", ".flv", ".mkv"] VIDEO_FILE_MODULES = ["processing", "acquisition"] From 6bb3146dab97a76f6ec9ba95e726b709711a78fd Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 20 Dec 2024 16:50:36 -0500 Subject: [PATCH 03/18] Handle Retry-After as a date, and add tests for all logic of checking attempts --- dandi/download.py | 48 ++++++++++++------ dandi/tests/test_download.py | 94 ++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 16 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index 71c79a1c5..35bb0e47e 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -3,7 +3,8 @@ from collections import Counter, deque from collections.abc import Callable, Iterable, Iterator, Sequence from dataclasses import InitVar, dataclass, field -from datetime import datetime +from datetime import UTC, datetime +from email.utils import parsedate_to_datetime from enum import Enum from functools import partial import hashlib @@ -1085,7 +1086,7 @@ def _check_if_more_attempts_allowed( exc: requests.RequestException, attempt: int, attempts_allowed: int, - downloaded_in_attempt: int, + downloaded_in_attempt: int = 0, ) -> int | None: """Check if we should retry the download, return potentially adjusted 'attempts_allowed'""" sleep_amount: float = -1.0 @@ -1121,20 +1122,35 @@ def _check_if_more_attempts_allowed( ) return None elif retry_after := exc.response.headers.get("Retry-After"): - # playing safe - if not str(retry_after).isdigit(): - # our code is wrong, do not crash but issue warning so - # we might get report/fix it up - lgr.warning( - "%s - download failed due to response %d with non-integer" - " Retry-After=%r: %s", - path, - exc.response.status_code, - retry_after, - exc, - ) - return None - sleep_amount = int(retry_after) + if retry_after.isdigit(): + sleep_amount = int(retry_after) + else: + # else if it is a datestamp like "Wed, 21 Oct 2015 07:28:00 GMT" + # we could parse it and calculate how long to sleep + try: + retry_after_date = parsedate_to_datetime(retry_after) + current_date = datetime.now(UTC) + difference = retry_after_date - current_date + sleep_amount = int(difference.total_seconds()) + if sleep_amount < 0: + lgr.warning( + "%s - date in Retry-After=%r is in the past (current is %r)", + path, + retry_after, + current_date, + ) + except ValueError as exc_ve: + # our code or response is wrong, do not crash but issue warning + # and continue with "default" sleep logic + lgr.warning( + "%s - download failed due to response %d with non-integer or future date" + " Retry-After=%r: %s with %s upon converting assuming it is a date", + path, + exc.response.status_code, + retry_after, + exc, + exc_ve, + ) lgr.debug( "%s - download failed due to response %d with " "Retry-After=%d: %s, will sleep and retry", diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index fb9626d36..3d65e19a7 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -2,6 +2,8 @@ from collections.abc import Callable from contextlib import nullcontext +from email.utils import parsedate_to_datetime +from functools import partial from glob import glob import json import logging @@ -12,10 +14,13 @@ import re from shutil import rmtree import time +from unittest import mock import numpy as np import pytest from pytest_mock import MockerFixture +import requests +from requests.exceptions import HTTPError import responses import zarr @@ -32,6 +37,7 @@ PathType, ProgressCombiner, PYOUTHelper, + _check_if_more_attempts_allowed, download, ) from ..exceptions import NotFoundError @@ -1169,3 +1175,91 @@ def test_DownloadDirectory_exc( assert dl.dirpath.exists() assert dl.fp is None assert dl.writefile.read_bytes() == b"456" + + +def test__check_if_more_attempts_allowed(): + f = partial(_check_if_more_attempts_allowed, "some/path") + + response403 = requests.Response() + response403.status_code = 403 # no retry + + response500 = requests.Response() + response500.status_code = 500 + + # we do retry if cause is unknown (no response) + with mock.patch("time.sleep") as mock_sleep: + assert f(HTTPError(), attempt=1, attempts_allowed=2) == 2 + mock_sleep.assert_called_once() + assert mock_sleep.call_args.args[0] >= 0 + + # or if some 500 + with mock.patch("time.sleep") as mock_sleep: + assert f(HTTPError(response=response500), attempt=1, attempts_allowed=2) == 2 + mock_sleep.assert_called_once() + assert mock_sleep.call_args.args[0] >= 0 + + # do not bother if already at limit + with mock.patch("time.sleep") as mock_sleep: + assert f(HTTPError(), attempt=2, attempts_allowed=2) is None + mock_sleep.assert_not_called() + + # do not bother if 403 + with mock.patch("time.sleep") as mock_sleep: + assert f(HTTPError(response=response403), attempt=1, attempts_allowed=2) is None + mock_sleep.assert_not_called() + + # And in case of "Aggressive setting" when DANDI_DOWNLOAD_AGGRESSIVE_RETRY + # env var is set to 1, we retry if there was extra content downloaded + # patch env var DANDI_DOWNLOAD_AGGRESSIVE_RETRY + with mock.patch.dict(os.environ, {"DANDI_DOWNLOAD_AGGRESSIVE_RETRY": "1"}): + with mock.patch("time.sleep") as mock_sleep: + assert ( + f(HTTPError(), attempt=2, attempts_allowed=2, downloaded_in_attempt=0) + is None + ) + mock_sleep.assert_not_called() + + assert ( + f(HTTPError(), attempt=2, attempts_allowed=2, downloaded_in_attempt=1) + == 3 + ) + mock_sleep.assert_called_once() + assert mock_sleep.call_args.args[0] >= 0 + + +@pytest.mark.parametrize("status_code", [429, 503]) +def test__check_if_more_attempts_allowed_retries(status_code): + f = partial(_check_if_more_attempts_allowed, "some/path") + + response = requests.Response() + response.status_code = status_code + + response.headers["Retry-After"] = "10" + with mock.patch("time.sleep") as mock_sleep: + assert f(HTTPError(response=response), attempt=1, attempts_allowed=2) == 2 + mock_sleep.assert_called_once() + assert mock_sleep.call_args.args[0] == 10 + + response.headers["Retry-After"] = "Wed, 21 Oct 2015 07:28:00 GMT" + with mock.patch("time.sleep") as mock_sleep, mock.patch( + "dandi.download.datetime" + ) as mock_datetime: + # shifted by 2 minutes + mock_datetime.now.return_value = parsedate_to_datetime( + "Wed, 21 Oct 2015 07:26:00 GMT" + ) + assert f(HTTPError(response=response), attempt=1, attempts_allowed=2) == 2 + mock_sleep.assert_called_once() + assert mock_sleep.call_args.args[0] == 120 + + # we would still sleep some time if Retry-After is not decypherable + response.headers["Retry-After"] = "nondecypherable" + with mock.patch("time.sleep") as mock_sleep: + assert ( + _check_if_more_attempts_allowed( + "some/path", HTTPError(response=response), attempt=1, attempts_allowed=2 + ) + == 2 + ) + mock_sleep.assert_called_once() + assert mock_sleep.call_args.args[0] > 0 From 8b3f54633a1a84ca0b5d5f4c86f1723de9bea08a Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 29 Jan 2025 13:19:08 -0500 Subject: [PATCH 04/18] Replace UTC with use of timezone.utc for compatibility datetime.UTC was introduced only in 3.11, so type checking which uses 3.9 ATM fails rightfully, as unittesting on 3.9 and 3.10 --- dandi/download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index 35bb0e47e..d2e0e3b8c 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -3,7 +3,7 @@ from collections import Counter, deque from collections.abc import Callable, Iterable, Iterator, Sequence from dataclasses import InitVar, dataclass, field -from datetime import UTC, datetime +from datetime import datetime, timezone from email.utils import parsedate_to_datetime from enum import Enum from functools import partial @@ -1129,7 +1129,7 @@ def _check_if_more_attempts_allowed( # we could parse it and calculate how long to sleep try: retry_after_date = parsedate_to_datetime(retry_after) - current_date = datetime.now(UTC) + current_date = datetime.now(timezone.utc) difference = retry_after_date - current_date sleep_amount = int(difference.total_seconds()) if sleep_amount < 0: From 47cc585acf2a0d7d95d1ce5541a68af3ed05f070 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 29 Jan 2025 15:05:09 -0500 Subject: [PATCH 05/18] RF: centralize retry-after parsing logic, add to other places where RETRY_CODES are used It also add additional checks/protection against odd retry-after results (too long negatives or positives), which should address some concerns raised in prior code review --- dandi/dandiapi.py | 8 +++++ dandi/dandiarchive.py | 30 ++++++++++++----- dandi/download.py | 53 ++++++++--------------------- dandi/tests/test_download.py | 4 +-- dandi/utils.py | 64 ++++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 51 deletions(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index e351b32e3..87f86e164 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -43,6 +43,7 @@ chunked, ensure_datetime, get_instance, + get_retry_after, is_interactive, is_page2_url, joinurl, @@ -236,6 +237,13 @@ def request( ) if data is not None and hasattr(data, "seek"): data.seek(0) + if retry_after := get_retry_after(result): + lgr.debug( + "Sleeping for %d seconds as instructed in response " + "(in addition to tenacity imposed)", + retry_after, + ) + sleep(retry_after) result.raise_for_status() except Exception as e: if isinstance(e, requests.HTTPError): diff --git a/dandi/dandiarchive.py b/dandi/dandiarchive.py index afffe4450..75d27607d 100644 --- a/dandi/dandiarchive.py +++ b/dandi/dandiarchive.py @@ -51,7 +51,7 @@ ) from .dandiapi import BaseRemoteAsset, DandiAPIClient, RemoteDandiset from .exceptions import FailedToConnectError, NotFoundError, UnknownURLError -from .utils import get_instance +from .utils import get_instance, get_retry_after lgr = get_logger() @@ -893,14 +893,26 @@ def follow_redirect(url: str) -> str: while True: r = requests.head(url, allow_redirects=True) if r.status_code in RETRY_STATUSES and i < 4: - delay = 0.1 * 10**i - lgr.warning( - "HEAD request to %s returned %d; sleeping for %f seconds and then retrying...", - url, - r.status_code, - delay, - ) - sleep(delay) + retry_after = get_retry_after(r) + if retry_after is not None: + delay = retry_after + else: + delay = 0.1 * 10**i + if delay: + lgr.warning( + "HEAD request to %s returned %d; " + "sleeping for %f seconds and then retrying...", + url, + r.status_code, + delay, + ) + sleep(delay) + else: + lgr.warning( + "HEAD request to %s returned %d; retrying...", + url, + r.status_code, + ) i += 1 continue elif r.status_code == 404: diff --git a/dandi/download.py b/dandi/download.py index d2e0e3b8c..a56d89475 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -3,8 +3,7 @@ from collections import Counter, deque from collections.abc import Callable, Iterable, Iterator, Sequence from dataclasses import InitVar, dataclass, field -from datetime import datetime, timezone -from email.utils import parsedate_to_datetime +from datetime import datetime from enum import Enum from functools import partial import hashlib @@ -50,6 +49,7 @@ ensure_datetime, exclude_from_zarr, flattened, + get_retry_after, is_same_time, path_is_subpath, pluralize, @@ -1121,44 +1121,17 @@ def _check_if_more_attempts_allowed( exc, ) return None - elif retry_after := exc.response.headers.get("Retry-After"): - if retry_after.isdigit(): - sleep_amount = int(retry_after) - else: - # else if it is a datestamp like "Wed, 21 Oct 2015 07:28:00 GMT" - # we could parse it and calculate how long to sleep - try: - retry_after_date = parsedate_to_datetime(retry_after) - current_date = datetime.now(timezone.utc) - difference = retry_after_date - current_date - sleep_amount = int(difference.total_seconds()) - if sleep_amount < 0: - lgr.warning( - "%s - date in Retry-After=%r is in the past (current is %r)", - path, - retry_after, - current_date, - ) - except ValueError as exc_ve: - # our code or response is wrong, do not crash but issue warning - # and continue with "default" sleep logic - lgr.warning( - "%s - download failed due to response %d with non-integer or future date" - " Retry-After=%r: %s with %s upon converting assuming it is a date", - path, - exc.response.status_code, - retry_after, - exc, - exc_ve, - ) - lgr.debug( - "%s - download failed due to response %d with " - "Retry-After=%d: %s, will sleep and retry", - path, - exc.response.status_code, - sleep_amount, - exc, - ) + elif sleep_amount_ := get_retry_after(exc.response): + if sleep_amount_ is not None: # could be 0 + sleep_amount = sleep_amount_ + lgr.debug( + "%s - download failed due to response %d with " + "Retry-After=%d: %s, will sleep and retry", + path, + exc.response.status_code, + sleep_amount, + exc, + ) if sleep_amount < 0: # it was not Retry-after set, so we come up with random duration to sleep sleep_amount = random.random() * 5 * attempt diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 3d65e19a7..5359719a9 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1242,10 +1242,10 @@ def test__check_if_more_attempts_allowed_retries(status_code): response.headers["Retry-After"] = "Wed, 21 Oct 2015 07:28:00 GMT" with mock.patch("time.sleep") as mock_sleep, mock.patch( - "dandi.download.datetime" + "dandi.utils.datetime" ) as mock_datetime: # shifted by 2 minutes - mock_datetime.now.return_value = parsedate_to_datetime( + mock_datetime.datetime.now.return_value = parsedate_to_datetime( "Wed, 21 Oct 2015 07:26:00 GMT" ) assert f(HTTPError(response=response), attempt=1, attempts_allowed=2) == 2 diff --git a/dandi/utils.py b/dandi/utils.py index b7f9ed5b5..fbbeae896 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -3,6 +3,7 @@ from bisect import bisect from collections.abc import Iterable, Iterator import datetime +from email.utils import parsedate_to_datetime from functools import lru_cache from importlib.metadata import version as importlib_version import inspect @@ -890,3 +891,66 @@ def joinurl(base: str, path: str) -> str: return path else: return base.rstrip("/") + "/" + path.lstrip("/") + + +def get_retry_after(response: requests.Response) -> Optional[int]: + """If provided and parsed ok, returns duration in seconds to sleep before retry. + + If not provided in the response header `Retry-After`, would + return None. + If parsing fails, or provided date/sleep does not make sense + since either too far in the past (over 2 seconds) or in the future + (over a week), would return None. + """ + if_unparsable = None + retry_after = response.headers.get("Retry-After") + if retry_after is None: + return None + sleep_amount: int | None + if retry_after.isdecimal(): + sleep_amount = int(retry_after) + else: + # else if it is a datestamp like "Wed, 21 Oct 2015 07:28:00 GMT" + # we could parse it and calculate how long to sleep + current_date = datetime.datetime.now(datetime.timezone.utc) + try: + retry_after_date = parsedate_to_datetime(retry_after) + except (ValueError, TypeError) as exc_ve: + # our code or response is wrong, do not crash but issue warning + # and continue with "if_unparsable" sleep logic + lgr.warning( + "response %d has incorrect date in Retry-After=%r: %s. " "Returning %r", + response.status_code, + retry_after, + exc_ve, + if_unparsable, + ) + sleep_amount = if_unparsable + else: + difference = retry_after_date - current_date + sleep_amount = int(difference.total_seconds()) + + if sleep_amount: + if -2 < sleep_amount < 0: + # allow for up to a few seconds delay in us receiving/parsing etc + # but otherwise assume abnormality and just return if_unparsable + sleep_amount = 0 + elif sleep_amount < 0: + lgr.warning( + "date in Retry-After=%r is in the past (current is %r). " + "Returning %r", + retry_after, + current_date, + if_unparsable, + ) + sleep_amount = if_unparsable + elif sleep_amount > 7 * 24 * 60 * 60: # week + lgr.warning( + "date in Retry-After=%r is over a week in the future (current is %r). " + "Returning %r", + retry_after, + current_date, + if_unparsable, + ) + sleep_amount = if_unparsable + return sleep_amount From 7d2a8586b7967faad5c416e207ace89aa3f9fd4e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 08:59:08 -0500 Subject: [PATCH 06/18] ENH: remove stale comment (we retry only on selected), fix typo, inform how long to sleep --- dandi/download.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index f01d8fb8e..0c376e644 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1107,10 +1107,9 @@ def _check_if_more_attempts_allowed( if attempt >= attempts_allowed: lgr.debug("%s - download failed after %d attempts: %s", path, attempt, exc) return None - # TODO: actually we should probably retry only on selected codes, elif exc.response is not None: if exc.response.status_code not in ( - 400, # Bad Request, but happened with gider: + 400, # Bad Request, but happened with girder: # https://github.com/dandi/dandi-cli/issues/87 *RETRY_STATUSES, ): @@ -1136,10 +1135,11 @@ def _check_if_more_attempts_allowed( # it was not Retry-after set, so we come up with random duration to sleep sleep_amount = random.random() * 5 * attempt lgr.debug( - "%s - download failed on attempt #%d: %s, will sleep a bit and retry", + "%s - download failed on attempt #%d: %s, will sleep %f and retry", path, attempt, exc, + sleep_amount, ) time.sleep(sleep_amount) return attempts_allowed From 948b7f7c431ee84d3f09b7adf41a2675cfb8fc5b Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 09:01:11 -0500 Subject: [PATCH 07/18] RF: rename function to reflect that it would sleep as well --- dandi/download.py | 8 +++++--- dandi/tests/test_download.py | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index 0c376e644..123532577 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -797,7 +797,7 @@ def _download_file( # Catching RequestException lets us retry on timeout & connection # errors (among others) in addition to HTTP status errors. except requests.RequestException as exc: - attempts_allowed_or_not = _check_if_more_attempts_allowed( + attempts_allowed_or_not = _check_attempts_and_sleep( path=path, exc=exc, attempt=attempt, @@ -1081,14 +1081,16 @@ def downloads_gen(): yield {"status": "done"} -def _check_if_more_attempts_allowed( +def _check_attempts_and_sleep( path: Path, exc: requests.RequestException, attempt: int, attempts_allowed: int, downloaded_in_attempt: int = 0, ) -> int | None: - """Check if we should retry the download, return potentially adjusted 'attempts_allowed'""" + """Check if we should retry the download, sleep if still allowed, + and return potentially adjusted 'attempts_allowed' + """ sleep_amount: float = -1.0 if os.environ.get("DANDI_DOWNLOAD_AGGRESSIVE_RETRY"): # in such a case if we downloaded a little more -- diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 5359719a9..b17ced573 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -37,7 +37,7 @@ PathType, ProgressCombiner, PYOUTHelper, - _check_if_more_attempts_allowed, + _check_attempts_and_sleep, download, ) from ..exceptions import NotFoundError @@ -1178,7 +1178,7 @@ def test_DownloadDirectory_exc( def test__check_if_more_attempts_allowed(): - f = partial(_check_if_more_attempts_allowed, "some/path") + f = partial(_check_attempts_and_sleep, "some/path") response403 = requests.Response() response403.status_code = 403 # no retry @@ -1229,7 +1229,7 @@ def test__check_if_more_attempts_allowed(): @pytest.mark.parametrize("status_code", [429, 503]) def test__check_if_more_attempts_allowed_retries(status_code): - f = partial(_check_if_more_attempts_allowed, "some/path") + f = partial(_check_attempts_and_sleep, "some/path") response = requests.Response() response.status_code = status_code @@ -1256,7 +1256,7 @@ def test__check_if_more_attempts_allowed_retries(status_code): response.headers["Retry-After"] = "nondecypherable" with mock.patch("time.sleep") as mock_sleep: assert ( - _check_if_more_attempts_allowed( + _check_attempts_and_sleep( "some/path", HTTPError(response=response), attempt=1, attempts_allowed=2 ) == 2 From 6c9fa519dd769f5632e5ee42c1289dac79280c65 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 09:03:58 -0500 Subject: [PATCH 08/18] Replace "elif" with "if" for clarity --- dandi/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/download.py b/dandi/download.py index 123532577..0172ed1f8 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1109,7 +1109,7 @@ def _check_attempts_and_sleep( if attempt >= attempts_allowed: lgr.debug("%s - download failed after %d attempts: %s", path, attempt, exc) return None - elif exc.response is not None: + if exc.response is not None: if exc.response.status_code not in ( 400, # Bad Request, but happened with girder: # https://github.com/dandi/dandi-cli/issues/87 From b4b2ea1b8769f5440eb4cd6dd80890cf6048c53b Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 16:37:43 -0500 Subject: [PATCH 09/18] Make code cleaner and more Pythonic (use `None`, not "-1") Co-authored-by: John T. Wodder II --- dandi/download.py | 4 ++-- dandi/tests/test_download.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index 0172ed1f8..a8cc80876 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1091,7 +1091,7 @@ def _check_attempts_and_sleep( """Check if we should retry the download, sleep if still allowed, and return potentially adjusted 'attempts_allowed' """ - sleep_amount: float = -1.0 + sleep_amount: float | None = None if os.environ.get("DANDI_DOWNLOAD_AGGRESSIVE_RETRY"): # in such a case if we downloaded a little more -- # consider it a successful attempt @@ -1133,7 +1133,7 @@ def _check_attempts_and_sleep( sleep_amount, exc, ) - if sleep_amount < 0: + if sleep_amount is None: # it was not Retry-after set, so we come up with random duration to sleep sleep_amount = random.random() * 5 * attempt lgr.debug( diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index b17ced573..2f515e4b1 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1177,7 +1177,7 @@ def test_DownloadDirectory_exc( assert dl.writefile.read_bytes() == b"456" -def test__check_if_more_attempts_allowed(): +def test__check_attempts_and_sleep() -> None: f = partial(_check_attempts_and_sleep, "some/path") response403 = requests.Response() @@ -1228,7 +1228,7 @@ def test__check_if_more_attempts_allowed(): @pytest.mark.parametrize("status_code", [429, 503]) -def test__check_if_more_attempts_allowed_retries(status_code): +def test__check_attempts_and_sleep_retries(status_code: int) -> None: f = partial(_check_attempts_and_sleep, "some/path") response = requests.Response() @@ -1253,7 +1253,7 @@ def test__check_if_more_attempts_allowed_retries(status_code): assert mock_sleep.call_args.args[0] == 120 # we would still sleep some time if Retry-After is not decypherable - response.headers["Retry-After"] = "nondecypherable" + response.headers["Retry-After"] = "indecipherable" with mock.patch("time.sleep") as mock_sleep: assert ( _check_attempts_and_sleep( From 3883afcd5262d11c1fdbf31b26a882b092184e7b Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 16:55:06 -0500 Subject: [PATCH 10/18] Another pythonic tuneup Co-authored-by: John T. Wodder II --- dandi/download.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index a8cc80876..90aa239aa 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1122,17 +1122,15 @@ def _check_attempts_and_sleep( exc, ) return None - elif sleep_amount_ := get_retry_after(exc.response): - if sleep_amount_ is not None: # could be 0 - sleep_amount = sleep_amount_ - lgr.debug( - "%s - download failed due to response %d with " - "Retry-After=%d: %s, will sleep and retry", - path, - exc.response.status_code, - sleep_amount, - exc, - ) + elif (sleep_amount := get_retry_after(exc.response)) is not None: + lgr.debug( + "%s - download failed due to response %d with " + "Retry-After=%d: %s, will sleep and retry", + path, + exc.response.status_code, + sleep_amount, + exc, + ) if sleep_amount is None: # it was not Retry-after set, so we come up with random duration to sleep sleep_amount = random.random() * 5 * attempt From 48eaa2bf3036f4323ad2b572b92cd3b8dc8f98e5 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 16:55:39 -0500 Subject: [PATCH 11/18] Use try/except instead of isdecimal test Co-authored-by: John T. Wodder II --- dandi/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/utils.py b/dandi/utils.py index fbbeae896..f20de17ff 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -907,9 +907,9 @@ def get_retry_after(response: requests.Response) -> Optional[int]: if retry_after is None: return None sleep_amount: int | None - if retry_after.isdecimal(): + try: sleep_amount = int(retry_after) - else: + except ValueError: # else if it is a datestamp like "Wed, 21 Oct 2015 07:28:00 GMT" # we could parse it and calculate how long to sleep current_date = datetime.datetime.now(datetime.timezone.utc) From 6aeaa610747d32914fa03e3f15bbf8861b2b5056 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 16:40:52 -0500 Subject: [PATCH 12/18] RF+BF: dedent checking of sleep_amount, do not use if_unparsable --- dandi/utils.py | 53 +++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/dandi/utils.py b/dandi/utils.py index f20de17ff..7084ca586 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -902,55 +902,54 @@ def get_retry_after(response: requests.Response) -> Optional[int]: since either too far in the past (over 2 seconds) or in the future (over a week), would return None. """ - if_unparsable = None retry_after = response.headers.get("Retry-After") if retry_after is None: return None sleep_amount: int | None + current_date = datetime.datetime.now(datetime.timezone.utc) try: sleep_amount = int(retry_after) except ValueError: # else if it is a datestamp like "Wed, 21 Oct 2015 07:28:00 GMT" # we could parse it and calculate how long to sleep - current_date = datetime.datetime.now(datetime.timezone.utc) try: retry_after_date = parsedate_to_datetime(retry_after) except (ValueError, TypeError) as exc_ve: # our code or response is wrong, do not crash but issue warning # and continue with "if_unparsable" sleep logic + sleep_amount = None lgr.warning( "response %d has incorrect date in Retry-After=%r: %s. " "Returning %r", response.status_code, retry_after, exc_ve, - if_unparsable, + sleep_amount, ) - sleep_amount = if_unparsable else: difference = retry_after_date - current_date sleep_amount = int(difference.total_seconds()) - if sleep_amount: - if -2 < sleep_amount < 0: - # allow for up to a few seconds delay in us receiving/parsing etc - # but otherwise assume abnormality and just return if_unparsable - sleep_amount = 0 - elif sleep_amount < 0: - lgr.warning( - "date in Retry-After=%r is in the past (current is %r). " - "Returning %r", - retry_after, - current_date, - if_unparsable, - ) - sleep_amount = if_unparsable - elif sleep_amount > 7 * 24 * 60 * 60: # week - lgr.warning( - "date in Retry-After=%r is over a week in the future (current is %r). " - "Returning %r", - retry_after, - current_date, - if_unparsable, - ) - sleep_amount = if_unparsable + if sleep_amount: + if -2 < sleep_amount < 0: + # allow for up to a few seconds delay in us receiving/parsing etc + # but otherwise assume abnormality and just return if_unparsable + sleep_amount = 0 + elif sleep_amount < 0: + sleep_amount = None + lgr.warning( + "date in Retry-After=%r is in the past (current is %r). " + "Returning %r", + retry_after, + current_date, + sleep_amount, + ) + elif sleep_amount > 7 * 24 * 60 * 60: # week + sleep_amount = None + lgr.warning( + "date in Retry-After=%r is over a week in the future (current is %r). " + "Returning %r", + retry_after, + current_date, + sleep_amount, + ) return sleep_amount From fa60dfd0430c94afaaece1cc2854cfc1bc8de692 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 17:06:32 -0500 Subject: [PATCH 13/18] Fix type info -- only Path is allowed --- dandi/tests/test_download.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 2f515e4b1..ec6182995 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1178,7 +1178,7 @@ def test_DownloadDirectory_exc( def test__check_attempts_and_sleep() -> None: - f = partial(_check_attempts_and_sleep, "some/path") + f = partial(_check_attempts_and_sleep, Path("some/path")) response403 = requests.Response() response403.status_code = 403 # no retry @@ -1229,7 +1229,7 @@ def test__check_attempts_and_sleep() -> None: @pytest.mark.parametrize("status_code", [429, 503]) def test__check_attempts_and_sleep_retries(status_code: int) -> None: - f = partial(_check_attempts_and_sleep, "some/path") + f = partial(_check_attempts_and_sleep, Path("some/path")) response = requests.Response() response.status_code = status_code @@ -1257,7 +1257,10 @@ def test__check_attempts_and_sleep_retries(status_code: int) -> None: with mock.patch("time.sleep") as mock_sleep: assert ( _check_attempts_and_sleep( - "some/path", HTTPError(response=response), attempt=1, attempts_allowed=2 + Path("some/path"), + HTTPError(response=response), + attempt=1, + attempts_allowed=2, ) == 2 ) From 0fe489aab36b3e3ffe7ce7d79a4f9a50f12d070b Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 17:10:38 -0500 Subject: [PATCH 14/18] RF: use "if" not "elif" after a statement with return in the body --- dandi/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/download.py b/dandi/download.py index 90aa239aa..e375a60c1 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1122,7 +1122,7 @@ def _check_attempts_and_sleep( exc, ) return None - elif (sleep_amount := get_retry_after(exc.response)) is not None: + if (sleep_amount := get_retry_after(exc.response)) is not None: lgr.debug( "%s - download failed due to response %d with " "Retry-After=%d: %s, will sleep and retry", From b2736e404d790a65a43ec8a696aa59ba5d7c3751 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Feb 2025 17:56:37 -0500 Subject: [PATCH 15/18] Add tests for odd times encountered --- dandi/tests/test_download.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index ec6182995..de03e9872 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -1266,3 +1266,29 @@ def test__check_attempts_and_sleep_retries(status_code: int) -> None: ) mock_sleep.assert_called_once() assert mock_sleep.call_args.args[0] > 0 + + # shifted by 1 year! (too long) + response.headers["Retry-After"] = "Wed, 21 Oct 2016 07:28:00 GMT" + with mock.patch("time.sleep") as mock_sleep, mock.patch( + "dandi.utils.datetime" + ) as mock_datetime: + mock_datetime.datetime.now.return_value = parsedate_to_datetime( + "Wed, 21 Oct 2015 07:28:00 GMT" + ) + assert f(HTTPError(response=response), attempt=1, attempts_allowed=2) == 2 + mock_sleep.assert_called_once() + # and we do sleep some time + assert mock_sleep.call_args.args[0] > 0 + + # in the past second (too quick) + response.headers["Retry-After"] = "Wed, 21 Oct 2015 07:27:59 GMT" + with mock.patch("time.sleep") as mock_sleep, mock.patch( + "dandi.utils.datetime" + ) as mock_datetime: + mock_datetime.datetime.now.return_value = parsedate_to_datetime( + "Wed, 21 Oct 2015 07:28:00 GMT" + ) + assert f(HTTPError(response=response), attempt=1, attempts_allowed=2) == 2 + mock_sleep.assert_called_once() + # and we do not sleep really + assert not mock_sleep.call_args.args[0] From 2b43e05a4979970bf67c4df8152656277e0b4d89 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 11 Feb 2025 18:38:08 -0500 Subject: [PATCH 16/18] Apply suggestions from code review: improved docstring, use of %s for Retry-After= etc Co-authored-by: Isaac To Was amended locally later due to a dirty suggestion applied --- dandi/download.py | 16 ++++++++++++++-- dandi/utils.py | 6 +++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index e375a60c1..b81524fdf 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1088,8 +1088,20 @@ def _check_attempts_and_sleep( attempts_allowed: int, downloaded_in_attempt: int = 0, ) -> int | None: - """Check if we should retry the download, sleep if still allowed, + """ + Check if we should retry the download, sleep if still allowed, and return potentially adjusted 'attempts_allowed' + + :param path: Destination of the download + :param exc: Exception raised during the last download attempt + :param attempt: The index of the last download attempt + :param attempts_allowed: The number of download attempts currently allowed + :param downloaded_in_attempt: The number of bytes downloaded in the last attempt + + :returns: The number of download attempts allowed, potentially adjusted, if download + should be retried. None if download should not be retried. + Note: If download should be retried, this function sleeps before returning. + otherwise, it returns immediately. """ sleep_amount: float | None = None if os.environ.get("DANDI_DOWNLOAD_AGGRESSIVE_RETRY"): @@ -1107,7 +1119,7 @@ def _check_attempts_and_sleep( ) attempts_allowed += 1 if attempt >= attempts_allowed: - lgr.debug("%s - download failed after %d attempts: %s", path, attempt, exc) + lgr.debug("%s - download failed after %d attempts: %s", path, attempt + 1, exc) return None if exc.response is not None: if exc.response.status_code not in ( diff --git a/dandi/utils.py b/dandi/utils.py index 7084ca586..a5c3fbc16 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -919,7 +919,7 @@ def get_retry_after(response: requests.Response) -> Optional[int]: # and continue with "if_unparsable" sleep logic sleep_amount = None lgr.warning( - "response %d has incorrect date in Retry-After=%r: %s. " "Returning %r", + "response %d has incorrect date in Retry-After=%s: %s. Returning %r", response.status_code, retry_after, exc_ve, @@ -937,7 +937,7 @@ def get_retry_after(response: requests.Response) -> Optional[int]: elif sleep_amount < 0: sleep_amount = None lgr.warning( - "date in Retry-After=%r is in the past (current is %r). " + "date in Retry-After=%s is in the past (current is %r). " "Returning %r", retry_after, current_date, @@ -946,7 +946,7 @@ def get_retry_after(response: requests.Response) -> Optional[int]: elif sleep_amount > 7 * 24 * 60 * 60: # week sleep_amount = None lgr.warning( - "date in Retry-After=%r is over a week in the future (current is %r). " + "date in Retry-After=%s is over a week in the future (current is %r). " "Returning %r", retry_after, current_date, From 9cd62a2d29cc4504982f4f799c34ae07a3dfb1a7 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 11 Feb 2025 18:44:12 -0500 Subject: [PATCH 17/18] RF: Do not bother logging yet another debug msg on Retry-After in download --- dandi/download.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index b81524fdf..a8669251f 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1134,15 +1134,7 @@ def _check_attempts_and_sleep( exc, ) return None - if (sleep_amount := get_retry_after(exc.response)) is not None: - lgr.debug( - "%s - download failed due to response %d with " - "Retry-After=%d: %s, will sleep and retry", - path, - exc.response.status_code, - sleep_amount, - exc, - ) + sleep_amount = get_retry_after(exc.response) if sleep_amount is None: # it was not Retry-after set, so we come up with random duration to sleep sleep_amount = random.random() * 5 * attempt From 7d236cf53aea37c88753d11dc9d843bb60cc28aa Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 11 Feb 2025 18:48:52 -0500 Subject: [PATCH 18/18] Dedent log msg to always inform on how long we would sleep for one reason or another --- dandi/download.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dandi/download.py b/dandi/download.py index a8669251f..536878389 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -1138,13 +1138,13 @@ def _check_attempts_and_sleep( if sleep_amount is None: # it was not Retry-after set, so we come up with random duration to sleep sleep_amount = random.random() * 5 * attempt - lgr.debug( - "%s - download failed on attempt #%d: %s, will sleep %f and retry", - path, - attempt, - exc, - sleep_amount, - ) + lgr.debug( + "%s - download failed on attempt #%d: %s, will sleep %f and retry", + path, + attempt, + exc, + sleep_amount, + ) time.sleep(sleep_amount) return attempts_allowed