Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions dandi/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,3 +230,6 @@ def urls(self) -> Iterator[str]:
#: Number of retries for failed API requests (presumably per request —
#: confirm against the retry logic that consumes this constant)
REQUEST_RETRIES = 12

#: Timeout for download requests (presumably in seconds — TODO confirm
#: against its use in download.py)
DOWNLOAD_TIMEOUT = 30

#: Suffix used for temporary download directories
DOWNLOAD_SUFFIX = ".dandidownload"
4 changes: 2 additions & 2 deletions dandi/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import requests

from . import get_logger
from .consts import RETRY_STATUSES, dandiset_metadata_file
from .consts import DOWNLOAD_SUFFIX, RETRY_STATUSES, dandiset_metadata_file
from .dandiapi import AssetType, BaseRemoteZarrAsset, RemoteDandiset
from .dandiarchive import (
AssetItemURL,
Expand Down Expand Up @@ -863,7 +863,7 @@ def __init__(self, filepath: str | Path, digests: dict[str, str]) -> None:
self.digests = digests
#: The working directory in which downloaded data will be temporarily
#: stored
self.dirpath = self.filepath.with_name(self.filepath.name + ".dandidownload")
self.dirpath = self.filepath.with_name(self.filepath.name + DOWNLOAD_SUFFIX)
#: The file in `dirpath` to which data will be written as it is
#: received
self.writefile = self.dirpath / "file"
Expand Down
95 changes: 94 additions & 1 deletion dandi/tests/test_upload.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from collections import defaultdict
from datetime import datetime, timezone
import os
from pathlib import Path
import re
from shutil import copyfile, rmtree
Expand All @@ -19,7 +20,12 @@

from .fixtures import SampleDandiset, sweep_embargo
from .test_helpers import assert_dirtrees_eq
from ..consts import ZARR_MIME_TYPE, EmbargoStatus, dandiset_metadata_file
from ..consts import (
DOWNLOAD_SUFFIX,
ZARR_MIME_TYPE,
EmbargoStatus,
dandiset_metadata_file,
)
from ..dandiapi import AssetType, RemoteBlobAsset, RemoteZarrAsset, RESTFullAPIClient
from ..dandiset import Dandiset
from ..download import download
Expand Down Expand Up @@ -614,3 +620,90 @@ def mock_request(self, method, path, **kwargs):
assert (
request_attempts[url] == 1
), f"URL {url} should not have been retried but had {request_attempts[url]} attempts"


@pytest.mark.ai_generated
def test_upload_rejects_dandidownload_paths(
    new_dandiset: SampleDandiset, tmp_path: Path
) -> None:
    """
    Test that upload rejects assets with .dandidownload paths, both for a
    regular file whose path contains the suffix and for entries inside a
    Zarr asset's directory tree.
    """
    dspath = new_dandiset.dspath
    # ``pytest.raises(match=...)`` interprets the pattern as a regex, and
    # DOWNLOAD_SUFFIX starts with ".", a regex wildcard.  Escape it so the
    # patterns below only match the suffix literally.
    suffix_re = re.escape(DOWNLOAD_SUFFIX)

    # Test 1: Regular file with .dandidownload in path
    badfile_path = dspath / f"test{DOWNLOAD_SUFFIX}" / "file.nwb"
    badfile_path.parent.mkdir(parents=True)
    make_nwb_file(
        badfile_path,
        session_description="test session",
        identifier="test123",
        session_start_time=datetime(2017, 4, 15, 12, tzinfo=timezone.utc),
        subject=pynwb.file.Subject(subject_id="test"),
    )

    with pytest.raises(
        UploadError,
        match=f"contains {suffix_re} path which indicates incomplete download",
    ):
        new_dandiset.upload(allow_any_path=True)

    # Clean up for next test
    rmtree(badfile_path.parent)

    # Test 2: Zarr asset with .dandidownload in internal path
    zarr_path = dspath / "test.zarr"
    zarr.save(zarr_path, np.arange(100))

    # Create a .dandidownload directory inside the zarr
    bad_zarr_path = zarr_path / f"sub{DOWNLOAD_SUFFIX}"
    bad_zarr_path.mkdir()
    (bad_zarr_path / "badfile").write_text("bad data")

    with pytest.raises(
        UploadError,
        match=(
            f"Zarr asset contains {suffix_re} path which indicates"
            " incomplete download"
        ),
    ):
        new_dandiset.upload()

    # Clean up
    rmtree(bad_zarr_path)

    # Test 3: Zarr asset with .dandidownload in filename
    bad_file_in_zarr = zarr_path / f"data{DOWNLOAD_SUFFIX}"
    bad_file_in_zarr.write_text("bad data")

    with pytest.raises(
        UploadError,
        match=(
            f"Zarr asset contains {suffix_re} path which indicates"
            " incomplete download"
        ),
    ):
        new_dandiset.upload()

    # Clean up
    bad_file_in_zarr.unlink()

    # Test 4: Normal zarr should upload fine after removing bad paths
    new_dandiset.upload()
    (asset,) = new_dandiset.dandiset.get_assets()
    assert isinstance(asset, RemoteZarrAsset)
    assert asset.path == "test.zarr"


@pytest.mark.ai_generated
def test_upload_rejects_dandidownload_nwb_file(new_dandiset: SampleDandiset) -> None:
    """Test that upload rejects NWB files with .dandidownload in their path"""
    dspath = new_dandiset.dspath

    # Create an NWB file with .dandidownload in its name
    bad_nwb_path = dspath / f"test{DOWNLOAD_SUFFIX}.nwb"
    make_nwb_file(
        bad_nwb_path,
        session_description="test session",
        identifier="test456",
        session_start_time=datetime(2017, 4, 15, 12, tzinfo=timezone.utc),
        subject=pynwb.file.Subject(subject_id="test"),
    )

    # Escape DOWNLOAD_SUFFIX: ``match=`` is a regex and the suffix's leading
    # "." would otherwise match any character.
    with pytest.raises(
        UploadError,
        match=(
            f"contains {re.escape(DOWNLOAD_SUFFIX)} path which indicates"
            " incomplete download"
        ),
    ):
        new_dandiset.upload(allow_any_path=True)
28 changes: 28 additions & 0 deletions dandi/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from . import __version__, lgr
from .consts import (
DOWNLOAD_SUFFIX,
DRAFT,
DandiInstance,
dandiset_identifier_regex,
Expand All @@ -41,6 +42,30 @@
from .validate_types import Severity


def _check_dandidownload_paths(dfile: DandiFile) -> None:
    """
    Raise `UploadError` if *dfile* involves any ``.dandidownload`` path.

    Paths containing the temporary-download suffix indicate an incomplete or
    interrupted download; such "garbage" entries must never be uploaded.
    """
    # Reject the asset outright if its own filesystem path is tainted.
    if DOWNLOAD_SUFFIX in str(dfile.filepath):
        raise UploadError(
            f"Asset contains {DOWNLOAD_SUFFIX} path which indicates incomplete "
            f"download: {dfile.filepath}"
        )

    # Non-Zarr assets have no internal structure left to inspect.
    if not isinstance(dfile, ZarrAsset):
        return

    # A Zarr is a directory tree; every file inside it must be clean too.
    for entry in dfile.iterfiles():
        if DOWNLOAD_SUFFIX not in str(entry):
            continue
        raise UploadError(
            f"Zarr asset contains {DOWNLOAD_SUFFIX} path which indicates "
            f"incomplete download: {entry}"
        )


class Uploaded(TypedDict):
size: int
errors: list[str]
Expand Down Expand Up @@ -252,6 +277,9 @@ def process_path(dfile: DandiFile) -> Iterator[dict]:
# without limiting [:50] it might cause some pyout indigestion
raise UploadError(str(exc)[:50])

# Check for .dandidownload paths that indicate incomplete downloads
_check_dandidownload_paths(dfile)

#
# Validate first, so we do not bother server at all if not kosher
#
Expand Down
Loading