Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
5987137
Add initial tests for remote storage workflows with UPath
SamirMoustafa Feb 28, 2026
865eb76
io: add dask.array.to_zarr compat for ome_zarr kwargs
SamirMoustafa Mar 2, 2026
2134386
io: add remote storage helpers in _utils
SamirMoustafa Mar 2, 2026
eee34d8
core: support UPath for SpatialData.path and write()
SamirMoustafa Mar 2, 2026
40af327
io: use resolved store and remote parquet in points, raster, shapes, …
SamirMoustafa Mar 2, 2026
540631c
ci: add test deps and Dockerfile for storage emulators (S3, Azure, GCS)
SamirMoustafa Mar 2, 2026
532af5a
test: move remote storage tests under tests/io/remote_storage and add…
SamirMoustafa Mar 2, 2026
c22b8bf
fix: update Dask internal keys for zarr compatibility
SamirMoustafa Mar 2, 2026
0c07169
test: refine subset and table validation in spatial data tests
SamirMoustafa Mar 2, 2026
f21bb52
feat: move Dockerfile for storage emulators to facilitate testing
SamirMoustafa Mar 2, 2026
072566a
ci: enhance GitHub Actions workflow to support storage emulators on L…
SamirMoustafa Mar 2, 2026
ee6e4dc
fix: handle RuntimeError in fsspec async session closure
SamirMoustafa Mar 2, 2026
9019e6a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 2, 2026
42c3133
refactor: add type hints to functions in _dask_zarr_compat, _utils, a…
SamirMoustafa Mar 2, 2026
70ababe
chore: remove pytest-timeout from test dependencies in pyproject.toml
SamirMoustafa Mar 4, 2026
cae2319
test: add unit tests for remote storage store resolution and credenti…
SamirMoustafa Mar 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,43 @@ jobs:
fi
fi
uv sync --group=test
# Start storage emulators (S3, Azure, GCS) only on Linux; service containers are not available on Windows/macOS
- name: Build and start storage emulators
if: matrix.os == 'ubuntu-latest'
run: |
docker build -f tests/io/remote_storage/Dockerfile.emulators -t spatialdata-emulators .
docker run --rm -d --name spatialdata-emulators \
-p 5000:5000 -p 10000:10000 -p 4443:4443 \
spatialdata-emulators
- name: Wait for emulator ports
if: matrix.os == 'ubuntu-latest'
run: |
echo "Waiting for S3 (5000), Azure (10000), GCS (4443)..."
python3 -c "
import socket, time
for _ in range(45):
try:
for p in (5000, 10000, 4443):
socket.create_connection(('127.0.0.1', p), timeout=2)
print('Emulators ready.')
break
except (socket.error, OSError):
time.sleep(2)
else:
raise SystemExit('Emulators did not become ready.')
"
# On Linux, emulators run above so full suite (incl. tests/io/remote_storage/) runs. On Windows/macOS, skip remote_storage.
- name: Test
env:
MPLBACKEND: agg
PLATFORM: ${{ matrix.os }}
DISPLAY: :42
run: |
uv run pytest --cov --color=yes --cov-report=xml
if [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
uv run pytest --cov --color=yes --cov-report=xml
else
uv run pytest --cov --color=yes --cov-report=xml --ignore=tests/io/remote_storage/
fi
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ dev = [
"bump2version",
]
test = [
"adlfs",
"gcsfs",
"moto[server]",
"pytest",
"pytest-cov",
"pytest-mock",
Expand Down
71 changes: 48 additions & 23 deletions src/spatialdata/_core/spatialdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def __init__(
tables: dict[str, AnnData] | Tables | None = None,
attrs: Mapping[Any, Any] | None = None,
) -> None:
self._path: Path | None = None
self._path: Path | UPath | None = None

self._shared_keys: set[str | None] = set()
self._images: Images = Images(shared_keys=self._shared_keys)
Expand Down Expand Up @@ -548,16 +548,16 @@ def is_backed(self) -> bool:
return self.path is not None

@property
def path(self) -> Path | None:
def path(self) -> Path | UPath | None:
"""Path to the Zarr storage."""
return self._path

@path.setter
def path(self, value: Path | None) -> None:
if value is None or isinstance(value, str | Path):
def path(self, value: Path | UPath | None) -> None:
if value is None or isinstance(value, (str, Path, UPath)):
self._path = value
else:
raise TypeError("Path must be `None`, a `str` or a `Path` object.")
raise TypeError("Path must be `None`, a `str`, a `Path` or a `UPath` object.")

def locate_element(self, element: SpatialElement) -> list[str]:
"""
Expand Down Expand Up @@ -1032,18 +1032,34 @@ def _symmetric_difference_with_zarr_store(self) -> tuple[list[str], list[str]]:

def _validate_can_safely_write_to_path(
self,
file_path: str | Path,
file_path: str | Path | UPath,
overwrite: bool = False,
saving_an_element: bool = False,
) -> None:
from spatialdata._io._utils import _backed_elements_contained_in_path, _is_subfolder, _resolve_zarr_store
from spatialdata._io._utils import (
_backed_elements_contained_in_path,
_is_subfolder,
_remote_zarr_store_exists,
_resolve_zarr_store,
)

if isinstance(file_path, str):
file_path = Path(file_path)

if not isinstance(file_path, Path):
raise ValueError(f"file_path must be a string or a Path object, type(file_path) = {type(file_path)}.")
if not isinstance(file_path, (Path, UPath)):
raise ValueError(f"file_path must be a string, Path or UPath object, type(file_path) = {type(file_path)}.")

if isinstance(file_path, UPath):
store = _resolve_zarr_store(file_path)
if _remote_zarr_store_exists(store) and not overwrite:
raise ValueError(
"The Zarr store already exists. Use `overwrite=True` to try overwriting the store. "
"Please note that only Zarr stores not currently in use by the current SpatialData object can be "
"overwritten."
)
return

# Local Path: existing logic
# TODO: add test for this
if os.path.exists(file_path):
store = _resolve_zarr_store(file_path)
Expand Down Expand Up @@ -1072,8 +1088,13 @@ def _validate_can_safely_write_to_path(
ERROR_MSG + "\nDetails: the target path contains one or more files that Dask use for "
"backing elements in the SpatialData object." + WORKAROUND
)
if self.path is not None and (
_is_subfolder(parent=self.path, child=file_path) or _is_subfolder(parent=file_path, child=self.path)
# Subfolder checks only for local paths (Path); skip when self.path is UPath
if (
self.path is not None
and isinstance(self.path, Path)
and (
_is_subfolder(parent=self.path, child=file_path) or _is_subfolder(parent=file_path, child=self.path)
)
):
if saving_an_element and _is_subfolder(parent=self.path, child=file_path):
raise ValueError(
Expand Down Expand Up @@ -1102,7 +1123,7 @@ def _validate_all_elements(self) -> None:
@_deprecation_alias(format="sdata_formats", version="0.7.0")
def write(
self,
file_path: str | Path,
file_path: str | Path | UPath | None = None,
overwrite: bool = False,
consolidate_metadata: bool = True,
update_sdata_path: bool = True,
Expand All @@ -1115,7 +1136,7 @@ def write(
Parameters
----------
file_path
The path to the Zarr store to write to.
The path to the Zarr store to write to. If ``None``, uses :attr:`path` (must be set).
overwrite
If `True`, overwrite the Zarr store if it already exists. If `False`, `write()` will fail if the Zarr store
already exists.
Expand Down Expand Up @@ -1161,8 +1182,13 @@ def write(

parsed = _parse_formats(sdata_formats)

if file_path is None:
if self.path is None:
raise ValueError("file_path must be provided when SpatialData.path is not set.")
file_path = self.path
if isinstance(file_path, str):
file_path = Path(file_path)
# Keep UPath as-is; do not convert to Path
self._validate_can_safely_write_to_path(file_path, overwrite=overwrite)
self._validate_all_elements()

Expand Down Expand Up @@ -1192,7 +1218,7 @@ def write(
def _write_element(
self,
element: SpatialElement | AnnData,
zarr_container_path: Path,
zarr_container_path: Path | UPath,
element_type: str,
element_name: str,
overwrite: bool,
Expand All @@ -1201,10 +1227,8 @@ def _write_element(
) -> None:
from spatialdata._io.io_zarr import _get_groups_for_element

if not isinstance(zarr_container_path, Path):
raise ValueError(
f"zarr_container_path must be a Path object, type(zarr_container_path) = {type(zarr_container_path)}."
)
if not isinstance(zarr_container_path, (Path, UPath)):
raise ValueError(f"zarr_container_path must be a Path or UPath, got {type(zarr_container_path).__name__}.")
file_path_of_element = zarr_container_path / element_type / element_name
self._validate_can_safely_write_to_path(
file_path=file_path_of_element, overwrite=overwrite, saving_an_element=True
Expand Down Expand Up @@ -1489,7 +1513,7 @@ def _validate_can_write_metadata_on_element(self, element_name: str) -> tuple[st

# check if the element exists in the Zarr storage
if not _group_for_element_exists(
zarr_path=Path(self.path),
zarr_path=self.path,
element_type=element_type,
element_name=element_name,
):
Expand All @@ -1503,7 +1527,7 @@ def _validate_can_write_metadata_on_element(self, element_name: str) -> tuple[st

# warn the users if the element is not self-contained, that is, it is Dask-backed by files outside the Zarr
# group for the element
element_zarr_path = Path(self.path) / element_type / element_name
element_zarr_path = self.path / element_type / element_name
if not _is_element_self_contained(element=element, element_path=element_zarr_path):
logger.info(
f"Element {element_type}/{element_name} is not self-contained. The metadata will be"
Expand Down Expand Up @@ -1544,7 +1568,7 @@ def write_channel_names(self, element_name: str | None = None) -> None:
# Mypy does not understand that path is not None so we have the check in the conditional
if element_type == "images" and self.path is not None:
_, _, element_group = _get_groups_for_element(
zarr_path=Path(self.path), element_type=element_type, element_name=element_name, use_consolidated=False
zarr_path=self.path, element_type=element_type, element_name=element_name, use_consolidated=False
)

from spatialdata._io._utils import overwrite_channel_names
Expand Down Expand Up @@ -1588,7 +1612,7 @@ def write_transformations(self, element_name: str | None = None) -> None:
# Mypy does not understand that path is not None so we have a conditional
assert self.path is not None
_, _, element_group = _get_groups_for_element(
zarr_path=Path(self.path),
zarr_path=self.path,
element_type=element_type,
element_name=element_name,
use_consolidated=False,
Expand Down Expand Up @@ -1956,7 +1980,8 @@ def h(s: str) -> str:

descr = "SpatialData object"
if self.path is not None:
descr += f", with associated Zarr store: {self.path.resolve()}"
path_descr = str(self.path) if isinstance(self.path, UPath) else self.path.resolve()
descr += f", with associated Zarr store: {path_descr}"

non_empty_elements = self._non_empty_elements()
last_element_index = len(non_empty_elements) - 1
Expand Down
2 changes: 2 additions & 0 deletions src/spatialdata/_io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

# Patch da.to_zarr so ome_zarr's **kwargs are passed as zarr_array_kwargs (avoids FutureWarning)
import spatialdata._io._dask_zarr_compat # noqa: F401
from spatialdata._io._utils import get_dask_backing_files
from spatialdata._io.format import SpatialDataFormatType
from spatialdata._io.io_points import write_points
Expand Down
55 changes: 55 additions & 0 deletions src/spatialdata/_io/_dask_zarr_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Compatibility layer for dask.array.to_zarr when callers pass array options via **kwargs.

ome_zarr.writer calls da.to_zarr(..., **options) with array options (compressor, dimension_names,
etc.). Dask deprecated **kwargs in favor of zarr_array_kwargs. This module patches da.to_zarr to
forward such kwargs into zarr_array_kwargs (excluding dask-internal keys like zarr_format that
zarr.Group.create_array() does not accept), avoiding the FutureWarning and keeping behavior correct.
"""

from __future__ import annotations

from typing import Any

import dask.array as _da

# Keep a reference to dask.array.to_zarr as it is at import time, so that the wrapper defined
# below can delegate to it after the module attribute has been replaced.
_orig_to_zarr = _da.to_zarr

# Keys from ome_zarr/dask **kwargs that must not be passed to zarr.Group.create_array()
# dimension_separator: not accepted by all zarr versions in the create_array() path.
_DASK_INTERNAL_KEYS = frozenset({"zarr_format", "dimension_separator"})


def _to_zarr(
    arr: Any,
    url: Any,
    component: Any = None,
    storage_options: Any = None,
    region: Any = None,
    compute: bool = True,
    return_stored: bool = False,
    zarr_array_kwargs: Any = None,
    zarr_read_kwargs: Any = None,
    **kwargs: Any,
) -> Any:
    """Call the original ``to_zarr`` with loose ``**kwargs`` folded into ``zarr_array_kwargs``.

    Parameters
    ----------
    arr, url, component, storage_options, region, compute, return_stored, zarr_read_kwargs
        Forwarded unchanged to the original ``to_zarr``.
    zarr_array_kwargs
        Optional mapping of array options. When extra keyword arguments are given, a copy of this
        mapping is extended with them (the caller's mapping is not mutated); otherwise it is
        forwarded as received.
    **kwargs
        Extra keyword arguments. Keys listed in ``_DASK_INTERNAL_KEYS`` are discarded; every other
        key is written into ``zarr_array_kwargs``, replacing an existing entry with the same name.

    Returns
    -------
    Whatever the original ``to_zarr`` returns.
    """
    if kwargs:
        # Work on a copy (or a fresh dict) so the mapping passed in by the caller stays untouched.
        merged_options = dict(zarr_array_kwargs) if zarr_array_kwargs else {}
        merged_options.update(
            {name: value for name, value in kwargs.items() if name not in _DASK_INTERNAL_KEYS}
        )
        zarr_array_kwargs = merged_options

    # Every extra keyword has been either merged or dropped above, so nothing is forwarded as **kwargs.
    return _orig_to_zarr(
        arr,
        url,
        component=component,
        storage_options=storage_options,
        region=region,
        compute=compute,
        return_stored=return_stored,
        zarr_array_kwargs=zarr_array_kwargs,
        zarr_read_kwargs=zarr_read_kwargs,
    )


# Import-time side effect: rebind the `to_zarr` attribute of the dask.array module to the wrapper,
# so code that looks up `dask.array.to_zarr` after this module is imported gets `_to_zarr`.
_da.to_zarr = _to_zarr
Loading