From 5232e12dca4ba2d24bc9935cd43243a42fa8fa35 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 20:56:58 +0100
Subject: [PATCH 01/30] fix typo in is_empty docstring

---
 src/annbatch/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index a22aa5d8..5ea008cf 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -385,7 +385,7 @@ def __iter__(self) -> Generator[zarr.Group]:
 
     @property
     def is_empty(self) -> bool:
-        """Wether or not there is an existing store at the group location."""
+        """Whether or not there is an existing store at the group location."""
         return (
             (not (V1_ENCODING.items() <= self._group.attrs.items()) or len(self._dataset_keys) == 0)
             if isinstance(self._group, zarr.Group)

From 5d14e5c71e4c12daf1d5158238f6478b20de5bcb Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 20:59:22 +0100
Subject: [PATCH 02/30] define _collection_added as a class attribute

---
 src/annbatch/loader.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/annbatch/loader.py b/src/annbatch/loader.py
index 973a47d9..4c23504b 100644
--- a/src/annbatch/loader.py
+++ b/src/annbatch/loader.py
@@ -161,6 +161,7 @@ class Loader[
     _batch_sampler: Sampler
     _concat_strategy: None | concat_strategies = None
     _dataset_intervals: pd.IntervalIndex | None = None
+    _collection_added: bool = False
 
     def __init__(
         self,
@@ -312,7 +313,7 @@ def use_collection(
         """
         if collection.is_empty:
             raise ValueError("DatasetCollection is empty")
-        if getattr(self, "_collection_added", False):
+        if self._collection_added:
             raise RuntimeError(
                 "You should not add multiple collections, independently shuffled - please preshuffle multiple collections, use `add_anndatas` manually if you know what you are doing, or open an issue if you believe that this should be supported at an API level higher than `add_anndatas`."
             )

From 2558b4017d85aa66fcf7bf696c2b873bf6c2ee97 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 21:01:26 +0100
Subject: [PATCH 03/30] consistent naming with add_anndatas

---
 src/annbatch/io.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index 5ea008cf..d5b906e4 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -393,7 +393,7 @@ def is_empty(self) -> bool:
         )
 
     @_with_settings
-    def add_adatas(
+    def add_anndatas(
         self,
         adata_paths: Iterable[zarr.Group | h5py.Group | PathLike[str] | str],
         *,
@@ -473,7 +473,7 @@ def add_adatas(
             ...     "path/to/second_adata.h5ad",
             ...     "path/to/third_adata.h5ad",
             ... ]
-            >>> DatasetCollection("path/to/output/zarr_store.zarr").add_adatas(
+            >>> DatasetCollection("path/to/output/zarr_store.zarr").add_anndatas(
             ...    datasets,
             ...    load_adata=read_lazy_x_and_obs_only,
             ...)
@@ -647,7 +647,7 @@ def _add_to_collection(
                 Whether or not to shuffle when adding.  Otherwise, the incoming data will just be split up and appended.
         """
         if self.is_empty:
-            raise ValueError("Store is empty. Please run `DatasetCollection.add` first.")
+            raise ValueError("Store is empty. Please run `DatasetCollection.add_anndatas` first.")
         # Check for mismatched keys among the inputs.
         _check_for_mismatched_keys(adata_paths, load_adata=load_adata)
 

From 36af588245f581edfe960a1b955a6aff1bafefc9 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 21:04:45 +0100
Subject: [PATCH 04/30] ruff format

---
 tests/test_dataset.py | 42 ++++++++++++++++++------------------------
 1 file changed, 18 insertions(+), 24 deletions(-)

diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 02b56b09..8a80924a 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -104,30 +104,24 @@ def concat(datas: list[Data | ad.AnnData]) -> ListData | list[ad.AnnData]:
     "gen_loader",
     [
         pytest.param(
-            lambda collection,
-            shuffle,
-            use_zarrs,
-            chunk_size=chunk_size,
-            preload_nchunks=preload_nchunks,
-            open_func=open_func,
-            batch_size=batch_size,
-            preload_to_gpu=preload_to_gpu,
-            concat_strategy=concat_strategy: Loader(
-                shuffle=shuffle,
-                chunk_size=chunk_size,
-                preload_nchunks=preload_nchunks,
-                return_index=True,
-                batch_size=batch_size,
-                preload_to_gpu=preload_to_gpu,
-                to_torch=False,
-                concat_strategy=concat_strategy,
-            ).use_collection(
-                collection,
-                **(
-                    {"load_adata": lambda group: open_func(group, use_zarrs=use_zarrs, use_anndata=True)}
-                    if open_func is not None
-                    else {}
-                ),
+            lambda collection, shuffle, use_zarrs, chunk_size=chunk_size, preload_nchunks=preload_nchunks, open_func=open_func, batch_size=batch_size, preload_to_gpu=preload_to_gpu, concat_strategy=concat_strategy: (
+                Loader(
+                    shuffle=shuffle,
+                    chunk_size=chunk_size,
+                    preload_nchunks=preload_nchunks,
+                    return_index=True,
+                    batch_size=batch_size,
+                    preload_to_gpu=preload_to_gpu,
+                    to_torch=False,
+                    concat_strategy=concat_strategy,
+                ).use_collection(
+                    collection,
+                    **(
+                        {"load_adata": lambda group: open_func(group, use_zarrs=use_zarrs, use_anndata=True)}
+                        if open_func is not None
+                        else {}
+                    ),
+                )
             ),
             id=f"chunk_size={chunk_size}-preload_nchunks={preload_nchunks}-open_func={open_func.__name__[5:] if open_func is not None else 'None'}-batch_size={batch_size}{'-cupy' if preload_to_gpu else ''}-concat_strategy={concat_strategy}",  # type: ignore[attr-defined]
             marks=skip_if_no_cupy,

From d841e13a60648db0aa2559aa21fd19a05a0a3303 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 21:06:10 +0100
Subject: [PATCH 05/30] fix typo in use_collection docstring

---
 src/annbatch/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/annbatch/loader.py b/src/annbatch/loader.py
index 4c23504b..2df59954 100644
--- a/src/annbatch/loader.py
+++ b/src/annbatch/loader.py
@@ -305,7 +305,7 @@ def use_collection(
         Parameters
         ----------
         collection
-            The collection who on-disk datasets should be used in this loader.
+            The collection whose on-disk datasets should be used in this loader.
         load_adata
             A custom load function - recall that whatever is found in :attr:`~anndata.AnnData.X` and :attr:`~anndata.AnnData.obs` will be yielded in batches.
             Default is to just load `X` and all of `obs`.

From 304dcbbcdd6a7bfd5e81ffdc38a91b90553d7ba3 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 21:38:53 +0100
Subject: [PATCH 06/30] adapt add_anndatas change to tests

---
 tests/conftest.py        |  2 +-
 tests/test_preshuffle.py | 22 +++++++++++-----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 45be4996..0c9843c2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -114,7 +114,7 @@ def simple_collection(
 ) -> tuple[DatasetCollection, ad.AnnData]:
     zarr_stores = sorted(f for f in adata_with_zarr_path_same_var_space[1].iterdir() if f.is_dir())
     output_path = Path(tmpdir_factory.mktemp("zarr_folder")) / "simple_fixture.zarr"
-    collection = DatasetCollection(output_path).add_adatas(
+    collection = DatasetCollection(output_path).add_anndatas(
         zarr_stores,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
diff --git a/tests/test_preshuffle.py b/tests/test_preshuffle.py
index 680d6fe1..5a0fae4d 100644
--- a/tests/test_preshuffle.py
+++ b/tests/test_preshuffle.py
@@ -51,7 +51,7 @@ def test_store_creation_warnings_with_different_keys(elem_name: Literal["obsm",
     adata_1.write_h5ad(path_1)
     adata_2.write_h5ad(path_2)
     with pytest.warns(UserWarning, match=rf"Found {elem_name} keys.* not present in all anndatas"):
-        DatasetCollection(tmp_path / "collection.zarr").add_adatas(
+        DatasetCollection(tmp_path / "collection.zarr").add_anndatas(
             [path_1, path_2],
             zarr_sparse_chunk_size=10,
             zarr_sparse_shard_size=20,
@@ -69,7 +69,7 @@ def test_store_creation_no_warnings_with_custom_load(tmp_path: Path):
     path_2 = tmp_path / "with_extra_key.h5ad"
     adata_1.write_h5ad(path_1)
     adata_2.write_h5ad(path_2)
-    collection = DatasetCollection(tmp_path / "collection.zarr").add_adatas(
+    collection = DatasetCollection(tmp_path / "collection.zarr").add_anndatas(
         [path_1, path_2],
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -91,7 +91,7 @@ def test_store_creation_path_added_to_obs(tmp_path: Path):
     adata_2.write_h5ad(path_2)
     paths = [path_1, path_2]
     output_dir = tmp_path / "path_src_collection.zarr"
-    collection = DatasetCollection(output_dir).add_adatas(
+    collection = DatasetCollection(output_dir).add_anndatas(
         paths,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -120,7 +120,7 @@ def test_store_addition_different_keys(
     adata_orig.write_h5ad(orig_path)
     output_path = tmp_path / "zarr_store_addition_different_keys.zarr"
     collection = DatasetCollection(output_path)
-    collection.add_adatas(
+    collection.add_anndatas(
         [orig_path],
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -136,7 +136,7 @@ def test_store_addition_different_keys(
     additional_path = tmp_path / "with_extra_key.h5ad"
     adata.write_h5ad(additional_path)
     with pytest.warns(UserWarning, match=rf"Found {elem_name} keys.* not present in all anndatas"):
-        collection.add_adatas(
+        collection.add_anndatas(
             [additional_path],
             load_adata=load_adata,
             zarr_sparse_chunk_size=10,
@@ -169,7 +169,7 @@ def test_store_creation_default(
         else r"Loading h5ad is currently not supported",
     ):
         kwargs = {} if is_zarr else {"is_collection_h5ad": True}
-        collection = DatasetCollection(output_path, **kwargs).add_adatas(
+        collection = DatasetCollection(output_path, **kwargs).add_anndatas(
             [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")]
         )
     assert isinstance(
@@ -201,7 +201,7 @@ def test_store_creation(
         adata_with_h5_path_different_var_space[1].parent
         / f"zarr_store_creation_test_{shuffle}_{'default_read' if load_adata is None else 'custom_read'}.zarr"
     )
-    collection = DatasetCollection(output_path).add_adatas(
+    collection = DatasetCollection(output_path).add_anndatas(
         [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")],
         var_subset=var_subset,
         zarr_sparse_chunk_size=10,
@@ -292,7 +292,7 @@ def test_mismatched_raw_concat(
     h5_files = sorted(adata_with_h5_path_different_var_space[1].iterdir())
     output_path = adata_with_h5_path_different_var_space[1].parent / "zarr_store_creation_test_heterogeneous.zarr"
     h5_paths = [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")]
-    collection = DatasetCollection(output_path).add_adatas(
+    collection = DatasetCollection(output_path).add_anndatas(
         h5_paths,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -337,7 +337,7 @@ def test_store_extension(
     additional = all_h5_paths[4:]  # don't add everything to get a "different" var space
     # create new store
     collection = DatasetCollection(store_path)
-    collection.add_adatas(
+    collection.add_anndatas(
         original,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -348,7 +348,7 @@ def test_store_extension(
         shuffle=True,
     )
     # add h5ads to existing store
-    collection.add_adatas(
+    collection.add_anndatas(
         additional,
         load_adata=load_adata,
         zarr_sparse_chunk_size=10,
@@ -379,5 +379,5 @@ def test_empty(tmp_path: Path):
     assert collection.is_empty
     # Doesn't matter what errors as long as this function runs, but not to completion
     with pytest.raises(TypeError):
-        collection.add_adatas()
+        collection.add_anndatas()
     assert not (V1_ENCODING.items() <= g.attrs.items())

From 4958749997a24144928785d61e70721ab6d5d00f Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 22:07:27 +0100
Subject: [PATCH 07/30] add torch and h5py to mypy ignore_missing_imports

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 79dafe73..c06c9bd0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -180,7 +180,7 @@ omit = [
 ]
 
 [[tool.mypy.overrides]]
-module = [ "anndata.*", "cupyx.*", "cupy.*" ]
+module = [ "anndata.*", "cupyx.*", "cupy.*", "torch.*", "h5py.*" ]
 ignore_missing_imports = true
 
 [tool.cruft]

From 830f2d4ff88b81cf8ddc6cc6d2eec9e7300cd12e Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 22:09:17 +0100
Subject: [PATCH 08/30] fix Mapping.copy() call in write_sharded callback

---
 src/annbatch/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index d5b906e4..87114af2 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -104,7 +104,7 @@ def callback(
             iospec: ad.experimental.IOSpec,
         ):
             # Ensure we're not overriding anything here
-            dataset_kwargs = dataset_kwargs.copy()
+            dataset_kwargs = dict(dataset_kwargs)
             if iospec.encoding_type in {"array"} and (
                 any(n in store.name for n in {"obsm", "layers", "obsp"}) or "X" == elem_name
             ):

From 6d6067ac05bb2cdb8950b767a0477028b8334cfa Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 22:09:32 +0100
Subject: [PATCH 09/30] wrap categories in pd.Index for Categorical.from_codes

---
 src/annbatch/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index 87114af2..2d4f963e 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -182,7 +182,7 @@ def _lazy_load_anndatas[T: zarr.Group | h5py.Group | PathLike[str] | str](
         adata = load_adata(path)
         # Track the source file for this given anndata object
         adata.obs["src_path"] = pd.Categorical.from_codes(
-            np.ones((adata.shape[0],), dtype="int") * i, categories=[str(p) for p in paths]
+            np.ones((adata.shape[0],), dtype="int") * i, categories=pd.Index([str(p) for p in paths])
         )
         # Concatenating Dataset2D drops categoricals so we need to track them
         if isinstance(adata.obs, Dataset2D):

From 0f5aa1df5a4d8ffdd760ffaa63e5ac44e6bd1040 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 22:09:40 +0100
Subject: [PATCH 10/30] rename reassigned idxs variable to idxs_as_slices

---
 src/annbatch/io.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index 2d4f963e..fb094d4d 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -239,11 +239,13 @@ def _create_chunks_for_shuffling(
     if use_single_chunking:
         return [np.concatenate(idxs)]
     # unfortunately, this is the only way to prevent numpy.split from trying to np.array the idxs list, which can have uneven elements.
-    idxs = np.array([slice(int(idx[0]), int(idx[-1] + 1)) for idx in idxs])
+    idxs_as_slices = np.array([slice(int(idx[0]), int(idx[-1] + 1)) for idx in idxs])
     return [
         np.concatenate([np.arange(s.start, s.stop) for s in idx])
         for idx in (
-            split_given_size(idxs, n_slices_per_dataset) if n_chunkings is None else np.array_split(idxs, n_chunkings)
+            split_given_size(idxs_as_slices, n_slices_per_dataset)
+            if n_chunkings is None
+            else np.array_split(idxs_as_slices, n_chunkings)
         )
     ]
 

From 12830d5082521d6087c58983b81e4362e8089b79 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 11 Feb 2026 22:14:42 +0100
Subject: [PATCH 11/30] is none == is none works better with mypy

---
 src/annbatch/io.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index fb094d4d..f563f5e7 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -226,15 +226,14 @@ def _create_chunks_for_shuffling(
     idxs = split_given_size(np.arange(n_obs), shuffle_chunk_size)
     if shuffle:
         random.shuffle(idxs)
-    match shuffle_n_obs_per_dataset is not None, n_chunkings is not None:
-        case True, False:
-            n_slices_per_dataset = int(shuffle_n_obs_per_dataset // shuffle_chunk_size)
-            use_single_chunking = n_obs <= shuffle_n_obs_per_dataset or n_slices_per_dataset <= 1
-        case False, True:
-            n_slices_per_dataset = (n_obs // n_chunkings) // shuffle_chunk_size
-            use_single_chunking = n_chunkings == 1
-        case _, _:
-            raise ValueError("Cannot provide both shuffle_n_obs_per_dataset and n_chunkings or neither")
+    if (shuffle_n_obs_per_dataset is None) == (n_chunkings is None):
+        raise ValueError("Cannot provide both shuffle_n_obs_per_dataset and n_chunkings or neither")
+    elif shuffle_n_obs_per_dataset is not None:
+        n_slices_per_dataset = int(shuffle_n_obs_per_dataset // shuffle_chunk_size)
+        use_single_chunking = n_obs <= shuffle_n_obs_per_dataset or n_slices_per_dataset <= 1
+    else:  # n_chunkings is not None
+        n_slices_per_dataset = (n_obs // n_chunkings) // shuffle_chunk_size
+        use_single_chunking = n_chunkings == 1
     # In this case `shuffle_n_obs_per_dataset` is bigger than the size of the dataset or the slice size is probably too big.
     if use_single_chunking:
         return [np.concatenate(idxs)]

From a60774cca976ca33a0f400786f3ad8bf5e7b3925 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Thu, 12 Feb 2026 17:08:12 +0100
Subject: [PATCH 12/30] other add_anndatas renames + changelog

---
 CHANGELOG.md                 |  1 +
 README.md                    |  2 +-
 docs/index.md                |  2 +-
 docs/notebooks/example.ipynb | 12 ++++++------
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 090222bd..5e6f9165 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning][].
 ## [0.0.8]
 
 - {class}`~annbatch.Loader` acccepts an `rng` argument now
+- Renamed {meth}`annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` to be consistent with the rest of the API.
 
 ## [0.0.7]
 
diff --git a/README.md b/README.md
index 5128594a..7f11e7ea 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ zarr.config.set(
 
 # Create a collection at the given path. The subgroups will all be anndata stores.
 collection = DatasetCollection("path/to/output/collection.zarr")
-collection.add_adatas(
+collection.add_anndatas(
     adata_paths=[
         "path/to/your/file1.h5ad",
         "path/to/your/file2.h5ad"
diff --git a/docs/index.md b/docs/index.md
index d94024b5..6263f2e4 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -9,7 +9,7 @@ Let's go through the above example:
 ### Preprocessing
 
 ```python
-colleciton = DatasetCollection("path/to/output/store.zarr").add_adatas(
+colleciton = DatasetCollection("path/to/output/store.zarr").add_anndatas(
     adata_paths=[
         "path/to/your/file1.h5ad",
         "path/to/your/file2.h5ad"
diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
index f7085129..86efd256 100644
--- a/docs/notebooks/example.ipynb
+++ b/docs/notebooks/example.ipynb
@@ -133,7 +133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {
     "tags": [
      "hide-output"
@@ -198,7 +198,7 @@
     "\n",
     "\n",
     "collection = DatasetCollection(zarr.open(\"annbatch_collection\", mode=\"w\"))\n",
-    "collection.add_adatas(\n",
+    "collection.add_anndatas(\n",
     "    # List all the h5ad files you want to include in the collection\n",
     "    adata_paths=[\"866d7d5e-436b-4dbd-b7c1-7696487d452e.h5ad\", \"f81463b8-4986-4904-a0ea-20ff02cbb317.h5ad\"],\n",
     "    # Path to store the output collection\n",
@@ -328,7 +328,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {
     "tags": [
      "hide-output"
@@ -363,7 +363,7 @@
     }
    ],
    "source": [
-    "collection.add_adatas(\n",
+    "collection.add_anndatas(\n",
     "    adata_paths=[\n",
     "        \"866d7d5e-436b-4dbd-b7c1-7696487d452e.h5ad\",\n",
     "    ],\n",
@@ -381,7 +381,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "venv",
+   "display_name": "annbatch",
    "language": "python",
    "name": "python3"
   },
@@ -395,7 +395,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.3"
+   "version": "3.12.12"
   }
  },
  "nbformat": 4,

From 9c495b3d9e9c644692349175966dda2bdf2d0697 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 12 Feb 2026 16:08:32 +0000
Subject: [PATCH 13/30] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/test_dataset.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 8a80924a..1d010c36 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -104,7 +104,15 @@ def concat(datas: list[Data | ad.AnnData]) -> ListData | list[ad.AnnData]:
     "gen_loader",
     [
         pytest.param(
-            lambda collection, shuffle, use_zarrs, chunk_size=chunk_size, preload_nchunks=preload_nchunks, open_func=open_func, batch_size=batch_size, preload_to_gpu=preload_to_gpu, concat_strategy=concat_strategy: (
+            lambda collection,
+            shuffle,
+            use_zarrs,
+            chunk_size=chunk_size,
+            preload_nchunks=preload_nchunks,
+            open_func=open_func,
+            batch_size=batch_size,
+            preload_to_gpu=preload_to_gpu,
+            concat_strategy=concat_strategy: (
                 Loader(
                     shuffle=shuffle,
                     chunk_size=chunk_size,

From 7f33e7852059d99370b11e2892166efcc46fe9a8 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Fri, 13 Feb 2026 12:16:11 +0100
Subject: [PATCH 14/30] Revert "is none == is none works better with mypy"

This reverts commit 12830d5082521d6087c58983b81e4362e8089b79.
---
 src/annbatch/io.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index f563f5e7..fb094d4d 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -226,14 +226,15 @@ def _create_chunks_for_shuffling(
     idxs = split_given_size(np.arange(n_obs), shuffle_chunk_size)
     if shuffle:
         random.shuffle(idxs)
-    if (shuffle_n_obs_per_dataset is None) == (n_chunkings is None):
-        raise ValueError("Cannot provide both shuffle_n_obs_per_dataset and n_chunkings or neither")
-    elif shuffle_n_obs_per_dataset is not None:
-        n_slices_per_dataset = int(shuffle_n_obs_per_dataset // shuffle_chunk_size)
-        use_single_chunking = n_obs <= shuffle_n_obs_per_dataset or n_slices_per_dataset <= 1
-    else:  # n_chunkings is not None
-        n_slices_per_dataset = (n_obs // n_chunkings) // shuffle_chunk_size
-        use_single_chunking = n_chunkings == 1
+    match shuffle_n_obs_per_dataset is not None, n_chunkings is not None:
+        case True, False:
+            n_slices_per_dataset = int(shuffle_n_obs_per_dataset // shuffle_chunk_size)
+            use_single_chunking = n_obs <= shuffle_n_obs_per_dataset or n_slices_per_dataset <= 1
+        case False, True:
+            n_slices_per_dataset = (n_obs // n_chunkings) // shuffle_chunk_size
+            use_single_chunking = n_chunkings == 1
+        case _, _:
+            raise ValueError("Cannot provide both shuffle_n_obs_per_dataset and n_chunkings or neither")
     # In this case `shuffle_n_obs_per_dataset` is bigger than the size of the dataset or the slice size is probably too big.
     if use_single_chunking:
         return [np.concatenate(idxs)]

From 462ca485f4c2850eb6102176ff4a728441305f57 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Fri, 13 Feb 2026 12:22:10 +0100
Subject: [PATCH 15/30] update changelogs

---
 CHANGELOG.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5e6f9165..ad727e18 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning][].
 ## [0.0.8]
 
 - {class}`~annbatch.Loader` acccepts an `rng` argument now
-- Renamed {meth}`annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` to be consistent with the rest of the API.
+- Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` to be consistent with the rest of the API.
 
 ## [0.0.7]
 
@@ -36,7 +36,7 @@ and this project adheres to [Semantic Versioning][].
 
 ## [0.0.4]
 
-- Load into memory nullables/categoricals from `obs` by default when shuffling (i.e., no custom `load_adata` argument to {meth}`annbatch.DatasetCollection.add_adatas`)
+- Load into memory nullables/categoricals from `obs` by default when shuffling (i.e., no custom `load_adata` argument to {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`))
 
 ## [0.0.3]
 
@@ -50,9 +50,9 @@ and this project adheres to [Semantic Versioning][].
 ### Breaking
 
 - `ZarrSparseDataset` and `ZarrDenseDataset` have been conslidated into {class}`annbatch.Loader`
-- `create_anndata_collection` and `add_to_collection` have been moved into the {meth}`annbatch.DatasetCollection.add_adatas` method
-- Default reading of input data is now fully lazy in {meth}`annbatch.DatasetCollection.add_adatas`, and therefore the shuffle process may now be slower although have better memory properties.  Use `load_adata` argument in {meth}`annbatch.DatasetCollection.add_adatas` to customize this behavior.
-- Files shuffled under the old `create_anndata_collection` will not be recognized by {class}`annbatch.DatasetCollection` and therefore are not usable with the new {class}`annbatch.Loader.use_collection` API.  At the moment, the file metadata we maintain is only for internal purposes - however, if you wish to migrate to be able to use {class}`annbatch.DatasetCollection` in conjunction with {class}`annbatch.Loader.use_collection`, the root folder of the old collection must have attrs `{"encoding-type": "annbatch-preshuffled", "encoding-version": "0.1.0"}` and be a {class}`zarr.Group`. The subfolders (i.e., datasets) must be called `dataset_([0-9]*)`. Otherwise you can use the {meth}`annbatch.Loader.add_anndatas` as before.
+- `create_anndata_collection` and `add_to_collection` have been moved into the {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`) method
+- Default reading of input data is now fully lazy in {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`), and therefore the shuffle process may now be slower although have better memory properties.  Use `load_adata` argument in {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`) to customize this behavior.
+- Files shuffled under the old `create_anndata_collection` will not be recognized by {class}`annbatch.DatasetCollection` and therefore are not usable with the new {class}`annbatch.Loader.use_collection` API.  At the moment, the file metadata we maintain is only for internal purposes - however, if you wish to migrate to be able to use {class}`annbatch.DatasetCollection` in conjunction with {class}`annbatch.Loader.use_collection`, the root folder of the old collection must have attrs `{"encoding-type": "annbatch-preshuffled", "encoding-version": "0.1.0"}` and be a {class}`zarr.Group`. The subfolders (i.e., datasets) must be called `dataset_([0-9]*)`. Otherwise you can use the {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`) as before.
 
 ### Changed
 

From 400ee88a98588f7134c77359cfc572c4fed5bd47 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Fri, 13 Feb 2026 12:22:46 +0100
Subject: [PATCH 16/30] update changelog: mark add_anndatas rename as breaking

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ad727e18..0259c504 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning][].
 ## [0.0.8]
 
 - {class}`~annbatch.Loader` acccepts an `rng` argument now
+
+### Breaking
 - Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` to be consistent with the rest of the API.
 
 ## [0.0.7]

From fea4e70237b55aa73d265d5fc511f93b21ff353a Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Fri, 13 Feb 2026 20:29:47 +0100
Subject: [PATCH 17/30] update changelog

---
 CHANGELOG.md | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0259c504..44f38e4e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,13 +8,15 @@ and this project adheres to [Semantic Versioning][].
 [keep a changelog]: https://keepachangelog.com/en/1.0.0/
 [semantic versioning]: https://semver.org/spec/v2.0.0.html
 
-## [0.0.8]
-
-- {class}`~annbatch.Loader` acccepts an `rng` argument now
+## [0.0.9]
 
 ### Breaking
 - Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` to be consistent with the rest of the API.
 
+## [0.0.8]
+
+- {class}`~annbatch.Loader` acccepts an `rng` argument now
+
 ## [0.0.7]
 
 - Make the in-memory concatenation strategy configurable for {meth}`annbatch.Loader.__iter__` via a `concat_strategy` argument to `__init__` - sparse on-disk will concatenated then shuffled/yielded (faster, higher memory usage) but dense will be shuffled and then concated/yielded (lower memory usage).
@@ -38,7 +40,7 @@ and this project adheres to [Semantic Versioning][].
 
 ## [0.0.4]
 
-- Load into memory nullables/categoricals from `obs` by default when shuffling (i.e., no custom `load_adata` argument to {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`))
+- Load into memory nullables/categoricals from `obs` by default when shuffling (i.e., no custom `load_adata` argument to `annbatch.DatasetCollection.add_adatas`)
 
 ## [0.0.3]
 
@@ -52,9 +54,9 @@ and this project adheres to [Semantic Versioning][].
 ### Breaking
 
 - `ZarrSparseDataset` and `ZarrDenseDataset` have been conslidated into {class}`annbatch.Loader`
-- `create_anndata_collection` and `add_to_collection` have been moved into the {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`) method
-- Default reading of input data is now fully lazy in {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`), and therefore the shuffle process may now be slower although have better memory properties.  Use `load_adata` argument in {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`) to customize this behavior.
-- Files shuffled under the old `create_anndata_collection` will not be recognized by {class}`annbatch.DatasetCollection` and therefore are not usable with the new {class}`annbatch.Loader.use_collection` API.  At the moment, the file metadata we maintain is only for internal purposes - however, if you wish to migrate to be able to use {class}`annbatch.DatasetCollection` in conjunction with {class}`annbatch.Loader.use_collection`, the root folder of the old collection must have attrs `{"encoding-type": "annbatch-preshuffled", "encoding-version": "0.1.0"}` and be a {class}`zarr.Group`. The subfolders (i.e., datasets) must be called `dataset_([0-9]*)`. Otherwise you can use the {meth}`annbatch.DatasetCollection.add_anndatas` (formerly `add_adatas`) as before.
+- `create_anndata_collection` and `add_to_collection` have been moved into the `annbatch.DatasetCollection.add_adatas` method
+- Default reading of input data is now fully lazy in `annbatch.DatasetCollection.add_adatas`, and therefore the shuffle process may now be slower although have better memory properties.  Use `load_adata` argument in `annbatch.DatasetCollection.add_adatas` to customize this behavior.
+- Files shuffled under the old `create_anndata_collection` will not be recognized by {class}`annbatch.DatasetCollection` and therefore are not usable with the new {class}`annbatch.Loader.use_collection` API.  At the moment, the file metadata we maintain is only for internal purposes - however, if you wish to migrate to be able to use {class}`annbatch.DatasetCollection` in conjunction with {class}`annbatch.Loader.use_collection`, the root folder of the old collection must have attrs `{"encoding-type": "annbatch-preshuffled", "encoding-version": "0.1.0"}` and be a {class}`zarr.Group`. The subfolders (i.e., datasets) must be called `dataset_([0-9]*)`. Otherwise you can use the `annbatch.DatasetCollection.add_adatas` as before.
 
 ### Changed
 

From ac14eb9914015dbec7778dc3a776e4e2484ba7a9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 17 Feb 2026 12:45:29 +0000
Subject: [PATCH 18/30] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pyproject.toml        |  1 +
 tests/test_dataset.py | 10 +---------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9d8585c5..1114211d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -167,6 +167,7 @@ run.omit = [
 run.patch = [ "subprocess" ]
 run.source = [ "annbatch" ]
 
+[tool.mypy]
 [[tool.mypy.overrides]]
 overrides = [ { module = [ "anndata.*", "cupyx.*", "cupy.*", "torch.*", "h5py.*" ], ignore_missing_imports = true } ]
 
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 1d010c36..8a80924a 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -104,15 +104,7 @@ def concat(datas: list[Data | ad.AnnData]) -> ListData | list[ad.AnnData]:
     "gen_loader",
     [
         pytest.param(
-            lambda collection,
-            shuffle,
-            use_zarrs,
-            chunk_size=chunk_size,
-            preload_nchunks=preload_nchunks,
-            open_func=open_func,
-            batch_size=batch_size,
-            preload_to_gpu=preload_to_gpu,
-            concat_strategy=concat_strategy: (
+            lambda collection, shuffle, use_zarrs, chunk_size=chunk_size, preload_nchunks=preload_nchunks, open_func=open_func, batch_size=batch_size, preload_to_gpu=preload_to_gpu, concat_strategy=concat_strategy: (
                 Loader(
                     shuffle=shuffle,
                     chunk_size=chunk_size,

From 8d2387117749c424ece246b465bf4754f12d0c28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Selman=20=C3=96zleyen?=
 <32667648+selmanozleyen@users.noreply.github.com>
Date: Tue, 24 Feb 2026 11:08:52 +0100
Subject: [PATCH 19/30] Update src/annbatch/io.py

Co-authored-by: Ilan Gold <ilanbassgold@gmail.com>
---
 src/annbatch/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index fb094d4d..e5b0d1bb 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -397,7 +397,7 @@ def is_empty(self) -> bool:
     @_with_settings
     def add_anndatas(
         self,
-        adata_paths: Iterable[zarr.Group | h5py.Group | PathLike[str] | str],
+        anndata_paths: Iterable[zarr.Group | h5py.Group | PathLike[str] | str],
         *,
         load_adata: Callable[[zarr.Group | h5py.Group | PathLike[str] | str], ad.AnnData] = _default_load_adata,
         var_subset: Iterable[str] | None = None,

From 0129dc816ff688420118dc082d7c478f63b6963f Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Tue, 24 Feb 2026 11:12:59 +0100
Subject: [PATCH 20/30] anndata_paths

---
 CHANGELOG.md                 |  3 ++-
 README.md                    |  2 +-
 docs/index.md                |  2 +-
 docs/notebooks/example.ipynb |  6 +++---
 src/annbatch/io.py           | 22 +++++++++++-----------
 5 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 44f38e4e..3561f2b4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning][].
 ## [0.0.9]
 
 ### Breaking
-- Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` to be consistent with the rest of the API.
+- Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` and renamed `adata_paths` argument to `anndata_paths` to be consistent with the rest of the API.
 
 ## [0.0.8]
 
@@ -51,6 +51,7 @@ and this project adheres to [Semantic Versioning][].
 
 ## [0.0.2]
 
+
 ### Breaking
 
 - `ZarrSparseDataset` and `ZarrDenseDataset` have been conslidated into {class}`annbatch.Loader`
diff --git a/README.md b/README.md
index b8caba79..7452c0df 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ zarr.config.set(
 # Create a collection at the given path. The subgroups will all be anndata stores.
 collection = DatasetCollection("path/to/output/collection.zarr")
 collection.add_anndatas(
-    adata_paths=[
+    anndata_paths=[
         "path/to/your/file1.h5ad",
         "path/to/your/file2.h5ad"
     ],
diff --git a/docs/index.md b/docs/index.md
index 08e1335c..d720d3d7 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -10,7 +10,7 @@ Let's go through the above example:
 
 ```python
 colleciton = DatasetCollection("path/to/output/store.zarr").add_anndatas(
-    adata_paths=[
+    anndata_paths=[
         "path/to/your/file1.h5ad",
         "path/to/your/file2.h5ad"
     ],
diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
index 86efd256..9e97921e 100644
--- a/docs/notebooks/example.ipynb
+++ b/docs/notebooks/example.ipynb
@@ -118,7 +118,7 @@
    "metadata": {},
    "source": [
     "The conversion code will take care of the following things:\n",
-    "* Align (outer join) the gene spaces across all datasets listed in `adata_paths`\n",
+    "* Align (outer join) the gene spaces across all datasets listed in `anndata_paths`\n",
     "  * The gene spaces are outer-joined based on the gene names provided in the `var_names` field of the individual `AnnData` objects.\n",
     "  * If you want to subset to specific gene space, you can provide a list of gene names via the `var_subset` parameter.\n",
     "* Shuffle the cells across all datasets (this works on larger than memory datasets as well).\n",
@@ -200,7 +200,7 @@
     "collection = DatasetCollection(zarr.open(\"annbatch_collection\", mode=\"w\"))\n",
     "collection.add_anndatas(\n",
     "    # List all the h5ad files you want to include in the collection\n",
-    "    adata_paths=[\"866d7d5e-436b-4dbd-b7c1-7696487d452e.h5ad\", \"f81463b8-4986-4904-a0ea-20ff02cbb317.h5ad\"],\n",
+    "    anndata_paths=[\"866d7d5e-436b-4dbd-b7c1-7696487d452e.h5ad\", \"f81463b8-4986-4904-a0ea-20ff02cbb317.h5ad\"],\n",
     "    # Path to store the output collection\n",
     "    shuffle=True,  # Whether to pre-shuffle the cells of the collection\n",
     "    n_obs_per_dataset=2_097_152,  # Number of cells per dataset shard, this number is much higher than available in these datasets but is generally a good target\n",
@@ -364,7 +364,7 @@
    ],
    "source": [
     "collection.add_anndatas(\n",
-    "    adata_paths=[\n",
+    "    anndata_paths=[\n",
     "        \"866d7d5e-436b-4dbd-b7c1-7696487d452e.h5ad\",\n",
     "    ],\n",
     "    load_adata=read_lazy_x_and_obs_only,\n",
diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index e5b0d1bb..767324a6 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -425,12 +425,12 @@ def add_anndatas(
 
         Parameters
         ----------
-            adata_paths
+            anndata_paths
                 Paths to the AnnData files used to create the zarr store.
             load_adata
                 Function to customize (lazy-)loading the invidiual input anndata files. By default, :func:`anndata.experimental.read_lazy` is used with categoricals/nullables read into memory and `(-1)` chunks for `obs`.
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and certain `obs` columns), you can provide a custom function here to speed up loading and harmonize your data.
-                Beware that concatenating nullables/categoricals (i.e., what happens if `len(adata_paths) > 1` internally in this function) from {class}`anndata.experimental.backed.Dataset2D` `obs` is very time consuming - consider loading these into memory if you use this argument.
+                Beware that concatenating nullables/categoricals (i.e., what happens if `len(anndata_paths) > 1` internally in this function) from {class}`anndata.experimental.backed.Dataset2D` `obs` is very time consuming - consider loading these into memory if you use this argument.
             var_subset
                 Subset of gene names to include in the store. If None, all genes are included.
                 Genes are subset based on the `var_names` attribute of the concatenated AnnData object.
@@ -483,7 +483,7 @@ def add_anndatas(
         if shuffle_chunk_size > n_obs_per_dataset:
             raise ValueError("Cannot have a large slice size than observations per dataset")
         shared_kwargs = {
-            "adata_paths": adata_paths,
+            "anndata_paths": anndata_paths,
             "load_adata": load_adata,
             "zarr_sparse_chunk_size": zarr_sparse_chunk_size,
             "zarr_sparse_shard_size": zarr_sparse_shard_size,
@@ -503,7 +503,7 @@ def add_anndatas(
     def _create_collection(
         self,
         *,
-        adata_paths: Iterable[PathLike[str]] | Iterable[str],
+        anndata_paths: Iterable[PathLike[str]] | Iterable[str],
         load_adata: Callable[[PathLike[str] | str], ad.AnnData] = _default_load_adata,
         var_subset: Iterable[str] | None = None,
         zarr_sparse_chunk_size: int = 32768,
@@ -528,7 +528,7 @@ def _create_collection(
 
         Parameters
         ----------
-            adata_paths
+            anndata_paths
                 Paths to the AnnData files used to create the zarr store.
             load_adata
                 Function to customize lazy-loading the invidiual input anndata files. By default, :func:`anndata.experimental.read_lazy` is used.
@@ -563,8 +563,8 @@ def _create_collection(
         """
         if not self.is_empty:
             raise RuntimeError("Cannot create a collection at a location that already has a shuffled collection")
-        _check_for_mismatched_keys(adata_paths, load_adata=load_adata)
-        adata_concat = _lazy_load_anndatas(adata_paths, load_adata=load_adata)
+        _check_for_mismatched_keys(anndata_paths, load_adata=load_adata)
+        adata_concat = _lazy_load_anndatas(anndata_paths, load_adata=load_adata)
         adata_concat.obs_names_make_unique()
         n_obs_per_dataset = min(adata_concat.shape[0], n_obs_per_dataset)
         chunks = _create_chunks_for_shuffling(
@@ -606,7 +606,7 @@ def _create_collection(
     def _add_to_collection(
         self,
         *,
-        adata_paths: Iterable[PathLike[str]] | Iterable[str],
+        anndata_paths: Iterable[PathLike[str]] | Iterable[str],
         load_adata: Callable[[PathLike[str] | str], ad.AnnData] = ad.read_h5ad,
         zarr_sparse_chunk_size: int = 32768,
         zarr_sparse_shard_size: int = 134_217_728,
@@ -623,7 +623,7 @@ def _add_to_collection(
 
         Parameters
         ----------
-            adata_paths
+            anndata_paths
                 Paths to the anndata files to be appended to the collection of output chunks.
             load_adata
                 Function to customize loading the invidiual input anndata files. By default, :func:`anndata.read_h5ad` is used.
@@ -651,9 +651,9 @@ def _add_to_collection(
         if self.is_empty:
             raise ValueError("Store is empty. Please run `DatasetCollection.add_anndatas` first.")
         # Check for mismatched keys among the inputs.
-        _check_for_mismatched_keys(adata_paths, load_adata=load_adata)
+        _check_for_mismatched_keys(anndata_paths, load_adata=load_adata)
 
-        adata_concat = _lazy_load_anndatas(adata_paths, load_adata=load_adata)
+        adata_concat = _lazy_load_anndatas(anndata_paths, load_adata=load_adata)
         if math.ceil(adata_concat.shape[0] / shuffle_chunk_size) < len(self._dataset_keys):
             raise ValueError(
                 f"Use a shuffle size small enough to distribute the input data with {adata_concat.shape[0]} obs across {len(self._dataset_keys)} anndata stores."

From d8c9577e2c2448b132e597f97627628bc4811521 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Tue, 24 Feb 2026 11:18:47 +0100
Subject: [PATCH 21/30] load_adata to load_anndata

---
 CHANGELOG.md                 |  1 +
 README.md                    |  4 ++--
 docs/notebooks/example.ipynb | 12 ++++++------
 src/annbatch/io.py           | 38 ++++++++++++++++++------------------
 src/annbatch/loader.py       | 11 +++++++----
 tests/test_dataset.py        |  4 ++--
 tests/test_preshuffle.py     | 26 ++++++++++++------------
 7 files changed, 50 insertions(+), 46 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3561f2b4..62dc1bdc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning][].
 
 ### Breaking
 - Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` and renamed `adata_paths` argument to `anndata_paths` to be consistent with the rest of the API.
+- In a similar fashion, renamed the `load_adata` argument to `load_anndata` in {meth}`annbatch.DatasetCollection.add_anndatas` and {meth}`annbatch.Loader.use_collection`.
 
 ## [0.0.8]
 
diff --git a/README.md b/README.md
index 7452c0df..c701dc87 100644
--- a/README.md
+++ b/README.md
@@ -127,9 +127,9 @@ with ad.settings.override(remove_unused_categories=False):
         preload_nchunks=256,
     )
     # `use_collection` automatically uses the on-disk `X` and full `obs` in the `Loader`
-    # but the `load_adata` arg can override this behavior
+    # but the `load_anndata` arg can override this behavior
     # (see `custom_load_func` above for an example of customization).
-    ds = ds.use_collection(collection, load_adata = custom_load_func)
+    ds = ds.use_collection(collection, load_anndata = custom_load_func)
 
 # Iterate over dataloader (plugin replacement for torch.utils.DataLoader)
 for batch in ds:
diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
index 9e97921e..52da2652 100644
--- a/docs/notebooks/example.ipynb
+++ b/docs/notebooks/example.ipynb
@@ -178,7 +178,7 @@
     "\n",
     "\n",
     "# For CELLxGENE data, the raw counts can either be found under .raw.X or under .X (if .raw is not supplied).\n",
-    "# To have a store that only contains raw counts, we can write the following load_adata function\n",
+    "# To have a store that only contains raw counts, we can write the following load_anndata function\n",
     "def read_lazy_x_and_obs_only(path) -> ad.AnnData:\n",
     "    \"\"\"Custom load function to only load raw counts from CxG data.\"\"\"\n",
     "    # IMPORTANT: Large data should always be loaded lazily to reduce the memory footprint\n",
@@ -205,7 +205,7 @@
     "    shuffle=True,  # Whether to pre-shuffle the cells of the collection\n",
     "    n_obs_per_dataset=2_097_152,  # Number of cells per dataset shard, this number is much higher than available in these datasets but is generally a good target\n",
     "    var_subset=None,  # Optionally subset the collection to a specific gene space\n",
-    "    load_adata=read_lazy_x_and_obs_only,\n",
+    "    load_anndata=read_lazy_x_and_obs_only,\n",
     ")"
    ]
   },
@@ -227,7 +227,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {
     "tags": [
      "hide-output"
@@ -251,7 +251,7 @@
     "from annbatch import Loader\n",
     "\n",
     "\n",
-    "def _load_adata(g: zarr.Group) -> ad.AnnData:\n",
+    "def _load_anndata(g: zarr.Group) -> ad.AnnData:\n",
     "    return ad.AnnData(X=ad.io.sparse_dataset(g[\"X\"]), obs=ad.experimental.read_lazy(g).obs[[\"cell_type\"]].to_memory())\n",
     "\n",
     "\n",
@@ -265,7 +265,7 @@
     ")\n",
     "\n",
     "# Add in the shuffled data that should be used for training.\n",
-    "ds.use_collection(collection, load_adata=_load_adata)"
+    "ds.use_collection(collection, load_anndata=_load_anndata)"
    ]
   },
   {
@@ -367,7 +367,7 @@
     "    anndata_paths=[\n",
     "        \"866d7d5e-436b-4dbd-b7c1-7696487d452e.h5ad\",\n",
     "    ],\n",
-    "    load_adata=read_lazy_x_and_obs_only,\n",
+    "    load_anndata=read_lazy_x_and_obs_only,\n",
     ")"
    ]
   },
diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index 767324a6..00656b76 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -33,7 +33,7 @@
 V1_ENCODING = {"encoding-type": "annbatch-preshuffled", "encoding-version": "0.1.0"}
 
 
-def _default_load_adata[T: zarr.Group | h5py.Group | PathLike[str] | str](x: T) -> ad.AnnData:
+def _default_load_anndata[T: zarr.Group | h5py.Group | PathLike[str] | str](x: T) -> ad.AnnData:
     adata = ad.experimental.read_lazy(x, load_annotation_index=False)
     if not isinstance(x, zarr.Group | h5py.Group):
         group = (
@@ -138,7 +138,7 @@ def callback(
 def _check_for_mismatched_keys[T: zarr.Group | h5py.Group | PathLike[str] | str](
     paths_or_anndatas: Iterable[T | ad.AnnData],
     *,
-    load_adata: Callable[[T], ad.AnnData] = lambda x: ad.experimental.read_lazy(x, load_annotation_index=False),
+    load_anndata: Callable[[T], ad.AnnData] = lambda x: ad.experimental.read_lazy(x, load_annotation_index=False),
 ):
     num_raw_in_adata = 0
     found_keys: dict[str, defaultdict[str, int]] = {
@@ -148,7 +148,7 @@ def _check_for_mismatched_keys[T: zarr.Group | h5py.Group | PathLike[str] | str]
     }
     for path_or_anndata in tqdm(paths_or_anndatas, desc="checking for mismatched keys"):
         if not isinstance(path_or_anndata, ad.AnnData):
-            adata = load_adata(path_or_anndata)
+            adata = load_anndata(path_or_anndata)
         else:
             adata = path_or_anndata
         for elem_name, key_count in found_keys.items():
@@ -160,26 +160,26 @@ def _check_for_mismatched_keys[T: zarr.Group | h5py.Group | PathLike[str] | str]
             num_raw_in_adata += 1
     if num_raw_in_adata != (num_anndatas := len(list(paths_or_anndatas))) and num_raw_in_adata != 0:
         warnings.warn(
-            f"Found raw keys not present in all anndatas {paths_or_anndatas}, consider deleting raw or moving it to a shared layer/X location via `load_adata`",
+            f"Found raw keys not present in all anndatas {paths_or_anndatas}, consider deleting raw or moving it to a shared layer/X location via `load_anndata`",
             stacklevel=2,
         )
     for elem_name, key_count in found_keys.items():
         elem_keys_mismatched = [key for key, count in key_count.items() if (count != num_anndatas and count != 0)]
         if len(elem_keys_mismatched) > 0:
             warnings.warn(
-                f"Found {elem_name} keys {elem_keys_mismatched} not present in all anndatas {paths_or_anndatas}, consider stopping and using the `load_adata` argument to alter {elem_name} accordingly.",
+                f"Found {elem_name} keys {elem_keys_mismatched} not present in all anndatas {paths_or_anndatas}, consider stopping and using the `load_anndata` argument to alter {elem_name} accordingly.",
                 stacklevel=2,
             )
 
 
 def _lazy_load_anndatas[T: zarr.Group | h5py.Group | PathLike[str] | str](
     paths: Iterable[T],
-    load_adata: Callable[[T], ad.AnnData] = _default_load_adata,
+    load_anndata: Callable[[T], ad.AnnData] = _default_load_anndata,
 ):
     adatas = []
     categoricals_in_all_adatas: dict[str, pd.Index] = {}
     for i, path in tqdm(enumerate(paths), desc="loading"):
-        adata = load_adata(path)
+        adata = load_anndata(path)
         # Track the source file for this given anndata object
         adata.obs["src_path"] = pd.Categorical.from_codes(
             np.ones((adata.shape[0],), dtype="int") * i, categories=pd.Index([str(p) for p in paths])
@@ -399,7 +399,7 @@ def add_anndatas(
         self,
         anndata_paths: Iterable[zarr.Group | h5py.Group | PathLike[str] | str],
         *,
-        load_adata: Callable[[zarr.Group | h5py.Group | PathLike[str] | str], ad.AnnData] = _default_load_adata,
+        load_anndata: Callable[[zarr.Group | h5py.Group | PathLike[str] | str], ad.AnnData] = _default_load_anndata,
         var_subset: Iterable[str] | None = None,
         zarr_sparse_chunk_size: int = 32768,
         zarr_sparse_shard_size: int = 134_217_728,
@@ -427,7 +427,7 @@ def add_anndatas(
         ----------
             anndata_paths
                 Paths to the AnnData files used to create the zarr store.
-            load_adata
+            load_anndata
                 Function to customize (lazy-)loading the invidiual input anndata files. By default, :func:`anndata.experimental.read_lazy` is used with categoricals/nullables read into memory and `(-1)` chunks for `obs`.
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and certain `obs` columns), you can provide a custom function here to speed up loading and harmonize your data.
                 Beware that concatenating nullables/categoricals (i.e., what happens if `len(anndata_paths) > 1` internally in this function) from {class}`anndata.experimental.backed.Dataset2D` `obs` is very time consuming - consider loading these into memory if you use this argument.
@@ -477,14 +477,14 @@ def add_anndatas(
             ... ]
             >>> DatasetCollection("path/to/output/zarr_store.zarr").add_anndatas(
             ...    datasets,
-            ...    load_adata=read_lazy_x_and_obs_only,
+            ...    load_anndata=read_lazy_x_and_obs_only,
             ...)
         """
         if shuffle_chunk_size > n_obs_per_dataset:
             raise ValueError("Cannot have a large slice size than observations per dataset")
         shared_kwargs = {
             "anndata_paths": anndata_paths,
-            "load_adata": load_adata,
+            "load_anndata": load_anndata,
             "zarr_sparse_chunk_size": zarr_sparse_chunk_size,
             "zarr_sparse_shard_size": zarr_sparse_shard_size,
             "zarr_dense_chunk_size": zarr_dense_chunk_size,
@@ -504,7 +504,7 @@ def _create_collection(
         self,
         *,
         anndata_paths: Iterable[PathLike[str]] | Iterable[str],
-        load_adata: Callable[[PathLike[str] | str], ad.AnnData] = _default_load_adata,
+        load_anndata: Callable[[PathLike[str] | str], ad.AnnData] = _default_load_anndata,
         var_subset: Iterable[str] | None = None,
         zarr_sparse_chunk_size: int = 32768,
         zarr_sparse_shard_size: int = 134_217_728,
@@ -530,7 +530,7 @@ def _create_collection(
         ----------
             anndata_paths
                 Paths to the AnnData files used to create the zarr store.
-            load_adata
+            load_anndata
                 Function to customize lazy-loading the invidiual input anndata files. By default, :func:`anndata.experimental.read_lazy` is used.
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and `obs`), you can provide a custom function here to speed up loading and harmonize your data.
                 The input to the function is a path to an anndata file, and the output is an anndata object which has `X` as a :class:`dask.array.Array`.
@@ -563,8 +563,8 @@ def _create_collection(
         """
         if not self.is_empty:
             raise RuntimeError("Cannot create a collection at a location that already has a shuffled collection")
-        _check_for_mismatched_keys(anndata_paths, load_adata=load_adata)
-        adata_concat = _lazy_load_anndatas(anndata_paths, load_adata=load_adata)
+        _check_for_mismatched_keys(anndata_paths, load_anndata=load_anndata)
+        adata_concat = _lazy_load_anndatas(anndata_paths, load_anndata=load_anndata)
         adata_concat.obs_names_make_unique()
         n_obs_per_dataset = min(adata_concat.shape[0], n_obs_per_dataset)
         chunks = _create_chunks_for_shuffling(
@@ -607,7 +607,7 @@ def _add_to_collection(
         self,
         *,
         anndata_paths: Iterable[PathLike[str]] | Iterable[str],
-        load_adata: Callable[[PathLike[str] | str], ad.AnnData] = ad.read_h5ad,
+        load_anndata: Callable[[PathLike[str] | str], ad.AnnData] = ad.read_h5ad,
         zarr_sparse_chunk_size: int = 32768,
         zarr_sparse_shard_size: int = 134_217_728,
         zarr_dense_chunk_size: int = 1024,
@@ -625,7 +625,7 @@ def _add_to_collection(
         ----------
             anndata_paths
                 Paths to the anndata files to be appended to the collection of output chunks.
-            load_adata
+            load_anndata
                 Function to customize loading the invidiual input anndata files. By default, :func:`anndata.read_h5ad` is used.
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and `obs`), you can provide a custom function here to speed up loading and harmonize your data.
                 The input to the function is a path to an anndata file, and the output is an anndata object.
@@ -651,9 +651,9 @@ def _add_to_collection(
         if self.is_empty:
             raise ValueError("Store is empty. Please run `DatasetCollection.add_anndatas` first.")
         # Check for mismatched keys among the inputs.
-        _check_for_mismatched_keys(anndata_paths, load_adata=load_adata)
+        _check_for_mismatched_keys(anndata_paths, load_anndata=load_anndata)
 
-        adata_concat = _lazy_load_anndatas(anndata_paths, load_adata=load_adata)
+        adata_concat = _lazy_load_anndatas(anndata_paths, load_anndata=load_anndata)
         if math.ceil(adata_concat.shape[0] / shuffle_chunk_size) < len(self._dataset_keys):
             raise ValueError(
                 f"Use a shuffle size small enough to distribute the input data with {adata_concat.shape[0]} obs across {len(self._dataset_keys)} anndata stores."
diff --git a/src/annbatch/loader.py b/src/annbatch/loader.py
index 2df59954..29811872 100644
--- a/src/annbatch/loader.py
+++ b/src/annbatch/loader.py
@@ -296,7 +296,10 @@ def batch_sampler(self) -> Sampler:
         return self._batch_sampler
 
     def use_collection(
-        self, collection: DatasetCollection, *, load_adata: Callable[[zarr.Group], ad.AnnData] = load_x_and_obs_and_var
+        self,
+        collection: DatasetCollection,
+        *,
+        load_anndata: Callable[[zarr.Group], ad.AnnData] = load_x_and_obs_and_var,
     ) -> Self:
         """Load from an existing :class:`annbatch.DatasetCollection`.
 
@@ -306,10 +309,10 @@ def use_collection(
         ----------
         collection
             The collection whose on-disk datasets should be used in this loader.
-        load_adata
+        load_anndata
             A custom load function - recall that whatever is found in :attr:`~anndata.AnnData.X` and :attr:`~anndata.AnnData.obs` will be yielded in batches.
             Default is to just load `X` and all of `obs`.
-            This default behavior can degrade performance if you don't need all columns in `obs` - it is recommended to use the `load_adata` argument.
+            This default behavior can degrade performance if you don't need all columns in `obs` - it is recommended to use the `load_anndata` argument.
         """
         if collection.is_empty:
             raise ValueError("DatasetCollection is empty")
@@ -317,7 +320,7 @@ def use_collection(
             raise RuntimeError(
                 "You should not add multiple collections, independently shuffled - please preshuffle multiple collections, use `add_anndatas` manually if you know what you are doing, or open an issue if you believe that this should be supported at an API level higher than `add_anndatas`."
             )
-        adatas = [load_adata(g) for g in collection]
+        adatas = [load_anndata(g) for g in collection]
         self.add_anndatas(adatas)
         self._collection_added = True
         return self
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 8a80924a..bfff82be 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -117,7 +117,7 @@ def concat(datas: list[Data | ad.AnnData]) -> ListData | list[ad.AnnData]:
                 ).use_collection(
                     collection,
                     **(
-                        {"load_adata": lambda group: open_func(group, use_zarrs=use_zarrs, use_anndata=True)}
+                        {"load_anndata": lambda group: open_func(group, use_zarrs=use_zarrs, use_anndata=True)}
                         if open_func is not None
                         else {}
                     ),
@@ -519,7 +519,7 @@ def test_no_obs_no_var(simple_collection: tuple[ad.AnnData, DatasetCollection]):
         batch_size=20,
     ).use_collection(
         simple_collection[1],
-        load_adata=lambda g: ad.AnnData(X=ad.io.sparse_dataset(g["layers"]["sparse"])),
+        load_anndata=lambda g: ad.AnnData(X=ad.io.sparse_dataset(g["layers"]["sparse"])),
     )
     assert next(iter(ds))["obs"] is None
 
diff --git a/tests/test_preshuffle.py b/tests/test_preshuffle.py
index 5a0fae4d..722164ee 100644
--- a/tests/test_preshuffle.py
+++ b/tests/test_preshuffle.py
@@ -77,7 +77,7 @@ def test_store_creation_no_warnings_with_custom_load(tmp_path: Path):
         zarr_dense_shard_size=10,
         n_obs_per_dataset=10,
         shuffle_chunk_size=5,
-        load_adata=lambda x: ad.AnnData(X=ad.io.read_elem(h5py.File(x)["X"])),
+        load_anndata=lambda x: ad.AnnData(X=ad.io.read_elem(h5py.File(x)["X"])),
     )
     assert len(ad.read_zarr(next(iter(collection))).layers.keys()) == 0
 
@@ -109,11 +109,11 @@ def test_store_creation_path_added_to_obs(tmp_path: Path):
 
 
 @pytest.mark.parametrize("elem_name", ["obsm", "layers", "raw", "obs"])
-@pytest.mark.parametrize("load_adata", [ad.read_h5ad, ad.experimental.read_lazy])
+@pytest.mark.parametrize("load_anndata", [ad.read_h5ad, ad.experimental.read_lazy])
 def test_store_addition_different_keys(
     elem_name: Literal["obsm", "layers", "raw"],
     tmp_path: Path,
-    load_adata: Callable[[PathLike[str] | str], ad.AnnData],
+    load_anndata: Callable[[PathLike[str] | str], ad.AnnData],
 ):
     adata_orig = ad.AnnData(X=np.random.randn(100, 20))
     orig_path = tmp_path / "orig.h5ad"
@@ -138,7 +138,7 @@ def test_store_addition_different_keys(
     with pytest.warns(UserWarning, match=rf"Found {elem_name} keys.* not present in all anndatas"):
         collection.add_anndatas(
             [additional_path],
-            load_adata=load_adata,
+            load_anndata=load_anndata,
             zarr_sparse_chunk_size=10,
             zarr_sparse_shard_size=20,
             zarr_dense_chunk_size=5,
@@ -188,18 +188,18 @@ def test_store_creation_default(
 
 @pytest.mark.parametrize("shuffle", [pytest.param(True, id="shuffle"), pytest.param(False, id="no_shuffle")])
 @pytest.mark.parametrize(
-    "load_adata", [pytest.param(None, id="default_read"), pytest.param(ad.experimental.read_lazy, id="fully_lazy")]
+    "load_anndata", [pytest.param(None, id="default_read"), pytest.param(ad.experimental.read_lazy, id="fully_lazy")]
 )
 def test_store_creation(
     adata_with_h5_path_different_var_space: tuple[ad.AnnData, Path],
     shuffle: bool,
-    load_adata: Callable[[str], ad.AnnData],
+    load_anndata: Callable[[str], ad.AnnData],
 ):
     var_subset = [f"gene_{i}" for i in range(100)]
     h5_files = sorted(adata_with_h5_path_different_var_space[1].iterdir())
     output_path = (
         adata_with_h5_path_different_var_space[1].parent
-        / f"zarr_store_creation_test_{shuffle}_{'default_read' if load_adata is None else 'custom_read'}.zarr"
+        / f"zarr_store_creation_test_{shuffle}_{'default_read' if load_anndata is None else 'custom_read'}.zarr"
     )
     collection = DatasetCollection(output_path).add_anndatas(
         [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")],
@@ -211,7 +211,7 @@ def test_store_creation(
         n_obs_per_dataset=50,
         shuffle_chunk_size=10,
         shuffle=shuffle,
-        **({"load_adata": load_adata} if load_adata is not None else {}),
+        **({"load_anndata": load_anndata} if load_anndata is not None else {}),
     )
     assert not DatasetCollection(output_path).is_empty
     assert V1_ENCODING.items() <= zarr.open(output_path).attrs.items()
@@ -301,7 +301,7 @@ def test_mismatched_raw_concat(
         n_obs_per_dataset=30,
         shuffle_chunk_size=10,
         shuffle=False,  # don't shuffle -> want to check if the right attributes get taken
-        load_adata=_read_lazy_x_and_obs_only_from_raw,
+        load_anndata=_read_lazy_x_and_obs_only_from_raw,
     )
 
     adatas_orig = []
@@ -324,14 +324,14 @@ def test_mismatched_raw_concat(
     np.testing.assert_array_equal(adata_orig.X.toarray(), adata.X.toarray())
 
 
-@pytest.mark.parametrize("load_adata", [ad.read_h5ad, ad.experimental.read_lazy])
+@pytest.mark.parametrize("load_anndata", [ad.read_h5ad, ad.experimental.read_lazy])
 def test_store_extension(
     adata_with_h5_path_different_var_space: tuple[ad.AnnData, Path],
-    load_adata: Callable[[PathLike[str] | str], ad.AnnData],
+    load_anndata: Callable[[PathLike[str] | str], ad.AnnData],
 ):
     all_h5_paths = sorted(p for p in adata_with_h5_path_different_var_space[1].iterdir() if p.suffix == ".h5ad")
     store_path = (
-        adata_with_h5_path_different_var_space[1].parent / f"zarr_store_extension_test_{load_adata.__name__}.zarr"
+        adata_with_h5_path_different_var_space[1].parent / f"zarr_store_extension_test_{load_anndata.__name__}.zarr"
     )
     original = all_h5_paths
     additional = all_h5_paths[4:]  # don't add everything to get a "different" var space
@@ -350,7 +350,7 @@ def test_store_extension(
     # add h5ads to existing store
     collection.add_anndatas(
         additional,
-        load_adata=load_adata,
+        load_anndata=load_anndata,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
         zarr_dense_chunk_size=5,

From 50293fba1f1f9c99cd9679ceb0de9c2c7a33daf0 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Tue, 24 Feb 2026 11:33:07 +0100
Subject: [PATCH 22/30] fix mistake

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 62dc1bdc..5eb4bf65 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning][].
 
 ### Breaking
 - Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` and renamed `adata_paths` argument to `anndata_paths` to be consistent with the rest of the API.
-- In similar fashion, renamed `load_adata` argument to `load_anndata` in {meth}`annbatch.DatasetCollection.add_anndatas` and {meth}`annbatch.DatasetCollection.use_collection`.
+- In similar fashion, renamed `load_adata` argument to `load_anndata` in {meth}`annbatch.DatasetCollection.add_anndatas` and {meth}`annbatch.Loader.use_collection`.
 
 ## [0.0.8]
 

From a485103c7572fb0a079b593cdf01cc71e9dc4b77 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 25 Feb 2026 15:10:44 +0100
Subject: [PATCH 23/30] rename from anndata to adata

---
 README.md                | 10 +++---
 docs/index.md            |  4 +--
 src/annbatch/io.py       | 76 ++++++++++++++++++++--------------------
 src/annbatch/loader.py   | 20 +++++------
 tests/conftest.py        |  2 +-
 tests/test_dataset.py    |  8 ++---
 tests/test_preshuffle.py | 48 ++++++++++++-------------
 7 files changed, 84 insertions(+), 84 deletions(-)

diff --git a/README.md b/README.md
index c701dc87..e7052e12 100644
--- a/README.md
+++ b/README.md
@@ -86,8 +86,8 @@ zarr.config.set(
 
 # Create a collection at the given path. The subgroups will all be anndata stores.
 collection = DatasetCollection("path/to/output/collection.zarr")
-collection.add_anndatas(
-    anndata_paths=[
+collection.add_adata(
+    adata_paths=[
         "path/to/your/file1.h5ad",
         "path/to/your/file2.h5ad"
     ],
@@ -98,7 +98,7 @@ collection.add_anndatas(
 Data loading:
 
 > [!IMPORTANT]
-> Without custom loading via {meth}`annbatch.Loader.use_collection` or `load_anndata{s}`  or `load_dataset{s}`, *all* columns of the (obs) {class}`pandas.DataFrame` will be loaded and yielded potentially degrading performance.
+> Without custom loading via {meth}`annbatch.Loader.use_collection` or `load_adata{s}`  or `load_dataset{s}`, *all* columns of the (obs) {class}`pandas.DataFrame` will be loaded and yielded potentially degrading performance.
 
 ```python
 from pathlib import Path
@@ -127,9 +127,9 @@ with ad.settings.override(remove_unused_categories=False):
         preload_nchunks=256,
     )
     # `use_collection` automatically uses the on-disk `X` and full `obs` in the `Loader`
-    # but the `load_anndata` arg can override this behavior
+    # but the `load_adata` arg can override this behavior
     # (see `custom_load_func` above for an example of customization).
-    ds = ds.use_collection(collection, load_anndata = custom_load_func)
+    ds = ds.use_collection(collection, load_adata = custom_load_func)
 
 # Iterate over dataloader (plugin replacement for torch.utils.DataLoader)
 for batch in ds:
diff --git a/docs/index.md b/docs/index.md
index d720d3d7..dd8b4a89 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -9,8 +9,8 @@ Let's go through the above example:
 ### Preprocessing
 
 ```python
-colleciton = DatasetCollection("path/to/output/store.zarr").add_anndatas(
-    anndata_paths=[
+colleciton = DatasetCollection("path/to/output/store.zarr").add_adata(
+    adata_paths=[
         "path/to/your/file1.h5ad",
         "path/to/your/file2.h5ad"
     ],
diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index 00656b76..bddaa3a8 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -33,7 +33,7 @@
 V1_ENCODING = {"encoding-type": "annbatch-preshuffled", "encoding-version": "0.1.0"}
 
 
-def _default_load_anndata[T: zarr.Group | h5py.Group | PathLike[str] | str](x: T) -> ad.AnnData:
+def _default_load_adata[T: zarr.Group | h5py.Group | PathLike[str] | str](x: T) -> ad.AnnData:
     adata = ad.experimental.read_lazy(x, load_annotation_index=False)
     if not isinstance(x, zarr.Group | h5py.Group):
         group = (
@@ -136,9 +136,9 @@ def callback(
 
 
 def _check_for_mismatched_keys[T: zarr.Group | h5py.Group | PathLike[str] | str](
-    paths_or_anndatas: Iterable[T | ad.AnnData],
+    paths_or_adata: Iterable[T | ad.AnnData],
     *,
-    load_anndata: Callable[[T], ad.AnnData] = lambda x: ad.experimental.read_lazy(x, load_annotation_index=False),
+    load_adata: Callable[[T], ad.AnnData] = lambda x: ad.experimental.read_lazy(x, load_annotation_index=False),
 ):
     num_raw_in_adata = 0
     found_keys: dict[str, defaultdict[str, int]] = {
@@ -146,9 +146,9 @@ def _check_for_mismatched_keys[T: zarr.Group | h5py.Group | PathLike[str] | str]
         "obsm": defaultdict(lambda: 0),
         "obs": defaultdict(lambda: 0),
     }
-    for path_or_anndata in tqdm(paths_or_anndatas, desc="checking for mismatched keys"):
+    for path_or_anndata in tqdm(paths_or_adata, desc="Checking for mismatched keys"):
         if not isinstance(path_or_anndata, ad.AnnData):
-            adata = load_anndata(path_or_anndata)
+            adata = load_adata(path_or_anndata)
         else:
             adata = path_or_anndata
         for elem_name, key_count in found_keys.items():
@@ -158,28 +158,28 @@ def _check_for_mismatched_keys[T: zarr.Group | h5py.Group | PathLike[str] | str]
                     key_count[key] += 1
         if adata.raw is not None:
             num_raw_in_adata += 1
-    if num_raw_in_adata != (num_anndatas := len(list(paths_or_anndatas))) and num_raw_in_adata != 0:
+    if num_raw_in_adata != (num_anndatas := len(list(paths_or_adata))) and num_raw_in_adata != 0:
         warnings.warn(
-            f"Found raw keys not present in all anndatas {paths_or_anndatas}, consider deleting raw or moving it to a shared layer/X location via `load_anndata`",
+            f"Found raw keys not present in all anndatas {paths_or_adata}, consider deleting raw or moving it to a shared layer/X location via `load_adata`",
             stacklevel=2,
         )
     for elem_name, key_count in found_keys.items():
         elem_keys_mismatched = [key for key, count in key_count.items() if (count != num_anndatas and count != 0)]
         if len(elem_keys_mismatched) > 0:
             warnings.warn(
-                f"Found {elem_name} keys {elem_keys_mismatched} not present in all anndatas {paths_or_anndatas}, consider stopping and using the `load_anndata` argument to alter {elem_name} accordingly.",
+                f"Found {elem_name} keys {elem_keys_mismatched} not present in all anndatas {paths_or_adata}, consider stopping and using the `load_adata` argument to alter {elem_name} accordingly.",
                 stacklevel=2,
             )
 
 
-def _lazy_load_anndatas[T: zarr.Group | h5py.Group | PathLike[str] | str](
+def _lazy_load_adata[T: zarr.Group | h5py.Group | PathLike[str] | str](
     paths: Iterable[T],
-    load_anndata: Callable[[T], ad.AnnData] = _default_load_anndata,
+    load_adata: Callable[[T], ad.AnnData] = _default_load_adata,
 ):
     adatas = []
     categoricals_in_all_adatas: dict[str, pd.Index] = {}
-    for i, path in tqdm(enumerate(paths), desc="loading"):
-        adata = load_anndata(path)
+    for i, path in tqdm(enumerate(paths), total=len(paths), desc="Lazy loading adata"):
+        adata = load_adata(path)
         # Track the source file for this given anndata object
         adata.obs["src_path"] = pd.Categorical.from_codes(
             np.ones((adata.shape[0],), dtype="int") * i, categories=pd.Index([str(p) for p in paths])
@@ -395,11 +395,11 @@ def is_empty(self) -> bool:
         )
 
     @_with_settings
-    def add_anndatas(
+    def add_adata(
         self,
-        anndata_paths: Iterable[zarr.Group | h5py.Group | PathLike[str] | str],
+        adata_paths: Iterable[zarr.Group | h5py.Group | PathLike[str] | str],
         *,
-        load_anndata: Callable[[zarr.Group | h5py.Group | PathLike[str] | str], ad.AnnData] = _default_load_anndata,
+        load_adata: Callable[[zarr.Group | h5py.Group | PathLike[str] | str], ad.AnnData] = _default_load_adata,
         var_subset: Iterable[str] | None = None,
         zarr_sparse_chunk_size: int = 32768,
         zarr_sparse_shard_size: int = 134_217_728,
@@ -411,7 +411,7 @@ def add_anndatas(
         shuffle_chunk_size: int = 1000,
         shuffle: bool = True,
     ) -> Self:
-        """Take AnnData paths and create or add to an on-disk set of AnnData datasets with uniform var spaces at the desired path (with `n_obs_per_dataset` rows per dataset if running for the first time).
+        """Take adata paths and create or add to an on-disk set of AnnData datasets with uniform var spaces at the desired path (with `n_obs_per_dataset` rows per dataset if running for the first time).
 
         The set of AnnData datasets is collectively referred to as a "collection" where each dataset is called `dataset_i.{zarr,h5ad}`.
         The main purpose of this function is to create shuffled sharded zarr datasets, which is the default behavior of this function.
@@ -425,12 +425,12 @@ def add_anndatas(
 
         Parameters
         ----------
-            anndata_paths
+            adata_paths
                 Paths to the AnnData files used to create the zarr store.
-            load_anndata
+            load_adata
                 Function to customize (lazy-)loading the invidiual input anndata files. By default, :func:`anndata.experimental.read_lazy` is used with categoricals/nullables read into memory and `(-1)` chunks for `obs`.
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and certain `obs` columns), you can provide a custom function here to speed up loading and harmonize your data.
-                Beware that concatenating nullables/categoricals (i.e., what happens if `len(anndata_paths) > 1` internally in this function) from {class}`anndata.experimental.backed.Dataset2D` `obs` is very time consuming - consider loading these into memory if you use this argument.
+                Beware that concatenating nullables/categoricals (i.e., what happens if `len(adata_paths) > 1` internally in this function) from {class}`anndata.experimental.backed.Dataset2D` `obs` is very time consuming - consider loading these into memory if you use this argument.
             var_subset
                 Subset of gene names to include in the store. If None, all genes are included.
                 Genes are subset based on the `var_names` attribute of the concatenated AnnData object.
@@ -475,16 +475,16 @@ def add_anndatas(
             ...     "path/to/second_adata.h5ad",
             ...     "path/to/third_adata.h5ad",
             ... ]
-            >>> DatasetCollection("path/to/output/zarr_store.zarr").add_anndatas(
+            >>> DatasetCollection("path/to/output/zarr_store.zarr").add_adata(
             ...    datasets,
-            ...    load_anndata=read_lazy_x_and_obs_only,
+            ...    load_adata=read_lazy_x_and_obs_only,
             ...)
         """
         if shuffle_chunk_size > n_obs_per_dataset:
             raise ValueError("Cannot have a large slice size than observations per dataset")
         shared_kwargs = {
-            "anndata_paths": anndata_paths,
-            "load_anndata": load_anndata,
+            "adata_paths": adata_paths,
+            "load_adata": load_adata,
             "zarr_sparse_chunk_size": zarr_sparse_chunk_size,
             "zarr_sparse_shard_size": zarr_sparse_shard_size,
             "zarr_dense_chunk_size": zarr_dense_chunk_size,
@@ -503,8 +503,8 @@ def add_anndatas(
     def _create_collection(
         self,
         *,
-        anndata_paths: Iterable[PathLike[str]] | Iterable[str],
-        load_anndata: Callable[[PathLike[str] | str], ad.AnnData] = _default_load_anndata,
+        adata_paths: Iterable[PathLike[str]] | Iterable[str],
+        load_adata: Callable[[PathLike[str] | str], ad.AnnData] = _default_load_adata,
         var_subset: Iterable[str] | None = None,
         zarr_sparse_chunk_size: int = 32768,
         zarr_sparse_shard_size: int = 134_217_728,
@@ -528,9 +528,9 @@ def _create_collection(
 
         Parameters
         ----------
-            anndata_paths
+            adata_paths
                 Paths to the AnnData files used to create the zarr store.
-            load_anndata
+            load_adata
                 Function to customize lazy-loading the invidiual input anndata files. By default, :func:`anndata.experimental.read_lazy` is used.
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and `obs`), you can provide a custom function here to speed up loading and harmonize your data.
                 The input to the function is a path to an anndata file, and the output is an anndata object which has `X` as a :class:`dask.array.Array`.
@@ -563,8 +563,8 @@ def _create_collection(
         """
         if not self.is_empty:
             raise RuntimeError("Cannot create a collection at a location that already has a shuffled collection")
-        _check_for_mismatched_keys(anndata_paths, load_anndata=load_anndata)
-        adata_concat = _lazy_load_anndatas(anndata_paths, load_anndata=load_anndata)
+        _check_for_mismatched_keys(adata_paths, load_adata=load_adata)
+        adata_concat = _lazy_load_adata(adata_paths, load_adata=load_adata)
         adata_concat.obs_names_make_unique()
         n_obs_per_dataset = min(adata_concat.shape[0], n_obs_per_dataset)
         chunks = _create_chunks_for_shuffling(
@@ -573,7 +573,7 @@ def _create_collection(
 
         if var_subset is None:
             var_subset = adata_concat.var_names
-        for i, chunk in enumerate(tqdm(chunks, desc="processing chunks")):
+        for i, chunk in enumerate(tqdm(chunks, desc="Creating collection")):
             var_mask = adata_concat.var_names.isin(var_subset)
             # np.sort: It's more efficient to access elements sequentially from dask arrays
             # The data will be shuffled later on, we just want the elements at this point
@@ -606,8 +606,8 @@ def _create_collection(
     def _add_to_collection(
         self,
         *,
-        anndata_paths: Iterable[PathLike[str]] | Iterable[str],
-        load_anndata: Callable[[PathLike[str] | str], ad.AnnData] = ad.read_h5ad,
+        adata_paths: Iterable[PathLike[str]] | Iterable[str],
+        load_adata: Callable[[PathLike[str] | str], ad.AnnData] = ad.read_h5ad,
         zarr_sparse_chunk_size: int = 32768,
         zarr_sparse_shard_size: int = 134_217_728,
         zarr_dense_chunk_size: int = 1024,
@@ -623,9 +623,9 @@ def _add_to_collection(
 
         Parameters
         ----------
-            anndata_paths
+            adata_paths
                 Paths to the anndata files to be appended to the collection of output chunks.
-            load_anndata
+            load_adata
                 Function to customize loading the invidiual input anndata files. By default, :func:`anndata.read_h5ad` is used.
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and `obs`), you can provide a custom function here to speed up loading and harmonize your data.
                 The input to the function is a path to an anndata file, and the output is an anndata object.
@@ -649,11 +649,11 @@ def _add_to_collection(
                 Whether or not to shuffle when adding.  Otherwise, the incoming data will just be split up and appended.
         """
         if self.is_empty:
-            raise ValueError("Store is empty. Please run `DatasetCollection.add_anndatas` first.")
+            raise ValueError("Store is empty. Please run `DatasetCollection.add_adata` first.")
         # Check for mismatched keys among the inputs.
-        _check_for_mismatched_keys(anndata_paths, load_anndata=load_anndata)
+        _check_for_mismatched_keys(adata_paths, load_adata=load_adata)
 
-        adata_concat = _lazy_load_anndatas(anndata_paths, load_anndata=load_anndata)
+        adata_concat = _lazy_load_adata(adata_paths, load_adata=load_adata)
         if math.ceil(adata_concat.shape[0] / shuffle_chunk_size) < len(self._dataset_keys):
             raise ValueError(
                 f"Use a shuffle size small enough to distribute the input data with {adata_concat.shape[0]} obs across {len(self._dataset_keys)} anndata stores."
@@ -667,7 +667,7 @@ def _add_to_collection(
 
         adata_concat.obs_names_make_unique()
         for dataset, chunk in tqdm(
-            zip(self._dataset_keys, chunks, strict=True), total=len(self._dataset_keys), desc="processing chunks"
+            zip(self._dataset_keys, chunks, strict=True), total=len(self._dataset_keys), desc="Extending collection"
         ):
             adata_dataset = ad.io.read_elem(self._group[dataset])
             subset_adata = _to_categorical_obs(
diff --git a/src/annbatch/loader.py b/src/annbatch/loader.py
index 29811872..b65b9da7 100644
--- a/src/annbatch/loader.py
+++ b/src/annbatch/loader.py
@@ -299,45 +299,45 @@ def use_collection(
         self,
         collection: DatasetCollection,
         *,
-        load_anndata: Callable[[zarr.Group], ad.AnnData] = load_x_and_obs_and_var,
+        load_adata: Callable[[zarr.Group], ad.AnnData] = load_x_and_obs_and_var,
     ) -> Self:
         """Load from an existing :class:`annbatch.DatasetCollection`.
 
-        This function can only be called once. If you want to manually add more data, use :meth:`Loader.add_anndatas` or open an issue.
+        This function can only be called once. If you want to manually add more data, use :meth:`Loader.add_adata` or open an issue.
 
         Parameters
         ----------
         collection
             The collection whose on-disk datasets should be used in this loader.
-        load_anndata
+        load_adata
             A custom load function - recall that whatever is found in :attr:`~anndata.AnnData.X` and :attr:`~anndata.AnnData.obs` will be yielded in batches.
             Default is to just load `X` and all of `obs`.
-            This default behavior can degrade performance if you don't need all columns in `obs` - it is recommended to use the `load_anndata` argument.
+            This default behavior can degrade performance if you don't need all columns in `obs` - it is recommended to use the `load_adata` argument.
         """
         if collection.is_empty:
             raise ValueError("DatasetCollection is empty")
         if self._collection_added:
             raise RuntimeError(
-                "You should not add multiple collections, independently shuffled - please preshuffle multiple collections, use `add_anndatas` manually if you know what you are doing, or open an issue if you believe that this should be supported at an API level higher than `add_anndatas`."
+                "You should not add multiple collections, independently shuffled - please preshuffle multiple collections, use `add_adata` manually if you know what you are doing, or open an issue if you believe that this should be supported at an API level higher than `add_adata`."
             )
-        adatas = [load_anndata(g) for g in collection]
-        self.add_anndatas(adatas)
+        adatas = [load_adata(g) for g in collection]
+        self.add_adata(adatas)
         self._collection_added = True
         return self
 
     @validate_sampler
-    def add_anndatas(
+    def add_adata(
         self,
         adatas: list[ad.AnnData],
     ) -> Self:
-        """Append anndatas to this dataset.
+        """Append adata to this dataset.
 
         Parameters
         ----------
             adatas
                 List of :class:`anndata.AnnData` objects, with :class:`zarr.Array` or :class:`anndata.abc.CSRDataset` as the data matrix in :attr:`~anndata.AnnData.X`, and :attr:`~anndata.AnnData.obs` containing annotations to yield in a :class:`pandas.DataFrame`.
         """
-        check_lt_1([len(adatas)], ["Number of anndatas"])
+        check_lt_1([len(adatas)], ["Number of adata"])
         for adata in adatas:
             dataset, obs, var = self._prepare_dataset_obs_and_var(adata)
             self._add_dataset_unchecked(dataset, obs, var)
diff --git a/tests/conftest.py b/tests/conftest.py
index 0c9843c2..06f2e77f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -114,7 +114,7 @@ def simple_collection(
 ) -> tuple[DatasetCollection, ad.AnnData]:
     zarr_stores = sorted(f for f in adata_with_zarr_path_same_var_space[1].iterdir() if f.is_dir())
     output_path = Path(tmpdir_factory.mktemp("zarr_folder")) / "simple_fixture.zarr"
-    collection = DatasetCollection(output_path).add_anndatas(
+    collection = DatasetCollection(output_path).add_adata(
         zarr_stores,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index bfff82be..6e044a2e 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -117,7 +117,7 @@ def concat(datas: list[Data | ad.AnnData]) -> ListData | list[ad.AnnData]:
                 ).use_collection(
                     collection,
                     **(
-                        {"load_anndata": lambda group: open_func(group, use_zarrs=use_zarrs, use_anndata=True)}
+                        {"load_adata": lambda group: open_func(group, use_zarrs=use_zarrs, use_anndata=True)}
                         if open_func is not None
                         else {}
                     ),
@@ -519,7 +519,7 @@ def test_no_obs_no_var(simple_collection: tuple[ad.AnnData, DatasetCollection]):
         batch_size=20,
     ).use_collection(
         simple_collection[1],
-        load_anndata=lambda g: ad.AnnData(X=ad.io.sparse_dataset(g["layers"]["sparse"])),
+        load_adata=lambda g: ad.AnnData(X=ad.io.sparse_dataset(g["layers"]["sparse"])),
     )
     assert next(iter(ds))["obs"] is None
 
@@ -558,10 +558,10 @@ def test_mismatched_var_raises_error(tmp_path: Path, subtests):
         with pytest.raises(ValueError, match="All datasets must have identical var DataFrames"):
             loader.add_anndata(adata2_on_disk)
 
-    with subtests.test(msg="add_anndatas"):
+    with subtests.test(msg="add_adata"):
         loader = Loader(chunk_size=10, preload_nchunks=4, batch_size=20)
         with pytest.raises(ValueError, match="All datasets must have identical var DataFrames"):
-            loader.add_anndatas([adata1_on_disk, adata2_on_disk])
+            loader.add_adata([adata1_on_disk, adata2_on_disk])
 
     with subtests.test(msg="add_dataset"):
         loader = Loader(chunk_size=10, preload_nchunks=4, batch_size=20)
diff --git a/tests/test_preshuffle.py b/tests/test_preshuffle.py
index 722164ee..ada5072f 100644
--- a/tests/test_preshuffle.py
+++ b/tests/test_preshuffle.py
@@ -51,7 +51,7 @@ def test_store_creation_warnings_with_different_keys(elem_name: Literal["obsm",
     adata_1.write_h5ad(path_1)
     adata_2.write_h5ad(path_2)
     with pytest.warns(UserWarning, match=rf"Found {elem_name} keys.* not present in all anndatas"):
-        DatasetCollection(tmp_path / "collection.zarr").add_anndatas(
+        DatasetCollection(tmp_path / "collection.zarr").add_adata(
             [path_1, path_2],
             zarr_sparse_chunk_size=10,
             zarr_sparse_shard_size=20,
@@ -69,7 +69,7 @@ def test_store_creation_no_warnings_with_custom_load(tmp_path: Path):
     path_2 = tmp_path / "with_extra_key.h5ad"
     adata_1.write_h5ad(path_1)
     adata_2.write_h5ad(path_2)
-    collection = DatasetCollection(tmp_path / "collection.zarr").add_anndatas(
+    collection = DatasetCollection(tmp_path / "collection.zarr").add_adata(
         [path_1, path_2],
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -77,7 +77,7 @@ def test_store_creation_no_warnings_with_custom_load(tmp_path: Path):
         zarr_dense_shard_size=10,
         n_obs_per_dataset=10,
         shuffle_chunk_size=5,
-        load_anndata=lambda x: ad.AnnData(X=ad.io.read_elem(h5py.File(x)["X"])),
+        load_adata=lambda x: ad.AnnData(X=ad.io.read_elem(h5py.File(x)["X"])),
     )
     assert len(ad.read_zarr(next(iter(collection))).layers.keys()) == 0
 
@@ -91,7 +91,7 @@ def test_store_creation_path_added_to_obs(tmp_path: Path):
     adata_2.write_h5ad(path_2)
     paths = [path_1, path_2]
     output_dir = tmp_path / "path_src_collection.zarr"
-    collection = DatasetCollection(output_dir).add_anndatas(
+    collection = DatasetCollection(output_dir).add_adata(
         paths,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -109,18 +109,18 @@ def test_store_creation_path_added_to_obs(tmp_path: Path):
 
 
 @pytest.mark.parametrize("elem_name", ["obsm", "layers", "raw", "obs"])
-@pytest.mark.parametrize("load_anndata", [ad.read_h5ad, ad.experimental.read_lazy])
+@pytest.mark.parametrize("load_adata", [ad.read_h5ad, ad.experimental.read_lazy])
 def test_store_addition_different_keys(
     elem_name: Literal["obsm", "layers", "raw"],
     tmp_path: Path,
-    load_anndata: Callable[[PathLike[str] | str], ad.AnnData],
+    load_adata: Callable[[PathLike[str] | str], ad.AnnData],
 ):
     adata_orig = ad.AnnData(X=np.random.randn(100, 20))
     orig_path = tmp_path / "orig.h5ad"
     adata_orig.write_h5ad(orig_path)
     output_path = tmp_path / "zarr_store_addition_different_keys.zarr"
     collection = DatasetCollection(output_path)
-    collection.add_anndatas(
+    collection.add_adata(
         [orig_path],
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -136,9 +136,9 @@ def test_store_addition_different_keys(
     additional_path = tmp_path / "with_extra_key.h5ad"
     adata.write_h5ad(additional_path)
     with pytest.warns(UserWarning, match=rf"Found {elem_name} keys.* not present in all anndatas"):
-        collection.add_anndatas(
+        collection.add_adata(
             [additional_path],
-            load_anndata=load_anndata,
+            load_adata=load_adata,
             zarr_sparse_chunk_size=10,
             zarr_sparse_shard_size=20,
             zarr_dense_chunk_size=5,
@@ -169,7 +169,7 @@ def test_store_creation_default(
         else r"Loading h5ad is currently not supported",
     ):
         kwargs = {} if is_zarr else {"is_collection_h5ad": True}
-        collection = DatasetCollection(output_path, **kwargs).add_anndatas(
+        collection = DatasetCollection(output_path, **kwargs).add_adata(
             [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")]
         )
     assert isinstance(
@@ -188,20 +188,20 @@ def test_store_creation_default(
 
 @pytest.mark.parametrize("shuffle", [pytest.param(True, id="shuffle"), pytest.param(False, id="no_shuffle")])
 @pytest.mark.parametrize(
-    "load_anndata", [pytest.param(None, id="default_read"), pytest.param(ad.experimental.read_lazy, id="fully_lazy")]
+    "load_adata", [pytest.param(None, id="default_read"), pytest.param(ad.experimental.read_lazy, id="fully_lazy")]
 )
 def test_store_creation(
     adata_with_h5_path_different_var_space: tuple[ad.AnnData, Path],
     shuffle: bool,
-    load_anndata: Callable[[str], ad.AnnData],
+    load_adata: Callable[[str], ad.AnnData],
 ):
     var_subset = [f"gene_{i}" for i in range(100)]
     h5_files = sorted(adata_with_h5_path_different_var_space[1].iterdir())
     output_path = (
         adata_with_h5_path_different_var_space[1].parent
-        / f"zarr_store_creation_test_{shuffle}_{'default_read' if load_anndata is None else 'custom_read'}.zarr"
+        / f"zarr_store_creation_test_{shuffle}_{'default_read' if load_adata is None else 'custom_read'}.zarr"
     )
-    collection = DatasetCollection(output_path).add_anndatas(
+    collection = DatasetCollection(output_path).add_adata(
         [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")],
         var_subset=var_subset,
         zarr_sparse_chunk_size=10,
@@ -211,7 +211,7 @@ def test_store_creation(
         n_obs_per_dataset=50,
         shuffle_chunk_size=10,
         shuffle=shuffle,
-        **({"load_anndata": load_anndata} if load_anndata is not None else {}),
+        **({"load_adata": load_adata} if load_adata is not None else {}),
     )
     assert not DatasetCollection(output_path).is_empty
     assert V1_ENCODING.items() <= zarr.open(output_path).attrs.items()
@@ -292,7 +292,7 @@ def test_mismatched_raw_concat(
     h5_files = sorted(adata_with_h5_path_different_var_space[1].iterdir())
     output_path = adata_with_h5_path_different_var_space[1].parent / "zarr_store_creation_test_heterogeneous.zarr"
     h5_paths = [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")]
-    collection = DatasetCollection(output_path).add_anndatas(
+    collection = DatasetCollection(output_path).add_adata(
         h5_paths,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -301,7 +301,7 @@ def test_mismatched_raw_concat(
         n_obs_per_dataset=30,
         shuffle_chunk_size=10,
         shuffle=False,  # don't shuffle -> want to check if the right attributes get taken
-        load_anndata=_read_lazy_x_and_obs_only_from_raw,
+        load_adata=_read_lazy_x_and_obs_only_from_raw,
     )
 
     adatas_orig = []
@@ -324,20 +324,20 @@ def test_mismatched_raw_concat(
     np.testing.assert_array_equal(adata_orig.X.toarray(), adata.X.toarray())
 
 
-@pytest.mark.parametrize("load_anndata", [ad.read_h5ad, ad.experimental.read_lazy])
+@pytest.mark.parametrize("load_adata", [ad.read_h5ad, ad.experimental.read_lazy])
 def test_store_extension(
     adata_with_h5_path_different_var_space: tuple[ad.AnnData, Path],
-    load_anndata: Callable[[PathLike[str] | str], ad.AnnData],
+    load_adata: Callable[[PathLike[str] | str], ad.AnnData],
 ):
     all_h5_paths = sorted(p for p in adata_with_h5_path_different_var_space[1].iterdir() if p.suffix == ".h5ad")
     store_path = (
-        adata_with_h5_path_different_var_space[1].parent / f"zarr_store_extension_test_{load_anndata.__name__}.zarr"
+        adata_with_h5_path_different_var_space[1].parent / f"zarr_store_extension_test_{load_adata.__name__}.zarr"
     )
     original = all_h5_paths
     additional = all_h5_paths[4:]  # don't add everything to get a "different" var space
     # create new store
     collection = DatasetCollection(store_path)
-    collection.add_anndatas(
+    collection.add_adata(
         original,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -348,9 +348,9 @@ def test_store_extension(
         shuffle=True,
     )
     # add h5ads to existing store
-    collection.add_anndatas(
+    collection.add_adata(
         additional,
-        load_anndata=load_anndata,
+        load_adata=load_adata,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
         zarr_dense_chunk_size=5,
@@ -379,5 +379,5 @@ def test_empty(tmp_path: Path):
     assert collection.is_empty
     # Doesn't matter what errors as long as this function runs, but not to completion
     with pytest.raises(TypeError):
-        collection.add_anndatas()
+        collection.add_adata()
     assert not (V1_ENCODING.items() <= g.attrs.items())

From b1af3b64c0cd7dcbd85976fb5ac36bc685b4e5f1 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Wed, 25 Feb 2026 15:11:34 +0100
Subject: [PATCH 24/30] update changelog

---
 CHANGELOG.md       | 7 +++++--
 src/annbatch/io.py | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5eb4bf65..d04722c3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,8 +11,11 @@ and this project adheres to [Semantic Versioning][].
 ## [0.0.9]
 
 ### Breaking
-- Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_anndatas` and renamed `adata_paths` argument to `anndata_paths` to be consistent with the rest of the API.
-- In similar fashion, renamed `load_adata` argument to `load_anndata` in {meth}`annbatch.DatasetCollection.add_anndatas` and {meth}`annbatch.Loader.use_collection`.
+- Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_adata`.
+- Renamed `annbatch.Loader.add_anndatas` to {meth}`annbatch.Loader.add_adata`.
+
+### Fixed
+- Formatted progress bar descriptions to be more readable.
 
 ## [0.0.8]
 
diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index bddaa3a8..fb670b24 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -411,7 +411,7 @@ def add_adata(
         shuffle_chunk_size: int = 1000,
         shuffle: bool = True,
     ) -> Self:
-        """Take adata paths and create or add to an on-disk set of AnnData datasets with uniform var spaces at the desired path (with `n_obs_per_dataset` rows per dataset if running for the first time).
+        """Take AnnData paths and create or add to an on-disk set of AnnData datasets with uniform var spaces at the desired path (with `n_obs_per_dataset` rows per dataset if running for the first time).
 
         The set of AnnData datasets is collectively referred to as a "collection" where each dataset is called `dataset_i.{zarr,h5ad}`.
         The main purpose of this function is to create shuffled sharded zarr datasets, which is the default behavior of this function.

From 8716f947d149d743c42c1356f351b27504ee8b3f Mon Sep 17 00:00:00 2001
From: Ilan Gold <ilanbassgold@gmail.com>
Date: Thu, 26 Feb 2026 13:41:32 +0100
Subject: [PATCH 25/30] Apply suggestions from code review

---
 docs/notebooks/example.ipynb | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
index 52da2652..2141539d 100644
--- a/docs/notebooks/example.ipynb
+++ b/docs/notebooks/example.ipynb
@@ -118,7 +118,7 @@
    "metadata": {},
    "source": [
     "The conversion code will take care of the following things:\n",
-    "* Align (outer join) the gene spaces across all datasets listed in `anndata_paths`\n",
+    "* Align (outer join) the gene spaces across all datasets listed in `adata_paths`\n",
     "  * The gene spaces are outer-joined based on the gene names provided in the `var_names` field of the individual `AnnData` objects.\n",
     "  * If you want to subset to specific gene space, you can provide a list of gene names via the `var_subset` parameter.\n",
     "* Shuffle the cells across all datasets (this works on larger than memory datasets as well).\n",
@@ -178,7 +178,7 @@
     "\n",
     "\n",
     "# For CELLxGENE data, the raw counts can either be found under .raw.X or under .X (if .raw is not supplied).\n",
-    "# To have a store that only contains raw counts, we can write the following load_anndata function\n",
+    "# To have a store that only contains raw counts, we can write the following `load_adata` function\n",
     "def read_lazy_x_and_obs_only(path) -> ad.AnnData:\n",
     "    \"\"\"Custom load function to only load raw counts from CxG data.\"\"\"\n",
     "    # IMPORTANT: Large data should always be loaded lazily to reduce the memory footprint\n",
@@ -198,14 +198,14 @@
     "\n",
     "\n",
     "collection = DatasetCollection(zarr.open(\"annbatch_collection\", mode=\"w\"))\n",
-    "collection.add_anndatas(\n",
+    "collection.add_adatas(\n",
     "    # List all the h5ad files you want to include in the collection\n",
-    "    anndata_paths=[\"866d7d5e-436b-4dbd-b7c1-7696487d452e.h5ad\", \"f81463b8-4986-4904-a0ea-20ff02cbb317.h5ad\"],\n",
+    "    adata_paths=[\"866d7d5e-436b-4dbd-b7c1-7696487d452e.h5ad\", \"f81463b8-4986-4904-a0ea-20ff02cbb317.h5ad\"],\n",
     "    # Path to store the output collection\n",
     "    shuffle=True,  # Whether to pre-shuffle the cells of the collection\n",
     "    n_obs_per_dataset=2_097_152,  # Number of cells per dataset shard, this number is much higher than available in these datasets but is generally a good target\n",
     "    var_subset=None,  # Optionally subset the collection to a specific gene space\n",
-    "    load_anndata=read_lazy_x_and_obs_only,\n",
+    "    load_adata=read_lazy_x_and_obs_only,\n",
     ")"
    ]
   },
@@ -251,7 +251,7 @@
     "from annbatch import Loader\n",
     "\n",
     "\n",
-    "def _load_anndata(g: zarr.Group) -> ad.AnnData:\n",
+    "def _load_adata(g: zarr.Group) -> ad.AnnData:\n",
     "    return ad.AnnData(X=ad.io.sparse_dataset(g[\"X\"]), obs=ad.experimental.read_lazy(g).obs[[\"cell_type\"]].to_memory())\n",
     "\n",
     "\n",
@@ -363,11 +363,11 @@
     }
    ],
    "source": [
-    "collection.add_anndatas(\n",
-    "    anndata_paths=[\n",
+    "collection.add_adatas(\n",
+    "    adata_paths=[\n",
     "        \"866d7d5e-436b-4dbd-b7c1-7696487d452e.h5ad\",\n",
     "    ],\n",
-    "    load_anndata=read_lazy_x_and_obs_only,\n",
+    "    load_adata=read_lazy_x_and_obs_only,\n",
     ")"
    ]
   },

From c885049ed6e8a42c879c1915c503a043e1ab5c76 Mon Sep 17 00:00:00 2001
From: Ilan Gold <ilanbassgold@gmail.com>
Date: Thu, 26 Feb 2026 13:43:48 +0100
Subject: [PATCH 26/30] Apply suggestion from @ilan-gold

---
 docs/notebooks/example.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
index 2141539d..7bf3c822 100644
--- a/docs/notebooks/example.ipynb
+++ b/docs/notebooks/example.ipynb
@@ -265,7 +265,7 @@
     ")\n",
     "\n",
     "# Add in the shuffled data that should be used for training.\n",
-    "ds.use_collection(collection, load_anndata=_load_anndata)"
+    "ds.use_collection(collection, load_adata=_load_adata)"
    ]
   },
   {

From 4305a8b82f8b29bffb5cfeb5ffccd8289bd2534e Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Tue, 3 Mar 2026 11:13:53 +0100
Subject: [PATCH 27/30] fix after merge conflict

---
 CHANGELOG.md           |  5 ++---
 src/annbatch/loader.py | 18 +++++++++---------
 tests/test_dataset.py  | 12 ++++++------
 3 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 00c1937c..831e622b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,12 +11,11 @@ and this project adheres to [Semantic Versioning][].
 ## [0.0.9]
 
 ### Breaking
-- Renamed `annbatch.DatasetCollection.add_adatas` to {meth}`annbatch.DatasetCollection.add_adata`.
-- Renamed `annbatch.Loader.add_anndatas` to {meth}`annbatch.Loader.add_adata`.
+- Renamed `annbatch.Loader.add_anndatas` to {meth}`annbatch.Loader.add_adatas`.
+- Renamed `annbatch.Loader.add_anndata` to {meth}`annbatch.Loader.add_adata`.
 
 ### Fixed
 - Formatted progress bar descriptions to be more readable.
-=======
 - {class}`annbatch.DatasetCollection` now accepts a `rng` argument to the {meth}`annbatch.DatasetCollection.add_adatas` method.
 
 
diff --git a/src/annbatch/loader.py b/src/annbatch/loader.py
index b65b9da7..0e969c74 100644
--- a/src/annbatch/loader.py
+++ b/src/annbatch/loader.py
@@ -131,7 +131,7 @@ class Loader[
                 batch_size=4096,
                 chunk_size=32,
                 preload_nchunks=512,
-            ).add_anndata(my_anndata)
+            ).add_adata(my_anndata)
         >>> for batch in ds:
                 # optionally convert to dense
                 # batch = batch.to_dense()
@@ -303,7 +303,7 @@ def use_collection(
     ) -> Self:
         """Load from an existing :class:`annbatch.DatasetCollection`.
 
-        This function can only be called once. If you want to manually add more data, use :meth:`Loader.add_adata` or open an issue.
+        This function can only be called once. If you want to manually add more data, use :meth:`Loader.add_adatas` or open an issue.
 
         Parameters
         ----------
@@ -318,33 +318,33 @@ def use_collection(
             raise ValueError("DatasetCollection is empty")
         if self._collection_added:
             raise RuntimeError(
-                "You should not add multiple collections, independently shuffled - please preshuffle multiple collections, use `add_adata` manually if you know what you are doing, or open an issue if you believe that this should be supported at an API level higher than `add_adata`."
+                "You should not add multiple collections, independently shuffled - please preshuffle multiple collections, use `add_adatas` manually if you know what you are doing, or open an issue if you believe that this should be supported at an API level higher than `add_adatas`."
             )
         adatas = [load_adata(g) for g in collection]
-        self.add_adata(adatas)
+        self.add_adatas(adatas)
         self._collection_added = True
         return self
 
     @validate_sampler
-    def add_adata(
+    def add_adatas(
         self,
         adatas: list[ad.AnnData],
     ) -> Self:
-        """Append adata to this dataset.
+        """Append AnnData objects to this dataset.
 
         Parameters
         ----------
             adatas
                 List of :class:`anndata.AnnData` objects, with :class:`zarr.Array` or :class:`anndata.abc.CSRDataset` as the data matrix in :attr:`~anndata.AnnData.X`, and :attr:`~anndata.AnnData.obs` containing annotations to yield in a :class:`pandas.DataFrame`.
         """
-        check_lt_1([len(adatas)], ["Number of adata"])
+        check_lt_1([len(adatas)], ["Number of adatas"])
         for adata in adatas:
             dataset, obs, var = self._prepare_dataset_obs_and_var(adata)
             self._add_dataset_unchecked(dataset, obs, var)
         return self
 
-    def add_anndata(self, adata: ad.AnnData) -> Self:
-        """Append an anndata to this dataset.
+    def add_adata(self, adata: ad.AnnData) -> Self:
+        """Append a single AnnData object to this dataset.
 
         Parameters
         ----------
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 6e044a2e..18684717 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -552,16 +552,16 @@ def test_mismatched_var_raises_error(tmp_path: Path, subtests):
         var=adata2.var,
     )
 
-    with subtests.test(msg="add_anndata"):
+    with subtests.test(msg="add_adata"):
         loader = Loader(chunk_size=10, preload_nchunks=4, batch_size=20)
-        loader.add_anndata(adata1_on_disk)
+        loader.add_adata(adata1_on_disk)
         with pytest.raises(ValueError, match="All datasets must have identical var DataFrames"):
-            loader.add_anndata(adata2_on_disk)
+            loader.add_adata(adata2_on_disk)
 
-    with subtests.test(msg="add_adata"):
+    with subtests.test(msg="add_adatas"):
         loader = Loader(chunk_size=10, preload_nchunks=4, batch_size=20)
         with pytest.raises(ValueError, match="All datasets must have identical var DataFrames"):
-            loader.add_adata([adata1_on_disk, adata2_on_disk])
+            loader.add_adatas([adata1_on_disk, adata2_on_disk])
 
     with subtests.test(msg="add_dataset"):
         loader = Loader(chunk_size=10, preload_nchunks=4, batch_size=20)
@@ -585,7 +585,7 @@ def test_preload_dtype(tmp_path: Path, dtype_in: np.dtype, expected: np.dtype):
     z = zarr.open(tmp_path / "foo.zarr")
     write_sharded(z, ad.AnnData(X=sp.random(100, 10, dtype=dtype_in, format="csr", rng=np.random.default_rng())))
     adata = ad.AnnData(X=ad.io.sparse_dataset(z["X"]))
-    loader = Loader(preload_to_gpu=True, batch_size=10, chunk_size=10, preload_nchunks=2, to_torch=False).add_anndata(
+    loader = Loader(preload_to_gpu=True, batch_size=10, chunk_size=10, preload_nchunks=2, to_torch=False).add_adata(
         adata
     )
     assert next(iter(loader))["X"].dtype == expected

From 33061e6a217f2d0d06f83cef90d31b691ce6b7e4 Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Tue, 3 Mar 2026 11:20:38 +0100
Subject: [PATCH 28/30] undo dataset collection changes

---
 src/annbatch/io.py       |  4 ++--
 tests/test_preshuffle.py | 22 +++++++++++-----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index e935e6f8..f158fe70 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -395,7 +395,7 @@ def is_empty(self) -> bool:
         )
 
     @_with_settings
-    def add_adata(
+    def add_adatas(
         self,
         adata_paths: Iterable[zarr.Group | h5py.Group | PathLike[str] | str],
         *,
@@ -478,7 +478,7 @@ def add_adata(
             ...     "path/to/second_adata.h5ad",
             ...     "path/to/third_adata.h5ad",
             ... ]
-            >>> DatasetCollection("path/to/output/zarr_store.zarr").add_adata(
+            >>> DatasetCollection("path/to/output/zarr_store.zarr").add_adatas(
             ...    datasets,
             ...    load_adata=read_lazy_x_and_obs_only,
             ...)
diff --git a/tests/test_preshuffle.py b/tests/test_preshuffle.py
index 8066f425..da06723d 100644
--- a/tests/test_preshuffle.py
+++ b/tests/test_preshuffle.py
@@ -51,7 +51,7 @@ def test_store_creation_warnings_with_different_keys(elem_name: Literal["obsm",
     adata_1.write_h5ad(path_1)
     adata_2.write_h5ad(path_2)
     with pytest.warns(UserWarning, match=rf"Found {elem_name} keys.* not present in all anndatas"):
-        DatasetCollection(tmp_path / "collection.zarr").add_adata(
+        DatasetCollection(tmp_path / "collection.zarr").add_adatas(
             [path_1, path_2],
             zarr_sparse_chunk_size=10,
             zarr_sparse_shard_size=20,
@@ -69,7 +69,7 @@ def test_store_creation_no_warnings_with_custom_load(tmp_path: Path):
     path_2 = tmp_path / "with_extra_key.h5ad"
     adata_1.write_h5ad(path_1)
     adata_2.write_h5ad(path_2)
-    collection = DatasetCollection(tmp_path / "collection.zarr").add_adata(
+    collection = DatasetCollection(tmp_path / "collection.zarr").add_adatas(
         [path_1, path_2],
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -91,7 +91,7 @@ def test_store_creation_path_added_to_obs(tmp_path: Path):
     adata_2.write_h5ad(path_2)
     paths = [path_1, path_2]
     output_dir = tmp_path / "path_src_collection.zarr"
-    collection = DatasetCollection(output_dir).add_adata(
+    collection = DatasetCollection(output_dir).add_adatas(
         paths,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -120,7 +120,7 @@ def test_store_addition_different_keys(
     adata_orig.write_h5ad(orig_path)
     output_path = tmp_path / "zarr_store_addition_different_keys.zarr"
     collection = DatasetCollection(output_path)
-    collection.add_adata(
+    collection.add_adatas(
         [orig_path],
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -136,7 +136,7 @@ def test_store_addition_different_keys(
     additional_path = tmp_path / "with_extra_key.h5ad"
     adata.write_h5ad(additional_path)
     with pytest.warns(UserWarning, match=rf"Found {elem_name} keys.* not present in all anndatas"):
-        collection.add_adata(
+        collection.add_adatas(
             [additional_path],
             load_adata=load_adata,
             zarr_sparse_chunk_size=10,
@@ -169,7 +169,7 @@ def test_store_creation_default(
         else r"Loading h5ad is currently not supported",
     ):
         kwargs = {} if is_zarr else {"is_collection_h5ad": True}
-        collection = DatasetCollection(output_path, **kwargs).add_adata(
+        collection = DatasetCollection(output_path, **kwargs).add_adatas(
             [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")]
         )
     assert isinstance(
@@ -201,7 +201,7 @@ def test_store_creation(
         adata_with_h5_path_different_var_space[1].parent
         / f"zarr_store_creation_test_{shuffle}_{'default_read' if load_adata is None else 'custom_read'}.zarr"
     )
-    collection = DatasetCollection(output_path).add_adata(
+    collection = DatasetCollection(output_path).add_adatas(
         [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")],
         var_subset=var_subset,
         zarr_sparse_chunk_size=10,
@@ -292,7 +292,7 @@ def test_mismatched_raw_concat(
     h5_files = sorted(adata_with_h5_path_different_var_space[1].iterdir())
     output_path = adata_with_h5_path_different_var_space[1].parent / "zarr_store_creation_test_heterogeneous.zarr"
     h5_paths = [adata_with_h5_path_different_var_space[1] / f for f in h5_files if str(f).endswith(".h5ad")]
-    collection = DatasetCollection(output_path).add_adata(
+    collection = DatasetCollection(output_path).add_adatas(
         h5_paths,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -337,7 +337,7 @@ def test_store_extension(
     additional = all_h5_paths[4:]  # don't add everything to get a "different" var space
     # create new store
     collection = DatasetCollection(store_path)
-    collection.add_adata(
+    collection.add_adatas(
         original,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,
@@ -348,7 +348,7 @@ def test_store_extension(
         shuffle=True,
     )
     # add h5ads to existing store
-    collection.add_adata(
+    collection.add_adatas(
         additional,
         load_adata=load_adata,
         zarr_sparse_chunk_size=10,
@@ -379,7 +379,7 @@ def test_empty(tmp_path: Path):
     assert collection.is_empty
     # Doesn't matter what errors as long as this function runs, but not to completion
     with pytest.raises(TypeError):
-        collection.add_adata()
+        collection.add_adatas()
     assert not (V1_ENCODING.items() <= g.attrs.items())
 
 

From 9da30cac44e1c8b79a02ca7e10e8da95eceacb7d Mon Sep 17 00:00:00 2001
From: selmanozleyen <syozleyen@gmail.com>
Date: Tue, 3 Mar 2026 11:22:56 +0100
Subject: [PATCH 29/30] conftest

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 06f2e77f..45be4996 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -114,7 +114,7 @@ def simple_collection(
 ) -> tuple[DatasetCollection, ad.AnnData]:
     zarr_stores = sorted(f for f in adata_with_zarr_path_same_var_space[1].iterdir() if f.is_dir())
     output_path = Path(tmpdir_factory.mktemp("zarr_folder")) / "simple_fixture.zarr"
-    collection = DatasetCollection(output_path).add_adata(
+    collection = DatasetCollection(output_path).add_adatas(
         zarr_stores,
         zarr_sparse_chunk_size=10,
         zarr_sparse_shard_size=20,

From 5098d5ca093c2d1f12d7baef80348529781e90cc Mon Sep 17 00:00:00 2001
From: Ilan Gold <ilanbassgold@gmail.com>
Date: Tue, 3 Mar 2026 12:07:40 +0100
Subject: [PATCH 30/30] Update src/annbatch/io.py

---
 src/annbatch/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index f158fe70..d5d131f6 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -665,7 +665,7 @@ def _add_to_collection(
                 Whether or not to shuffle when adding.  Otherwise, the incoming data will just be split up and appended.
         """
         if self.is_empty:
-            raise ValueError("Store is empty. Please run `DatasetCollection.add_adata` first.")
+            raise ValueError("Store is empty. Please run `DatasetCollection.add_adatas` first.")
         # Check for mismatched keys among the inputs.
         _check_for_mismatched_keys(adata_paths, load_adata=load_adata)