Skip to content

[dependencies] extraction workflow/h5sc files fail with anndata>=0.12.0 #317

@machth

Description

@machth

Running:
project.extract()

Version: v1.3.5

Error:


ValueError Traceback (most recent call last)
Cell In[13], line 2
1 #project.extraction_f.debug = False
----> 2 project.extract()

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/scportrait/pipeline/project.py:1605, in Project.extract(self, partial, n_cells, seed, overwrite, output_folder_name)
1602 if overwrite is not None:
1603 self.extraction_f.overwrite_run_path = overwrite
-> 1605 self.extraction_f(partial=partial, n_cells=n_cells, seed=seed, output_folder_name=output_folder_name)
1606 self.get_project_status()

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/scportrait/pipeline/_base.py:311, in ProcessingStep.__call__(self, debug, overwrite, *args, **kwargs)
309 process = getattr(self, "process", None)
310 if callable(process):
--> 311 x = self.process(*args, **kwargs) # type: ignore[attr-defined]
312 # clear temp directory after processing is completed
313 if not self.deep_debug:

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/scportrait/pipeline/extraction.py:936, in HDF5CellExtraction.process(self, partial, n_cells, seed, output_folder_name)
933 self.DEFAULT_LOG_NAME = "partial_processing.log" # change log name so that the results are not written to the same log file as a complete extraction
935 # run all of the extraction setup steps
--> 936 self._set_up_extraction(output_folder_name=output_folder_name)
937 stop_setup = timeit.default_timer()
938 time_setup = stop_setup - start_setup

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/scportrait/pipeline/extraction.py:252, in HDF5CellExtraction._set_up_extraction(self, output_folder_name)
249 self._get_classes_to_extract()
251 # create output files for saving results to
--> 252 self._create_output_files()
254 # print relevant information to log file
255 self._verbalise_extraction_info()

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/scportrait/pipeline/extraction.py:762, in HDF5CellExtraction._create_output_files(self)
754 single_cell_data_shape = (
755 self.num_classes,
756 (self.n_masks + self.n_image_channels),
757 self.image_size,
758 self.image_size,
759 )
761 self.output_path = os.path.join(self.extraction_data_directory, self.DEFAULT_EXTRACTION_FILE)
--> 762 self._initialize_empty_anndata()
764 # add an empty HDF5 dataset to the obsm group of the anndata object
765 with h5py.File(self.output_path, "a") as hf:

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/scportrait/pipeline/extraction.py:746, in HDF5CellExtraction._initialize_empty_anndata(self)
743 adata.uns[f"{self.DEFAULT_NAME_SINGLE_CELL_IMAGES}/compression"] = self.compression_type
745 # write to file
--> 746 adata.write(self.output_path)

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/legacy_api_wrap/__init__.py:82, in legacy_api.<locals>.wrapper.<locals>.fn_compatible(*args_all, **kw)
79 @wraps(fn)
80 def fn_compatible(*args_all: P.args, **kw: P.kwargs) -> R:
81 if len(args_all) <= n_positional:
---> 82 return fn(*args_all, **kw)
84 args_pos: P.args
85 args_pos, args_rest = args_all[:n_positional], args_all[n_positional:]

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_core/anndata.py:1902, in AnnData.write_h5ad(self, filename, convert_strings_to_categoricals, compression, compression_opts, as_dense)
1899 if filename is None:
1900 filename = self.filename
-> 1902 write_h5ad(
1903 Path(filename),
1904 self,
1905 convert_strings_to_categoricals=convert_strings_to_categoricals,
1906 compression=compression,
1907 compression_opts=compression_opts,
1908 as_dense=as_dense,
1909 )
1911 if self.isbacked:
1912 self.file.filename = filename

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_io/utils.py:325, in no_write_dataset_2d.<locals>.raise_error_if_dataset_2d_present(store, adata, *args, **kwargs)
318 msg = (
319 "Writing AnnData objects with a Dataset2D not supported yet. "
320 "Please use ds.to_memory to bring the dataset into memory. "
321 "Note that if you have generated this object by concatenating several AnnData objects"
322 "the original types may be lost."
323 )
324 raise NotImplementedError(msg)
--> 325 return write(store, adata, *args, **kwargs)

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_io/h5ad.py:105, in write_h5ad(filepath, adata, as_dense, convert_strings_to_categoricals, dataset_kwargs, **kwargs)
103 write_elem(f, "varp", dict(adata.varp), dataset_kwargs=dataset_kwargs)
104 write_elem(f, "layers", dict(adata.layers), dataset_kwargs=dataset_kwargs)
--> 105 write_elem(f, "uns", dict(adata.uns), dataset_kwargs=dataset_kwargs)

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_io/specs/registry.py:521, in write_elem(store, k, elem, dataset_kwargs)
497 def write_elem(
498 store: GroupStorageType,
499 k: str,
(...) 502 dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
503 ) -> None:
504 """
505 Write an element to a storage group using anndata encoding.
506
(...) 519 E.g. for zarr this would be chunks, compressor.
520 """
--> 521 Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_io/utils.py:248, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
246 raise ValueError(msg)
247 try:
--> 248 return func(*args, **kwargs)
249 except Exception as e:
250 path = _get_display_path(store)

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_io/specs/registry.py:392, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
389 write_func = self.find_write_func(dest_type, elem, modifiers)
391 if self.callback is None:
--> 392 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
393 return self.callback(
394 write_func,
395 store,
(...) 399 iospec=self.registry.get_spec(elem),
400 )

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_io/specs/registry.py:76, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs)
74 @wraps(func)
75 def wrapper(g: GroupStorageType, k: str, *args, **kwargs):
---> 76 result = func(g, k, *args, **kwargs)
77 g[k].attrs.setdefault("encoding-type", spec.encoding_type)
78 g[k].attrs.setdefault("encoding-version", spec.encoding_version)

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_io/specs/methods.py:387, in write_mapping(f, k, v, _writer, dataset_kwargs)
385 g = f.require_group(k)
386 for sub_k, sub_v in v.items():
--> 387 _writer.write_elem(g, sub_k, sub_v, dataset_kwargs=dataset_kwargs)

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_io/utils.py:248, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
246 raise ValueError(msg)
247 try:
--> 248 return func(*args, **kwargs)
249 except Exception as e:
250 path = _get_display_path(store)

File ~/miniforge3/envs/scportrait/lib/python3.12/site-packages/anndata/_io/specs/registry.py:355, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
353 if "/" in k.split(store.name)[-1][1:]:
354 msg = "Forward slashes are not allowed in keys."
--> 355 raise ValueError(msg)
357 if isinstance(store, h5py.File):
358 store = store["/"]

ValueError: Forward slashes are not allowed in keys.
Error raised while writing key 'single_cell_images/n_cells' of <class 'h5py._hl.group.Group'> to /uns

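Fix that worked for my workflow, in ~/Documents/github/scPortrait/src/scportrait/pipeline/extraction.py (around line 756):
anndata>=0.12.0 rejects `uns` keys that contain a forward slash, so instead of writing keys such as 'single_cell_images/n_cells', store all of the metadata in one nested dict under the single key 'single_cell_images':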
[...]

# add additional metadata to uns

    adata.uns[f"{self.DEFAULT_NAME_SINGLE_CELL_IMAGES}"] = {"n_cells": self.num_classes,
                                                            "n_channels": self.n_masks + self.n_image_channels,
                                                            "n_masks": self.n_masks, 
                                                            "n_image_channels": self.n_image_channels,
                                                            "image_size": self.image_size,
                                                            "normalization": self.normalization,
                                                            "normalization_range_lower": self.normalization_range[0], 
                                                            "normalization_range_upper": self.normalization_range[1],
                                                            "channel_names": channels,
                                                            "channel_mapping": np.array(channel_mapping, dtype="<U15"),
                                                            "compression": self.compression_type
    }

[...]

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions