diff --git a/CHANGELOG.md b/CHANGELOG.md index 4965727..7c99842 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Version 0.0.6 +- Added `combine_columns` function +- Implemented `__eq__` override for `SpatialImage` subclasses + ## Version 0.0.5 - Implementing a placeholder `SpatialFeatureExperiment` class. This version only implements the data structure to hold various geometries but none of the methods except for slicing. diff --git a/src/spatialexperiment/ProxySFE.py b/src/spatialexperiment/ProxySFE.py index 08aa778..7c5bc79 100644 --- a/src/spatialexperiment/ProxySFE.py +++ b/src/spatialexperiment/ProxySFE.py @@ -8,7 +8,11 @@ from summarizedexperiment._frameutils import _sanitize_frame from summarizedexperiment.RangedSummarizedExperiment import GRangesOrGRangesList -from .SpatialExperiment import SpatialExperiment, _validate_column_data, _validate_sample_ids +from .SpatialExperiment import ( + SpatialExperiment, + _validate_column_data, + _validate_sample_ids, +) __author__ = "jkanche" __copyright__ = "jkanche" @@ -44,7 +48,10 @@ def _validate_geometries(geometries: Dict[str, gpd.GeoDataFrame], prop_name: str for i, geom in enumerate(geometries.values()): if not isinstance(geom, gpd.GeoDataFrame): - raise TypeError(f"Item {i} in {prop_name} is {type(geom).__name__} " f"rather than `GeoDataFrame`.\n") + raise TypeError( + f"Item {i} in {prop_name} is {type(geom).__name__} " + f"rather than `GeoDataFrame`.\n" + ) def _validate_annotgeometries(geometries, column_data): @@ -58,13 +65,16 @@ def _validate_annotgeometries(geometries, column_data): for i, geom in enumerate(geometries.values()): if "sample_id" not in geom.columns: - raise ValueError(f"Item {i} of 'annot_geometries' does not have column 'sample_id'.\n") + raise ValueError( + f"Item {i} of 'annot_geometries' does not have column 'sample_id'.\n" + ) else: samples_seen = geom["sample_id"].unique() missing = [s for s in samples_seen if s not in sample_ids] if len(missing) > 0: 
raise ValueError( - f"Samples {', '.join(missing)} in item {i} of " f"annot_geometries are absent from 'column_data'.\n" + f"Samples {', '.join(missing)} in item {i} of " + f"annot_geometries are absent from 'column_data'.\n" ) @@ -75,7 +85,9 @@ def _validate_graph_sample_id(spatial_graphs, column_data): missing = graph_sample_ids - col_sample_ids if missing: - raise ValueError(f"Samples {', '.join(missing)} are in the graphs but not 'column_data'.\n") + raise ValueError( + f"Samples {', '.join(missing)} are in the graphs but not 'column_data'.\n" + ) def _validate_graph_structure(spatial_graphs): @@ -89,7 +101,9 @@ def _validate_graph_structure(spatial_graphs): "and whose rows are margins (rows, columns, annotation).\n" ) elif not set(spatial_graphs.get_row_names()) == {"row", "col", "annot"}: - raise ValueError("Row names of 'spatial_graphs' must be 'row', 'col', and 'annot'.\n") + raise ValueError( + "Row names of 'spatial_graphs' must be 'row', 'col', and 'annot'.\n" + ) class ProxySpatialFeatureExperiment(SpatialExperiment): @@ -404,7 +418,9 @@ def __repr__(self) -> str: output += ", col_geometries=" + ut.print_truncated_dict(self._col_geometries) output += ", row_geometries=" + ut.print_truncated_dict(self._row_geometries) - output += ", annot_geometries=" + ut.print_truncated_dict(self._annot_geometries) + output += ", annot_geometries=" + ut.print_truncated_dict( + self._annot_geometries + ) output += ", spatial_graphs=" + self._spatial_graphs.__repr__() output += ")" @@ -448,7 +464,9 @@ def get_unit(self) -> str: """Get the coordinate unit.""" return self._unit - def set_unit(self, unit: str, in_place: bool = False) -> "ProxySpatialFeatureExperiment": + def set_unit( + self, unit: str, in_place: bool = False + ) -> "ProxySpatialFeatureExperiment": """Set the coordinate unit. 
Args: @@ -631,7 +649,9 @@ def set_spatial_graphs( Returns: A modified ``ProxySpatialFeatureExperiment`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ - _graphs = _sanitize_spatial_graphs(graphs, list(set(self.get_column_data().get_column("sample_id")))) + _graphs = _sanitize_spatial_graphs( + graphs, list(set(self.get_column_data().get_column("sample_id"))) + ) _validate_graph_structure(_graphs) _validate_graph_sample_id(_graphs, self.get_column_data()) @@ -658,7 +678,9 @@ def spatial_graphs(self, graphs: Optional[BiocFrame]): ################################ def get_slice( - self, rows: Optional[Union[str, int, bool, List]] = None, columns: Optional[Union[str, int, bool, List]] = None + self, + rows: Optional[Union[str, int, bool, List]] = None, + columns: Optional[Union[str, int, bool, List]] = None, ) -> "ProxySpatialFeatureExperiment": """Get a slice of the experiment. @@ -675,7 +697,9 @@ def get_slice( sfe = super().get_slice(rows=rows, columns=columns) slicer = self._generic_slice(rows=rows, columns=columns) - do_slice_cols = not (isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None)) + do_slice_cols = not ( + isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None) + ) # Update geometries and graphs if do_slice_cols: @@ -698,7 +722,9 @@ def get_slice( new_graphs = None if self.spatial_graphs is not None: # Keep only columns for remaining samples - cols_to_keep = [c for c in self.spatial_graphs.columns if c in column_sample_ids] + cols_to_keep = [ + c for c in self.spatial_graphs.columns if c in column_sample_ids + ] if cols_to_keep: new_graphs = self.spatial_graphs[cols_to_keep] @@ -743,7 +769,10 @@ def get_slice( ################################ def set_column_data( - self, cols: Optional[BiocFrame], replace_column_names: bool = False, in_place: bool = False + self, + cols: Optional[BiocFrame], + replace_column_names: bool = False, + in_place: bool = False, ) -> 
"ProxySpatialFeatureExperiment": """Override: Set sample data. diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py index 1c3fadb..9f31e1e 100644 --- a/src/spatialexperiment/SpatialExperiment.py +++ b/src/spatialexperiment/SpatialExperiment.py @@ -3,13 +3,25 @@ from urllib.parse import urlparse from warnings import warn -import biocutils as ut import numpy as np -from biocframe import BiocFrame from PIL import Image -from singlecellexperiment import SingleCellExperiment + +from biocframe import BiocFrame +import biocutils as ut +from summarizedexperiment._combineutils import ( + check_assays_are_equal, + merge_assays, + merge_se_colnames, + relaxed_merge_assays +) from summarizedexperiment._frameutils import _sanitize_frame from summarizedexperiment.RangedSummarizedExperiment import GRangesOrGRangesList +from singlecellexperiment import SingleCellExperiment +from singlecellexperiment._combineutils import ( + merge_generic, + relaxed_merge_generic, + relaxed_merge_numpy_generic +) from ._imgutils import retrieve_rows_by_id from ._validators import ( @@ -21,7 +33,8 @@ _validate_spatial_coords, _validate_spatial_coords_names, ) -from .SpatialImage import VirtualSpatialImage, construct_spatial_image_class +from ._combineutils import merge_spatial_frames, merge_spatial_coordinates +from .SpatialImage import construct_spatial_image_class, VirtualSpatialImage __author__ = "keviny2" __copyright__ = "keviny2" @@ -189,7 +202,9 @@ def __init__( column_data = _sanitize_frame(column_data, num_rows=self.shape[1]) if not column_data.has_column("sample_id"): - column_data["sample_id"] = ["sample01"] * self.shape[1] # hard code default sample_id as "sample01" + column_data["sample_id"] = ["sample01"] * self.shape[ + 1 + ] # hard code default sample_id as "sample01" spatial_coords = _sanitize_frame(spatial_coords, num_rows=self.shape[1]) img_data = _sanitize_frame(img_data, num_rows=0) @@ -202,7 +217,9 @@ def __init__( 
_validate_column_data(column_data=column_data) _validate_img_data(img_data=img_data) _validate_sample_ids(column_data=column_data, img_data=img_data) - _validate_spatial_coords(spatial_coords=spatial_coords, column_data=column_data) + _validate_spatial_coords( + spatial_coords=spatial_coords, column_data=column_data + ) ######################### ######>> Copying <<###### @@ -306,10 +323,14 @@ def __repr__(self) -> str: output += ", row_ranges=" + self._row_ranges.__repr__() if self._alternative_experiments is not None: - output += ", alternative_experiments=" + ut.print_truncated_list(self.alternative_experiment_names) + output += ", alternative_experiments=" + ut.print_truncated_list( + self.alternative_experiment_names + ) if self._reduced_dims is not None: - output += ", reduced_dims=" + ut.print_truncated_list(self.reduced_dim_names) + output += ", reduced_dims=" + ut.print_truncated_list( + self.reduced_dim_names + ) if self._main_experiment_name is not None: output += ", main_experiment_name=" + self._main_experiment_name @@ -337,14 +358,10 @@ def __str__(self) -> str: output += f"assays({len(self.assay_names)}): {ut.print_truncated_list(self.assay_names)}\n" - output += ( - f"row_data columns({len(self._rows.column_names)}): {ut.print_truncated_list(self._rows.column_names)}\n" - ) + output += f"row_data columns({len(self._rows.column_names)}): {ut.print_truncated_list(self._rows.column_names)}\n" output += f"row_names({0 if self._row_names is None else len(self._row_names)}): {' ' if self._row_names is None else ut.print_truncated_list(self._row_names)}\n" - output += ( - f"column_data columns({len(self._cols.column_names)}): {ut.print_truncated_list(self._cols.column_names)}\n" - ) + output += f"column_data columns({len(self._cols.column_names)}): {ut.print_truncated_list(self._cols.column_names)}\n" output += f"column_names({0 if self._column_names is None else len(self._column_names)}): {' ' if self._column_names is None else 
ut.print_truncated_list(self._column_names)}\n" output += f"main_experiment_name: {' ' if self._main_experiment_name is None else self._main_experiment_name}\n" @@ -377,7 +394,9 @@ def get_spatial_coords(self) -> BiocFrame: return self.get_spatial_coordinates() def set_spatial_coordinates( - self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]], in_place: bool = False + self, + spatial_coords: Optional[Union[BiocFrame, np.ndarray]], + in_place: bool = False, ) -> "SpatialExperiment": """Set new spatial coordinates. @@ -410,10 +429,14 @@ def set_spatial_coordinates( return output def set_spatial_coords( - self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]], in_place: bool = False + self, + spatial_coords: Optional[Union[BiocFrame, np.ndarray]], + in_place: bool = False, ) -> "SpatialExperiment": """Alias for :py:meth:`~set_spatial_coordinates`.""" - return self.set_spatial_coordinates(spatial_coords=spatial_coords, in_place=in_place) + return self.set_spatial_coordinates( + spatial_coords=spatial_coords, in_place=in_place + ) @property def spatial_coords(self) -> BiocFrame: @@ -435,7 +458,9 @@ def spatial_coordinates(self) -> BiocFrame: return self.get_spatial_coordinates() @spatial_coordinates.setter - def spatial_coordinates(self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]]): + def spatial_coordinates( + self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]] + ): """Alias for :py:meth:`~set_spatial_coordinates`.""" warn( "Setting property 'spatial_coords' is an in-place operation, use 'set_spatial_coordinates' instead.", @@ -485,15 +510,21 @@ def set_spatial_coordinates_names( new_spatial_coords = self._spatial_coords else: _validate_spatial_coords_names(spatial_coords_names, self._spatial_coords) - new_spatial_coords = self._spatial_coords.set_column_names(spatial_coords_names) + new_spatial_coords = self._spatial_coords.set_column_names( + spatial_coords_names + ) output = self._define_output(in_place) output._spatial_coords = 
new_spatial_coords return output - def set_spatial_coords_names(self, spatial_coords_names: List[str], in_place: bool = False) -> "SpatialExperiment": + def set_spatial_coords_names( + self, spatial_coords_names: List[str], in_place: bool = False + ) -> "SpatialExperiment": """Alias for :py:meth:`~set_spatial_coordinates_names`.""" - return self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=in_place) + return self.set_spatial_coordinates_names( + spatial_coords_names=spatial_coords_names, in_place=in_place + ) @property def spatial_coords_names(self) -> List[str]: @@ -507,7 +538,9 @@ def spatial_coords_names(self, spatial_coords_names: List[str]): "Setting property 'spatial_coords_names' is an in-place operation, use 'set_spatial_coordinates_names' instead.", UserWarning, ) - self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=True) + self.set_spatial_coordinates_names( + spatial_coords_names=spatial_coords_names, in_place=True + ) @property def spatial_coordinates_names(self) -> List[str]: @@ -521,7 +554,9 @@ def spatial_coordinates_names(self, spatial_coords_names: List[str]): "Setting property 'spatial_coords_names' is an in-place operation, use 'set_spatial_coordinates_names' instead.", UserWarning, ) - self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=True) + self.set_spatial_coordinates_names( + spatial_coords_names=spatial_coords_names, in_place=True + ) ############################## ########>> img_data <<######## @@ -539,7 +574,9 @@ def get_img_data(self) -> BiocFrame: """Alias for :py:meth:`~get_image_data`.""" return self.get_image_data() - def set_image_data(self, img_data: Optional[BiocFrame], in_place: bool = False) -> "SpatialExperiment": + def set_image_data( + self, img_data: Optional[BiocFrame], in_place: bool = False + ) -> "SpatialExperiment": """Set new image data. 
Args: @@ -568,7 +605,9 @@ def set_image_data(self, img_data: Optional[BiocFrame], in_place: bool = False) output._img_data = img_data return output - def set_img_data(self, img_data: BiocFrame, in_place: bool = False) -> "SpatialExperiment": + def set_img_data( + self, img_data: BiocFrame, in_place: bool = False + ) -> "SpatialExperiment": """Alias for :py:meth:`~set_image_data`.""" return self.set_image_data(img_data=img_data, in_place=in_place) @@ -605,7 +644,9 @@ def image_data(self, img_data: BiocFrame): ############################## def get_scale_factors( - self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None + self, + sample_id: Union[str, bool, None] = None, + image_id: Union[str, bool, None] = None, ) -> List[float]: """Return scale factor(s) of image(s) based on the provided sample and image ids. See :py:meth:`~get_img` for more details on the behavior for various @@ -628,7 +669,9 @@ def get_scale_factors( _validate_id(sample_id) _validate_id(image_id) - img_data_subset = retrieve_rows_by_id(img_data=self.img_data, sample_id=sample_id, image_id=image_id) + img_data_subset = retrieve_rows_by_id( + img_data=self.img_data, sample_id=sample_id, image_id=image_id + ) if img_data_subset.shape[0] == 1: return img_data_subset["scale_factor"][0] @@ -640,7 +683,10 @@ def get_scale_factors( ################################ def set_column_data( - self, cols: Optional[BiocFrame], replace_column_names: bool = False, in_place: bool = False + self, + cols: Optional[BiocFrame], + replace_column_names: bool = False, + in_place: bool = False, ) -> "SpatialExperiment": """Override: Set sample data. 
@@ -682,14 +728,18 @@ def set_column_data( ################################ def get_slice( - self, rows: Optional[Union[str, int, bool, Sequence]], columns: Optional[Union[str, int, bool, Sequence]] + self, + rows: Optional[Union[str, int, bool, Sequence]], + columns: Optional[Union[str, int, bool, Sequence]], ) -> "SpatialExperiment": """Alias for :py:attr:`~__getitem__`.""" spe = super().get_slice(rows=rows, columns=columns) slicer = self._generic_slice(rows=rows, columns=columns) - do_slice_cols = not (isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None)) + do_slice_cols = not ( + isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None) + ) new_spatial_coords = None @@ -697,7 +747,9 @@ def get_slice( new_spatial_coords = self.spatial_coords[slicer.col_indices, :] column_sample_ids = set(spe.column_data["sample_id"]) - mask = [sample_id in column_sample_ids for sample_id in self.img_data["sample_id"]] + mask = [ + sample_id in column_sample_ids for sample_id in self.img_data["sample_id"] + ] new_img_data = self.img_data[mask,] @@ -724,7 +776,9 @@ def get_slice( ################################ def get_img( - self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None + self, + sample_id: Union[str, bool, None] = None, + image_id: Union[str, bool, None] = None, ) -> Union[VirtualSpatialImage, List[VirtualSpatialImage]]: """ Retrieve spatial images based on the provided sample and image ids. @@ -768,7 +822,9 @@ def get_img( _validate_id(sample_id) _validate_id(image_id) - img_data_subset = retrieve_rows_by_id(img_data=self.img_data, sample_id=sample_id, image_id=image_id) + img_data_subset = retrieve_rows_by_id( + img_data=self.img_data, sample_id=sample_id, image_id=image_id + ) if img_data_subset is None: return [] @@ -818,7 +874,9 @@ def add_img( Raises: ValueError: If the sample_id and image_id pair already exists. 
""" - _validate_sample_image_ids(img_data=self._img_data, new_sample_id=sample_id, new_image_id=image_id) + _validate_sample_image_ids( + img_data=self._img_data, new_sample_id=sample_id, new_image_id=image_id + ) if isinstance(image_source, (str, Path)): is_url = urlparse(str(image_source)).scheme in ("http", "https", "ftp") @@ -846,12 +904,21 @@ def add_img( # TODO: implement rmv_img() def rmv_img( - self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None + self, + sample_id: Union[str, bool, None] = None, + image_id: Union[str, bool, None] = None, ) -> "SpatialExperiment": raise NotImplementedError() - def img_source(self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None, path=False): - raise NotImplementedError("This function is irrelevant because it is for `RemoteSpatialImages`") + def img_source( + self, + sample_id: Union[str, bool, None] = None, + image_id: Union[str, bool, None] = None, + path=False, + ): + raise NotImplementedError( + "This function is irrelevant because it is for `RemoteSpatialImages`" + ) def img_raster(self, sample_id=None, image_id=None): # NOTE: this function seems redundant, might be an artifact of the different subclasses of SpatialImage in the R implementation? 
################################
#######>> combine ops <<########
################################


@ut.combine_columns.register(SpatialExperiment)
def combine_columns(*x: SpatialExperiment) -> SpatialExperiment:
    """Combine multiple ``SpatialExperiment`` objects by column.

    All experiments must contain the same assay names. If you need a
    flexible combine operation, check out :py:func:`~relaxed_combine_columns`.

    Row-level slots (``row_ranges``, ``row_data``, ``row_names``), ``metadata``
    and ``main_experiment_name`` are taken from the first experiment.
    ``reduced_dims`` and ``alternative_experiments`` are merged on a
    best-effort basis: if merging fails, a warning is issued and the slot is
    left as ``None``.

    Args:
        x:
            One or more ``SpatialExperiment`` objects.

    Returns:
        A combined ``SpatialExperiment`` of the same class as the first input.
    """
    warn(
        "'row_pairs' and 'column_pairs' are currently ignored during this operation.",
        UserWarning,
    )

    first = x[0]
    _all_assays = [y.assays for y in x]
    check_assays_are_equal(_all_assays)
    _new_assays = merge_assays(_all_assays, by="column")

    _new_col_names = merge_se_colnames(x)

    # Deliberately best-effort: a failure here degrades to a warning so the
    # rest of the experiment can still be combined.
    _new_rdim = None
    try:
        _new_rdim = merge_generic(x, by="row", attr="reduced_dims")
    except Exception as e:
        warn(
            f"Cannot combine 'reduced_dimensions' across experiments, {str(e)}",
            UserWarning,
        )

    _new_alt_expt = None
    try:
        _new_alt_expt = merge_generic(x, by="column", attr="alternative_experiments")
    except Exception as e:
        warn(
            f"Cannot combine 'alternative_experiments' across experiments, {str(e)}",
            UserWarning,
        )

    _all_spatial_coords = [y._spatial_coords for y in x]
    _new_spatial_coords = merge_spatial_coordinates(_all_spatial_coords)

    _new_cols, _new_img_data = merge_spatial_frames(x)

    # Preserve the concrete (sub)class of the first input.
    current_class_const = type(first)
    return current_class_const(
        assays=_new_assays,
        row_ranges=first._row_ranges,
        row_data=first._rows,
        column_data=_new_cols,
        row_names=first._row_names,
        column_names=_new_col_names,
        metadata=first._metadata,
        reduced_dims=_new_rdim,
        main_experiment_name=first._main_experiment_name,
        alternative_experiments=_new_alt_expt,
        spatial_coords=_new_spatial_coords,
        img_data=_new_img_data,
    )


@ut.relaxed_combine_columns.register(SpatialExperiment)
def relaxed_combine_columns(
    *x: SpatialExperiment,
) -> SpatialExperiment:
    """Relaxed counterpart of :py:func:`~combine_columns`.

    Assays are merged with ``relaxed_merge_assays`` instead of requiring
    identical assays, and ``reduced_dims`` / ``alternative_experiments`` use
    the relaxed merge helpers. The column-wise frames (``column_data``,
    ``img_data`` and ``spatial_coords``) are likewise merged in relaxed mode,
    so frames with differing columns can be combined.

    Row-level slots (``row_ranges``, ``row_data``, ``row_names``), ``metadata``
    and ``main_experiment_name`` are taken from the first experiment.

    Args:
        x:
            One or more ``SpatialExperiment`` objects, possibly with differences in the
            number and identity of their rows.

    Returns:
        A ``SpatialExperiment`` of the same class as the first input that
        combines all experiments along their columns.
    """
    warn(
        "'row_pairs' and 'column_pairs' are currently ignored during this operation.",
        UserWarning,
    )

    first = x[0]
    _new_assays = relaxed_merge_assays(x, by="column")

    _new_col_names = merge_se_colnames(x)

    # Deliberately best-effort: a failure here degrades to a warning so the
    # rest of the experiment can still be combined.
    _new_rdim = None
    try:
        _new_rdim = relaxed_merge_numpy_generic(x, by="row", attr="reduced_dims")
    except Exception as e:
        warn(
            f"Cannot combine 'reduced_dimensions' across experiments, {str(e)}",
            UserWarning,
        )

    _new_alt_expt = None
    try:
        _new_alt_expt = relaxed_merge_generic(
            x, by="column", attr="alternative_experiments"
        )
    except Exception as e:
        warn(
            f"Cannot combine 'alternative_experiments' across experiments, {str(e)}",
            UserWarning,
        )

    # The relaxed entry point must request the relaxed merge from the helpers;
    # with the default (relaxed=False) they fall back to a strict combine.
    _all_spatial_coords = [y._spatial_coords for y in x]
    _new_spatial_coords = merge_spatial_coordinates(_all_spatial_coords, relaxed=True)

    _new_cols, _new_img_data = merge_spatial_frames(x, relaxed=True)

    # Preserve the concrete (sub)class of the first input.
    current_class_const = type(first)
    return current_class_const(
        assays=_new_assays,
        row_ranges=first._row_ranges,
        row_data=first._rows,
        column_data=_new_cols,
        row_names=first._row_names,
        column_names=_new_col_names,
        metadata=first._metadata,
        reduced_dims=_new_rdim,
        main_experiment_name=first._main_experiment_name,
        alternative_experiments=_new_alt_expt,
        spatial_coords=_new_spatial_coords,
        img_data=_new_img_data,
    )
""" if not isinstance(metadata, dict): - raise TypeError(f"`metadata` must be a dictionary, provided {type(metadata)}.") + raise TypeError( + f"`metadata` must be a dictionary, provided {type(metadata)}." + ) output = self._define_output(in_place) output._metadata = metadata return output @@ -146,7 +150,9 @@ def _sanitize_loaded_image(image): class LoadedSpatialImage(VirtualSpatialImage): """Class for images loaded into memory.""" - def __init__(self, image: Union[Image.Image, np.ndarray], metadata: Optional[dict] = None): + def __init__( + self, image: Union[Image.Image, np.ndarray], metadata: Optional[dict] = None + ): """Initialize the object. Args: @@ -250,7 +256,9 @@ def get_image(self) -> Image.Image: return self._image - def set_image(self, image: Union[Image.Image, np.ndarray], in_place: bool = False) -> "LoadedSpatialImage": + def set_image( + self, image: Union[Image.Image, np.ndarray], in_place: bool = False + ) -> "LoadedSpatialImage": """Set new image. Args: @@ -402,7 +410,9 @@ def get_path(self) -> Path: """Get the path to the image file.""" return self._path - def set_path(self, path: Union[str, Path], in_place: bool = False) -> "StoredSpatialImage": + def set_path( + self, path: Union[str, Path], in_place: bool = False + ) -> "StoredSpatialImage": """Update the path to the image file. Args: @@ -463,7 +473,9 @@ def _validate_url(url): class RemoteSpatialImage(VirtualSpatialImage): """Class for remotely hosted images.""" - def __init__(self, url: str, metadata: Optional[dict] = None, validate: bool = True): + def __init__( + self, url: str, metadata: Optional[dict] = None, validate: bool = True + ): """Initialize the object. 
import itertools
from copy import deepcopy
from typing import TYPE_CHECKING, List, Tuple
from warnings import warn

if TYPE_CHECKING:
    # Needed for annotations only. ``SpatialExperiment.py`` imports this
    # module, so importing the package back at runtime would be circular.
    from biocframe import BiocFrame
    from spatialexperiment import SpatialExperiment


def _append_indices_to_samples(bframes: "List[BiocFrame]") -> "List[BiocFrame]":
    """Append a per-frame index to every sample ID in a list of frames.

    Frame ``i`` (counting from 1) has each of its ``sample_id`` values
    rewritten to ``"<sample_id>_<i>"``. The inputs are deep-copied first, so
    the caller's frames are left untouched.

    Args:
        bframes: Frames that each contain a ``sample_id`` column.

    Returns:
        New frames, in the same order, with the suffixed sample IDs.
    """
    modified_bframes = []
    for i, bframe in enumerate(bframes, start=1):
        bframe_copy = deepcopy(bframe)
        bframe_copy["sample_id"] = [
            f"{sample_id}_{i}" for sample_id in bframe_copy["sample_id"]
        ]
        modified_bframes.append(bframe_copy)
    return modified_bframes


def merge_spatial_frames(
    x: "List[SpatialExperiment]", relaxed: bool = False
) -> "Tuple[BiocFrame, BiocFrame]":
    """Merge column data and image data from multiple ``SpatialExperiment`` objects.

    If the same sample ID occurs in more than one experiment, a warning is
    issued and an index is appended to the sample IDs of every experiment
    (in both column data and image data) to make them unique. Only the sample
    IDs in the column data are inspected for this check.

    Args:
        x: List of ``SpatialExperiment`` objects.
        relaxed: If ``True``, merge with ``relaxed_combine_rows`` so frames
            with different columns can be combined; otherwise use the strict
            ``combine_rows``. Defaults to ``False``.

    Returns:
        A tuple with the merged column data and the merged image data.
    """
    # Imported here so this module's pure-Python helpers stay importable
    # without the dispatch machinery being loaded.
    import biocutils as ut

    cols = [y._cols for y in x]
    img_datas = [y._img_data for y in x]

    # If no sample ID is shared between experiments, the number of distinct
    # IDs overall equals the sum of the per-experiment distinct counts.
    expected_unique = sum(len(set(_cols["sample_id"])) for _cols in cols)
    all_sample_ids = itertools.chain.from_iterable(
        _cols["sample_id"] for _cols in cols
    )

    if len(set(all_sample_ids)) < expected_unique:
        warn(
            "'sample_id's are duplicated across 'SpatialExperiment' objects to 'combine_columns'; appending sample indices."
        )
        modified_columns = _append_indices_to_samples(cols)
        modified_img_data = _append_indices_to_samples(img_datas)
    else:
        modified_columns = cols
        modified_img_data = img_datas

    combine = ut.relaxed_combine_rows if relaxed else ut.combine_rows
    _new_cols = combine(*modified_columns)
    _new_img_data = combine(*modified_img_data)
    return _new_cols, _new_img_data


def merge_spatial_coordinates(
    spatial_coords: "List[BiocFrame]", relaxed: bool = False
) -> "BiocFrame":
    """Merge spatial coordinates from multiple frames.

    Args:
        spatial_coords: List of ``BiocFrame`` objects containing spatial coordinates.
        relaxed: If ``True``, merge with ``relaxed_combine_rows`` so frames
            with different columns can be combined; otherwise use the strict
            ``combine_rows``. Defaults to ``False``.

    Returns:
        A merged ``BiocFrame`` containing all spatial coordinates.

    Raises:
        ValueError: If ``spatial_coords`` is empty, or if the frames do not
            all have the same number of columns.

    Warns:
        If dimension names are not consistent across all frames.
    """
    if not spatial_coords:
        # Fail with a clear message instead of an IndexError on ``[0]`` below.
        raise ValueError("No 'spatial_coords' were provided to merge.")

    first_shape = spatial_coords[0].shape[1]
    if not all(coords.shape[1] == first_shape for coords in spatial_coords):
        raise ValueError("Not all 'spatial_coords' have the same number of columns.")

    first_columns = spatial_coords[0].columns
    if not all(coords.columns == first_columns for coords in spatial_coords):
        warn("Not all 'spatial_coords' have the same dimension names.")

    # Imported after validation so input errors surface first.
    import biocutils as ut

    if relaxed:
        _new_spatial_coords = ut.relaxed_combine_rows(*spatial_coords)
    else:
        _new_spatial_coords = ut.combine_rows(*spatial_coords)
    return _new_spatial_coords
@@ -51,11 +53,15 @@ def retrieve_rows_by_id( else: subset = subset.combine_rows(row) else: - subset = img_data[[_image_id == image_id for _image_id in img_data["image_id"]], :] + subset = img_data[ + [_image_id == image_id for _image_id in img_data["image_id"]], : + ] elif sample_id is None: first_sample_id = img_data["sample_id"][0] - first_sample = img_data[[_sample_id == first_sample_id for _sample_id in img_data["sample_id"]], :] + first_sample = img_data[ + [_sample_id == first_sample_id for _sample_id in img_data["sample_id"]], : + ] if image_id is True: subset = first_sample @@ -63,10 +69,14 @@ def retrieve_rows_by_id( elif image_id is None: subset = first_sample[0, :] else: - subset = first_sample[[_image_id == image_id for _image_id in img_data["image_id"]], :] + subset = first_sample[ + [_image_id == image_id for _image_id in img_data["image_id"]], : + ] else: - selected_sample = img_data[[_sample_id == sample_id for _sample_id in img_data["sample_id"]], :] + selected_sample = img_data[ + [_sample_id == sample_id for _sample_id in img_data["sample_id"]], : + ] if selected_sample.shape[0] == 0: subset = selected_sample @@ -75,6 +85,8 @@ def retrieve_rows_by_id( elif image_id is None: subset = selected_sample[0, :] else: - subset = selected_sample[[_image_id == image_id for _image_id in selected_sample["image_id"]]] + subset = selected_sample[ + [_image_id == image_id for _image_id in selected_sample["image_id"]] + ] return subset diff --git a/src/spatialexperiment/_initutils.py b/src/spatialexperiment/_initutils.py index edaa4c2..1569889 100644 --- a/src/spatialexperiment/_initutils.py +++ b/src/spatialexperiment/_initutils.py @@ -33,7 +33,11 @@ def construct_spatial_coords_from_names( current_column_data = _sanitize_frame(column_data, num_rows=column_data.shape[1]) - missing_names = [name for name in spatial_coords_names if name not in current_column_data.column_names] + missing_names = [ + name + for name in spatial_coords_names + if name not in 
current_column_data.column_names + ] if missing_names: raise ValueError( f"The following names in `spatial_coords_names` are missing from `column_data`: {missing_names}" @@ -44,7 +48,11 @@ def construct_spatial_coords_from_names( column_data_subset = deepcopy( current_column_data[ :, - [col for col in current_column_data.column_names if col not in spatial_coords_names], + [ + col + for col in current_column_data.column_names + if col not in spatial_coords_names + ], ] ) @@ -52,7 +60,11 @@ def construct_spatial_coords_from_names( def construct_img_data( - sample_id: str, image_id: str, image_sources: List[str], scale_factors: List[float], load_image: bool = False + sample_id: str, + image_id: str, + image_sources: List[str], + scale_factors: List[float], + load_image: bool = False, ) -> BiocFrame: """Construct the image data for a `SpatialExperiment`. @@ -77,7 +89,9 @@ def construct_img_data( A `BiocFrame` representing the image data for a `SpatialExperiment`. """ if not len(image_id) == len(image_sources) == len(scale_factors): - raise ValueError("'image_id', 'image_sources' and 'scale_factors' are not the same length.") + raise ValueError( + "'image_id', 'image_sources' and 'scale_factors' are not the same length." + ) spis = [] for image_source in image_sources: diff --git a/src/spatialexperiment/_validators.py b/src/spatialexperiment/_validators.py index 27a62d5..a3ae86e 100644 --- a/src/spatialexperiment/_validators.py +++ b/src/spatialexperiment/_validators.py @@ -13,7 +13,9 @@ def _validate_spatial_coords_names(spatial_coords_names, spatial_coords): raise TypeError("'spatial_coords_names' is not a list of strings") if len(spatial_coords_names) != spatial_coords.shape[1]: - raise ValueError(f"Expected {spatial_coords.shape[1]} names. Got {len(spatial_coords_names)} names.") + raise ValueError( + f"Expected {spatial_coords.shape[1]} names. Got {len(spatial_coords_names)} names." 
+ ) def _validate_column_data(column_data): @@ -43,7 +45,9 @@ def _validate_sample_image_ids(img_data, new_sample_id, new_image_id): for row in img_data: data = row[1] if data["sample_id"] == new_sample_id and data["image_id"] == new_image_id: - raise ValueError(f"Image with Sample ID: {new_sample_id} and Image ID: {new_image_id} already exists") + raise ValueError( + f"Image with Sample ID: {new_sample_id} and Image ID: {new_image_id} already exists" + ) # TODO: check if 'new_sample_id' is present in column_data['sample_id'] @@ -54,7 +58,7 @@ def _validate_spatial_coords(spatial_coords, column_data): if not hasattr(spatial_coords, "shape"): raise TypeError( - f"Spatial coordinates must be a dataframe-like object." + "Spatial coordinates must be a dataframe-like object." "Does not contain a `shape` property." ) @@ -87,7 +91,9 @@ def _validate_sample_ids(column_data, img_data): column_data_sample_ids = set(column_data["sample_id"]) if not img_data_sample_ids <= column_data_sample_ids: - raise ValueError("All 'sample_id's in 'img_data' must be present in 'column_data['sample_id']") + raise ValueError( + "All 'sample_id's in 'img_data' must be present in 'column_data['sample_id']" + ) if img_data_sample_ids != column_data_sample_ids: warnings.warn( diff --git a/tests/conftest.py b/tests/conftest.py index 0123424..386b519 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -43,6 +43,10 @@ def spe(): } ) + row_names = BiocFrame({"row_names": range(nrows)}) + + column_names = BiocFrame({"column_names": range(ncols)}) + x_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) y_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) @@ -65,6 +69,8 @@ def spe(): assays={"counts": counts}, row_data=row_data, column_data=col_data, + row_names=row_names, + column_names=column_names, spatial_coords=spatial_coords, img_data=img_data, ) diff --git a/tests/test_img_data_methods.py b/tests/test_img_data_methods.py index d36ef4c..6e9adca 100644 --- 
a/tests/test_img_data_methods.py +++ b/tests/test_img_data_methods.py @@ -1,6 +1,5 @@ import pytest from copy import deepcopy -from spatialexperiment import construct_spatial_image_class from spatialexperiment.SpatialImage import VirtualSpatialImage __author__ = "keviny2" diff --git a/tests/test_pSFE.py b/tests/test_pSFE.py index 93521f8..7bfed7e 100644 --- a/tests/test_pSFE.py +++ b/tests/test_pSFE.py @@ -9,6 +9,7 @@ __copyright__ = "jkanche" __license__ = "MIT" + def test_init_basic(): nrows = 200 ncols = 500 @@ -17,11 +18,13 @@ def test_init_basic(): assert isinstance(tspe, ProxySpatialFeatureExperiment) + def test_init_empty(): tspe = ProxySpatialFeatureExperiment() assert isinstance(tspe, ProxySpatialFeatureExperiment) + def test_init_with_col_geoms(): nrows = 200 ncols = 500 @@ -33,7 +36,9 @@ def test_init_with_col_geoms(): ] ) - colgeoms = {"polygons" : gpd.GeoDataFrame({"geometry": polys})} - tspe = ProxySpatialFeatureExperiment(assays={"spots": counts}, col_geometries=colgeoms) + colgeoms = {"polygons": gpd.GeoDataFrame({"geometry": polys})} + tspe = ProxySpatialFeatureExperiment( + assays={"spots": counts}, col_geometries=colgeoms + ) - assert isinstance(tspe, ProxySpatialFeatureExperiment) \ No newline at end of file + assert isinstance(tspe, ProxySpatialFeatureExperiment) diff --git a/tests/test_spe.py b/tests/test_spe.py index a875b1c..7936222 100644 --- a/tests/test_spe.py +++ b/tests/test_spe.py @@ -22,6 +22,7 @@ def test_SPE_empty_constructor(): assert "sample_id" in tspe.column_data.columns.as_list() assert tspe.column_data.shape == (tspe.shape[1], 1) + def test_spe_basic(): nrows = 200 ncols = 500 diff --git a/tests/test_spe_combine.py b/tests/test_spe_combine.py new file mode 100644 index 0000000..23d456b --- /dev/null +++ b/tests/test_spe_combine.py @@ -0,0 +1,106 @@ +from copy import deepcopy + +import pytest +import numpy as np +import biocutils as ut +from spatialexperiment import SpatialExperiment + +__author__ = "keviny2" +__copyright__ = 
"keviny2" +__license__ = "MIT" + + +def test_combine_columns(spe): + spe1 = deepcopy(spe) + spe2 = deepcopy(spe) + + # TODO: this is a temporary fix until https://github.com/BiocPy/SpatialExperiment/issues/25 is finished + spe1.column_data["sample_id"] = [ + f"{sample_id}_A" for sample_id in spe1.column_data["sample_id"] + ] + spe2.column_data["sample_id"] = [ + f"{sample_id}_B" for sample_id in spe2.column_data["sample_id"] + ] + spe1.img_data["sample_id"] = [ + f"{sample_id}_A" for sample_id in spe1.img_data["sample_id"] + ] + spe2.img_data["sample_id"] = [ + f"{sample_id}_B" for sample_id in spe2.img_data["sample_id"] + ] + + combined = ut.combine_columns(spe1, spe2) + + # img_data checks + assert combined.img_data.shape[0] == 2 * spe.img_data.shape[0] + assert set(combined.column_data["sample_id"]) == set(combined.img_data["sample_id"]) + assert set(combined.column_data["sample_id"]) == set( + spe1.column_data["sample_id"] + spe2.column_data["sample_id"] + ) + + idx1 = range(spe1.img_data.shape[0]) + idx2 = range( + spe1.img_data.shape[0], spe1.img_data.shape[0] + spe2.img_data.shape[0] + ) + img_data1 = combined.img_data[idx1, :] + img_data2 = combined.img_data[idx2, :] + + assert img_data1["sample_id"] == spe1.img_data["sample_id"] + assert img_data1["image_id"] == spe1.img_data["image_id"] + assert img_data1["data"] == spe1.img_data["data"] + assert img_data1["scale_factor"] == spe1.img_data["scale_factor"] + + assert img_data2["sample_id"] == spe2.img_data["sample_id"] + assert img_data2["image_id"] == spe2.img_data["image_id"] + assert img_data2["data"] == spe2.img_data["data"] + assert img_data2["scale_factor"] == spe2.img_data["scale_factor"] + + # spatial_coords checks + idx1 = range(spe1.spatial_coords.shape[0]) + idx2 = range( + spe1.spatial_coords.shape[0], + spe1.spatial_coords.shape[0] + spe2.spatial_coords.shape[0], + ) + spatial_coords1 = combined.spatial_coords[idx1, :] + spatial_coords2 = combined.spatial_coords[idx2, :] + + assert 
(spatial_coords1.to_pandas() == spe1.spatial_coords.to_pandas()).all().all() + assert (spatial_coords2.to_pandas() == spe2.spatial_coords.to_pandas()).all().all() + + +def test_relaxed_combine_columns(spe): + nrows, ncols = spe.shape + spe2 = spe.set_assays( + { + "counts": np.random.poisson(lam=10, size=(nrows, ncols)), + "normalized": np.random.normal(size=(nrows, ncols)) + }, + in_place=False + ) + + with pytest.raises(Exception): + combined = ut.combine_columns(spe, spe2) + + combined = ut.relaxed_combine_columns(spe, spe2) + assert combined is not None + assert isinstance(combined, SpatialExperiment) + assert combined.shape[1] == spe.shape[1] + spe2.shape[1] + assert combined.shape[0] == spe.shape[0] + + combined2 = spe.relaxed_combine_columns(spe2) + assert combined2 is not None + assert isinstance(combined2, SpatialExperiment) + assert combined2.shape[1] == spe.shape[1] + spe2.shape[1] + assert combined2.shape[0] == spe.shape[0] + + +def test_duplicate_sample_ids(spe): + with pytest.warns(UserWarning): + combined = ut.combine_columns(spe, spe) + + assert len(set(combined.column_data["sample_id"])) == 2 * len( + set(spe.column_data["sample_id"]) + ) + assert combined.shape[0] == spe.shape[0] + assert combined.shape[1] == 2 * spe.shape[1] + assert combined.rownames == spe.rownames + assert set(combined.colnames.as_list()) == set(spe.colnames.as_list()) diff --git a/tests/test_spe_methods.py b/tests/test_spe_methods.py index 341985c..1dd29ea 100644 --- a/tests/test_spe_methods.py +++ b/tests/test_spe_methods.py @@ -18,6 +18,7 @@ y_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) spatial_coords = np.column_stack((x_coords, y_coords)) + def test_spatial_coords_numpy(): tspe = SpatialExperiment(assays={"counts": counts}, spatial_coords=spatial_coords) diff --git a/tests/test_spi.py b/tests/test_spi.py index a05c3ed..f7e93c5 100644 --- a/tests/test_spi.py +++ b/tests/test_spi.py @@ -1,7 +1,12 @@ import pytest from PIL import Image from spatialexperiment 
import construct_spatial_image_class -from spatialexperiment.SpatialImage import VirtualSpatialImage, StoredSpatialImage, LoadedSpatialImage, RemoteSpatialImage +from spatialexperiment.SpatialImage import ( + VirtualSpatialImage, + StoredSpatialImage, + LoadedSpatialImage, + RemoteSpatialImage, +) __author__ = "keviny2" __copyright__ = "keviny2" @@ -18,7 +23,9 @@ def test_spi_constructor_path(): def test_spi_constructor_spi(): - spi_1 = construct_spatial_image_class("tests/images/sample_image1.jpg", is_url=False) + spi_1 = construct_spatial_image_class( + "tests/images/sample_image1.jpg", is_url=False + ) spi_2 = construct_spatial_image_class(spi_1, is_url=False) assert issubclass(type(spi_2), VirtualSpatialImage) @@ -49,9 +56,14 @@ def test_invalid_input(): with pytest.raises(Exception): construct_spatial_image_class(5, is_url=False) + def test_spi_equality(): - spi_path_1 = construct_spatial_image_class("tests/images/sample_image1.jpg", is_url=False) - spi_path_2 = construct_spatial_image_class("tests/images/sample_image1.jpg", is_url=False) + spi_path_1 = construct_spatial_image_class( + "tests/images/sample_image1.jpg", is_url=False + ) + spi_path_2 = construct_spatial_image_class( + "tests/images/sample_image1.jpg", is_url=False + ) assert spi_path_1 == spi_path_2