diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 90aa16a..7504d08 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -2,9 +2,9 @@ name: Run tests on: push: - branches: [master] + branches: [main] pull_request: - branches: [master] + branches: [main] jobs: build: diff --git a/README.md b/README.md index 8603d6c..1cc671e 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,103 @@ [![PyPI-Server](https://img.shields.io/pypi/v/SpatialExperiment.svg)](https://pypi.org/project/SpatialExperiment/) -![Unit tests](https://github.com/BiocPy/SpatialExperiment/actions/workflows/pypi-test.yml/badge.svg) +![Unit tests](https://github.com/BiocPy/SpatialExperiment/actions/workflows/run-tests.yml/badge.svg) # SpatialExperiment -> Container class for sotring data from spatial -omics experiments +A Python package for storing and analyzing spatial-omics experimental data. `SpatialExperiment` extends [SingleCellExperiment](https://github.com/biocpy/singlecellexperiment) with dedicated slots for image data and spatial coordinates, making it ideal for spatial transcriptomics and other spatially-resolved omics data. -A longer description of your project goes here... +> [!NOTE] +> +> This package is in **active development**. 
## Install To get started, install the package from [PyPI](https://pypi.org/project/SpatialExperiment/) ```bash -pip install SpatialExperiment +pip install spatialexperiment ``` +## Usage + +The `SpatialExperiment` class extends `SingleCellExperiment` with the following key attributes: + +- `spatial_coords`: A BiocFrame containing spot/cell spatial coordinates relative to the image, typically including: + - x-coordinates + - y-coordinates + - Additional spatial metadata + +- `img_data`: A BiocFrame containing image-related information: + - sample_id: Unique identifiers for each sample + - image_id: Unique identifiers for each image + - data: The actual image data + - scale_factor: Scaling factors for proper image interpretation + +- `column_data`: Contains sample_id mappings that link spots to their corresponding images + +### Quick Start + +Here's how to create a SpatialExperiment object from scratch: + +```python +from spatialexperiment import SpatialExperiment, SpatialImage +import numpy as np +from biocframe import BiocFrame + +# Create example data +nrows = 200 # Number of features (e.g., genes) +ncols = 500 # Number of spots/cells + +# Generate random count data +counts = np.random.rand(nrows, ncols) + +# Create feature annotations +row_data = BiocFrame({ + "gene_ids": [f"gene_{i}" for i in range(nrows)], + "gene_names": [f"Gene_{i}" for i in range(nrows)] +}) + +# Create spot/cell annotations +col_data = BiocFrame({ + "n_genes": [50, 200] * int(ncols / 2), + "condition": ["healthy", "tumor"] * int(ncols / 2), + "cell_id": [f"spot_{i}" for i in range(ncols)], + "sample_id": ["sample_1"] * int(ncols / 2) + ["sample_2"] * int(ncols / 2), +}) + +# Generate spatial coordinates +spatial_coords = BiocFrame({ + "x": np.random.uniform(low=0.0, high=100.0, size=ncols), + "y": np.random.uniform(low=0.0, high=100.0, size=ncols) +}) + +# Create image data +img_data = BiocFrame({ + "sample_id": ["sample_1", "sample_1", "sample_2"], + "image_id": ["aurora", "dice", 
"desert"], + "data": [ + SpatialImage("tests/images/sample_image1.jpg"), + SpatialImage("tests/images/sample_image2.png"), + SpatialImage("tests/images/sample_image3.jpg"), + ], + "scale_factor": [1, 1, 1], +}) + +# Create SpatialExperiment object +spe = SpatialExperiment( + assays={"counts": counts}, + row_data=row_data, + column_data=col_data, + spatial_coords=spatial_coords, + img_data=img_data, +) +``` + +For more detailed information about available methods and functionality, please refer to the [SingleCellExperiment documentation](https://biocpy.github.io/SingleCellExperiment/). + + ## Note This project has been set up using [BiocSetup](https://github.com/biocpy/biocsetup) -and [PyScaffold](https://pyscaffold.org/). +and [PyScaffold](https://pyscaffold.org/). \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index d8d10d6..60bdc64 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -299,6 +299,11 @@ "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), "setuptools": ("https://setuptools.pypa.io/en/stable/", None), "pyscaffold": ("https://pyscaffold.org/en/stable", None), + "biocframe": ("https://biocpy.github.io/BiocFrame", None), + "genomicranges": ("https://biocpy.github.io/GenomicRanges", None), + "summarizedexperiment": ("https://biocpy.github.io/SummarizedExperiment", None), + "biocutils": ("https://biocpy.github.io/BiocUtils", None), + "singlecellexperiment": ("https://biocpy.github.io/SingleCellExperiment", None), } print(f"loading configurations for {project} {version} ...", file=sys.stderr) diff --git a/pyproject.toml b/pyproject.toml index 874febe..086f90c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,14 +12,14 @@ version_scheme = "no-guess-dev" line-length = 120 src = ["src"] exclude = ["tests"] -extend-ignore = ["F821"] +lint.extend-ignore = ["F821"] -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "google" [tool.ruff.format] docstring-code-format = true docstring-code-line-length = 20 
-[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401"] diff --git a/setup.cfg b/setup.cfg index e9581b4..c703fca 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,6 +49,12 @@ package_dir = # For more information, check out https://semver.org/. install_requires = importlib-metadata; python_version<"3.8" + pandas>=2.0 + biocframe>=0.6 + biocutils>=0.2 + summarizedexperiment>=0.5 + singlecellexperiment>=0.5.6 + pillow>=11.0 [options.packages.find] diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py new file mode 100644 index 0000000..a1bda63 --- /dev/null +++ b/src/spatialexperiment/SpatialExperiment.py @@ -0,0 +1,845 @@ +from typing import Any, Dict, List, Optional, Sequence, Union +from warnings import warn + +import biocutils as ut +from biocframe import BiocFrame +from PIL import Image +from singlecellexperiment import SingleCellExperiment +from summarizedexperiment._frameutils import _sanitize_frame +from summarizedexperiment.RangedSummarizedExperiment import GRangesOrGRangesList + +from ._imgutils import retrieve_rows_by_id +from ._validators import ( + _validate_column_data, + _validate_id, + _validate_img_data, + _validate_sample_ids, + _validate_sample_image_ids, + _validate_spatial_coords, + _validate_spatial_coords_names, +) +from .SpatialImage import SpatialImage + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +class SpatialExperiment(SingleCellExperiment): + """Container class for storing data from spatial -omics experiments, extending + :py:class:`~singlecellexperiment.SingleCellExperiment` to provide slots for + image data and spatial coordinates. + + In contrast to R, :py:class:`~numpy.ndarray` or scipy matrices are unnamed and do + not contain rownames and colnames. Hence, these matrices cannot be directly used as + values in assays or alternative experiments. We strictly enforce type checks in these cases. 
+ """ + + def __init__( + self, + assays: Dict[str, Any] = None, + row_ranges: Optional[GRangesOrGRangesList] = None, + row_data: Optional[BiocFrame] = None, + column_data: Optional[BiocFrame] = None, + row_names: Optional[List[str]] = None, + column_names: Optional[List[str]] = None, + metadata: Optional[dict] = None, + reduced_dims: Optional[Dict[str, Any]] = None, + main_experiment_name: Optional[str] = None, + alternative_experiments: Optional[Dict[str, Any]] = None, + alternative_experiment_check_dim_names: bool = True, + row_pairs: Optional[Any] = None, + column_pairs: Optional[Any] = None, + spatial_coords: Optional[BiocFrame] = None, + img_data: Optional[BiocFrame] = None, + validate: bool = True, + ) -> None: + """Initialize a spatial experiment. + + Args: + assays: + A dictionary containing matrices, with assay names as keys + and 2-dimensional matrices represented as either + :py:class:`~numpy.ndarray` or :py:class:`~scipy.sparse.spmatrix`. + + Alternatively, you may use any 2-dimensional matrix that has + the ``shape`` property and implements the slice operation + using the ``__getitem__`` dunder method. + + All matrices in assays must be 2-dimensional and have the + same shape (number of rows, number of columns). + + row_ranges: + Genomic features, must be the same length as the number of rows of + the matrices in assays. + + row_data: + Features, must be the same length as the number of rows of + the matrices in assays. + + Feature information is coerced to a + :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. + + column_data: + Sample data, must be the same length as the number of + columns of the matrices in assays. For instances of the + ``SpatialExperiment`` class, the sample data must include + a column named `sample_id`. If any 'sample_id' in the sample data is not present in the 'sample_id's of 'img_data', a warning will be issued. 
+ + If `sample_id` is not present, a column with this name + will be created and filled with the default value `sample01`. + + Sample information is coerced to a + :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. + + row_names: + A list of strings, same as the number of rows.Defaults to None. + + column_names: + A list of strings, same as the number of columns. Defaults to None. + + metadata: + Additional experimental metadata describing the methods. + Defaults to None. + + reduced_dims: + Slot for low-dimensionality embeddings. + + Usually a dictionary with the embedding method as keys (e.g., t-SNE, UMAP) + and the dimensions as values. + + Embeddings may be represented as a matrix or a data frame, must contain a shape. + + main_experiment_name: + A string, specifying the main experiment name. + + alternative_experiments: + Used to manage multi-modal experiments performed on the same sample/cells. + + Alternative experiments must contain the same cells (rows) as the primary experiment. + It's a dictionary with keys as the names of the alternative experiments + (e.g., sc-atac, crispr) and values as subclasses of + :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. + + alternative_experiment_check_dim_names: + Whether to check if the column names of the alternative experiment match the column names + of the main experiment. This is the equivalent to the ``withDimnames`` + parameter in the R implementation. + + Defaults to True. + + row_pairs: + Row pairings/relationships between features. + + Defaults to None. + + column_pairs: + Column pairings/relationships between cells. + + Defaults to None. + + spatial_coords: + Optional :py:class:`~biocframe.BiocFrame.BiocFrame` containing columns of spatial coordinates. Must be the same length as `column_data`. Typical column names might include: + + - **['x', 'y']**: For simple 2D coordinates. 
+ - **['pxl_col_in_fullres', 'pxl_row_in_fullres']**: For pixel-based coordinates in full-resolution images. + + Spatial coordinates are coerced to a + :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. + + img_data: + Optional :py:class:`~biocframe.BiocFrame.BiocFrame` containing the image data, structured with the following columns: + - **sample_id** (str): A string identifier for the sample to which an image corresponds. + - **image_id** (str): A unique string identifier for each image within each sample. + - **data** (SpatialImage): The image itself, represented as a SpatialImage object. + - **scale_factor** (float): A numerical value that indicates the scaling factor applied to the image. + + All 'sample_id's in 'img_data' must be present in the 'sample_id's of 'column_data'. + + Image data are coerced to a + :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None. + + validate: + Internal use only. + """ + super().__init__( + assays=assays, + row_ranges=row_ranges, + row_data=row_data, + column_data=column_data, + row_names=row_names, + column_names=column_names, + metadata=metadata, + reduced_dims=reduced_dims, + main_experiment_name=main_experiment_name, + alternative_experiments=alternative_experiments, + row_pairs=row_pairs, + column_pairs=column_pairs, + alternative_experiment_check_dim_names=alternative_experiment_check_dim_names, + validate=validate, + ) + + column_data = _sanitize_frame(column_data, num_rows=self.shape[1]) + + if not column_data.has_column("sample_id"): + column_data["sample_id"] = ["sample01"] * self.shape[1] # hard code default sample_id as "sample01" + + spatial_coords = _sanitize_frame(spatial_coords, num_rows=self.shape[1]) + img_data = _sanitize_frame(img_data, num_rows=0) + + self._img_data = img_data + self._cols = column_data + self._spatial_coords = spatial_coords + + if validate: + _validate_column_data(column_data=column_data) + _validate_img_data(img_data=img_data) + 
_validate_sample_ids(column_data=column_data, img_data=img_data) + _validate_spatial_coords(spatial_coords=spatial_coords, column_data=column_data) + + ######################### + ######>> Copying <<###### + ######################### + + def __deepcopy__(self, memo=None, _nil=[]): + """ + Returns: + A deep copy of the current ``SpatialExperiment``. + """ + from copy import deepcopy + + _assays_copy = deepcopy(self._assays) + _rows_copy = deepcopy(self._rows) + _rowranges_copy = deepcopy(self._row_ranges) + _cols_copy = deepcopy(self._cols) + _row_names_copy = deepcopy(self._row_names) + _col_names_copy = deepcopy(self._column_names) + _metadata_copy = deepcopy(self.metadata) + _main_expt_name_copy = deepcopy(self._main_experiment_name) + _red_dim_copy = deepcopy(self._reduced_dims) + _alt_expt_copy = deepcopy(self._alternative_experiments) + _row_pair_copy = deepcopy(self._row_pairs) + _col_pair_copy = deepcopy(self._column_pairs) + _spatial_coords_copy = deepcopy(self._spatial_coords) + _img_data_copy = deepcopy(self._img_data) + + current_class_const = type(self) + return current_class_const( + assays=_assays_copy, + row_ranges=_rowranges_copy, + row_data=_rows_copy, + column_data=_cols_copy, + row_names=_row_names_copy, + column_names=_col_names_copy, + metadata=_metadata_copy, + reduced_dims=_red_dim_copy, + main_experiment_name=_main_expt_name_copy, + alternative_experiments=_alt_expt_copy, + row_pairs=_row_pair_copy, + column_pairs=_col_pair_copy, + spatial_coords=_spatial_coords_copy, + img_data=_img_data_copy, + ) + + def __copy__(self): + """ + Returns: + A shallow copy of the current ``SpatialExperiment``. 
+ """ + current_class_const = type(self) + return current_class_const( + assays=self._assays, + row_ranges=self._row_ranges, + row_data=self._rows, + column_data=self._cols, + row_names=self._row_names, + column_names=self._column_names, + metadata=self._metadata, + reduced_dims=self._reduced_dims, + main_experiment_name=self._main_experiment_name, + alternative_experiments=self._alternative_experiments, + row_pairs=self._row_pairs, + column_pairs=self._column_pairs, + spatial_coords=self._spatial_coords, + img_data=self._img_data, + ) + + def copy(self): + """Alias for :py:meth:`~__copy__`.""" + return self.__copy__() + + ########################## + ######>> Printing <<###### + ########################## + + def __repr__(self) -> str: + """ + Returns: + A string representation. + """ + output = f"{type(self).__name__}(number_of_rows={self.shape[0]}" + output += f", number_of_columns={self.shape[1]}" + output += ", assays=" + ut.print_truncated_list(self.assay_names) + + output += ", row_data=" + self._rows.__repr__() + if self._row_names is not None: + output += ", row_names=" + ut.print_truncated_list(self._row_names) + + output += ", column_data=" + self._cols.__repr__() + if self._column_names is not None: + output += ", column_names=" + ut.print_truncated_list(self._column_names) + + if self._spatial_coords is not None: + output += ", spatial_coords=" + self._spatial_coords.__repr__() + + if self._img_data is not None: + output += ", img_data=" + self._img_data.__repr__() + + if self._row_ranges is not None: + output += ", row_ranges=" + self._row_ranges.__repr__() + + if self._alternative_experiments is not None: + output += ", alternative_experiments=" + ut.print_truncated_list(self.alternative_experiment_names) + + if self._reduced_dims is not None: + output += ", reduced_dims=" + ut.print_truncated_list(self.reduced_dim_names) + + if self._main_experiment_name is not None: + output += ", main_experiment_name=" + self._main_experiment_name + + if 
len(self._row_pairs) > 0: + output += ", row_pairs=" + ut.print_truncated_dict(self._row_pairs) + + if len(self._column_pairs) > 0: + output += ", column_pairs=" + ut.print_truncated_dict(self._column_pairs) + + if len(self._metadata) > 0: + output += ", metadata=" + ut.print_truncated_dict(self._metadata) + + output += ")" + return output + + def __str__(self) -> str: + """ + Returns: + A pretty-printed string containing the contents of this object. + """ + output = f"class: {type(self).__name__}\n" + + output += f"dimensions: ({self.shape[0]}, {self.shape[1]})\n" + + output += f"assays({len(self.assay_names)}): {ut.print_truncated_list(self.assay_names)}\n" + + output += ( + f"row_data columns({len(self._rows.column_names)}): {ut.print_truncated_list(self._rows.column_names)}\n" + ) + output += f"row_names({0 if self._row_names is None else len(self._row_names)}): {' ' if self._row_names is None else ut.print_truncated_list(self._row_names)}\n" + + output += ( + f"column_data columns({len(self._cols.column_names)}): {ut.print_truncated_list(self._cols.column_names)}\n" + ) + output += f"column_names({0 if self._column_names is None else len(self._column_names)}): {' ' if self._column_names is None else ut.print_truncated_list(self._column_names)}\n" + + output += f"main_experiment_name: {' ' if self._main_experiment_name is None else self._main_experiment_name}\n" + output += f"reduced_dims({len(self.reduced_dim_names)}): {ut.print_truncated_list(self.reduced_dim_names)}\n" + output += f"alternative_experiments({len(self.alternative_experiment_names)}): {ut.print_truncated_list(self.alternative_experiment_names)}\n" + output += f"row_pairs({len(self.row_pair_names)}): {ut.print_truncated_list(self.row_pair_names)}\n" + output += f"column_pairs({len(self.column_pair_names)}): {ut.print_truncated_list(self.column_pair_names)}\n" + + output += f"metadata({str(len(self.metadata))}): {ut.print_truncated_list(list(self.metadata.keys()), sep=' ', include_brackets=False, 
transform=lambda y: y)}\n" + + output += f"spatial_coords columns({len(self.spatial_coords_names)}): {ut.print_truncated_list(self.spatial_coords_names)}\n" + output += f"img_data columns({len(self._img_data.column_names)}): {ut.print_truncated_list(self._img_data.column_names)}" + + return output + + ############################## + #####>> spatial_coords <<##### + ############################## + + def get_spatial_coordinates(self) -> BiocFrame: + """Access spatial coordinates. + + Returns: + A ``BiocFrame`` containing columns of spatial coordinates. + """ + return self._spatial_coords + + def get_spatial_coords(self) -> BiocFrame: + """Alias for :py:meth:`~get_spatial_coordinates`.""" + return self.get_spatial_coordinates() + + def set_spatial_coordinates( + self, spatial_coords: Optional[BiocFrame], in_place: bool = False + ) -> "SpatialExperiment": + """Set new spatial coordinates. + + Args: + spatial_coords: + :py:class:`~biocframe.BiocFrame.BiocFrame` containing columns of spatial coordinates. Must be the same length as `column_data`. Typical column names might include: + + - **['x', 'y']**: For simple 2D coordinates. + - **['pxl_col_in_fullres', 'pxl_row_in_fullres']**: For pixel-based coordinates in full-resolution images. + + To remove coordinate information, set `spatial_coords=None`. + + Spatial coordinates are coerced to a + :py:class:`~biocframe.BiocFrame.BiocFrame`. + + in_place: + Whether to modify the ``SpatialExperiment`` in place. Defaults to False. + + Returns: + A modified ``SpatialExperiment`` object, either as a copy of the original or as a reference to the (in-place-modified) original. 
+ """ + spatial_coords = _sanitize_frame(spatial_coords, self.shape[1]) + + _validate_spatial_coords(spatial_coords, self.column_data) + + output = self._define_output(in_place) + output._spatial_coords = spatial_coords + return output + + def set_spatial_coords(self, spatial_coords: BiocFrame, in_place: bool = False) -> "SpatialExperiment": + """Alias for :py:meth:`~set_spatial_coordinates`.""" + return self.set_spatial_coordinates(spatial_coords=spatial_coords, in_place=in_place) + + @property + def spatial_coords(self) -> BiocFrame: + """Alias for :py:meth:`~get_spatial_coordinates`.""" + return self.get_spatial_coordinates() + + @spatial_coords.setter + def spatial_coords(self, spatial_coords: BiocFrame): + """Alias for :py:meth:`~set_spatial_coordinates`.""" + warn( + "Setting property 'spatial_coords' is an in-place operation, use 'set_spatial_coordinates' instead.", + UserWarning, + ) + self.set_spatial_coordinates(spatial_coords=spatial_coords, in_place=True) + + @property + def spatial_coordinates(self) -> BiocFrame: + """Alias for :py:meth:`~get_spatial_coordinates`.""" + return self.get_spatial_coordinates() + + @spatial_coordinates.setter + def spatial_coordinates(self, spatial_coords: BiocFrame): + """Alias for :py:meth:`~set_spatial_coordinates`.""" + warn( + "Setting property 'spatial_coords' is an in-place operation, use 'set_spatial_coordinates' instead.", + UserWarning, + ) + self.set_spatial_coordinates(spatial_coords=spatial_coords, in_place=True) + + ############################## + ##>> spatial_coords_names <<## + ############################## + + def get_spatial_coordinates_names(self) -> List[str]: + """Access spatial coordinates names. + + Returns: + The defined names of the spatial coordinates. 
+        """
+        return self._spatial_coords.columns.as_list()
+
+    def get_spatial_coords_names(self) -> List[str]:
+        """Alias for :py:meth:`~get_spatial_coordinates_names`; returns the same list of coordinate column names."""
+        return self.get_spatial_coordinates_names()  # canonical getter is spelled with plural "coordinates"
+
+    def set_spatial_coordinates_names(
+        self, spatial_coords_names: List[str], in_place: bool = False
+    ) -> "SpatialExperiment":
+        """Set new spatial coordinates names.
+
+        Args:
+            spatial_coords_names:
+                New spatial coordinates names.
+
+            in_place:
+                Whether to modify the ``SpatialExperiment`` in place. Defaults to False.
+
+        Returns:
+            A modified ``SpatialExperiment`` object, either as a copy of the original or as a reference to the (in-place-modified) original.
+        """
+        _validate_spatial_coords_names(spatial_coords_names, self.spatial_coordinates)
+
+        old_spatial_coordinates = self.get_spatial_coordinates()
+        new_spatial_coordinates = old_spatial_coordinates.set_column_names(spatial_coords_names)
+
+        output = self._define_output(in_place)
+        output._spatial_coords = new_spatial_coordinates
+        return output
+
+    def set_spatial_coords_names(self, spatial_coords_names: List[str], in_place: bool = False) -> "SpatialExperiment":
+        """Alias for :py:meth:`~set_spatial_coordinates_names`."""
+        return self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=in_place)
+
+    @property
+    def spatial_coords_names(self) -> List[str]:
+        """Alias for :py:meth:`~get_spatial_coordinates_names`."""
+        return self.get_spatial_coordinates_names()
+
+    @spatial_coords_names.setter
+    def spatial_coords_names(self, spatial_coords_names: List[str]):
+        """Alias for :py:meth:`~set_spatial_coordinates_names`."""
+        warn(
+            "Setting property 'spatial_coords_names' is an in-place operation, use 'set_spatial_coordinates_names' instead.",
+            UserWarning,
+        )
+        self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=True)
+
+    @property
+    def spatial_coordinates_names(self) -> List[str]:
+        """Alias for 
:py:meth:`~get_spatial_coordinates_names`.""" + return self.get_spatial_coordinates_names() + + @spatial_coordinates_names.setter + def spatial_coordinates_names(self, spatial_coords_names: List[str]): + """Alias for :py:meth:`~set_spatial_coordinates_names`.""" + warn( + "Setting property 'spatial_coords_names' is an in-place operation, use 'set_spatial_coordinates_names' instead.", + UserWarning, + ) + self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=True) + + ############################## + ########>> img_data <<######## + ############################## + + def get_image_data(self) -> BiocFrame: + """Access image data. + + Returns: + A BiocFrame object containing the image data. + """ + return self._img_data + + def get_img_data(self) -> BiocFrame: + """Alias for :py:meth:`~get_image_data`.""" + return self.get_image_data() + + def set_image_data(self, img_data: Optional[BiocFrame], in_place: bool = False) -> "SpatialExperiment": + """Set new image data. + + Args: + img_data: + :py:class:`~biocframe.BiocFrame.BiocFrame` containing the image data, structured with the following columns: + - **sample_id** (str): A string identifier for the sample to which an image corresponds. + - **image_id** (str): A unique string identifier for each image within each sample. + - **data** (SpatialImage): The image itself, represented as a SpatialImage object. + - **scale_factor** (float): A numerical value that indicates the scaling factor applied to the image. + + Image data are coerced to a + :py:class:`~biocframe.BiocFrame.BiocFrame`. + + in_place: + Whether to modify the ``SpatialExperiment`` in place. Defaults to False. + + Returns: + A modified ``SpatialExperiment`` object, either as a copy of the original or as a reference to the (in-place-modified) original. 
+        """
+        img_data = _sanitize_frame(img_data, num_rows=0)
+
+        _validate_img_data(img_data)
+        _validate_sample_ids(self.column_data, img_data)
+
+        output = self._define_output(in_place)
+        output._img_data = img_data
+        return output
+
+    def set_img_data(self, img_data: BiocFrame, in_place: bool = False) -> "SpatialExperiment":
+        """Alias for :py:meth:`~set_image_data`."""
+        return self.set_image_data(img_data=img_data, in_place=in_place)
+
+    @property
+    def img_data(self) -> BiocFrame:
+        """Alias for :py:meth:`~get_image_data`."""
+        return self.get_image_data()
+
+    @img_data.setter
+    def img_data(self, img_data: BiocFrame):
+        """Alias for :py:meth:`~set_image_data`."""
+        warn(
+            "Setting property 'img_data' is an in-place operation, use 'set_image_data' instead.",
+            UserWarning,
+        )
+        self.set_image_data(img_data=img_data, in_place=True)
+
+    @property
+    def image_data(self) -> BiocFrame:
+        """Alias for :py:meth:`~get_image_data`."""
+        return self.get_image_data()
+
+    @image_data.setter
+    def image_data(self, img_data: BiocFrame):
+        """Alias for :py:meth:`~set_image_data`."""
+        warn(
+            "Setting property 'img_data' is an in-place operation, use 'set_image_data' instead.",
+            UserWarning,
+        )
+        self.set_image_data(img_data=img_data, in_place=True)
+
+    ##############################
+    #####>> scale_factors <<######
+    ##############################
+
+    def get_scale_factors(
+        self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None
+    ) -> Union[float, List[float]]:
+        """Return scale factor(s) of image(s) based on the provided sample and image ids.
+        See :py:meth:`~get_img` for more details on the behavior for various
+        combinations of `sample_id` and `image_id` values.
+
+        Args:
+            sample_id:
+                - `sample_id=True`: Matches all samples.
+                - `sample_id=None`: Matches the first sample.
+                - `sample_id="<str>"`: Matches a sample by its id.
+
+            image_id:
+                - `image_id=True`: Matches all images for the specified sample(s).
+                - `image_id=None`: Matches the first image for the sample(s).
+                - `image_id="<str>"`: Matches image(s) by its(their) id.
+
+        Returns:
+            A single scale factor when exactly one image matches, otherwise the ``scale_factor`` column of all matching rows; an empty list when the lookup yields no rows object.
+        """
+        _validate_id(sample_id)
+        _validate_id(image_id)
+
+        img_data_subset = retrieve_rows_by_id(img_data=self.img_data, sample_id=sample_id, image_id=image_id)
+        if img_data_subset is None:  # mirror `get_img`, which treats a missing subset as "no images"
+            return []
+        if img_data_subset.shape[0] == 1:
+            return img_data_subset["scale_factor"][0]
+        return img_data_subset["scale_factor"]
+
+    ################################
+    ###>> OVERRIDE column_data <<###
+    ################################
+
+    def set_column_data(
+        self, cols: Optional[BiocFrame], replace_column_names: bool = False, in_place: bool = False
+    ) -> "SpatialExperiment":
+        """Override: Set sample data.
+
+        Args:
+            cols:
+                New sample data. If 'cols' contains a column named 'sample_id', a check is performed to ensure that all 'sample_id's in the 'img_data' are present. If any 'sample_id' in the 'cols' is not present in the 'sample_id's of 'img_data', a warning will be issued.
+                If 'sample_id' is not present or 'cols' is None, the original 'sample_id's are retained.
+
+            replace_column_names:
+                Whether to also replace the column names of the experiment with the row names of 'cols'. Defaults to False.
+
+            in_place:
+                Whether to modify the ``SpatialExperiment`` in place. Defaults to False.
+
+        Returns:
+            A modified ``SpatialExperiment`` object, either as a copy of the original or as a reference to the (in-place-modified) original. 
+        """
+        cols = _sanitize_frame(cols, num_rows=self.shape[1])
+        if "sample_id" not in cols.columns:
+            cols["sample_id"] = self.column_data["sample_id"]  # keep the current sample ids when the new frame has none
+
+        _validate_column_data(column_data=cols)
+        _validate_sample_ids(column_data=cols, img_data=self.img_data)
+
+        output = self._define_output(in_place)
+        output._cols = cols
+
+        if replace_column_names:
+            return output.set_column_names(cols.row_names, in_place=in_place)
+
+        return output
+
+    ################################
+    #########>> slicers <<##########
+    ################################
+
+    def get_slice(
+        self, rows: Optional[Union[str, int, bool, Sequence]], columns: Optional[Union[str, int, bool, Sequence]]
+    ) -> "SpatialExperiment":
+        """Slice by rows and/or columns (backs ``__getitem__``): ``spatial_coords`` follows the column selection and ``img_data`` keeps only rows whose ``sample_id`` remains in the sliced ``column_data``."""
+
+        spe = super().get_slice(rows=rows, columns=columns)
+
+        slicer = self._generic_slice(rows=rows, columns=columns)
+        do_slice_cols = not (isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None))
+
+        new_spatial_coords = self.spatial_coords  # a row-only slice keeps every column, so keep every coordinate too
+        if do_slice_cols:
+            new_spatial_coords = self.spatial_coords[slicer.col_indices, :]
+
+        new_img_data = self.img_data
+        if new_img_data.has_column("sample_id"):  # an experiment built without images has no columns to filter on
+            column_sample_ids = set(spe.column_data["sample_id"])
+            mask = [sample_id in column_sample_ids for sample_id in new_img_data["sample_id"]]
+            new_img_data = new_img_data[mask,]
+
+        current_class_const = type(self)
+        return current_class_const(
+            assays=spe.assays,
+            row_ranges=spe.row_ranges,
+            row_data=spe.row_data,
+            column_data=spe.column_data,
+            row_names=spe.row_names,
+            column_names=spe.column_names,
+            metadata=spe.metadata,
+            main_experiment_name=spe.main_experiment_name,
+            reduced_dims=spe.reduced_dims,
+            alternative_experiments=spe.alternative_experiments,
+            row_pairs=spe.row_pairs,
+            column_pairs=spe.column_pairs,
+            spatial_coords=new_spatial_coords,
+            img_data=new_img_data,
+        )
+
+    ################################
+    ######>> img_data funcs <<######
+    ################################
+
+    def get_img(
+        self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None 
+ ) -> Union[SpatialImage, List[SpatialImage]]: + """ + Retrieve spatial images based on the provided sample and image ids. + + Args: + sample_id: + - `sample_id=True`: Matches all samples. + - `sample_id=None`: Matches the first sample. + - `sample_id=""`: Matches a sample by its id. + + image_id: + - `image_id=True`: Matches all images for the specified sample(s). + - `image_id=None`: Matches the first image for the sample(s). + - `image_id=""`: Matches image(s) by its(their) id. + + Returns: + Zero, one, or more `SpatialImage` objects. + + Behavior: + - sample_id = True, image_id = True: + Returns all images from all samples. + + - sample_id = None, image_id = None: + Returns the first image entry in the dataset. + + - sample_id = True, image_id = None: + Returns the first image for each sample. + + - sample_id = None, image_id = True: + Returns all images for the first sample. + + - sample_id = , image_id = True: + Returns all images for the specified sample. + + - sample_id = , image_id = None: + Returns the first image for the specified sample. + + - sample_id = , image_id = : + Returns the image matching the specified sample and image identifiers. + """ + _validate_id(sample_id) + _validate_id(image_id) + + img_data_subset = retrieve_rows_by_id(img_data=self.img_data, sample_id=sample_id, image_id=image_id) + + if img_data_subset is None: + return [] + + if img_data_subset.shape[0] == 1: + return img_data_subset["data"][0] + + return img_data_subset["data"] + + def add_img( + self, + image_source: str, + scale_factor: float, + sample_id: Union[str, bool, None], + image_id: Union[str, bool, None], + load: bool = True, + in_place: bool = False, + ) -> "SpatialExperiment": + """Add a new image entry. + + Args: + image_source: + The file path to the image. + + scale_factor: + The scaling factor associated with the image. + + sample_id: + The sample id of the image. + + image_id: + The image id of the image. + + load: + Whether to load the image into memory. 
If `True`, + the method reads the image file from + `image_source`. + Defaults to `True`. + + in_place: + Whether to modify the ``SpatialExperiment`` in place. + Defaults to False. + + Returns: + A modified ``SpatialExperiment`` object, either as a copy of the original or as a reference to the (in-place-modified) original. + + Raises: + ValueError: If the sample_id and image_id pair already exists. + """ + _validate_sample_image_ids(img_data=self._img_data, new_sample_id=sample_id, new_image_id=image_id) + + if load: + img = Image.open(image_source) + spi = SpatialImage(img) + else: + spi = SpatialImage(image_source) + + new_row = BiocFrame( + { + "sample_id": [sample_id], + "image_id": [image_id], + "data": [spi], + "scale_factor": [scale_factor], + } + ) + new_img_data = self._img_data.combine_rows(new_row) + + output = self._define_output(in_place) + output._img_data = new_img_data + return output + + # TODO: implement rmv_img() + def rmv_img( + self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None + ) -> "SpatialExperiment": + raise NotImplementedError() + + def img_source(self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None, path=False): + raise NotImplementedError("This function is irrelevant because it is for `RemoteSpatialImages`") + + def img_raster(self, sample_id=None, image_id=None): + # NOTE: this function seems redundant, might be an artifact of the different subclasses of SpatialImage in the R implementation? 
just call `get_img()` for now + self.get_img(sample_id=sample_id, image_id=image_id) + + def rotate_img(self, sample_id=None, image_id=None, degrees=90): + raise NotImplementedError() + + def mirror_img(self, sample_id=None, image_id=None, axis=("h", "v")): + raise NotImplementedError() + + @staticmethod + def to_spatial_experiment(): + raise NotImplementedError() + + ################################ + #######>> combine ops <<######## + ################################ diff --git a/src/spatialexperiment/SpatialImage.py b/src/spatialexperiment/SpatialImage.py new file mode 100644 index 0000000..2c74807 --- /dev/null +++ b/src/spatialexperiment/SpatialImage.py @@ -0,0 +1,38 @@ +import os + +from PIL import Image + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +# TODO: add documentation, __repr__, __str__ +class SpatialImage: + def __init__(self, x): + if isinstance(x, SpatialImage): + self.image = x.image + self.path = x.path + elif isinstance(x, Image.Image): + self.image = x + self.path = None + elif isinstance(x, str): + if x.startswith(("http://", "https://", "ftp://")): + raise ValueError("URLs are not supported for SpatialImage.") + else: + self.image = None + self.path = os.path.normpath(x) + else: + raise ValueError("Unknown input type for 'x'") + + def load_image(self): + """Load the image from the stored path into memory.""" + if self.image is None and self.path is not None: + self.image = Image.open(self.path) + return self.image + + def get_image(self): + """Retrieve the image, loading it if necessary.""" + if self.image is None: + return self.load_image() + return self.image diff --git a/src/spatialexperiment/__init__.py b/src/spatialexperiment/__init__.py index 06f2c77..3a5ea71 100644 --- a/src/spatialexperiment/__init__.py +++ b/src/spatialexperiment/__init__.py @@ -14,3 +14,6 @@ __version__ = "unknown" finally: del version, PackageNotFoundError + +from .SpatialExperiment import SpatialExperiment +from .SpatialImage import 
SpatialImage diff --git a/src/spatialexperiment/_imgutils.py b/src/spatialexperiment/_imgutils.py new file mode 100644 index 0000000..c9c2c29 --- /dev/null +++ b/src/spatialexperiment/_imgutils.py @@ -0,0 +1,80 @@ +from typing import Union + +from biocframe import BiocFrame + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def retrieve_rows_by_id( + img_data: BiocFrame, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None +) -> Union[BiocFrame, None]: + """ + Retrieve rows from `img_data` based on specified `sample_id` and `image_id`. + + Args: + img_data: The data from which to retrieve rows. + sample_id: + - `sample_id=True`: Matches all samples. + - `sample_id=None`: Matches the first sample. + - `sample_id=""`: Matches a sample by its id. + + image_id: + - `image_id=True`: Matches all images for the specified sample(s). + - `image_id=None`: Matches the first image for the sample(s). + - `image_id=""`: Matches image(s) by its(their) id. + + Returns: + The filtered `img_data` based on the specified ids, or `None` if `img_data` is empty. 
+ """ + + if img_data is None: + return None + + if img_data.shape[0] == 0: + return None + + if sample_id is True: + if image_id is True: + return img_data + + elif image_id is None: + unique_sample_ids = list(set(img_data["sample_id"])) + sample_id_groups = img_data.split("sample_id") + subset = None + + for sample_id in unique_sample_ids: + row = sample_id_groups[sample_id][0, :] + if subset is None: + subset = row + else: + subset = subset.combine_rows(row) + else: + subset = img_data[[_image_id == image_id for _image_id in img_data["image_id"]], :] + + elif sample_id is None: + first_sample_id = img_data["sample_id"][0] + first_sample = img_data[[_sample_id == first_sample_id for _sample_id in img_data["sample_id"]], :] + + if image_id is True: + subset = first_sample + + elif image_id is None: + subset = first_sample[0, :] + else: + subset = first_sample[[_image_id == image_id for _image_id in img_data["image_id"]], :] + + else: + selected_sample = img_data[[_sample_id == sample_id for _sample_id in img_data["sample_id"]], :] + + if selected_sample.shape[0] == 0: + subset = selected_sample + elif image_id is True: + subset = selected_sample + elif image_id is None: + subset = selected_sample[0, :] + else: + subset = selected_sample[[_image_id == image_id for _image_id in selected_sample["image_id"]]] + + return subset diff --git a/src/spatialexperiment/_initutils.py b/src/spatialexperiment/_initutils.py new file mode 100644 index 0000000..edaa4c2 --- /dev/null +++ b/src/spatialexperiment/_initutils.py @@ -0,0 +1,95 @@ +from copy import deepcopy +from typing import List, Tuple + +from biocframe import BiocFrame +from PIL import Image +from SpatialImage import SpatialImage +from summarizedexperiment._frameutils import _sanitize_frame + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def construct_spatial_coords_from_names( + spatial_coords_names: List[str], column_data: BiocFrame +) -> Tuple[BiocFrame, BiocFrame]: + """Construct the 
`spatial_coords` dataframe from names. + + Args: + spatial_coords_names: + A list of strings of column names from `column_data` containing spatial coordinates. + + column_data: + The sample data. + + Returns: + A tuple containing two `BiocFrame` objects: + - The first `BiocFrame` contains columns of spatial coordinates. + - The second `BiocFrame` is a subset of the original `column_data`, with the spatial coordinate columns removed. + """ + if spatial_coords_names is None: + raise ValueError("No spatial coordinate names were provided.") + + current_column_data = _sanitize_frame(column_data, num_rows=column_data.shape[1]) + + missing_names = [name for name in spatial_coords_names if name not in current_column_data.column_names] + if missing_names: + raise ValueError( + f"The following names in `spatial_coords_names` are missing from `column_data`: {missing_names}" + ) + + spatial_coords = deepcopy(current_column_data[:, spatial_coords_names]) + + column_data_subset = deepcopy( + current_column_data[ + :, + [col for col in current_column_data.column_names if col not in spatial_coords_names], + ] + ) + + return spatial_coords, column_data_subset + + +def construct_img_data( + sample_id: str, image_id: str, image_sources: List[str], scale_factors: List[float], load_image: bool = False +) -> BiocFrame: + """Construct the image data for a `SpatialExperiment`. + + Args: + sample_id: + The sample id. + + image_id: + The image id. + + image_sources: + The file paths to the images. Must be the same length as `scale_factors`. + + scale_factors: + The scaling factors associated with the images. Must be the same length as + `image_sources`. + + load_image: + Whether to load the images into memory. Defaults to False. + + Returns: + A `BiocFrame` representing the image data for a `SpatialExperiment`. 
+ """ + if not len(image_id) == len(image_sources) == len(scale_factors): + raise ValueError("'image_id', 'image_sources' and 'scale_factors' are not the same length.") + + spis = [] + for image_source in image_sources: + result = Image.open(image_source) if load_image else image_source + spi = SpatialImage(result) + spis.append(spi) + + img_data = { + "sample_id": sample_id, + "image_id": image_id, + "data": spis, + "scale_factor": scale_factors, + } + + return BiocFrame(img_data) diff --git a/src/spatialexperiment/_validators.py b/src/spatialexperiment/_validators.py new file mode 100644 index 0000000..f27542e --- /dev/null +++ b/src/spatialexperiment/_validators.py @@ -0,0 +1,93 @@ +import warnings + +from biocframe import BiocFrame +import biocutils as ut + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def _validate_spatial_coords_names(spatial_coords_names, spatial_coords): + if not ut.is_list_of_type(spatial_coords_names, str): + raise TypeError("'spatial_coords_names' is not a list of strings") + + if len(spatial_coords_names) != spatial_coords.shape[1]: + raise ValueError(f"Expected {spatial_coords.shape[1]} names. 
Got {len(spatial_coords_names)} names.") + + +def _validate_column_data(column_data): + if column_data is None: + raise ValueError("'column_data' must have a column named 'sample_id'.") + + if not isinstance(column_data, BiocFrame): + raise TypeError("'column_data' must be a BiocFrame object.") + + if "sample_id" not in column_data.columns: + raise ValueError("'column_data' must have a column named 'sample_id'.") + + +def _validate_id(id): + is_valid = isinstance(id, str) or id is True or id is None + if not is_valid: + raise ValueError(f"{id} must be one of [str, True, None]") + + +def _validate_sample_image_ids(img_data, new_sample_id, new_image_id): + if img_data is None: + return + + if not isinstance(img_data, BiocFrame): + raise TypeError("`img_data` is not a BiocFrame object.") + + for row in img_data: + data = row[1] + if data["sample_id"] == new_sample_id and data["image_id"] == new_image_id: + raise ValueError(f"Image with Sample ID: {new_sample_id} and Image ID: {new_image_id} already exists") + + # TODO: check if 'new_sample_id' is present in column_data['sample_id'] + + +def _validate_spatial_coords(spatial_coords, column_data): + if spatial_coords is None: + return + + if not isinstance(spatial_coords, BiocFrame): + raise TypeError("'spatial_coords' must be a BiocFrame object.") + + if column_data.shape[0] != spatial_coords.shape[0]: + raise ValueError("'spatial_coords' do not contain coordinates for all cells.") + + +def _validate_img_data(img_data): + if img_data is None: + return + + if not isinstance(img_data, BiocFrame): + raise TypeError("'img_data' must be a BiocFrame object.") + + if img_data.shape[0] == 0: + return + + required_columns = ["sample_id", "image_id", "data", "scale_factor"] + if not all(column in img_data.columns for column in required_columns): + missing = list(set(required_columns) - set(img_data.columns)) + raise ValueError(f"'img_data' is missing required columns: {missing}") + + +def _validate_sample_ids(column_data, 
img_data): + """Ensure consistency of sample_id between img_data and column_data.""" + if img_data is None or img_data.shape[0] == 0: + return + + img_data_sample_ids = set(img_data["sample_id"]) + column_data_sample_ids = set(column_data["sample_id"]) + + if not img_data_sample_ids <= column_data_sample_ids: + raise ValueError("All 'sample_id's in 'img_data' must be present in 'column_data['sample_id']") + + if img_data_sample_ids != column_data_sample_ids: + warnings.warn( + "Not all 'sample_id's in 'column_data' correspond to an entry in 'img_data'", + UserWarning, + ) diff --git a/src/spatialexperiment/skeleton.py b/src/spatialexperiment/skeleton.py deleted file mode 100644 index c6bf341..0000000 --- a/src/spatialexperiment/skeleton.py +++ /dev/null @@ -1,149 +0,0 @@ -""" -This is a skeleton file that can serve as a starting point for a Python -console script. To run this script uncomment the following lines in the -``[options.entry_points]`` section in ``setup.cfg``:: - - console_scripts = - fibonacci = spatialexperiment.skeleton:run - -Then run ``pip install .`` (or ``pip install -e .`` for editable mode) -which will install the command ``fibonacci`` inside your current environment. - -Besides console scripts, the header (i.e. until ``_logger``...) of this file can -also be used as template for Python modules. - -Note: - This file can be renamed depending on your needs or safely removed if not needed. - -References: - - https://setuptools.pypa.io/en/latest/userguide/entry_point.html - - https://pip.pypa.io/en/stable/reference/pip_install -""" - -import argparse -import logging -import sys - -from spatialexperiment import __version__ - -__author__ = "keviny2" -__copyright__ = "keviny2" -__license__ = "MIT" - -_logger = logging.getLogger(__name__) - - -# ---- Python API ---- -# The functions defined in this section can be imported by users in their -# Python scripts/interactive interpreter, e.g. 
via -# `from spatialexperiment.skeleton import fib`, -# when using this Python module as a library. - - -def fib(n): - """Fibonacci example function - - Args: - n (int): integer - - Returns: - int: n-th Fibonacci number - """ - assert n > 0 - a, b = 1, 1 - for _i in range(n - 1): - a, b = b, a + b - return a - - -# ---- CLI ---- -# The functions defined in this section are wrappers around the main Python -# API allowing them to be called directly from the terminal as a CLI -# executable/script. - - -def parse_args(args): - """Parse command line parameters - - Args: - args (List[str]): command line parameters as list of strings - (for example ``["--help"]``). - - Returns: - :obj:`argparse.Namespace`: command line parameters namespace - """ - parser = argparse.ArgumentParser(description="Just a Fibonacci demonstration") - parser.add_argument( - "--version", - action="version", - version=f"SpatialExperiment {__version__}", - ) - parser.add_argument(dest="n", help="n-th Fibonacci number", type=int, metavar="INT") - parser.add_argument( - "-v", - "--verbose", - dest="loglevel", - help="set loglevel to INFO", - action="store_const", - const=logging.INFO, - ) - parser.add_argument( - "-vv", - "--very-verbose", - dest="loglevel", - help="set loglevel to DEBUG", - action="store_const", - const=logging.DEBUG, - ) - return parser.parse_args(args) - - -def setup_logging(loglevel): - """Setup basic logging - - Args: - loglevel (int): minimum loglevel for emitting messages - """ - logformat = "[%(asctime)s] %(levelname)s:%(name)s:%(message)s" - logging.basicConfig( - level=loglevel, stream=sys.stdout, format=logformat, datefmt="%Y-%m-%d %H:%M:%S" - ) - - -def main(args): - """Wrapper allowing :func:`fib` to be called with string arguments in a CLI fashion - - Instead of returning the value from :func:`fib`, it prints the result to the - ``stdout`` in a nicely formatted message. 
- - Args: - args (List[str]): command line parameters as list of strings - (for example ``["--verbose", "42"]``). - """ - args = parse_args(args) - setup_logging(args.loglevel) - _logger.debug("Starting crazy calculations...") - print(f"The {args.n}-th Fibonacci number is {fib(args.n)}") - _logger.info("Script ends here") - - -def run(): - """Calls :func:`main` passing the CLI arguments extracted from :obj:`sys.argv` - - This function can be used as entry point to create console scripts with setuptools. - """ - main(sys.argv[1:]) - - -if __name__ == "__main__": - # ^ This is a guard statement that will prevent the following code from - # being executed in the case someone imports this file instead of - # executing it as a script. - # https://docs.python.org/3/library/__main__.html - - # After installing your project with pip, users can also run your Python - # modules as scripts via the ``-m`` flag, as defined in PEP 338:: - # - # python -m spatialexperiment.skeleton 42 - # - run() diff --git a/tests/conftest.py b/tests/conftest.py index 9463c27..b681f0b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,72 @@ -""" - Dummy conftest.py for spatialexperiment. +import pytest +import numpy as np +from biocframe import BiocFrame +from spatialexperiment import SpatialExperiment, SpatialImage +from random import random - If you don't know what this is for, just leave it empty. 
- Read more about conftest.py under: - - https://docs.pytest.org/en/stable/fixture.html - - https://docs.pytest.org/en/stable/writing_plugins.html -""" -# import pytest +@pytest.fixture +def spe(): + nrows = 200 + ncols = 500 + counts = np.random.rand(nrows, ncols) + row_data = BiocFrame( + { + "seqnames": [ + "chr1", + "chr2", + "chr2", + "chr2", + "chr1", + "chr1", + "chr3", + "chr3", + "chr3", + "chr3", + ] + * int(nrows / 10), + "starts": range(100, 100 + nrows), + "ends": range(110, 110 + nrows), + "strand": ["-", "+", "+", "*", "*", "+", "+", "+", "-", "-"] + * int(nrows / 10), + "score": range(0, nrows), + "GC": [random() for _ in range(10)] * int(nrows / 10), + } + ) + + col_data = BiocFrame( + { + "n_genes": [50, 200] * int(ncols / 2), + "condition": ["healthy", "tumor"] * int(ncols / 2), + "cell_id": ["spot_1", "spot_2"] * int(ncols / 2), + "sample_id": ["sample_1"] * int(ncols / 2) + ["sample_2"] * int(ncols / 2), + } + ) + + x_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) + y_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) + + spatial_coords = BiocFrame({"x": x_coords, "y": y_coords}) + + img_data = BiocFrame( + { + "sample_id": ["sample_1", "sample_1", "sample_2"], + "image_id": ["aurora", "dice", "desert"], + "data": [ + SpatialImage("tests/images/sample_image1.jpg"), + SpatialImage("tests/images/sample_image2.png"), + SpatialImage("tests/images/sample_image3.jpg"), + ], + "scale_factor": [1, 1, 1], + } + ) + + spe_instance = SpatialExperiment( + assays={"counts": counts}, + row_data=row_data, + column_data=col_data, + spatial_coords=spatial_coords, + img_data=img_data, + ) + + return spe_instance diff --git a/tests/images/sample_image1.jpg b/tests/images/sample_image1.jpg new file mode 100644 index 0000000..8716e42 Binary files /dev/null and b/tests/images/sample_image1.jpg differ diff --git a/tests/images/sample_image2.png b/tests/images/sample_image2.png new file mode 100644 index 0000000..11eb818 Binary files /dev/null 
and b/tests/images/sample_image2.png differ diff --git a/tests/images/sample_image3.jpg b/tests/images/sample_image3.jpg new file mode 100644 index 0000000..14befa6 Binary files /dev/null and b/tests/images/sample_image3.jpg differ diff --git a/tests/images/sample_image4.png b/tests/images/sample_image4.png new file mode 100644 index 0000000..f359621 Binary files /dev/null and b/tests/images/sample_image4.png differ diff --git a/tests/test_img_data_methods.py b/tests/test_img_data_methods.py new file mode 100644 index 0000000..ec1a48b --- /dev/null +++ b/tests/test_img_data_methods.py @@ -0,0 +1,73 @@ +import pytest +from copy import deepcopy +from spatialexperiment import SpatialImage + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def test_get_img_without_img_data(spe): + tspe = deepcopy(spe) + + tspe.img_data = None + assert not tspe.get_img() + + +def test_get_img_no_matches(spe): + images = spe.get_img(sample_id="foo", image_id="foo") + assert not images + + +def test_get_img_both_null(spe): + res = spe.get_img(sample_id=None, image_id=None) + image = spe.img_data["data"][0] + + assert isinstance(res, SpatialImage) + assert res == image + + +def test_get_img_both_true(spe): + res = spe.get_img(sample_id=True, image_id=True) + images = spe.img_data["data"] + + assert isinstance(res, list) + assert res == images + + +def test_get_img_specific_sample(spe): + res = spe.get_img(sample_id="sample_1", image_id=True) + images = spe.img_data["data"][:2] + + assert isinstance(res, list) + assert res == images + + +def test_get_img_specific_image(spe): + res = spe.get_img(sample_id=True, image_id="desert") + images = spe.img_data["data"][2] + + assert isinstance(res, SpatialImage) + assert res == images + + +def test_add_img(spe): + tspe = spe.add_img( + image_source="tests/images/sample_image4.png", + scale_factor=1, + sample_id="sample_2", + image_id="unsplash", + ) + + tspe.img_data.shape[0] == spe.img_data.shape[0] + 1 + + +def 
test_add_img_already_exists(spe): + img_data = spe.img_data + with pytest.raises(ValueError): + spe.add_img( + image_source="tests/images/sample_image4.png", + scale_factor=1, + sample_id=img_data["sample_id"][0], + image_id=img_data["image_id"][0], + ) diff --git a/tests/test_si.py b/tests/test_si.py new file mode 100644 index 0000000..998b6d2 --- /dev/null +++ b/tests/test_si.py @@ -0,0 +1,41 @@ +import pytest +from PIL import Image +from spatialexperiment import SpatialImage + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def test_si_constructor_path(): + si = SpatialImage("images/sample_image1.jpg") + + assert isinstance(si, SpatialImage) + assert si.path == "images/sample_image1.jpg" + assert si.image is None + + +def test_si_constructor_si(): + si_1 = SpatialImage("images/sample_image1.jpg") + si_2 = SpatialImage(si_1) + + assert isinstance(si_2, SpatialImage) + assert si_1.image == si_2.image + assert si_1.path == si_2.path + + +def test_si_constructor_image(): + image = Image.open("tests/images/sample_image2.png") + si = SpatialImage(image) + + assert isinstance(si, SpatialImage) + assert si.path is None + assert si.image == image + + +def test_invalid_input(): + with pytest.raises(ValueError): + SpatialImage("https://i.redd.it/3pw5uah7xo041.jpg") + + with pytest.raises(ValueError): + SpatialImage(5) diff --git a/tests/test_skeleton.py b/tests/test_skeleton.py deleted file mode 100644 index 2baf543..0000000 --- a/tests/test_skeleton.py +++ /dev/null @@ -1,25 +0,0 @@ -import pytest - -from spatialexperiment.skeleton import fib, main - -__author__ = "keviny2" -__copyright__ = "keviny2" -__license__ = "MIT" - - -def test_fib(): - """API Tests""" - assert fib(1) == 1 - assert fib(2) == 1 - assert fib(7) == 13 - with pytest.raises(AssertionError): - fib(-10) - - -def test_main(capsys): - """CLI Tests""" - # capsys is a pytest fixture that allows asserts against stdout/stderr - # https://docs.pytest.org/en/stable/capture.html - 
main(["7"]) - captured = capsys.readouterr() - assert "The 7-th Fibonacci number is 13" in captured.out diff --git a/tests/test_spe.py b/tests/test_spe.py new file mode 100644 index 0000000..1953e03 --- /dev/null +++ b/tests/test_spe.py @@ -0,0 +1,22 @@ +from biocframe import BiocFrame +from spatialexperiment import SpatialExperiment + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def test_SPE_empty_constructor(): + tspe = SpatialExperiment() + + assert isinstance(tspe, SpatialExperiment) + + assert isinstance(tspe.img_data, BiocFrame) + assert tspe.img_data.shape[0] == 0 + + assert len(tspe.spatial_coords_names) == 0 + assert isinstance(tspe.spatial_coords, BiocFrame) + assert tspe.spatial_coords.shape == (tspe.shape[1], 0) + + assert "sample_id" in tspe.column_data.columns.as_list() + assert tspe.column_data.shape == (tspe.shape[1], 1) diff --git a/tests/test_spe_column_data.py b/tests/test_spe_column_data.py new file mode 100644 index 0000000..ec634d3 --- /dev/null +++ b/tests/test_spe_column_data.py @@ -0,0 +1,57 @@ +import pytest +from copy import deepcopy +from biocframe import BiocFrame + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def test_set_col_data_to_none(spe): + tspe = deepcopy(spe) + tspe.column_data = None + + assert tspe.col_data.columns.as_list() == ["sample_id"] + assert tspe.column_data["sample_id"] == spe.column_data["sample_id"] + + +def test_valid_col_data_without_sample_id(spe): + tspe = deepcopy(spe) + + new_col_data = BiocFrame({"barcode": list(range(spe.column_data.shape[0]))}) + + tspe.column_data = new_col_data + + assert spe.column_data["sample_id"] == tspe.column_data["sample_id"] + + +def test_valid_sample_id(spe): + tspe = deepcopy(spe) + + new_col_data = BiocFrame( + { + "n_genes": [50, 200] * int(tspe.column_data.shape[0] / 2), + "condition": ["healthy", "tumor"] * int(tspe.column_data.shape[0] / 2), + "cell_id": ["spot_1", "spot_2"] * int(tspe.column_data.shape[0] / 
2), + "passed_qc": [True, False] * int(tspe.column_data.shape[0] / 2), + "sample_id": ["sample_1", "sample_2"] * int(tspe.column_data.shape[0] / 2), + } + ) + + tspe.column_data = new_col_data + + +def test_invalid_sample_id(spe): + tspe = deepcopy(spe) + + new_col_data = BiocFrame( + { + "n_genes": [50, 200] * int(tspe.column_data.shape[0] / 2), + "condition": ["healthy", "tumor"] * int(tspe.column_data.shape[0] / 2), + "cell_id": ["spot_1", "spot_2"] * int(tspe.column_data.shape[0] / 2), + "sample_id": ["foo"] * tspe.column_data.shape[0], + } + ) + + with pytest.raises(ValueError): + tspe.column_data = new_col_data diff --git a/tests/test_spe_methods.py b/tests/test_spe_methods.py new file mode 100644 index 0000000..64f9dc6 --- /dev/null +++ b/tests/test_spe_methods.py @@ -0,0 +1,29 @@ +from copy import deepcopy +import biocutils as ut + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def test_spatial_coords_names(spe): + assert spe.spatial_coords_names == spe.spatial_coords.columns.as_list() + + +def test_set_spatial_coords_names(spe): + tspe = deepcopy(spe) + + new_spatial_coords_names = list(map(str, range(len(spe.spatial_coords_names)))) + + tspe.spatial_coords_names = new_spatial_coords_names + + assert tspe.spatial_coords_names == new_spatial_coords_names + assert tspe.spatial_coords_names == tspe.spatial_coords.columns.as_list() + + +def test_get_scale_factors(spe): + sfs = spe.get_scale_factors(sample_id=True, image_id=True) + + assert ut.is_list_of_type(sfs, float) or ut.is_list_of_type(sfs, int) + assert len(sfs) == spe.img_data.shape[0] + assert sfs == spe.img_data["scale_factor"] diff --git a/tests/test_spe_subset.py b/tests/test_spe_subset.py new file mode 100644 index 0000000..2e9d1e1 --- /dev/null +++ b/tests/test_spe_subset.py @@ -0,0 +1,34 @@ +from spatialexperiment import SpatialExperiment + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def test_drop_all_samples(spe): + tspe = spe[:, 
[]] + + assert tspe.shape[1] == 0 + assert tspe.shape[0] == spe.shape[0] + + assert tspe.img_data.shape == (0, 4) + + +# bug for empty row slicing in SCE +# https://github.com/BiocPy/SingleCellExperiment/issues/59 +# def test_drop_all_features(spe): +# tspe = spe[[], :] + +# assert tspe.shape == (0, spe.shape[1]) +# assert tspe.img_data == spe.img_data + + +def test_spe_slice_removes_sample(spe): + mask = ["sample_1" == sample_id for sample_id in spe.column_data["sample_id"]] + tspe_slice = spe[:, mask] + + assert tspe_slice is not None + assert isinstance(tspe_slice, SpatialExperiment) + + assert set(tspe_slice.column_data["sample_id"]) == {"sample_1"} + assert set(tspe_slice.img_data["sample_id"]) == {"sample_1"}