From c46871434a41037dc0e3f53bfd7a1185b9ca0fbe Mon Sep 17 00:00:00 2001 From: keviny2 Date: Mon, 17 Feb 2025 14:12:21 -0800 Subject: [PATCH 01/12] Write function stubs --- src/spatialexperiment/SpatialExperiment.py | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py index 1c3fadb..f2acb1f 100644 --- a/src/spatialexperiment/SpatialExperiment.py +++ b/src/spatialexperiment/SpatialExperiment.py @@ -870,3 +870,48 @@ def to_spatial_experiment(): ################################ #######>> combine ops <<######## ################################ + + def relaxed_combine_columns(self, *other) -> "SpatialExperiment": + """Wrapper around :py:func:`~relaxed_combine_columns`.""" + return relaxed_combine_columns(self, *other) + + def combine_columns(self, *other) -> "SpatialExperiment": + """Wrapper around :py:func:`~combine_columns`.""" + return combine_columns(self, *other) + + +################################ +#######>> combine ops <<######## +################################ + +@ut.combine_columns.register(SpatialExperiment) +def combine_columns(*x: SpatialExperiment) -> SpatialExperiment: + """Combine multiple ``SpatialExperiment`` objects by column. + + All assays must contain the same assay names. If you need a + flexible combine operation, checkout :py:func:`~relaxed_combine_columns`. + + Returns: + A combined ``SpatialExperiment``. + """ + + +@ut.relaxed_combine_columns.register(SpatialExperiment) +def relaxed_combine_columns( + *x: SpatialExperiment, +) -> SpatialExperiment: + """A relaxed version of the :py:func:`~biocutils.combine_rows.combine_columns` method for + :py:class:`~SpatialExperiment` objects. Whereas ``combine_columns`` expects that all objects have the same rows, + ``relaxed_combine_columns`` allows for different rows. Absent columns in any object are filled in with appropriate + placeholder values before combining. 
+ + Args: + x: + One or more ``SpatialExperiment`` objects, possibly with differences in the + number and identity of their rows. + + Returns: + A ``SpatialExperiment`` that combines all ``experiments`` along their columns and contains + the union of all rows. Rows absent in any ``x`` are filled in + with placeholders consisting of Nones or masked NumPy values. + """ From 745cd0662d65472e299fb57a5247bdf7dc76c765 Mon Sep 17 00:00:00 2001 From: keviny2 Date: Tue, 18 Feb 2025 13:33:34 -0800 Subject: [PATCH 02/12] Begin implementing combine_columns --- src/spatialexperiment/SpatialExperiment.py | 64 ++++++++++++++++++++-- src/spatialexperiment/_combineutils.py | 22 ++++++++ 2 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 src/spatialexperiment/_combineutils.py diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py index f2acb1f..7e66b2a 100644 --- a/src/spatialexperiment/SpatialExperiment.py +++ b/src/spatialexperiment/SpatialExperiment.py @@ -3,13 +3,22 @@ from urllib.parse import urlparse from warnings import warn -import biocutils as ut import numpy as np -from biocframe import BiocFrame from PIL import Image -from singlecellexperiment import SingleCellExperiment + +from biocframe import BiocFrame +import biocutils as ut +from summarizedexperiment._combineutils import ( + check_assays_are_equal, + merge_assays, + merge_se_colnames +) from summarizedexperiment._frameutils import _sanitize_frame from summarizedexperiment.RangedSummarizedExperiment import GRangesOrGRangesList +from singlecellexperiment import SingleCellExperiment +from singlecellexperiment._combineutils import ( + merge_generic, +) from ._imgutils import retrieve_rows_by_id from ._validators import ( @@ -21,7 +30,8 @@ _validate_spatial_coords, _validate_spatial_coords_names, ) -from .SpatialImage import VirtualSpatialImage, construct_spatial_image_class +from ._combineutils import merge_spe_cols +from .SpatialImage import 
construct_spatial_image_class, VirtualSpatialImage __author__ = "keviny2" __copyright__ = "keviny2" @@ -894,6 +904,52 @@ def combine_columns(*x: SpatialExperiment) -> SpatialExperiment: Returns: A combined ``SpatialExperiment``. """ + warn( + "'row_pairs' and 'column_pairs' are currently ignored during this operation.", + UserWarning, + ) + + first = x[0] + _all_assays = [y.assays for y in x] + check_assays_are_equal(_all_assays) + _new_assays = merge_assays(_all_assays, by="column") + + _all_cols = [y._cols for y in x] + _new_cols = merge_spe_cols(_all_cols) + _new_col_names = merge_se_colnames(x) + + _new_rdim = None + try: + _new_rdim = merge_generic(x, by="row", attr="reduced_dims") + except Exception as e: + warn( + f"Cannot combine 'reduced_dimensions' across experiments, {str(e)}", + UserWarning, + ) + + _new_alt_expt = None + try: + _new_alt_expt = merge_generic(x, by="column", attr="alternative_experiments") + except Exception as e: + warn( + f"Cannot combine 'alternative_experiments' across experiments, {str(e)}", + UserWarning, + ) + + current_class_const = type(first) + return current_class_const( + assays=_new_assays, + row_ranges=first._row_ranges, + row_data=first._rows, + column_data=_new_cols, + row_names=first._row_names, + column_names=_new_col_names, + metadata=first._metadata, + reduced_dims=_new_rdim, + main_experiment_name=first._main_experiment_name, + alternative_experiments=_new_alt_expt, + ) + @ut.relaxed_combine_columns.register(SpatialExperiment) diff --git a/src/spatialexperiment/_combineutils.py b/src/spatialexperiment/_combineutils.py new file mode 100644 index 0000000..126ca11 --- /dev/null +++ b/src/spatialexperiment/_combineutils.py @@ -0,0 +1,22 @@ +from warnings import warn +import itertools +import biocutils as ut + + +def merge_spe_cols(cols): + sample_ids = list(itertools.chain.from_iterable(_cols["sample_id"] for _cols in cols)) + + if len(set(sample_ids)) != len(sample_ids): + warn( + "'sample_id's are duplicated across 
'SpatialExperiment' objects to 'combine_columns'; appending sample indices." + ) + _all_cols = [] + for i, _cols in enumerate(cols): + _cols_copy = _cols.copy() + _cols_copy["sample_id"] = _cols_copy["sample_id"] + f".{i}" + _all_cols.append(_cols_copy) + else: + _all_cols = cols + + _new_cols = ut.combine_rows(*_all_cols) + return _new_cols From 54030ec90d493d1f4d163b7bbf76bfbb11c5f3ae Mon Sep 17 00:00:00 2001 From: keviny2 Date: Fri, 21 Feb 2025 15:40:05 -0800 Subject: [PATCH 03/12] Make deepcopy of coldata --- src/spatialexperiment/_combineutils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/spatialexperiment/_combineutils.py b/src/spatialexperiment/_combineutils.py index 126ca11..bd49284 100644 --- a/src/spatialexperiment/_combineutils.py +++ b/src/spatialexperiment/_combineutils.py @@ -1,4 +1,5 @@ from warnings import warn +from copy import deepcopy import itertools import biocutils as ut @@ -11,9 +12,9 @@ def merge_spe_cols(cols): "'sample_id's are duplicated across 'SpatialExperiment' objects to 'combine_columns'; appending sample indices." 
) _all_cols = [] - for i, _cols in enumerate(cols): - _cols_copy = _cols.copy() - _cols_copy["sample_id"] = _cols_copy["sample_id"] + f".{i}" + for i, _cols in enumerate(cols, start=1): + _cols_copy = deepcopy(_cols) + _cols_copy["sample_id"] = [f"{sample_id}_{i}" for sample_id in _cols_copy["sample_id"]] _all_cols.append(_cols_copy) else: _all_cols = cols From 108ba1fcdc0cfa26ab23c2fa34142a3918c3bb0d Mon Sep 17 00:00:00 2001 From: keviny2 Date: Fri, 21 Feb 2025 15:40:27 -0800 Subject: [PATCH 04/12] Add test for duplicate sample ids for combine ops --- tests/test_spe_combine.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/test_spe_combine.py diff --git a/tests/test_spe_combine.py b/tests/test_spe_combine.py new file mode 100644 index 0000000..2389b3f --- /dev/null +++ b/tests/test_spe_combine.py @@ -0,0 +1,23 @@ +from copy import deepcopy + +import pytest +import numpy as np +import biocutils as ut +from spatialexperiment import SpatialExperiment + +__author__ = "keviny2" +__copyright__ = "keviny2" +__license__ = "MIT" + + +def test_duplicate_sample_ids(spe): + with pytest.warns(UserWarning): + combined = ut.combine_columns(spe, spe) + + assert len(combined.column_data["sample_id"].unique()) == 2 * len( + spe.column_data["sample_id"].unique() + ) + assert combined.shape[0] == spe.shape[0] + assert combined.shape[1] == 2 * spe.shape[1] + assert combined.rownames == spe.rownames + assert set(combined.colnames.as_list()) == set(spe.colnames.as_list()) From 76b0d65acb37ab357fd5f1bf3b783e2f99d7d3c2 Mon Sep 17 00:00:00 2001 From: keviny2 Date: Mon, 24 Feb 2025 14:17:34 -0800 Subject: [PATCH 05/12] Add row_names and column_names to test spe --- tests/conftest.py | 14 ++++++++++++++ tests/test_spe_combine.py | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0123424..0283a7c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -43,6 +43,18 @@ def spe(): } ) + 
row_names = BiocFrame( + { + "row_names": range(nrows) + } + ) + + column_names = BiocFrame( + { + "column_names": range(ncols) + } + ) + x_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) y_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) @@ -65,6 +77,8 @@ def spe(): assays={"counts": counts}, row_data=row_data, column_data=col_data, + row_names=row_names, + column_names=column_names, spatial_coords=spatial_coords, img_data=img_data, ) diff --git a/tests/test_spe_combine.py b/tests/test_spe_combine.py index 2389b3f..2c536c0 100644 --- a/tests/test_spe_combine.py +++ b/tests/test_spe_combine.py @@ -14,8 +14,8 @@ def test_duplicate_sample_ids(spe): with pytest.warns(UserWarning): combined = ut.combine_columns(spe, spe) - assert len(combined.column_data["sample_id"].unique()) == 2 * len( - spe.column_data["sample_id"].unique() + assert len(set(combined.column_data["sample_id"])) == 2 * len( + set(spe.column_data["sample_id"]) ) assert combined.shape[0] == spe.shape[0] assert combined.shape[1] == 2 * spe.shape[1] From e7566ca615df25e32f4a5a7a8e128cd13377b66e Mon Sep 17 00:00:00 2001 From: keviny2 Date: Tue, 25 Feb 2025 08:01:13 -0800 Subject: [PATCH 06/12] WIP: Add test for checking that img_data combined correctly --- src/spatialexperiment/SpatialExperiment.py | 10 +++++++- src/spatialexperiment/_combineutils.py | 19 ++++++++++++++- tests/test_spe_combine.py | 27 ++++++++++++++++++++-- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py index 7e66b2a..764ab73 100644 --- a/src/spatialexperiment/SpatialExperiment.py +++ b/src/spatialexperiment/SpatialExperiment.py @@ -30,7 +30,7 @@ _validate_spatial_coords, _validate_spatial_coords_names, ) -from ._combineutils import merge_spe_cols +from ._combineutils import merge_spe_cols, merge_spe_spatial_coords from .SpatialImage import construct_spatial_image_class, VirtualSpatialImage __author__ = "keviny2" 
@@ -936,6 +936,12 @@ def combine_columns(*x: SpatialExperiment) -> SpatialExperiment: UserWarning, ) + _all_spatial_coords = [y._spatial_coords for y in x] + _new_spatial_coords = merge_spe_spatial_coords(_all_spatial_coords) + + _all_img_data = [y._img_data for y in x] + _new_img_data = ut.combine_rows(*_all_img_data) + current_class_const = type(first) return current_class_const( assays=_new_assays, @@ -948,6 +954,8 @@ def combine_columns(*x: SpatialExperiment) -> SpatialExperiment: reduced_dims=_new_rdim, main_experiment_name=first._main_experiment_name, alternative_experiments=_new_alt_expt, + spatial_coords=_new_spatial_coords, + img_data=_new_img_data ) diff --git a/src/spatialexperiment/_combineutils.py b/src/spatialexperiment/_combineutils.py index bd49284..d694e7a 100644 --- a/src/spatialexperiment/_combineutils.py +++ b/src/spatialexperiment/_combineutils.py @@ -5,9 +5,11 @@ def merge_spe_cols(cols): + num_unique = sum([len(set(_cols["sample_id"])) for _cols in cols]) + sample_ids = list(itertools.chain.from_iterable(_cols["sample_id"] for _cols in cols)) - if len(set(sample_ids)) != len(sample_ids): + if len(set(sample_ids)) < num_unique: warn( "'sample_id's are duplicated across 'SpatialExperiment' objects to 'combine_columns'; appending sample indices." ) @@ -21,3 +23,18 @@ def merge_spe_cols(cols): _new_cols = ut.combine_rows(*_all_cols) return _new_cols + + +def merge_spe_spatial_coords(spatial_coords): + first_shape = spatial_coords[0].shape[1] + if not all(coords.shape[1] == first_shape for coords in spatial_coords): + raise ValueError("Not all 'spatial_coords' have the same number of columns.") + + first_columns = spatial_coords[0].columns + if not all(coords.columns == first_columns for coords in spatial_coords): + warn( + "Not all 'spatial_coords' have the same dimension names." 
+ ) + + _new_spatial_coords = ut.combine_rows(*spatial_coords) + return _new_spatial_coords diff --git a/tests/test_spe_combine.py b/tests/test_spe_combine.py index 2c536c0..62762c9 100644 --- a/tests/test_spe_combine.py +++ b/tests/test_spe_combine.py @@ -1,9 +1,7 @@ from copy import deepcopy import pytest -import numpy as np import biocutils as ut -from spatialexperiment import SpatialExperiment __author__ = "keviny2" __copyright__ = "keviny2" @@ -21,3 +19,28 @@ def test_duplicate_sample_ids(spe): assert combined.shape[1] == 2 * spe.shape[1] assert combined.rownames == spe.rownames assert set(combined.colnames.as_list()) == set(spe.colnames.as_list()) + + +def test_img_data_combined_correctly(spe): + spe1 = deepcopy(spe) + spe2 = deepcopy(spe) + + # TODO: this is a temporary fix until https://github.com/BiocPy/SpatialExperiment/issues/25 is finished + spe1.column_data["sample_id"] = [f"{sample_id}_A" for sample_id in spe1.column_data["sample_id"]] + spe2.column_data["sample_id"] = [f"{sample_id}_B" for sample_id in spe2.column_data["sample_id"]] + spe1.img_data["sample_id"] = [f"{sample_id}_A" for sample_id in spe1.img_data["sample_id"]] + spe2.img_data["sample_id"] = [f"{sample_id}_B" for sample_id in spe2.img_data["sample_id"]] + + with pytest.warns(None): + combined = ut.combine_columns(spe1, spe2) + + assert combined.img_data.shape[0] == 2 * spe.img_data.shape[0] + assert set(combined.column_data["sample_id"]) == set(combined.img_data["sample_id"]) + assert set(combined.column_data["sample_id"]) == set(spe1.column_data["sample_id"] + spe2.column_data["sample_id"]) + + one = range(len(spe1.img_data)) + two = range(len(spe1.img_data), len(spe1.img_data) + len(spe2.img_data)) + + # TODO: .all().all() doesn't work for BiocFrames + assert (spe3.img_data[one, :] == spe1.img_data).all().all() + assert (spe3.img_data[two, :] == spe2.img_data).all().all() From 0c650db9394be728bfe1be7c28260f136768b7c6 Mon Sep 17 00:00:00 2001 From: keviny2 Date: Tue, 25 Feb 2025 
15:48:00 -0800 Subject: [PATCH 07/12] All combine columns tests pass --- src/spatialexperiment/SpatialExperiment.py | 7 +- src/spatialexperiment/_combineutils.py | 78 ++++++++++++++++++---- tests/test_spe_combine.py | 59 ++++++++++------ 3 files changed, 105 insertions(+), 39 deletions(-) diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py index 764ab73..ddd100f 100644 --- a/src/spatialexperiment/SpatialExperiment.py +++ b/src/spatialexperiment/SpatialExperiment.py @@ -30,7 +30,7 @@ _validate_spatial_coords, _validate_spatial_coords_names, ) -from ._combineutils import merge_spe_cols, merge_spe_spatial_coords +from ._combineutils import merge_spe_cols_and_img_data, merge_spe_spatial_coords from .SpatialImage import construct_spatial_image_class, VirtualSpatialImage __author__ = "keviny2" @@ -914,8 +914,6 @@ def combine_columns(*x: SpatialExperiment) -> SpatialExperiment: check_assays_are_equal(_all_assays) _new_assays = merge_assays(_all_assays, by="column") - _all_cols = [y._cols for y in x] - _new_cols = merge_spe_cols(_all_cols) _new_col_names = merge_se_colnames(x) _new_rdim = None @@ -939,8 +937,7 @@ def combine_columns(*x: SpatialExperiment) -> SpatialExperiment: _all_spatial_coords = [y._spatial_coords for y in x] _new_spatial_coords = merge_spe_spatial_coords(_all_spatial_coords) - _all_img_data = [y._img_data for y in x] - _new_img_data = ut.combine_rows(*_all_img_data) + _new_cols, _new_img_data = merge_spe_cols_and_img_data(x) current_class_const = type(first) return current_class_const( diff --git a/src/spatialexperiment/_combineutils.py b/src/spatialexperiment/_combineutils.py index d694e7a..db36fe3 100644 --- a/src/spatialexperiment/_combineutils.py +++ b/src/spatialexperiment/_combineutils.py @@ -1,31 +1,83 @@ +from typing import List, Tuple + from warnings import warn from copy import deepcopy import itertools + +from biocframe import BiocFrame import biocutils as ut +from spatialexperiment import 
SpatialExperiment + + +def _append_sample_indices(bframes: List[BiocFrame]) -> List[BiocFrame]: + """Append indices to sample IDs for a list of `BiocFrames`. + + For each `BiocFrame`, appends an index to all sample IDs to ensure uniqueness + across multiple frames. + + Args: + List of `BiocFrame` objects containing sample IDs. + + Returns: + List of `BiocFrame`s with modified sample IDs. + """ + modified_bframes = [] + for i, bframe in enumerate(bframes, start=1): + bframe_copy = deepcopy(bframe) + bframe_copy["sample_id"] = [f"{sample_id}_{i}" for sample_id in bframe_copy["sample_id"]] + modified_bframes.append(bframe_copy) + return modified_bframes -def merge_spe_cols(cols): - num_unique = sum([len(set(_cols["sample_id"])) for _cols in cols]) +def merge_spe_cols_and_img_data(x: List[SpatialExperiment]) -> Tuple[BiocFrame, BiocFrame]: + """Merge column data and image data from multiple ``SpatialExperiment`` objects. + + If duplicate sample IDs exist across objects, appends indices to make them unique. + Sample IDs in column data determine the uniqueness check as they are the superset + of IDs in image data. + + Args: + x: List of ``SpatialExperiment`` objects + + Returns: + A tuple with the merged column data and image data. + """ + cols = [y._cols for y in x] + img_datas = [y._img_data for y in x] - sample_ids = list(itertools.chain.from_iterable(_cols["sample_id"] for _cols in cols)) + expected_unique = sum([len(set(_cols["sample_id"])) for _cols in cols]) + all_sample_ids = list(itertools.chain.from_iterable(_cols["sample_id"] for _cols in cols)) - if len(set(sample_ids)) < num_unique: + if len(set(all_sample_ids)) < expected_unique: warn( "'sample_id's are duplicated across 'SpatialExperiment' objects to 'combine_columns'; appending sample indices." 
) - _all_cols = [] - for i, _cols in enumerate(cols, start=1): - _cols_copy = deepcopy(_cols) - _cols_copy["sample_id"] = [f"{sample_id}_{i}" for sample_id in _cols_copy["sample_id"]] - _all_cols.append(_cols_copy) + modified_columns = _append_sample_indices(cols) + modified_img_data = _append_sample_indices(img_datas) else: - _all_cols = cols + modified_columns = cols + modified_img_data = img_datas - _new_cols = ut.combine_rows(*_all_cols) - return _new_cols + _new_cols = ut.combine_rows(*modified_columns) + _new_img_data = ut.combine_rows(*modified_img_data) + return _new_cols, _new_img_data -def merge_spe_spatial_coords(spatial_coords): +def merge_spe_spatial_coords(spatial_coords: List[BiocFrame]) -> BiocFrame: + """Merge spatial coordinates from multiple frames. + + Args: + spatial_coords: List of `BiocFrame`s containing spatial coordinates. + + Returns: + A merged BiocFrame containing all spatial coordinates. + + Raises: + ValueError: If spatial coordinates have different numbers of columns. + + Warns: + If dimension names are not consistent across all `BiocFrame`s. 
+ """ first_shape = spatial_coords[0].shape[1] if not all(coords.shape[1] == first_shape for coords in spatial_coords): raise ValueError("Not all 'spatial_coords' have the same number of columns.") diff --git a/tests/test_spe_combine.py b/tests/test_spe_combine.py index 62762c9..d02d598 100644 --- a/tests/test_spe_combine.py +++ b/tests/test_spe_combine.py @@ -8,20 +8,7 @@ __license__ = "MIT" -def test_duplicate_sample_ids(spe): - with pytest.warns(UserWarning): - combined = ut.combine_columns(spe, spe) - - assert len(set(combined.column_data["sample_id"])) == 2 * len( - set(spe.column_data["sample_id"]) - ) - assert combined.shape[0] == spe.shape[0] - assert combined.shape[1] == 2 * spe.shape[1] - assert combined.rownames == spe.rownames - assert set(combined.colnames.as_list()) == set(spe.colnames.as_list()) - - -def test_img_data_combined_correctly(spe): +def test_combine_columns(spe): spe1 = deepcopy(spe) spe2 = deepcopy(spe) @@ -31,16 +18,46 @@ def test_img_data_combined_correctly(spe): spe1.img_data["sample_id"] = [f"{sample_id}_A" for sample_id in spe1.img_data["sample_id"]] spe2.img_data["sample_id"] = [f"{sample_id}_B" for sample_id in spe2.img_data["sample_id"]] - with pytest.warns(None): - combined = ut.combine_columns(spe1, spe2) + combined = ut.combine_columns(spe1, spe2) + # img_data checks assert combined.img_data.shape[0] == 2 * spe.img_data.shape[0] assert set(combined.column_data["sample_id"]) == set(combined.img_data["sample_id"]) assert set(combined.column_data["sample_id"]) == set(spe1.column_data["sample_id"] + spe2.column_data["sample_id"]) - one = range(len(spe1.img_data)) - two = range(len(spe1.img_data), len(spe1.img_data) + len(spe2.img_data)) + idx1 = range(spe1.img_data.shape[0]) + idx2 = range(spe1.img_data.shape[0], spe1.img_data.shape[0] + spe2.img_data.shape[0]) + img_data1 = combined.img_data[idx1, :] + img_data2 = combined.img_data[idx2, :] + + assert img_data1["sample_id"] == spe1.img_data["sample_id"] + assert 
img_data1["image_id"] == spe1.img_data["image_id"] + assert img_data1["data"] == spe1.img_data["data"] + assert img_data1["scale_factor"] == spe1.img_data["scale_factor"] + + assert img_data2["sample_id"] == spe2.img_data["sample_id"] + assert img_data2["image_id"] == spe2.img_data["image_id"] + assert img_data2["data"] == spe2.img_data["data"] + assert img_data2["scale_factor"] == spe2.img_data["scale_factor"] - # TODO: .all().all() doesn't work for BiocFrames - assert (spe3.img_data[one, :] == spe1.img_data).all().all() - assert (spe3.img_data[two, :] == spe2.img_data).all().all() + # spatial_coords checks + idx1 = range(spe1.spatial_coords.shape[0]) + idx2 = range(spe1.spatial_coords.shape[0], spe1.spatial_coords.shape[0] + spe2.spatial_coords.shape[0]) + spatial_coords1 = combined.spatial_coords[idx1, :] + spatial_coords2 = combined.spatial_coords[idx2, :] + + assert (spatial_coords1.to_pandas() == spe1.spatial_coords.to_pandas()).all().all() + assert (spatial_coords2.to_pandas() == spe2.spatial_coords.to_pandas()).all().all() + + +def test_duplicate_sample_ids(spe): + with pytest.warns(UserWarning): + combined = ut.combine_columns(spe, spe) + + assert len(set(combined.column_data["sample_id"])) == 2 * len( + set(spe.column_data["sample_id"]) + ) + assert combined.shape[0] == spe.shape[0] + assert combined.shape[1] == 2 * spe.shape[1] + assert combined.rownames == spe.rownames + assert set(combined.colnames.as_list()) == set(spe.colnames.as_list()) From b463d718eb075ece88ab721edc686577b74d9207 Mon Sep 17 00:00:00 2001 From: keviny2 Date: Tue, 25 Feb 2025 15:56:01 -0800 Subject: [PATCH 08/12] Lint --- src/spatialexperiment/ProxySFE.py | 55 +++++++-- src/spatialexperiment/SpatialExperiment.py | 132 +++++++++++++++------ src/spatialexperiment/SpatialImage.py | 26 ++-- src/spatialexperiment/_combineutils.py | 44 +++---- src/spatialexperiment/_imgutils.py | 24 +++- src/spatialexperiment/_initutils.py | 22 +++- src/spatialexperiment/_validators.py | 14 ++- 
tests/conftest.py | 12 +- tests/test_img_data_methods.py | 1 - tests/test_pSFE.py | 11 +- tests/test_spe.py | 1 + tests/test_spe_combine.py | 29 +++-- tests/test_spe_methods.py | 1 + tests/test_spi.py | 20 +++- 14 files changed, 273 insertions(+), 119 deletions(-) diff --git a/src/spatialexperiment/ProxySFE.py b/src/spatialexperiment/ProxySFE.py index 08aa778..7c5bc79 100644 --- a/src/spatialexperiment/ProxySFE.py +++ b/src/spatialexperiment/ProxySFE.py @@ -8,7 +8,11 @@ from summarizedexperiment._frameutils import _sanitize_frame from summarizedexperiment.RangedSummarizedExperiment import GRangesOrGRangesList -from .SpatialExperiment import SpatialExperiment, _validate_column_data, _validate_sample_ids +from .SpatialExperiment import ( + SpatialExperiment, + _validate_column_data, + _validate_sample_ids, +) __author__ = "jkanche" __copyright__ = "jkanche" @@ -44,7 +48,10 @@ def _validate_geometries(geometries: Dict[str, gpd.GeoDataFrame], prop_name: str for i, geom in enumerate(geometries.values()): if not isinstance(geom, gpd.GeoDataFrame): - raise TypeError(f"Item {i} in {prop_name} is {type(geom).__name__} " f"rather than `GeoDataFrame`.\n") + raise TypeError( + f"Item {i} in {prop_name} is {type(geom).__name__} " + f"rather than `GeoDataFrame`.\n" + ) def _validate_annotgeometries(geometries, column_data): @@ -58,13 +65,16 @@ def _validate_annotgeometries(geometries, column_data): for i, geom in enumerate(geometries.values()): if "sample_id" not in geom.columns: - raise ValueError(f"Item {i} of 'annot_geometries' does not have column 'sample_id'.\n") + raise ValueError( + f"Item {i} of 'annot_geometries' does not have column 'sample_id'.\n" + ) else: samples_seen = geom["sample_id"].unique() missing = [s for s in samples_seen if s not in sample_ids] if len(missing) > 0: raise ValueError( - f"Samples {', '.join(missing)} in item {i} of " f"annot_geometries are absent from 'column_data'.\n" + f"Samples {', '.join(missing)} in item {i} of " + f"annot_geometries 
are absent from 'column_data'.\n" ) @@ -75,7 +85,9 @@ def _validate_graph_sample_id(spatial_graphs, column_data): missing = graph_sample_ids - col_sample_ids if missing: - raise ValueError(f"Samples {', '.join(missing)} are in the graphs but not 'column_data'.\n") + raise ValueError( + f"Samples {', '.join(missing)} are in the graphs but not 'column_data'.\n" + ) def _validate_graph_structure(spatial_graphs): @@ -89,7 +101,9 @@ def _validate_graph_structure(spatial_graphs): "and whose rows are margins (rows, columns, annotation).\n" ) elif not set(spatial_graphs.get_row_names()) == {"row", "col", "annot"}: - raise ValueError("Row names of 'spatial_graphs' must be 'row', 'col', and 'annot'.\n") + raise ValueError( + "Row names of 'spatial_graphs' must be 'row', 'col', and 'annot'.\n" + ) class ProxySpatialFeatureExperiment(SpatialExperiment): @@ -404,7 +418,9 @@ def __repr__(self) -> str: output += ", col_geometries=" + ut.print_truncated_dict(self._col_geometries) output += ", row_geometries=" + ut.print_truncated_dict(self._row_geometries) - output += ", annot_geometries=" + ut.print_truncated_dict(self._annot_geometries) + output += ", annot_geometries=" + ut.print_truncated_dict( + self._annot_geometries + ) output += ", spatial_graphs=" + self._spatial_graphs.__repr__() output += ")" @@ -448,7 +464,9 @@ def get_unit(self) -> str: """Get the coordinate unit.""" return self._unit - def set_unit(self, unit: str, in_place: bool = False) -> "ProxySpatialFeatureExperiment": + def set_unit( + self, unit: str, in_place: bool = False + ) -> "ProxySpatialFeatureExperiment": """Set the coordinate unit. Args: @@ -631,7 +649,9 @@ def set_spatial_graphs( Returns: A modified ``ProxySpatialFeatureExperiment`` object, either as a copy of the original or as a reference to the (in-place-modified) original. 
""" - _graphs = _sanitize_spatial_graphs(graphs, list(set(self.get_column_data().get_column("sample_id")))) + _graphs = _sanitize_spatial_graphs( + graphs, list(set(self.get_column_data().get_column("sample_id"))) + ) _validate_graph_structure(_graphs) _validate_graph_sample_id(_graphs, self.get_column_data()) @@ -658,7 +678,9 @@ def spatial_graphs(self, graphs: Optional[BiocFrame]): ################################ def get_slice( - self, rows: Optional[Union[str, int, bool, List]] = None, columns: Optional[Union[str, int, bool, List]] = None + self, + rows: Optional[Union[str, int, bool, List]] = None, + columns: Optional[Union[str, int, bool, List]] = None, ) -> "ProxySpatialFeatureExperiment": """Get a slice of the experiment. @@ -675,7 +697,9 @@ def get_slice( sfe = super().get_slice(rows=rows, columns=columns) slicer = self._generic_slice(rows=rows, columns=columns) - do_slice_cols = not (isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None)) + do_slice_cols = not ( + isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None) + ) # Update geometries and graphs if do_slice_cols: @@ -698,7 +722,9 @@ def get_slice( new_graphs = None if self.spatial_graphs is not None: # Keep only columns for remaining samples - cols_to_keep = [c for c in self.spatial_graphs.columns if c in column_sample_ids] + cols_to_keep = [ + c for c in self.spatial_graphs.columns if c in column_sample_ids + ] if cols_to_keep: new_graphs = self.spatial_graphs[cols_to_keep] @@ -743,7 +769,10 @@ def get_slice( ################################ def set_column_data( - self, cols: Optional[BiocFrame], replace_column_names: bool = False, in_place: bool = False + self, + cols: Optional[BiocFrame], + replace_column_names: bool = False, + in_place: bool = False, ) -> "ProxySpatialFeatureExperiment": """Override: Set sample data. 
diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py index ddd100f..13d8f24 100644 --- a/src/spatialexperiment/SpatialExperiment.py +++ b/src/spatialexperiment/SpatialExperiment.py @@ -11,7 +11,7 @@ from summarizedexperiment._combineutils import ( check_assays_are_equal, merge_assays, - merge_se_colnames + merge_se_colnames, ) from summarizedexperiment._frameutils import _sanitize_frame from summarizedexperiment.RangedSummarizedExperiment import GRangesOrGRangesList @@ -30,7 +30,7 @@ _validate_spatial_coords, _validate_spatial_coords_names, ) -from ._combineutils import merge_spe_cols_and_img_data, merge_spe_spatial_coords +from ._combineutils import merge_spatial_frames, merge_spatial_coordinates from .SpatialImage import construct_spatial_image_class, VirtualSpatialImage __author__ = "keviny2" @@ -199,7 +199,9 @@ def __init__( column_data = _sanitize_frame(column_data, num_rows=self.shape[1]) if not column_data.has_column("sample_id"): - column_data["sample_id"] = ["sample01"] * self.shape[1] # hard code default sample_id as "sample01" + column_data["sample_id"] = ["sample01"] * self.shape[ + 1 + ] # hard code default sample_id as "sample01" spatial_coords = _sanitize_frame(spatial_coords, num_rows=self.shape[1]) img_data = _sanitize_frame(img_data, num_rows=0) @@ -212,7 +214,9 @@ def __init__( _validate_column_data(column_data=column_data) _validate_img_data(img_data=img_data) _validate_sample_ids(column_data=column_data, img_data=img_data) - _validate_spatial_coords(spatial_coords=spatial_coords, column_data=column_data) + _validate_spatial_coords( + spatial_coords=spatial_coords, column_data=column_data + ) ######################### ######>> Copying <<###### @@ -316,10 +320,14 @@ def __repr__(self) -> str: output += ", row_ranges=" + self._row_ranges.__repr__() if self._alternative_experiments is not None: - output += ", alternative_experiments=" + ut.print_truncated_list(self.alternative_experiment_names) + 
output += ", alternative_experiments=" + ut.print_truncated_list( + self.alternative_experiment_names + ) if self._reduced_dims is not None: - output += ", reduced_dims=" + ut.print_truncated_list(self.reduced_dim_names) + output += ", reduced_dims=" + ut.print_truncated_list( + self.reduced_dim_names + ) if self._main_experiment_name is not None: output += ", main_experiment_name=" + self._main_experiment_name @@ -347,14 +355,10 @@ def __str__(self) -> str: output += f"assays({len(self.assay_names)}): {ut.print_truncated_list(self.assay_names)}\n" - output += ( - f"row_data columns({len(self._rows.column_names)}): {ut.print_truncated_list(self._rows.column_names)}\n" - ) + output += f"row_data columns({len(self._rows.column_names)}): {ut.print_truncated_list(self._rows.column_names)}\n" output += f"row_names({0 if self._row_names is None else len(self._row_names)}): {' ' if self._row_names is None else ut.print_truncated_list(self._row_names)}\n" - output += ( - f"column_data columns({len(self._cols.column_names)}): {ut.print_truncated_list(self._cols.column_names)}\n" - ) + output += f"column_data columns({len(self._cols.column_names)}): {ut.print_truncated_list(self._cols.column_names)}\n" output += f"column_names({0 if self._column_names is None else len(self._column_names)}): {' ' if self._column_names is None else ut.print_truncated_list(self._column_names)}\n" output += f"main_experiment_name: {' ' if self._main_experiment_name is None else self._main_experiment_name}\n" @@ -387,7 +391,9 @@ def get_spatial_coords(self) -> BiocFrame: return self.get_spatial_coordinates() def set_spatial_coordinates( - self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]], in_place: bool = False + self, + spatial_coords: Optional[Union[BiocFrame, np.ndarray]], + in_place: bool = False, ) -> "SpatialExperiment": """Set new spatial coordinates. 
@@ -420,10 +426,14 @@ def set_spatial_coordinates( return output def set_spatial_coords( - self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]], in_place: bool = False + self, + spatial_coords: Optional[Union[BiocFrame, np.ndarray]], + in_place: bool = False, ) -> "SpatialExperiment": """Alias for :py:meth:`~set_spatial_coordinates`.""" - return self.set_spatial_coordinates(spatial_coords=spatial_coords, in_place=in_place) + return self.set_spatial_coordinates( + spatial_coords=spatial_coords, in_place=in_place + ) @property def spatial_coords(self) -> BiocFrame: @@ -445,7 +455,9 @@ def spatial_coordinates(self) -> BiocFrame: return self.get_spatial_coordinates() @spatial_coordinates.setter - def spatial_coordinates(self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]]): + def spatial_coordinates( + self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]] + ): """Alias for :py:meth:`~set_spatial_coordinates`.""" warn( "Setting property 'spatial_coords' is an in-place operation, use 'set_spatial_coordinates' instead.", @@ -495,15 +507,21 @@ def set_spatial_coordinates_names( new_spatial_coords = self._spatial_coords else: _validate_spatial_coords_names(spatial_coords_names, self._spatial_coords) - new_spatial_coords = self._spatial_coords.set_column_names(spatial_coords_names) + new_spatial_coords = self._spatial_coords.set_column_names( + spatial_coords_names + ) output = self._define_output(in_place) output._spatial_coords = new_spatial_coords return output - def set_spatial_coords_names(self, spatial_coords_names: List[str], in_place: bool = False) -> "SpatialExperiment": + def set_spatial_coords_names( + self, spatial_coords_names: List[str], in_place: bool = False + ) -> "SpatialExperiment": """Alias for :py:meth:`~set_spatial_coordinates_names`.""" - return self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=in_place) + return self.set_spatial_coordinates_names( + spatial_coords_names=spatial_coords_names, 
in_place=in_place + ) @property def spatial_coords_names(self) -> List[str]: @@ -517,7 +535,9 @@ def spatial_coords_names(self, spatial_coords_names: List[str]): "Setting property 'spatial_coords_names' is an in-place operation, use 'set_spatial_coordinates_names' instead.", UserWarning, ) - self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=True) + self.set_spatial_coordinates_names( + spatial_coords_names=spatial_coords_names, in_place=True + ) @property def spatial_coordinates_names(self) -> List[str]: @@ -531,7 +551,9 @@ def spatial_coordinates_names(self, spatial_coords_names: List[str]): "Setting property 'spatial_coords_names' is an in-place operation, use 'set_spatial_coordinates_names' instead.", UserWarning, ) - self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=True) + self.set_spatial_coordinates_names( + spatial_coords_names=spatial_coords_names, in_place=True + ) ############################## ########>> img_data <<######## @@ -549,7 +571,9 @@ def get_img_data(self) -> BiocFrame: """Alias for :py:meth:`~get_image_data`.""" return self.get_image_data() - def set_image_data(self, img_data: Optional[BiocFrame], in_place: bool = False) -> "SpatialExperiment": + def set_image_data( + self, img_data: Optional[BiocFrame], in_place: bool = False + ) -> "SpatialExperiment": """Set new image data. 
Args: @@ -578,7 +602,9 @@ def set_image_data(self, img_data: Optional[BiocFrame], in_place: bool = False) output._img_data = img_data return output - def set_img_data(self, img_data: BiocFrame, in_place: bool = False) -> "SpatialExperiment": + def set_img_data( + self, img_data: BiocFrame, in_place: bool = False + ) -> "SpatialExperiment": """Alias for :py:meth:`~set_image_data`.""" return self.set_image_data(img_data=img_data, in_place=in_place) @@ -615,7 +641,9 @@ def image_data(self, img_data: BiocFrame): ############################## def get_scale_factors( - self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None + self, + sample_id: Union[str, bool, None] = None, + image_id: Union[str, bool, None] = None, ) -> List[float]: """Return scale factor(s) of image(s) based on the provided sample and image ids. See :py:meth:`~get_img` for more details on the behavior for various @@ -638,7 +666,9 @@ def get_scale_factors( _validate_id(sample_id) _validate_id(image_id) - img_data_subset = retrieve_rows_by_id(img_data=self.img_data, sample_id=sample_id, image_id=image_id) + img_data_subset = retrieve_rows_by_id( + img_data=self.img_data, sample_id=sample_id, image_id=image_id + ) if img_data_subset.shape[0] == 1: return img_data_subset["scale_factor"][0] @@ -650,7 +680,10 @@ def get_scale_factors( ################################ def set_column_data( - self, cols: Optional[BiocFrame], replace_column_names: bool = False, in_place: bool = False + self, + cols: Optional[BiocFrame], + replace_column_names: bool = False, + in_place: bool = False, ) -> "SpatialExperiment": """Override: Set sample data. 
@@ -692,14 +725,18 @@ def set_column_data( ################################ def get_slice( - self, rows: Optional[Union[str, int, bool, Sequence]], columns: Optional[Union[str, int, bool, Sequence]] + self, + rows: Optional[Union[str, int, bool, Sequence]], + columns: Optional[Union[str, int, bool, Sequence]], ) -> "SpatialExperiment": """Alias for :py:attr:`~__getitem__`.""" spe = super().get_slice(rows=rows, columns=columns) slicer = self._generic_slice(rows=rows, columns=columns) - do_slice_cols = not (isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None)) + do_slice_cols = not ( + isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None) + ) new_spatial_coords = None @@ -707,7 +744,9 @@ def get_slice( new_spatial_coords = self.spatial_coords[slicer.col_indices, :] column_sample_ids = set(spe.column_data["sample_id"]) - mask = [sample_id in column_sample_ids for sample_id in self.img_data["sample_id"]] + mask = [ + sample_id in column_sample_ids for sample_id in self.img_data["sample_id"] + ] new_img_data = self.img_data[mask,] @@ -734,7 +773,9 @@ def get_slice( ################################ def get_img( - self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None + self, + sample_id: Union[str, bool, None] = None, + image_id: Union[str, bool, None] = None, ) -> Union[VirtualSpatialImage, List[VirtualSpatialImage]]: """ Retrieve spatial images based on the provided sample and image ids. @@ -778,7 +819,9 @@ def get_img( _validate_id(sample_id) _validate_id(image_id) - img_data_subset = retrieve_rows_by_id(img_data=self.img_data, sample_id=sample_id, image_id=image_id) + img_data_subset = retrieve_rows_by_id( + img_data=self.img_data, sample_id=sample_id, image_id=image_id + ) if img_data_subset is None: return [] @@ -828,7 +871,9 @@ def add_img( Raises: ValueError: If the sample_id and image_id pair already exists. 
""" - _validate_sample_image_ids(img_data=self._img_data, new_sample_id=sample_id, new_image_id=image_id) + _validate_sample_image_ids( + img_data=self._img_data, new_sample_id=sample_id, new_image_id=image_id + ) if isinstance(image_source, (str, Path)): is_url = urlparse(str(image_source)).scheme in ("http", "https", "ftp") @@ -856,12 +901,21 @@ def add_img( # TODO: implement rmv_img() def rmv_img( - self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None + self, + sample_id: Union[str, bool, None] = None, + image_id: Union[str, bool, None] = None, ) -> "SpatialExperiment": raise NotImplementedError() - def img_source(self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None, path=False): - raise NotImplementedError("This function is irrelevant because it is for `RemoteSpatialImages`") + def img_source( + self, + sample_id: Union[str, bool, None] = None, + image_id: Union[str, bool, None] = None, + path=False, + ): + raise NotImplementedError( + "This function is irrelevant because it is for `RemoteSpatialImages`" + ) def img_raster(self, sample_id=None, image_id=None): # NOTE: this function seems redundant, might be an artifact of the different subclasses of SpatialImage in the R implementation? just call `get_img()` for now @@ -894,6 +948,7 @@ def combine_columns(self, *other) -> "SpatialExperiment": #######>> combine ops <<######## ################################ + @ut.combine_columns.register(SpatialExperiment) def combine_columns(*x: SpatialExperiment) -> SpatialExperiment: """Combine multiple ``SpatialExperiment`` objects by column. 
@@ -935,9 +990,9 @@ def combine_columns(*x: SpatialExperiment) -> SpatialExperiment: ) _all_spatial_coords = [y._spatial_coords for y in x] - _new_spatial_coords = merge_spe_spatial_coords(_all_spatial_coords) + _new_spatial_coords = merge_spatial_coordinates(_all_spatial_coords) - _new_cols, _new_img_data = merge_spe_cols_and_img_data(x) + _new_cols, _new_img_data = merge_spatial_frames(x) current_class_const = type(first) return current_class_const( @@ -952,11 +1007,10 @@ def combine_columns(*x: SpatialExperiment) -> SpatialExperiment: main_experiment_name=first._main_experiment_name, alternative_experiments=_new_alt_expt, spatial_coords=_new_spatial_coords, - img_data=_new_img_data + img_data=_new_img_data, ) - @ut.relaxed_combine_columns.register(SpatialExperiment) def relaxed_combine_columns( *x: SpatialExperiment, diff --git a/src/spatialexperiment/SpatialImage.py b/src/spatialexperiment/SpatialImage.py index 3a90872..4218a2a 100644 --- a/src/spatialexperiment/SpatialImage.py +++ b/src/spatialexperiment/SpatialImage.py @@ -50,7 +50,9 @@ def get_metadata(self) -> dict: """ return self._metadata - def set_metadata(self, metadata: dict, in_place: bool = False) -> "VirtualSpatialImage": + def set_metadata( + self, metadata: dict, in_place: bool = False + ) -> "VirtualSpatialImage": """Set additional metadata. Args: @@ -65,7 +67,9 @@ def set_metadata(self, metadata: dict, in_place: bool = False) -> "VirtualSpatia or as a reference to the (in-place-modified) original. """ if not isinstance(metadata, dict): - raise TypeError(f"`metadata` must be a dictionary, provided {type(metadata)}.") + raise TypeError( + f"`metadata` must be a dictionary, provided {type(metadata)}." 
+ ) output = self._define_output(in_place) output._metadata = metadata return output @@ -146,7 +150,9 @@ def _sanitize_loaded_image(image): class LoadedSpatialImage(VirtualSpatialImage): """Class for images loaded into memory.""" - def __init__(self, image: Union[Image.Image, np.ndarray], metadata: Optional[dict] = None): + def __init__( + self, image: Union[Image.Image, np.ndarray], metadata: Optional[dict] = None + ): """Initialize the object. Args: @@ -250,7 +256,9 @@ def get_image(self) -> Image.Image: return self._image - def set_image(self, image: Union[Image.Image, np.ndarray], in_place: bool = False) -> "LoadedSpatialImage": + def set_image( + self, image: Union[Image.Image, np.ndarray], in_place: bool = False + ) -> "LoadedSpatialImage": """Set new image. Args: @@ -402,7 +410,9 @@ def get_path(self) -> Path: """Get the path to the image file.""" return self._path - def set_path(self, path: Union[str, Path], in_place: bool = False) -> "StoredSpatialImage": + def set_path( + self, path: Union[str, Path], in_place: bool = False + ) -> "StoredSpatialImage": """Update the path to the image file. Args: @@ -463,7 +473,9 @@ def _validate_url(url): class RemoteSpatialImage(VirtualSpatialImage): """Class for remotely hosted images.""" - def __init__(self, url: str, metadata: Optional[dict] = None, validate: bool = True): + def __init__( + self, url: str, metadata: Optional[dict] = None, validate: bool = True + ): """Initialize the object. 
Args: @@ -494,7 +506,7 @@ def _define_output(self, in_place: bool = False) -> "RemoteSpatialImage": ######################### ######>> Equality <<##### ######################### - + def __eq__(self, other) -> bool: return super().__eq__(other) and self.url == other.url diff --git a/src/spatialexperiment/_combineutils.py b/src/spatialexperiment/_combineutils.py index db36fe3..56851e8 100644 --- a/src/spatialexperiment/_combineutils.py +++ b/src/spatialexperiment/_combineutils.py @@ -9,36 +9,38 @@ from spatialexperiment import SpatialExperiment -def _append_sample_indices(bframes: List[BiocFrame]) -> List[BiocFrame]: +def _append_indices_to_samples(bframes: List[BiocFrame]) -> List[BiocFrame]: """Append indices to sample IDs for a list of `BiocFrames`. - + For each `BiocFrame`, appends an index to all sample IDs to ensure uniqueness across multiple frames. - + Args: List of `BiocFrame` objects containing sample IDs. - + Returns: List of `BiocFrame`s with modified sample IDs. """ modified_bframes = [] for i, bframe in enumerate(bframes, start=1): bframe_copy = deepcopy(bframe) - bframe_copy["sample_id"] = [f"{sample_id}_{i}" for sample_id in bframe_copy["sample_id"]] + bframe_copy["sample_id"] = [ + f"{sample_id}_{i}" for sample_id in bframe_copy["sample_id"] + ] modified_bframes.append(bframe_copy) return modified_bframes -def merge_spe_cols_and_img_data(x: List[SpatialExperiment]) -> Tuple[BiocFrame, BiocFrame]: +def merge_spatial_frames(x: List[SpatialExperiment]) -> Tuple[BiocFrame, BiocFrame]: """Merge column data and image data from multiple ``SpatialExperiment`` objects. - + If duplicate sample IDs exist across objects, appends indices to make them unique. Sample IDs in column data determine the uniqueness check as they are the superset of IDs in image data. - + Args: x: List of ``SpatialExperiment`` objects - + Returns: A tuple with the merged column data and image data. 
""" @@ -46,35 +48,37 @@ def merge_spe_cols_and_img_data(x: List[SpatialExperiment]) -> Tuple[BiocFrame, img_datas = [y._img_data for y in x] expected_unique = sum([len(set(_cols["sample_id"])) for _cols in cols]) - all_sample_ids = list(itertools.chain.from_iterable(_cols["sample_id"] for _cols in cols)) + all_sample_ids = list( + itertools.chain.from_iterable(_cols["sample_id"] for _cols in cols) + ) if len(set(all_sample_ids)) < expected_unique: warn( "'sample_id's are duplicated across 'SpatialExperiment' objects to 'combine_columns'; appending sample indices." ) - modified_columns = _append_sample_indices(cols) - modified_img_data = _append_sample_indices(img_datas) + modified_columns = _append_indices_to_samples(cols) + modified_img_data = _append_indices_to_samples(img_datas) else: modified_columns = cols modified_img_data = img_datas - _new_cols = ut.combine_rows(*modified_columns) + _new_cols = ut.combine_rows(*modified_columns) _new_img_data = ut.combine_rows(*modified_img_data) return _new_cols, _new_img_data -def merge_spe_spatial_coords(spatial_coords: List[BiocFrame]) -> BiocFrame: +def merge_spatial_coordinates(spatial_coords: List[BiocFrame]) -> BiocFrame: """Merge spatial coordinates from multiple frames. - + Args: spatial_coords: List of `BiocFrame`s containing spatial coordinates. - + Returns: A merged BiocFrame containing all spatial coordinates. - + Raises: ValueError: If spatial coordinates have different numbers of columns. - + Warns: If dimension names are not consistent across all `BiocFrame`s. """ @@ -84,9 +88,7 @@ def merge_spe_spatial_coords(spatial_coords: List[BiocFrame]) -> BiocFrame: first_columns = spatial_coords[0].columns if not all(coords.columns == first_columns for coords in spatial_coords): - warn( - "Not all 'spatial_coords' have the same dimension names." 
- ) + warn("Not all 'spatial_coords' have the same dimension names.") _new_spatial_coords = ut.combine_rows(*spatial_coords) return _new_spatial_coords diff --git a/src/spatialexperiment/_imgutils.py b/src/spatialexperiment/_imgutils.py index c9c2c29..9aa67b0 100644 --- a/src/spatialexperiment/_imgutils.py +++ b/src/spatialexperiment/_imgutils.py @@ -8,7 +8,9 @@ def retrieve_rows_by_id( - img_data: BiocFrame, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None + img_data: BiocFrame, + sample_id: Union[str, bool, None] = None, + image_id: Union[str, bool, None] = None, ) -> Union[BiocFrame, None]: """ Retrieve rows from `img_data` based on specified `sample_id` and `image_id`. @@ -51,11 +53,15 @@ def retrieve_rows_by_id( else: subset = subset.combine_rows(row) else: - subset = img_data[[_image_id == image_id for _image_id in img_data["image_id"]], :] + subset = img_data[ + [_image_id == image_id for _image_id in img_data["image_id"]], : + ] elif sample_id is None: first_sample_id = img_data["sample_id"][0] - first_sample = img_data[[_sample_id == first_sample_id for _sample_id in img_data["sample_id"]], :] + first_sample = img_data[ + [_sample_id == first_sample_id for _sample_id in img_data["sample_id"]], : + ] if image_id is True: subset = first_sample @@ -63,10 +69,14 @@ def retrieve_rows_by_id( elif image_id is None: subset = first_sample[0, :] else: - subset = first_sample[[_image_id == image_id for _image_id in img_data["image_id"]], :] + subset = first_sample[ + [_image_id == image_id for _image_id in img_data["image_id"]], : + ] else: - selected_sample = img_data[[_sample_id == sample_id for _sample_id in img_data["sample_id"]], :] + selected_sample = img_data[ + [_sample_id == sample_id for _sample_id in img_data["sample_id"]], : + ] if selected_sample.shape[0] == 0: subset = selected_sample @@ -75,6 +85,8 @@ def retrieve_rows_by_id( elif image_id is None: subset = selected_sample[0, :] else: - subset = 
selected_sample[[_image_id == image_id for _image_id in selected_sample["image_id"]]] + subset = selected_sample[ + [_image_id == image_id for _image_id in selected_sample["image_id"]] + ] return subset diff --git a/src/spatialexperiment/_initutils.py b/src/spatialexperiment/_initutils.py index edaa4c2..1569889 100644 --- a/src/spatialexperiment/_initutils.py +++ b/src/spatialexperiment/_initutils.py @@ -33,7 +33,11 @@ def construct_spatial_coords_from_names( current_column_data = _sanitize_frame(column_data, num_rows=column_data.shape[1]) - missing_names = [name for name in spatial_coords_names if name not in current_column_data.column_names] + missing_names = [ + name + for name in spatial_coords_names + if name not in current_column_data.column_names + ] if missing_names: raise ValueError( f"The following names in `spatial_coords_names` are missing from `column_data`: {missing_names}" @@ -44,7 +48,11 @@ def construct_spatial_coords_from_names( column_data_subset = deepcopy( current_column_data[ :, - [col for col in current_column_data.column_names if col not in spatial_coords_names], + [ + col + for col in current_column_data.column_names + if col not in spatial_coords_names + ], ] ) @@ -52,7 +60,11 @@ def construct_spatial_coords_from_names( def construct_img_data( - sample_id: str, image_id: str, image_sources: List[str], scale_factors: List[float], load_image: bool = False + sample_id: str, + image_id: str, + image_sources: List[str], + scale_factors: List[float], + load_image: bool = False, ) -> BiocFrame: """Construct the image data for a `SpatialExperiment`. @@ -77,7 +89,9 @@ def construct_img_data( A `BiocFrame` representing the image data for a `SpatialExperiment`. """ if not len(image_id) == len(image_sources) == len(scale_factors): - raise ValueError("'image_id', 'image_sources' and 'scale_factors' are not the same length.") + raise ValueError( + "'image_id', 'image_sources' and 'scale_factors' are not the same length." 
+ ) spis = [] for image_source in image_sources: diff --git a/src/spatialexperiment/_validators.py b/src/spatialexperiment/_validators.py index 27a62d5..a3ae86e 100644 --- a/src/spatialexperiment/_validators.py +++ b/src/spatialexperiment/_validators.py @@ -13,7 +13,9 @@ def _validate_spatial_coords_names(spatial_coords_names, spatial_coords): raise TypeError("'spatial_coords_names' is not a list of strings") if len(spatial_coords_names) != spatial_coords.shape[1]: - raise ValueError(f"Expected {spatial_coords.shape[1]} names. Got {len(spatial_coords_names)} names.") + raise ValueError( + f"Expected {spatial_coords.shape[1]} names. Got {len(spatial_coords_names)} names." + ) def _validate_column_data(column_data): @@ -43,7 +45,9 @@ def _validate_sample_image_ids(img_data, new_sample_id, new_image_id): for row in img_data: data = row[1] if data["sample_id"] == new_sample_id and data["image_id"] == new_image_id: - raise ValueError(f"Image with Sample ID: {new_sample_id} and Image ID: {new_image_id} already exists") + raise ValueError( + f"Image with Sample ID: {new_sample_id} and Image ID: {new_image_id} already exists" + ) # TODO: check if 'new_sample_id' is present in column_data['sample_id'] @@ -54,7 +58,7 @@ def _validate_spatial_coords(spatial_coords, column_data): if not hasattr(spatial_coords, "shape"): raise TypeError( - f"Spatial coordinates must be a dataframe-like object." + "Spatial coordinates must be a dataframe-like object." "Does not contain a `shape` property." 
) @@ -87,7 +91,9 @@ def _validate_sample_ids(column_data, img_data): column_data_sample_ids = set(column_data["sample_id"]) if not img_data_sample_ids <= column_data_sample_ids: - raise ValueError("All 'sample_id's in 'img_data' must be present in 'column_data['sample_id']") + raise ValueError( + "All 'sample_id's in 'img_data' must be present in 'column_data['sample_id']" + ) if img_data_sample_ids != column_data_sample_ids: warnings.warn( diff --git a/tests/conftest.py b/tests/conftest.py index 0283a7c..386b519 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -43,17 +43,9 @@ def spe(): } ) - row_names = BiocFrame( - { - "row_names": range(nrows) - } - ) + row_names = BiocFrame({"row_names": range(nrows)}) - column_names = BiocFrame( - { - "column_names": range(ncols) - } - ) + column_names = BiocFrame({"column_names": range(ncols)}) x_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) y_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) diff --git a/tests/test_img_data_methods.py b/tests/test_img_data_methods.py index d36ef4c..6e9adca 100644 --- a/tests/test_img_data_methods.py +++ b/tests/test_img_data_methods.py @@ -1,6 +1,5 @@ import pytest from copy import deepcopy -from spatialexperiment import construct_spatial_image_class from spatialexperiment.SpatialImage import VirtualSpatialImage __author__ = "keviny2" diff --git a/tests/test_pSFE.py b/tests/test_pSFE.py index 93521f8..7bfed7e 100644 --- a/tests/test_pSFE.py +++ b/tests/test_pSFE.py @@ -9,6 +9,7 @@ __copyright__ = "jkanche" __license__ = "MIT" + def test_init_basic(): nrows = 200 ncols = 500 @@ -17,11 +18,13 @@ def test_init_basic(): assert isinstance(tspe, ProxySpatialFeatureExperiment) + def test_init_empty(): tspe = ProxySpatialFeatureExperiment() assert isinstance(tspe, ProxySpatialFeatureExperiment) + def test_init_with_col_geoms(): nrows = 200 ncols = 500 @@ -33,7 +36,9 @@ def test_init_with_col_geoms(): ] ) - colgeoms = {"polygons" : gpd.GeoDataFrame({"geometry": 
polys})} - tspe = ProxySpatialFeatureExperiment(assays={"spots": counts}, col_geometries=colgeoms) + colgeoms = {"polygons": gpd.GeoDataFrame({"geometry": polys})} + tspe = ProxySpatialFeatureExperiment( + assays={"spots": counts}, col_geometries=colgeoms + ) - assert isinstance(tspe, ProxySpatialFeatureExperiment) \ No newline at end of file + assert isinstance(tspe, ProxySpatialFeatureExperiment) diff --git a/tests/test_spe.py b/tests/test_spe.py index a875b1c..7936222 100644 --- a/tests/test_spe.py +++ b/tests/test_spe.py @@ -22,6 +22,7 @@ def test_SPE_empty_constructor(): assert "sample_id" in tspe.column_data.columns.as_list() assert tspe.column_data.shape == (tspe.shape[1], 1) + def test_spe_basic(): nrows = 200 ncols = 500 diff --git a/tests/test_spe_combine.py b/tests/test_spe_combine.py index d02d598..6295d23 100644 --- a/tests/test_spe_combine.py +++ b/tests/test_spe_combine.py @@ -13,20 +13,32 @@ def test_combine_columns(spe): spe2 = deepcopy(spe) # TODO: this is a temporary fix until https://github.com/BiocPy/SpatialExperiment/issues/25 is finished - spe1.column_data["sample_id"] = [f"{sample_id}_A" for sample_id in spe1.column_data["sample_id"]] - spe2.column_data["sample_id"] = [f"{sample_id}_B" for sample_id in spe2.column_data["sample_id"]] - spe1.img_data["sample_id"] = [f"{sample_id}_A" for sample_id in spe1.img_data["sample_id"]] - spe2.img_data["sample_id"] = [f"{sample_id}_B" for sample_id in spe2.img_data["sample_id"]] + spe1.column_data["sample_id"] = [ + f"{sample_id}_A" for sample_id in spe1.column_data["sample_id"] + ] + spe2.column_data["sample_id"] = [ + f"{sample_id}_B" for sample_id in spe2.column_data["sample_id"] + ] + spe1.img_data["sample_id"] = [ + f"{sample_id}_A" for sample_id in spe1.img_data["sample_id"] + ] + spe2.img_data["sample_id"] = [ + f"{sample_id}_B" for sample_id in spe2.img_data["sample_id"] + ] combined = ut.combine_columns(spe1, spe2) # img_data checks assert combined.img_data.shape[0] == 2 * spe.img_data.shape[0] 
assert set(combined.column_data["sample_id"]) == set(combined.img_data["sample_id"]) - assert set(combined.column_data["sample_id"]) == set(spe1.column_data["sample_id"] + spe2.column_data["sample_id"]) + assert set(combined.column_data["sample_id"]) == set( + spe1.column_data["sample_id"] + spe2.column_data["sample_id"] + ) idx1 = range(spe1.img_data.shape[0]) - idx2 = range(spe1.img_data.shape[0], spe1.img_data.shape[0] + spe2.img_data.shape[0]) + idx2 = range( + spe1.img_data.shape[0], spe1.img_data.shape[0] + spe2.img_data.shape[0] + ) img_data1 = combined.img_data[idx1, :] img_data2 = combined.img_data[idx2, :] @@ -42,7 +54,10 @@ def test_combine_columns(spe): # spatial_coords checks idx1 = range(spe1.spatial_coords.shape[0]) - idx2 = range(spe1.spatial_coords.shape[0], spe1.spatial_coords.shape[0] + spe2.spatial_coords.shape[0]) + idx2 = range( + spe1.spatial_coords.shape[0], + spe1.spatial_coords.shape[0] + spe2.spatial_coords.shape[0], + ) spatial_coords1 = combined.spatial_coords[idx1, :] spatial_coords2 = combined.spatial_coords[idx2, :] diff --git a/tests/test_spe_methods.py b/tests/test_spe_methods.py index 341985c..1dd29ea 100644 --- a/tests/test_spe_methods.py +++ b/tests/test_spe_methods.py @@ -18,6 +18,7 @@ y_coords = np.random.uniform(low=0.0, high=100.0, size=ncols) spatial_coords = np.column_stack((x_coords, y_coords)) + def test_spatial_coords_numpy(): tspe = SpatialExperiment(assays={"counts": counts}, spatial_coords=spatial_coords) diff --git a/tests/test_spi.py b/tests/test_spi.py index a05c3ed..f7e93c5 100644 --- a/tests/test_spi.py +++ b/tests/test_spi.py @@ -1,7 +1,12 @@ import pytest from PIL import Image from spatialexperiment import construct_spatial_image_class -from spatialexperiment.SpatialImage import VirtualSpatialImage, StoredSpatialImage, LoadedSpatialImage, RemoteSpatialImage +from spatialexperiment.SpatialImage import ( + VirtualSpatialImage, + StoredSpatialImage, + LoadedSpatialImage, + RemoteSpatialImage, +) __author__ = 
"keviny2" __copyright__ = "keviny2" @@ -18,7 +23,9 @@ def test_spi_constructor_path(): def test_spi_constructor_spi(): - spi_1 = construct_spatial_image_class("tests/images/sample_image1.jpg", is_url=False) + spi_1 = construct_spatial_image_class( + "tests/images/sample_image1.jpg", is_url=False + ) spi_2 = construct_spatial_image_class(spi_1, is_url=False) assert issubclass(type(spi_2), VirtualSpatialImage) @@ -49,9 +56,14 @@ def test_invalid_input(): with pytest.raises(Exception): construct_spatial_image_class(5, is_url=False) + def test_spi_equality(): - spi_path_1 = construct_spatial_image_class("tests/images/sample_image1.jpg", is_url=False) - spi_path_2 = construct_spatial_image_class("tests/images/sample_image1.jpg", is_url=False) + spi_path_1 = construct_spatial_image_class( + "tests/images/sample_image1.jpg", is_url=False + ) + spi_path_2 = construct_spatial_image_class( + "tests/images/sample_image1.jpg", is_url=False + ) assert spi_path_1 == spi_path_2 From 3d547037a17fdf42dc61201d16874d1fe093915a Mon Sep 17 00:00:00 2001 From: keviny2 Date: Tue, 25 Feb 2025 16:20:53 -0800 Subject: [PATCH 09/12] Handle relaxed combine columns --- src/spatialexperiment/SpatialExperiment.py | 51 ++++++++++++++++++++++ src/spatialexperiment/_combineutils.py | 25 ++++++++--- tests/test_spe_combine.py | 1 + 3 files changed, 71 insertions(+), 6 deletions(-) diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py index 13d8f24..9f31e1e 100644 --- a/src/spatialexperiment/SpatialExperiment.py +++ b/src/spatialexperiment/SpatialExperiment.py @@ -12,12 +12,15 @@ check_assays_are_equal, merge_assays, merge_se_colnames, + relaxed_merge_assays ) from summarizedexperiment._frameutils import _sanitize_frame from summarizedexperiment.RangedSummarizedExperiment import GRangesOrGRangesList from singlecellexperiment import SingleCellExperiment from singlecellexperiment._combineutils import ( merge_generic, + relaxed_merge_generic, + 
relaxed_merge_numpy_generic ) from ._imgutils import retrieve_rows_by_id @@ -1030,3 +1033,51 @@ def relaxed_combine_columns( the union of all rows. Rows absent in any ``x`` are filled in with placeholders consisting of Nones or masked NumPy values. """ + warn( + "'row_pairs' and 'column_pairs' are currently ignored during this operation.", + UserWarning, + ) + + first = x[0] + _new_assays = relaxed_merge_assays(x, by="column") + + _new_col_names = merge_se_colnames(x) + + _new_rdim = None + try: + _new_rdim = relaxed_merge_numpy_generic(x, by="row", attr="reduced_dims") + except Exception as e: + warn( + f"Cannot combine 'reduced_dimensions' across experiments, {str(e)}", + UserWarning, + ) + + _new_alt_expt = None + try: + _new_alt_expt = relaxed_merge_generic(x, by="column", attr="alternative_experiments") + except Exception as e: + warn( + f"Cannot combine 'alternative_experiments' across experiments, {str(e)}", + UserWarning, + ) + + _all_spatial_coords = [y._spatial_coords for y in x] + _new_spatial_coords = merge_spatial_coordinates(_all_spatial_coords) + + _new_cols, _new_img_data = merge_spatial_frames(x) + + current_class_const = type(first) + return current_class_const( + assays=_new_assays, + row_ranges=first._row_ranges, + row_data=first._rows, + column_data=_new_cols, + row_names=first._row_names, + column_names=_new_col_names, + metadata=first._metadata, + reduced_dims=_new_rdim, + main_experiment_name=first._main_experiment_name, + alternative_experiments=_new_alt_expt, + spatial_coords=_new_spatial_coords, + img_data=_new_img_data, + ) diff --git a/src/spatialexperiment/_combineutils.py b/src/spatialexperiment/_combineutils.py index 56851e8..0162714 100644 --- a/src/spatialexperiment/_combineutils.py +++ b/src/spatialexperiment/_combineutils.py @@ -16,7 +16,7 @@ def _append_indices_to_samples(bframes: List[BiocFrame]) -> List[BiocFrame]: across multiple frames. Args: - List of `BiocFrame` objects containing sample IDs. 
+ bframes: List of `BiocFrame` objects containing sample IDs. Returns: List of `BiocFrame`s with modified sample IDs. @@ -31,7 +31,7 @@ def _append_indices_to_samples(bframes: List[BiocFrame]) -> List[BiocFrame]: return modified_bframes -def merge_spatial_frames(x: List[SpatialExperiment]) -> Tuple[BiocFrame, BiocFrame]: +def merge_spatial_frames(x: List[SpatialExperiment], relaxed: bool = False) -> Tuple[BiocFrame, BiocFrame]: """Merge column data and image data from multiple ``SpatialExperiment`` objects. If duplicate sample IDs exist across objects, appends indices to make them unique. @@ -40,6 +40,9 @@ def merge_spatial_frames(x: List[SpatialExperiment]) -> Tuple[BiocFrame, BiocFra Args: x: List of ``SpatialExperiment`` objects + relaxed: If `True`, allows frames with different columns to be combined. + Absent columns in any frame are filled with appropriate placeholder values. + Defaults to `False`. Returns: A tuple with the merged column data and image data. @@ -62,16 +65,23 @@ def merge_spatial_frames(x: List[SpatialExperiment]) -> Tuple[BiocFrame, BiocFra modified_columns = cols modified_img_data = img_datas - _new_cols = ut.combine_rows(*modified_columns) - _new_img_data = ut.combine_rows(*modified_img_data) + if relaxed: + _new_cols = ut.relaxed_combine_rows(*modified_columns) + _new_img_data = ut.relaxed_combine_rows(*modified_img_data) + else: + _new_cols = ut.combine_rows(*modified_columns) + _new_img_data = ut.combine_rows(*modified_img_data) return _new_cols, _new_img_data -def merge_spatial_coordinates(spatial_coords: List[BiocFrame]) -> BiocFrame: +def merge_spatial_coordinates(spatial_coords: List[BiocFrame], relaxed: bool = False) -> BiocFrame: """Merge spatial coordinates from multiple frames. Args: spatial_coords: List of `BiocFrame`s containing spatial coordinates. + relaxed: If `True`, allows frames with different columns to be combined. + Absent columns in any frame are filled with appropriate placeholder values. + Defaults to `False`. 
Returns: A merged BiocFrame containing all spatial coordinates. @@ -90,5 +100,8 @@ def merge_spatial_coordinates(spatial_coords: List[BiocFrame]) -> BiocFrame: if not all(coords.columns == first_columns for coords in spatial_coords): warn("Not all 'spatial_coords' have the same dimension names.") - _new_spatial_coords = ut.combine_rows(*spatial_coords) + if relaxed: + _new_spatial_coords = ut.relaxed_combine_rows(*spatial_coords) + else: + _new_spatial_coords = ut.combine_rows(*spatial_coords) return _new_spatial_coords diff --git a/tests/test_spe_combine.py b/tests/test_spe_combine.py index 6295d23..8973bee 100644 --- a/tests/test_spe_combine.py +++ b/tests/test_spe_combine.py @@ -64,6 +64,7 @@ def test_combine_columns(spe): assert (spatial_coords1.to_pandas() == spe1.spatial_coords.to_pandas()).all().all() assert (spatial_coords2.to_pandas() == spe2.spatial_coords.to_pandas()).all().all() +# TODO: write a test for relaxed_combine_columns def test_duplicate_sample_ids(spe): with pytest.warns(UserWarning): From 4a625adb17afbb2c39ca00665c4aa2f88e5a3d6a Mon Sep 17 00:00:00 2001 From: keviny2 Date: Tue, 25 Feb 2025 20:35:08 -0800 Subject: [PATCH 10/12] Test relaxed combine columns --- tests/test_spe_combine.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/test_spe_combine.py b/tests/test_spe_combine.py index 8973bee..23d456b 100644 --- a/tests/test_spe_combine.py +++ b/tests/test_spe_combine.py @@ -1,7 +1,9 @@ from copy import deepcopy import pytest +import numpy as np import biocutils as ut +from spatialexperiment import SpatialExperiment __author__ = "keviny2" __copyright__ = "keviny2" @@ -64,7 +66,32 @@ def test_combine_columns(spe): assert (spatial_coords1.to_pandas() == spe1.spatial_coords.to_pandas()).all().all() assert (spatial_coords2.to_pandas() == spe2.spatial_coords.to_pandas()).all().all() -# TODO: write a test for relaxed_combine_columns + +def test_relaxed_combine_columns(spe): + nrows, ncols = 
spe.shape + spe2 = spe.set_assays( + { + "counts": np.random.poisson(lam=10, size=(nrows, ncols)), + "normalized": np.random.normal(size=(nrows, ncols)) + }, + in_place=False + ) + + with pytest.raises(Exception): + combined = ut.combine_columns(spe, spe2) + + combined = ut.relaxed_combine_columns(spe, spe2) + assert combined is not None + assert isinstance(combined, SpatialExperiment) + assert combined.shape[1] == spe.shape[1] + spe2.shape[1] + assert combined.shape[0] == spe.shape[0] + + combined2 = spe.relaxed_combine_columns(spe2) + assert combined2 is not None + assert isinstance(combined2, SpatialExperiment) + assert combined2.shape[1] == spe.shape[1] + spe2.shape[1] + assert combined2.shape[0] == spe.shape[0] + def test_duplicate_sample_ids(spe): with pytest.warns(UserWarning): From 017feb77a6bd9729dd0bf660cc0d60d1354b72d2 Mon Sep 17 00:00:00 2001 From: keviny2 Date: Tue, 25 Feb 2025 21:05:27 -0800 Subject: [PATCH 11/12] Make type hints compatible with python<=3.10 --- src/spatialexperiment/_combineutils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spatialexperiment/_combineutils.py b/src/spatialexperiment/_combineutils.py index 0162714..4b40641 100644 --- a/src/spatialexperiment/_combineutils.py +++ b/src/spatialexperiment/_combineutils.py @@ -1,3 +1,4 @@ +from __future__ import annotations from typing import List, Tuple from warnings import warn From 9520b7fd913268bcb6d4fbfb36bde8730db9dc4d Mon Sep 17 00:00:00 2001 From: keviny2 Date: Tue, 25 Feb 2025 21:17:11 -0800 Subject: [PATCH 12/12] Update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4965727..7c99842 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Version 0.0.6 +- Added `combine_columns` function +- Implemented `__eq__` override for `SpatialImage` subclasses + ## Version 0.0.5 - Implementing a placeholder `SpatialFeatureExperiment` class.
This version only implements the data structure to hold various geometries but none of the methods except for slicing.