From 8cab9ef915ce4e0e9c3ebbba2a6a2480f2818cde Mon Sep 17 00:00:00 2001 From: keviny2 Date: Wed, 19 Mar 2025 15:51:15 -0700 Subject: [PATCH 1/3] Refactor retrieve_rows_by_id to get_img_idx --- src/spatialexperiment/SpatialExperiment.py | 43 +++++--- src/spatialexperiment/_imgutils.py | 108 +++++++++------------ tests/test_img_data_methods.py | 66 ++++++++++--- 3 files changed, 127 insertions(+), 90 deletions(-) diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py index 9f31e1e..988cbec 100644 --- a/src/spatialexperiment/SpatialExperiment.py +++ b/src/spatialexperiment/SpatialExperiment.py @@ -23,7 +23,7 @@ relaxed_merge_numpy_generic ) -from ._imgutils import retrieve_rows_by_id +from ._imgutils import get_img_idx from ._validators import ( _validate_column_data, _validate_id, @@ -669,14 +669,11 @@ def get_scale_factors( _validate_id(sample_id) _validate_id(image_id) - img_data_subset = retrieve_rows_by_id( + idxs = get_img_idx( img_data=self.img_data, sample_id=sample_id, image_id=image_id ) - if img_data_subset.shape[0] == 1: - return img_data_subset["scale_factor"][0] - - return img_data_subset["scale_factor"] + return self.img_data[idxs,]["scale_factor"] ################################ ###>> OVERRIDE column_data <<### @@ -772,7 +769,7 @@ def get_slice( ) ################################ - ######>> img_data funcs <<###### + #####>> img_data methods <<##### ################################ def get_img( @@ -822,17 +819,15 @@ def get_img( _validate_id(sample_id) _validate_id(image_id) - img_data_subset = retrieve_rows_by_id( + if not self.img_data: + return None + + idxs = get_img_idx( img_data=self.img_data, sample_id=sample_id, image_id=image_id ) - if img_data_subset is None: - return [] - - if img_data_subset.shape[0] == 1: - return img_data_subset["data"][0] - - return img_data_subset["data"] + images = self.img_data[idxs,]["data"] + return images[0] if len(images) == 1 else images def add_img( self, @@ -907,7 +902,25 @@ def rmv_img( self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None, + in_place: bool = False ) -> "SpatialExperiment": + """Remove an image entry. + + Args: + sample_id: + - `sample_id=True`: Matches all samples. + - `sample_id=None`: Matches the first sample. + - `sample_id=""`: Matches a sample by its id. + + image_id: + - `image_id=True`: Matches all images for the specified sample(s). + - `image_id=None`: Matches the first image for the sample(s). + - `image_id=""`: Matches image(s) by its(their) id. + + in_place: + Whether to modify the ``SpatialExperiment`` in place. + Defaults to False. + """ raise NotImplementedError() def img_source( diff --git a/src/spatialexperiment/_imgutils.py b/src/spatialexperiment/_imgutils.py index b26b110..1c79ff6 100644 --- a/src/spatialexperiment/_imgutils.py +++ b/src/spatialexperiment/_imgutils.py @@ -1,9 +1,10 @@ -from typing import Union +from typing import Union, List import os from io import BytesIO from pathlib import Path from urllib.parse import urlparse +import numpy as np from PIL import Image from biocframe import BiocFrame from .SpatialImage import construct_spatial_image_class @@ -86,13 +87,13 @@ def construct_img_data( ) -def retrieve_rows_by_id( +def get_img_idx( img_data: BiocFrame, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None, -) -> Union[BiocFrame, None]: +) -> List[int]: """ - Retrieve rows from `img_data` based on specified `sample_id` and `image_id`. + Retrieve the row index/indices of image(s) with matching 'sample_id' and 'image_id' from the 'img_data'. Args: img_data: @@ -109,65 +110,44 @@ def retrieve_rows_by_id( - `image_id=""`: Matches image(s) by its(their) id. Returns: - The filtered `img_data` based on the specified ids, or `None` if `img_data` is empty. + The row index/indices of image(s) with matchine 'sample_id' and 'image_id' from the 'img_data'. """ + sample_ids = np.array(img_data["sample_id"]) + image_ids = np.array(img_data["image_id"]) + if isinstance(sample_id, str) and isinstance(image_id, str): + sid = sample_ids == sample_id + iid = image_ids == image_id + elif sample_id is True and image_id is True: + sid = iid = np.full(len(img_data), True) + elif sample_id is None and image_id is None: + sid = iid = np.eye(len(img_data))[0, :] + elif isinstance(sample_id, str) and image_id is True: + sid = sample_ids == sample_id + iid = np.full(len(img_data), True) + elif sample_id is True and isinstance(image_id, str): + sid = np.full(len(img_data), True) + iid = image_ids == image_id + elif isinstance(sample_id, str) and image_id is None: + sid = sample_ids == sample_id + iid = np.zeros(len(img_data)) + iid[np.where(sid)[0][0]] = 1 + elif sample_id is None and isinstance(image_id, str): + first_sid = img_data["sample_id"][0] + sid = sample_ids == first_sid + iid = image_ids == image_id + elif sample_id is True and image_id is None: + sid = np.full(len(img_data), True) + iid = [img_data['sample_id'].index(x) for x in set(img_data['sample_id'])] + iid = np.eye(len(img_data))[iid, :].sum(axis=0) + elif sample_id is None and image_id is True: + first_sid = img_data["sample_id"][0] + sid = sample_ids == first_sid + iid = np.full(len(img_data), True) + + mask = sid.astype(bool) & iid.astype(bool) + if not any(mask): + raise ValueError( + f"No 'imgData' entry(ies) matched the specified image_id = '{image_id}' and sample_id = '{sample_id}'" + ) - if img_data is None: - return None - - if img_data.shape[0] == 0: - return None - - if sample_id is True: - if image_id is True: - return img_data - - elif image_id is None: - unique_sample_ids = list(set(img_data["sample_id"])) - sample_id_groups = img_data.split("sample_id") - subset = None - - for sample_id in unique_sample_ids: - row = sample_id_groups[sample_id][0, :] - if subset is None: - subset = row - else: - subset = subset.combine_rows(row) - else: - subset = img_data[ - [_image_id == image_id for _image_id in img_data["image_id"]], : - ] - - elif sample_id is None: - first_sample_id = img_data["sample_id"][0] - first_sample = img_data[ - [_sample_id == first_sample_id for _sample_id in img_data["sample_id"]], : - ] - - if image_id is True: - subset = first_sample - - elif image_id is None: - subset = first_sample[0, :] - else: - subset = first_sample[ - [_image_id == image_id for _image_id in img_data["image_id"]], : - ] - - else: - selected_sample = img_data[ - [_sample_id == sample_id for _sample_id in img_data["sample_id"]], : - ] - - if selected_sample.shape[0] == 0: - subset = selected_sample - elif image_id is True: - subset = selected_sample - elif image_id is None: - subset = selected_sample[0, :] - else: - subset = selected_sample[ - [_image_id == image_id for _image_id in selected_sample["image_id"]] - ] - - return subset + return np.where(mask)[0].tolist() diff --git a/tests/test_img_data_methods.py b/tests/test_img_data_methods.py index 6e9adca..7a685a5 100644 --- a/tests/test_img_data_methods.py +++ b/tests/test_img_data_methods.py @@ -1,5 +1,6 @@ import pytest from copy import deepcopy +import numpy as np from spatialexperiment.SpatialImage import VirtualSpatialImage __author__ = "keviny2" @@ -7,7 +8,7 @@ __license__ = "MIT" -def test_get_img_without_img_data(spe): +def test_get_img_no_img_data(spe): tspe = deepcopy(spe) tspe.img_data = None @@ -15,16 +16,17 @@ def test_get_img_without_img_data(spe): def test_get_img_no_matches(spe): - images = spe.get_img(sample_id="foo", image_id="foo") - assert not images + with pytest.raises(ValueError): + images = spe.get_img(sample_id="foo", image_id="foo") -def test_get_img_both_null(spe): - res = spe.get_img(sample_id=None, image_id=None) - image = spe.img_data["data"][0] +def test_get_img_both_str(spe): + res = spe.get_img(sample_id="sample_1", image_id="dice") + images = spe.img_data[np.array(spe.img_data["sample_id"]) == "sample_1",] + images = images[np.array(images["image_id"]) == "dice",]["data"][0] assert isinstance(res, VirtualSpatialImage) - assert res == image + assert res == images def test_get_img_both_true(spe): @@ -35,22 +37,64 @@ def test_get_img_both_true(spe): assert res == images -def test_get_img_specific_sample(spe): +def test_get_img_both_none(spe): + res = spe.get_img(sample_id=None, image_id=None) + image = spe.img_data[0,]["data"][0] + + assert isinstance(res, VirtualSpatialImage) + assert res == image + + +def test_get_img_sample_str_image_true(spe): res = spe.get_img(sample_id="sample_1", image_id=True) - images = spe.img_data["data"][:2] + images = spe.img_data[np.array(spe.img_data["sample_id"]) == "sample_1",]["data"] assert isinstance(res, list) assert res == images -def test_get_img_specific_image(spe): +def test_get_img_sample_true_image_str(spe): res = spe.get_img(sample_id=True, image_id="desert") - images = spe.img_data["data"][2] + images = spe.img_data[np.array(spe.img_data["image_id"]) == "desert",]["data"][0] + + assert isinstance(res, VirtualSpatialImage) + assert res == images + + +def test_get_img_sample_str_image_none(spe): + res = spe.get_img(sample_id="sample_1", image_id=None) + images = spe.img_data[np.array(spe.img_data["sample_id"]) == "sample_1",]["data"][0] + + assert isinstance(res, VirtualSpatialImage) + assert res == images + + +def test_get_img_sample_none_image_str(spe): + res = spe.get_img(sample_id=None, image_id="aurora") + images = spe.img_data[np.array(spe.img_data["image_id"]) == "aurora",]["data"][0] assert isinstance(res, VirtualSpatialImage) assert res == images +def test_get_img_sample_true_image_none(spe): + res = spe.get_img(sample_id=True, image_id=None) + idxs = [spe.img_data["sample_id"].index(x) for x in set(spe.img_data["sample_id"])] + images = spe.img_data[idxs,]["data"] + + assert isinstance(res, list) and all(isinstance(item, VirtualSpatialImage) for item in res) + assert set(res) == set(images) + + +def test_get_img_sample_none_image_true(spe): + res = spe.get_img(sample_id=None, image_id=True) + first_sample_id = spe.img_data["sample_id"][0] + images = spe.img_data[np.array(spe.img_data["sample_id"]) == first_sample_id,]["data"] + + assert isinstance(res, list) and all(isinstance(item, VirtualSpatialImage) for item in res) + assert set(res) == set(images) + + def test_add_img(spe): tspe = spe.add_img( image_source="tests/images/sample_image4.png", From 7966b2f02fda59296282723601e18f39ef902adb Mon Sep 17 00:00:00 2001 From: keviny2 Date: Wed, 19 Mar 2025 16:00:22 -0700 Subject: [PATCH 2/3] Update CHANGELOG --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 416d53b..106a4e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Version 0.0.7 +- Refactored `get_img_idx` for improved maintainability +- Disambiguated `get_img_data` between `_imgutils.py` and `SpatialExperiment.py` +- Moved `SpatialFeatureExperiment` into its own package + ## Version 0.0.6 - Added `read_tenx_visium()` function to load 10x Visium data as SpatialExperiment - Added `combine_columns` function From c095108ebb85990bbd2f203bba08173ec27de187 Mon Sep 17 00:00:00 2001 From: keviny2 Date: Wed, 19 Mar 2025 16:02:17 -0700 Subject: [PATCH 3/3] Modify CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 106a4e9..571135c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## Version 0.0.7 +## [Unreleased] - Refactored `get_img_idx` for improved maintainability - Disambiguated `get_img_data` between `_imgutils.py` and `SpatialExperiment.py` - Moved `SpatialFeatureExperiment` into its own package