diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 7504d08..315df60 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -1,33 +1,73 @@ -name: Run tests +name: Test the library on: push: - branches: [main] + branches: + - master # for legacy repos + - main pull_request: - branches: [main] + branches: + - master # for legacy repos + - main + workflow_dispatch: # Allow manually triggering the workflow + schedule: + # Run roughly every 15 days at 00:00 UTC + # (useful to check if updates on dependencies break the package) + - cron: "0 0 1,16 * *" + +permissions: + contents: read + +concurrency: + group: >- + ${{ github.workflow }}-${{ github.ref_type }}- + ${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true jobs: - build: - runs-on: ubuntu-latest + test: strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] - - name: Python ${{ matrix.python-version }} + python: ["3.9", "3.10", "3.11", "3.12", "3.13"] + platform: + - ubuntu-latest + # - macos-latest + # - windows-latest + runs-on: ${{ matrix.platform }} + name: Python ${{ matrix.python }}, ${{ matrix.platform }} steps: - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 + - uses: actions/setup-python@v5 + id: setup-python with: - python-version: ${{ matrix.python-version }} - cache: "pip" + python-version: ${{ matrix.python }} - name: Install dependencies run: | python -m pip install --upgrade pip - pip install tox + pip install tox coverage - - name: Test with tox - run: | + - name: Run tests + run: >- + pipx run --python '${{ steps.setup-python.outputs.python-path }}' tox + -- -rFEx --durations 10 --color yes --cov --cov-branch --cov-report=xml # pytest args + + - name: Check for codecov token availability + id: codecov-check + shell: bash + run: | + if [ ${{ secrets.CODECOV_TOKEN }} != '' ]; then + echo "codecov=true" >> $GITHUB_OUTPUT; + else + echo "codecov=false" >> $GITHUB_OUTPUT; + fi + + - name: Upload coverage reports to Codecov with GitHub Action + uses: codecov/codecov-action@v5 + if: ${{ steps.codecov-check.outputs.codecov == 'true' }} + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + slug: ${{ github.repository }} + flags: ${{ matrix.platform }} - py${{ matrix.python }} diff --git a/CHANGELOG.md b/CHANGELOG.md index fd84ad5..fa5fa18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## Version 0.0.10 + +- Add an affine function that computes a `rasterio.Affine` object given a `scale_factor`. This assumes a simple scaling where the origin is (0,0) in the spatial coordinate system corresponding to the top-left pixel (0,0). More complex alignments would require explicit affine transforms. +- Ensure img_raster() consistently returns a PIL.Image.Image. +- Add to_numpy() method. +- Changes to how caching works in remote images. + + ## Version 0.0.9 - Added `to_anndata()` in main `SpatialExperiment` class (PR #50) diff --git a/setup.cfg b/setup.cfg index 8fbcedc..2776665 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,11 +51,11 @@ install_requires = importlib-metadata; python_version<"3.8" biocframe>=0.6.3 biocutils>=0.2 - summarizedexperiment>=0.5 singlecellexperiment>=0.5.7 pillow>=11.0 requests scipy~=1.13 + rasterio [options.packages.find] diff --git a/src/spatialexperiment/spatialimage.py b/src/spatialexperiment/spatialimage.py index 1e1900c..337d89c 100644 --- a/src/spatialexperiment/spatialimage.py +++ b/src/spatialexperiment/spatialimage.py @@ -11,6 +11,7 @@ import numpy as np import requests from PIL import Image, ImageChops +from rasterio.transform import Affine __author__ = "jkanche, keviny2" __copyright__ = "jkanche, keviny2" @@ -87,12 +88,21 @@ def metadata(self, metadata: dict): ) self.set_metadata(metadata, in_place=True) - ############################ - ######>> img props <<####### - ############################ + ################################## + ######>> Spatial Props <<######### + ################################## + + def affine(self, scale_factor: float = 1.0) -> Affine: + """Computes a simple affine transformation from the scale_factor. + Assumes pixel (0,0) is top-left and maps to spatial origin (0,0). + Y-axis in spatial coordinates increases downwards by default (matching pixel rows). + Use `Affine.scale(self.scale_factor, -self.scale_factor) * Affine.translation(0, height_in_pixels)` + if Y spatial needs to increase upwards. + """ + return Affine.scale(scale_factor, scale_factor) def get_dimensions(self) -> Tuple[int, int]: - """Get image dimensions (width, height).""" + """Get image dimensions (width, height) in pixels.""" img = self.img_raster() return img.size @@ -106,7 +116,7 @@ def dimensions(self) -> Tuple[int, int]: ############################ @abstractmethod - def img_source(self, as_path: bool = False) -> Union[str, None]: + def img_source(self, as_path: bool = False) -> Union[str, Path, None]: """Get the source of the image. Args: @@ -122,36 +132,75 @@ def img_raster(self) -> Image.Image: """Get the image as a PIL Image object.""" pass + def to_numpy(self, **kwargs) -> np.ndarray: + """Convert the image raster to a NumPy array. + + Args: + **kwargs: + Additional arguments passed to `np.array()`. + + Returns: + NumPy array representation of the image. + """ + return np.array(self.img_raster(), **kwargs) + def rotate_img(self, degrees: float = 90) -> "LoadedSpatialImage": - """Rotate image by specified degrees clockwise.""" - img = self.img_raster() + """Rotate image by specified degrees clockwise. + Returns: + A new LoadedSpatialImage. + """ + img = self.img_raster() # PIL rotates counter-clockwise - rotated = img.rotate(-degrees, expand=True) - return LoadedSpatialImage(rotated) + rotated_pil_img = img.rotate(-degrees, expand=True) + return LoadedSpatialImage(image=rotated_pil_img, metadata=self.metadata.copy()) def mirror_img(self, axis: str = "h") -> "LoadedSpatialImage": - """Mirror image horizontally or vertically.""" + """Mirror image horizontally or vertically. + + Args: + axis: + 'h' for horizontal (default) or 'v' for vertical. + + Returns: + A new LoadedSpatialImage. + """ img = self.img_raster() - if axis == "h": - mirrored = img.transpose(Image.FLIP_LEFT_RIGHT) - elif axis == "v": - mirrored = img.transpose(Image.FLIP_TOP_BOTTOM) + if axis.lower() == "h": + mirrored_pil_img = img.transpose(Image.FLIP_LEFT_RIGHT) + elif axis.lower() == "v": + mirrored_pil_img = img.transpose(Image.FLIP_TOP_BOTTOM) else: raise ValueError("axis must be 'h' or 'v'") - return LoadedSpatialImage(mirrored) + return LoadedSpatialImage( + image=mirrored_pil_img, + metadata=self.metadata.copy(), + ) -def _sanitize_loaded_image(image): +def _sanitize_loaded_image(image: Union[Image.Image, np.ndarray]) -> Image.Image: if isinstance(image, np.ndarray): - _result = Image.fromarray(image) + # trying to infer mode for multi-channel arrays if not RGBA/RGB + if image.ndim == 3: + if image.shape[2] == 1: # Grayscale with channel dim + _result = Image.fromarray(image.squeeze(axis=2)) + elif image.shape[2] not in [3, 4]: # common RGB/RGBA + warn( + f"NumPy array has {image.shape[2]} channels; Pillow might not infer mode correctly. Ensure it's compatible e.g. (H,W,3) or (H,W,4)." + ) + _result = Image.fromarray(image) # Lets try PIL + else: + _result = Image.fromarray(image) + elif image.ndim == 2: # Grayscale + _result = Image.fromarray(image) + else: + raise ValueError(f"Unsupported NumPy array shape: {image.shape}. Expected 2D (H,W) or 3D (H,W,C).") elif isinstance(image, Image.Image): _result = image else: - raise TypeError("image must be PIL Image or numpy array") - + raise TypeError(f"image must be PIL.Image.Image or numpy.ndarray, got '{type(image)}'.") return _result @@ -172,23 +221,33 @@ def __init__(self, image: Union[Image.Image, np.ndarray], metadata: Optional[dic self._image = _sanitize_loaded_image(image) - def _define_output(self, in_place: bool = False) -> "LoadedSpatialImage": - if in_place is True: - return self - else: - return self.__copy__() - ######################### ######>> Equality <<##### ######################### def __eq__(self, other) -> bool: - diff = ImageChops.difference(self.image, other.image) + if not super().__eq__(other): + return False + if not isinstance(other, LoadedSpatialImage): + return False - return super().__eq__(other) and not diff.getbbox() + # compare image content + try: + diff = ImageChops.difference(self.img_raster(), other.img_raster()) + return not diff.getbbox() + except Exception as _: + # If images are not comparable (e.g. different modes, sizes after operations) + return False def __hash__(self): - return hash((super().__hash__(), self._image.tobytes())) + # Hashing Image directly is problematic due to internal state PIL maintains. + # Hashing bytes is more reliable but can be slow for large images. + try: + img_bytes = self._image.tobytes() + except Exception as _: + # Fallback if tobytes fails for some reason + img_bytes = id(self._image) # Not ideal, but better than erroring hash + return hash((super().__hash__(), img_bytes)) ######################### ######>> Copying <<###### @@ -201,12 +260,12 @@ def __deepcopy__(self, memo=None, _nil=[]): """ from copy import deepcopy - _img_copy = deepcopy(self._image) + _img_copy = self._image.copy() _metadata_copy = deepcopy(self.metadata) current_class_const = type(self) return current_class_const( - imaage=_img_copy, + image=_img_copy, metadata=_metadata_copy, ) @@ -217,7 +276,7 @@ def __copy__(self): """ current_class_const = type(self) return current_class_const( - image=self._image, + image=self._image.copy(), metadata=self._metadata, ) @@ -257,9 +316,8 @@ def __str__(self) -> str: ######>> img props <<####### ############################ - def get_image(self) -> Union[Image.Image, np.ndarray]: - """Get the image as a PIL Image object or ndarray.""" - + def get_image(self) -> Image.Image: + """Get the PIL Image object.""" return self._image def set_image(self, image: Union[Image.Image, np.ndarray], in_place: bool = False) -> "LoadedSpatialImage": @@ -272,14 +330,16 @@ def set_image(self, image: Union[Image.Image, np.ndarray], in_place: bool = Fals in_place: Whether to modify the ``LoadedSpatialImage`` in place. Defaults to False. + Returns: + Modified LoadedSpatialImage. """ _out = self._define_output(in_place=in_place) _out._image = _sanitize_loaded_image(image) + # reset lru_cache for methods that depend on image content if any were used return _out @property def image(self) -> Image.Image: - """Alias for :py:meth:`~get_image`.""" return self.get_image() @image.setter @@ -295,11 +355,7 @@ def image(self, image: Union[Image.Image, np.ndarray]): return self.set_image(image=image, in_place=True) def img_source(self, as_path: bool = False) -> None: - """Get the source of the loaded image. - - Returns: - Always returns None. - """ + """Get the source of the loaded image (always None for in-memory).""" return None ############################ @@ -307,10 +363,11 @@ def img_source(self, as_path: bool = False) -> None: ############################ def img_raster(self) -> Image.Image: + """Get the image as a PIL Image object.""" return self._image -def _sanitize_path(path): +def _sanitize_path(path: Union[str, Path]) -> Path: _path = Path(path).resolve() if not _path.exists(): raise FileNotFoundError(f"Image file not found: {path}") @@ -335,18 +392,14 @@ def __init__(self, path: Union[str, Path], metadata: Optional[dict] = None): self._path = _sanitize_path(path) - def _define_output(self, in_place: bool = False) -> "LoadedSpatialImage": - if in_place is True: - return self - else: - return self.__copy__() - ######################### ######>> Equality <<##### ######################### - def __eq__(self, other): - return super().__eq__(other) and self.path == other.path + def __eq__(self, other) -> bool: + if not super().__eq__(other): + return False + return isinstance(other, StoredSpatialImage) and self.path == other.path def __hash__(self): return hash((super().__hash__(), str(self._path))) @@ -437,9 +490,11 @@ def set_path(self, path: Union[str, Path], in_place: bool = False) -> "StoredSpa or as a reference to the (in-place-modified) original. """ new_path = _sanitize_path(path) - _out = self._define_output(in_place=in_place) _out._path = new_path + # Clear LRU cache if path changes + if in_place and hasattr(self.img_raster, "cache_clear"): + self.img_raster.cache_clear() return _out @property @@ -457,7 +512,7 @@ def path(self, path: Union[str, Path]): "Setting property 'path' is an in-place operation, use 'set_path' instead", UserWarning, ) - return self.set_path(path=path, in_place=True) + self.set_path(path=path, in_place=True) def img_source(self, as_path: bool = False) -> str: """Get the source path of the image. @@ -468,7 +523,7 @@ def img_source(self, as_path: bool = False) -> str: Returns: Path to the image. """ - return str(self._path) if as_path is True else self._path + return str(self._path) if as_path else self._path ############################ ######>> img utils <<####### @@ -477,12 +532,14 @@ def img_source(self, as_path: bool = False) -> str: # Simple in-memory cache @lru_cache(maxsize=32) def img_raster(self) -> Image.Image: - """Load and cache the image.""" + """Load and cache the image from path.""" return Image.open(self._path) -def _validate_url(url): +def _validate_url(url: str): parsed = urlparse(url) + + # Must have scheme (http/https) and network location (domain) if not all([parsed.scheme, parsed.netloc]): raise ValueError(f"Invalid URL: {url}") @@ -507,23 +564,19 @@ def __init__(self, url: str, metadata: Optional[dict] = None, validate: bool = T self._url = url self._cache_dir = Path(tempfile.gettempdir()) / "spatial_image_cache" - self._cache_dir.mkdir(exist_ok=True) + self._cache_dir.mkdir(parents=True, exist_ok=True) if validate: _validate_url(url) - def _define_output(self, in_place: bool = False) -> "RemoteSpatialImage": - if in_place is True: - return self - else: - return self.__copy__() - ######################### ######>> Equality <<##### ######################### def __eq__(self, other) -> bool: - return super().__eq__(other) and self.url == other.url + if not super().__eq__(other): + return False + return isinstance(other, RemoteSpatialImage) and self.url == other.url def __hash__(self): return hash((super().__hash__(), self._url)) @@ -556,7 +609,7 @@ def __copy__(self): current_class_const = type(self) return current_class_const( url=self._url, - metadata=self._metadata, + metadata=self.metadata, ) def copy(self): @@ -599,7 +652,7 @@ def get_url(self) -> str: """Get the url to the image file.""" return self._url - def set_url(self, url: str, in_place: bool = False) -> "RemoteSpatialImage": + def set_url(self, url: str, in_place: bool = False, validate: bool = True) -> "RemoteSpatialImage": """Update the url to the image file. Args: @@ -609,23 +662,30 @@ def set_url(self, url: str, in_place: bool = False) -> "RemoteSpatialImage": in_place: Whether to modify the ``RemoteSpatialImage`` in place. + validate: + Whether to validate the url. + Returns: A modified ``RemoteSpatialImage`` object, either as a copy of the original or as a reference to the (in-place-modified) original. """ - _validate_url(url) - - _out = self._define_output(in_place=in_place) - _out.url = url - return _out + if validate: + _validate_url(url) + output = self._define_output(in_place=in_place) + output._url = url + if in_place and hasattr(self.img_raster, "cache_clear"): + self.img_raster.cache_clear() + if hasattr(self._get_cached_path, "cache_clear"): + self._get_cached_path.cache_clear() + return output @property - def url(self) -> Path: + def url(self) -> str: """Alias for :py:meth:`~get_url`.""" return self.get_url() @url.setter - def url(self, url: Union[str, Path]): + def url(self, url: str): """Alias for :py:attr:`~set_url` with ``in_place = True``. As this mutates the original object, a warning is raised. @@ -634,60 +694,108 @@ def url(self, url: Union[str, Path]): "Setting property 'url' is an in-place operation, use 'set_url' instead", UserWarning, ) - return self.set_url(url=url, in_place=True) + self.set_url(url=url, in_place=True) ############################ ######>> img utils <<####### ############################ - def _download_image(self) -> Path: - """Download image to cache directory.""" - cache_path = self._cache_dir / Path(urlparse(self._url).path).name + @lru_cache(maxsize=1) + def _get_cached_path(self) -> Path: + """Internal method to get the cached path, downloads if not exists.""" + url_path_part = Path(urlparse(self._url).path) + filename = url_path_part.name + if not filename: + import hashlib - if not cache_path.exists(): - response = requests.get(self._url, stream=True) - response.raise_for_status() + filename = hashlib.md5(self._url.encode()).hexdigest() + (url_path_part.suffix or ".img") - with cache_path.open("wb") as f: - shutil.copyfileobj(response.raw, f) + cache_path = self._cache_dir / filename + if not cache_path.exists(): + try: + _validate_url(self._url) + response = requests.get(self._url, stream=True) + response.raise_for_status() + + with cache_path.open("wb") as f: + shutil.copyfileobj(response.raw, f) + except requests.exceptions.RequestException as e: + # If download fails, remove incomplete cache file and re-raise + if cache_path.exists(): + cache_path.unlink(missing_ok=True) + raise IOError(f"Failed to download image from {self._url}: {e}.") from e + except ValueError as e: + raise ValueError(f"Invalid URL for download {self._url}: {e}.") from e return cache_path @lru_cache(maxsize=32) def img_raster(self) -> Image.Image: - """Download (if needed) and load the image.""" - cache_path = self._download_image() - return Image.open(cache_path) + """Download (if needed) and load the image from cache.""" + try: + cached_file_path = self._get_cached_path() + return Image.open(cached_file_path) + except Exception as e: + if hasattr(self._get_cached_path, "cache_clear"): + self._get_cached_path.cache_clear() + raise RuntimeError(f"Could not load image from URL {self._url} via cache: {e}.") def img_source(self, as_path: bool = False) -> str: """Get the source URL or cached path of the image. Args: - as_path: If True, returns downloaded path. Defaults to False. + as_path: + If True, returns path to the downloaded (cached) file. + If False (default), returns the original remote URL. Returns: URL or cached path of the image. """ if as_path: - return str(self._download_image()) + try: + return str(self._get_cached_path()) + except Exception as e: + warn(f"Could not obtain cached path for {self.url}: {e}. Returning original URL.") + return self._url return self._url def construct_spatial_image_class( - x: Union[str, Image.Image, np.ndarray], is_url: Optional[bool] = None + x: Union[str, Path, Image.Image, np.ndarray, VirtualSpatialImage], + metadata: Optional[dict] = None, + is_url: Optional[bool] = None, ) -> VirtualSpatialImage: - """Factory function to create appropriate SpatialImage object.""" + """Factory function to create appropriate SpatialImage object. + + Args: + x: + Image source (path, URL, PIL Image, NumPy array) or an existing VirtualSpatialImage. + + metadata: + Additional metadata dictionary. + + is_url: + Explicitly treat `x` as a URL if it's a string. + + Returns: + An instance of a VirtualSpatialImage subclass. + """ if isinstance(x, VirtualSpatialImage): return x elif isinstance(x, (Image.Image, np.ndarray)): - return LoadedSpatialImage(x) + return LoadedSpatialImage(x, metadata) elif isinstance(x, (str, Path)): + path_str = str(x) if is_url is None: - is_url = urlparse(str(x)).scheme in ("http", "https", "ftp") + try: + parsed = urlparse(path_str) + is_url = all([parsed.scheme, parsed.netloc]) and parsed.scheme in ("http", "https", "ftp") + except Exception: + is_url = False if is_url: - return RemoteSpatialImage(str(x)) + return RemoteSpatialImage(path_str, metadata) else: - return StoredSpatialImage(x) + return StoredSpatialImage(Path(path_str), metadata) else: - raise TypeError(f"Unsupported input type: {type(x)}") + raise TypeError(f"Unsupported input type for image construction: {type(x)}.") diff --git a/tests/test_img_raster.py b/tests/test_img_raster.py index 0b67505..14dae0b 100644 --- a/tests/test_img_raster.py +++ b/tests/test_img_raster.py @@ -38,9 +38,9 @@ def test_remote_spatial_image_img_raster(monkeypatch): image_url = "https://example.com/test_image.jpg" spi_remote = construct_spatial_image_class(image_url, is_url=True) - # Mock the _download_image method to return an image + # Mock the _get_cached_path method to return an image mock_path = "tests/images/sample_image1.jpg" - monkeypatch.setattr(spi_remote, "_download_image", lambda: mock_path) + monkeypatch.setattr(spi_remote, "_get_cached_path", lambda: mock_path) raster = spi_remote.img_raster() @@ -54,7 +54,7 @@ def mock_download(): num_calls += 1 return mock_path - monkeypatch.setattr(spi_remote, "_download_image", mock_download) + monkeypatch.setattr(spi_remote, "_get_cached_path", mock_download) raster2 = spi_remote.img_raster() assert num_calls == 0 diff --git a/tests/test_img_source.py b/tests/test_img_source.py index aebe2ab..4f759de 100644 --- a/tests/test_img_source.py +++ b/tests/test_img_source.py @@ -60,9 +60,9 @@ def test_remote_spatial_image_img_source_with_mock(monkeypatch): assert isinstance(spi_remote, RemoteSpatialImage) - # Mock the _download_image method to return a fixed path + # Mock the _get_cached_path method to return a fixed path mock_path = Path("/tmp/image.jpg") - monkeypatch.setattr(spi_remote, "_download_image", lambda: mock_path) + monkeypatch.setattr(spi_remote, "_get_cached_path", lambda: mock_path) # Test with as_path=True (returns the cached path) source_path = spi_remote.img_source(as_path=True)