diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bbcea023..16ff38fd 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -44,7 +44,6 @@ jobs: - name: Install dependencies if: steps.look-up.outputs.cache-hit != 'true' run: | - pip install --upgrade pip pip install -e .[dev] - run: pytest --disable-pytest-warnings tests/test_bioimageio_collection.py::test_rdf_format_to_populate_cache if: steps.look-up.outputs.cache-hit != 'true' @@ -94,7 +93,7 @@ jobs: path: bioimageio_cache key: ${{needs.populate-cache.outputs.cache-key}} - name: pytest - run: pytest --cov bioimageio --cov-report xml --cov-append --capture no --disable-pytest-warnings + run: pytest --cov bioimageio.core --cov-append --capture no --disable-pytest-warnings env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} @@ -105,25 +104,50 @@ jobs: path: bioimageio_cache key: ${{needs.populate-cache.outputs.cache-key}} - - if: matrix.report-coverage && github.event_name == 'pull_request' - uses: orgoro/coverage@v3.2 + - run: cp .coverage .coverage.${{matrix.python-version}}-${{matrix.numpy-version}} + - uses: actions/upload-artifact@v4 + with: + name: .coverage.${{matrix.python-version}}-${{matrix.numpy-version}} + retention-days: 1 + path: .coverage.${{matrix.python-version}}-${{matrix.numpy-version}} + include-hidden-files: true + + coverage: + needs: [test] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v6 + - run: | + pip install coverage + - uses: actions/download-artifact@v4 + with: + pattern: .coverage.* + merge-multiple: true + - run: | + ls -la .coverage* + coverage combine + coverage xml -o coverage.xml + - uses: orgoro/coverage@v3.2 with: coverageFile: coverage.xml - token: ${{secrets.GITHUB_TOKEN}} - - if: matrix.report-coverage && github.ref == 'refs/heads/main' + token: ${{ secrets.GITHUB_TOKEN }} + thresholdAll: 0.7 + 
thresholdNew: 0.9 + thresholdModified: 0.6 + - name: generate coverage badge and html report run: | pip install genbadge[coverage] genbadge coverage --input-file coverage.xml --output-file ./dist/coverage/coverage-badge.svg coverage html -d dist/coverage - - if: matrix.report-coverage && github.ref == 'refs/heads/main' - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v4 with: - name: coverage + name: coverage-summary retention-days: 1 path: dist conda-build: - needs: test + needs: test # only so we run tests even if the pinned bioimageio.spec version is not yet published on conda-forge runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -159,7 +183,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 with: - name: coverage + name: coverage-summary path: dist - uses: actions/setup-python@v6 with: @@ -179,65 +203,66 @@ jobs: branch: gh-pages folder: dist - pip-build: - name: Build with pip and publish to PyPI - needs: [test, conda-build] + build: runs-on: ubuntu-latest steps: - - name: Check out the repository - uses: actions/checkout@v4 - with: - fetch-depth: 2 - - - name: Set up Python - uses: actions/setup-python@v6 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v6 with: python-version: '3.10' + cache: 'pip' - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip --version - pip install --upgrade build - pip install . 
+ run: pip install --upgrade build - name: Build package - run: | - python -m build + run: python -m build + - uses: actions/upload-artifact@v4 + with: + path: dist/ + name: dist + publish: + needs: [test, build, conda-build, docs] + runs-on: ubuntu-latest + environment: + name: release + url: https://pypi.org/project/bioimageio.core/ + permissions: + contents: write # required for tag creation + id-token: write # required for pypi publish action + steps: + - name: Check out the repository + uses: actions/checkout@v4 + with: + fetch-depth: 2 - name: Check if there is a parent commit id: check-parent-commit run: | echo "sha=$(git rev-parse --verify --quiet HEAD^)" >> $GITHUB_OUTPUT - name: Detect new version - id: check-version + id: tag-version if: github.ref == 'refs/heads/main' && steps.check-parent-commit.outputs.sha uses: salsify/action-detect-and-tag-new-version@v2.0.3 with: - create-tag: false + create-tag: true version-command: | python -c "from pathlib import Path;import json;print(p_src.read_text().split('__version__ = \"')[1].split('\"')[0] if (p_src:=Path('src/bioimageio/core/__init__.py')).exists() else json.loads(Path('bioimageio/core/VERSION').read_text())['version'])" - - - name: Push tag - id: tag-version - if: github.ref == 'refs/heads/main' && steps.check-version.outputs.previous-version != steps.check-version.outputs.current-version - uses: mathieudutour/github-tag-action@v5.5 + - uses: actions/download-artifact@v4 + if: github.ref == 'refs/heads/main' && steps.tag-version.outputs.tag with: - github_token: ${{ secrets.GITHUB_TOKEN }} - custom_tag: ${{ steps.check-version.outputs.current-version }} - + name: dist + path: dist - name: Publish package on PyPI - if: github.ref == 'refs/heads/main' && steps.tag-version.outputs.new_tag - uses: pypa/gh-action-pypi-publish@release/v1.12 + if: github.ref == 'refs/heads/main' && steps.tag-version.outputs.tag + uses: pypa/gh-action-pypi-publish@release/v1 with: - user: __token__ - password: '${{ 
secrets.PYPI_TOKEN }}' packages-dir: dist/ - verbose: true + - name: Publish the release notes if: github.ref == 'refs/heads/main' uses: release-drafter/release-drafter@v6.0.0 with: - publish: "${{ steps.tag-version.outputs.new_tag != '' }}" - tag: '${{ steps.tag-version.outputs.new_tag }}' + publish: "${{ steps.tag-version.outputs.tag != '' }}" + tag: '${{ steps.tag-version.outputs.tag }}' env: GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}' diff --git a/README.md b/README.md index 408262da..da957da3 100644 --- a/README.md +++ b/README.md @@ -347,15 +347,24 @@ stats: inputs/dataset_statistics.json ## Set up Development Environment -To set up a development conda environment run the following commands: +To set up a development environment run the following commands: ```console -conda env create -f dev/env.yaml +conda create -n core python=$(grep -E '^requires-python' pyproject.toml | grep -oE '[0-9]+\.[0-9]+') conda activate core -pip install -e . --no-deps +pip install -e .[dev,partners] ``` -There are different environment files available that only install tensorflow or pytorch as dependencies, see [dev folder](https://github.com/bioimage-io/core-bioimage-io-python/tree/main/dev). 
+### Joint development of bioimageio.spec and bioimageio.core + +Assuming [spec-bioimage-io](https://github.com/bioimage-io/spec-bioimage-io) is cloned to the parent folder, +a joint development environment can be created with the following commands: + +```console +conda create -n core python=$(grep -E '^requires-python' pyproject.toml | grep -oE '[0-9]+\.[0-9]+') +conda activate core +pip install -e .[dev,partners] -e ../spec-bioimage-io[dev] +``` ## Logging level diff --git a/changelog.md b/changelog.md index c1861745..1ce18e61 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,9 @@ +### 0.9.5 + +- bump bioimageio.spec library version to 0.5.6.0 +- improved ONNX export from pytorch state dict or torchscript using torch dynamo +- default `opset_version` for `pytorch_to_onnx`/`torchscript_to_onnx` conversions bumped to 18. + ### 0.9.4 - bump bioimageio.spec library version to 0.5.5.6 diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 110d4b1d..b4ffb39a 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -54,9 +54,7 @@ test: - tests requires: {% for dep in pyproject['project']['optional-dependencies']['dev'] %} - {% if dep.startswith('torch>=') %} # pip: torch -> conda: pytorch - - py{{ dep.lower() }} - {% else %} + {% if 'torch' not in dep %} # can't install pytorch>=2.8 from conda-forge smh - {{ dep.lower().replace('_', '-') }} {% endif %} {% endfor %} diff --git a/pyproject.toml b/pyproject.toml index 7315db52..ed7e9aa3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires-python = ">=3.9" readme = "README.md" dynamic = ["version"] dependencies = [ - "bioimageio.spec ==0.5.5.6", + "bioimageio.spec ==0.5.6.0", "h5py", "imagecodecs", "imageio>=2.10", @@ -40,7 +40,7 @@ Documentation = "https://bioimage-io.github.io/core-bioimage-io-python/bioimagei Source = "https://github.com/bioimage-io/core-bioimage-io-python" [project.optional-dependencies] -onnx = ["onnxruntime"] +onnx = ["onnxruntime", "onnxscript"] 
pytorch = ["torch>=1.6,<3", "torchvision>=0.21", "keras>=3.0,<4"] tensorflow = ["tensorflow", "keras>=2.15,<4"] partners = [ @@ -59,10 +59,11 @@ dev = [ "numpy", "onnx", "onnxruntime", + "onnxscript", "packaging>=17.0", "pdoc", "pre-commit", - "pyright==1.1.404", + "pyright==1.1.407", "pytest-cov", "pytest", "python-dotenv", @@ -124,7 +125,6 @@ testpaths = ["src", "tests"] [tool.ruff] line-length = 88 -target-version = "py39" include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"] exclude = [ "presentations", diff --git a/src/bioimageio/core/__init__.py b/src/bioimageio/core/__init__.py index 14535b6b..ac51907d 100644 --- a/src/bioimageio/core/__init__.py +++ b/src/bioimageio/core/__init__.py @@ -3,7 +3,7 @@ """ # ruff: noqa: E402 -__version__ = "0.9.4" +__version__ = "0.9.5" from loguru import logger logger.disable("bioimageio.core") diff --git a/src/bioimageio/core/_resource_tests.py b/src/bioimageio/core/_resource_tests.py index cf8fcae5..c4572929 100644 --- a/src/bioimageio/core/_resource_tests.py +++ b/src/bioimageio/core/_resource_tests.py @@ -274,15 +274,30 @@ def test_description( ) with TemporaryDirectory(**td_kwargs) as _d: working_dir = Path(_d) - if isinstance(source, (dict, ResourceDescrBase)): + + if isinstance(source, ResourceDescrBase): + descr = source + elif isinstance(source, dict): + context = get_validation_context().replace( + perform_io_checks=True # make sure we perform io checks though + ) + + descr = build_description(source, context=context) + else: + descr = load_description(source, perform_io_checks=True) + + if isinstance(descr, InvalidDescr): + return descr.validation_summary + elif isinstance(source, (dict, ResourceDescrBase)): file_source = save_bioimageio_package( - source, output_path=working_dir / "package.zip" + descr, output_path=working_dir / "package.zip" ) else: file_source = source - return _test_in_env( + _test_in_env( file_source, + descr=descr, working_dir=working_dir, weight_format=weight_format, 
conda_env=conda_env, @@ -295,10 +310,13 @@ def test_description( **deprecated, ) + return descr.validation_summary + def _test_in_env( source: PermissiveFileSource, *, + descr: ResourceDescr, working_dir: Path, weight_format: Optional[SupportedWeightsFormat], conda_env: Optional[BioimageioCondaEnv], @@ -309,74 +327,69 @@ def _test_in_env( expected_type: Optional[str], sha256: Optional[Sha256], **deprecated: Unpack[DeprecatedKwargs], -) -> ValidationSummary: - descr = load_description(source) - - if not isinstance(descr, (v0_4.ModelDescr, v0_5.ModelDescr)): - raise NotImplementedError("Not yet implemented for non-model resources") - - if weight_format is None: - all_present_wfs = [ - wf for wf in get_args(WeightsFormat) if getattr(descr.weights, wf) - ] - ignore_wfs = [wf for wf in all_present_wfs if wf in ["tensorflow_js"]] - logger.info( - "Found weight formats {}. Start testing all{}...", - all_present_wfs, - f" (except: {', '.join(ignore_wfs)}) " if ignore_wfs else "", - ) - summary = _test_in_env( - source, - working_dir=working_dir / all_present_wfs[0], - weight_format=all_present_wfs[0], - devices=devices, - determinism=determinism, - conda_env=conda_env, - run_command=run_command, - expected_type=expected_type, - sha256=sha256, - stop_early=stop_early, - **deprecated, - ) - for wf in all_present_wfs[1:]: - additional_summary = _test_in_env( - source, - working_dir=working_dir / wf, - weight_format=wf, - devices=devices, - determinism=determinism, - conda_env=conda_env, - run_command=run_command, - expected_type=expected_type, - sha256=sha256, - stop_early=stop_early, - **deprecated, +): + """Test a bioimage.io resource in a given conda environment. + Adds details to the existing validation summary of **descr**. 
+ """ + if isinstance(descr, (v0_4.ModelDescr, v0_5.ModelDescr)): + if weight_format is None: + # run tests for all present weight formats + all_present_wfs = [ + wf for wf in get_args(WeightsFormat) if getattr(descr.weights, wf) + ] + ignore_wfs = [wf for wf in all_present_wfs if wf in ["tensorflow_js"]] + logger.info( + "Found weight formats {}. Start testing all{}...", + all_present_wfs, + f" (except: {', '.join(ignore_wfs)}) " if ignore_wfs else "", ) - for d in additional_summary.details: - # TODO: filter reduntant details; group details - summary.add_detail(d) - return summary - - if weight_format == "pytorch_state_dict": - wf = descr.weights.pytorch_state_dict - elif weight_format == "torchscript": - wf = descr.weights.torchscript - elif weight_format == "keras_hdf5": - wf = descr.weights.keras_hdf5 - elif weight_format == "onnx": - wf = descr.weights.onnx - elif weight_format == "tensorflow_saved_model_bundle": - wf = descr.weights.tensorflow_saved_model_bundle - elif weight_format == "tensorflow_js": - raise RuntimeError( - "testing 'tensorflow_js' is not supported by bioimageio.core" - ) + for wf in all_present_wfs: + _test_in_env( + source, + descr=descr, + working_dir=working_dir / wf, + weight_format=wf, + devices=devices, + determinism=determinism, + conda_env=conda_env, + run_command=run_command, + expected_type=expected_type, + sha256=sha256, + stop_early=stop_early, + **deprecated, + ) + + return + + if weight_format == "pytorch_state_dict": + wf = descr.weights.pytorch_state_dict + elif weight_format == "torchscript": + wf = descr.weights.torchscript + elif weight_format == "keras_hdf5": + wf = descr.weights.keras_hdf5 + elif weight_format == "onnx": + wf = descr.weights.onnx + elif weight_format == "tensorflow_saved_model_bundle": + wf = descr.weights.tensorflow_saved_model_bundle + elif weight_format == "tensorflow_js": + raise RuntimeError( + "testing 'tensorflow_js' is not supported by bioimageio.core" + ) + else: + assert_never(weight_format) 
+ assert wf is not None + if conda_env is None: + conda_env = get_conda_env(entry=wf) + + test_loc = ("weights", weight_format) else: - assert_never(weight_format) + if conda_env is None: + warnings.warn( + "No conda environment description given for testing (And no default conda envs available for non-model descriptions)." + ) + return - assert wf is not None - if conda_env is None: - conda_env = get_conda_env(entry=wf) + test_loc = () # remove name as we crate a name based on the env description hash value conda_env.name = None @@ -417,16 +430,15 @@ def _test_in_env( # double check that environment was created successfully run_command([CONDA_CMD, "run", "-n", env_name, "python", "--version"]) except Exception as e: - summary = descr.validation_summary - summary.add_detail( + descr.validation_summary.add_detail( ValidationDetail( name="Conda environment creation", status="failed", - loc=("weights", weight_format), + loc=test_loc, recommended_env=conda_env, errors=[ ErrorEntry( - loc=("weights", weight_format), + loc=test_loc, msg=str(e), type="conda", with_traceback=True, @@ -434,7 +446,7 @@ def _test_in_env( ], ) ) - return summary + return working_dir.mkdir(parents=True, exist_ok=True) summary_path = working_dir / "summary.json" @@ -461,7 +473,7 @@ def _test_in_env( ) ) except Exception as e: - cmd_error = f"Failed to run command '{' '.join(cmd)}': {e}." + cmd_error = f"Command '{' '.join(cmd)}' returned with error: {e}." 
if summary_path.exists(): break @@ -469,29 +481,27 @@ def _test_in_env( if cmd_error is not None: logger.warning(cmd_error) - return ValidationSummary( - name="calling bioimageio test command", - source_name=str(source), - status="failed", - type="unknown", - format_version="unknown", - details=[ - ValidationDetail( - name="run 'bioimageio test'", - errors=[ - ErrorEntry( - loc=(), - type="bioimageio cli", - msg=f"test command '{' '.join(cmd)}' did not produce a summary file at {summary_path}", - ) - ], - status="failed", - ) - ], - env=set(), + descr.validation_summary.add_detail( + ValidationDetail( + name="run 'bioimageio test' command", + recommended_env=conda_env, + errors=[ + ErrorEntry( + loc=(), + type="bioimageio cli", + msg=f"test command '{' '.join(cmd)}' did not produce a summary file at {summary_path}", + ) + ], + status="failed", + ) ) + return - return ValidationSummary.load_json(summary_path) + # add relevant details from command summary + command_summary = ValidationSummary.load_json(summary_path) + for detail in command_summary.details: + if detail.loc[: len(test_loc)] == test_loc: + descr.validation_summary.add_detail(detail) @overload @@ -667,22 +677,19 @@ def load_description_and_test( enable_determinism(determinism, weight_formats=weight_formats) for w in weight_formats: _test_model_inference(rd, w, devices, stop_early=stop_early, **deprecated) - if stop_early and rd.validation_summary.status == "failed": + if stop_early and rd.validation_summary.status != "passed": break if not isinstance(rd, v0_4.ModelDescr): _test_model_inference_parametrized( rd, w, devices, stop_early=stop_early ) - if stop_early and rd.validation_summary.status == "failed": + if stop_early and rd.validation_summary.status != "passed": break # TODO: add execution of jupyter notebooks # TODO: add more tests - if rd.validation_summary.status == "valid-format": - rd.validation_summary.status = "passed" - return rd @@ -998,35 +1005,41 @@ def get_ns(n: int): ) as 
prediction_pipeline: for n, batch_size, inputs, exptected_output_shape in generate_test_cases(): error: Optional[str] = None - result = prediction_pipeline.predict_sample_without_blocking(inputs) - if len(result.members) != len(exptected_output_shape): - error = ( - f"Expected {len(exptected_output_shape)} outputs," - + f" but got {len(result.members)}" - ) - + try: + result = prediction_pipeline.predict_sample_without_blocking(inputs) + except Exception as e: + error = str(e) else: - for m, exp in exptected_output_shape.items(): - res = result.members.get(m) - if res is None: - error = "Output tensors may not be None for test case" - break + if len(result.members) != len(exptected_output_shape): + error = ( + f"Expected {len(exptected_output_shape)} outputs," + + f" but got {len(result.members)}" + ) - diff: Dict[AxisId, int] = {} - for a, s in res.sizes.items(): - if isinstance((e_aid := exp[AxisId(a)]), int): - if s != e_aid: + else: + for m, exp in exptected_output_shape.items(): + res = result.members.get(m) + if res is None: + error = "Output tensors may not be None for test case" + break + + diff: Dict[AxisId, int] = {} + for a, s in res.sizes.items(): + if isinstance((e_aid := exp[AxisId(a)]), int): + if s != e_aid: + diff[AxisId(a)] = s + elif ( + s < e_aid.min + or e_aid.max is not None + and s > e_aid.max + ): diff[AxisId(a)] = s - elif ( - s < e_aid.min or e_aid.max is not None and s > e_aid.max - ): - diff[AxisId(a)] = s - if diff: - error = ( - f"(n={n}) Expected output shape {exp}," - + f" but got {res.sizes} (diff: {diff})" - ) - break + if diff: + error = ( + f"(n={n}) Expected output shape {exp}," + + f" but got {res.sizes} (diff: {diff})" + ) + break model.validation_summary.add_detail( ValidationDetail( diff --git a/src/bioimageio/core/backends/onnx_backend.py b/src/bioimageio/core/backends/onnx_backend.py index 9d8412e5..1487e5f4 100644 --- a/src/bioimageio/core/backends/onnx_backend.py +++ b/src/bioimageio/core/backends/onnx_backend.py @@ 
-1,11 +1,14 @@ # pyright: reportUnknownVariableType=false +import shutil +import tempfile import warnings +from pathlib import Path from typing import Any, List, Optional, Sequence, Union import onnxruntime as rt # pyright: ignore[reportMissingTypeStubs] -from numpy.typing import NDArray - from bioimageio.spec.model import v0_4, v0_5 +from loguru import logger +from numpy.typing import NDArray from ..model_adapters import ModelAdapter from ..utils._type_guards import is_list, is_tuple @@ -20,11 +23,63 @@ def __init__( ): super().__init__(model_description=model_description) - if model_description.weights.onnx is None: + onnx_descr = model_description.weights.onnx + if onnx_descr is None: raise ValueError("No ONNX weights specified for {model_description.name}") - reader = model_description.weights.onnx.get_reader() - self._session = rt.InferenceSession(reader.read()) + providers = None + if hasattr(rt, "get_available_providers"): + providers = rt.get_available_providers() + + if ( + isinstance(onnx_descr, v0_5.OnnxWeightsDescr) + and onnx_descr.external_data is not None + ): + src = onnx_descr.source.absolute() + src_data = onnx_descr.external_data.source.absolute() + if ( + isinstance(src, Path) + and isinstance(src_data, Path) + and src.parent == src_data.parent + ): + logger.debug( + "Loading ONNX model with external data from {}", + src.parent, + ) + self._session = rt.InferenceSession( + src, + providers=providers, # pyright: ignore[reportUnknownArgumentType] + ) + else: + src_reader = onnx_descr.get_reader() + src_data_reader = onnx_descr.external_data.get_reader() + with tempfile.TemporaryDirectory() as tmpdir: + logger.debug( + "Loading ONNX model with external data from {}", + tmpdir, + ) + src = Path(tmpdir) / src_reader.original_file_name + src_data = Path(tmpdir) / src_data_reader.original_file_name + with src.open("wb") as f: + shutil.copyfileobj(src_reader, f) + with src_data.open("wb") as f: + shutil.copyfileobj(src_data_reader, f) + + self._session 
= rt.InferenceSession( + src, + providers=providers, # pyright: ignore[reportUnknownArgumentType] + ) + else: + # load single source file from bytes (without external data, so probably <2GB) + logger.debug( + "Loading ONNX model from bytes (read from {})", onnx_descr.source + ) + reader = onnx_descr.get_reader() + self._session = rt.InferenceSession( + reader.read(), + providers=providers, # pyright: ignore[reportUnknownArgumentType] + ) + onnx_inputs = self._session.get_inputs() self._input_names: List[str] = [ipt.name for ipt in onnx_inputs] diff --git a/src/bioimageio/core/digest_spec.py b/src/bioimageio/core/digest_spec.py index 4b86c64e..1f2ac895 100644 --- a/src/bioimageio/core/digest_spec.py +++ b/src/bioimageio/core/digest_spec.py @@ -23,10 +23,6 @@ import numpy as np import xarray as xr -from loguru import logger -from numpy.typing import NDArray -from typing_extensions import Unpack, assert_never - from bioimageio.spec._internal.io import HashKwargs from bioimageio.spec.common import FileDescr, FileSource, ZipPath from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 @@ -37,6 +33,9 @@ ParameterizedSize_N, ) from bioimageio.spec.utils import load_array +from loguru import logger +from numpy.typing import NDArray +from typing_extensions import Unpack, assert_never from .axis import Axis, AxisId, AxisInfo, AxisLike, PerAxis from .block_meta import split_multiple_shapes_into_blocks @@ -146,7 +145,9 @@ def _import_from_file_impl( importlib_spec.loader.exec_module(module) except Exception as e: - del sys.modules[module_name] + if module_name in sys.modules: + del sys.modules[module_name] + raise ImportError(f"Failed to import {source}") from e try: diff --git a/src/bioimageio/core/stat_calculators.py b/src/bioimageio/core/stat_calculators.py index ce904068..c9ae2d83 100644 --- a/src/bioimageio/core/stat_calculators.py +++ b/src/bioimageio/core/stat_calculators.py @@ -22,12 +22,11 @@ import numpy as np import xarray as xr +from bioimageio.spec.model.v0_5 
import BATCH_AXIS_ID from loguru import logger from numpy.typing import NDArray from typing_extensions import assert_never -from bioimageio.spec.model.v0_5 import BATCH_AXIS_ID - from .axis import AxisId, PerAxis from .common import MemberId from .sample import Sample @@ -140,13 +139,9 @@ def compute( n = int(np.prod([tensor.sizes[d] for d in self._axes])) if xr.__version__.startswith("2023"): - var = ( # pyright: ignore[reportUnknownVariableType] - xr.dot(c, c, dims=self._axes) / n - ) + var = xr.dot(c, c, dims=self._axes) / n else: - var = ( # pyright: ignore[reportUnknownVariableType] - xr.dot(c, c, dim=self._axes) / n - ) + var = xr.dot(c, c, dim=self._axes) / n assert isinstance(var, xr.DataArray) std = np.sqrt(var) diff --git a/src/bioimageio/core/weight_converters/_add_weights.py b/src/bioimageio/core/weight_converters/_add_weights.py index 387a3004..cc915619 100644 --- a/src/bioimageio/core/weight_converters/_add_weights.py +++ b/src/bioimageio/core/weight_converters/_add_weights.py @@ -1,15 +1,14 @@ import traceback from typing import Optional, Union -from loguru import logger -from pydantic import DirectoryPath - from bioimageio.spec import ( InvalidDescr, load_model_description, save_bioimageio_package_as_folder, ) from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat +from loguru import logger +from pydantic import DirectoryPath from .._resource_tests import load_description_and_test @@ -113,15 +112,17 @@ def add_weights( available.add("torchscript") missing.discard("torchscript") - if "torchscript" in available and "onnx" in missing: - logger.info("Attempting to convert 'torchscript' weights to 'onnx'.") - from .torchscript_to_onnx import convert + if "pytorch_state_dict" in available and "onnx" in missing: + logger.info("Attempting to convert 'pytorch_state_dict' weights to 'onnx'.") + from .pytorch_to_onnx import convert try: onnx_weights_path = output_path / "weights.onnx" + model_descr.weights.onnx = convert( model_descr, 
output_path=onnx_weights_path, + verbose=verbose, ) except Exception as e: if verbose: @@ -132,13 +133,12 @@ def add_weights( available.add("onnx") missing.discard("onnx") - if "pytorch_state_dict" in available and "onnx" in missing: - logger.info("Attempting to convert 'pytorch_state_dict' weights to 'onnx'.") - from .pytorch_to_onnx import convert + if "torchscript" in available and "onnx" in missing: + logger.info("Attempting to convert 'torchscript' weights to 'onnx'.") + from .torchscript_to_onnx import convert try: onnx_weights_path = output_path / "weights.onnx" - model_descr.weights.onnx = convert( model_descr, output_path=onnx_weights_path, diff --git a/src/bioimageio/core/weight_converters/_utils_onnx.py b/src/bioimageio/core/weight_converters/_utils_onnx.py deleted file mode 100644 index 3c45d245..00000000 --- a/src/bioimageio/core/weight_converters/_utils_onnx.py +++ /dev/null @@ -1,15 +0,0 @@ -from collections import defaultdict -from itertools import chain -from typing import DefaultDict, Dict - -from bioimageio.spec.model.v0_5 import ModelDescr - - -def get_dynamic_axes(model_descr: ModelDescr): - dynamic_axes: DefaultDict[str, Dict[int, str]] = defaultdict(dict) - for d in chain(model_descr.inputs, model_descr.outputs): - for i, ax in enumerate(d.axes): - if not isinstance(ax.size, int): - dynamic_axes[str(d.id)][i] = str(ax.id) - - return dynamic_axes diff --git a/src/bioimageio/core/weight_converters/_utils_torch_onnx.py b/src/bioimageio/core/weight_converters/_utils_torch_onnx.py new file mode 100644 index 00000000..2ab8e07a --- /dev/null +++ b/src/bioimageio/core/weight_converters/_utils_torch_onnx.py @@ -0,0 +1,181 @@ +"""helper to export both TorchScript or PytorchStateDict to ONNX""" + +from collections import defaultdict +from itertools import chain +from pathlib import Path +from typing import TYPE_CHECKING, DefaultDict, Dict, List, Literal, Tuple, Union + +import torch +from bioimageio.spec.model.v0_5 import ( + BatchAxis, + FileDescr, + 
InputAxis, + ModelDescr, + OnnxWeightsDescr, + ParameterizedSize, + SizeReference, +) +from loguru import logger +from typing_extensions import assert_never + +from .. import __version__ +from ..digest_spec import get_member_id, get_test_input_sample +from ..proc_setup import get_pre_and_postprocessing + +if TYPE_CHECKING: + from torch.export.dynamic_shapes import ( + _DimHint as DimHint, # pyright: ignore[reportPrivateUsage] + ) + + +def get_torch_sample_inputs(model_descr: ModelDescr) -> Tuple[torch.Tensor, ...]: + sample = get_test_input_sample(model_descr) + procs = get_pre_and_postprocessing( + model_descr, dataset_for_initial_statistics=[sample] + ) + procs.pre(sample) + inputs_numpy = [ + sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs + ] + return tuple(torch.from_numpy(ipt) for ipt in inputs_numpy) + + +def _get_dynamic_axes_noop(model_descr: ModelDescr): + """noop for dynamo=True which uses `get_dynamic_shapes` instead""" + + return None + + +def _get_dynamic_axes_impl(model_descr: ModelDescr): + """dynamic axes for (old) onnx export with dynamo=False""" + dynamic_axes: DefaultDict[str, Dict[int, str]] = defaultdict(dict) + for d in chain(model_descr.inputs, model_descr.outputs): + for i, ax in enumerate(d.axes): + if not isinstance(ax.size, int): + dynamic_axes[str(d.id)][i] = str(ax.id) + + return dynamic_axes + + +try: + from torch.export import Dim + + STATIC_DIM = Dim.STATIC if hasattr(Dim, "STATIC") else None + TensorDim = Union[Dim, "DimHint", None] + +except Exception as e: + use_dynamo = False + logger.info(f"Not using torch dynamo for ONNX export due to:\n{e}") + + def _get_dynamic_shapes_noop(model_descr: ModelDescr): + """noop for dynamo=False which uses `get_dynamic_axes` instead""" + + return None + + get_dynamic_shapes = _get_dynamic_shapes_noop + get_dynamic_axes = _get_dynamic_axes_impl +else: + use_dynamo = True + logger.info("Using torch dynamo for ONNX export") + + def _get_dynamic_shapes_impl(model_descr: 
ModelDescr): + """Get dynamic shapes for torch dynamo export""" + # dynamic shapes as list to match the source code which may have + # different arg names than the tensor ids in the model description + + dynamic_shapes: List[Dict[int, Union[int, TensorDim]]] = [] + potential_ref_axes: Dict[str, Tuple[InputAxis, int]] = {} + # add dynamic dims from parameterized input sizes (and fixed sizes as None) + for d in model_descr.inputs: + dynamic_tensor_dims: Dict[int, Union[int, TensorDim]] = {} + for i, ax in enumerate(d.axes): + dim_name = f"{d.id}_{ax.id}" + if isinstance(ax.size, int): + dim = ax.size + elif isinstance(ax, BatchAxis): + dim = Dim("batch", min=1) + elif isinstance(ax.size, ParameterizedSize): + dim = Dim(dim_name, min=ax.size.min) + elif isinstance(ax.size, SizeReference): + continue # handled below + else: + assert_never(ax.size) + + dynamic_tensor_dims[i] = dim + potential_ref_axes[dim_name] = (ax, i) + + dynamic_shapes.append(dynamic_tensor_dims) + + # add dynamic dims from size references + for d, dynamic_tensor_dims in zip(model_descr.inputs, dynamic_shapes): + for i, ax in enumerate(d.axes): + if not isinstance(ax.size, SizeReference): + continue # handled above + + dim_name_ref = f"{ax.size.tensor_id}_{ax.size.axis_id}" + ax_ref, i_ref = potential_ref_axes[dim_name_ref] + dim_ref = dynamic_tensor_dims[i_ref] + if isinstance(dim_ref, Dim): + a = ax_ref.scale / ax.scale + b = ax.size.offset + dim = a * dim_ref + b + else: + dim = STATIC_DIM + + dynamic_tensor_dims[i] = dim + + return dynamic_shapes + + get_dynamic_shapes = _get_dynamic_shapes_impl + get_dynamic_axes = _get_dynamic_axes_noop + + +def export_to_onnx( + model_descr: ModelDescr, + model: torch.nn.Module, + output_path: Path, + verbose: bool, + opset_version: int, + parent: Literal["torchscript", "pytorch_state_dict"], +) -> OnnxWeightsDescr: + inputs_torch = get_torch_sample_inputs(model_descr) + + save_weights_externally = use_dynamo + with torch.no_grad(): + outputs_original_torch = 
model(*inputs_torch) + if isinstance(outputs_original_torch, torch.Tensor): + outputs_original_torch = [outputs_original_torch] + + _ = torch.onnx.export( + model, + inputs_torch, + str(output_path), + dynamo=use_dynamo, + external_data=save_weights_externally, + input_names=[str(d.id) for d in model_descr.inputs], + output_names=[str(d.id) for d in model_descr.outputs], + dynamic_axes=get_dynamic_axes(model_descr), + dynamic_shapes=get_dynamic_shapes(model_descr), + verbose=verbose, + opset_version=opset_version, + ) + + if save_weights_externally: + external_data_path = output_path.with_suffix( + output_path.suffix + ".data" + ).absolute() + if not external_data_path.exists(): + raise FileNotFoundError( + f"Expected external data file at {external_data_path} not found." + ) + external_data_descr = FileDescr(source=external_data_path) + else: + external_data_descr = None + + return OnnxWeightsDescr( + source=output_path.absolute(), + external_data=external_data_descr, + parent=parent, + opset_version=opset_version, + comment=f"Converted with bioimageio.core {__version__}, dynamo={use_dynamo}.", + ) diff --git a/src/bioimageio/core/weight_converters/pytorch_to_onnx.py b/src/bioimageio/core/weight_converters/pytorch_to_onnx.py index 1627d2f8..07f435d0 100644 --- a/src/bioimageio/core/weight_converters/pytorch_to_onnx.py +++ b/src/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -1,14 +1,9 @@ from pathlib import Path -import torch - from bioimageio.spec.model.v0_5 import ModelDescr, OnnxWeightsDescr -from .. 
import __version__ from ..backends.pytorch_backend import load_torch_model -from ..digest_spec import get_member_id, get_test_input_sample -from ..proc_setup import get_pre_and_postprocessing -from ._utils_onnx import get_dynamic_axes +from ._utils_torch_onnx import export_to_onnx def convert( @@ -16,7 +11,7 @@ def convert( output_path: Path, *, verbose: bool = False, - opset_version: int = 15, + opset_version: int = 18, ) -> OnnxWeightsDescr: """ Convert model weights from the Torchscript state_dict format to the ONNX format. @@ -29,14 +24,14 @@ def convert( verbose: If True, will print out detailed information during the ONNX export process. Defaults to False. opset_version: - The ONNX opset version to use for the export. Defaults to 15. + The ONNX opset version to use for the export. Defaults to 18. Raises: ValueError: If the provided model does not have weights in the PyTorch state_dict format. Returns: - A descriptor object that contains information about the exported ONNX weights. + A description of the exported ONNX weights. 
""" state_dict_weights_descr = model_descr.weights.pytorch_state_dict @@ -45,35 +40,13 @@ def convert( "The provided model does not have weights in the pytorch state dict format" ) - sample = get_test_input_sample(model_descr) - procs = get_pre_and_postprocessing( - model_descr, dataset_for_initial_statistics=[sample] - ) - procs.pre(sample) - inputs_numpy = [ - sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs - ] - inputs_torch = [torch.from_numpy(ipt) for ipt in inputs_numpy] model = load_torch_model(state_dict_weights_descr, load_state=True) - with torch.no_grad(): - outputs_original_torch = model(*inputs_torch) - if isinstance(outputs_original_torch, torch.Tensor): - outputs_original_torch = [outputs_original_torch] - - _ = torch.onnx.export( - model, - tuple(inputs_torch), - str(output_path), - input_names=[str(d.id) for d in model_descr.inputs], - output_names=[str(d.id) for d in model_descr.outputs], - dynamic_axes=get_dynamic_axes(model_descr), - verbose=verbose, - opset_version=opset_version, - ) - return OnnxWeightsDescr( - source=output_path.absolute(), + return export_to_onnx( + model_descr, + model, + output_path, + verbose, + opset_version, parent="pytorch_state_dict", - opset_version=opset_version, - comment=f"Converted with bioimageio.core {__version__}.", ) diff --git a/src/bioimageio/core/weight_converters/torchscript_to_onnx.py b/src/bioimageio/core/weight_converters/torchscript_to_onnx.py index aa695cbb..69d8d522 100644 --- a/src/bioimageio/core/weight_converters/torchscript_to_onnx.py +++ b/src/bioimageio/core/weight_converters/torchscript_to_onnx.py @@ -1,13 +1,10 @@ from pathlib import Path import torch.jit - from bioimageio.spec.model.v0_5 import ModelDescr, OnnxWeightsDescr +from torch._export.converter import TS2EPConverter -from .. 
import __version__ -from ..digest_spec import get_member_id, get_test_input_sample -from ..proc_setup import get_pre_and_postprocessing -from ._utils_onnx import get_dynamic_axes +from ._utils_torch_onnx import export_to_onnx, get_torch_sample_inputs def convert( @@ -15,7 +12,7 @@ def convert( output_path: Path, *, verbose: bool = False, - opset_version: int = 15, + opset_version: int = 18, ) -> OnnxWeightsDescr: """ Convert model weights from the PyTorch state_dict format to the ONNX format. @@ -28,14 +25,13 @@ def convert( verbose (bool, optional): If True, will print out detailed information during the ONNX export process. Defaults to False. opset_version (int, optional): - The ONNX opset version to use for the export. Defaults to 15. + The ONNX opset version to use for the export. Defaults to 18. Raises: ValueError: If the provided model does not have weights in the torchscript format. Returns: - v0_5.OnnxWeightsDescr: - A descriptor object that contains information about the exported ONNX weights. + A description of the exported ONNX weights. 
""" torchscript_descr = model_descr.weights.torchscript @@ -44,40 +40,22 @@ def convert( "The provided model does not have weights in the torchscript format" ) - sample = get_test_input_sample(model_descr) - procs = get_pre_and_postprocessing( - model_descr, dataset_for_initial_statistics=[sample] - ) - procs.pre(sample) - inputs_numpy = [ - sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs - ] - inputs_torch = [torch.from_numpy(ipt) for ipt in inputs_numpy] - weight_reader = torchscript_descr.get_reader() - model = torch.jit.load(weight_reader) # type: ignore + model = torch.jit.load(weight_reader) # pyright: ignore[reportUnknownVariableType] model.to("cpu") - model = model.eval() # type: ignore - - with torch.no_grad(): - outputs_original_torch = model(*inputs_torch) # type: ignore - if isinstance(outputs_original_torch, torch.Tensor): - outputs_original_torch = [outputs_original_torch] - - _ = torch.onnx.export( - model, # type: ignore - tuple(inputs_torch), - str(output_path), - input_names=[str(d.id) for d in model_descr.inputs], - output_names=[str(d.id) for d in model_descr.outputs], - dynamic_axes=get_dynamic_axes(model_descr), - verbose=verbose, - opset_version=opset_version, - ) - - return OnnxWeightsDescr( - source=output_path.absolute(), + model = model.eval() # pyright: ignore[reportUnknownVariableType] + + torch_sample_inputs = get_torch_sample_inputs(model_descr) + exported_program = TS2EPConverter( + model, # pyright: ignore[reportUnknownArgumentType] + torch_sample_inputs, + ).convert() + + return export_to_onnx( + model_descr, + exported_program.module(), + output_path, + verbose, + opset_version, parent="torchscript", - opset_version=opset_version, - comment=f"Converted with bioimageio.core {__version__}.", ) diff --git a/tests/conftest.py b/tests/conftest.py index 06c89641..960bf120 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,12 +4,14 @@ from itertools import chain from typing import Dict, List +from 
bioimageio.spec import __version__ as bioimageio_spec_version from dotenv import load_dotenv from loguru import logger from pytest import FixtureRequest, fixture from bioimageio.core import enable_determinism -from bioimageio.spec import __version__ as bioimageio_spec_version + +logger.enable("bioimageio") enable_determinism() _ = load_dotenv() diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py index 1bf65782..b317b022 100644 --- a/tests/test_weight_converters.py +++ b/tests/test_weight_converters.py @@ -4,8 +4,7 @@ from pathlib import Path import pytest - -from bioimageio.spec import load_description +from bioimageio.spec import load_model_description from bioimageio.spec.model import v0_5 @@ -13,7 +12,7 @@ def test_pytorch_to_torchscript(any_torch_model, tmp_path): from bioimageio.core import test_model from bioimageio.core.weight_converters.pytorch_to_torchscript import convert - model_descr = load_description(any_torch_model, perform_io_checks=False) + model_descr = load_model_description(any_torch_model, perform_io_checks=False) if model_descr.implemented_format_version_tuple[:2] == (0, 4): pytest.skip("cannot convert to old 0.4 format") @@ -28,12 +27,41 @@ def test_pytorch_to_torchscript(any_torch_model, tmp_path): def test_pytorch_to_onnx(convert_to_onnx, tmp_path): + import torch + + if tuple(map(int, torch.__version__.split(".")[:2])) < (2, 8): + pytest.skip("torch>=2.8 is required for sufficient torch dynamo support") + from bioimageio.core import test_model from bioimageio.core.weight_converters.pytorch_to_onnx import convert - model_descr = load_description(convert_to_onnx, format_version="latest") + model_descr = load_model_description(convert_to_onnx, format_version="latest") + out_path = tmp_path / "weights.onnx" + opset_version = 18 + ret_val = convert( + model_descr=model_descr, + output_path=out_path, + opset_version=opset_version, + ) + assert os.path.exists(out_path) + assert isinstance(ret_val, v0_5.OnnxWeightsDescr) 
+ assert ret_val.opset_version == opset_version + assert ret_val.source == out_path + + model_descr.weights.onnx = ret_val + summary = test_model(model_descr, weight_format="onnx") + assert summary.status == "passed", summary.display() + + +def test_torchscript_to_onnx(unet2d_nuclei_broad_model, tmp_path): + from bioimageio.core import test_model + from bioimageio.core.weight_converters.torchscript_to_onnx import convert + + model_descr = load_model_description( + unet2d_nuclei_broad_model, format_version="latest" + ) out_path = tmp_path / "weights.onnx" - opset_version = 15 + opset_version = 18 ret_val = convert( model_descr=model_descr, output_path=out_path, @@ -55,7 +83,7 @@ def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): from bioimageio.core.weight_converters.keras_to_tensorflow import convert out_path = tmp_path / "weights.zip" - model_descr = load_description(any_keras_model) + model_descr = load_model_description(any_keras_model) ret_val = convert(model_descr, out_path) assert out_path.exists() @@ -75,7 +103,7 @@ def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): # def test_tensorflow_to_keras(any_tensorflow_model: Path, tmp_path: Path): # from bioimageio.core.weight_converters.tensorflow_to_keras import convert -# model_descr = load_description(any_tensorflow_model) +# model_descr = load_model_description(any_tensorflow_model) # out_path = tmp_path / "weights.h5" # ret_val = convert(model_descr, output_path=out_path) # assert out_path.exists() @@ -92,7 +120,7 @@ def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): # from bioimageio.core.weight_converters.tensorflow_to_keras import convert # out_path = tmp_path / "weights.zip" -# model_descr = load_description(any_tensorflow_model) +# model_descr = load_model_description(any_tensorflow_model) # ret_val = convert(model_descr, out_path) # assert out_path.exists()