From 57948ed126129b14e20be4ea0a3f84642dc0d423 Mon Sep 17 00:00:00 2001
From: scverse-bot <108668866+scverse-bot@users.noreply.github.com>
Date: Wed, 2 Apr 2025 18:21:27 +0000
Subject: [PATCH 01/15] Automated template update to v0.5.0
---
.cruft.json | 26 ++-
.editorconfig | 5 +-
.github/ISSUE_TEMPLATE/bug_report.yml | 123 ++++++------
.github/ISSUE_TEMPLATE/feature_request.yml | 2 +-
.github/workflows/build.yaml | 24 ++-
.github/workflows/release.yaml | 17 +-
.github/workflows/test.yaml | 44 ++---
.gitignore | 14 +-
.pre-commit-config.yaml | 21 ++-
.vscode/extensions.json | 18 ++
.vscode/launch.json | 33 ++++
.vscode/settings.json | 18 ++
biome.jsonc | 16 ++
docs/_templates/autosummary/class.rst | 8 +-
docs/conf.py | 14 +-
docs/contributing.md | 196 ++++++++++++-------
docs/extensions/typed_returns.py | 2 +-
pyproject.toml | 207 +++++++++++----------
18 files changed, 471 insertions(+), 317 deletions(-)
create mode 100644 .vscode/extensions.json
create mode 100644 .vscode/launch.json
create mode 100644 .vscode/settings.json
create mode 100644 biome.jsonc
diff --git a/.cruft.json b/.cruft.json
index 0b23ca4..cf9e5e0 100644
--- a/.cruft.json
+++ b/.cruft.json
@@ -1,28 +1,42 @@
{
"template": "https://github.com/scverse/cookiecutter-scverse",
- "commit": "6f983988577bcb5a3568fab29091a5c25d97adb5",
- "checkout": null,
+ "commit": "94ef9fb6f9ad8cfe65a3d9575679c03c80c49cd1",
+ "checkout": "v0.5.0",
"context": {
"cookiecutter": {
- "project_name": "fast-knn-imputation",
- "package_name": "fast_knn_imputation",
+ "project_name": "fknni",
+ "package_name": "fknni",
"project_description": "Fast implementations of KNN imputation.",
"author_full_name": "Lukas Heumos",
"author_email": "lukas.heumos@posteo.net",
"github_user": "zethson",
- "project_repo": "https://github.com/zethson/fast-knn-imputation",
+ "github_repo": "fknni",
"license": "Apache License Version 2.0",
+ "ide_integration": true,
"_copy_without_render": [
".github/workflows/build.yaml",
".github/workflows/test.yaml",
"docs/_templates/autosummary/**.rst"
],
+ "_exclude_on_template_update": [
+ "CHANGELOG.md",
+ "LICENSE",
+ "README.md",
+ "docs/api.md",
+ "docs/index.md",
+ "docs/notebooks/example.ipynb",
+ "docs/references.bib",
+ "docs/references.md",
+ "src/**",
+ "tests/**"
+ ],
"_render_devdocs": false,
"_jinja2_env_vars": {
"lstrip_blocks": true,
"trim_blocks": true
},
- "_template": "https://github.com/scverse/cookiecutter-scverse"
+ "_template": "https://github.com/scverse/cookiecutter-scverse",
+ "_commit": "94ef9fb6f9ad8cfe65a3d9575679c03c80c49cd1"
}
},
"directory": null
diff --git a/.editorconfig b/.editorconfig
index 050f911..66678e3 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,10 +8,7 @@ charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
-[*.{yml,yaml}]
-indent_size = 2
-
-[.cruft.json]
+[{*.{yml,yaml,toml},.cruft.json}]
indent_size = 2
[Makefile]
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index a5a20e6..3ca1ccb 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -23,67 +23,72 @@ body:
- type: textarea
id: versions
attributes:
- label: Version information
+ label: Versions
description: |
- Please paste below the output of
+ Which version of packages.
+
+ Please install `session-info2`, run the following command in a notebook,
+ click the “Copy as Markdown” button, then paste the results into the text box below.
+
+ ```python
+ In[1]: import session_info2; session_info2.session_info(dependencies=True)
+ ```
+
+ Alternatively, run this in a console:
```python
- import session_info
- session_info.show(html=False, dependencies=True)
+ >>> import session_info2; print(session_info2.session_info(dependencies=True)._repr_mimebundle_()["text/markdown"])
```
+ render: python
placeholder: |
- -----
- anndata 0.8.0rc2.dev27+ge524389
- session_info 1.0.0
- -----
- asttokens NA
- awkward 1.8.0
- backcall 0.2.0
- cython_runtime NA
- dateutil 2.8.2
- debugpy 1.6.0
- decorator 5.1.1
- entrypoints 0.4
- executing 0.8.3
- h5py 3.7.0
- ipykernel 6.15.0
- jedi 0.18.1
- mpl_toolkits NA
- natsort 8.1.0
- numpy 1.22.4
- packaging 21.3
- pandas 1.4.2
- parso 0.8.3
- pexpect 4.8.0
- pickleshare 0.7.5
- pkg_resources NA
- prompt_toolkit 3.0.29
- psutil 5.9.1
- ptyprocess 0.7.0
- pure_eval 0.2.2
- pydev_ipython NA
- pydevconsole NA
- pydevd 2.8.0
- pydevd_file_utils NA
- pydevd_plugins NA
- pydevd_tracing NA
- pygments 2.12.0
- pytz 2022.1
- scipy 1.8.1
- setuptools 62.5.0
- setuptools_scm NA
- six 1.16.0
- stack_data 0.3.0
- tornado 6.1
- traitlets 5.3.0
- wcwidth 0.2.5
- zmq 23.1.0
- -----
- IPython 8.4.0
- jupyter_client 7.3.4
- jupyter_core 4.10.0
- -----
- Python 3.9.13 | packaged by conda-forge | (main, May 27 2022, 16:58:50) [GCC 10.3.0]
- Linux-5.18.6-arch1-1-x86_64-with-glibc2.35
- -----
- Session information updated at 2022-07-07 17:55
+ anndata 0.11.3
+ ---- ----
+ charset-normalizer 3.4.1
+ coverage 7.7.0
+ psutil 7.0.0
+ dask 2024.7.1
+ jaraco.context 5.3.0
+ numcodecs 0.15.1
+ jaraco.functools 4.0.1
+ Jinja2 3.1.6
+ sphinxcontrib-jsmath 1.0.1
+ sphinxcontrib-htmlhelp 2.1.0
+ toolz 1.0.0
+ session-info2 0.1.2
+ PyYAML 6.0.2
+ llvmlite 0.44.0
+ scipy 1.15.2
+ pandas 2.2.3
+ sphinxcontrib-devhelp 2.0.0
+ h5py 3.13.0
+ tblib 3.0.0
+ setuptools-scm 8.2.0
+ more-itertools 10.3.0
+ msgpack 1.1.0
+ sparse 0.15.5
+ wrapt 1.17.2
+ jaraco.collections 5.1.0
+ numba 0.61.0
+ pyarrow 19.0.1
+ pytz 2025.1
+ MarkupSafe 3.0.2
+ crc32c 2.7.1
+ sphinxcontrib-qthelp 2.0.0
+ sphinxcontrib-serializinghtml 2.0.0
+ zarr 2.18.4
+ asciitree 0.3.3
+ six 1.17.0
+ sphinxcontrib-applehelp 2.0.0
+ numpy 2.1.3
+ cloudpickle 3.1.1
+ sphinxcontrib-bibtex 2.6.3
+ natsort 8.4.0
+ jaraco.text 3.12.1
+ setuptools 76.1.0
+ Deprecated 1.2.18
+ packaging 24.2
+ python-dateutil 2.9.0.post0
+ ---- ----
+ Python 3.13.2 | packaged by conda-forge | (main, Feb 17 2025, 14:10:22) [GCC 13.3.0]
+ OS Linux-6.11.0-109019-tuxedo-x86_64-with-glibc2.39
+ Updated 2025-03-18 15:47
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
index 3a59695..55f4938 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -1,5 +1,5 @@
name: Feature request
-description: Propose a new feature for fast-knn-imputation
+description: Propose a new feature for fknni
labels: enhancement
body:
- type: textarea
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 265a95e..83e01a1 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -10,20 +10,24 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
+defaults:
+ run:
+ # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u).
+ shell: bash -euo pipefail {0}
+
jobs:
package:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
- - name: Set up Python 3.10
- uses: actions/setup-python@v4
+ - uses: actions/checkout@v4
+ with:
+ filter: blob:none
+ fetch-depth: 0
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
with:
- python-version: "3.10"
- cache: "pip"
- cache-dependency-path: "**/pyproject.toml"
- - name: Install build dependencies
- run: python -m pip install --upgrade pip wheel twine build
+ cache-dependency-glob: pyproject.toml
- name: Build package
- run: python -m build
+ run: uv build
- name: Check package
- run: twine check --strict dist/*.whl
+ run: uvx twine check --strict dist/*.whl
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 2623afd..bf1157a 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -4,6 +4,11 @@ on:
release:
types: [published]
+defaults:
+ run:
+ # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u).
+ shell: bash -euo pipefail {0}
+
# Use "trusted publishing", see https://docs.pypi.org/trusted-publishers/
jobs:
release:
@@ -11,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
environment:
name: pypi
- url: https://pypi.org/p/fast_knn_imputation
+ url: https://pypi.org/p/fknni
permissions:
id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
steps:
@@ -19,11 +24,11 @@ jobs:
with:
filter: blob:none
fetch-depth: 0
- - uses: actions/setup-python@v4
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
with:
- python-version: "3.x"
- cache: "pip"
- - run: pip install build
- - run: python -m build
+ cache-dependency-glob: pyproject.toml
+ - name: Build package
+ run: uv build
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index c5b152b..d5cfb2a 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -12,23 +12,25 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
+defaults:
+ run:
+ # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u).
+ shell: bash -euo pipefail {0}
+
jobs:
test:
runs-on: ${{ matrix.os }}
- defaults:
- run:
- shell: bash -e {0} # -e to fail on error
strategy:
fail-fast: false
matrix:
include:
- os: ubuntu-latest
- python: "3.9"
+ python: "3.10"
- os: ubuntu-latest
- python: "3.11"
+ python: "3.12"
- os: ubuntu-latest
- python: "3.11"
+ python: "3.12"
pip-flags: "--pre"
name: PRE-RELEASE DEPENDENCIES
@@ -39,29 +41,19 @@ jobs:
PYTHON: ${{ matrix.python }}
steps:
- - uses: actions/checkout@v3
- - name: Set up Python ${{ matrix.python }}
- uses: actions/setup-python@v4
+ - uses: actions/checkout@v4
with:
- python-version: ${{ matrix.python }}
- cache: "pip"
- cache-dependency-path: "**/pyproject.toml"
-
- - name: Install test dependencies
- run: |
- python -m pip install --upgrade pip wheel
- - name: Install dependencies
- run: |
- pip install ${{ matrix.pip-flags }} ".[dev,test]"
- - name: Test
+ filter: blob:none
+ fetch-depth: 0
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ with:
+ cache-dependency-glob: pyproject.toml
+ - name: run tests using hatch
env:
MPLBACKEND: agg
PLATFORM: ${{ matrix.os }}
DISPLAY: :42
- run: |
- coverage run -m pytest -v --color=yes
- - name: Report coverage
- run: |
- coverage report
+ run: uvx hatch test --cover --python ${{ matrix.python }}
- name: Upload coverage
- uses: codecov/codecov-action@v3
+ uses: codecov/codecov-action@v4
diff --git a/.gitignore b/.gitignore
index 0292ce0..31e10b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,25 +6,15 @@ buck-out/
# Compiled files
.venv/
__pycache__/
-.mypy_cache/
-.ruff_cache/
+.*cache/
# Distribution / packaging
-/build/
/dist/
-/*.egg-info/
# Tests and coverage
-/.pytest_cache/
-/.cache/
/data/
+/node_modules/
# docs
/docs/generated/
/docs/_build/
-
-# IDEs
-/.idea/
-/.vscode/
-
-node_modules
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3b8b076..0fcce11 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,17 +2,21 @@ fail_fast: false
default_language_version:
python: python3
default_stages:
- - commit
- - push
+ - pre-commit
+ - pre-push
minimum_pre_commit_version: 2.16.0
repos:
- - repo: https://github.com/pre-commit/mirrors-prettier
- rev: v4.0.0-alpha.8
+ - repo: https://github.com/biomejs/pre-commit
+ rev: v1.9.4
hooks:
- - id: prettier
- exclude: "^node_modules/"
+ - id: biome-format
+ exclude: ^\.cruft\.json$ # inconsistent indentation with cruft - file never to be modified manually.
+ - repo: https://github.com/tox-dev/pyproject-fmt
+ rev: v2.5.1
+ hooks:
+ - id: pyproject-fmt
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.4.1
+ rev: v0.11.2
hooks:
- id: ruff
types_or: [python, pyi, jupyter]
@@ -20,12 +24,11 @@ repos:
- id: ruff-format
types_or: [python, pyi, jupyter]
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.6.0
+ rev: v5.0.0
hooks:
- id: detect-private-key
- id: check-ast
- id: end-of-file-fixer
- exclude: "^node_modules/"
- id: mixed-line-ending
args: [--fix=lf]
- id: trailing-whitespace
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
new file mode 100644
index 0000000..caaeb4f
--- /dev/null
+++ b/.vscode/extensions.json
@@ -0,0 +1,18 @@
+{
+ "recommendations": [
+ // GitHub integration
+ "github.vscode-github-actions",
+ "github.vscode-pull-request-github",
+ // Language support
+ "ms-python.python",
+ "ms-python.vscode-pylance",
+ "ms-toolsai.jupyter",
+ "tamasfe.even-better-toml",
+ // Dependency management
+ "ninoseki.vscode-mogami",
+ // Linting and formatting
+ "editorconfig.editorconfig",
+ "charliermarsh.ruff",
+ "biomejs.biome",
+ ],
+}
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..36d1874
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,33 @@
+{
+ // Use IntelliSense to learn about possible attributes.
+ // Hover to view descriptions of existing attributes.
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "name": "Python: Build Documentation",
+ "type": "debugpy",
+ "request": "launch",
+ "module": "sphinx",
+ "args": ["-M", "html", ".", "_build"],
+ "cwd": "${workspaceFolder}/docs",
+ "console": "internalConsole",
+ "justMyCode": false,
+ },
+ {
+ "name": "Python: Debug Test",
+ "type": "debugpy",
+ "request": "launch",
+ "program": "${file}",
+ "purpose": ["debug-test"],
+ "console": "internalConsole",
+ "justMyCode": false,
+ "env": {
+ "PYTEST_ADDOPTS": "--color=yes",
+ },
+ "presentation": {
+ "hidden": true,
+ },
+ },
+ ],
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..e034b91
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,18 @@
+{
+ "[python][json][jsonc]": {
+ "editor.formatOnSave": true,
+ },
+ "[python]": {
+ "editor.defaultFormatter": "charliermarsh.ruff",
+ "editor.codeActionsOnSave": {
+ "source.fixAll": "always",
+ "source.organizeImports": "always",
+ },
+ },
+ "[json][jsonc]": {
+ "editor.defaultFormatter": "biomejs.biome",
+ },
+ "python.analysis.typeCheckingMode": "basic",
+ "python.testing.pytestEnabled": true,
+ "python.testing.pytestArgs": ["-vv", "--color=yes"],
+}
diff --git a/biome.jsonc b/biome.jsonc
new file mode 100644
index 0000000..2175c16
--- /dev/null
+++ b/biome.jsonc
@@ -0,0 +1,16 @@
+{
+ "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
+ "formatter": { "useEditorconfig": true },
+ "overrides": [
+ {
+ "include": ["./.vscode/*.json", "**/*.jsonc"],
+ "json": {
+ "formatter": { "trailingCommas": "all" },
+ "parser": {
+ "allowComments": true,
+ "allowTrailingCommas": true,
+ },
+ },
+ },
+ ],
+}
diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst
index e4665df..7b4a0cf 100644
--- a/docs/_templates/autosummary/class.rst
+++ b/docs/_templates/autosummary/class.rst
@@ -9,11 +9,11 @@
{% block attributes %}
{% if attributes %}
Attributes table
-~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~
.. autosummary::
{% for item in attributes %}
- ~{{ fullname }}.{{ item }}
+ ~{{ name }}.{{ item }}
{%- endfor %}
{% endif %}
{% endblock %}
@@ -26,7 +26,7 @@ Methods table
.. autosummary::
{% for item in methods %}
{%- if item != '__init__' %}
- ~{{ fullname }}.{{ item }}
+ ~{{ name }}.{{ item }}
{%- endif -%}
{%- endfor %}
{% endif %}
@@ -35,7 +35,7 @@ Methods table
{% block attributes_documentation %}
{% if attributes %}
Attributes
-~~~~~~~~~~~
+~~~~~~~~~~
{% for item in attributes %}
diff --git a/docs/conf.py b/docs/conf.py
index 5378594..6d53672 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,5 +1,5 @@
# Configuration file for the Sphinx documentation builder.
-#
+
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
@@ -18,7 +18,7 @@
# NOTE: If you installed your project in editable mode, this might be stale.
# If this is the case, reinstall it to refresh the metadata
-info = metadata("fast-knn-imputation")
+info = metadata("fknni")
project_name = info["Name"]
author = info["Author"]
copyright = f"{datetime.now():%Y}, {author}."
@@ -36,10 +36,10 @@
html_context = {
"display_github": True, # Integrate GitHub
- "github_user": "zethson", # Username
- "github_repo": project_name, # Repo name
- "github_version": "main", # Version
- "conf_py_path": "/docs/", # Path in the checkout to the docs root
+ "github_user": "zethson",
+ "github_repo": project_name,
+ "github_version": "main",
+ "conf_py_path": "/docs/",
}
# -- General configuration ---------------------------------------------------
@@ -55,6 +55,7 @@
"sphinx.ext.napoleon",
"sphinxcontrib.bibtex",
"sphinx_autodoc_typehints",
+ "sphinx_tabs.tabs",
"sphinx.ext.mathjax",
"IPython.sphinxext.ipython_console_highlighting",
"sphinxext.opengraph",
@@ -93,6 +94,7 @@
intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"anndata": ("https://anndata.readthedocs.io/en/stable/", None),
+ "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
}
diff --git a/docs/contributing.md b/docs/contributing.md
index 5a7bc82..a8057f4 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -1,22 +1,45 @@
# Contributing guide
Scanpy provides extensive [developer documentation][scanpy developer guide], most of which applies to this project, too.
-This document will not reproduce the entire content from there. Instead, it aims at summarizing the most important
-information to get you started on contributing.
+This document will not reproduce the entire content from there.
+Instead, it aims at summarizing the most important information to get you started on contributing.
-We assume that you are already familiar with git and with making pull requests on GitHub. If not, please refer
-to the [scanpy developer guide][].
+We assume that you are already familiar with git and with making pull requests on GitHub.
+If not, please refer to the [scanpy developer guide][].
+
+[scanpy developer guide]: https://scanpy.readthedocs.io/en/latest/dev/index.html
## Installing dev dependencies
-In addition to the packages needed to _use_ this package, you need additional python packages to _run tests_ and _build
-the documentation_. It's easy to install them using `pip`:
+In addition to the packages needed to _use_ this package,
+you need additional python packages to [run tests](#writing-tests) and [build the documentation](#docs-building).
+
+:::::{tabs}
+::::{group-tab} Hatch
+The easiest way is to get familiar with [hatch environments][], with which these tasks are simply:
+
+```bash
+hatch test # defined in the table [tool.hatch.envs.hatch-test] in pyproject.toml
+hatch run docs:build # defined in the table [tool.hatch.envs.docs]
+```
+
+::::
+
+::::{group-tab} Pip
+If you prefer managing environments manually, you can use `pip`:
```bash
-cd fast-knn-imputation
+cd fknni
+python3 -m venv .venv
+source .venv/bin/activate
pip install -e ".[dev,test,doc]"
```
+::::
+:::::
+
+[hatch environments]: https://hatch.pypa.io/latest/tutorials/environment/basic-usage/
+
## Code-style
This package uses [pre-commit][] to enforce consistent code-styles.
@@ -28,10 +51,11 @@ To enable pre-commit locally, simply run
pre-commit install
```
-in the root of the repository. Pre-commit will automatically download all dependencies when it is run for the first time.
+in the root of the repository.
+Pre-commit will automatically download all dependencies when it is run for the first time.
-Alternatively, you can rely on the [pre-commit.ci][] service enabled on GitHub. If you didn't run `pre-commit` before
-pushing changes to GitHub it will automatically commit fixes to your pull request, or show an error message.
+Alternatively, you can rely on the [pre-commit.ci][] service enabled on GitHub.
+If you didn't run `pre-commit` before pushing changes to GitHub it will automatically commit fixes to your pull request, or show an error message.
If pre-commit.ci added a commit on a branch you still have been working on locally, simply use
@@ -42,71 +66,108 @@ git pull --rebase
to integrate the changes into yours.
While the [pre-commit.ci][] is useful, we strongly encourage installing and running pre-commit locally first to understand its usage.
-Finally, most editors have an _autoformat on save_ feature. Consider enabling this option for [ruff][ruff-editors]
-and [prettier][prettier-editors].
+Finally, most editors have an _autoformat on save_ feature.
+Consider enabling this option for [ruff][ruff-editors] and [biome][biome-editors].
+[pre-commit]: https://pre-commit.com/
+[pre-commit.ci]: https://pre-commit.ci/
[ruff-editors]: https://docs.astral.sh/ruff/integrations/
-[prettier-editors]: https://prettier.io/docs/en/editors.html
+[biome-editors]: https://biomejs.dev/guides/integrate-in-editor/
+
+(writing-tests)=
## Writing tests
-```{note}
-Remember to first install the package with `pip install -e '.[dev,test]'`
+This package uses [pytest][] for automated testing.
+Please write {doc}`scanpy:dev/testing` for every function added to the package.
+
+Most IDEs integrate with pytest and provide a GUI to run tests.
+Just point yours to one of the environments returned by
+
+```bash
+hatch env create hatch-test # create test environments for all supported versions
+hatch env find hatch-test # list all possible test environment paths
+```
+
+Alternatively, you can run all tests from the command line by executing
+
+:::::{tabs}
+::::{group-tab} Hatch
+
+```bash
+hatch test # test with the highest supported Python version
+# or
+hatch test --all # test with all supported Python versions
```
-This package uses the [pytest][] for automated testing. Please [write tests][scanpy-test-docs] for every function added
-to the package.
+::::
-Most IDEs integrate with pytest and provide a GUI to run tests. Alternatively, you can run all tests from the
-command line by executing
+::::{group-tab} Pip
```bash
+source .venv/bin/activate
pytest
```
+::::
+:::::
+
in the root of the repository.
+[pytest]: https://docs.pytest.org/
+
### Continuous integration
Continuous integration will automatically run the tests on all pull requests and test
against the minimum and maximum supported Python version.
-Additionally, there's a CI job that tests against pre-releases of all dependencies
-(if there are any). The purpose of this check is to detect incompatibilities
-of new package versions early on and gives you time to fix the issue or reach
-out to the developers of the dependency before the package is released to a wider audience.
-
-[scanpy-test-docs]: https://scanpy.readthedocs.io/en/latest/dev/testing.html#writing-tests
+Additionally, there's a CI job that tests against pre-releases of all dependencies (if there are any).
+The purpose of this check is to detect incompatibilities of new package versions early on and
+gives you time to fix the issue or reach out to the developers of the dependency before the package is released to a wider audience.
## Publishing a release
### Updating the version number
-Before making a release, you need to update the version number in the `pyproject.toml` file. Please adhere to [Semantic Versioning][semver], in brief
+Before making a release, you need to update the version number in the `pyproject.toml` file.
+Please adhere to [Semantic Versioning][semver], in brief
> Given a version number MAJOR.MINOR.PATCH, increment the:
>
-> 1. MAJOR version when you make incompatible API changes,
-> 2. MINOR version when you add functionality in a backwards compatible manner, and
-> 3. PATCH version when you make backwards compatible bug fixes.
+> 1. MAJOR version when you make incompatible API changes,
+> 2. MINOR version when you add functionality in a backwards compatible manner, and
+> 3. PATCH version when you make backwards compatible bug fixes.
>
> Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
Once you are done, commit and push your changes and navigate to the "Releases" page of this project on GitHub.
-Specify `vX.X.X` as a tag name and create a release. For more information, see [managing GitHub releases][]. This will automatically create a git tag and trigger a Github workflow that creates a release on PyPI.
+Specify `vX.X.X` as a tag name and create a release.
+For more information, see [managing GitHub releases][].
+This will automatically create a git tag and trigger a Github workflow that creates a release on [PyPI][].
+
+[semver]: https://semver.org/
+[managing GitHub releases]: https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository
+[pypi]: https://pypi.org/
## Writing documentation
-Please write documentation for new or changed features and use-cases. This project uses [sphinx][] with the following features:
+Please write documentation for new or changed features and use-cases.
+This project uses [sphinx][] with the following features:
+
+- The [myst][] extension allows to write documentation in markdown/Markedly Structured Text
+- [Numpy-style docstrings][numpydoc] (through the [napoloen][numpydoc-napoleon] extension).
+- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks))
+- [sphinx-autodoc-typehints][], to automatically reference annotated input and output types
+- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/)
-- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text
-- [Numpy-style docstrings][numpydoc] (through the [napoloen][numpydoc-napoleon] extension).
-- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks))
-- [Sphinx autodoc typehints][], to automatically reference annotated input and output types
-- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/)
+See scanpy’s {doc}`scanpy:dev/documentation` for more information on how to write your own.
-See the [scanpy developer docs](https://scanpy.readthedocs.io/en/latest/dev/documentation.html) for more information
-on how to write documentation.
+[sphinx]: https://www.sphinx-doc.org/en/master/
+[myst]: https://myst-parser.readthedocs.io/en/latest/intro.html
+[myst-nb]: https://myst-nb.readthedocs.io/en/latest/
+[numpydoc-napoleon]: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html
+[numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
+[sphinx-autodoc-typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
### Tutorials with myst-nb and jupyter notebooks
@@ -114,47 +175,40 @@ The documentation is set-up to render jupyter notebooks stored in the `docs/note
Currently, only notebooks in `.ipynb` format are supported that will be included with both their input and output cells.
It is your responsibility to update and re-run the notebook whenever necessary.
-If you are interested in automatically running notebooks as part of the continuous integration, please check
-out [this feature request](https://github.com/scverse/cookiecutter-scverse/issues/40) in the `cookiecutter-scverse`
-repository.
+If you are interested in automatically running notebooks as part of the continuous integration,
+please check out [this feature request][issue-render-notebooks] in the `cookiecutter-scverse` repository.
+
+[issue-render-notebooks]: https://github.com/scverse/cookiecutter-scverse/issues/40
#### Hints
-- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only
- if you do so can sphinx automatically create a link to the external documentation.
-- If building the documentation fails because of a missing link that is outside your control, you can add an entry to
- the `nitpick_ignore` list in `docs/conf.py`
+- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`.
+ Only if you do so can sphinx automatically create a link to the external documentation.
+- If building the documentation fails because of a missing link that is outside your control,
+ you can add an entry to the `nitpick_ignore` list in `docs/conf.py`
+
+(docs-building)=
#### Building the docs locally
+:::::{tabs}
+::::{group-tab} Hatch
+
+```bash
+hatch run docs:build
+hatch run docs:open
+```
+
+::::
+
+::::{group-tab} Pip
+
```bash
+source .venv/bin/activate
cd docs
make html
-open _build/html/index.html
+(xdg-)open _build/html/index.html
```
-
-
-[scanpy developer guide]: https://scanpy.readthedocs.io/en/latest/dev/index.html
-[cookiecutter-scverse-instance]: https://cookiecutter-scverse-instance.readthedocs.io/en/latest/template_usage.html
-[github quickstart guide]: https://docs.github.com/en/get-started/quickstart/create-a-repo?tool=webui
-[codecov]: https://about.codecov.io/sign-up/
-[codecov docs]: https://docs.codecov.com/docs
-[codecov bot]: https://docs.codecov.com/docs/team-bot
-[codecov app]: https://github.com/apps/codecov
-[pre-commit.ci]: https://pre-commit.ci/
-[readthedocs.org]: https://readthedocs.org/
-[myst-nb]: https://myst-nb.readthedocs.io/en/latest/
-[jupytext]: https://jupytext.readthedocs.io/en/latest/
-[pre-commit]: https://pre-commit.com/
-[anndata]: https://github.com/scverse/anndata
-[mudata]: https://github.com/scverse/mudata
-[pytest]: https://docs.pytest.org/
-[semver]: https://semver.org/
-[sphinx]: https://www.sphinx-doc.org/en/master/
-[myst]: https://myst-parser.readthedocs.io/en/latest/intro.html
-[numpydoc-napoleon]: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html
-[numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
-[sphinx autodoc typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
-[pypi]: https://pypi.org/
-[managing GitHub releases]: https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository
+::::
+:::::
diff --git a/docs/extensions/typed_returns.py b/docs/extensions/typed_returns.py
index 1135204..0fbffef 100644
--- a/docs/extensions/typed_returns.py
+++ b/docs/extensions/typed_returns.py
@@ -12,7 +12,7 @@
def _process_return(lines: Iterable[str]) -> Generator[str, None, None]:
for line in lines:
if m := re.fullmatch(r"(?P\w+)\s+:\s+(?P[\w.]+)", line):
- yield f'-{m["param"]} (:class:`~{m["type"]}`)'
+ yield f"-{m['param']} (:class:`~{m['type']}`)"
else:
yield line
diff --git a/pyproject.toml b/pyproject.toml
index c35a23a..1255e3a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,132 +1,135 @@
[build-system]
build-backend = "hatchling.build"
-requires = ["hatchling"]
+requires = [ "hatchling" ]
[project]
-name = "fast-knn-imputation"
+name = "fknni"
version = "0.0.1"
description = "Fast implementations of KNN imputation."
readme = "README.md"
-requires-python = ">=3.9"
-license = {file = "LICENSE"}
+license = { file = "LICENSE" }
+maintainers = [
+ { name = "Lukas Heumos", email = "lukas.heumos@posteo.net" },
+]
authors = [
- {name = "Lukas Heumos"},
+ { name = "Lukas Heumos" },
]
-maintainers = [
- {name = "Lukas Heumos", email = "lukas.heumos@posteo.net"},
+requires-python = ">=3.10"
+classifiers = [
+ "Programming Language :: Python :: 3 :: Only",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
]
-urls.Documentation = "https://fast-knn-imputation.readthedocs.io/"
-urls.Source = "https://github.com/zethson/fast-knn-imputation"
-urls.Home-page = "https://github.com/zethson/fast-knn-imputation"
dependencies = [
- "anndata",
- # for debug logging (referenced from the issue template)
- "session-info",
+ "anndata",
+ # for debug logging (referenced from the issue template)
+ "session-info2",
]
-
-[project.optional-dependencies]
-dev = [
- "pre-commit",
- "twine>=4.0.2",
+optional-dependencies.dev = [
+ "pre-commit",
+ "twine>=4.0.2",
]
-doc = [
- "docutils>=0.8,!=0.18.*,!=0.19.*",
- "sphinx>=4",
- "sphinx-book-theme>=1.0.0",
- "myst-nb",
- "sphinxcontrib-bibtex>=1.0.0",
- "sphinx-autodoc-typehints",
- "sphinxext-opengraph",
- # For notebooks
- "ipykernel",
- "ipython",
- "sphinx-copybutton",
- "pandas",
+optional-dependencies.doc = [
+ "docutils>=0.8,!=0.18.*,!=0.19.*",
+ "ipykernel",
+ "ipython",
+ "myst-nb>=1.1",
+ "pandas",
+ # Until pybtex >0.24.0 releases: https://bitbucket.org/pybtex-devs/pybtex/issues/169/
+ "setuptools",
+ "sphinx>=4",
+ "sphinx-autodoc-typehints",
+ "sphinx-book-theme>=1",
+ "sphinx-copybutton",
+ "sphinx-tabs",
+ "sphinxcontrib-bibtex>=1",
+ "sphinxext-opengraph",
]
-test = [
- "pytest",
- "coverage",
+optional-dependencies.test = [
+ "coverage",
+ "pytest",
]
+# https://docs.pypi.org/project_metadata/#project-urls
+urls.Documentation = "https://fknni.readthedocs.io/"
+urls.Homepage = "https://github.com/zethson/fknni"
+urls.Source = "https://github.com/zethson/fknni"
-[tool.coverage.run]
-source = ["fast_knn_imputation"]
-omit = [
- "**/test_*.py",
-]
+[tool.hatch.envs.default]
+installer = "uv"
+features = [ "dev" ]
-[tool.pytest.ini_options]
-testpaths = ["tests"]
-xfail_strict = true
-addopts = [
- "--import-mode=importlib", # allow using test files with same name
-]
+[tool.hatch.envs.docs]
+features = [ "doc" ]
+scripts.build = "sphinx-build -M html docs docs/_build {args}"
+scripts.open = "python -m webbrowser -t docs/_build/html/index.html"
+scripts.clean = "git clean -fdX -- {args:docs}"
+
+[tool.hatch.envs.hatch-test]
+features = [ "test" ]
[tool.ruff]
line-length = 120
-src = ["src"]
-extend-include = ["*.ipynb"]
+src = [ "src" ]
+extend-include = [ "*.ipynb" ]
-[tool.ruff.format]
-docstring-code-format = true
+format.docstring-code-format = true
-[tool.ruff.lint]
-select = [
- "F", # Errors detected by Pyflakes
- "E", # Error detected by Pycodestyle
- "W", # Warning detected by Pycodestyle
- "I", # isort
- "D", # pydocstyle
- "B", # flake8-bugbear
- "TID", # flake8-tidy-imports
- "C4", # flake8-comprehensions
- "BLE", # flake8-blind-except
- "UP", # pyupgrade
- "RUF100", # Report unused noqa directives
+lint.select = [
+ "B", # flake8-bugbear
+ "BLE", # flake8-blind-except
+ "C4", # flake8-comprehensions
+ "D", # pydocstyle
+ "E", # Error detected by Pycodestyle
+ "F", # Errors detected by Pyflakes
+ "I", # isort
+ "RUF100", # Report unused noqa directives
+ "TID", # flake8-tidy-imports
+ "UP", # pyupgrade
+ "W", # Warning detected by Pycodestyle
]
-ignore = [
- # line too long -> we accept long comment lines; formatter gets rid of long code lines
- "E501",
- # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
- "E731",
- # allow I, O, l as variable names -> I is the identity matrix
- "E741",
- # Missing docstring in public package
- "D104",
- # Missing docstring in public module
- "D100",
- # Missing docstring in __init__
- "D107",
- # Errors from function calls in argument defaults. These are fine when the result is immutable.
- "B008",
- # __magic__ methods are are often self-explanatory, allow missing docstrings
- "D105",
- # first line should end with a period [Bug: doesn't work with single-line docstrings]
- "D400",
- # First line should be in imperative mood; try rephrasing
- "D401",
- ## Disable one in each pair of mutually incompatible rules
- # We don’t want a blank line before a class docstring
- "D203",
- # We want docstrings to start immediately after the opening triple quote
- "D213",
+lint.ignore = [
+ "B008", # Errors from function calls in argument defaults. These are fine when the result is immutable.
+ "D100", # Missing docstring in public module
+ "D104", # Missing docstring in public package
+ "D105", # __magic__ methods are often self-explanatory, allow missing docstrings
+ "D107", # Missing docstring in __init__
+ # Disable one in each pair of mutually incompatible rules
+ "D203", # We don’t want a blank line before a class docstring
+ "D213", # <> We want docstrings to start immediately after the opening triple quote
+ "D400", # first line should end with a period [Bug: doesn’t work with single-line docstrings]
+ "D401", # First line should be in imperative mood; try rephrasing
+ "E501", # line too long -> we accept long comment lines; formatter gets rid of long code lines
+ "E731", # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
+ "E741", # allow I, O, l as variable names -> I is the identity matrix
]
+lint.per-file-ignores."*/__init__.py" = [ "F401" ]
+lint.per-file-ignores."docs/*" = [ "I" ]
+lint.per-file-ignores."tests/*" = [ "D" ]
+lint.pydocstyle.convention = "numpy"
-[tool.ruff.lint.pydocstyle]
-convention = "numpy"
+[tool.pytest.ini_options]
+testpaths = [ "tests" ]
+xfail_strict = true
+addopts = [
+ "--import-mode=importlib", # allow using test files with same name
+]
-[tool.ruff.lint.per-file-ignores]
-"docs/*" = ["I"]
-"tests/*" = ["D"]
-"*/__init__.py" = ["F401"]
+[tool.coverage.run]
+source = [ "fknni" ]
+omit = [
+ "**/test_*.py",
+]
[tool.cruft]
skip = [
- "tests",
- "src/**/__init__.py",
- "src/**/basic.py",
- "docs/api.md",
- "docs/changelog.md",
- "docs/references.bib",
- "docs/references.md",
- "docs/notebooks/example.ipynb",
+ "tests",
+ "src/**/__init__.py",
+ "src/**/basic.py",
+ "docs/api.md",
+ "docs/changelog.md",
+ "docs/references.bib",
+ "docs/references.md",
+ "docs/notebooks/example.ipynb",
]
From ab73246306fae8da7277018b77e9592fffc644f2 Mon Sep 17 00:00:00 2001
From: scverse-bot <108668866+scverse-bot@users.noreply.github.com>
Date: Tue, 30 Sep 2025 06:23:53 +0000
Subject: [PATCH 02/15] Automated template update to v0.6.0
---
.cruft.json | 6 +-
.github/workflows/test.yaml | 80 +++++++++++++++-----
.gitignore | 1 +
.pre-commit-config.yaml | 19 ++---
.readthedocs.yaml | 23 +++---
biome.jsonc | 5 +-
docs/Makefile | 20 -----
docs/contributing.md | 144 ++++++++++++++++++++++++++++++++----
pyproject.toml | 27 ++++++-
9 files changed, 238 insertions(+), 87 deletions(-)
delete mode 100644 docs/Makefile
diff --git a/.cruft.json b/.cruft.json
index cf9e5e0..177791b 100644
--- a/.cruft.json
+++ b/.cruft.json
@@ -1,7 +1,7 @@
{
"template": "https://github.com/scverse/cookiecutter-scverse",
- "commit": "94ef9fb6f9ad8cfe65a3d9575679c03c80c49cd1",
- "checkout": "v0.5.0",
+ "commit": "d383d94fadff9e4e6fdb59d77c68cb900d7cedec",
+ "checkout": "v0.6.0",
"context": {
"cookiecutter": {
"project_name": "fknni",
@@ -36,7 +36,7 @@
"trim_blocks": true
},
"_template": "https://github.com/scverse/cookiecutter-scverse",
- "_commit": "94ef9fb6f9ad8cfe65a3d9575679c03c80c49cd1"
+ "_commit": "d383d94fadff9e4e6fdb59d77c68cb900d7cedec"
}
},
"directory": null
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index d5cfb2a..0bd76e8 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -18,27 +18,48 @@ defaults:
shell: bash -euo pipefail {0}
jobs:
+ # Get the test environment from hatch as defined in pyproject.toml.
+ # This ensures that the pyproject.toml is the single point of truth for test definitions and the same tests are
+ # run locally and on continuous integration.
+ # Check [[tool.hatch.envs.hatch-test.matrix]] in pyproject.toml and https://hatch.pypa.io/latest/environment/ for
+ # more details.
+ get-environments:
+ runs-on: ubuntu-latest
+ outputs:
+ envs: ${{ steps.get-envs.outputs.envs }}
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ filter: blob:none
+ fetch-depth: 0
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ - name: Get test environments
+ id: get-envs
+ run: |
+ ENVS_JSON=$(uvx hatch env show --json | jq -c 'to_entries
+ | map(
+ select(.key | startswith("hatch-test"))
+ | {
+ name: .key,
+ label: (if (.key | contains("pre")) then .key + " (PRE-RELEASE DEPENDENCIES)" else .key end),
+ python: .value.python
+ }
+ )')
+ echo "envs=${ENVS_JSON}" | tee $GITHUB_OUTPUT
+
+ # Run tests through hatch. Spawns a separate runner for each environment defined in the hatch matrix obtained above.
test:
- runs-on: ${{ matrix.os }}
+ needs: get-environments
strategy:
fail-fast: false
matrix:
- include:
- - os: ubuntu-latest
- python: "3.10"
- - os: ubuntu-latest
- python: "3.12"
- - os: ubuntu-latest
- python: "3.12"
- pip-flags: "--pre"
- name: PRE-RELEASE DEPENDENCIES
-
- name: ${{ matrix.name }} Python ${{ matrix.python }}
-
- env:
- OS: ${{ matrix.os }}
- PYTHON: ${{ matrix.python }}
+ os: [ubuntu-latest]
+ env: ${{ fromJSON(needs.get-environments.outputs.envs) }}
+
+ name: ${{ matrix.env.label }}
+ runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
@@ -48,12 +69,35 @@ jobs:
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
+ python-version: ${{ matrix.env.python }}
cache-dependency-glob: pyproject.toml
+ - name: create hatch environment
+ run: uvx hatch env create ${{ matrix.env.name }}
- name: run tests using hatch
env:
MPLBACKEND: agg
PLATFORM: ${{ matrix.os }}
DISPLAY: :42
- run: uvx hatch test --cover --python ${{ matrix.python }}
+ run: uvx hatch run ${{ matrix.env.name }}:run-cov -v --color=yes -n auto
+ - name: generate coverage report
+ run: |
+ # See https://coverage.readthedocs.io/en/latest/config.html#run-patch
+ test -f .coverage || uvx hatch run ${{ matrix.env.name }}:cov-combine
+ uvx hatch run ${{ matrix.env.name }}:cov-report # report visibly
+ uvx hatch run ${{ matrix.env.name }}:coverage xml # create report for upload
- name: Upload coverage
- uses: codecov/codecov-action@v4
+ uses: codecov/codecov-action@v5
+
+ # Check that all tests defined above pass. This makes it easy to set a single "required" test in branch
+ # protection instead of having to update it frequently. See https://github.com/re-actors/alls-green#why.
+ check:
+ name: Tests pass in all hatch environments
+ if: always()
+ needs:
+ - get-environments
+ - test
+ runs-on: ubuntu-latest
+ steps:
+ - uses: re-actors/alls-green@release/v1
+ with:
+ jobs: ${{ toJSON(needs) }}
diff --git a/.gitignore b/.gitignore
index 31e10b3..bd24e4e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ __pycache__/
# Tests and coverage
/data/
/node_modules/
+/.coverage*
# docs
/docs/generated/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0fcce11..b9de3fe 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,24 +7,24 @@ default_stages:
minimum_pre_commit_version: 2.16.0
repos:
- repo: https://github.com/biomejs/pre-commit
- rev: v1.9.4
+ rev: v2.2.4
hooks:
- id: biome-format
exclude: ^\.cruft\.json$ # inconsistent indentation with cruft - file never to be modified manually.
- repo: https://github.com/tox-dev/pyproject-fmt
- rev: v2.5.1
+ rev: v2.6.0
hooks:
- id: pyproject-fmt
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.11.2
+ rev: v0.13.2
hooks:
- - id: ruff
+ - id: ruff-check
types_or: [python, pyi, jupyter]
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format
types_or: [python, pyi, jupyter]
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v5.0.0
+ rev: v6.0.0
hooks:
- id: detect-private-key
- id: check-ast
@@ -36,12 +36,3 @@ repos:
# Check that there are no merge conflicts (could be generated by template sync)
- id: check-merge-conflict
args: [--assume-in-merge]
- - repo: local
- hooks:
- - id: forbid-to-commit
- name: Don't commit rej files
- entry: |
- Cannot commit .rej files. These indicate merge conflicts that arise during automated template updates.
- Fix the merge conflicts manually and remove the .rej files.
- language: fail
- files: '.*\.rej$'
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 69897c3..c3f3f96 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -1,16 +1,15 @@
# https://docs.readthedocs.io/en/stable/config-file/v2.html
version: 2
build:
- os: ubuntu-20.04
+ os: ubuntu-24.04
tools:
- python: "3.10"
-sphinx:
- configuration: docs/conf.py
- # disable this for more lenient docs builds
- fail_on_warning: true
-python:
- install:
- - method: pip
- path: .
- extra_requirements:
- - doc
+ python: "3.12"
+ jobs:
+ create_environment:
+ - asdf plugin add uv
+ - asdf install uv latest
+ - asdf global uv latest
+ build:
+ html:
+ - uvx hatch run docs:build
+ - mv docs/_build $READTHEDOCS_OUTPUT
diff --git a/biome.jsonc b/biome.jsonc
index 2175c16..9f8f220 100644
--- a/biome.jsonc
+++ b/biome.jsonc
@@ -1,9 +1,10 @@
{
- "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
+ "$schema": "https://biomejs.dev/schemas/2.2.0/schema.json",
+ "vcs": { "enabled": true, "clientKind": "git", "useIgnoreFile": true },
"formatter": { "useEditorconfig": true },
"overrides": [
{
- "include": ["./.vscode/*.json", "**/*.jsonc"],
+ "includes": ["./.vscode/*.json", "**/*.jsonc"],
"json": {
"formatter": { "trailingCommas": "all" },
"parser": {
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index d4bb2cb..0000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS ?=
-SPHINXBUILD ?= sphinx-build
-SOURCEDIR = .
-BUILDDIR = _build
-
-# Put it first so that "make" without argument is like "make help".
-help:
- @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/contributing.md b/docs/contributing.md
index a8057f4..699d942 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -1,14 +1,33 @@
# Contributing guide
-Scanpy provides extensive [developer documentation][scanpy developer guide], most of which applies to this project, too.
-This document will not reproduce the entire content from there.
-Instead, it aims at summarizing the most important information to get you started on contributing.
-
+This document aims at summarizing the most important information for getting you started on contributing to this project.
We assume that you are already familiar with git and with making pull requests on GitHub.
-If not, please refer to the [scanpy developer guide][].
+For more extensive tutorials that also cover the absolute basics,
+please refer to other resources such as the [pyopensci tutorials][],
+the [scientific Python tutorials][], or the [scanpy developer guide][].
+
+[pyopensci tutorials]: https://www.pyopensci.org/learn.html
+[scientific Python tutorials]: https://learn.scientific-python.org/development/tutorials/
[scanpy developer guide]: https://scanpy.readthedocs.io/en/latest/dev/index.html
+:::{tip} The *hatch* project manager
+
+We highly recommend to familiarize yourself with [`hatch`][hatch].
+Hatch is a Python project manager that
+
+- manages virtual environments, separately for development, testing and building the documentation.
+ Separating the environments is useful to avoid dependency conflicts.
+- allows to run tests locally in different environments (e.g. different python versions)
+- allows to run tasks defined in `pyproject.toml`, e.g. to build documentation.
+
+While the project is set up with `hatch` in mind,
+it is still possible to use different tools to manage dependencies, such as `uv` or `pip`.
+
+:::
+
+[hatch]: https://hatch.pypa.io/latest/
+
## Installing dev dependencies
In addition to the packages needed to _use_ this package,
@@ -16,29 +35,103 @@ you need additional python packages to [run tests](#writing-tests) and [build th
:::::{tabs}
::::{group-tab} Hatch
-The easiest way is to get familiar with [hatch environments][], with which these tasks are simply:
+
+On the command line, you typically interact with hatch through its command line interface (CLI).
+Running one of the following commands will automatically resolve the environments for testing and
+building the documentation in the background:
```bash
hatch test # defined in the table [tool.hatch.envs.hatch-test] in pyproject.toml
hatch run docs:build # defined in the table [tool.hatch.envs.docs]
```
+When using an IDE such as VS Code,
+you’ll have to point the editor at the paths to the virtual environments manually.
+The environment you typically want to use as your main development environment is the `hatch-test`
+environment with the latest Python version.
+
+To get a list of all environments for your projects, run
+
+```bash
+hatch env show -i
+```
+
+This will list “Standalone” environments and a table of “Matrix” environments like the following:
+
+```
++------------+---------+--------------------------+----------+---------------------------------+-------------+
+| Name | Type | Envs | Features | Dependencies | Scripts |
++------------+---------+--------------------------+----------+---------------------------------+-------------+
+| hatch-test | virtual | hatch-test.py3.10-stable | dev | coverage-enable-subprocess==1.0 | cov-combine |
+| | | hatch-test.py3.13-stable | test | coverage[toml]~=7.4 | cov-report |
+| | | hatch-test.py3.13-pre | | pytest-mock~=3.12 | run |
+| | | | | pytest-randomly~=3.15 | run-cov |
+| | | | | pytest-rerunfailures~=14.0 | |
+| | | | | pytest-xdist[psutil]~=3.5 | |
+| | | | | pytest~=8.1 | |
++------------+---------+--------------------------+----------+---------------------------------+-------------+
+```
+
+From the `Envs` column, select the environment name you want to use for development.
+In this example, it would be `hatch-test.py3.13-stable`.
+
+Next, create the environment with
+
+```bash
+hatch env create hatch-test.py3.13-stable
+```
+
+Then, obtain the path to the environment using
+
+```bash
+hatch env find hatch-test.py3.13-stable
+```
+
+In case you are using VS Code, now open the command palette (Ctrl+Shift+P) and search for `Python: Select Interpreter`.
+Choose `Enter Interpreter Path` and paste the path to the virtual environment from above.
+
+In the future, this may become easier through a hatch vscode extension.
+
+::::
+
+::::{group-tab} uv
+
+A popular choice for managing virtual environments is [uv][].
+The main disadvantage compared to hatch is that it supports only a single environment per project at a time,
+which requires you to mix the dependencies for running tests and building docs.
+This can have undesired side-effects,
+such as requiring to install a lower version of a library your project depends on,
+only because an outdated sphinx plugin pins an older version.
+
+To initialize a virtual environment in the `.venv` directory of your project, simply run
+
+```bash
+uv sync --all-extras
+```
+
+The `.venv` directory is typically automatically discovered by IDEs such as VS Code.
+
::::
::::{group-tab} Pip
-If you prefer managing environments manually, you can use `pip`:
+
+Pip is nowadays mostly superseded by environment managers such as [hatch][].
+However, for the sake of completeness, and since it’s ubiquitously available,
+we describe how you can manage environments manually using `pip`:
```bash
-cd fknni
python3 -m venv .venv
source .venv/bin/activate
pip install -e ".[dev,test,doc]"
```
+The `.venv` directory is typically automatically discovered by IDEs such as VS Code.
+
::::
:::::
[hatch environments]: https://hatch.pypa.io/latest/tutorials/environment/basic-usage/
+[uv]: https://docs.astral.sh/uv/
## Code-style
@@ -55,7 +148,7 @@ in the root of the repository.
Pre-commit will automatically download all dependencies when it is run for the first time.
Alternatively, you can rely on the [pre-commit.ci][] service enabled on GitHub.
-If you didn't run `pre-commit` before pushing changes to GitHub it will automatically commit fixes to your pull request, or show an error message.
+If you didn’t run `pre-commit` before pushing changes to GitHub it will automatically commit fixes to your pull request, or show an error message.
If pre-commit.ci added a commit on a branch you still have been working on locally, simply use
@@ -102,6 +195,14 @@ hatch test --all # test with all supported Python versions
::::
+::::{group-tab} uv
+
+```bash
+uv run pytest
+```
+
+::::
+
::::{group-tab} Pip
```bash
@@ -118,12 +219,17 @@ in the root of the repository.
### Continuous integration
-Continuous integration will automatically run the tests on all pull requests and test
+Continuous integration via GitHub actions will automatically run the tests on all pull requests and test
against the minimum and maximum supported Python version.
-Additionally, there's a CI job that tests against pre-releases of all dependencies (if there are any).
+Additionally, there’s a CI job that tests against pre-releases of all dependencies (if there are any).
The purpose of this check is to detect incompatibilities of new package versions early on and
-gives you time to fix the issue or reach out to the developers of the dependency before the package is released to a wider audience.
+gives you time to fix the issue or reach out to the developers of the dependency before the package
+is released to a wider audience.
+
+The CI job is defined in `.github/workflows/test.yaml`,
+however the single point of truth for CI jobs is the Hatch test matrix defined in `pyproject.toml`.
+This means that local testing via hatch and remote testing on CI test against the same Python versions and use the same environments.
## Publishing a release
@@ -189,7 +295,7 @@ please check out [this feature request][issue-render-notebooks] in the `cookiecu
(docs-building)=
-#### Building the docs locally
+### Building the docs locally
:::::{tabs}
::::{group-tab} Hatch
@@ -201,12 +307,22 @@ hatch run docs:open
::::
+::::{group-tab} uv
+
+```bash
+cd docs
+uv run sphinx-build -M html . _build -W
+(xdg-)open _build/html/index.html
+```
+
+::::
+
::::{group-tab} Pip
```bash
source .venv/bin/activate
cd docs
-make html
+sphinx-build -M html . _build -W
(xdg-)open _build/html/index.html
```
diff --git a/pyproject.toml b/pyproject.toml
index 1255e3a..ef63f92 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,7 @@ optional-dependencies.doc = [
"pandas",
# Until pybtex >0.24.0 releases: https://bitbucket.org/pybtex-devs/pybtex/issues/169/
"setuptools",
- "sphinx>=4",
+ "sphinx>=8.1",
"sphinx-autodoc-typehints",
"sphinx-book-theme>=1",
"sphinx-copybutton",
@@ -48,8 +48,9 @@ optional-dependencies.doc = [
"sphinxext-opengraph",
]
optional-dependencies.test = [
- "coverage",
+ "coverage>=7.10",
"pytest",
+ "pytest-cov", # For VS Code’s coverage functionality
]
# https://docs.pypi.org/project_metadata/#project-urls
urls.Documentation = "https://fknni.readthedocs.io/"
@@ -62,12 +63,29 @@ features = [ "dev" ]
[tool.hatch.envs.docs]
features = [ "doc" ]
-scripts.build = "sphinx-build -M html docs docs/_build {args}"
+scripts.build = "sphinx-build -M html docs docs/_build -W {args}"
scripts.open = "python -m webbrowser -t docs/_build/html/index.html"
scripts.clean = "git clean -fdX -- {args:docs}"
+# Test the lowest and highest supported Python versions with normal deps
+[[tool.hatch.envs.hatch-test.matrix]]
+deps = [ "stable" ]
+python = [ "3.10", "3.13" ]
+
+# Test the newest supported Python version also with pre-release deps
+[[tool.hatch.envs.hatch-test.matrix]]
+deps = [ "pre" ]
+python = [ "3.13" ]
+
[tool.hatch.envs.hatch-test]
-features = [ "test" ]
+features = [ "dev", "test" ]
+
+[tool.hatch.envs.hatch-test.overrides]
+# If the matrix variable `deps` is set to "pre",
+# set the environment variable `UV_PRERELEASE` to "allow".
+matrix.deps.env-vars = [
+ { key = "UV_PRERELEASE", value = "allow", if = [ "pre" ] },
+]
[tool.ruff]
line-length = 120
@@ -118,6 +136,7 @@ addopts = [
[tool.coverage.run]
source = [ "fknni" ]
+patch = [ "subprocess" ]
omit = [
"**/test_*.py",
]
From 4621339791710e2f20a4299309a7edc994ea3639 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Sun, 16 Nov 2025 20:01:33 +0100
Subject: [PATCH 03/15] fix
Signed-off-by: Lukas Heumos
---
pyproject.toml | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index aad2a3e..e45f9b4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,9 +49,10 @@ optional-dependencies.test = [
"coverage",
"pytest",
]
-optional-dependencies.urls.Documentation = "https://fknni.readthedocs.io/"
-optional-dependencies.urls.Homepage = "https://github.com/zethson/fknni"
-optional-dependencies.urls.Source = "https://github.com/zethson/fknni"
+
+urls.Documentation = "https://fknni.readthedocs.io/"
+urls.Homepage = "https://github.com/zethson/fknni"
+urls.Source = "https://github.com/zethson/fknni"
[tool.hatch.envs.default]
installer = "uv"
@@ -66,7 +67,7 @@ scripts.clean = "git clean -fdX -- {args:docs}"
# Test the lowest and highest supported Python versions with normal deps
[[tool.hatch.envs.hatch-test.matrix]]
deps = [ "stable" ]
-python = [ "3.10", "3.13" ]
+python = [ "3.12", "3.13" ]
# Test the newest supported Python version also with pre-release deps
[[tool.hatch.envs.hatch-test.matrix]]
From b1425246f3a8a84d5aa1b27731db6d507a5d3cf0 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Sun, 16 Nov 2025 20:37:33 +0100
Subject: [PATCH 04/15] add 3d support
Signed-off-by: Lukas Heumos
---
src/fknni/faiss/faiss.py | 96 +++++++++++++++++++++++++---------
tests/test_faiss_imputation.py | 63 +++++++++++++++++++++-
2 files changed, 131 insertions(+), 28 deletions(-)
diff --git a/src/fknni/faiss/faiss.py b/src/fknni/faiss/faiss.py
index 1083cdb..6d97a73 100644
--- a/src/fknni/faiss/faiss.py
+++ b/src/fknni/faiss/faiss.py
@@ -1,13 +1,15 @@
from __future__ import annotations
-from typing import Literal, Any
-from lamin_utils import logger
+from collections.abc import Sequence
+from typing import Any, Literal
import faiss
import numpy as np
-from numpy import ndarray, dtype
+from lamin_utils import logger
+from numpy import dtype
from sklearn.base import BaseEstimator, TransformerMixin
+
class FaissImputer(BaseEstimator, TransformerMixin):
"""Imputer for completing missing values using Faiss, incorporating weighted averages based on distance."""
@@ -20,6 +22,7 @@ def __init__(
strategy: Literal["mean", "median", "weighted"] = "mean",
index_factory: str = "Flat",
min_data_ratio: float = 0.25,
+ temporal_mode: Literal["flatten", "per_variable"] = "flatten",
):
"""Initializes FaissImputer with specified parameters that are used for the imputation.
@@ -33,17 +36,23 @@ def __init__(
index_factory: Description of the Faiss index type to build.
min_data_ratio: The minimum (dimension 0) size of the FAISS index relative to the (dimension 0) size of the
dataset that will be used to train FAISS. Defaults to 0.25. See also `fit_transform`.
+ temporal_mode: How to handle 3D temporal data. 'flatten' treats all (variable, timestep) pairs as
+ independent features (fast but allows temporal leakage).
+ 'per_variable' imputes each variable independently across time (slower but respects temporal causality).
"""
if n_neighbors < 1:
raise ValueError("n_neighbors must be at least 1.")
if strategy not in {"mean", "median", "weighted"}:
raise ValueError("Unknown strategy. Choose one of 'mean', 'median', 'weighted'")
+ if temporal_mode not in {"flatten", "per_variable"}:
+ raise ValueError("Unknown temporal_mode. Choose one of 'flatten', 'per_variable'")
self.missing_values = missing_values
self.n_neighbors = n_neighbors
self.metric = metric
self.strategy = strategy
self.index_factory = index_factory
+ self.temporal_mode = temporal_mode
self.X_full = None
self.features_nan = None
self.min_data_ratio = min_data_ratio
@@ -51,20 +60,44 @@ def __init__(
self.warned_unsufficient_neighbors = False
super().__init__()
- # @override
- def fit_transform(self, X: np.ndarray, y=None, **fit_params) -> ndarray[Any, dtype[Any]] | None:
+ def fit_transform( # noqa: D417
+ self, X: np.ndarray, y: np.ndarray | None = None, **fit_params
+ ) -> np.ndarray[Any, dtype[Any]] | None:
"""Imputes missing values in the data using the fitted Faiss index. This imputation will be performed in place.
- This imputation will use self.min_data_ratio to check if the index is of sufficient (dimension 0) size to
- perform a qualitative KNN lookup. If not, it will temporarily exclude enough features to reach this threshold
- and try again. If an index still can't be built, it will use fallbacks values as defined by self.strategy.
+
+ This imputation will use `min_data_ratio` to check if the index is of sufficient (dimension 0) size to perform a qualitative KNN lookup.
+ If not, it will temporarily exclude enough features to reach this threshold and try again.
+ If an index still can't be built, it will use fallbacks values as defined by self.strategy.
Args:
- X: Input data with potential missing values.
+ X: Input data with potential missing values. Can be 2D (samples × features) or 3D (samples × features × timesteps).
y: Ignored, present for compatibility with sklearn's TransformerMixin.
Returns:
Data with imputed values as a NumPy array of the original data type.
"""
+ original_shape = X.shape
+
+ if X.ndim == 3 and self.temporal_mode == "per_variable":
+ n_obs, n_vars, n_t = X.shape
+ result = np.empty_like(X, dtype=np.float64)
+ for var_idx in range(n_vars):
+ X_slice = X[:, var_idx, :]
+ result[:, var_idx, :] = self._impute_2d(X_slice)
+ return result
+
+ if X.ndim == 3:
+ n_obs, n_vars, n_t = X.shape
+ X = X.reshape(n_obs, n_vars * n_t)
+
+ result = self._impute_2d(X)
+
+ if len(original_shape) == 3:
+ result = result.reshape(original_shape)
+
+ return result
+
+ def _impute_2d(self, X: np.ndarray) -> np.ndarray:
self.X_full = np.asarray(X, dtype=np.float64) if not np.issubdtype(X.dtype, np.floating) else X
if np.isnan(self.X_full).all(axis=0).any():
raise ValueError("Features with only missing values cannot be handled.")
@@ -72,7 +105,9 @@ def fit_transform(self, X: np.ndarray, y=None, **fit_params) -> ndarray[Any, dty
# Prepare fallback values, used to prefill the query vectors nan´s
# or as an imputation fallback if we can't build an index
global_fallbacks_ = (
- np.nanmean(self.X_full, axis=0) if self.strategy in ["mean", "weighted"] else np.nanmedian(self.X_full, axis=0)
+ np.nanmean(self.X_full, axis=0)
+ if self.strategy in ["mean", "weighted"]
+ else np.nanmedian(self.X_full, axis=0)
)
# We will need to impute all features having nan´s
@@ -80,12 +115,14 @@ def fit_transform(self, X: np.ndarray, y=None, **fit_params) -> ndarray[Any, dty
# Now impute iteratively
while feature_indices_to_impute:
- feature_indices_being_imputed, training_indices, training_data, index = self._fit_train_imputer(feature_indices_to_impute)
+ feature_indices_being_imputed, training_indices, training_data, index = self._fit_train_imputer(
+ feature_indices_to_impute
+ )
# Use fallback data if we can't build an index and iterate again
if index is None:
self._warn_fallback()
- self.X_full[:, feature_indices_being_imputed] = global_fallbacks_[feature_indices_being_imputed]
+ self.X_full[:, feature_indices_being_imputed] = global_fallbacks_[feature_indices_being_imputed]
continue
# Extract the features from X that was used to train FAISS, and compute the sparseness matrix
@@ -106,7 +143,9 @@ def fit_transform(self, X: np.ndarray, y=None, **fit_params) -> ndarray[Any, dty
# Call FAISS and retrieve data
distances, indices = index.search(sample.reshape(1, -1), self.n_neighbors)
assert len(indices[0]) == self.n_neighbors
- valid_indices = indices[0][indices[0] >= 0] # Filter out negative indices because they are FAISS error codes
+ valid_indices = indices[0][
+ indices[0] >= 0
+ ] # Filter out negative indices because they are FAISS error codes
# FAISS couldn't find any neighbor, use fallback values, and go to next row
if len(valid_indices) == 0:
@@ -117,8 +156,9 @@ def fit_transform(self, X: np.ndarray, y=None, **fit_params) -> ndarray[Any, dty
# FAISS couldn't find the amount of requested neighbors, warn user and proceed
if len(valid_indices) < self.n_neighbors:
if not self.warned_unsufficient_neighbors:
- logger.warning(f"FAISS couldn't find all the requested neighbors. "
- f"This warning will be displayed only once.")
+ logger.warning(
+ "FAISS couldn't find all the requested neighbors. This warning will be displayed only once."
+ )
self.warned_unsufficient_neighbors = True
# Apply strategy on neighbors data
@@ -139,20 +179,24 @@ def fit_transform(self, X: np.ndarray, y=None, **fit_params) -> ndarray[Any, dty
self.X_full[:, feature_indices_being_imputed] = x_imputed[:, np.arange(len(feature_indices_being_imputed))]
# Remove the imputed features from the to-do list
- feature_indices_to_impute = [feature_indice for feature_indice in feature_indices_to_impute if feature_indice not in feature_indices_being_imputed]
+ feature_indices_to_impute = [
+ feature_indice
+ for feature_indice in feature_indices_to_impute
+ if feature_indice not in feature_indices_being_imputed
+ ]
assert not np.isnan(self.X_full).any()
return self.X_full
- def _fit_train_imputer(self, features_indices: list[int]) -> (list[int],
- list[int] | None,
- np.ndarray | None,
- faiss.Index | None):
+ def _fit_train_imputer(
+ self, features_indices: Sequence[int]
+ ) -> tuple[list[int], list[int] | None, np.ndarray | None, faiss.Index | None]:
features_indices_to_impute = features_indices.copy()
# See what features are already imputed
- already_imputed_features_indices = [i for i in range(self.X_full.shape[1])
- if not np.isnan(self.X_full[:, i]).any()]
+ already_imputed_features_indices = [
+ i for i in range(self.X_full.shape[1]) if not np.isnan(self.X_full[:, i]).any()
+ ]
while True:
# Train data features are those indexed by features_indices AND those already fully imputed in
@@ -184,7 +228,7 @@ def _features_indices_sorted_descending_on_nan(self) -> list[int]:
self.features_nan = sorted(
(i for i in range(self.X_full.shape[1]) if np.isnan(self.X_full[:, i]).sum() > 0),
key=lambda i: np.isnan(self.X_full[:, i]).sum(),
- reverse=True
+ reverse=True,
)
return self.features_nan
@@ -198,7 +242,7 @@ def _train(self, x_train: np.ndarray) -> faiss.Index:
def _warn_fallback(self):
if not self.warned_fallback:
- logger.warning(f"Fallback data (as defined by passed strategy) were used. "
- f"This warning will only be displayed once.")
+ logger.warning(
+ "Fallback data (as defined by passed strategy) were used. This warning will only be displayed once."
+ )
self.warned_fallback = True
-
diff --git a/tests/test_faiss_imputation.py b/tests/test_faiss_imputation.py
index 004489f..4040e63 100644
--- a/tests/test_faiss_imputation.py
+++ b/tests/test_faiss_imputation.py
@@ -1,8 +1,8 @@
-from typing import Any
import numpy as np
import pandas as pd
import pytest
from sklearn.datasets import make_regression
+
from fknni.faiss.faiss import FaissImputer
@@ -55,7 +55,6 @@ def _base_check_imputation(
if not _are_ndarrays_equal(data_original[imputed_non_nan_mask], data_imputed[imputed_non_nan_mask]):
raise AssertionError("Non-NaN values in imputed columns were modified.")
- # If reaching here: all checks passed
return
@@ -165,6 +164,66 @@ def test_no_full_rows():
_base_check_imputation(arr_original, arr)
+def test_3d_flatten_imputation(rng):
+ """Tests if 3D imputation with flatten mode successfully fills all NaN values"""
+ data_3d = rng.uniform(0, 100, size=(10, 5, 3))
+ data_missing = data_3d.copy()
+ indices = [
+ (i, j, k) for i in range(data_3d.shape[0]) for j in range(data_3d.shape[1]) for k in range(data_3d.shape[2])
+ ]
+ rng.shuffle(indices)
+ for i, j, k in indices[:20]:
+ data_missing[i, j, k] = np.nan
+
+ data_original = data_missing.copy()
+ FaissImputer(n_neighbors=5, temporal_mode="flatten").fit_transform(data_missing)
+ _base_check_imputation(data_original, data_missing)
+ assert data_missing.shape == (10, 5, 3)
+
+
+def test_3d_per_variable_imputation(rng):
+ """Tests if 3D imputation with per_variable mode successfully fills all NaN values"""
+ data_3d = rng.uniform(0, 100, size=(10, 5, 3))
+ data_missing = data_3d.copy()
+ indices = [
+ (i, j, k) for i in range(data_3d.shape[0]) for j in range(data_3d.shape[1]) for k in range(data_3d.shape[2])
+ ]
+ rng.shuffle(indices)
+ for i, j, k in indices[:20]:
+ data_missing[i, j, k] = np.nan
+
+ data_original = data_missing.copy()
+ FaissImputer(n_neighbors=5, temporal_mode="per_variable").fit_transform(data_missing)
+ _base_check_imputation(data_original, data_missing)
+ assert data_missing.shape == (10, 5, 3)
+
+
+def test_3d_modes_produce_different_results(rng):
+ """Tests if flatten and per_variable modes produce different results"""
+ data_3d = rng.uniform(0, 100, size=(10, 5, 3))
+ data_missing = data_3d.copy()
+ indices = [
+ (i, j, k) for i in range(data_3d.shape[0]) for j in range(data_3d.shape[1]) for k in range(data_3d.shape[2])
+ ]
+ rng.shuffle(indices)
+ for i, j, k in indices[:20]:
+ data_missing[i, j, k] = np.nan
+
+ data_flatten = data_missing.copy()
+ data_per_var = data_missing.copy()
+
+ FaissImputer(n_neighbors=5, temporal_mode="flatten").fit_transform(data_flatten)
+ FaissImputer(n_neighbors=5, temporal_mode="per_variable").fit_transform(data_per_var)
+
+ assert not np.array_equal(data_flatten, data_per_var)
+
+
+def test_invalid_temporal_mode():
+ """Tests if imputer raises error for invalid temporal_mode"""
+ with pytest.raises(ValueError):
+ FaissImputer(temporal_mode="invalid")
+
+
def _are_ndarrays_equal(arr1: np.ndarray, arr2: np.ndarray) -> np.bool_:
"""Check if two arrays are equal member-wise.
From f587c344ef1342e68e7222cf95de53b5bdf777d5 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Sun, 16 Nov 2025 21:21:32 +0100
Subject: [PATCH 05/15] add GPU CI
Signed-off-by: Lukas Heumos
---
.cirun.config | 11 ++++++
.github/workflows/test-gpu.yaml | 61 +++++++++++++++++++++++++++++++++
CHANGELOG.md | 6 ++++
README.md | 2 +-
pyproject.toml | 4 +++
5 files changed, 83 insertions(+), 1 deletion(-)
create mode 100644 .cirun.config
create mode 100644 .github/workflows/test-gpu.yaml
diff --git a/.cirun.config b/.cirun.config
new file mode 100644
index 0000000..263d0fa
--- /dev/null
+++ b/.cirun.config
@@ -0,0 +1,11 @@
+runners:
+ - name: aws-gpu-runner
+ cloud: aws
+ instance_type: g4dn.xlarge
+ machine_image: ami-067a4ba2816407ee9
+ region: eu-north-1
+ preemptible:
+ - true
+ - false
+ labels:
+ - cirun-aws-gpu
diff --git a/.github/workflows/test-gpu.yaml b/.github/workflows/test-gpu.yaml
new file mode 100644
index 0000000..2ed4452
--- /dev/null
+++ b/.github/workflows/test-gpu.yaml
@@ -0,0 +1,61 @@
+name: GPU-CI
+
+on:
+ push:
+ branches: [main]
+ pull_request:
+ types:
+ - labeled
+ - opened
+ - synchronize
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ check:
+ name: "Triage: Check if GPU tests are allowed to run"
+ if: (!contains(github.event.pull_request.labels.*.name, 'skip-gpu-ci'))
+ runs-on: ubuntu-latest
+ steps:
+ - uses: flying-sheep/check@v1
+ with:
+ success: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'run-gpu-ci') }}
+ test:
+ name: GPU Tests
+ needs: check
+ runs-on: "cirun-aws-gpu--${{ github.run_id }}"
+ timeout-minutes: 30
+
+ defaults:
+ run:
+ shell: bash -el {0}
+ steps:
+ - uses: actions/checkout@v5
+ with:
+ fetch-depth: 0
+
+ - name: Nvidia SMI sanity check
+ run: nvidia-smi
+
+ - name: Install uv
+ uses: astral-sh/setup-uv@v7
+ with:
+ python-version: "3.13"
+ cache-dependency-glob: pyproject.toml
+
+ - name: Install fknni
+ run: uv pip install --system -e "."
+ - name: Pip list
+ run: pip list
+
+ - name: Run test
+ run: pytest -m gpu
+
+ - name: Remove 'run-gpu-ci' Label
+ if: always()
+ uses: actions-ecosystem/action-remove-labels@v1
+ with:
+ labels: "run-gpu-ci"
+ github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9bc9110..885e723 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,12 @@ and this project adheres to [Semantic Versioning][].
[keep a changelog]: https://keepachangelog.com/en/1.0.0/
[semantic versioning]: https://semver.org/spec/v2.0.0.html
+## [1.3.0]
+
+- Add GPU CI ([#15](https://github.com/zethson/fknni/pull/15)) @zethson
+- Add support for 3D imputation ([#14](https://github.com/zethson/fknni/pull/14)) @zethson
+- Require Python 3.12+ ([#13](https://github.com/zethson/fknni/pull/13)) @zethson
+
## [1.2.0]
- Iterative imputation for very sparse datasets
diff --git a/README.md b/README.md
index 0804041..3ebf244 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ Please refer to the [documentation][link-docs]. In particular, the
## Installation
-You need to have Python 3.10 or newer installed on your system.
+You need to have Python 3.12 or newer installed on your system.
If you don't have Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge).
Install the latest release of `fknni` from [PyPI](https://pypi.org/project/fknni):
diff --git a/pyproject.toml b/pyproject.toml
index e45f9b4..3807c40 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -129,6 +129,10 @@ testpaths = [ "tests" ]
xfail_strict = true
addopts = [
"--import-mode=importlib", # allow using test files with same name
+ "-m not gpu",
+]
+markers = [
+ "gpu: mark test to run on GPU",
]
[tool.coverage.run]
From e85c1ebf9005803292f615710afe763eee7b3f87 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Sun, 16 Nov 2025 21:23:44 +0100
Subject: [PATCH 06/15] fix config
Signed-off-by: Lukas Heumos
---
.cirun.config => .cirun.yml | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename .cirun.config => .cirun.yml (100%)
diff --git a/.cirun.config b/.cirun.yml
similarity index 100%
rename from .cirun.config
rename to .cirun.yml
From b3b876c4fde29dc826ddd891f202bf3b50277651 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Sun, 16 Nov 2025 21:30:22 +0100
Subject: [PATCH 07/15] fix config maybe
Signed-off-by: Lukas Heumos
---
.github/workflows/test-gpu.yaml | 6 +++++-
.github/workflows/test.yaml | 2 +-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test-gpu.yaml b/.github/workflows/test-gpu.yaml
index 2ed4452..165d4e8 100644
--- a/.github/workflows/test-gpu.yaml
+++ b/.github/workflows/test-gpu.yaml
@@ -39,10 +39,14 @@ jobs:
- name: Nvidia SMI sanity check
run: nvidia-smi
+ - name: Install Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.13"
+
- name: Install uv
uses: astral-sh/setup-uv@v7
with:
- python-version: "3.13"
cache-dependency-glob: pyproject.toml
- name: Install fknni
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index ea56d64..eda403b 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -31,7 +31,7 @@ jobs:
filter: blob:none
fetch-depth: 0
- name: Install uv
- uses: astral-sh/setup-uv@v5
+ uses: astral-sh/setup-uv@v7
- name: Get test environments
id: get-envs
run: |
From 4e5d40be8028061dc8fe53e3db066195a0b32992 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Sun, 16 Nov 2025 21:34:02 +0100
Subject: [PATCH 08/15] dev
Signed-off-by: Lukas Heumos
---
.github/workflows/test-gpu.yaml | 2 +-
ci/environment.yml | 10 ++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
create mode 100644 ci/environment.yml
diff --git a/.github/workflows/test-gpu.yaml b/.github/workflows/test-gpu.yaml
index 165d4e8..a7d7b3c 100644
--- a/.github/workflows/test-gpu.yaml
+++ b/.github/workflows/test-gpu.yaml
@@ -50,7 +50,7 @@ jobs:
cache-dependency-glob: pyproject.toml
- name: Install fknni
- run: uv pip install --system -e "."
+ run: uv pip install --system -e ".[test]"
- name: Pip list
run: pip list
diff --git a/ci/environment.yml b/ci/environment.yml
new file mode 100644
index 0000000..38e8c1d
--- /dev/null
+++ b/ci/environment.yml
@@ -0,0 +1,10 @@
+name: fknni
+channels:
+ - rapidsai
+ - nvidia
+ - conda-forge
+dependencies:
+ - rapids=25.10
+ - python=3.13
+ - cuda-version=12.9
+ - cudnn
From d904ab09a59e964ab28f17576d5fe1dc613251fe Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Sun, 16 Nov 2025 21:38:48 +0100
Subject: [PATCH 09/15] add test
Signed-off-by: Lukas Heumos
---
pyproject.toml | 21 +++++++++++++++++----
tests/{ => cpu}/conftest.py | 0
tests/{ => cpu}/test_faiss_imputation.py | 0
tests/gpu/test_stub.py | 6 ++++++
4 files changed, 23 insertions(+), 4 deletions(-)
rename tests/{ => cpu}/conftest.py (100%)
rename tests/{ => cpu}/test_faiss_imputation.py (100%)
create mode 100644 tests/gpu/test_stub.py
diff --git a/pyproject.toml b/pyproject.toml
index 3807c40..967609e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,14 +45,27 @@ optional-dependencies.doc = [
"sphinxcontrib-bibtex>=1",
"sphinxext-opengraph",
]
+optional-dependencies.rapids12 = [
+ "cudf-cu12>=25.10",
+ "cugraph-cu12>=25.10",
+ "cuml-cu12>=25.10",
+ "cupy-cuda12x",
+ "cuvs-cu12>=25.10",
+]
+optional-dependencies.rapids13 = [
+ "cudf-cu13>=25.10",
+ "cugraph-cu13>=25.10",
+ "cuml-cu13>=25.10",
+ "cupy-cuda13x",
+ "cuvs-cu13>=25.10",
+]
optional-dependencies.test = [
"coverage",
"pytest",
]
-
-urls.Documentation = "https://fknni.readthedocs.io/"
-urls.Homepage = "https://github.com/zethson/fknni"
-urls.Source = "https://github.com/zethson/fknni"
+optional-dependencies.urls.Documentation = "https://fknni.readthedocs.io/"
+optional-dependencies.urls.Homepage = "https://github.com/zethson/fknni"
+optional-dependencies.urls.Source = "https://github.com/zethson/fknni"
[tool.hatch.envs.default]
installer = "uv"
diff --git a/tests/conftest.py b/tests/cpu/conftest.py
similarity index 100%
rename from tests/conftest.py
rename to tests/cpu/conftest.py
diff --git a/tests/test_faiss_imputation.py b/tests/cpu/test_faiss_imputation.py
similarity index 100%
rename from tests/test_faiss_imputation.py
rename to tests/cpu/test_faiss_imputation.py
diff --git a/tests/gpu/test_stub.py b/tests/gpu/test_stub.py
new file mode 100644
index 0000000..be61fda
--- /dev/null
+++ b/tests/gpu/test_stub.py
@@ -0,0 +1,6 @@
+import pytest
+
+
+@pytest.mark.gpu
+def gpu_test():
+ assert 1 + 1 == 2
From 05a80ff478dd012298d7e06f359a6c944aa10594 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Sun, 16 Nov 2025 21:54:41 +0100
Subject: [PATCH 10/15] layout
Signed-off-by: Lukas Heumos
---
pyproject.toml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 967609e..67f5f88 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,9 +63,9 @@ optional-dependencies.test = [
"coverage",
"pytest",
]
-optional-dependencies.urls.Documentation = "https://fknni.readthedocs.io/"
-optional-dependencies.urls.Homepage = "https://github.com/zethson/fknni"
-optional-dependencies.urls.Source = "https://github.com/zethson/fknni"
+urls.Documentation = "https://fknni.readthedocs.io/"
+urls.Homepage = "https://github.com/zethson/fknni"
+urls.Source = "https://github.com/zethson/fknni"
[tool.hatch.envs.default]
installer = "uv"
From 1ac70fc03525b57fa844e05926c327bd3f7108eb Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Sun, 16 Nov 2025 22:02:58 +0100
Subject: [PATCH 11/15] rename
Signed-off-by: Lukas Heumos
---
pyproject.toml | 1 -
tests/gpu/{test_stub.py => test_gpu.py} | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
rename tests/gpu/{test_stub.py => test_gpu.py} (77%)
diff --git a/pyproject.toml b/pyproject.toml
index 67f5f88..419a0b5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -142,7 +142,6 @@ testpaths = [ "tests" ]
xfail_strict = true
addopts = [
"--import-mode=importlib", # allow using test files with same name
- "-m not gpu",
]
markers = [
"gpu: mark test to run on GPU",
diff --git a/tests/gpu/test_stub.py b/tests/gpu/test_gpu.py
similarity index 77%
rename from tests/gpu/test_stub.py
rename to tests/gpu/test_gpu.py
index be61fda..f3891cc 100644
--- a/tests/gpu/test_stub.py
+++ b/tests/gpu/test_gpu.py
@@ -2,5 +2,5 @@
@pytest.mark.gpu
-def gpu_test():
+def test_gpu():
assert 1 + 1 == 2
From be983938e37e75a399a79e853c22dda9e2a03501 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Mon, 17 Nov 2025 21:43:51 +0100
Subject: [PATCH 12/15] add faiss-gpu setup
Signed-off-by: Lukas Heumos
---
.../workflows/{test.yaml => test-cpu.yaml} | 2 +-
.github/workflows/test-gpu.yaml | 2 +-
pyproject.toml | 2 +
src/fknni/faiss/faiss.py | 18 ++++++
tests/__init__.py | 0
tests/compare_predictions.py | 46 +++++++++++++++
tests/conftest.py | 19 ++++++
tests/cpu/conftest.py | 7 ---
tests/cpu/test_faiss_imputation.py | 58 +------------------
tests/gpu/conftest.py | 0
tests/gpu/test_gpu.py | 11 +++-
11 files changed, 97 insertions(+), 68 deletions(-)
rename .github/workflows/{test.yaml => test-cpu.yaml} (98%)
create mode 100644 tests/__init__.py
create mode 100644 tests/compare_predictions.py
create mode 100644 tests/conftest.py
create mode 100644 tests/gpu/conftest.py
diff --git a/.github/workflows/test.yaml b/.github/workflows/test-cpu.yaml
similarity index 98%
rename from .github/workflows/test.yaml
rename to .github/workflows/test-cpu.yaml
index eda403b..46b8901 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test-cpu.yaml
@@ -26,7 +26,7 @@ jobs:
outputs:
envs: ${{ steps.get-envs.outputs.envs }}
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v5
with:
filter: blob:none
fetch-depth: 0
diff --git a/.github/workflows/test-gpu.yaml b/.github/workflows/test-gpu.yaml
index a7d7b3c..adc5338 100644
--- a/.github/workflows/test-gpu.yaml
+++ b/.github/workflows/test-gpu.yaml
@@ -50,7 +50,7 @@ jobs:
cache-dependency-glob: pyproject.toml
- name: Install fknni
- run: uv pip install --system -e ".[test]"
+ run: uv pip install --system -e ".[test,faissgpu]"
- name: Pip list
run: pip list
diff --git a/pyproject.toml b/pyproject.toml
index 419a0b5..3b8dc3b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,6 +45,7 @@ optional-dependencies.doc = [
"sphinxcontrib-bibtex>=1",
"sphinxext-opengraph",
]
+optional-dependencies.faissgpu = [ "faiss-gpu-cu12" ]
optional-dependencies.rapids12 = [
"cudf-cu12>=25.10",
"cugraph-cu12>=25.10",
@@ -142,6 +143,7 @@ testpaths = [ "tests" ]
xfail_strict = true
addopts = [
"--import-mode=importlib", # allow using test files with same name
+ "-m not gpu",
]
markers = [
"gpu: mark test to run on GPU",
diff --git a/src/fknni/faiss/faiss.py b/src/fknni/faiss/faiss.py
index 6d97a73..1df044e 100644
--- a/src/fknni/faiss/faiss.py
+++ b/src/fknni/faiss/faiss.py
@@ -9,6 +9,13 @@
from numpy import dtype
from sklearn.base import BaseEstimator, TransformerMixin
+try:
+ import faiss
+
+ HAS_FAISS_GPU = hasattr(faiss, "StandardGpuResources")
+except ImportError:
+ raise ImportError("faiss-cpu or faiss-gpu required") from None
+
class FaissImputer(BaseEstimator, TransformerMixin):
"""Imputer for completing missing values using Faiss, incorporating weighted averages based on distance."""
@@ -23,6 +30,7 @@ def __init__(
index_factory: str = "Flat",
min_data_ratio: float = 0.25,
temporal_mode: Literal["flatten", "per_variable"] = "flatten",
+ use_gpu: bool = False,
):
"""Initializes FaissImputer with specified parameters that are used for the imputation.
@@ -39,6 +47,7 @@ def __init__(
temporal_mode: How to handle 3D temporal data. 'flatten' treats all (variable, timestep) pairs as
independent features (fast but allows temporal leakage).
'per_variable' imputes each variable independently across time (slower but respects temporal causality).
+ use_gpu: Whether to train using GPU.
"""
if n_neighbors < 1:
raise ValueError("n_neighbors must be at least 1.")
@@ -47,6 +56,10 @@ def __init__(
if temporal_mode not in {"flatten", "per_variable"}:
raise ValueError("Unknown temporal_mode. Choose one of 'flatten', 'per_variable'")
+ self.use_gpu = use_gpu
+ if use_gpu and not HAS_FAISS_GPU:
+ raise ValueError("use_gpu=True requires faiss-gpu package, install with: pip install faiss-gpu") from None
+
self.missing_values = missing_values
self.n_neighbors = n_neighbors
self.metric = metric
@@ -236,6 +249,11 @@ def _features_indices_sorted_descending_on_nan(self) -> list[int]:
def _train(self, x_train: np.ndarray) -> faiss.Index:
index = faiss.index_factory(x_train.shape[1], self.index_factory)
index.metric_type = faiss.METRIC_L2 if self.metric == "l2" else faiss.METRIC_INNER_PRODUCT
+
+ if self.use_gpu:
+ res = faiss.StandardGpuResources()
+ index = faiss.index_cpu_to_gpu(res, 0, index)
+
index.train(x_train)
index.add(x_train)
return index
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/compare_predictions.py b/tests/compare_predictions.py
new file mode 100644
index 0000000..397550d
--- /dev/null
+++ b/tests/compare_predictions.py
@@ -0,0 +1,46 @@
+import numpy as np
+
+
+def _are_ndarrays_equal(arr1: np.ndarray, arr2: np.ndarray) -> np.bool_:
+ """Check if two arrays are equal member-wise.
+
+ Note: Two NaN are considered equal.
+
+ Args:
+ arr1: First array to compare
+ arr2: Second array to compare
+
+ Returns:
+ True if the two arrays are equal member-wise
+ """
+ return np.all(np.equal(arr1, arr2, dtype=object) | ((arr1 != arr1) & (arr2 != arr2)))
+
+
+def _base_check_imputation(
+ data_original: np.ndarray,
+ data_imputed: np.ndarray,
+):
+ """Provides the following base checks:
+ - Imputation doesn't leave any NaN behind
+ - Imputation doesn't modify any data that wasn't NaN
+
+ Args:
+        data_original: Dataset before imputation
+        data_imputed: Dataset after imputation
+
+ Raises:
+ AssertionError: If any of the checks fail.
+ """
+ if data_original.shape != data_imputed.shape:
+ raise AssertionError("The shapes of the two datasets do not match")
+
+ # Ensure no NaN remains in the imputed dataset
+ if np.isnan(data_imputed).any():
+ raise AssertionError("NaN found in imputed columns of layer_after.")
+
+ # Ensure imputation does not alter non-NaN values in the imputed columns
+ imputed_non_nan_mask = ~np.isnan(data_original)
+ if not _are_ndarrays_equal(data_original[imputed_non_nan_mask], data_imputed[imputed_non_nan_mask]):
+ raise AssertionError("Non-NaN values in imputed columns were modified.")
+
+ return
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..fe25307
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,19 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+
+@pytest.fixture
+def rng():
+ return np.random.default_rng(0)
+
+
+@pytest.fixture
+def simple_test_df(rng):
+ data = pd.DataFrame(rng.integers(0, 100, size=(10, 5)), columns=list("ABCDE"))
+ data_missing = data.copy()
+ indices = [(i, j) for i in range(data.shape[0]) for j in range(data.shape[1])]
+ rng.shuffle(indices)
+ for i, j in indices[:5]:
+ data_missing.iat[i, j] = np.nan
+ return data.to_numpy(), data_missing.to_numpy()
diff --git a/tests/cpu/conftest.py b/tests/cpu/conftest.py
index 0e05b5a..e69de29 100644
--- a/tests/cpu/conftest.py
+++ b/tests/cpu/conftest.py
@@ -1,7 +0,0 @@
-import numpy as np
-import pytest
-
-
-@pytest.fixture
-def rng():
- return np.random.default_rng(0)
diff --git a/tests/cpu/test_faiss_imputation.py b/tests/cpu/test_faiss_imputation.py
index 4040e63..2c5c668 100644
--- a/tests/cpu/test_faiss_imputation.py
+++ b/tests/cpu/test_faiss_imputation.py
@@ -1,22 +1,11 @@
import numpy as np
-import pandas as pd
import pytest
from sklearn.datasets import make_regression
+from tests.compare_predictions import _base_check_imputation
from fknni.faiss.faiss import FaissImputer
-@pytest.fixture
-def simple_test_df(rng):
- data = pd.DataFrame(rng.integers(0, 100, size=(10, 5)), columns=list("ABCDE"))
- data_missing = data.copy()
- indices = [(i, j) for i in range(data.shape[0]) for j in range(data.shape[1])]
- rng.shuffle(indices)
- for i, j in indices[:5]:
- data_missing.iat[i, j] = np.nan
- return data.to_numpy(), data_missing.to_numpy()
-
-
@pytest.fixture
def regression_dataset(rng):
X, y = make_regression(n_samples=100, n_features=20, random_state=42)
@@ -28,36 +17,6 @@ def regression_dataset(rng):
return X, X_missing, y
-def _base_check_imputation(
- data_original: np.ndarray,
- data_imputed: np.ndarray,
-):
- """Provides the following base checks:
- - Imputation doesn't leave any NaN behind
- - Imputation doesn't modify any data that wasn't NaN
-
- Args:
- data_before_imputation: Dataset before imputation
- data_after_imputation: Dataset after imputation
-
- Raises:
- AssertionError: If any of the checks fail.
- """
- if data_original.shape != data_imputed.shape:
- raise AssertionError("The shapes of the two datasets do not match")
-
- # Ensure no NaN remains in the imputed dataset
- if np.isnan(data_imputed).any():
- raise AssertionError("NaN found in imputed columns of layer_after.")
-
- # Ensure imputation does not alter non-NaN values in the imputed columns
- imputed_non_nan_mask = ~np.isnan(data_original)
- if not _are_ndarrays_equal(data_original[imputed_non_nan_mask], data_imputed[imputed_non_nan_mask]):
- raise AssertionError("Non-NaN values in imputed columns were modified.")
-
- return
-
-
def test_median_imputation(simple_test_df):
"""Tests if median imputation successfully fills all NaN values"""
data, data_missing = simple_test_df
@@ -222,18 +181,3 @@ def test_invalid_temporal_mode():
"""Tests if imputer raises error for invalid temporal_mode"""
with pytest.raises(ValueError):
FaissImputer(temporal_mode="invalid")
-
-
-def _are_ndarrays_equal(arr1: np.ndarray, arr2: np.ndarray) -> np.bool_:
- """Check if two arrays are equal member-wise.
-
- Note: Two NaN are considered equal.
-
- Args:
- arr1: First array to compare
- arr2: Second array to compare
-
- Returns:
- True if the two arrays are equal member-wise
- """
- return np.all(np.equal(arr1, arr2, dtype=object) | ((arr1 != arr1) & (arr2 != arr2)))
diff --git a/tests/gpu/conftest.py b/tests/gpu/conftest.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/gpu/test_gpu.py b/tests/gpu/test_gpu.py
index f3891cc..5ea13b4 100644
--- a/tests/gpu/test_gpu.py
+++ b/tests/gpu/test_gpu.py
@@ -1,6 +1,13 @@
import pytest
+from tests.compare_predictions import _base_check_imputation
+
+from fknni.faiss.faiss import FaissImputer
@pytest.mark.gpu
-def test_gpu():
- assert 1 + 1 == 2
+def test_median_imputation(simple_test_df):
+ """Tests if median imputation successfully fills all NaN values"""
+ data, data_missing = simple_test_df
+ data_original = data_missing.copy()
+ FaissImputer(n_neighbors=5, strategy="median", use_gpu=True).fit_transform(data_missing)
+ _base_check_imputation(data_original, data_missing)
From 9c6fedee8c9d017732593c451989e5ad3f6abf6e Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Mon, 17 Nov 2025 21:58:13 +0100
Subject: [PATCH 13/15] go python 3.12
Signed-off-by: Lukas Heumos
---
.github/workflows/test-gpu.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/test-gpu.yaml b/.github/workflows/test-gpu.yaml
index adc5338..68319e7 100644
--- a/.github/workflows/test-gpu.yaml
+++ b/.github/workflows/test-gpu.yaml
@@ -42,7 +42,7 @@ jobs:
- name: Install Python
uses: actions/setup-python@v5
with:
- python-version: "3.13"
+ python-version: "3.12"
- name: Install uv
uses: astral-sh/setup-uv@v7
From 245f96dabfa5ae2ca9167b257ad61f4bf40dba27 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Mon, 17 Nov 2025 22:01:33 +0100
Subject: [PATCH 14/15] move conftest
Signed-off-by: Lukas Heumos
---
tests/{cpu => }/conftest.py | 0
tests/gpu/conftest.py | 0
2 files changed, 0 insertions(+), 0 deletions(-)
rename tests/{cpu => }/conftest.py (100%)
delete mode 100644 tests/gpu/conftest.py
diff --git a/tests/cpu/conftest.py b/tests/conftest.py
similarity index 100%
rename from tests/cpu/conftest.py
rename to tests/conftest.py
diff --git a/tests/gpu/conftest.py b/tests/gpu/conftest.py
deleted file mode 100644
index e69de29..0000000
From 657776852bc39c88684d9d5d2dc6624099df8a2f Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Mon, 17 Nov 2025 22:14:42 +0100
Subject: [PATCH 15/15] only install cpu
Signed-off-by: Lukas Heumos
---
pyproject.toml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 3b8dc3b..6f11b7e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,6 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
- "faiss-cpu",
"lamin-utils",
"pandas",
"scikit-learn",
@@ -45,6 +44,7 @@ optional-dependencies.doc = [
"sphinxcontrib-bibtex>=1",
"sphinxext-opengraph",
]
+optional-dependencies.faisscpu = [ "faiss-cpu" ]
optional-dependencies.faissgpu = [ "faiss-gpu-cu12" ]
optional-dependencies.rapids12 = [
"cudf-cu12>=25.10",
@@ -89,7 +89,7 @@ deps = [ "pre" ]
python = [ "3.13" ]
[tool.hatch.envs.hatch-test]
-features = [ "dev", "test" ]
+features = [ "dev", "test", "faisscpu" ]
[tool.hatch.envs.hatch-test.overrides]
# If the matrix variable `deps` is set to "pre",