diff --git a/.vscode/tasks.json b/.vscode/tasks.json deleted file mode 100644 index 0ba64d4..0000000 --- a/.vscode/tasks.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - // See https://go.microsoft.com/fwlink/?LinkId=733558 - // for the documentation about the tasks.json format - "version": "2.0.0", - "tasks": [ - { - "options": - { - "cwd": "${workspaceRoot}/dev" - }, - "taskName": "build harness", - "type": "shell", - "command": "make", - "group": { - "kind": "build", - "isDefault": true - } - } - ] -} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..86aca69 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,71 @@ +cmake_minimum_required(VERSION 3.15...3.26) + +project(humanleague LANGUAGES CXX) + +if (NOT SKBUILD) + message(WARNING "\ + This CMake file is meant to be executed using 'scikit-build'. Running + it directly will almost certainly not produce the desired result. If + you are a user trying to install this package, please use the command + below, which will install all necessary build dependencies, compile + the package in an isolated environment, and then install it. + ===================================================================== + $ pip install . + ===================================================================== + If you are a software developer, and this is your own package, then + it is usually much more efficient to install the build dependencies + in your environment once and use the following command that avoids + a costly creation of a new virtual environment at every compilation: + ===================================================================== + $ pip install nanobind scikit-build-core[pyproject] + $ pip install --no-build-isolation -ve . + ===================================================================== + You may optionally add -Ceditable.rebuild=true to auto-rebuild when + the package is imported. 
Otherwise, you need to re-run the above + after editing C++ files.") +endif() + +# Try to import all Python components potentially needed by nanobind +find_package(Python 3.12 + REQUIRED COMPONENTS Interpreter Development.Module + OPTIONAL_COMPONENTS Development.SABIModule) + +# Import nanobind through CMake's find_package mechanism +find_package(nanobind CONFIG REQUIRED) + +# We are now ready to compile the actual extension module +nanobind_add_module( + humanleague_ext + + # Target the stable ABI for Python 3.12+, which reduces + # the number of binary wheels that must be built. This + # does nothing on older Python versions + STABLE_ABI + + # Build libnanobind statically + NB_STATIC + # NB_SHARED creates path issues + + # Source code goes here + src/Index.cpp + src/QIS.cpp + src/StatFuncs.cpp + src/TestReduce.cpp + src/TestStatFuncs.cpp + src/Integerise.cpp + src/QISI.cpp + src/Sobol.cpp + src/TestIndex.cpp + src/TestSlice.cpp + src/UnitTester.cpp + src/module.cpp + src/SobolImpl.cpp + src/TestNDArray.cpp + src/TestSobol.cpp +) + +target_compile_definitions(humanleague_ext PUBLIC PYTHON_MODULE) + +# Install directive for scikit-build-core +install(TARGETS humanleague_ext LIBRARY DESTINATION humanleague) + diff --git a/README.md b/README.md index 48ff8fa..845c8ce 100644 --- a/README.md +++ b/README.md @@ -63,11 +63,25 @@ pip install humanleague [uv](https://docs.astral.sh/uv/) is highly recommended for managing environments. + ```bash uv sync --dev uv run pytest ``` +Nanobind docs suggest a dev workflow where the build happens directly in the dev env - first manually install +the build deps (required after every `uv sync`) + +```sh +uv pip install nanobind scikit-build-core[pyproject] +``` + +Then build with + +```sh +uv pip install --no-build-isolation -ve . 
+``` + ### R Official release: @@ -105,7 +119,13 @@ The package now contains type annotations and your IDE should automatically disp ![help](./doc/help.png) -NB type stubs are generated using the `pybind11-stubgen` package, with some [manual corrections](./doc/type-stubs.md). +~~NB type stubs are generated using the `pybind11-stubgen` package, with some [manual corrections](./doc/type-stubs.md).~~ + +`nanobind` now has stubgen functionality (but appears limited as of 2.9.2) + +```sh +uv run python -m nanobind.stubgen -P -m humanleague.humanleague_ext -o humanleague/__init__.pyi -M humanleague/py.typed +``` ### Multidimensional integerisation diff --git a/humanleague/__init__.py b/humanleague/__init__.py index 866733d..6c70472 100644 --- a/humanleague/__init__.py +++ b/humanleague/__init__.py @@ -2,8 +2,7 @@ __version__ = importlib.metadata.version("humanleague") -from _humanleague import SobolSequence, flatten, integerise, ipf, qis, qisi - +from .humanleague_ext import SobolSequence, flatten, integerise, ipf, qis, qisi from .utils import tabulate_counts, tabulate_individuals __all__ = [ diff --git a/humanleague/__init__.pyi b/humanleague/__init__.pyi index b1c2686..b9cd80b 100644 --- a/humanleague/__init__.pyi +++ b/humanleague/__init__.pyi @@ -1,5 +1,4 @@ -"""Microsynthesis using quasirandom sampling and IPF, plus related functionality -""" +"""Microsynthesis using quasirandom sampling and IPF, plus related functionality""" from __future__ import annotations @@ -52,16 +51,13 @@ class SobolSequence: """ def __iter__(self) -> SobolSequence: - """__iter__ dunder - """ + """__iter__ dunder""" def __next__(self) -> npt.NDArray[np.float64]: - """__next__ dunder - """ + """__next__ dunder""" pass def _unittest() -> dict: - """For developers. Runs the C++ unit tests. - """ + """For developers. 
Runs the C++ unit tests.""" def flatten(pop: npt.NDArray[np.int64]) -> list: """Converts an n-dimensional array of counts into an n-column table with a row for each unit diff --git a/pyproject.toml b/pyproject.toml index c22d957..c0b65fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,19 +1,15 @@ [build-system] -requires = [ - "setuptools>=42", - "wheel", - "pybind11>=2.10.3", - "pytest" -] +requires = ["scikit-build-core >=0.10", "nanobind >=2.3.2"] +build-backend = "scikit_build_core.build" -build-backend = "setuptools.build_meta" [project] name = "humanleague" -version = "2.4.3" +version = "2.5.0" authors = [ { name="Andrew Smith", email="andrew@friarswood.net" }, ] +license = "MIT" license-files = ["LICENCE", "LICENSE.md"] description = "Microsynthesis using quasirandom sampling and/or IPF" readme = "README.md" @@ -32,13 +28,12 @@ dependencies = [ [dependency-groups] dev = [ - "pybind11>=3.0.0", "pytest>=8.1.4", "mypy>=1.5.0", "mypy-extensions>=1.0.0", "ruff>=0.12.9", - "build>=1.2.2.post1", "typing-extensions>=4.15.0", + "scikit-build-core[pyproject]" ] [tool.pytest.ini_options] @@ -46,12 +41,22 @@ testpaths = [ "tests" ] +[tool.scikit-build] +# Protect the configuration against future changes in scikit-build-core +minimum-version = "build-system.requires" + +# Setuptools-style build caching in a local directory +build-dir = "build/{wheel_tag}" + +# Build stable ABI wheels for CPython 3.12+ +wheel.py-api = "cp312" + [tool.ruff] line-length = 120 [tool.ruff.lint] -select = ["B", "C", "E", "F", "I"] -ignore = ["E501"] +select = ["B", "C", "E", "F", "I", "N", "W"] +ignore = ["E501", "N802"] [tool.ruff.lint.per-file-ignores] "**/__init__.py" = ["F401", "F403"] diff --git a/setup.py b/setup.py deleted file mode 100755 index d8947c2..0000000 --- a/setup.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 - -import glob - -from pybind11.setup_helpers import ParallelCompile, Pybind11Extension -from setuptools import setup # type: ignore - - -def 
source_files() -> list[str]: - sources = glob.glob("src/*.cpp") - # can't use compile skips as some files are auto-generated - skip = ["RcppExports.cpp", "rcpp_api.cpp"] - return [file for file in sources if not any(s in file for s in skip)] - - -def header_files() -> list[str]: - return glob.glob("src/*.h") - - -def defines() -> list[tuple[str, str | None]]: - return [("PYTHON_MODULE", None)] - - -ext_modules = [ - Pybind11Extension( - "_humanleague", - sources=source_files(), - include_dirs=["src"], - define_macros=defines(), - depends=["setup.py", "src/docstr.inl"] + header_files(), - cxx_std=20, - ) -] - - -ParallelCompile().install() - -setup( - name="humanleague", - packages=["humanleague"], - package_data={"humanleague": ["py.typed", "*.pyi"]}, - ext_modules=ext_modules, - zip_safe=False, -) diff --git a/src/module.cpp b/src/module.cpp index b9e2515..019425a 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -9,47 +9,37 @@ #include "UnitTester.h" -#include -#include -#include +#include +#include +#include -namespace py = pybind11; +namespace nb = nanobind; -using namespace py::literals; +using namespace nb::literals; + +template using np_array = nb::ndarray; namespace hl { -template T* begin(py::array_t& a) { - // assert(a.itemsize() == sizeof(T)); - return (T*)a.request().ptr; -} +template T* begin(np_array& a) { return a.data(); } -template T* end(py::array_t& a) { - // assert(a.itemsize() == sizeof(T)); - return (T*)a.request().ptr + a.size(); -} +template T* end(np_array& a) { return a.data() + a.size(); } -template const T* cbegin(const py::array_t& a) { - // assert(a.itemsize() == sizeof(T)); - return (const T*)a.request().ptr; -} +template const T* cbegin(const np_array& a) { return a.data(); } -template const T* cend(const py::array_t& a) { - // assert(a.itemsize() == sizeof(T)); - return (const T*)a.request().ptr + a.size(); -} +template const T* cend(const np_array& a) { return a.data() + a.size(); } -template std::vector toVector(const 
py::array_t& a) { +template std::vector toVector(const np_array& a) { if (a.ndim() == 0) { - return std::vector(1, *cbegin(a)); + return std::vector(1, *a.data()); } if (a.ndim() != 1) { - throw py::value_error("cannot convert %%-dimensional array to vector"s % a.ndim()); + throw nb::value_error(("cannot convert %%-dimensional array to vector"s % a.ndim()).c_str()); } return std::vector(cbegin(a), cend(a)); } -template NDArray toNDArray(const py::array_t& np) { +template NDArray toNDArray(const np_array& np) { const size_t dim = np.ndim(); std::vector sizes(dim); for (size_t i = 0; i < dim; ++i) @@ -59,41 +49,50 @@ template NDArray toNDArray(const py::array_t& np) { return tmp; } -template NDArray asNDArray(const py::array_t& np) { +template NDArray asNDArray(const np_array& np) { // this is a bit iffy re: constness - return NDArray(std::vector(np.shape(), np.shape() + np.ndim()), const_cast(cbegin(np))); + return NDArray(std::vector(np.shape_ptr(), np.shape_ptr() + np.ndim()), np.data()); } -template py::array_t fromNDArray(const NDArray& a) { - // TODO ensure this is safe. may need to explicitly copy data - return py::array_t(a.sizes(), a.rawData()); +template np_array fromNDArray(const NDArray& a) { + // TODO ensure this is safe. may need to explicitly copy data (and shape?) + return np_array(a.rawData(), a.dim(), std::vector(a.sizes().begin(), a.sizes().end()).data()); } -std::vector> collect_indices(const py::iterable& iterable) { +std::vector> collect_indices(const nb::iterable& iterable) { std::vector> indices; for (const auto& elem : iterable) { - const py::array_t ia = elem.cast>(); - indices.push_back(toVector(ia)); + if (nb::isinstance(elem)) { + indices.push_back(std::vector(1, nb::cast(elem))); + } else if (nb::isinstance(elem)) { + // TODO must be an easier way??? 
+ std::vector index; + for (const auto& item : elem) { + index.push_back(nb::cast(item)); + } + indices.push_back(index); + } else { + throw nb::value_error("unexpected type for index"); + } } return indices; } -template std::vector> collect_marginals(const py::iterable& iterable) { +template std::vector> collect_marginals(const nb::iterable& iterable) { std::vector> marginals; for (const auto& elem : iterable) { - const py::array_t ma = elem.cast>(); + const np_array ma = nb::cast>(elem); marginals.emplace_back(toNDArray(ma)); } return marginals; } -py::list flatten(const py::array_t& a) { +nb::list flatten(const np_array& a) { - auto warnings = pybind11::module::import("warnings"); - warnings.attr("warn")( - "humanleague.flatten is deprecated, consider using humanleague.tabulate_individuals instead."); + auto warnings = nb::module_::import_("warnings"); + warnings.attr("warn")("humanleague.flatten is deprecated, consider using humanleague.tabulate_individuals instead."); const NDArray array = asNDArray(a); @@ -103,24 +102,23 @@ py::list flatten(const py::array_t& a) { } const std::vector>& list = listify(pop, array); - py::list outer; // array.dim()); + nb::list outer; for (size_t i = 0; i < array.dim(); ++i) { - py::list l(list[i].size()); + nb::list inner; for (size_t j = 0; j < list[i].size(); ++j) { - l[j] = list[i][j]; + inner.append(list[i][j]); } - outer.insert(i, l); + outer.append(inner); } - return outer; } -py::tuple integerise1d(py::array_t frac_a, int pop) { +nb::tuple integerise1d(np_array frac_a, int pop) { if (pop < 0) { - throw py::value_error("population cannot be negative"); + throw nb::value_error("population cannot be negative"); } - // convert py::array_t to vector and normalise it to get probabilities + // convert np_array to vector and normalise it to get probabilities std::vector prob = toVector(frac_a); double sum = std::accumulate(prob.begin(), prob.end(), 0.0); for (double& p : prob) { @@ -129,25 +127,28 @@ py::tuple 
integerise1d(py::array_t frac_a, int pop) { double var = 0.0; const std::vector& freq = integeriseMarginalDistribution(prob, pop, var); - py::dict stats; + nb::dict stats; stats["conv"] = true; // always converges, but including for consistency stats["rmse"] = var; - return py::make_tuple(py::array_t(freq.size(), freq.data()), stats); + return nb::make_tuple(np_array(freq.data(), {freq.size()}), stats); } class SobolGenerator { public: - SobolGenerator(uint32_t dim, uint32_t nSkip = 0) : m_sobol(dim, nSkip) {} + SobolGenerator(size_t dim, size_t nSkip = 0) : m_sobol(dim, nSkip) {} + + np_array next() { + double* data = new double[m_sobol.dim()]; + // Delete 'data' when the 'owner' capsule expires + nb::capsule owner(data, [](void* p) noexcept { delete[] (double*)p; }); - py::array_t next() { - py::array_t sequence(m_sobol.dim()); try { const std::vector& buf = m_sobol.buf(); - std::transform(buf.cbegin(), buf.cend(), begin(sequence), [](uint32_t i) { return i * Sobol::SCALE; }); - return sequence; + std::transform(buf.cbegin(), buf.cend(), data, [](uint64_t i) { return i * Sobol::SCALE; }); + return np_array(data, {m_sobol.dim()}, owner); } catch (const std::runtime_error&) { - throw py::stop_iteration(); + throw nb::stop_iteration(); } } @@ -157,63 +158,77 @@ class SobolGenerator { Sobol m_sobol; }; -py::tuple integerise(const py::array_t& npseed) { +nb::tuple integerise(const np_array& npseed) { const NDArray seed = asNDArray(npseed); // shallow copy Integeriser integeriser(seed); - - py::dict stats("conv"_a = integeriser.conv(), "rmse"_a = integeriser.rmse()); - return py::make_tuple(fromNDArray(integeriser.result()), stats); + nb::dict stats; + stats["conv"] = integeriser.conv(); + stats["rmse"] = integeriser.rmse(); + return nb::make_tuple(fromNDArray(integeriser.result()), stats); } -py::tuple ipf(const py::array_t& seed, const py::iterable& index_iter, const py::iterable& marginal_iter) { +nb::tuple ipf(const np_array& seed, const nb::iterable& index_iter, 
const nb::iterable& marginal_iter) { std::vector> indices = collect_indices(index_iter); std::vector> marginals = collect_marginals(marginal_iter); if (indices.size() != marginals.size()) - throw py::value_error("index and marginals lists differ in size"); + throw nb::value_error("index and marginals lists differ in size"); IPF ipf(indices, marginals); const NDArray& result = ipf.solve(asNDArray(seed)); - py::dict stats("conv"_a = ipf.conv(), "pop"_a = ipf.population(), "iterations"_a = ipf.iters(), - "maxError"_a = ipf.maxError()); - return py::make_tuple(fromNDArray(result), stats); + nb::dict stats; + stats["conv"] = ipf.conv(); + stats["pop"] = ipf.population(); + stats["iterations"] = ipf.iters(); + stats["maxError"] = ipf.maxError(); + return nb::make_tuple(fromNDArray(result), stats); } -py::tuple qis(const py::iterable& index_iter, const py::iterable& marginal_iter, int64_t skips) { +nb::tuple qis(const nb::iterable& index_iter, const nb::iterable& marginal_iter, int64_t skips) { std::vector> indices = collect_indices(index_iter); std::vector> marginals = collect_marginals(marginal_iter); if (indices.size() != marginals.size()) - throw py::value_error("index and marginals lists differ in size"); + throw nb::value_error("index and marginals lists differ in size"); QIS qis(indices, marginals, skips); const NDArray& result = qis.solve(); const NDArray& expect = qis.expectation(); - py::dict stats("expectation"_a = fromNDArray(expect), "conv"_a = qis.conv(), "pop"_a = qis.population(), - "chiSq"_a = qis.chiSq(), "pValue"_a = qis.pValue(), "degeneracy"_a = qis.degeneracy()); - return py::make_tuple(fromNDArray(result), stats); + nb::dict stats; + stats["expectation"] = fromNDArray(expect); + stats["conv"] = qis.conv(); + stats["pop"] = qis.population(); + stats["chiSq"] = qis.chiSq(); + stats["pValue"] = qis.pValue(); + stats["degeneracy"] = qis.degeneracy(); + return nb::make_tuple(fromNDArray(result), stats);
 } -py::tuple qisi(const py::array_t seed, const py::iterable& index_iter, const py::iterable& marginal_iter, +nb::tuple qisi(const np_array seed, const nb::iterable& index_iter, const nb::iterable& marginal_iter, int64_t skips) { std::vector> indices = collect_indices(index_iter); std::vector> marginals = collect_marginals(marginal_iter); if (indices.size() != marginals.size()) - throw py::value_error("index and marginals lists differ in size"); + throw nb::value_error("index and marginals lists differ in size"); QISI qisi(indices, marginals, skips); const NDArray& result = qisi.solve(asNDArray(seed)); - py::dict stats("expectation"_a = fromNDArray(qisi.expectation()), "conv"_a = qisi.conv(), - "pop"_a = qisi.population(), "chiSq"_a = qisi.chiSq(), "pValue"_a = qisi.pValue(), - "degeneracy"_a = qisi.degeneracy()); - return py::make_tuple(fromNDArray(result), stats); + nb::dict stats; + stats["expectation"] = fromNDArray(qisi.expectation()); + stats["conv"] = qisi.conv(); + stats["pop"] = qisi.population(); + stats["chiSq"] = qisi.chiSq(); + stats["pValue"] = qisi.pValue(); + stats["degeneracy"] = qisi.degeneracy(); + + return nb::make_tuple(fromNDArray(result), stats); } -py::dict unittest() { +nb::dict unittest() { const unittest::Logger& log = unittest::run(); - py::dict result; + nb::dict result; result["nTests"] = log.testsRun; result["nFails"] = log.testsFailed; result["errors"] = log.errors; @@ -223,7 +238,7 @@ } // namespace hl -PYBIND11_MODULE(_humanleague, m) { +NB_MODULE(humanleague_ext, m) { #include "docstr.inl" @@ -236,22 +251,22 @@ .def("qis", hl::qis, qis_docstr, "indices"_a, "marginals"_a, "skips"_a) .def( "qis", - [](const py::iterable& indices, const py::iterable& marginals) { return hl::qis(indices, marginals, 0); }, + [](const nb::iterable& indices, const nb::iterable& marginals) { return hl::qis(indices, marginals, 0); }, qis2_docstr, 
"indices"_a, "marginals"_a) .def("qisi", hl::qisi, qisi_docstr, "seed"_a, "indices"_a, "marginals"_a, "skips"_a) .def( "qisi", - [](const py::array_t& seed, const py::iterable& indices, const py::iterable& marginals) { + [](const np_array& seed, const nb::iterable& indices, const nb::iterable& marginals) { return hl::qisi(seed, indices, marginals, 0); }, qisi2_docstr, "seed"_a, "indices"_a, "marginals"_a) .def("_unittest", hl::unittest, unittest_docstr); - py::class_(m, "SobolSequence") - .def(py::init(), SobolSequence_init2_docstr, "dim"_a, "skips"_a) - .def(py::init(), SobolSequence_init1_docstr, "dim"_a) + nb::class_(m, "SobolSequence") + .def(nb::init(), SobolSequence_init2_docstr, "dim"_a, "skips"_a) + .def(nb::init(), SobolSequence_init1_docstr, "dim"_a) .def("__iter__", &hl::SobolGenerator::iter, "__iter__ dunder") .def("__next__", &hl::SobolGenerator::next, "__next__ dunder"); } -#endif \ No newline at end of file +#endif diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_all.py b/tests/test_all.py index 629112a..518529d 100755 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -1,6 +1,6 @@ import numpy as np import pytest -from _humanleague import _unittest as hl_unittest # type: ignore[import] +from humanleague.humanleague_ext import _unittest as hl_unittest # type: ignore[import] import humanleague as hl @@ -156,10 +156,10 @@ def test_IPF() -> None: assert np.allclose(np.sum(p, 0), m1) assert np.allclose(np.sum(p, 1), m0) - # mix list and tuple - im = ((0,), (1,), [2]) + # mixed type indices + im = ((0,), np.array([1]), 2) s = np.array([[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]]) - p, stats = hl.ipf(s, im, (m0, m1, m2)) + p, stats = hl.ipf(s, im, (m0, m1, m2)) # type: ignore[arg-type] assert stats["conv"] # check overall population and marginals correct assert np.sum(p) == pytest.approx(stats["pop"], 1e-8)