From 216cfd1580082617baf9412175af05b4af738bc0 Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sat, 18 Oct 2025 16:46:52 +0100 Subject: [PATCH 1/8] [wip] --- .vscode/tasks.json | 20 ------ CMakeLists.txt | 76 +++++++++++++++++++++ humanleague/__init__.py | 3 +- pyproject.toml | 29 ++++---- setup.py | 44 ------------- src/module.cpp | 142 ++++++++++++++++++++++------------------ tests/test_all.py | 3 +- 7 files changed, 176 insertions(+), 141 deletions(-) delete mode 100644 .vscode/tasks.json create mode 100644 CMakeLists.txt delete mode 100755 setup.py diff --git a/.vscode/tasks.json b/.vscode/tasks.json deleted file mode 100644 index 0ba64d4..0000000 --- a/.vscode/tasks.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - // See https://go.microsoft.com/fwlink/?LinkId=733558 - // for the documentation about the tasks.json format - "version": "2.0.0", - "tasks": [ - { - "options": - { - "cwd": "${workspaceRoot}/dev" - }, - "taskName": "build harness", - "type": "shell", - "command": "make", - "group": { - "kind": "build", - "isDefault": true - } - } - ] -} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..d5ed166 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,76 @@ +cmake_minimum_required(VERSION 3.15...3.26) + +project(humanleague LANGUAGES CXX) + +if (NOT SKBUILD) + message(WARNING "\ + This CMake file is meant to be executed using 'scikit-build'. Running + it directly will almost certainly not produce the desired result. If + you are a user trying to install this package, please use the command + below, which will install all necessary build dependencies, compile + the package in an isolated environment, and then install it. + ===================================================================== + $ pip install . + ===================================================================== + If you are a software developer, and this is your own package, then + it is usually much more efficient to install the build dependencies + in your environment once and use the following command that avoids + a costly creation of a new virtual environment at every compilation: + ===================================================================== + $ pip install nanobind scikit-build-core[pyproject] + $ pip install --no-build-isolation -ve . + ===================================================================== + You may optionally add -Ceditable.rebuild=true to auto-rebuild when + the package is imported. Otherwise, you need to re-run the above + after editing C++ files.") +endif() + +# Try to import all Python components potentially needed by nanobind +find_package(Python 3.12 + REQUIRED COMPONENTS Interpreter Development.Module + OPTIONAL_COMPONENTS Development.SABIModule) + +# Import nanobind through CMake's find_package mechanism +find_package(nanobind CONFIG REQUIRED) + +# We are now ready to compile the actual extension module +nanobind_add_module( + # Name of the extension + humanleague_ext + + # Target the stable ABI for Python 3.12+, which reduces + # the number of binary wheels that must be built. This + # does nothing on older Python versions + STABLE_ABI + + # Build libnanobind statically and merge it into the + # extension (which itself remains a shared library) + # + # If your project builds multiple extensions, you can + # replace this flag by NB_SHARED to conserve space by + # reusing a shared libnanobind across libraries + NB_STATIC + + # Source code goes here + src/Index.cpp + src/QIS.cpp + src/StatFuncs.cpp + src/TestReduce.cpp + src/TestStatFuncs.cpp + src/Integerise.cpp + src/QISI.cpp + src/Sobol.cpp + src/TestIndex.cpp + src/TestSlice.cpp + src/UnitTester.cpp + src/module.cpp + src/SobolImpl.cpp + src/TestNDArray.cpp + src/TestSobol.cpp +) + +target_compile_definitions(humanleague_ext PUBLIC PYTHON_MODULE) + + +# Install directive for scikit-build-core +install(TARGETS humanleague_ext LIBRARY DESTINATION humanleague) diff --git a/humanleague/__init__.py b/humanleague/__init__.py index 866733d..6c70472 100644 --- a/humanleague/__init__.py +++ b/humanleague/__init__.py @@ -2,8 +2,7 @@ __version__ = importlib.metadata.version("humanleague") -from _humanleague import SobolSequence, flatten, integerise, ipf, qis, qisi - +from .humanleague_ext import SobolSequence, flatten, integerise, ipf, qis, qisi from .utils import tabulate_counts, tabulate_individuals __all__ = [ diff --git a/pyproject.toml b/pyproject.toml index c22d957..c0b65fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,19 +1,15 @@ [build-system] -requires = [ - "setuptools>=42", - "wheel", - "pybind11>=2.10.3", - "pytest" -] +requires = ["scikit-build-core >=0.10", "nanobind >=2.3.2"] +build-backend = "scikit_build_core.build" -build-backend = "setuptools.build_meta" [project] name = "humanleague" -version = "2.4.3" +version = "2.5.0" authors = [ { name="Andrew Smith", email="andrew@friarswood.net" }, ] +license = "MIT" license-files = ["LICENCE", "LICENSE.md"] description = "Microsynthesis using quasirandom sampling and/or IPF" readme = "README.md" @@ -32,13 +28,12 @@ dependencies = [ [dependency-groups] dev = [ - "pybind11>=3.0.0", "pytest>=8.1.4", "mypy>=1.5.0", "mypy-extensions>=1.0.0", "ruff>=0.12.9", - "build>=1.2.2.post1", "typing-extensions>=4.15.0", + "scikit-build-core[pyproject]" ] [tool.pytest.ini_options] @@ -46,12 +41,22 @@ testpaths = [ "tests" ] +[tool.scikit-build] +# Protect the configuration against future changes in scikit-build-core +minimum-version = "build-system.requires" + +# Setuptools-style build caching in a local directory +build-dir = "build/{wheel_tag}" + +# Build stable ABI wheels for CPython 3.12+ +wheel.py-api = "cp312" + [tool.ruff] line-length = 120 [tool.ruff.lint] -select = ["B", "C", "E", "F", "I"] -ignore = ["E501"] +select = ["B", "C", "E", "F", "I", "N", "W"] +ignore = ["E501", "N802"] [tool.ruff.lint.per-file-ignores] "**/__init__.py" = ["F401", "F403"] diff --git a/setup.py b/setup.py deleted file mode 100755 index d8947c2..0000000 --- a/setup.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 - -import glob - -from pybind11.setup_helpers import ParallelCompile, Pybind11Extension -from setuptools import setup # type: ignore - - -def source_files() -> list[str]: - sources = glob.glob("src/*.cpp") - # can't use compile skips as some files are auto-generated - skip = ["RcppExports.cpp", "rcpp_api.cpp"] - return [file for file in sources if not any(s in file for s in skip)] - - -def header_files() -> list[str]: - return glob.glob("src/*.h") - - -def defines() -> list[tuple[str, str | None]]: - return [("PYTHON_MODULE", None)] - - -ext_modules = [ - Pybind11Extension( - "_humanleague", - sources=source_files(), - include_dirs=["src"], - define_macros=defines(), - depends=["setup.py", "src/docstr.inl"] + header_files(), - cxx_std=20, - ) -] - - -ParallelCompile().install() - -setup( - name="humanleague", - packages=["humanleague"], - package_data={"humanleague": ["py.typed", "*.pyi"]}, - ext_modules=ext_modules, - zip_safe=False, -) diff --git a/src/module.cpp b/src/module.cpp index b9e2515..41973ad 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -9,47 +9,46 @@ #include "UnitTester.h" -#include -#include -#include +#include +#include -namespace py = pybind11; +namespace nb = nanobind; -using namespace py::literals; +using namespace nb::literals; namespace hl { -template T* begin(py::array_t& a) { +template T* begin(nb::ndarray& a) { // assert(a.itemsize() == sizeof(T)); return (T*)a.request().ptr; } -template T* end(py::array_t& a) { +template T* end(nb::ndarray& a) { // assert(a.itemsize() == sizeof(T)); return (T*)a.request().ptr + a.size(); } -template const T* cbegin(const py::array_t& a) { +template const T* cbegin(const nb::ndarray& a) { // assert(a.itemsize() == sizeof(T)); return (const T*)a.request().ptr; } -template const T* cend(const py::array_t& a) { +template const T* cend(const nb::ndarray& a) { // assert(a.itemsize() == sizeof(T)); return (const T*)a.request().ptr + a.size(); } -template std::vector toVector(const py::array_t& a) { +template std::vector toVector(const nb::ndarray& a) { if (a.ndim() == 0) { return std::vector(1, *cbegin(a)); } if (a.ndim() != 1) { - throw py::value_error("cannot convert %%-dimensional array to vector"s % a.ndim()); + throw nb::value_error(("cannot convert %%-dimensional array to vector"s % a.ndim()).c_str()); } return std::vector(cbegin(a), cend(a)); } -template NDArray toNDArray(const py::array_t& np) { +template NDArray toNDArray(const nb::ndarray& np) { const size_t dim = np.ndim(); std::vector sizes(dim); for (size_t i = 0; i < dim; ++i) @@ -59,39 +58,39 @@ template NDArray toNDArray(const py::array_t& np) { return tmp; } -template NDArray asNDArray(const py::array_t& np) { +template NDArray asNDArray(const nb::ndarray& np) { // this is a bit iffy re: constness - return NDArray(std::vector(np.shape(), np.shape() + np.ndim()), const_cast(cbegin(np))); + return NDArray(std::vector(np.shape_ptr(), np.shape_ptr() + np.ndim()), const_cast(cbegin(np))); } -template py::array_t fromNDArray(const NDArray& a) { +template nb::ndarray fromNDArray(const NDArray& a) { // TODO ensure this is safe. may need to explicitly copy data - return py::array_t(a.sizes(), a.rawData()); + return nb::ndarray(a.rawData(), a.dim(), std::vector(a.sizes().begin(), a.sizes().end()).data()); } -std::vector> collect_indices(const py::iterable& iterable) { +std::vector> collect_indices(const nb::iterable& iterable) { std::vector> indices; for (const auto& elem : iterable) { - const py::array_t ia = elem.cast>(); + const nb::ndarray ia = nb::cast>(elem); indices.push_back(toVector(ia)); } return indices; } -template std::vector> collect_marginals(const py::iterable& iterable) { +template std::vector> collect_marginals(const nb::iterable& iterable) { std::vector> marginals; for (const auto& elem : iterable) { - const py::array_t ma = elem.cast>(); + const nb::ndarray ma = nb::cast>(elem); marginals.emplace_back(toNDArray(ma)); } return marginals; } -py::list flatten(const py::array_t& a) { +nb::list flatten(const nb::ndarray& a) { - auto warnings = pybind11::module::import("warnings"); + auto warnings = nb::module_::import_("warnings"); warnings.attr("warn")( "humanleague.flatten is deprecated, consider using humanleague.tabulate_individuals instead."); @@ -103,11 +102,11 @@ py::list flatten(const py::array_t& a) { } const std::vector>& list = listify(pop, array); - py::list outer; // array.dim()); + nb::list outer; // array.dim()); for (size_t i = 0; i < array.dim(); ++i) { - py::list l(list[i].size()); + nb::list l; for (size_t j = 0; j < list[i].size(); ++j) { - l[j] = list[i][j]; + l.append(list[i][j]); } outer.insert(i, l); } @@ -115,12 +114,12 @@ py::list flatten(const py::array_t& a) { return outer; } -py::tuple integerise1d(py::array_t frac_a, int pop) { +nb::tuple integerise1d(nb::ndarray frac_a, int pop) { if (pop < 0) { - throw py::value_error("population cannot be negative"); + throw nb::value_error("population cannot be negative"); } - // convert py::array_t to vector and normalise it to get probabilities + // convert nb::ndarray to vector and normalise it to get probabilities std::vector prob = toVector(frac_a); double sum = std::accumulate(prob.begin(), prob.end(), 0.0); for (double& p : prob) { @@ -129,25 +128,25 @@ py::tuple integerise1d(py::array_t frac_a, int pop) { double var = 0.0; const std::vector& freq = integeriseMarginalDistribution(prob, pop, var); - py::dict stats; + nb::dict stats; stats["conv"] = true; // always converges, but including for consistency stats["rmse"] = var; - return py::make_tuple(py::array_t(freq.size(), freq.data()), stats); + return nb::make_tuple(nb::ndarray(freq.data(), {freq.size()}), stats); } class SobolGenerator { public: SobolGenerator(uint32_t dim, uint32_t nSkip = 0) : m_sobol(dim, nSkip) {} - py::array_t next() { - py::array_t sequence(m_sobol.dim()); + nb::ndarray next() { + std::vector sequence(m_sobol.dim()); try { const std::vector& buf = m_sobol.buf(); std::transform(buf.cbegin(), buf.cend(), begin(sequence), [](uint32_t i) { return i * Sobol::SCALE; }); - return sequence; + return nb::ndarray(sequence.data(), {sequence.size()}); } catch (const std::runtime_error&) { - throw py::stop_iteration(); + throw nb::stop_iteration(); } } @@ -157,63 +156,78 @@ class SobolGenerator { Sobol m_sobol; }; -py::tuple integerise(const py::array_t& npseed) { +nb::tuple integerise(const nb::ndarray& npseed) { const NDArray seed = asNDArray(npseed); // shallow copy Integeriser integeriser(seed); - py::dict stats("conv"_a = integeriser.conv(), "rmse"_a = integeriser.rmse()); - return py::make_tuple(fromNDArray(integeriser.result()), stats); + nb::dict stats; + stats["conv"] = integeriser.conv(), + stats["rmse"] = integeriser.rmse(); + return nb::make_tuple(fromNDArray(integeriser.result()), stats); } -py::tuple ipf(const py::array_t& seed, const py::iterable& index_iter, const py::iterable& marginal_iter) { +nb::tuple ipf(const nb::ndarray& seed, const nb::iterable& index_iter, const nb::iterable& marginal_iter) { std::vector> indices = collect_indices(index_iter); std::vector> marginals = collect_marginals(marginal_iter); if (indices.size() != marginals.size()) - throw py::value_error("index and marginals lists differ in size"); + throw nb::value_error("index and marginals lists differ in size"); IPF ipf(indices, marginals); const NDArray& result = ipf.solve(asNDArray(seed)); - py::dict stats("conv"_a = ipf.conv(), "pop"_a = ipf.population(), "iterations"_a = ipf.iters(), - "maxError"_a = ipf.maxError()); - return py::make_tuple(fromNDArray(result), stats); + nb::dict stats; + stats["conv"] = ipf.conv(); + stats["pop"] = ipf.population(); + stats["iterations"] = ipf.iters(); + stats["maxError"] = ipf.maxError(); + return nb::make_tuple(fromNDArray(result), stats); } -py::tuple qis(const py::iterable& index_iter, const py::iterable& marginal_iter, int64_t skips) { +nb::tuple qis(const nb::iterable& index_iter, const nb::iterable& marginal_iter, int64_t skips) { std::vector> indices = collect_indices(index_iter); std::vector> marginals = collect_marginals(marginal_iter); if (indices.size() != marginals.size()) - throw py::value_error("index and marginals lists differ in size"); + throw nb::value_error("index and marginals lists differ in size"); QIS qis(indices, marginals, skips); const NDArray& result = qis.solve(); const NDArray& expect = qis.expectation(); - py::dict stats("expectation"_a = fromNDArray(expect), "conv"_a = qis.conv(), "pop"_a = qis.population(), - "chiSq"_a = qis.chiSq(), "pValue"_a = qis.pValue(), "degeneracy"_a = qis.degeneracy()); - return py::make_tuple(fromNDArray(result), stats); + nb::dict stats; + stats["expectation"] = fromNDArray(expect); + stats["conv"] = qis.conv(); + stats["pop"] = qis.population(); + stats["chiSq"] = qis.chiSq(), + stats["pValue"] = qis.pValue(); + stats["degeneracy"] = qis.degeneracy(); + return nb::make_tuple(fromNDArray(result), stats); } -py::tuple qisi(const py::array_t seed, const py::iterable& index_iter, const py::iterable& marginal_iter, +nb::tuple qisi(const nb::ndarray seed, const nb::iterable& index_iter, const nb::iterable& marginal_iter, int64_t skips) { std::vector> indices = collect_indices(index_iter); std::vector> marginals = collect_marginals(marginal_iter); if (indices.size() != marginals.size()) - throw py::value_error("index and marginals lists differ in size"); + throw nb::value_error("index and marginals lists differ in size"); QISI qisi(indices, marginals, skips); const NDArray& result = qisi.solve(asNDArray(seed)); - py::dict stats("expectation"_a = fromNDArray(qisi.expectation()), "conv"_a = qisi.conv(), - "pop"_a = qisi.population(), "chiSq"_a = qisi.chiSq(), "pValue"_a = qisi.pValue(), - "degeneracy"_a = qisi.degeneracy()); - return py::make_tuple(fromNDArray(result), stats); + nb::dict stats; + stats["expectation"] = fromNDArray(qisi.expectation()); + stats["conv"] = qisi.conv(); + stats["pop"] = qisi.population(); + stats["chiSq"] = qisi.chiSq(), + stats["pValue"] = qisi.pValue(); + stats["degeneracy"] = qisi.degeneracy(); + + return nb::make_tuple(fromNDArray(result), stats); } -py::dict unittest() { +nb::dict unittest() { const unittest::Logger& log = unittest::run(); - py::dict result; + nb::dict result; result["nTests"] = log.testsRun; result["nFails"] = log.testsFailed; result["errors"] = log.errors; @@ -223,7 +237,7 @@ py::dict unittest() { } // namespace hl -PYBIND11_MODULE(_humanleague, m) { +NB_MODULE(humanleague_ext, m) { #include "docstr.inl" @@ -236,22 +250,26 @@ PYBIND11_MODULE(_humanleague, m) { .def("qis", hl::qis, qis_docstr, "indices"_a, "marginals"_a, "skips"_a) .def( "qis", - [](const py::iterable& indices, const py::iterable& marginals) { return hl::qis(indices, marginals, 0); }, + [](const nb::iterable& indices, const nb::iterable& marginals) { return hl::qis(indices, marginals, 0); }, qis2_docstr, "indices"_a, "marginals"_a) .def("qisi", hl::qisi, qisi_docstr, "seed"_a, "indices"_a, "marginals"_a, "skips"_a) .def( "qisi", - [](const py::array_t& seed, const py::iterable& indices, const py::iterable& marginals) { + [](const nb::ndarray& seed, const nb::iterable& indices, const nb::iterable& marginals) { return hl::qisi(seed, indices, marginals, 0); }, qisi2_docstr, "seed"_a, "indices"_a, "marginals"_a) .def("_unittest", hl::unittest, unittest_docstr); - py::class_(m, "SobolSequence") - .def(py::init(), SobolSequence_init2_docstr, "dim"_a, "skips"_a) - .def(py::init(), SobolSequence_init1_docstr, "dim"_a) + nb::class_(m, "SobolSequence") + .def(nb::init(), SobolSequence_init2_docstr, "dim"_a, "skips"_a) + .def(nb::init(), SobolSequence_init1_docstr, "dim"_a) .def("__iter__", &hl::SobolGenerator::iter, "__iter__ dunder") .def("__next__", &hl::SobolGenerator::next, "__next__ dunder"); } -#endif \ No newline at end of file +#else +#error You are attempting to compile module.cpp but have not set PYTHON_MODULE \ + python builds: include module.cpp and set -DPYTHON_MODULE \ + R builds: do not include module.cpp and ensure -DPYTHON_MODULE is not set +#endif diff --git a/tests/test_all.py b/tests/test_all.py index 629112a..bcf5d50 100755 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -1,9 +1,10 @@ import numpy as np import pytest -from _humanleague import _unittest as hl_unittest # type: ignore[import] import humanleague as hl +from .humanleague_ext import _unittest as hl_unittest # type: ignore[import] + def test_version() -> None: assert hl.__version__ From 2b977bf7bd9b549bd6ee95188679106db6c7f7b8 Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sun, 19 Oct 2025 13:11:07 +0100 Subject: [PATCH 2/8] works --- CMakeLists.txt | 2 +- README.md | 8 +++- humanleague/__init__.pyi | 12 ++---- src/module.cpp | 84 +++++++++++++++++++++++----------------- tests/__init__.py_ | 0 tests/test_all.py | 7 ++-- 6 files changed, 63 insertions(+), 50 deletions(-) create mode 100644 tests/__init__.py_ diff --git a/CMakeLists.txt b/CMakeLists.txt index d5ed166..0be4c51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,6 @@ nanobind_add_module( target_compile_definitions(humanleague_ext PUBLIC PYTHON_MODULE) - # Install directive for scikit-build-core install(TARGETS humanleague_ext LIBRARY DESTINATION humanleague) + diff --git a/README.md b/README.md index 48ff8fa..8b61f19 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,13 @@ The package now contains type annotations and your IDE should automatically disp ![help](./doc/help.png) -NB type stubs are generated using the `pybind11-stubgen` package, with some [manual corrections](./doc/type-stubs.md). +~~NB type stubs are generated using the `pybind11-stubgen` package, with some [manual corrections](./doc/type-stubs.md).~~ + +`nanobind` now has stubgen functionality (but appears limited as of 2.9.2) + +```sh +uv run python -m nanobind.stubgen -P -m humanleague.humanleague_ext -o humanleague/__init__.pyi -M humanleague/py.typed +``` ### Multidimensional integerisation diff --git a/humanleague/__init__.pyi b/humanleague/__init__.pyi index b1c2686..b9cd80b 100644 --- a/humanleague/__init__.pyi +++ b/humanleague/__init__.pyi @@ -1,5 +1,4 @@ -"""Microsynthesis using quasirandom sampling and IPF, plus related functionality -""" +"""Microsynthesis using quasirandom sampling and IPF, plus related functionality""" from __future__ import annotations @@ -52,16 +51,13 @@ class SobolSequence: """ def __iter__(self) -> SobolSequence: - """__iter__ dunder - """ + """__iter__ dunder""" def __next__(self) -> npt.NDArray[np.float64]: - """__next__ dunder - """ + """__next__ dunder""" pass def _unittest() -> dict: - """For developers. Runs the C++ unit tests. - """ + """For developers. Runs the C++ unit tests.""" def flatten(pop: npt.NDArray[np.int64]) -> list: """Converts an n-dimensional array of counts into an n-column table with a row for each unit diff --git a/src/module.cpp b/src/module.cpp index 41973ad..6e6a768 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -11,34 +11,37 @@ #include #include +#include namespace nb = nanobind; using namespace nb::literals; +template using np_array = nb::ndarray; + namespace hl { -template T* begin(nb::ndarray& a) { +template T* begin(np_array& a) { // assert(a.itemsize() == sizeof(T)); - return (T*)a.request().ptr; + return (T*)a.data(); } -template T* end(nb::ndarray& a) { +template T* end(np_array& a) { // assert(a.itemsize() == sizeof(T)); - return (T*)a.request().ptr + a.size(); + return (T*)a.data() + a.size(); } -template const T* cbegin(const nb::ndarray& a) { +template const T* cbegin(const np_array& a) { // assert(a.itemsize() == sizeof(T)); - return (const T*)a.request().ptr; + return (const T*)a.data(); } -template const T* cend(const nb::ndarray& a) { +template const T* cend(const np_array& a) { // assert(a.itemsize() == sizeof(T)); - return (const T*)a.request().ptr + a.size(); + return (const T*)a.data() + a.size(); } -template std::vector toVector(const nb::ndarray& a) { +template std::vector toVector(const np_array& a) { if (a.ndim() == 0) { return std::vector(1, *cbegin(a)); } @@ -48,7 +51,7 @@ template std::vector toVector(const nb::ndarray& a) { return std::vector(cbegin(a), cend(a)); } -template NDArray toNDArray(const nb::ndarray& np) { +template NDArray toNDArray(const np_array& np) { const size_t dim = np.ndim(); std::vector sizes(dim); for (size_t i = 0; i < dim; ++i) @@ -58,22 +61,32 @@ template NDArray toNDArray(const nb::ndarray& np) { return tmp; } -template NDArray asNDArray(const nb::ndarray& np) { +template NDArray asNDArray(const np_array& np) { // this is a bit iffy re: constness return NDArray(std::vector(np.shape_ptr(), np.shape_ptr() + np.ndim()), const_cast(cbegin(np))); } -template nb::ndarray fromNDArray(const NDArray& a) { +template np_array fromNDArray(const NDArray& a) { // TODO ensure this is safe. may need to explicitly copy data - return nb::ndarray(a.rawData(), a.dim(), std::vector(a.sizes().begin(), a.sizes().end()).data()); + return np_array(a.rawData(), a.dim(), std::vector(a.sizes().begin(), a.sizes().end()).data()); } std::vector> collect_indices(const nb::iterable& iterable) { std::vector> indices; for (const auto& elem : iterable) { - const nb::ndarray ia = nb::cast>(elem); - indices.push_back(toVector(ia)); + if (nb::isinstance(elem)) { + indices.push_back(std::vector(1, nb::cast(elem))); + } else if (nb::isinstance(elem)) { + // TODO must be an easier way??? + std::vector index; + for (const auto& item: elem) { + index.push_back(nb::cast(item)); + } + indices.push_back(index); + } else { + throw nb::value_error("unexpected type for index"); + } } return indices; } @@ -82,17 +95,16 @@ template std::vector> collect_marginals(const nb::iterab std::vector> marginals; for (const auto& elem : iterable) { - const nb::ndarray ma = nb::cast>(elem); + const np_array ma = nb::cast>(elem); marginals.emplace_back(toNDArray(ma)); } return marginals; } -nb::list flatten(const nb::ndarray& a) { +nb::list flatten(const np_array& a) { auto warnings = nb::module_::import_("warnings"); - warnings.attr("warn")( - "humanleague.flatten is deprecated, consider using humanleague.tabulate_individuals instead."); + warnings.attr("warn")("humanleague.flatten is deprecated, consider using humanleague.tabulate_individuals instead."); const NDArray array = asNDArray(a); @@ -114,12 +126,12 @@ nb::list flatten(const nb::ndarray& a) { return outer; } -nb::tuple integerise1d(nb::ndarray frac_a, int pop) { +nb::tuple integerise1d(np_array frac_a, int pop) { if (pop < 0) { throw nb::value_error("population cannot be negative"); } - // convert nb::ndarray to vector and normalise it to get probabilities + // convert np_array to vector and normalise it to get probabilities std::vector prob = toVector(frac_a); double sum = std::accumulate(prob.begin(), prob.end(), 0.0); for (double& p : prob) { @@ -132,19 +144,22 @@ nb::tuple integerise1d(nb::ndarray frac_a, int pop) { stats["conv"] = true; // always converges, but including for consistency stats["rmse"] = var; - return nb::make_tuple(nb::ndarray(freq.data(), {freq.size()}), stats); + return nb::make_tuple(np_array(freq.data(), {freq.size()}), stats); } class SobolGenerator { public: SobolGenerator(uint32_t dim, uint32_t nSkip = 0) : m_sobol(dim, nSkip) {} - nb::ndarray next() { - std::vector sequence(m_sobol.dim()); + np_array next() { + double* data = new double[m_sobol.dim()]; + // Delete 'data' when the 'owner' capsule expires + nb::capsule owner(data, [](void* p) noexcept { delete[] (float*)p; }); + try { const std::vector& buf = m_sobol.buf(); - std::transform(buf.cbegin(), buf.cend(), begin(sequence), [](uint32_t i) { return i * Sobol::SCALE; }); - return nb::ndarray(sequence.data(), {sequence.size()}); + std::transform(buf.cbegin(), buf.cend(), data, [](uint32_t i) { return i * Sobol::SCALE; }); + return np_array(data, {m_sobol.dim()}, owner); } catch (const std::runtime_error&) { throw nb::stop_iteration(); } @@ -156,17 +171,16 @@ class SobolGenerator { Sobol m_sobol; }; -nb::tuple integerise(const nb::ndarray& npseed) { +nb::tuple integerise(const np_array& npseed) { const NDArray seed = asNDArray(npseed); // shallow copy Integeriser integeriser(seed); nb::dict stats; - stats["conv"] = integeriser.conv(), - stats["rmse"] = integeriser.rmse(); + stats["conv"] = integeriser.conv(), stats["rmse"] = integeriser.rmse(); return nb::make_tuple(fromNDArray(integeriser.result()), stats); } -nb::tuple ipf(const nb::ndarray& seed, const nb::iterable& index_iter, const nb::iterable& marginal_iter) { +nb::tuple ipf(const np_array& seed, const nb::iterable& index_iter, const nb::iterable& marginal_iter) { std::vector> indices = collect_indices(index_iter); std::vector> marginals = collect_marginals(marginal_iter); if (indices.size() != marginals.size()) @@ -197,13 +211,12 @@ nb::tuple qis(const nb::iterable& index_iter, const nb::iterable& marginal_iter, stats["expectation"] = fromNDArray(expect); stats["conv"] = qis.conv(); stats["pop"] = qis.population(); - stats["chiSq"] = qis.chiSq(), - stats["pValue"] = qis.pValue(); + stats["chiSq"] = qis.chiSq(), stats["pValue"] = qis.pValue(); stats["degeneracy"] = qis.degeneracy(); return nb::make_tuple(fromNDArray(result), stats); } -nb::tuple qisi(const nb::ndarray seed, const nb::iterable& index_iter, const nb::iterable& marginal_iter, +nb::tuple qisi(const np_array seed, const nb::iterable& index_iter, const nb::iterable& marginal_iter, int64_t skips) { std::vector> indices = collect_indices(index_iter); std::vector> marginals = collect_marginals(marginal_iter); @@ -217,8 +230,7 @@ nb::tuple qisi(const nb::ndarray seed, const nb::iterable& index_iter, c stats["expectation"] = fromNDArray(qisi.expectation()); stats["conv"] = qisi.conv(); stats["pop"] = qisi.population(); - stats["chiSq"] = qisi.chiSq(), - stats["pValue"] = qisi.pValue(); + stats["chiSq"] = qisi.chiSq(), stats["pValue"] = qisi.pValue(); stats["degeneracy"] = qisi.degeneracy(); return nb::make_tuple(fromNDArray(result), stats); @@ -255,7 +267,7 @@ NB_MODULE(humanleague_ext, m) { .def("qisi", hl::qisi, qisi_docstr, "seed"_a, "indices"_a, "marginals"_a, "skips"_a) .def( "qisi", - [](const nb::ndarray& seed, const nb::iterable& indices, const nb::iterable& marginals) { + [](const np_array& seed, const nb::iterable& indices, const nb::iterable& marginals) { return hl::qisi(seed, indices, marginals, 0); }, qisi2_docstr, "seed"_a, "indices"_a, "marginals"_a) diff --git a/tests/__init__.py_ b/tests/__init__.py_ new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_all.py b/tests/test_all.py index bcf5d50..bf0712b 100755 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -2,8 +2,7 @@ import pytest import humanleague as hl - -from .humanleague_ext import _unittest as hl_unittest # type: ignore[import] +from humanleague.humanleague_ext import _unittest as hl_unittest # type: ignore[import] def test_version() -> None: @@ -157,8 +156,8 @@ def test_IPF() -> None: assert np.allclose(np.sum(p, 0), m1) assert np.allclose(np.sum(p, 1), m0) - # mix list and tuple - im = ((0,), (1,), [2]) + # mixed type indices + im = ((0,), np.array([1]), 2) s = np.array([[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]]) p, stats = hl.ipf(s, im, (m0, m1, m2)) assert stats["conv"] From cc496c67b6bd574166b59bd8588b376cb633348a Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sun, 19 Oct 2025 13:16:05 +0100 Subject: [PATCH 3/8] oops --- tests/{__init__.py_ => __init__.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{__init__.py_ => __init__.py} (100%) diff --git a/tests/__init__.py_ b/tests/__init__.py similarity index 100% rename from tests/__init__.py_ rename to tests/__init__.py From 6f8a02c817a477c10318463fa4c35d828413cb72 Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sun, 19 Oct 2025 13:19:07 +0100 Subject: [PATCH 4/8] ... --- tests/test_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_all.py b/tests/test_all.py index bf0712b..9990f60 100755 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -1,8 +1,8 @@ import numpy as np import pytest +from humanleague.humanleague_ext import _unittest as hl_unittest # type: ignore[import] import humanleague as hl -from humanleague.humanleague_ext import _unittest as hl_unittest # type: ignore[import] def test_version() -> None: From 2101e8b824d9962fea2c36e9a32b9a8a95d31433 Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sun, 19 Oct 2025 13:25:06 +0100 Subject: [PATCH 5/8] fix --- tests/test_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_all.py b/tests/test_all.py index 9990f60..518529d 100755 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -159,7 +159,7 @@ def test_IPF() -> None: # mixed type indices im = ((0,), np.array([1]), 2) s = np.array([[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]]) - p, stats = hl.ipf(s, im, (m0, m1, m2)) + p, stats = hl.ipf(s, im, (m0, m1, m2)) # type: ignore[arg-type] assert stats["conv"] # check overall population and marginals correct assert np.sum(p) == pytest.approx(stats["pop"], 1e-8) From 318d16efbdb43a54e729e7a937d6a3fbd8e49dde Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sun, 19 Oct 2025 18:02:58 +0100 Subject: [PATCH 6/8] fix narrowing --- src/module.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/module.cpp b/src/module.cpp index 6e6a768..7f9987b 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -149,7 +149,7 @@ nb::tuple integerise1d(np_array frac_a, int pop) { class SobolGenerator { public: - SobolGenerator(uint32_t dim, uint32_t nSkip = 0) : m_sobol(dim, nSkip) {} + SobolGenerator(size_t dim, size_t nSkip = 0) : m_sobol(dim, nSkip) {} np_array next() { double* data = new double[m_sobol.dim()]; @@ -158,7 +158,7 @@ class SobolGenerator { try { const std::vector& buf = m_sobol.buf(); - std::transform(buf.cbegin(), buf.cend(), data, [](uint32_t i) { return i * Sobol::SCALE; }); + std::transform(buf.cbegin(), buf.cend(), data, [](uint64_t i) { return i * Sobol::SCALE; }); return np_array(data, {m_sobol.dim()}, owner); } catch (const std::runtime_error&) { throw nb::stop_iteration(); @@ -274,7 +274,7 @@ NB_MODULE(humanleague_ext, m) { .def("_unittest", hl::unittest, unittest_docstr); nb::class_(m, "SobolSequence") - .def(nb::init(), SobolSequence_init2_docstr, "dim"_a, "skips"_a) + .def(nb::init(), SobolSequence_init2_docstr, "dim"_a, "skips"_a) .def(nb::init(), SobolSequence_init1_docstr, "dim"_a) .def("__iter__", &hl::SobolGenerator::iter, "__iter__ dunder") .def("__next__", &hl::SobolGenerator::next, "__next__ dunder"); From d7a127a05684d472b10f50cfee74f343cad0271c Mon Sep 17 00:00:00 2001 From: virgesmith Date: Sun, 19 Oct 2025 18:11:54 +0100 Subject: [PATCH 7/8] fix R --- src/module.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/module.cpp b/src/module.cpp index 7f9987b..8bf4e5f 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -280,8 +280,4 @@ NB_MODULE(humanleague_ext, m) { .def("__next__", &hl::SobolGenerator::next, "__next__ dunder"); } -#else -#error You are attempting to compile module.cpp but have not set PYTHON_MODULE \ - python builds: include module.cpp and set -DPYTHON_MODULE \ - R builds: do not include module.cpp and ensure -DPYTHON_MODULE is not set #endif From bec0c9329e8cd37d48674b44f4704c3459a3859e Mon Sep 17 00:00:00 2001 From: virgesmith Date: Thu, 23 Oct 2025 09:06:19 +0100 Subject: [PATCH 8/8] tweaks --- CMakeLists.txt | 9 ++------ README.md | 14 +++++++++++++ src/module.cpp | 57 +++++++++++++++++++++----------------------------- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0be4c51..86aca69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,7 +35,6 @@ find_package(nanobind CONFIG REQUIRED) # We are now ready to compile the actual extension module nanobind_add_module( - # Name of the extension humanleague_ext # Target the stable ABI for Python 3.12+, which reduces @@ -43,13 +42,9 @@ nanobind_add_module( # does nothing on older Python versions STABLE_ABI - # Build libnanobind statically and merge it into the - # extension (which itself remains a shared library) - # - # If your project builds multiple extensions, you can - # replace this flag by NB_SHARED to conserve space by - # reusing a shared libnanobind across libraries + # Build libnanobind statically NB_STATIC + # NB_SHARED creates path issues # Source code goes here src/Index.cpp diff --git a/README.md b/README.md index 8b61f19..845c8ce 100644 --- a/README.md +++ b/README.md @@ -63,11 +63,25 @@ pip install humanleague [uv](https://docs.astral.sh/uv/) is highly recommended for managing environments. + ```bash uv sync --dev uv run pytest ``` +Nanobind docs suggest a dev workflow where the build happens directly in the dev env - first manually install +the build deps (required after every `uv sync`) + +```sh +uv pip install nanobind scikit-build-core[pyproject] +``` + +Then build with + +```sh +uv pip install --no-build-isolation -ve . +``` + ### R Official release: diff --git a/src/module.cpp b/src/module.cpp index 8bf4e5f..019425a 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -21,29 +21,17 @@ template using np_array = nb::ndarray; namespace hl { -template T* begin(np_array& a) { - // assert(a.itemsize() == sizeof(T)); - return (T*)a.data(); -} +template T* begin(np_array& a) { return a.data(); } -template T* end(np_array& a) { - // assert(a.itemsize() == sizeof(T)); - return (T*)a.data() + a.size(); -} +template T* end(np_array& a) { return a.data() + a.size(); } -template const T* cbegin(const np_array& a) { - // assert(a.itemsize() == sizeof(T)); - return (const T*)a.data(); -} +template const T* cbegin(const np_array& a) { return a.data(); } -template const T* cend(const np_array& a) { - // assert(a.itemsize() == sizeof(T)); - return (const T*)a.data() + a.size(); -} +template const T* cend(const np_array& a) { return a.data() + a.size(); } -template std::vector toVector(const np_array& a) { +template std::vector toVector(const np_array& a) { if (a.ndim() == 0) { - return std::vector(1, *cbegin(a)); + return std::vector(1, *a.data()); } if (a.ndim() != 1) { throw nb::value_error(("cannot convert %%-dimensional array to vector"s % a.ndim()).c_str()); @@ -51,7 +39,7 @@ template std::vector toVector(const np_array(cbegin(a), cend(a)); } -template NDArray toNDArray(const np_array& np) { +template NDArray toNDArray(const np_array& np) { const size_t dim = np.ndim(); std::vector sizes(dim); for (size_t i = 0; i < dim; ++i) @@ -61,13 +49,13 @@ template NDArray toNDArray(const np_array NDArray asNDArray(const np_array& np) { +template NDArray asNDArray(const np_array& np) { // this is a bit iffy re: constness - return NDArray(std::vector(np.shape_ptr(), np.shape_ptr() + np.ndim()), const_cast(cbegin(np))); + return NDArray(std::vector(np.shape_ptr(), np.shape_ptr() + np.ndim()), np.data()); } template np_array fromNDArray(const NDArray& a) { - // TODO ensure this is safe. may need to explicitly copy data + // TODO ensure this is safe. may need to explicitly copy data (and shape?) return np_array(a.rawData(), a.dim(), std::vector(a.sizes().begin(), a.sizes().end()).data()); } @@ -80,7 +68,7 @@ std::vector> collect_indices(const nb::iterable& iterable) } else if (nb::isinstance(elem)) { // TODO must be an easier way??? std::vector index; - for (const auto& item: elem) { + for (const auto& item : elem) { index.push_back(nb::cast(item)); } indices.push_back(index); @@ -114,15 +102,14 @@ nb::list flatten(const np_array& a) { } const std::vector>& list = listify(pop, array); - nb::list outer; // array.dim()); + nb::list outer; for (size_t i = 0; i < array.dim(); ++i) { - nb::list l; + nb::list inner; for (size_t j = 0; j < list[i].size(); ++j) { - l.append(list[i][j]); + inner.append(list[i][j]); } - outer.insert(i, l); + outer.append(inner); } - return outer; } @@ -154,7 +141,7 @@ class SobolGenerator { np_array next() { double* data = new double[m_sobol.dim()]; // Delete 'data' when the 'owner' capsule expires - nb::capsule owner(data, [](void* p) noexcept { delete[] (float*)p; }); + nb::capsule owner(data, [](void* p) noexcept { delete[] (double*)p; }); try { const std::vector& buf = m_sobol.buf(); @@ -174,9 +161,9 @@ class SobolGenerator { nb::tuple integerise(const np_array& npseed) { const NDArray seed = asNDArray(npseed); // shallow copy Integeriser integeriser(seed); - nb::dict stats; - stats["conv"] = integeriser.conv(), stats["rmse"] = integeriser.rmse(); + stats["conv"] = integeriser.conv(); + stats["rmse"] = integeriser.rmse(); return nb::make_tuple(fromNDArray(integeriser.result()), stats); } @@ -211,9 +198,11 @@ nb::tuple qis(const nb::iterable& index_iter, const nb::iterable& marginal_iter, stats["expectation"] = fromNDArray(expect); stats["conv"] = qis.conv(); stats["pop"] = qis.population(); - stats["chiSq"] = qis.chiSq(), stats["pValue"] = qis.pValue(); + stats["chiSq"] = qis.chiSq(); + stats["pValue"] = qis.pValue(); stats["degeneracy"] = qis.degeneracy(); return nb::make_tuple(fromNDArray(result), stats); + return nb::make_tuple(fromNDArray(result), stats); } nb::tuple qisi(const np_array seed, const nb::iterable& index_iter, const nb::iterable& marginal_iter, @@ -230,7 +219,9 @@ nb::tuple qisi(const np_array seed, const nb::iterable& index_iter, cons stats["expectation"] = fromNDArray(qisi.expectation()); stats["conv"] = qisi.conv(); stats["pop"] = qisi.population(); - stats["chiSq"] = qisi.chiSq(), stats["pValue"] = qisi.pValue(); + stats["chiSq"] = qisi.chiSq(); + stats["pValue"] = qisi.pValue(); + stats["degeneracy"] = qisi.degeneracy(); stats["degeneracy"] = qisi.degeneracy(); return nb::make_tuple(fromNDArray(result), stats);