From a42828af22358a259c4bd024adccf3b32ea8a08a Mon Sep 17 00:00:00 2001 From: Niki van Stein Date: Thu, 30 Oct 2025 15:47:44 +0100 Subject: [PATCH] Add experiment publish helper and Zenodo integration (#5) * Add experiment publish helper and Zenodo integration * Cleaning up readme template --- doc/python/source/api/ioh.Experiment.rst | 1 + doc/python/source/python.rst | 3 +- doc/python/source/python/publish.rst | 45 +++++ example/python_example_publish.py | 37 +++++ ioh/__init__.py | 201 +++++++++++++++++++++++ setup.py | 8 +- tests/python/test_experiment.py | 75 ++++++++- 7 files changed, 366 insertions(+), 4 deletions(-) create mode 100644 doc/python/source/python/publish.rst create mode 100644 example/python_example_publish.py diff --git a/doc/python/source/api/ioh.Experiment.rst b/doc/python/source/api/ioh.Experiment.rst index 784fa6709..3882273a5 100644 --- a/doc/python/source/api/ioh.Experiment.rst +++ b/doc/python/source/api/ioh.Experiment.rst @@ -16,6 +16,7 @@ Experiment ~Experiment.evaluate ~Experiment.merge_output_to_single_folder ~Experiment.merge_tmp_folders + ~Experiment.publish ~Experiment.run .. rubric:: Methods Documentation diff --git a/doc/python/source/python.rst b/doc/python/source/python.rst index 8d5190681..58da8743e 100644 --- a/doc/python/source/python.rst +++ b/doc/python/source/python.rst @@ -17,4 +17,5 @@ API documentation python/structures.rst python/ioh.rst python/logger.rst - python/suite.rst + python/suite.rst + python/publish.rst diff --git a/doc/python/source/python/publish.rst b/doc/python/source/python/publish.rst new file mode 100644 index 000000000..67eac915d --- /dev/null +++ b/doc/python/source/python/publish.rst @@ -0,0 +1,45 @@ +Publishing experiment archives +============================== + +The :meth:`ioh.Experiment.publish` helper streamlines turning an experiment run +into a shareable Zenodo deposit. 
After running an experiment, calling +``publish`` will: + +* generate the ``ioh_data.zip`` archive with an up-to-date ``README.md`` + describing the algorithm, problems, instances, dimensions, repetitions and + optional evaluation budget; +* serialise the algorithm instance with :mod:`cloudpickle` so it can be + restored when reproducing the experiment; and +* upload the archive, README and algorithm bundle to Zenodo using their REST + API. + +.. important:: You need a Zenodo access token with appropriate permissions to upload +   deposits. You can create and manage your tokens in your Zenodo account +   settings. See https://zenodo.org/account/settings/applications/tokens/new/ + +Example usage:: + + exp = ioh.Experiment( + my_algorithm, + fids=[1, 2, 3], + iids=[1], + dims=[5, 10], + reps=5, + ) + exp() # run the benchmark + exp.publish( + zenodo_token="", + title="Benchmarking IOH on BBOB", + description="Log data for the benchmark run.", + creators=[{"name": "Doe, Jane"}], + budget=20000, + keywords=["optimization", "benchmark"], + sandbox=True, + ) + +By default the helper targets the production Zenodo endpoint. Pass +``sandbox=True`` while testing credentials or automation against the sandbox +service. You can also provide an existing :class:`requests.Session` to reuse +connections or configure proxies, and the ``additional_metadata`` parameter to +add any extra Zenodo metadata fields.
+ diff --git a/example/python_example_publish.py b/example/python_example_publish.py new file mode 100644 index 000000000..57eaf2e6a --- /dev/null +++ b/example/python_example_publish.py @@ -0,0 +1,37 @@ +import ioh +import numpy as np + +class RandomSearch: + 'Simple random search algorithm' + def __init__(self, n: int, length: float = 0.0): + self.n: int = n + self.length: float = length + + def __call__(self, problem: ioh.problem.RealSingleObjective) -> None: + 'Evaluate the problem n times with a randomly generated solution' + + for _ in range(self.n): + # We can use the problems bounds accessor to get information about the problem bounds + x = np.random.uniform(problem.bounds.lb, problem.bounds.ub) + self.length = np.linalg.norm(x) + + problem(x) +budget = 1000 +my_algorithm = RandomSearch(n=budget) +exp = ioh.Experiment( + my_algorithm, + fids=[1, 2, 3], + iids=[1], + dims=[5, 10], + reps=5, +) +exp() # run the benchmark +exp.publish( + zenodo_token="", + title="Benchmarking IOH publish example", + description="Log data for the benchmark run.", + creators=[{"name": "van Stein, Niki"}], + budget=budget, + keywords=["optimization", "benchmark", "ioh"], + sandbox=True, +) \ No newline at end of file diff --git a/ioh/__init__.py b/ioh/__init__.py index 116dda6c6..4d125bc68 100644 --- a/ioh/__init__.py +++ b/ioh/__init__.py @@ -10,9 +10,12 @@ import shutil import copy import json +import datetime +import textwrap import urllib.request import tarfile +import requests # Set the path to the static/ directory. # NEEDED for C++ code to load transformation details for several functions. 
@@ -623,6 +626,204 @@ def __call__(self): return self + def _problem_summary(self) -> typing.Dict[int, str]: + summary = {} + if not self.fids: + return summary + sample_instance = self.iids[0] if self.iids else 1 + sample_dimension = self.dims[0] if self.dims else 1 + + for fid in self.fids: + try: + p = get_problem(fid, sample_instance, sample_dimension, self.problem_class) + summary[fid] = p.meta_data.name + except Exception: + summary[fid] = f"Problem {fid}" + return summary + + def _write_readme( + self, + title: str, + description: str, + budget: typing.Optional[int], + reproduction_hint: str, + ) -> str: + os.makedirs(self.logger_root, exist_ok=True) + problems = self._problem_summary() + problem_lines = [f"- **f{fid}**: {name}" for fid, name in problems.items()] + problem_section = "\n".join(problem_lines) if problem_lines else "- No problems recorded" + + algorithm_name = self.logger_params.get("algorithm_name", str(self.algorithm)) + budget_text = str(budget) if budget is not None else "Not specified" + + content = textwrap.dedent( + f""" + # {title} + + {description} + + ## Experiment configuration + + - **Algorithm**: {algorithm_name} + - **Problem class**: {self.problem_class.name} + - **Function IDs**: {self.fids} + - **Instances**: {self.iids} + - **Dimensions**: {self.dims} + - **Repetitions**: {self.reps} + - **Budget**: {budget_text} + + ### Problem details + + {problem_section} + + ## Contents + + - Experimental logs located in `{os.path.basename(self.logger_root)}`. + - `README.md` (this file). + + ## Directions + + 1. Unzip `ioh_data.zip` (or the generated archive) to access the raw logs. + 2. Review the `.json`/`.info` files for scenario metadata and `.dat` files for evaluation traces. + 3. Use the reproduction snippet below to re-run the experiment. 
+ + ## Reproducing the results + + ```python + {reproduction_hint} + ``` + """ + ).strip() + "\n" + + readme_path = os.path.join(self.logger_root, "README.md") + with open(readme_path, "w", encoding="utf-8") as handle: + handle.write(content) + return readme_path + + def _serialize_algorithm(self) -> typing.Optional[str]: + try: + import cloudpickle + except ImportError: # pragma: no cover - dependency missing + warnings.warn( + "cloudpickle is not available; algorithm serialization skipped.", + RuntimeWarning, + ) + return None + + algorithm_name = self.logger_params.get("algorithm_name") or self.algorithm.__class__.__name__ + safe_name = "".join(c if c.isalnum() or c in {"-", "_"} else "_" for c in algorithm_name) + filename = f"{safe_name or 'algorithm'}.pkl" + path = os.path.join(self.logger_root, filename) + with open(path, "wb") as handle: + cloudpickle.dump(self.algorithm, handle) + return path + + def publish( + self, + zenodo_token: str, + title: str, + description: str, + creators: typing.List[typing.Dict[str, typing.Any]], + *, + budget: typing.Optional[int] = None, + keywords: typing.Optional[typing.List[str]] = None, + related_identifiers: typing.Optional[typing.List[typing.Dict[str, typing.Any]]] = None, + upload_type: str = "dataset", + access_right: str = "open", + publish_date: typing.Optional[str] = None, + sandbox: bool = False, + include_algorithm: bool = True, + additional_metadata: typing.Optional[typing.Dict[str, typing.Any]] = None, + session: typing.Optional[typing.Any] = None, + ) -> typing.Dict[str, typing.Any]: + if not os.path.isdir(self.logger_root): + raise FileNotFoundError( + f"Logger output directory '{self.logger_root}' does not exist. Run the experiment before publishing." 
+ ) + + algorithm_class = self.algorithm.__class__ + algorithm_module = algorithm_class.__module__ + algorithm_name = algorithm_class.__name__ + if algorithm_module in {"__main__", "builtins"}: + import_line = "# Define or import your algorithm implementation" + algorithm_instantiation = f"algorithm = {repr(self.algorithm)} # replace with your implementation" + else: + import_line = f"from {algorithm_module} import {algorithm_name}" + algorithm_instantiation = f"algorithm = {algorithm_name}()" + + reproduction_hint = textwrap.dedent( + f""" + import ioh + {import_line} + + {algorithm_instantiation} + experiment = ioh.Experiment( + algorithm, + fids={self.fids}, + iids={self.iids}, + dims={self.dims}, + reps={self.reps}, + problem_class=ioh.ProblemClass.{self.problem_class.name}, + ) + experiment() + """ + ).strip() + + readme_path = self._write_readme(title, description, budget, reproduction_hint) + archive_path = shutil.make_archive(self.logger_root, "zip", self.logger_root) + + algorithm_path = self._serialize_algorithm() if include_algorithm else None + + files_to_upload = [archive_path, readme_path] + if algorithm_path is not None: + files_to_upload.append(algorithm_path) + + metadata: typing.Dict[str, typing.Any] = { + "title": title, + "upload_type": upload_type, + "description": description, + "creators": creators, + "access_right": access_right, + } + + if keywords: + metadata["keywords"] = keywords + if related_identifiers: + metadata["related_identifiers"] = related_identifiers + if publish_date is None: + publish_date = datetime.date.today().isoformat() + metadata["publication_date"] = publish_date + if additional_metadata: + metadata.update(additional_metadata) + if budget is not None: + budget_note = f"Budget: {budget}" + if metadata.get("notes"): + metadata["notes"] = f"{metadata['notes']}\n{budget_note}" + else: + metadata["notes"] = budget_note + + api_root = "https://sandbox.zenodo.org/api" if sandbox else "https://zenodo.org/api" + deposit_url = 
f"{api_root}/deposit/depositions" + + session = session or requests.Session() + params = {"access_token": zenodo_token} + + response = session.post(deposit_url, params=params, json={"metadata": metadata}) + response.raise_for_status() + deposition = response.json() + + bucket_url = deposition["links"]["bucket"] + for file_path in files_to_upload: + with open(file_path, "rb") as handle: + upload_response = session.put( + f"{bucket_url}/{os.path.basename(file_path)}", + params=params, + data=handle, + ) + upload_response.raise_for_status() + + return deposition + __all__ = ( "get_problem", diff --git a/setup.py b/setup.py index 18f0c8ccf..9d8c9dfb8 100644 --- a/setup.py +++ b/setup.py @@ -174,5 +174,9 @@ def build_extension(self, ext): ], license="BSD", url="https://iohprofiler.github.io/IOHexperimenter", - install_requires=["numpy>=2.0"] -) + install_requires=[ + "numpy>=2.0", + "requests>=2.0", + "cloudpickle>=2.0", + ] +) diff --git a/tests/python/test_experiment.py b/tests/python/test_experiment.py index bde9a316f..8623c729c 100644 --- a/tests/python/test_experiment.py +++ b/tests/python/test_experiment.py @@ -2,6 +2,7 @@ import random import unittest import shutil +import zipfile import ioh @@ -77,7 +78,79 @@ def a_problem(x): self.assertSetEqual(info_files, set()) self.assertSetEqual(data_files, set()) - self.assertTrue(os.path.isfile("ioh_data.zip")) + self.assertTrue(os.path.isfile("ioh_data.zip")) + + def test_publish_generates_artifacts_and_uploads(self): + exp = ioh.Experiment( + Algorithm(), + [1], + [1], + [5], + njobs=1, + reps=1, + algorithm_name="RandomSearch", + old_logger=False, + logger_triggers=[ioh.logger.trigger.ALWAYS], + ) + + exp() + + class DummyResponse: + def __init__(self, payload, status_code=200): + self._payload = payload + self.status_code = status_code + + def raise_for_status(self): + if not (200 <= self.status_code < 300): + raise RuntimeError("HTTP error") + + def json(self): + return self._payload + + class DummySession: + def 
__init__(self): + self.post_calls = [] + self.put_calls = [] + + def post(self, url, params=None, json=None): + self.post_calls.append((url, params, json)) + payload = {"id": 42, "links": {"bucket": "https://sandbox.zenodo.org/api/files/42"}} + return DummyResponse(payload) + + def put(self, url, params=None, data=None): + self.put_calls.append((url, params, getattr(data, "name", ""))) + return DummyResponse({}) + + session = DummySession() + + deposition = exp.publish( + zenodo_token="secret", + title="Unit Test Experiment", + description="An automated test upload.", + creators=[{"name": "Doe, Jane"}], + budget=100, + sandbox=True, + session=session, + ) + + self.assertEqual(deposition["id"], 42) + readme_path = os.path.join("ioh_data", "README.md") + self.assertTrue(os.path.isfile(readme_path)) + with open(readme_path, encoding="utf-8") as handle: + readme_contents = handle.read() + self.assertIn("Unit Test Experiment", readme_contents) + self.assertIn("Budget", readme_contents) + + algorithm_dump = os.path.join("ioh_data", "RandomSearch.pkl") + self.assertTrue(os.path.isfile(algorithm_dump)) + + with zipfile.ZipFile("ioh_data.zip") as archive: + self.assertTrue(any(name.endswith("README.md") for name in archive.namelist())) + + uploaded_files = {os.path.basename(call[0]) for call in session.put_calls} + self.assertIn("ioh_data.zip", uploaded_files) + self.assertIn("README.md", uploaded_files) + self.assertIn("RandomSearch.pkl", uploaded_files) def test_experimenter_v2(self): ioh.Experiment(