From a42828af22358a259c4bd024adccf3b32ea8a08a Mon Sep 17 00:00:00 2001
From: Niki van Stein <n.van.stein@liacs.leidenuniv.nl>
Date: Thu, 30 Oct 2025 15:47:44 +0100
Subject: [PATCH] Add experiment publish helper and Zenodo integration (#5)

* Add experiment publish helper and Zenodo integration
* Cleaning up readme template
---
 doc/python/source/api/ioh.Experiment.rst |   1 +
 doc/python/source/python.rst             |   3 +-
 doc/python/source/python/publish.rst     |  45 +++++
 example/python_example_publish.py        |  37 +++++
 ioh/__init__.py                          | 201 +++++++++++++++++++++++
 setup.py                                 |   8 +-
 tests/python/test_experiment.py          |  75 ++++++++-
 7 files changed, 366 insertions(+), 4 deletions(-)
 create mode 100644 doc/python/source/python/publish.rst
 create mode 100644 example/python_example_publish.py
diff --git a/doc/python/source/api/ioh.Experiment.rst b/doc/python/source/api/ioh.Experiment.rst
index 784fa6709..3882273a5 100644
--- a/doc/python/source/api/ioh.Experiment.rst
+++ b/doc/python/source/api/ioh.Experiment.rst
@@ -16,6 +16,7 @@ Experiment
       ~Experiment.evaluate
       ~Experiment.merge_output_to_single_folder
       ~Experiment.merge_tmp_folders
+      ~Experiment.publish
       ~Experiment.run
 
    .. rubric:: Methods Documentation
diff --git a/doc/python/source/python.rst b/doc/python/source/python.rst
index 8d5190681..58da8743e 100644
--- a/doc/python/source/python.rst
+++ b/doc/python/source/python.rst
@@ -17,4 +17,5 @@ API documentation
    python/structures.rst
    python/ioh.rst
    python/logger.rst
-   python/suite.rst 
+   python/suite.rst
+   python/publish.rst
diff --git a/doc/python/source/python/publish.rst b/doc/python/source/python/publish.rst
new file mode 100644
index 000000000..67eac915d
--- /dev/null
+++ b/doc/python/source/python/publish.rst
@@ -0,0 +1,45 @@
+Publishing experiment archives
+==============================
+
+The :meth:`ioh.Experiment.publish` helper streamlines turning an experiment run
+into a shareable Zenodo deposit. After running an experiment, calling
+``publish`` will:
+
+* generate the ``ioh_data.zip`` archive with an up-to-date ``README.md``
+  describing the algorithm, problems, instances, dimensions, repetitions and
+  optional evaluation budget;
+* serialise the algorithm instance with :mod:`cloudpickle` so it can be
+  restored when reproducing the experiment; and
+* upload the archive, README and algorithm bundle to Zenodo using their REST
+  API.
+
+.. important:: You need a Zenodo access token with appropriate permissions to
+   upload deposits. You can create and manage your tokens in your Zenodo
+   account settings: https://zenodo.org/account/settings/applications/tokens/new/
+
+Example usage::
+
+    exp = ioh.Experiment(
+        my_algorithm,
+        fids=[1, 2, 3],
+        iids=[1],
+        dims=[5, 10],
+        reps=5,
+    )
+    exp()  # run the benchmark
+    exp.publish(
+        zenodo_token="<your access token>",
+        title="Benchmarking IOH on BBOB",
+        description="Log data for the benchmark run.",
+        creators=[{"name": "Doe, Jane"}],
+        budget=20000,
+        keywords=["optimization", "benchmark"],
+        sandbox=True,
+    )
+
+By default the helper targets the production Zenodo endpoint. Pass
+``sandbox=True`` while testing credentials or automation against the sandbox
+service. You can also provide an existing :class:`requests.Session` to reuse
+connections or configure proxies, and use the ``additional_metadata`` parameter
+to add any extra Zenodo metadata fields.
+
diff --git a/example/python_example_publish.py b/example/python_example_publish.py
new file mode 100644
index 000000000..57eaf2e6a
--- /dev/null
+++ b/example/python_example_publish.py
@@ -0,0 +1,37 @@
+import ioh
+import numpy as np
+
+class RandomSearch:
+    """Uniform random search that evaluates ``n`` random points per problem."""
+
+    def __init__(self, n: int, length: float = 0.0):
+        self.n: int = n
+        self.length: float = length  # norm of the most recently sampled point
+
+    def __call__(self, problem: ioh.problem.RealSingleObjective) -> None:
+        """Evaluate ``problem`` at ``n`` points drawn uniformly from its bounds."""
+        for _ in range(self.n):
+            # The bounds accessor exposes the lower/upper limits of the search space.
+            limits = problem.bounds
+            candidate = np.random.uniform(limits.lb, limits.ub)
+            self.length = np.linalg.norm(candidate)
+            problem(candidate)
+budget = 1000  # evaluations per run; also passed to publish() as the reported budget
+my_algorithm = RandomSearch(n=budget)
+exp = ioh.Experiment(
+    my_algorithm,
+    fids=[1, 2, 3],
+    iids=[1],
+    dims=[5, 10],
+    reps=5,
+)
+exp()  # run the benchmark
+exp.publish(
+    zenodo_token="<your token here>",  # placeholder: supply your own Zenodo access token
+    title="Benchmarking IOH publish example",
+    description="Log data for the benchmark run.",
+    creators=[{"name": "van Stein, Niki"}],
+    budget=budget,
+    keywords=["optimization", "benchmark", "ioh"],
+    sandbox=True,  # targets the Zenodo sandbox API instead of the production service
+)
\ No newline at end of file
diff --git a/ioh/__init__.py b/ioh/__init__.py
index 116dda6c6..4d125bc68 100644
--- a/ioh/__init__.py
+++ b/ioh/__init__.py
@@ -10,9 +10,12 @@
 import shutil
 import copy
 import json
+import datetime
+import textwrap
 
 import urllib.request
 import tarfile
+import requests
 
 # Set the path to the static/ directory.
 # NEEDED for C++ code to load transformation details for several functions.
@@ -623,6 +626,204 @@ def __call__(self):
 
         return self
 
+    def _problem_summary(self) -> typing.Dict[int, str]:
+        """Return a ``{function id: problem name}`` mapping for this experiment."""
+        names: typing.Dict[int, str] = {}
+        if not self.fids:
+            return names
+        # The name is looked up on one representative instance and dimension.
+        iid = self.iids[0] if self.iids else 1
+        dim = self.dims[0] if self.dims else 1
+        for fid in self.fids:
+            try:
+                names[fid] = get_problem(fid, iid, dim, self.problem_class).meta_data.name
+            except Exception:  # best effort: fall back to a generic label
+                names[fid] = f"Problem {fid}"
+        return names
+
+    def _write_readme(
+        self,
+        title: str,
+        description: str,
+        budget: typing.Optional[int],
+        reproduction_hint: str,
+    ) -> str:
+        """Write ``README.md`` into ``logger_root`` and return its path.
+
+        Sections are joined explicitly instead of dedenting one f-string: the
+        interpolated problem list and snippet contain lines at column 0, so
+        ``textwrap.dedent`` would strip nothing and leave indented Markdown.
+
+        :param title: top-level heading.
+        :param description: introductory text placed below the heading.
+        :param budget: evaluation budget; ``None`` is shown as "Not specified".
+        :param reproduction_hint: Python snippet for the final code block.
+        """
+        os.makedirs(self.logger_root, exist_ok=True)
+        problems = [f"- **f{fid}**: {name}" for fid, name in self._problem_summary().items()]
+        algorithm_name = self.logger_params.get("algorithm_name", str(self.algorithm))
+        configuration = [
+            f"- **Algorithm**: {algorithm_name}",
+            f"- **Problem class**: {self.problem_class.name}",
+            f"- **Function IDs**: {self.fids}",
+            f"- **Instances**: {self.iids}",
+            f"- **Dimensions**: {self.dims}",
+            f"- **Repetitions**: {self.reps}",
+            f"- **Budget**: {budget if budget is not None else 'Not specified'}",
+        ]
+        contents = [
+            f"- Experimental logs located in `{os.path.basename(self.logger_root)}`.",
+            "- `README.md` (this file).",
+        ]
+        directions = [
+            "1. Unzip `ioh_data.zip` (or the generated archive) to access the raw logs.",
+            "2. Review the `.json`/`.info` files for scenario metadata and `.dat` files for evaluation traces.",
+            "3. Use the reproduction snippet below to re-run the experiment.",
+        ]
+        sections = [
+            f"# {title}",
+            f"{description}",
+            "## Experiment configuration",
+            "\n".join(configuration),
+            "### Problem details",
+            "\n".join(problems) if problems else "- No problems recorded",
+            "## Contents",
+            "\n".join(contents),
+            "## Directions",
+            "\n".join(directions),
+            "## Reproducing the results",
+            f"```python\n{reproduction_hint}\n```",
+        ]
+        content = "\n\n".join(sections).strip() + "\n"
+        readme_path = os.path.join(self.logger_root, "README.md")
+        with open(readme_path, "w", encoding="utf-8") as handle:
+            handle.write(content)
+        return readme_path
+
+    def _serialize_algorithm(self) -> typing.Optional[str]:
+        """Pickle the algorithm into ``logger_root``; return the path (None without cloudpickle)."""
+        try:
+            import cloudpickle
+        except ImportError:  # pragma: no cover - dependency missing
+            message = "cloudpickle is not available; algorithm serialization skipped."
+            warnings.warn(message, RuntimeWarning)
+            return None
+
+        raw_name = self.logger_params.get("algorithm_name") or self.algorithm.__class__.__name__
+        # Anything but letters, digits, "-" and "_" becomes "_" to get a safe file name.
+        cleaned = [ch if ch.isalnum() or ch in "-_" else "_" for ch in raw_name]
+        stem = "".join(cleaned) or "algorithm"
+        target = os.path.join(self.logger_root, f"{stem}.pkl")
+        with open(target, "wb") as stream:
+            cloudpickle.dump(self.algorithm, stream)
+        return target
+
+    def publish(
+        self,
+        zenodo_token: str,
+        title: str,
+        description: str,
+        creators: typing.List[typing.Dict[str, typing.Any]],
+        *,
+        budget: typing.Optional[int] = None,
+        keywords: typing.Optional[typing.List[str]] = None,
+        related_identifiers: typing.Optional[typing.List[typing.Dict[str, typing.Any]]] = None,
+        upload_type: str = "dataset",
+        access_right: str = "open",
+        publish_date: typing.Optional[str] = None,
+        sandbox: bool = False,
+        include_algorithm: bool = True,
+        additional_metadata: typing.Optional[typing.Dict[str, typing.Any]] = None,
+        session: typing.Optional[typing.Any] = None,
+    ) -> typing.Dict[str, typing.Any]:
+        """Create a Zenodo deposition and upload the experiment data to it.
+
+        Writes ``README.md`` into the logger folder, zips that folder and, unless
+        ``include_algorithm`` is False, pickles the algorithm; these files are then
+        uploaded. The deposition is returned as created; it is not published here.
+
+        Parameters
+        ----------
+        zenodo_token: access token, sent as the ``access_token`` query parameter.
+        title, description: deposition metadata, also used for the README.
+        budget: evaluation budget, reported in the README and the ``notes`` field.
+        creators, keywords, related_identifiers, upload_type, access_right: Zenodo metadata.
+        publish_date: ``publication_date`` value; defaults to today's ISO date.
+        sandbox: use ``sandbox.zenodo.org`` instead of the production service.
+        include_algorithm: whether to pickle and upload the algorithm as well.
+        additional_metadata: extra metadata fields, merged over the fields above.
+        session: ``requests.Session``-like object; when omitted, a temporary
+            session is created and closed again before returning.
+
+        Raises
+        ------
+        FileNotFoundError: if the logger output directory does not exist.
+        """
+        if not os.path.isdir(self.logger_root):
+            raise FileNotFoundError(
+                f"Logger output directory '{self.logger_root}' does not exist. Run the experiment before publishing."
+            )
+
+        algorithm_class = self.algorithm.__class__
+        algorithm_module = algorithm_class.__module__
+        algorithm_name = algorithm_class.__name__
+        if algorithm_module in {"__main__", "builtins"}:
+            import_line = "# Define or import your algorithm implementation"
+            algorithm_instantiation = f"algorithm = {repr(self.algorithm)}  # replace with your implementation"
+        else:
+            import_line = f"from {algorithm_module} import {algorithm_name}"
+            algorithm_instantiation = f"algorithm = {algorithm_name}()"
+        # Joined line by line so a multi-line repr() cannot upset the snippet's indentation.
+        reproduction_hint = "\n".join([
+            "import ioh", import_line, "", algorithm_instantiation,
+            "experiment = ioh.Experiment(", "    algorithm,",
+            f"    fids={self.fids},", f"    iids={self.iids},",
+            f"    dims={self.dims},", f"    reps={self.reps},",
+            f"    problem_class=ioh.ProblemClass.{self.problem_class.name},",
+            ")", "experiment()",
+        ])
+
+        readme_path = self._write_readme(title, description, budget, reproduction_hint)
+        archive_path = shutil.make_archive(self.logger_root, "zip", self.logger_root)
+        algorithm_path = self._serialize_algorithm() if include_algorithm else None
+        files_to_upload = [p for p in (archive_path, readme_path, algorithm_path) if p is not None]
+
+        metadata: typing.Dict[str, typing.Any] = {
+            "title": title,
+            "upload_type": upload_type,
+            "description": description,
+            "creators": creators,
+            "access_right": access_right,
+            "publication_date": datetime.date.today().isoformat() if publish_date is None else publish_date,
+        }
+        if keywords:
+            metadata["keywords"] = keywords
+        if related_identifiers:
+            metadata["related_identifiers"] = related_identifiers
+        if additional_metadata:
+            metadata.update(additional_metadata)
+        if budget is not None:
+            budget_note = f"Budget: {budget}"
+            metadata["notes"] = f"{metadata['notes']}\n{budget_note}" if metadata.get("notes") else budget_note
+
+        api_root = "https://sandbox.zenodo.org/api" if sandbox else "https://zenodo.org/api"
+        params = {"access_token": zenodo_token}
+        owns_session = not session  # only close a session that was created here
+        session = session or requests.Session()
+        try:
+            response = session.post(f"{api_root}/deposit/depositions", params=params, json={"metadata": metadata})
+            response.raise_for_status()
+            deposition = response.json()
+            bucket_url = deposition["links"]["bucket"]
+            for file_path in files_to_upload:
+                with open(file_path, "rb") as handle:
+                    upload = session.put(f"{bucket_url}/{os.path.basename(file_path)}", params=params, data=handle)
+                upload.raise_for_status()
+        finally:
+            if owns_session:
+                session.close()
+        return deposition
+
 
 __all__ = (
     "get_problem",
diff --git a/setup.py b/setup.py
index 18f0c8ccf..9d8c9dfb8 100644
--- a/setup.py
+++ b/setup.py
@@ -174,5 +174,9 @@ def build_extension(self, ext):
     ],
     license="BSD",
     url="https://iohprofiler.github.io/IOHexperimenter",
-    install_requires=["numpy>=2.0"]
-)
+    install_requires=[
+        "numpy>=2.0",
+        "requests>=2.0",
+        "cloudpickle>=2.0",
+    ]
+)
diff --git a/tests/python/test_experiment.py b/tests/python/test_experiment.py
index bde9a316f..8623c729c 100644
--- a/tests/python/test_experiment.py
+++ b/tests/python/test_experiment.py
@@ -2,6 +2,7 @@
 import random
 import unittest
 import shutil
+import zipfile
 
 import ioh
 
@@ -77,7 +78,79 @@ def a_problem(x):
 
         self.assertSetEqual(info_files, set())
         self.assertSetEqual(data_files, set())
-        self.assertTrue(os.path.isfile("ioh_data.zip"))    
+        self.assertTrue(os.path.isfile("ioh_data.zip"))
+
+    def test_publish_generates_artifacts_and_uploads(self):
+        """publish() writes README, pickle and zip and uploads all three files."""
+
+        class FakeResponse:
+            """Minimal stand-in for ``requests.Response``."""
+
+            def __init__(self, payload, status_code=200):
+                self._payload = payload
+                self.status_code = status_code
+
+            def raise_for_status(self):
+                if self.status_code < 200 or self.status_code >= 300:
+                    raise RuntimeError("HTTP error")
+
+            def json(self):
+                return self._payload
+
+        class FakeSession:
+            """Records every request instead of contacting Zenodo."""
+
+            def __init__(self):
+                self.post_calls = []
+                self.put_calls = []
+
+            def post(self, url, params=None, json=None):
+                self.post_calls.append((url, params, json))
+                bucket = "https://sandbox.zenodo.org/api/files/42"
+                return FakeResponse({"id": 42, "links": {"bucket": bucket}})
+
+            def put(self, url, params=None, data=None):
+                self.put_calls.append((url, params, getattr(data, "name", "")))
+                return FakeResponse({})
+
+        experiment = ioh.Experiment(
+            Algorithm(),
+            [1],
+            [1],
+            [5],
+            njobs=1,
+            reps=1,
+            algorithm_name="RandomSearch",
+            old_logger=False,
+            logger_triggers=[ioh.logger.trigger.ALWAYS],
+        )
+        experiment()
+        fake_session = FakeSession()
+        deposition = experiment.publish(
+            zenodo_token="secret",
+            title="Unit Test Experiment",
+            description="An automated test upload.",
+            creators=[{"name": "Doe, Jane"}],
+            budget=100,
+            sandbox=True,
+            session=fake_session,
+        )
+        self.assertEqual(deposition["id"], 42)
+
+        # The README and the pickled algorithm are written into the logger folder.
+        readme_path = os.path.join("ioh_data", "README.md")
+        self.assertTrue(os.path.isfile(readme_path))
+        with open(readme_path, encoding="utf-8") as handle:
+            readme_contents = handle.read()
+        for expected in ("Unit Test Experiment", "Budget"):
+            self.assertIn(expected, readme_contents)
+        self.assertTrue(os.path.isfile(os.path.join("ioh_data", "RandomSearch.pkl")))
+
+        with zipfile.ZipFile("ioh_data.zip") as archive:
+            self.assertTrue(any(name.endswith("README.md") for name in archive.namelist()))
+        uploaded = {os.path.basename(url) for url, _params, _name in fake_session.put_calls}
+        for expected in ("ioh_data.zip", "README.md", "RandomSearch.pkl"):
+            self.assertIn(expected, uploaded)
 
     def test_experimenter_v2(self):
         ioh.Experiment(