Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
docker/mysql/data
*.log
logs/
.DS_Store

# Byte-compiled / optimized / DLL files
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ classifiers = [
]
dependencies = [
"fastapi",
"loguru",
"cryptography",
"pydantic",
"uvicorn",
Expand Down
17 changes: 10 additions & 7 deletions src/config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import functools
import logging
import os
import tomllib
import typing
from pathlib import Path

from dotenv import load_dotenv

logger = logging.getLogger(__name__)
from loguru import logger

TomlTable = dict[str, typing.Any]

Expand All @@ -27,9 +25,13 @@
_dotenv_file = Path(os.getenv(DOTENV_FILE_ENV, _config_directory / ".env"))
_dotenv_file = _dotenv_file.expanduser().absolute()

logger.info("Configuration directory is '%s'", _config_directory)
logger.info("Loading configuration file from '%s'", _config_file)
logger.info("Loading environment variables from '%s'", _dotenv_file)

logger.info(
"Determined configuration sources.",
configuration_directory=_config_directory,
configuration_file=_config_file,
dotenv_file=_dotenv_file,
)

load_dotenv(dotenv_path=_dotenv_file)

Expand Down Expand Up @@ -77,5 +79,6 @@ def load_database_configuration(file: Path = _config_file) -> TomlTable:
return database_configuration


def load_configuration(file: Path = _config_file) -> TomlTable:
def load_configuration(file: Path | None = None) -> TomlTable:
    """Load the application configuration as a TOML table.

    Parameters
    ----------
    file:
        Optional path to a TOML configuration file. When ``None``, the
        module-level default configuration file (``_config_file``) is used.

    Returns
    -------
    TomlTable
        The parsed configuration.

    Delegates to the ``functools.cache``-decorated ``_load_configuration``
    helper so repeated calls do not re-read and re-parse the same file.
    """
    return _load_configuration(file or _config_file)
Comment on lines +82 to 84
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

Missing caching - delegates to uncached file read.

load_configuration duplicates the tomllib.loads(file.read_text()) logic from _load_configuration (line 49-50) but lacks the @functools.cache decorator. Each call will re-read and re-parse the file.

Delegate to _load_configuration to benefit from caching and reduce duplication:

♻️ Proposed fix
 def load_configuration(file: Path | None = None) -> TomlTable:
     file = file or _config_file
-    return tomllib.loads(file.read_text())
+    return _load_configuration(file)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/config.py` around lines 82 - 84, load_configuration currently re-reads
and re-parses the file by calling tomllib.loads(file.read_text()) instead of
delegating to the cached helper; change load_configuration to simply delegate to
the cached _load_configuration by calling and returning _load_configuration(file
or _config_file) so it reuses the existing functools.cache behavior and removes
the duplicated parsing logic (refer to load_configuration, _load_configuration,
and _config_file).

15 changes: 15 additions & 0 deletions src/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@ minio_base_url="https://openml1.win.tue.nl"
[development]
allow_test_api_keys=true

# Any number of logging.NAME configurations can be added.
# NAME is for reference only, it has no meaning otherwise.
# You can add any arguments to `loguru.logger.add`,
# the `sink` variable will be used as first positional argument.
# https://loguru.readthedocs.io/en/stable/api/logger.html
[logging.develop]
sink="develop.log"
# One of loguru levels: TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL
level="DEBUG"
# Automatically create a new file by date or file size
rotation="50 MB"
# Retention specifies the timespan after which automatic cleanup occurs.
retention="1 day"
compression="gz"

[fastapi]
root_path=""

Expand Down
56 changes: 56 additions & 0 deletions src/core/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Utility functions for logging."""

import sys
import uuid
from collections.abc import Awaitable, Callable
from pathlib import Path

from loguru import logger
from starlette.requests import Request
from starlette.responses import Response

from config import load_configuration


def setup_log_sinks(configuration_file: Path | None = None) -> None:
    """Configure loguru sinks from the ``[logging.*]`` tables of the app configuration.

    Each ``logging.NAME`` table must provide a ``sink`` entry — a file path,
    or the literal string ``"sys.stderr"`` for standard error. All remaining
    keys are forwarded to ``loguru.logger.add``.

    Parameters
    ----------
    configuration_file:
        Optional path to the TOML configuration file; when ``None`` the
        application's default configuration file is used.
    """
    configuration = load_configuration(configuration_file)
    for nickname, sink_configuration in configuration.get("logging", {}).items():
        logger.info("Configuring sink", nickname=nickname, **sink_configuration)
        # Copy before popping: `load_configuration` may return a cached (shared)
        # mapping, and mutating it in place would make a second call to this
        # function fail with a KeyError on "sink".
        sink_options = dict(sink_configuration)
        sink = sink_options.pop("sink")
        if sink == "sys.stderr":
            sink = sys.stderr
        logger.add(sink, serialize=True, **sink_options)


async def add_request_context_to_log(
    request: Request,
    call_next: Callable[[Request], Awaitable[Response]],
) -> Response:
    """Attach a unique request id to every log record emitted for this request."""
    request_id = uuid.uuid4().hex
    with logger.contextualize(request_id=request_id):
        response = await call_next(request)
    return response


async def request_response_logger(
    request: Request,
    call_next: Callable[[Request], Awaitable[Response]],
) -> Response:
    """Log details of the incoming request and of the outgoing response."""
    request_details = {
        "url": request.url,
        "headers": request.headers,
        "cookies": request.cookies,
        "path_params": request.path_params,
        "query_params": request.query_params,
    }
    logger.info("request", **request_details)

    response: Response = await call_next(request)

    response_details = {
        "status_code": response.status_code,
        "headers": response.headers,
        "media_type": response.media_type,
    }
    logger.info("response", **response_details)
    return response
26 changes: 22 additions & 4 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import argparse
import logging
import sys
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from pathlib import Path

import uvicorn
from fastapi import FastAPI
from loguru import logger

from config import load_configuration
from core.errors import ProblemDetailError, problem_detail_exception_handler
from core.logging import add_request_context_to_log, request_response_logger, setup_log_sinks
from database.setup import close_databases
from routers.mldcat_ap.dataset import router as mldcat_ap_router
from routers.openml.datasets import router as datasets_router
Expand Down Expand Up @@ -55,12 +58,25 @@ def _parse_args() -> argparse.Namespace:
return parser.parse_args()


def create_api() -> FastAPI:
fastapi_kwargs = load_configuration()["fastapi"]
def create_api(configuration_file: Path | None = None) -> FastAPI:
# Default logging configuration so we have logs during setup
logger.remove()
setup_sink = logger.add(sys.stderr, serialize=True)
setup_log_sinks(configuration_file)

fastapi_kwargs = load_configuration(configuration_file)["fastapi"]
logger.info("Creating FastAPI App", lifespan=lifespan, **fastapi_kwargs)
app = FastAPI(**fastapi_kwargs, lifespan=lifespan)

logger.info("Setting up middleware and exception handlers.")
# Order matters! Each added middleware wraps the previous, creating a stack.
# See also: https://fastapi.tiangolo.com/tutorial/middleware/#multiple-middleware-execution-order
app.middleware("http")(request_response_logger)
app.middleware("http")(add_request_context_to_log)

app.add_exception_handler(ProblemDetailError, problem_detail_exception_handler) # type: ignore[arg-type]

logger.info("Adding routers to app")
app.include_router(datasets_router)
app.include_router(qualities_router)
app.include_router(mldcat_ap_router)
Expand All @@ -72,11 +88,13 @@ def create_api() -> FastAPI:
app.include_router(study_router)
app.include_router(setup_router)
app.include_router(run_router)

logger.info("App setup completed.")
logger.remove(setup_sink)
return app


def main() -> None:
logging.basicConfig(level=logging.INFO)
args = _parse_args()
uvicorn.run(
app="main:create_api",
Expand Down
28 changes: 28 additions & 0 deletions tests/config.test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
arff_base_url="https://test.openml.org"
minio_base_url="https://openml1.win.tue.nl"

[development]
allow_test_api_keys=true

[logging.develop]
sink="sys.stderr"
level="DEBUG"

[fastapi]
root_path=""

[databases.defaults]
host="database"
port="3306"
# SQLAlchemy `dialect` and `driver`: https://docs.sqlalchemy.org/en/20/dialects/index.html
drivername="mysql+aiomysql"

[databases.expdb]
database="openml_expdb"

[databases.openml]
database="openml"

[routing]
minio_url="http://minio:9000/"
server_url="http://php-api:80/"
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ async def php_api() -> AsyncIterator[httpx.AsyncClient]:
async def py_api(
expdb_test: AsyncConnection, user_test: AsyncConnection
) -> AsyncIterator[httpx.AsyncClient]:
app = create_api()
app = create_api(Path(__file__).parent / "config.test.toml")

# We use async generator functions because fixtures may not be called directly.
# The async generator returns the test connections for FastAPI to handle properly
Expand Down
Loading