Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
docker/mysql/data
*.log
logs/
.DS_Store

# Byte-compiled / optimized / DLL files
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ classifiers = [
]
dependencies = [
"fastapi",
"loguru",
"cryptography",
"pydantic",
"uvicorn",
Expand Down
17 changes: 10 additions & 7 deletions src/config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import functools
import logging
import os
import tomllib
import typing
from pathlib import Path

from dotenv import load_dotenv

logger = logging.getLogger(__name__)
from loguru import logger

TomlTable = dict[str, typing.Any]

Expand All @@ -27,9 +25,13 @@
_dotenv_file = Path(os.getenv(DOTENV_FILE_ENV, _config_directory / ".env"))
_dotenv_file = _dotenv_file.expanduser().absolute()

logger.info("Configuration directory is '%s'", _config_directory)
logger.info("Loading configuration file from '%s'", _config_file)
logger.info("Loading environment variables from '%s'", _dotenv_file)

logger.info(
"Determined configuration sources.",
configuration_directory=_config_directory,
configuration_file=_config_file,
dotenv_file=_dotenv_file,
)

load_dotenv(dotenv_path=_dotenv_file)

Expand Down Expand Up @@ -77,5 +79,6 @@ def load_database_configuration(file: Path = _config_file) -> TomlTable:
return database_configuration


def load_configuration(file: Path = _config_file) -> TomlTable:
def load_configuration(file: Path | None = None) -> TomlTable:
    """Load the application configuration as a TOML table.

    Parameters
    ----------
    file:
        Optional path to a TOML configuration file. When ``None``, the
        module-level default configuration file (``_config_file``) is used.

    Returns
    -------
    TomlTable
        The parsed configuration.

    Delegates to the ``functools.cache``-decorated ``_load_configuration``
    helper so repeated calls do not re-read and re-parse the same file.
    """
    return _load_configuration(file or _config_file)
Comment on lines +82 to 84
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

Missing caching - delegates to uncached file read.

load_configuration duplicates the tomllib.loads(file.read_text()) logic from _load_configuration (line 49-50) but lacks the @functools.cache decorator. Each call will re-read and re-parse the file.

Delegate to _load_configuration to benefit from caching and reduce duplication:

♻️ Proposed fix
 def load_configuration(file: Path | None = None) -> TomlTable:
     file = file or _config_file
-    return tomllib.loads(file.read_text())
+    return _load_configuration(file)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/config.py` around lines 82 - 84, load_configuration currently re-reads
and re-parses the file by calling tomllib.loads(file.read_text()) instead of
delegating to the cached helper; change load_configuration to simply delegate to
the cached _load_configuration by calling and returning _load_configuration(file
or _config_file) so it reuses the existing functools.cache behavior and removes
the duplicated parsing logic (refer to load_configuration, _load_configuration,
and _config_file).

15 changes: 15 additions & 0 deletions src/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@ minio_base_url="https://openml1.win.tue.nl"
[development]
allow_test_api_keys=true

# Any number of logging.NAME configurations can be added.
# NAME is for reference only, it has no meaning otherwise.
# You can add any arguments to `loguru.logger.add`,
# the `sink` variable will be used as first positional argument.
# https://loguru.readthedocs.io/en/stable/api/logger.html
[logging.develop]
sink="develop.log"
# One of loguru levels: TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL
level="DEBUG"
# Automatically create a new file by date or file size
rotation="50 MB"
# Retention specifies the timespan after which automatic cleanup occurs.
retention="1 day"
compression="gz"

[fastapi]
root_path=""

Expand Down
56 changes: 56 additions & 0 deletions src/core/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Utility functions for logging."""

import sys
import uuid
from collections.abc import Awaitable, Callable
from pathlib import Path

from loguru import logger
from starlette.requests import Request
from starlette.responses import Response

from config import load_configuration


def setup_log_sinks(configuration_file: Path | None = None) -> None:
    """Configure loguru sinks from the ``[logging.*]`` tables of the app configuration.

    Each ``logging.NAME`` table must provide a ``sink`` entry — a file path,
    or the literal string ``"sys.stderr"`` for standard error. All remaining
    keys are forwarded to ``loguru.logger.add``.

    Parameters
    ----------
    configuration_file:
        Optional path to the TOML configuration file; when ``None`` the
        application's default configuration file is used.
    """
    configuration = load_configuration(configuration_file)
    for nickname, sink_configuration in configuration.get("logging", {}).items():
        logger.info("Configuring sink", nickname=nickname, **sink_configuration)
        # Copy before popping: `load_configuration` may return a cached (shared)
        # mapping, and mutating it in place would make a second call to this
        # function fail with a KeyError on "sink".
        sink_options = dict(sink_configuration)
        sink = sink_options.pop("sink")
        if sink == "sys.stderr":
            sink = sys.stderr
        logger.add(sink, serialize=True, **sink_options)


async def add_request_context_to_log(
    request: Request,
    call_next: Callable[[Request], Awaitable[Response]],
) -> Response:
    """Attach a unique request id to every log record emitted for this request."""
    request_id = uuid.uuid4().hex
    with logger.contextualize(request_id=request_id):
        response = await call_next(request)
    return response


async def request_response_logger(
    request: Request,
    call_next: Callable[[Request], Awaitable[Response]],
) -> Response:
    """Log details of the incoming request and of the outgoing response."""
    request_details = {
        "url": request.url,
        "headers": request.headers,
        "cookies": request.cookies,
        "path_params": request.path_params,
        "query_params": request.query_params,
    }
    logger.info("request", **request_details)

    response: Response = await call_next(request)

    response_details = {
        "status_code": response.status_code,
        "headers": response.headers,
        "media_type": response.media_type,
    }
    logger.info("response", **response_details)
    return response
26 changes: 22 additions & 4 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import argparse
import logging
import sys
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from pathlib import Path

import uvicorn
from fastapi import FastAPI
from loguru import logger

from config import load_configuration
from core.errors import ProblemDetailError, problem_detail_exception_handler
from core.logging import add_request_context_to_log, request_response_logger, setup_log_sinks
from database.setup import close_databases
from routers.mldcat_ap.dataset import router as mldcat_ap_router
from routers.openml.datasets import router as datasets_router
Expand Down Expand Up @@ -55,12 +58,25 @@ def _parse_args() -> argparse.Namespace:
return parser.parse_args()


def create_api() -> FastAPI:
fastapi_kwargs = load_configuration()["fastapi"]
def create_api(configuration_file: Path | None = None) -> FastAPI:
# Default logging configuration so we have logs during setup
logger.remove()
setup_sink = logger.add(sys.stderr, serialize=True)
setup_log_sinks(configuration_file)

fastapi_kwargs = load_configuration(configuration_file)["fastapi"]
logger.info("Creating FastAPI App", lifespan=lifespan, **fastapi_kwargs)
app = FastAPI(**fastapi_kwargs, lifespan=lifespan)

logger.info("Setting up middleware and exception handlers.")
# Order matters! Each added middleware wraps the previous, creating a stack.
# See also: https://fastapi.tiangolo.com/tutorial/middleware/#multiple-middleware-execution-order
app.middleware("http")(request_response_logger)
app.middleware("http")(add_request_context_to_log)

app.add_exception_handler(ProblemDetailError, problem_detail_exception_handler) # type: ignore[arg-type]

logger.info("Adding routers to app")
app.include_router(datasets_router)
app.include_router(qualities_router)
app.include_router(mldcat_ap_router)
Expand All @@ -72,11 +88,13 @@ def create_api() -> FastAPI:
app.include_router(study_router)
app.include_router(setup_router)
app.include_router(run_router)

logger.info("App setup completed.")
logger.remove(setup_sink)
return app


def main() -> None:
logging.basicConfig(level=logging.INFO)
args = _parse_args()
uvicorn.run(
app="main:create_api",
Expand Down
28 changes: 28 additions & 0 deletions tests/config.test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
arff_base_url="https://test.openml.org"
minio_base_url="https://openml1.win.tue.nl"

[development]
allow_test_api_keys=true

[logging.develop]
sink="sys.stderr"
level="DEBUG"

[fastapi]
root_path=""

[databases.defaults]
host="database"
port="3306"
# SQLAlchemy `dialect` and `driver`: https://docs.sqlalchemy.org/en/20/dialects/index.html
drivername="mysql+aiomysql"

[databases.expdb]
database="openml_expdb"

[databases.openml]
database="openml"

[routing]
minio_url="http://minio:9000/"
server_url="http://php-api:80/"
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ async def php_api() -> AsyncIterator[httpx.AsyncClient]:
async def py_api(
expdb_test: AsyncConnection, user_test: AsyncConnection
) -> AsyncIterator[httpx.AsyncClient]:
app = create_api()
app = create_api(Path(__file__).parent / "config.test.toml")

# We use async generator functions because fixtures may not be called directly.
# The async generator returns the test connections for FastAPI to handle properly
Expand Down
Loading