Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MLFLOW
# Values consumed by docker-compose.yaml through ${...} interpolation.
# Credentials forwarded into the mlflow container's environment.
# NOTE(review): "value" looks like a placeholder -- replace it locally and do not commit real secrets.
MLFLOW_TRACKING_USERNAME=mlflowadmin
MLFLOW_TRACKING_PASSWORD=value
# Host-side and container-side ports of the published port mapping.
MLFLOW_HOST_PORT=5000
MLFLOW_CONT_PORT=5000
# Bind address and port read by mlflow/entrypoint.sh (it falls back to 0.0.0.0 / 5000 when they are unset).
MLFLOW_HOST=0.0.0.0
# Refers to MLFLOW_CONT_PORT defined above; keep this line after that definition.
MLFLOW_PORT=${MLFLOW_CONT_PORT}
# Storage locations inside the container; both paths are bind-mount targets in docker-compose.yaml.
MLFLOW_BACKEND_STORE_URI=sqlite:////mlflow/mlflow_data/mlflow.db
MLFLOW_DEFAULT_ARTIFACT_ROOT=file:///mlflow/mlflow_artifacts
# Host directory and the container path it is mounted at.
MLFLOW_HOST_CONFIG_PATH=./mlflow/config
MLFLOW_CONT_CONFIG_PATH=/mlflow/config
# Forwarded into the container as MLFLOW_FLASK_SERVER_SECRET_KEY.
# NOTE(review): "value" looks like a placeholder -- set a real secret outside version control.
MLFLOW_FLASK_SERVER_SECRET_KEY=value
26 changes: 26 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,29 @@ node_modules/
*.DS_Store
*.log

# MLflow generated files
mlflow_artifacts/
mlflow_data/

# Python
__pycache__/
*.py[cod]
*$py.class
.pytest_cache/
.coverage
htmlcov/

# IDE
.idea/
.vscode/
*.swp
*.swo

# Logs
# `logs/` contains no leading or middle slash, so it matches a logs directory
# at any depth (test/logs/ included). Plain `*.log` files are already ignored
# by the rule earlier in this file, so it is not repeated here.
logs/
# Rotated log files (name.log.<suffix>)
*.log.*
# Zip archives, e.g. the zip-compressed rotated logs written by test/test_mlflow_connection.py
*.zip

25 changes: 25 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Compose project for the MLflow tracking server.
# The obsolete top-level `version` key is omitted; current Docker Compose
# ignores it and the top-level `name` key belongs to the Compose Specification.
name: mlops-stack

services:
  mlflow:
    build: ./mlflow
    image: mlflow
    container_name: mlflow_mlops
    ports:
      # Quoted so the HOST:CONTAINER pair is always parsed as a string.
      - "${MLFLOW_HOST_PORT}:${MLFLOW_CONT_PORT}"
    volumes:
      # Bind mounts: configuration, SQLite database directory and artifact directory.
      - ${MLFLOW_HOST_CONFIG_PATH}:${MLFLOW_CONT_CONFIG_PATH}
      - ./mlflow_data:/mlflow/mlflow_data
      - ./mlflow_artifacts:/mlflow/mlflow_artifacts
    environment:
      - MLFLOW_TRACKING_USERNAME=${MLFLOW_TRACKING_USERNAME}
      - MLFLOW_TRACKING_PASSWORD=${MLFLOW_TRACKING_PASSWORD}
      # entrypoint.sh binds to MLFLOW_HOST/MLFLOW_PORT. The port is taken from
      # MLFLOW_CONT_PORT so the server always listens on the container port
      # that is published above.
      - MLFLOW_HOST=${MLFLOW_HOST:-0.0.0.0}
      - MLFLOW_PORT=${MLFLOW_CONT_PORT}
      # Taken from .env; the defaults are the previously hard-coded values.
      - MLFLOW_BACKEND_STORE_URI=${MLFLOW_BACKEND_STORE_URI:-sqlite:////mlflow/mlflow_data/mlflow.db}
      - MLFLOW_DEFAULT_ARTIFACT_ROOT=${MLFLOW_DEFAULT_ARTIFACT_ROOT:-file:///mlflow/mlflow_artifacts}
      - MLFLOW_FLASK_SERVER_SECRET_KEY=${MLFLOW_FLASK_SERVER_SECRET_KEY}
    restart: unless-stopped

# No top-level `volumes:` section: the service uses bind mounts only, so the
# named volumes mlflow_data / mlflow_artifacts were declared but never referenced.
20 changes: 20 additions & 0 deletions mlflow/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Image for the MLflow tracking server started by /mlflow/entrypoint.sh.
FROM python:3.9-slim

# Set the working directory first; WORKDIR creates /mlflow if it is missing.
WORKDIR /mlflow

# Install dependencies before copying anything else so this layer stays
# cached until requirements.txt changes.
COPY ./requirements.txt /mlflow/requirements.txt
RUN pip install --no-cache-dir -r /mlflow/requirements.txt

# Create directories for MLflow data with explicit permissions.
# Both directories are freshly created and empty, so a non-recursive chmod is
# sufficient.
# NOTE(review): the image runs as root and these directories are
# world-writable; tighten this once ownership of the host bind mounts is settled.
RUN mkdir -p /mlflow/mlflow_data /mlflow/mlflow_artifacts && \
    chmod 777 /mlflow/mlflow_data /mlflow/mlflow_artifacts

# Copy the entrypoint with its executable bit set in the same layer instead of
# a follow-up `RUN chmod +x` (COPY --chmod requires BuildKit).
COPY --chmod=755 ./entrypoint.sh /mlflow/entrypoint.sh

# Documentation only: the port the server listens on by default.
EXPOSE 5000

ENTRYPOINT ["/mlflow/entrypoint.sh"]
24 changes: 24 additions & 0 deletions mlflow/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# Container entrypoint: prints the effective configuration, makes sure the
# data directories exist, then either runs the command passed as arguments or
# starts the MLflow server.
set -e

# Resolve the bind address and port once; both fall back to defaults when unset.
bind_host="${MLFLOW_HOST:-0.0.0.0}"
bind_port="${MLFLOW_PORT:-5000}"

# Startup diagnostics (the password is intentionally not printed).
echo "=== [ENTRYPOINT DEBUG] Starting MLflow at $(date) ==="
echo "MLFLOW_TRACKING_USERNAME: ${MLFLOW_TRACKING_USERNAME}"
echo "MLFLOW_BACKEND_STORE_URI: ${MLFLOW_BACKEND_STORE_URI}"
echo "MLFLOW_DEFAULT_ARTIFACT_ROOT: ${MLFLOW_DEFAULT_ARTIFACT_ROOT}"
echo "MLFLOW_HOST: ${bind_host}"
echo "MLFLOW_PORT: ${bind_port}"

# The data and artifact directories must exist before the server starts.
mkdir -p /mlflow/mlflow_data /mlflow/mlflow_artifacts

# Any arguments given to the container replace the default server command.
if [ "$#" -gt 0 ]; then
    exec "$@"
fi

# No arguments: replace this shell with the MLflow server process.
exec mlflow server \
    --host "${bind_host}" \
    --port "${bind_port}" \
    --backend-store-uri "${MLFLOW_BACKEND_STORE_URI:-sqlite:////mlflow/mlflow_data/mlflow.db}" \
    --default-artifact-root "${MLFLOW_DEFAULT_ARTIFACT_ROOT:-file:///mlflow/mlflow_artifacts}" \
    --gunicorn-opts="--workers=1 --timeout 120"
7 changes: 7 additions & 0 deletions mlflow/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# MLflow and its "auth" extra are pinned in a single requirement so the extra
# is always resolved against the same pinned release.
mlflow[auth]==2.22.0
psycopg2-binary==2.9.10
sqlalchemy==2.0.23
boto3==1.38.2
PyMySQL==1.1.1
# Logging library imported by test/test_mlflow_connection.py
loguru==0.7.3
38 changes: 38 additions & 0 deletions test/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
Constants and configuration values for the Global-Classifier project.
"""

# --- MLflow server connection ---
# The test script points the MLflow client at this URI and exports the
# username/password as MLFLOW_TRACKING_USERNAME / MLFLOW_TRACKING_PASSWORD.
MLFLOW_TRACKING_URI = "http://localhost:5000"
MLFLOW_USERNAME, MLFLOW_PASSWORD = "mlflowadmin", "null"

# --- Experiment ---
# Name of the experiment the test script creates or reuses.
DEFAULT_EXPERIMENT_NAME = "test_experiment"

# --- Artifact ---
# Relative path of the file the test script writes and then logs as an artifact.
ARTIFACT_PATH = "mlflow_artifacts/test_artifacts.txt"

# --- Run naming ---
# strftime pattern applied to the current time to build the run name.
RUN_NAME_FORMAT = "test_run_%Y%m%d_%H%M%S"

# --- Logged parameters ---
PARAM_TEST, PARAM_TEST_VALUE = "test_param", "test_value"
PARAM_RANDOM_INTEGER = "random_integer"

# --- Logged metrics ---
# Each metric value is drawn uniformly between its MIN and MAX bound.
METRIC_ACCURACY = "accuracy"
METRIC_ACCURACY_MIN, METRIC_ACCURACY_MAX = 0.7, 0.99
METRIC_LOSS = "loss"
METRIC_LOSS_MIN, METRIC_LOSS_MAX = 0.01, 0.3

# --- Message templates ---
# The "{}" placeholder is filled in with str.format() by the test script.
MSG_ARTIFACT_CONTENT = "This is a test artifact created at {}"
MSG_SUCCESS = "\nTest completed successfully! Check the MLflow UI at {}"

# --- Logging ---
# Directory (relative to the test script) and file name of the log file.
LOGS_DIR = "logs"
LOG_FILE = "test_mlflow_connection.log"
113 changes: 113 additions & 0 deletions test/test_mlflow_connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""
Test script to verify MLflow connection and functionality.
"""
import sys
import os
import random
import mlflow
import traceback
from datetime import datetime
from loguru import logger

# Add the parent directory to sys.path to import constants
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from constants import (
MLFLOW_TRACKING_URI, MLFLOW_USERNAME, MLFLOW_PASSWORD,
DEFAULT_EXPERIMENT_NAME, ARTIFACT_PATH, RUN_NAME_FORMAT,
PARAM_TEST, PARAM_TEST_VALUE, PARAM_RANDOM_INTEGER,
METRIC_ACCURACY, METRIC_ACCURACY_MIN, METRIC_ACCURACY_MAX,
METRIC_LOSS, METRIC_LOSS_MIN, METRIC_LOSS_MAX,
MSG_ARTIFACT_CONTENT, MSG_SUCCESS, LOGS_DIR, LOG_FILE
)

# Configure Loguru: the log file lives in LOGS_DIR next to this script.
log_file = os.path.join(os.path.dirname(__file__), LOGS_DIR, LOG_FILE)
os.makedirs(os.path.dirname(log_file), exist_ok=True)

# Remove default handler and add custom handlers
logger.remove()
# Add console handler
logger.add(sys.stdout, format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}")
# Add file handler with rotation
logger.add(
    log_file,
    rotation="10 MB",  # Rotate file when it reaches 10MB
    retention="1 month",  # Keep logs for 1 month
    compression="zip",  # Compress rotated logs
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
)

logger.info("Starting MLflow connection test...")

try:
    logger.info("Setting tracking URI...")
    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

    # Credentials are exported before the first request is made below.
    logger.info("Setting environment variables...")
    os.environ["MLFLOW_TRACKING_USERNAME"] = MLFLOW_USERNAME
    os.environ["MLFLOW_TRACKING_PASSWORD"] = MLFLOW_PASSWORD

    # Test basic connectivity
    logger.info("Testing connection by listing experiments...")
    experiments = mlflow.search_experiments()
    logger.info(f"Found {len(experiments)} experiments")

    # Create or get experiment
    experiment_name = DEFAULT_EXPERIMENT_NAME
    logger.info(f"Creating/getting experiment '{experiment_name}'...")
    try:
        experiment_id = mlflow.create_experiment(experiment_name)
        logger.success(f"Created new experiment with ID: {experiment_id}")
    except Exception as e:
        # Creation failed (for example because the experiment already
        # exists); fall back to looking it up by name.
        logger.warning(f"Could not create experiment: {str(e)}")
        logger.info("Trying to retrieve existing experiment...")
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment:
            experiment_id = experiment.experiment_id
            logger.info(f"Using existing experiment with ID: {experiment_id}")
        else:
            logger.error("ERROR: Could not create or find experiment")
            sys.exit(1)

    logger.info(f"Setting active experiment to {experiment_name}...")
    mlflow.set_experiment(experiment_name)

    # Start a run
    run_name = datetime.now().strftime(RUN_NAME_FORMAT)
    logger.info(f"Starting a new run with name: {run_name}...")
    with mlflow.start_run(run_name=run_name) as run:
        run_id = run.info.run_id
        logger.info(f"Started run with ID: {run_id}")

        # Log parameters
        logger.info("Logging parameters...")
        mlflow.log_param(PARAM_TEST, PARAM_TEST_VALUE)
        mlflow.log_param(PARAM_RANDOM_INTEGER, random.randint(1, 100))

        # Log metrics
        logger.info("Logging metrics...")
        mlflow.log_metric(METRIC_ACCURACY, random.uniform(METRIC_ACCURACY_MIN, METRIC_ACCURACY_MAX))
        mlflow.log_metric(METRIC_LOSS, random.uniform(METRIC_LOSS_MIN, METRIC_LOSS_MAX))

        # Create and log an artifact
        logger.info("Creating artifact...")
        # Ensure the directory exists (ARTIFACT_PATH is relative to the
        # current working directory).
        artifact_dir = os.path.dirname(ARTIFACT_PATH)
        if artifact_dir and not os.path.exists(artifact_dir):
            logger.info(f"Creating directory for artifact: {artifact_dir}")
            os.makedirs(artifact_dir, exist_ok=True)

        with open(ARTIFACT_PATH, "w") as f:
            f.write(MSG_ARTIFACT_CONTENT.format(datetime.now().isoformat()))

        logger.info(f"Logging artifact: {ARTIFACT_PATH}")
        mlflow.log_artifact(ARTIFACT_PATH)

        logger.success("Successfully logged parameters, metrics, and artifacts")

    logger.success(MSG_SUCCESS.format(MLFLOW_TRACKING_URI))
    logger.info(f"Experiment: {experiment_name}, Run ID: {run_id}")

except Exception as e:
    logger.error(f"An unexpected error occurred: {type(e).__name__}: {str(e)}")
    logger.error(traceback.format_exc())
    # Exit non-zero so callers (CI, shell scripts) can detect the failure;
    # previously the script logged the error and still exited with status 0.
    sys.exit(1)
Loading