From 23c5016bce5ad3ab33c55a760f48c299bffeccca Mon Sep 17 00:00:00 2001
From: nuwangeek <charith.bimsara@rootcode.io>
Date: Mon, 28 Apr 2025 12:51:53 +0530
Subject: [PATCH 1/3] Set up mlflow locally and tested its connection using
 test script

---
 .env                           | 12 +++++
 .gitignore                     | 17 +++++++
 docker-compose.yaml            | 25 ++++++++++
 mlflow/Dockerfile              | 20 ++++++++
 mlflow/entrypoint.sh           | 24 +++++++++
 mlflow/requirements.txt        |  6 +++
 test/constants.py              | 34 +++++++++++++
 test/test_mlflow_connection.py | 89 ++++++++++++++++++++++++++++++++++
 8 files changed, 227 insertions(+)
 create mode 100644 .env
 create mode 100644 .gitignore
 create mode 100644 docker-compose.yaml
 create mode 100644 mlflow/Dockerfile
 create mode 100644 mlflow/entrypoint.sh
 create mode 100644 mlflow/requirements.txt
 create mode 100644 test/constants.py
 create mode 100644 test/test_mlflow_connection.py

diff --git a/.env b/.env
new file mode 100644
index 00000000..4ee43bfe
--- /dev/null
+++ b/.env
@@ -0,0 +1,12 @@
+# MLFLOW
+MLFLOW_TRACKING_USERNAME=mlflowadmin
+MLFLOW_TRACKING_PASSWORD=value
+MLFLOW_HOST_PORT=5000
+MLFLOW_CONT_PORT=5000
+MLFLOW_HOST=0.0.0.0
+MLFLOW_PORT=${MLFLOW_CONT_PORT}
+MLFLOW_BACKEND_STORE_URI=sqlite:////mlflow/mlflow_data/mlflow.db
+MLFLOW_DEFAULT_ARTIFACT_ROOT=file:///mlflow/mlflow_artifacts
+MLFLOW_HOST_CONFIG_PATH=./mlflow/config
+MLFLOW_CONT_CONFIG_PATH=/mlflow/config
+MLFLOW_FLASK_SERVER_SECRET_KEY=value
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..72b877a2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+# MLflow generated files
+mlflow_artifacts/
+mlflow_data/
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+.pytest_cache/
+.coverage
+htmlcov/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
\ No newline at end of file
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 00000000..949893e3
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,25 @@
+version: '3'
+
+name: mlops-stack
+services:
+  mlflow:
+    build: ./mlflow
+    image: mlflow
+    container_name: mlflow_mlops
+    ports:
+      - ${MLFLOW_HOST_PORT}:${MLFLOW_CONT_PORT}
+    volumes:
+      - ${MLFLOW_HOST_CONFIG_PATH}:${MLFLOW_CONT_CONFIG_PATH}
+      - ./mlflow_data:/mlflow/mlflow_data
+      - ./mlflow_artifacts:/mlflow/mlflow_artifacts
+    environment:
+      - MLFLOW_TRACKING_USERNAME=${MLFLOW_TRACKING_USERNAME}
+      - MLFLOW_TRACKING_PASSWORD=${MLFLOW_TRACKING_PASSWORD}
+      - MLFLOW_BACKEND_STORE_URI=sqlite:////mlflow/mlflow_data/mlflow.db
+      - MLFLOW_DEFAULT_ARTIFACT_ROOT=file:///mlflow/mlflow_artifacts
+      - MLFLOW_FLASK_SERVER_SECRET_KEY=${MLFLOW_FLASK_SERVER_SECRET_KEY}
+    restart: unless-stopped
+
+volumes:
+  mlflow_data:
+  mlflow_artifacts:
\ No newline at end of file
diff --git a/mlflow/Dockerfile b/mlflow/Dockerfile
new file mode 100644
index 00000000..a9e6111b
--- /dev/null
+++ b/mlflow/Dockerfile
@@ -0,0 +1,20 @@
+FROM python:3.9-slim
+
+
+COPY ./requirements.txt /mlflow/requirements.txt
+
+RUN pip install --no-cache-dir -r /mlflow/requirements.txt
+
+# Create directories for MLflow data with explicit permissions
+RUN mkdir -p /mlflow/mlflow_data /mlflow/mlflow_artifacts && \
+    chmod -R 777 /mlflow/mlflow_data /mlflow/mlflow_artifacts
+
+WORKDIR /mlflow
+
+# Copy the entrypoint script and make it executable
+COPY ./entrypoint.sh /mlflow/entrypoint.sh
+RUN chmod +x /mlflow/entrypoint.sh
+
+EXPOSE 5000
+
+ENTRYPOINT ["/mlflow/entrypoint.sh"]
\ No newline at end of file
diff --git a/mlflow/entrypoint.sh b/mlflow/entrypoint.sh
new file mode 100644
index 00000000..da264efe
--- /dev/null
+++ b/mlflow/entrypoint.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e
+
+echo "=== [ENTRYPOINT DEBUG] Starting MLflow at $(date) ==="
+echo "MLFLOW_TRACKING_USERNAME: ${MLFLOW_TRACKING_USERNAME}"
+echo "MLFLOW_BACKEND_STORE_URI: ${MLFLOW_BACKEND_STORE_URI}"
+echo "MLFLOW_DEFAULT_ARTIFACT_ROOT: ${MLFLOW_DEFAULT_ARTIFACT_ROOT}"
+echo "MLFLOW_HOST: ${MLFLOW_HOST:-0.0.0.0}"
+echo "MLFLOW_PORT: ${MLFLOW_PORT:-5000}"
+
+# Create necessary directories if they don't exist
+mkdir -p /mlflow/mlflow_data /mlflow/mlflow_artifacts
+
+# Start the MLflow server when no command is given; otherwise run that command
+if [ "$#" -eq 0 ]; then
+    exec mlflow server \
+        --host "${MLFLOW_HOST:-0.0.0.0}" \
+        --port "${MLFLOW_PORT:-5000}" \
+        --backend-store-uri "${MLFLOW_BACKEND_STORE_URI:-sqlite:////mlflow/mlflow_data/mlflow.db}" \
+        --default-artifact-root "${MLFLOW_DEFAULT_ARTIFACT_ROOT:-file:///mlflow/mlflow_artifacts}" \
+        --gunicorn-opts="--workers=1 --timeout 120"
+else
+    exec "$@"
+fi
diff --git a/mlflow/requirements.txt b/mlflow/requirements.txt
new file mode 100644
index 00000000..6518912d
--- /dev/null
+++ b/mlflow/requirements.txt
@@ -0,0 +1,6 @@
+mlflow==2.22.0
+mlflow[auth]
+psycopg2-binary==2.9.10
+sqlalchemy==2.0.23
+boto3==1.38.2
+PyMySQL==1.1.1
\ No newline at end of file
diff --git a/test/constants.py b/test/constants.py
new file mode 100644
index 00000000..bfb69fd9
--- /dev/null
+++ b/test/constants.py
@@ -0,0 +1,34 @@
+"""
+Constants and configuration values for the Global-Classifier project.
+"""
+
+# MLflow connection settings
+MLFLOW_TRACKING_URI = "http://localhost:5000"
+MLFLOW_USERNAME = "mlflowadmin"
+MLFLOW_PASSWORD = "null"
+
+# Experiment settings
+DEFAULT_EXPERIMENT_NAME = "test_experiment"
+
+# Artifact settings
+ARTIFACT_PATH = "mlflow_artifacts/test_artifacts.txt"
+
+# Run configuration
+RUN_NAME_FORMAT = "test_run_%Y%m%d_%H%M%S"
+
+# Parameter names
+PARAM_TEST = "test_param"
+PARAM_TEST_VALUE = "test_value"
+PARAM_RANDOM_INTEGER = "random_integer"
+
+# Metric names
+METRIC_ACCURACY = "accuracy"
+METRIC_ACCURACY_MIN = 0.7
+METRIC_ACCURACY_MAX = 0.99
+METRIC_LOSS = "loss"
+METRIC_LOSS_MIN = 0.01
+METRIC_LOSS_MAX = 0.3
+
+# Messages
+MSG_ARTIFACT_CONTENT = "This is a test artifact created at {}"
+MSG_SUCCESS = "\nTest completed successfully! Check the MLflow UI at {}"
\ No newline at end of file
diff --git a/test/test_mlflow_connection.py b/test/test_mlflow_connection.py
new file mode 100644
index 00000000..de0f8ffc
--- /dev/null
+++ b/test/test_mlflow_connection.py
@@ -0,0 +1,89 @@
+"""
+Test script to verify MLflow connection and functionality.
+"""
+import sys
+import os
+import random
+import mlflow
+import traceback
+from datetime import datetime
+
+# Add the parent directory to sys.path to import constants
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from constants import (
+    MLFLOW_TRACKING_URI, MLFLOW_USERNAME, MLFLOW_PASSWORD,
+    DEFAULT_EXPERIMENT_NAME, ARTIFACT_PATH, RUN_NAME_FORMAT,
+    PARAM_TEST, PARAM_TEST_VALUE, PARAM_RANDOM_INTEGER,
+    METRIC_ACCURACY, METRIC_ACCURACY_MIN, METRIC_ACCURACY_MAX,
+    METRIC_LOSS, METRIC_LOSS_MIN, METRIC_LOSS_MAX,
+    MSG_ARTIFACT_CONTENT, MSG_SUCCESS
+)
+
+print("Starting MLflow connection test...")
+
+try:
+    print("Setting tracking URI...")
+    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
+    
+    print("Setting environment variables...")
+    os.environ["MLFLOW_TRACKING_USERNAME"] = MLFLOW_USERNAME
+    os.environ["MLFLOW_TRACKING_PASSWORD"] = MLFLOW_PASSWORD
+    
+    # Test basic connectivity
+    print("Testing connection by listing experiments...")
+    experiments = mlflow.search_experiments()
+    print(f"Found {len(experiments)} experiments")
+    
+    # Create or get experiment
+    experiment_name = DEFAULT_EXPERIMENT_NAME
+    print(f"Creating/getting experiment '{experiment_name}'...")
+    try:
+        experiment_id = mlflow.create_experiment(experiment_name)
+        print(f"Created new experiment with ID: {experiment_id}")
+    except Exception as e:
+        print(f"Could not create experiment: {str(e)}")
+        print("Trying to retrieve existing experiment...")
+        experiment = mlflow.get_experiment_by_name(experiment_name)
+        if experiment:
+            experiment_id = experiment.experiment_id
+            print(f"Using existing experiment with ID: {experiment_id}")
+        else:
+            print("ERROR: Could not create or find experiment")
+            sys.exit(1)
+    
+    print(f"Setting active experiment to {experiment_name}...")
+    mlflow.set_experiment(experiment_name)
+    
+    # Start a run
+    run_name = datetime.now().strftime(RUN_NAME_FORMAT)
+    print(f"Starting a new run with name: {run_name}...")
+    with mlflow.start_run(run_name=run_name) as run:
+        run_id = run.info.run_id
+        print(f"Started run with ID: {run_id}")
+        
+        # Log parameters
+        print("Logging parameters...")
+        mlflow.log_param(PARAM_TEST, PARAM_TEST_VALUE)
+        mlflow.log_param(PARAM_RANDOM_INTEGER, random.randint(1, 100))
+        
+        # Log metrics
+        print("Logging metrics...")
+        mlflow.log_metric(METRIC_ACCURACY, random.uniform(METRIC_ACCURACY_MIN, METRIC_ACCURACY_MAX))
+        mlflow.log_metric(METRIC_LOSS, random.uniform(METRIC_LOSS_MIN, METRIC_LOSS_MAX))
+        
+        # Create and log an artifact
+        print("Creating artifact...")
+        with open(ARTIFACT_PATH, "w") as f:
+            f.write(MSG_ARTIFACT_CONTENT.format(datetime.now().isoformat()))
+        
+        print(f"Logging artifact: {ARTIFACT_PATH}")
+        mlflow.log_artifact(ARTIFACT_PATH)
+        
+        print("Successfully logged parameters, metrics, and artifacts")
+    
+    print(MSG_SUCCESS.format(MLFLOW_TRACKING_URI))
+    print(f"Experiment: {experiment_name}, Run ID: {run_id}")
+    
+except Exception as e:
+    print(f"ERROR: An unexpected error occurred: {type(e).__name__}: {str(e)}")
+    traceback.print_exc()
\ No newline at end of file

From a3ef3f197c87838486f56e53d9f8e7fb2c093243 Mon Sep 17 00:00:00 2001
From: nuwangeek <charith.bimsara@rootcode.io>
Date: Tue, 29 Apr 2025 15:19:12 +0530
Subject: [PATCH 2/3] added loguru for logging

---
 .gitignore                     | 10 +++++-
 mlflow/requirements.txt        |  3 +-
 test/constants.py              |  6 +++-
 test/test_mlflow_connection.py | 66 +++++++++++++++++++++-------------
 4 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/.gitignore b/.gitignore
index 72b877a2..8255c680 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,12 @@ htmlcov/
 .idea/
 .vscode/
 *.swp
-*.swo
\ No newline at end of file
+*.swo
+
+# Logs
+logs/
+*.log
+test/logs/
+**/logs/
+*.log.*
+*.zip
\ No newline at end of file
diff --git a/mlflow/requirements.txt b/mlflow/requirements.txt
index 6518912d..cb7725a0 100644
--- a/mlflow/requirements.txt
+++ b/mlflow/requirements.txt
@@ -3,4 +3,5 @@ mlflow[auth]
 psycopg2-binary==2.9.10
 sqlalchemy==2.0.23
 boto3==1.38.2
-PyMySQL==1.1.1
\ No newline at end of file
+PyMySQL==1.1.1
+loguru==0.7.3
\ No newline at end of file
diff --git a/test/constants.py b/test/constants.py
index bfb69fd9..c3e2ab76 100644
--- a/test/constants.py
+++ b/test/constants.py
@@ -31,4 +31,8 @@
 
 # Messages
 MSG_ARTIFACT_CONTENT = "This is a test artifact created at {}"
-MSG_SUCCESS = "\nTest completed successfully! Check the MLflow UI at {}"
\ No newline at end of file
+MSG_SUCCESS = "\nTest completed successfully! Check the MLflow UI at {}"
+
+# Logging settings
+LOGS_DIR = "logs"
+LOG_FILE = "test_mlflow_connection.log"
\ No newline at end of file
diff --git a/test/test_mlflow_connection.py b/test/test_mlflow_connection.py
index de0f8ffc..9c4e7b63 100644
--- a/test/test_mlflow_connection.py
+++ b/test/test_mlflow_connection.py
@@ -7,6 +7,7 @@
 import mlflow
 import traceback
 from datetime import datetime
+from loguru import logger
 
 # Add the parent directory to sys.path to import constants
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -16,74 +17,91 @@
     PARAM_TEST, PARAM_TEST_VALUE, PARAM_RANDOM_INTEGER,
     METRIC_ACCURACY, METRIC_ACCURACY_MIN, METRIC_ACCURACY_MAX,
     METRIC_LOSS, METRIC_LOSS_MIN, METRIC_LOSS_MAX,
-    MSG_ARTIFACT_CONTENT, MSG_SUCCESS
+    MSG_ARTIFACT_CONTENT, MSG_SUCCESS, LOGS_DIR, LOG_FILE
 )
 
-print("Starting MLflow connection test...")
+# Configure Loguru
+log_file = os.path.join(os.path.dirname(__file__), LOGS_DIR, LOG_FILE)
+os.makedirs(os.path.dirname(log_file), exist_ok=True)
+
+# Remove default handler and add custom handlers
+logger.remove()
+# Add console handler
+logger.add(sys.stdout, format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}")
+# Add file handler with rotation
+logger.add(
+    log_file,
+    rotation="10 MB",  # Rotate file when it reaches 10MB
+    retention="1 month",  # Keep logs for 1 month
+    compression="zip",  # Compress rotated logs
+    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
+)
+
+logger.info("Starting MLflow connection test...")
 
 try:
-    print("Setting tracking URI...")
+    logger.info("Setting tracking URI...")
     mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
     
-    print("Setting environment variables...")
+    logger.info("Setting environment variables...")
     os.environ["MLFLOW_TRACKING_USERNAME"] = MLFLOW_USERNAME
     os.environ["MLFLOW_TRACKING_PASSWORD"] = MLFLOW_PASSWORD
     
     # Test basic connectivity
-    print("Testing connection by listing experiments...")
+    logger.info("Testing connection by listing experiments...")
     experiments = mlflow.search_experiments()
-    print(f"Found {len(experiments)} experiments")
+    logger.info(f"Found {len(experiments)} experiments")
     
     # Create or get experiment
     experiment_name = DEFAULT_EXPERIMENT_NAME
-    print(f"Creating/getting experiment '{experiment_name}'...")
+    logger.info(f"Creating/getting experiment '{experiment_name}'...")
     try:
         experiment_id = mlflow.create_experiment(experiment_name)
-        print(f"Created new experiment with ID: {experiment_id}")
+        logger.success(f"Created new experiment with ID: {experiment_id}")
     except Exception as e:
-        print(f"Could not create experiment: {str(e)}")
-        print("Trying to retrieve existing experiment...")
+        logger.warning(f"Could not create experiment: {str(e)}")
+        logger.info("Trying to retrieve existing experiment...")
         experiment = mlflow.get_experiment_by_name(experiment_name)
         if experiment:
             experiment_id = experiment.experiment_id
-            print(f"Using existing experiment with ID: {experiment_id}")
+            logger.info(f"Using existing experiment with ID: {experiment_id}")
         else:
-            print("ERROR: Could not create or find experiment")
+            logger.error("ERROR: Could not create or find experiment")
             sys.exit(1)
     
-    print(f"Setting active experiment to {experiment_name}...")
+    logger.info(f"Setting active experiment to {experiment_name}...")
     mlflow.set_experiment(experiment_name)
     
     # Start a run
     run_name = datetime.now().strftime(RUN_NAME_FORMAT)
-    print(f"Starting a new run with name: {run_name}...")
+    logger.info(f"Starting a new run with name: {run_name}...")
     with mlflow.start_run(run_name=run_name) as run:
         run_id = run.info.run_id
-        print(f"Started run with ID: {run_id}")
+        logger.info(f"Started run with ID: {run_id}")
         
         # Log parameters
-        print("Logging parameters...")
+        logger.info("Logging parameters...")
         mlflow.log_param(PARAM_TEST, PARAM_TEST_VALUE)
         mlflow.log_param(PARAM_RANDOM_INTEGER, random.randint(1, 100))
         
         # Log metrics
-        print("Logging metrics...")
+        logger.info("Logging metrics...")
         mlflow.log_metric(METRIC_ACCURACY, random.uniform(METRIC_ACCURACY_MIN, METRIC_ACCURACY_MAX))
         mlflow.log_metric(METRIC_LOSS, random.uniform(METRIC_LOSS_MIN, METRIC_LOSS_MAX))
         
         # Create and log an artifact
-        print("Creating artifact...")
+        logger.info("Creating artifact...")
         with open(ARTIFACT_PATH, "w") as f:
             f.write(MSG_ARTIFACT_CONTENT.format(datetime.now().isoformat()))
         
-        print(f"Logging artifact: {ARTIFACT_PATH}")
+        logger.info(f"Logging artifact: {ARTIFACT_PATH}")
         mlflow.log_artifact(ARTIFACT_PATH)
         
-        print("Successfully logged parameters, metrics, and artifacts")
+        logger.success("Successfully logged parameters, metrics, and artifacts")
     
-    print(MSG_SUCCESS.format(MLFLOW_TRACKING_URI))
-    print(f"Experiment: {experiment_name}, Run ID: {run_id}")
+    logger.success(MSG_SUCCESS.format(MLFLOW_TRACKING_URI))
+    logger.info(f"Experiment: {experiment_name}, Run ID: {run_id}")
     
 except Exception as e:
-    print(f"ERROR: An unexpected error occurred: {type(e).__name__}: {str(e)}")
-    traceback.print_exc()
\ No newline at end of file
+    logger.error(f"An unexpected error occurred: {type(e).__name__}: {str(e)}\n{traceback.format_exc()}")
+    sys.exit(1)  # mirror the inner failure path: a failed check must not exit 0
\ No newline at end of file

From 8a1fce2b9c06b92b05929cb3ef6fe89bebf0fc9a Mon Sep 17 00:00:00 2001
From: nuwangeek <charith.bimsara@rootcode.io>
Date: Tue, 29 Apr 2025 15:27:01 +0530
Subject: [PATCH 3/3] handled error by ensuring mlflow_artifacts folder exists

---
 test/test_mlflow_connection.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/test/test_mlflow_connection.py b/test/test_mlflow_connection.py
index 9c4e7b63..73963bb2 100644
--- a/test/test_mlflow_connection.py
+++ b/test/test_mlflow_connection.py
@@ -89,8 +89,14 @@
         mlflow.log_metric(METRIC_ACCURACY, random.uniform(METRIC_ACCURACY_MIN, METRIC_ACCURACY_MAX))
         mlflow.log_metric(METRIC_LOSS, random.uniform(METRIC_LOSS_MIN, METRIC_LOSS_MAX))
         
-        # Create and log an artifact
+        # Create and log an artifact
         logger.info("Creating artifact...")
+        # Ensure the directory exists
+        artifact_dir = os.path.dirname(ARTIFACT_PATH)
+        if artifact_dir and not os.path.exists(artifact_dir):
+            logger.info(f"Creating directory for artifact: {artifact_dir}")
+            os.makedirs(artifact_dir, exist_ok=True)
+            
         with open(ARTIFACT_PATH, "w") as f:
             f.write(MSG_ARTIFACT_CONTENT.format(datetime.now().isoformat()))