diff --git a/.gitignore b/.gitignore
index 710958d..555b42f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -234,3 +234,9 @@ mlflow_tracking_uri.txt
# Large trained model files (can be regenerated with train_and_save_models.py)
models/stack_*.pkl # Exclude large stack models but keep ensemble model
+
+# Remove Mac file
+*.DS_Store
+
+# Do not include pre-commit-output.txt
+pre-commit-output.txt
diff --git a/Makefile b/Makefile
index d741b2e..ed048aa 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
# Author: AI Assistant
# Date: 2025-07-14
-.PHONY: help install format lint test run train-models dash stop-dash
+.PHONY: help install format lint typecheck security check-all test run train-models dash stop-dash
# Default target
help:
@@ -12,7 +12,10 @@ help:
@echo "Available targets:"
@echo " install - Install dependencies using uv"
@echo " format - Format code with ruff"
- @echo " lint - Lint code with ruff"
+ @echo " lint - Lint code with ruff (includes format check)"
+ @echo " typecheck - Type check with mypy"
+ @echo " security - Security check with bandit"
+ @echo " check-all - Run all code quality checks (lint, typecheck, security)"
@echo " test - Run tests"
@echo " run - Run the modular pipeline"
@echo " train-models - Train and save ML models"
@@ -32,7 +35,22 @@ format:
lint:
@echo "๐ Linting code with ruff..."
- uv run ruff check src/ dash_app/ tests/ scripts/ --output-format=github
+ uv run ruff check . --fix
+ uv run ruff format --check .
+
+# Type checking
+typecheck:
+ @echo "๐ Type checking with mypy..."
+ uv run mypy src/ --ignore-missing-imports
+
+# Security checking
+security:
+ @echo "๐ Security checking with bandit..."
+ uv run bandit -r src/ -f json
+
+# Run all quality checks
+check-all: lint typecheck security
+	@echo "✅ All code quality checks completed!"
# Testing
test:
diff --git a/README.md b/README.md
index 33b31d0..961bf75 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,13 @@
# Six-Stack Personality Classification Pipeline
-A state-of-the-art, production-ready machine learning pipeline for personality classification leveraging ensemble learning, advanced data augmentation, and automated hyperparameter optimization. Features a fully modular, maintainable architecture with interactive dashboard.
+Production-ready machine learning pipeline for personality classification using ensemble learning, data augmentation, and automated hyperparameter optimization. Modular, maintainable, and includes an interactive dashboard.
-## ๐ง Technology Stack
+## Technology Stack
-**Core ML**: scikit-learn, XGBoost, LightGBM, CatBoost, Optuna
-**Data Science**: pandas, numpy, scipy, SDV (synthetic data)
-**Dashboard**: Dash, Plotly, Bootstrap components
-**DevOps**: Docker, GitHub Actions, pre-commit hooks
-**Tools**: uv (package manager), Ruff (linting), mypy (types), Bandit (security)
+**ML**: scikit-learn, XGBoost, LightGBM, CatBoost, Optuna
+**Data**: pandas, numpy, scipy, SDV
+**Dashboard**: Dash, Plotly
+**DevOps**: Docker, GitHub Actions, pre-commit, uv, Ruff, mypy, Bandit
[](https://python.org)
[](LICENSE)
@@ -16,110 +15,49 @@ A state-of-the-art, production-ready machine learning pipeline for personality c
[](https://plotly.com/dash/)
[](#-architecture)
-## ๐ฑ Dashboard Preview
+## Dashboard Preview
-
+
- Main dashboard interface with personality feature sliders and input controls
-
-
-
- Prediction results with confidence visualization and detailed personality insights
+ Watch a live demo of the Personality Classification Dashboard in action
-## ๐ Quick Start
+## Quick Start
```bash
-# Clone and setup
git clone
cd Personality-classification
-
-# Install dependencies (using uv - modern Python package manager)
uv sync
-
-# Train models (required for dashboard)
-make train-models
-
-# Launch interactive dashboard
-make dash
-
-# Or run the production pipeline
-uv run python src/main_modular.py
-
-# Or explore examples
-uv run python examples/main_final.py # Lightweight version
-uv run python examples/main_demo.py # Demo with dummy models
-uv run python examples/minimal_test.py # Installation verification
+make train-models # Train models
+make dash # Launch dashboard
+uv run python src/main_modular.py # Run pipeline
```
-## ๐ Table of Contents
-
-- [Dashboard Preview](#-dashboard-preview)
-- [Features](#-features)
-- [Architecture](#-architecture)
-- [Installation](#-installation)
-- [Usage](#-usage)
-- [Dashboard](#-dashboard)
-- [Configuration](#-configuration)
-- [Model Stacks](#-model-stacks)
-- [Performance](#-performance)
-- [Documentation](#-documentation)
-- [Contributing](#-contributing)
-
-## ๐ฏ Features
-
-### **๐๏ธ Modern Modular Architecture**
-
-- **8 specialized modules** with single responsibility principle
-- **Clean separation of concerns** for maximum maintainability
-- **Independent testing** and validation of each component
-- **Thread-safe configuration** management
-
-### **๐ค Advanced Machine Learning Pipeline**
-
-- **6 specialized ensemble stacks** (A-F) with complementary algorithms
-- **Automated hyperparameter optimization** using Optuna
-- **Intelligent ensemble blending** with optimized weights
-- **Advanced data augmentation** with quality filtering and diversity control
-- **Adaptive augmentation strategies** based on dataset characteristics
+## Table of Contents
-### **๐ญ Production-Ready Infrastructure**
+**Contents:**
+- Dashboard Preview
+- Quick Start
+- Features
+- Usage
+- Documentation
-- **Interactive Dashboard**: Modern Dash-based web interface for model inference and exploration
-- **Model Training Pipeline**: Automated training and saving of ensemble models with metadata
-- **Docker Support**: Complete containerization for easy deployment and scaling
-- **Comprehensive Testing**: Full pytest coverage for all components with CI/CD integration
-- **Modular Architecture**: Clean separation of concerns for maintainability and extensibility
+## Features
-### **๐ Data Science Excellence**
+- Modular architecture: 8 specialized modules
+- 6 ensemble stacks (A-F) with complementary ML algorithms
+- Automated hyperparameter optimization (Optuna)
+- Advanced data augmentation (SDV Copula)
+- Interactive Dash dashboard
+- Dockerized deployment
+- Full test coverage (pytest)
-- **External data integration** using advanced merge strategy
-- **Sophisticated preprocessing** with correlation-based imputation
-- **Quality-controlled synthetic data** generation using SDV Copula
-- **Cross-validation** with stratified folds for robust evaluation
-- **Label noise injection** for improved generalization
-
-### **๐ ๏ธ Modern Development Tools**
-
-- **uv Package Manager**: Lightning-fast dependency resolution and virtual environment management
-- **Ruff Integration**: Ultra-fast Python linting and formatting (replaces Black, isort, flake8)
-- **Type Safety**: Comprehensive mypy type checking with strict configuration
-- **Security Scanning**: Bandit integration for security vulnerability detection
-- **Pre-commit Hooks**: Automated code quality checks on every commit
-- **GitHub Actions CI/CD**: Automated testing, linting, and validation on push
-- **Make Automation**: Simple Makefile for common development tasks
-
-### **๐ Production Features**
-
-- **Professional logging** with structured output and configurable levels
-- **Comprehensive error handling** and timeout protection for robust operation
-- **Model persistence** with metadata for reproducibility and version control
-- **Configurable settings** via centralized configuration management
-- **Health monitoring** with dashboard health checks and status endpoints
-- **Container support** with Docker and docker-compose for easy deployment
-
-## ๐๏ธ Architecture
+## Architecture
```
src/
@@ -136,7 +74,7 @@ src/
โ โโโ utils.py # ๐ ๏ธ Utility functions
dash_app/ # ๐ฅ๏ธ Interactive Dashboard
-โโโ src/ # Application source
+โโโ dashboard/ # Application source
โ โโโ app.py # Main Dash application
โ โโโ layout.py # UI layout components
โ โโโ callbacks.py # Interactive callbacks
@@ -154,16 +92,7 @@ models/ # ๐ค Trained Models
scripts/ # ๐ ๏ธ Utility Scripts
โโโ train_and_save_models.py # Model training and persistence
-examples/ # ๐ Usage examples
-โโโ main_final.py # โก Lightweight production
-โโโ main_demo.py # ๐ช Demonstration
-โโโ minimal_test.py # โ Installation check
-
data/ # ๐ Datasets
-โโโ train.csv # Training data
-โโโ test.csv # Test data
-โโโ sample_submission.csv # Submission template
-โโโ personality_datasert.csv # External data
docs/ # ๐ Documentation
โโโ [Generated documentation] # Technical guides
@@ -172,7 +101,7 @@ best_params/ # ๐พ Optimized parameters
โโโ stack_*_best_params.json # Per-stack best parameters
```
-## ๐ป Installation
+## Installation
### Prerequisites
@@ -200,164 +129,33 @@ uv run python examples/minimal_test.py
pip install -r requirements.txt # Generated from pyproject.toml
```
-## ๐ Usage
-
-### ๐ฏ Production Pipeline
+## Usage
```bash
-# Full six-stack ensemble (recommended)
+# Run production pipeline
uv run python src/main_modular.py
-```
-### ๐ฅ๏ธ Interactive Dashboard
-
-```bash
-# Train models (one-time setup)
+# Launch dashboard (after training models)
make train-models
-
-# Launch dashboard
make dash
# Stop dashboard
make stop-dash
```
-### โก Quick Examples
+
-```bash
-# Lightweight version
-uv run python examples/main_final.py
+## Dashboard
-# Demo with dummy models (educational)
-uv run python examples/main_demo.py
-
-# Test individual modules
-uv run python examples/test_modules.py
-```
-
-### ๐ ๏ธ Development Commands
-
-Available Makefile targets for streamlined development:
-
-```bash
-make install # Install all dependencies
-make format # Format code with Ruff
-make lint # Run linting checks
-make test # Run test suite
-make train-models # Train and save production models
-make dash # Launch dashboard
-make stop-dash # Stop dashboard
-make help # Show all available targets
-```
-
-### ๐ง Development
+See the video demo above for the latest dashboard interface and features. To launch the dashboard:
```bash
-# Run linting
-uv run ruff check src/
-
-# Auto-fix issues
-uv run ruff check --fix src/
-
-# Format code
-uv run ruff format src/
-
-# Run tests
-make test
-
-# Train models
make train-models
-```
-
-## ๐ฅ๏ธ Dashboard
-
-The project includes a modern, interactive Dash web application for real-time personality classification and model exploration.
-
-### Visual Demo
-
-
-*Main dashboard interface with personality feature sliders and input controls*
-
-
-*Prediction results with confidence visualization and detailed personality insights*
-
-### Features
-
-- **Real-time Predictions**: Input personality features and get instant predictions
-- **Confidence Visualization**: Interactive probability bars for all personality types
-- **Model Insights**: Detailed personality descriptions and confidence scores
-- **Professional UI**: Clean, responsive design with modern styling
-- **Production Ready**: Dockerized deployment with health checks
-
-### Quick Start
-
-```bash
-# Ensure models are trained
-make train-models
-
-# Launch dashboard (locally)
make dash
-
-# Dashboard will be available at http://localhost:8050
-```
-
-### Live Demo
-
-Experience the dashboard yourself in just a few commands:
-
-```bash
-git clone && cd Personality-classification
-uv sync && make train-models && make dash
-# Then open http://localhost:8050 in your browser
-```
-
-The dashboard features:
-- ๐๏ธ **Interactive Sliders** for all personality dimensions
-- ๐ **Real-time Predictions** with confidence visualization
-- ๐จ **Professional UI** with responsive design
-- ๐ **Probability Bars** showing prediction confidence
-- ๐ **Personality Insights** with detailed descriptions
-
-### Docker Deployment
-
-```bash
-# Build and run with Docker Compose
-cd dash_app
-docker-compose up --build
-
-# Or run individual Docker container
-docker build -t personality-dashboard .
-docker run -p 8050:8050 personality-dashboard
-```
-
-### Dashboard Usage
-
-1. **Access the Dashboard**: Navigate to `http://localhost:8050`
-2. **Input Features**: Use the sliders to set personality feature values:
- - Gender, Age, openness, neuroticism, conscientiousness
- - extraversion, agreeableness, Text_length, punctuation
-3. **Get Predictions**: Click "Predict Personality" to see results
-4. **Analyze Results**: View confidence scores and personality descriptions
-
-### API Endpoints
-
-The dashboard exposes a simple prediction API:
-
-- **Health Check**: `GET /health` - Service status
-- **Predictions**: Handled through Dash callbacks (internal)
-
-### Stopping the Dashboard
-
-```bash
-# Stop local dashboard
-make stop-dash
-
-# Stop Docker containers
-cd dash_app
-docker-compose down
+# Dashboard available at http://localhost:8050
```
-## โ๏ธ Configuration
+## Configuration
The pipeline is highly configurable through `src/modules/config.py`:
@@ -407,7 +205,7 @@ TESTING_SAMPLE_SIZE = 1000 # Samples in testing mode
LOG_LEVEL = "INFO" # DEBUG, INFO, WARNING, ERROR
```
-## ๐ค Model Stacks
+## Model Stacks
The pipeline employs six specialized ensemble stacks, each optimized for different aspects of the problem:
@@ -427,7 +225,7 @@ The pipeline employs six specialized ensemble stacks, each optimized for differe
- **Meta-learning approach** with Logistic Regression as final combiner
- **Stratified cross-validation** ensures robust evaluation
-## ๐ Performance Metrics
+## Performance Metrics
### Target Performance
@@ -449,22 +247,7 @@ The pipeline is designed to achieve high accuracy through ensemble learning and
โโโ Reproducibility: โ Fixed random seeds
```
-### Stack Configuration
-
-The pipeline employs six specialized ensemble stacks optimized for different aspects:
-
-| Stack | Focus | Algorithms | Hyperparameter Space | Training Approach |
-| ----- | ----------------------- | --------------------------------------------------------------- | ---------------------------- | --------------------------- |
-| **A** | Traditional ML (Narrow) | Random Forest, Logistic Regression, XGBoost, LightGBM, CatBoost | Conservative search space | Stable baseline performance |
-| **B** | Traditional ML (Wide) | Same as Stack A | Extended search space | Broader exploration |
-| **C** | Gradient Boosting | XGBoost, CatBoost | Gradient boosting focused | Tree-based specialists |
-| **D** | Sklearn Ensemble | Extra Trees, Hist Gradient Boosting, SVM, Gaussian NB | Sklearn-native models | Diverse algorithm mix |
-| **E** | Neural Networks | MLPClassifier, Deep architectures | Neural network tuning | Non-linear pattern capture |
-| **F** | Noise-Robust Training | Same as Stack A | Standard space + label noise | Improved generalization |
-
-> **Note**: To see actual performance metrics, run the pipeline with your data. Use `make train-models` to train models and generate real performance reports.
-
-## ๐งช Testing & Validation
+## Testing & Validation
### Quick Validation
@@ -480,8 +263,6 @@ uv run python examples/test_modules.py
```
### Development Testing
-
-```bash
# Enable testing mode (faster execution)
# Edit src/modules/config.py:
TESTING_MODE = True
@@ -491,7 +272,7 @@ TESTING_SAMPLE_SIZE = 1000
uv run python src/main_modular.py
```
-## ๐ง Troubleshooting
+## Troubleshooting
### Common Issues
@@ -530,7 +311,16 @@ uv sync # Reinstall dependencies
uv run python -c "import sklearn, pandas, numpy, dash; print('OK')"
```
-#### Performance Issues
+
+Key folders:
+- src/: Main pipeline and modules
+- dash_app/: Dashboard app and Docker config
+- models/: Trained models and metadata
+- scripts/: Model training scripts
+- examples/: Usage examples
+- data/: Datasets
+- docs/: Documentation
+- best_params/: Optimized parameters
```bash
# Optimize for your system
@@ -558,106 +348,29 @@ LOG_LEVEL = "DEBUG"
uv run python src/main_modular.py 2>&1 | tee debug.log
```
-## ๐ Documentation
-
-Comprehensive documentation is available in the `docs/` directory:
-
-- **[Technical Guide](docs/technical-guide.md)** - Deep dive into architecture, algorithms, and dashboard
-- **[API Reference](docs/api-reference.md)** - Detailed module and function documentation
-- **[MLOps Infrastructure](docs/mlops-infrastructure.md)** - Production deployment and monitoring
-- **[Data Augmentation](docs/data-augmentation.md)** - Advanced synthetic data generation strategies
-- **[Configuration Guide](docs/configuration.md)** - Complete configuration reference
-- **[Performance Tuning](docs/performance-tuning.md)** - Optimization strategies and best practices
-- **[Deployment Guide](docs/deployment.md)** - Production deployment instructions
-
-### Quick References
-
-- [`src/modules/README.md`](src/modules/README.md) - Module overview
-- [`examples/README.md`](examples/README.md) - Usage examples
-- [Architecture Diagram](docs/architecture.md) - Visual system overview
-
-## ๐จโ๐ป Lead Developer & Maintainer
+## Documentation
-**[Jeremy Vachier](https://github.com/jvachier)** - Lead Developer & Maintainer
+See the `docs/` directory for:
+- Technical Guide
+- API Reference
+- Data Augmentation
+- Configuration Guide
+- Performance Tuning
+- Deployment Guide
-For questions, suggestions, or collaboration opportunities:
+## Lead Developer & Maintainer
-- ๐ **Issues & Bug Reports**: [Open an issue](https://github.com/jvachier/Personality-classification/issues)
-- ๐ก **Feature Requests**: [Create a feature request](https://github.com/jvachier/Personality-classification/issues/new)
-- ๐ง **Direct Contact**: Contact the maintainer through GitHub
-- ๐ฌ **Discussions**: Use GitHub Discussions for general questions
+**Lead Developer:** [Jeremy Vachier](https://github.com/jvachier)
+For issues, feature requests, or questions, use GitHub Issues or Discussions.
-## ๐ค Contributing
+## Contributing
-We welcome contributions! Please follow these guidelines:
-
-### Development Setup
-
-```bash
-# Clone and setup development environment
-git clone
-cd Personality-classification
-uv sync --dev
-
-# Install pre-commit hooks
-uv run pre-commit install
-```
+Contributions welcome! Fork the repo, create a feature branch, implement and test your changes, then submit a pull request.
-### Code Standards
+## License
-- **Code Quality**: Use Ruff for linting and formatting
-- **Type Hints**: Required for all public functions
-- **Documentation**: Docstrings for all modules and functions
-- **Testing**: Add tests for new features
+Licensed under the Apache License 2.0. See [LICENSE](LICENSE).
-### Contribution Process
+## Project Status
-1. **Fork** the repository
-2. **Create** a feature branch: `git checkout -b feature/amazing-feature`
-3. **Implement** changes with proper testing
-4. **Lint** code: `uv run ruff check --fix src/`
-5. **Test** thoroughly: `uv run python examples/test_modules.py`
-6. **Commit** with descriptive messages
-7. **Submit** a pull request
-
-### Areas for Contribution
-
-- ๐ง **New model architectures** in Stack builders
-- ๐ **Additional data augmentation** methods
-- โก **Performance optimizations**
-- ๐ **Documentation improvements**
-- ๐งช **Test coverage expansion**
-- ๐ง **Configuration enhancements**
-
-## ๐ License
-
-This project is licensed under the **Apache License 2.0** - see the [LICENSE](LICENSE) file for details.
-
-## ๐ Acknowledgments
-
-- **Optuna Team** - For excellent hyperparameter optimization framework
-- **scikit-learn Community** - For robust machine learning foundations
-- **SDV Team** - For advanced synthetic data generation
-- **uv/Ruff Teams** - For modern Python tooling
-- **Dash/Plotly Team** - For powerful visualization and dashboarding
-
-## ๐ Project Status
-
-| Component | Status | Version | Last Updated |
-| ------------------------ | -------------------- | ------- | ------------ |
-| ๐๏ธ **Architecture** | โ **Production** | v2.0 | 2025-07-14 |
-| ๐ค **ML Pipeline** | โ **Production** | v2.0 | 2025-07-14 |
-| ๐ฅ๏ธ **Dashboard** | โ **Production** | v1.0 | 2025-07-14 |
-| ๐ **Data Augmentation** | โ **Advanced** | v1.5 | 2025-07-14 |
-| ๐ง **Configuration** | โ **Centralized** | v1.0 | 2025-07-14 |
-| ๐ **Documentation** | โ **Comprehensive** | v1.0 | 2025-07-14 |
-| ๐งช **Testing** | โ **CI/CD Ready** | v1.0 | 2025-07-14 |
-| ๐ ๏ธ **DevOps** | โ **Automated** | v1.0 | 2025-07-14 |
-
----
-
-
-
-**๐ฏ Production Ready** | **๏ธ Interactive Dashboard** | **๐๏ธ Fully Modular** | **๐ Well Documented**
-
-
+**Status:** Production Ready | Interactive Dashboard | Modular | Well Documented
diff --git a/dash_app/src/app.py b/dash_app/dashboard/app.py
similarity index 65%
rename from dash_app/src/app.py
rename to dash_app/dashboard/app.py
index 131d2a0..72df9f2 100644
--- a/dash_app/src/app.py
+++ b/dash_app/dashboard/app.py
@@ -6,6 +6,7 @@
from typing import Any
import dash
+import dash_bootstrap_components as dbc
from .callbacks import register_callbacks
from .layout import create_layout
@@ -46,8 +47,49 @@ def __init__(
__name__,
title=f"Personality Classifier - {model_name}",
suppress_callback_exceptions=True,
+ external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.FONT_AWESOME],
)
+ # Add custom CSS to ensure white background
+        self.app.index_string = """
+        <!DOCTYPE html>
+        <html>
+        <head>
+            {%metas%}
+            <title>{%title%}</title>
+            {%favicon%}
+            {%css%}
+            <style>html, body { background-color: #ffffff; }</style>
+        </head>
+        <body>
+            {%app_entry%}
+            <footer>{%config%}{%scripts%}{%renderer%}</footer>
+        </body>
+        </html>
+        """
+
# Load model
self.model_loader = ModelLoader(model_name, model_version, model_stage)
diff --git a/dash_app/dashboard/assets/enhanced_styles.css b/dash_app/dashboard/assets/enhanced_styles.css
new file mode 100644
index 0000000..edd1c26
--- /dev/null
+++ b/dash_app/dashboard/assets/enhanced_styles.css
@@ -0,0 +1,466 @@
+/* Enhanced UI/UX Styles for Personality Dashboard */
+
+/* CSS Variables for consistent theming */
+:root {
+ /* Personality colors */
+ --intro-color: #3498db;
+ --extro-color: #e74c3c;
+ --neutral-color: #95a5a6;
+
+ /* Brand colors */
+ --primary: #2c3e50;
+ --secondary: #34495e;
+ --success: #27ae60;
+ --warning: #f39c12;
+ --info: #3498db;
+ --light: #ecf0f1;
+ --dark: #2c3e50;
+
+ /* Spacing */
+ --spacing-xs: 0.25rem;
+ --spacing-sm: 0.5rem;
+ --spacing-md: 1rem;
+ --spacing-lg: 1.5rem;
+ --spacing-xl: 2rem;
+
+ /* Border radius */
+ --border-radius: 0.5rem;
+ --border-radius-lg: 1rem;
+
+ /* Shadows */
+ --shadow-sm: 0 0.125rem 0.25rem rgba(0, 0, 0, 0.075);
+ --shadow-md: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);
+ --shadow-lg: 0 1rem 3rem rgba(0, 0, 0, 0.175);
+
+ /* Transitions */
+ --transition-fast: 0.15s ease-in-out;
+ --transition-normal: 0.3s ease-in-out;
+ --transition-slow: 0.5s ease-in-out;
+}
+
+/* Global styles */
+.personality-dashboard {
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+ min-height: 100vh;
+ padding: var(--spacing-lg);
+}
+
+/* Header styles */
+.personality-dashboard h1 {
+ background: linear-gradient(45deg, var(--intro-color), var(--extro-color));
+ -webkit-background-clip: text;
+ -webkit-text-fill-color: transparent;
+ background-clip: text;
+ font-weight: 700;
+ font-size: 2.5rem;
+}
+
+/* Card enhancements */
+.input-panel,
+.feedback-panel,
+.results-panel {
+ background: rgba(255, 255, 255, 0.95);
+ backdrop-filter: blur(10px);
+ border: none;
+ border-radius: var(--border-radius-lg);
+ box-shadow: var(--shadow-lg);
+ transition: transform var(--transition-normal), box-shadow var(--transition-normal);
+}
+
+.input-panel:hover,
+.feedback-panel:hover,
+.results-panel:hover {
+ transform: translateY(-2px);
+ box-shadow: 0 1rem 4rem rgba(0, 0, 0, 0.2);
+}
+
+/* Section titles */
+.section-title {
+ color: var(--primary);
+ font-weight: 600;
+ margin-bottom: var(--spacing-lg);
+ padding-bottom: var(--spacing-sm);
+ border-bottom: 2px solid var(--light);
+}
+
+/* Enhanced sliders */
+.personality-slider {
+ margin: var(--spacing-lg) 0;
+}
+
+.personality-slider .rc-slider-track {
+ background: linear-gradient(90deg, var(--intro-color), var(--extro-color));
+ height: 8px;
+ border-radius: 4px;
+}
+
+.personality-slider .rc-slider-handle {
+ width: 20px;
+ height: 20px;
+ border: 3px solid #fff;
+ box-shadow: var(--shadow-md);
+ background: var(--primary);
+ transition: all var(--transition-fast);
+}
+
+.personality-slider .rc-slider-handle:hover,
+.personality-slider .rc-slider-handle:focus {
+ transform: scale(1.2);
+ box-shadow: var(--shadow-lg);
+}
+
+.personality-slider .rc-slider-rail {
+ background: var(--light);
+ height: 8px;
+ border-radius: 4px;
+}
+
+/* Slider containers with category styling */
+.slider-social .rc-slider-track {
+ background: linear-gradient(90deg, #e74c3c, #c0392b);
+}
+
+.slider-lifestyle .rc-slider-track {
+ background: linear-gradient(90deg, #27ae60, #229954);
+}
+
+.slider-digital .rc-slider-track {
+ background: linear-gradient(90deg, #9b59b6, #8e44ad);
+}
+
+/* Slider labels and help text */
+.slider-label {
+ color: var(--primary);
+ margin-bottom: var(--spacing-sm);
+ display: block;
+}
+
+.slider-help {
+ font-style: italic;
+ margin-top: var(--spacing-xs);
+ display: block;
+}
+
+.slider-container {
+ background: rgba(52, 73, 94, 0.05);
+ padding: var(--spacing-lg);
+ border-radius: var(--border-radius);
+ transition: background var(--transition-normal);
+}
+
+.slider-container:hover {
+ background: rgba(52, 73, 94, 0.1);
+}
+
+/* Enhanced dropdowns */
+.personality-dropdown .Select-control {
+ border: 2px solid var(--light);
+ border-radius: var(--border-radius);
+ transition: all var(--transition-fast);
+ min-height: 45px;
+}
+
+.personality-dropdown .Select-control:hover {
+ border-color: var(--info);
+}
+
+.personality-dropdown .Select-control.is-focused {
+ border-color: var(--primary);
+ box-shadow: 0 0 0 3px rgba(52, 73, 94, 0.1);
+}
+
+.dropdown-label {
+ color: var(--primary);
+ margin-bottom: var(--spacing-sm);
+ display: block;
+}
+
+.dropdown-container {
+ background: rgba(52, 73, 94, 0.05);
+ padding: var(--spacing-lg);
+ border-radius: var(--border-radius);
+ transition: background var(--transition-normal);
+}
+
+.dropdown-container:hover {
+ background: rgba(52, 73, 94, 0.1);
+}
+
+/* Predict button enhancement */
+.predict-button {
+ background: linear-gradient(45deg, var(--intro-color), var(--extro-color));
+ border: none;
+ border-radius: 25px;
+ padding: var(--spacing-md) var(--spacing-xl);
+ font-weight: 600;
+ font-size: 1.1rem;
+ text-transform: uppercase;
+ letter-spacing: 0.5px;
+ transition: all var(--transition-normal);
+ position: relative;
+ overflow: hidden;
+}
+
+.predict-button:hover {
+ transform: translateY(-2px);
+ box-shadow: var(--shadow-lg);
+}
+
+.predict-button:active {
+ transform: translateY(0);
+}
+
+.predict-button::before {
+ content: '';
+ position: absolute;
+ top: 0;
+ left: -100%;
+ width: 100%;
+ height: 100%;
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
+ transition: left var(--transition-slow);
+}
+
+.predict-button:hover::before {
+ left: 100%;
+}
+
+/* Feedback panel styles */
+.meter-container {
+ height: 20px;
+ background: var(--light);
+ border-radius: 10px;
+ position: relative;
+ overflow: hidden;
+ margin: var(--spacing-md) 0;
+}
+
+.meter-container::after {
+ content: '';
+ position: absolute;
+ top: 0;
+ left: 0;
+ height: 100%;
+ width: 50%; /* This would be dynamic based on current input */
+ background: linear-gradient(90deg, var(--intro-color), var(--extro-color));
+ border-radius: 10px;
+ transition: width var(--transition-normal);
+}
+
+.meter-label {
+ font-size: 0.85rem;
+ font-weight: 500;
+}
+
+.meter-label.intro {
+ color: var(--intro-color);
+}
+
+.meter-label.extro {
+ color: var(--extro-color);
+}
+
+.insights-container {
+ background: rgba(52, 73, 94, 0.05);
+ padding: var(--spacing-md);
+ border-radius: var(--border-radius);
+ border-left: 4px solid var(--info);
+}
+
+/* Results panel styles */
+.personality-result {
+ font-size: 3rem;
+ font-weight: 700;
+ background: linear-gradient(45deg, var(--intro-color), var(--extro-color));
+ -webkit-background-clip: text;
+ -webkit-text-fill-color: transparent;
+ background-clip: text;
+ margin-bottom: var(--spacing-md);
+}
+
+.confidence-score {
+ font-size: 1.2rem;
+ color: var(--secondary);
+ margin-bottom: var(--spacing-lg);
+}
+
+.confidence-row {
+ display: flex;
+ align-items: center;
+ gap: var(--spacing-md);
+}
+
+.personality-label {
+ flex: 0 0 100px;
+ font-weight: 500;
+ color: var(--primary);
+}
+
+.confidence-bar {
+ flex: 1;
+ height: 25px;
+ border-radius: 12px;
+}
+
+.confidence-text {
+ flex: 0 0 50px;
+ text-align: right;
+ font-weight: 600;
+ color: var(--primary);
+}
+
+/* Personality insights */
+.insights-list {
+ list-style: none;
+ padding: 0;
+}
+
+.insight-item {
+ background: rgba(52, 152, 219, 0.1);
+ margin: var(--spacing-sm) 0;
+ padding: var(--spacing-md);
+ border-radius: var(--border-radius);
+ border-left: 4px solid var(--info);
+ transition: all var(--transition-fast);
+}
+
+.insight-item:hover {
+ background: rgba(52, 152, 219, 0.15);
+ transform: translateX(5px);
+}
+
+/* Radar chart container */
+.personality-radar {
+ background: rgba(255, 255, 255, 0.9);
+ border-radius: var(--border-radius);
+ padding: var(--spacing-md);
+}
+
+/* Responsive design */
+@media (max-width: 768px) {
+ .personality-dashboard {
+ padding: var(--spacing-md);
+ }
+
+ .personality-dashboard h1 {
+ font-size: 2rem;
+ }
+
+ .personality-result {
+ font-size: 2rem;
+ }
+
+ .slider-container,
+ .dropdown-container {
+ padding: var(--spacing-md);
+ }
+
+ .confidence-row {
+ flex-direction: column;
+ gap: var(--spacing-sm);
+ }
+
+ .personality-label,
+ .confidence-text {
+ flex: none;
+ text-align: center;
+ }
+}
+
+/* Animation keyframes */
+@keyframes pulse {
+ 0%, 100% {
+ opacity: 1;
+ }
+ 50% {
+ opacity: 0.7;
+ }
+}
+
+@keyframes slideIn {
+ from {
+ transform: translateY(20px);
+ opacity: 0;
+ }
+ to {
+ transform: translateY(0);
+ opacity: 1;
+ }
+}
+
+@keyframes fadeIn {
+ from {
+ opacity: 0;
+ }
+ to {
+ opacity: 1;
+ }
+}
+
+/* Loading states */
+.loading {
+ animation: pulse 1.5s ease-in-out infinite;
+}
+
+.slide-in {
+ animation: slideIn 0.5s ease-out;
+}
+
+.fade-in {
+ animation: fadeIn 0.3s ease-in;
+}
+
+/* Focus states for accessibility */
+.personality-slider:focus-within,
+.personality-dropdown:focus-within,
+.dropdown-container:focus-within {
+ outline: 2px solid var(--primary);
+ outline-offset: 2px;
+}
+
+/* High contrast mode support */
+@media (prefers-contrast: high) {
+ :root {
+ --intro-color: #0066cc;
+ --extro-color: #cc0000;
+ --primary: #000000;
+ --light: #ffffff;
+ }
+
+ .input-panel,
+ .feedback-panel,
+ .results-panel {
+ border: 2px solid var(--primary);
+ }
+}
+
+/* Reduced motion support */
+@media (prefers-reduced-motion: reduce) {
+ * {
+ animation-duration: 0.01ms !important;
+ animation-iteration-count: 1 !important;
+ transition-duration: 0.01ms !important;
+ }
+}
+
+/* Dark mode support */
+@media (prefers-color-scheme: dark) {
+ :root {
+ --primary: #ecf0f1;
+ --secondary: #bdc3c7;
+ --light: #34495e;
+ --dark: #ecf0f1;
+ }
+
+ .personality-dashboard {
+ background: linear-gradient(135deg, #2c3e50 0%, #34495e 100%);
+ }
+
+ .input-panel,
+ .feedback-panel,
+ .results-panel {
+ background: rgba(44, 62, 80, 0.95);
+ color: var(--primary);
+ }
+}
diff --git a/dash_app/src/callbacks.py b/dash_app/dashboard/callbacks.py
similarity index 61%
rename from dash_app/src/callbacks.py
rename to dash_app/dashboard/callbacks.py
index 3b16076..717d6da 100644
--- a/dash_app/src/callbacks.py
+++ b/dash_app/dashboard/callbacks.py
@@ -5,7 +5,7 @@
import logging
from datetime import datetime
-from dash import dash_table, html
+from dash import html
from dash.dependencies import Input, Output, State
from .layout import (
@@ -82,6 +82,7 @@ def make_prediction(
# Make prediction
result = model_loader.predict(data)
result["timestamp"] = datetime.now().isoformat()
+ result["input_data"] = data # Add input data for radar chart
# Add to history
prediction_history.append(
@@ -98,64 +99,57 @@ def make_prediction(
logger.error(f"Prediction error: {e}")
return html.Div(f"Error: {e!s}", style={"color": "red"})
+ # Enhanced predict button with loading states
@app.callback(
- Output("prediction-history", "children"),
- Input("interval-component", "n_intervals"),
- Input("predict-button", "n_clicks"),
- )
- def update_prediction_history(n_intervals, n_clicks):
- """Update prediction history display."""
- if not prediction_history:
- return html.Div("No predictions yet", style={"color": "#7f8c8d"})
-
- # Create table data
- table_data = []
- for i, pred in enumerate(reversed(prediction_history[-10:])): # Show last 10
- table_data.append(
- {
- "ID": f"#{len(prediction_history) - i}",
- "Timestamp": pred["timestamp"][:19], # Remove microseconds
- "Prediction": pred["result"].get("prediction", "N/A"),
- "Confidence": f"{pred['result'].get('confidence', 0):.3f}"
- if pred["result"].get("confidence")
- else "N/A",
- }
- )
-
- return dash_table.DataTable(
- data=table_data,
- columns=[
- {"name": "ID", "id": "ID"},
- {"name": "Timestamp", "id": "Timestamp"},
- {"name": "Prediction", "id": "Prediction"},
- {"name": "Confidence", "id": "Confidence"},
- ],
- style_cell={"textAlign": "left", "padding": "10px"},
- style_header={
- "backgroundColor": "#3498db",
- "color": "white",
- "fontWeight": "bold",
- },
- style_data_conditional=[
- {
- "if": {"row_index": 0},
- "backgroundColor": "#ecf0f1",
- }
- ],
- )
-
- @app.callback(
- Output("interval-component", "disabled"), Input("auto-refresh-toggle", "value")
+ [
+ Output("predict-button", "children"),
+ Output("predict-button", "disabled"),
+ Output("predict-button", "color"),
+ ],
+ [Input("predict-button", "n_clicks")],
+ prevent_initial_call=True,
)
- def toggle_auto_refresh(value):
- """Toggle auto-refresh based on checkbox."""
- return "auto" not in value
-
+ def update_predict_button(n_clicks):
+ """Update predict button state with loading animation."""
+ if n_clicks:
+ # Show loading state briefly (will be overridden by prediction callback)
+ return [
+ [
+ html.I(className="fas fa-spinner fa-spin me-2"),
+ "Analyzing Your Personality...",
+ ],
+ True,
+ "warning",
+ ]
+
+ # Default state
+ return [
+ [html.I(className="fas fa-magic me-2"), "Analyze My Personality"],
+ False,
+ "primary",
+ ]
+
+ # Reset button state after prediction
@app.callback(
- Output("json-input", "value"),
- Input("json-input-display", "value"),
+ [
+ Output("predict-button", "children", allow_duplicate=True),
+ Output("predict-button", "disabled", allow_duplicate=True),
+ Output("predict-button", "color", allow_duplicate=True),
+ ],
+ [Input("prediction-results", "children")],
prevent_initial_call=True,
)
- def sync_json_input(value):
- """Sync the display JSON input with the hidden one."""
- return value
+ def reset_predict_button(results):
+ """Reset predict button after prediction is complete."""
+ if results:
+ return [
+ [html.I(className="fas fa-magic me-2"), "Analyze Again"],
+ False,
+ "success",
+ ]
+
+ return [
+ [html.I(className="fas fa-magic me-2"), "Analyze My Personality"],
+ False,
+ "primary",
+ ]
diff --git a/dash_app/dashboard/layout.py b/dash_app/dashboard/layout.py
new file mode 100644
index 0000000..df5644c
--- /dev/null
+++ b/dash_app/dashboard/layout.py
@@ -0,0 +1,663 @@
+"""Layout components for the Dash application."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import dash_bootstrap_components as dbc
+import plotly.graph_objects as go
+from dash import dcc, html
+
+
+def create_layout(model_name: str, model_metadata: dict[str, Any]) -> html.Div:
+ """Create the main layout for the Dash application.
+
+ Args:
+ model_name: Name of the model
+ model_metadata: Model metadata dictionary
+
+ Returns:
+ Dash HTML layout
+ """
+ return html.Div(
+ [
+ # Professional Header
+ create_professional_header(),
+ # Main Content
+ dbc.Container(
+ [
+ dbc.Row(
+ [
+ # Input Panel - Original size
+ dbc.Col(
+ [create_input_panel()],
+ md=5,
+ className="d-flex align-self-stretch",
+ ),
+ # Results Panel - Original size
+ dbc.Col(
+ [
+ html.Div(
+ id="prediction-results",
+ children=[
+ dbc.Card(
+ [
+ dbc.CardHeader(
+ [
+ html.H4(
+ "Analysis Results",
+ className="mb-0 text-center",
+ style={
+ "color": "#2c3e50",
+ "fontWeight": "400",
+ },
+ )
+ ],
+ style={
+ "backgroundColor": "#ffffff",
+ "border": "none",
+ },
+ ),
+ dbc.CardBody(
+ [
+ html.Div(
+ [
+ html.I(
+ className="fas fa-chart-radar fa-3x mb-3",
+ style={
+ "color": "#bdc3c7"
+ },
+ ),
+ html.H5(
+ "Ready for Analysis",
+ style={
+ "color": "#7f8c8d"
+ },
+ ),
+ html.P(
+ "Adjust the parameters and click 'Analyze Personality' to see your results.",
+ style={
+ "color": "#95a5a6"
+ },
+ ),
+ ],
+ className="text-center py-5",
+ )
+ ],
+ style={"padding": "2rem"},
+ ),
+ ],
+ className="shadow-sm h-100",
+ style={
+ "border": "none",
+ "borderRadius": "15px",
+ },
+ )
+ ],
+ className="h-100 d-flex flex-column",
+ )
+ ],
+ md=7,
+ className="d-flex align-self-stretch",
+ ),
+ ],
+ justify="center",
+ className="g-4",
+ style={"minHeight": "80vh"},
+ )
+ ],
+ fluid=True,
+ className="py-4",
+ style={
+ "backgroundColor": "#ffffff",
+ "maxWidth": "1400px",
+ "margin": "0 auto",
+ },
+ ),
+ ],
+ className="personality-dashboard",
+ style={"backgroundColor": "#ffffff !important", "minHeight": "100vh"},
+ )
+
+
+def create_professional_header() -> dbc.Row:
+ """Create a professional header."""
+ return dbc.Container(
+ [
+ dbc.Row(
+ [
+ dbc.Col(
+ [
+ dbc.Card(
+ [
+ dbc.CardBody(
+ [
+ html.Div(
+ [
+ html.I(
+ className="fas fa-brain me-3",
+ style={
+ "fontSize": "2.5rem",
+ "color": "#2c3e50",
+ },
+ ),
+ html.H1(
+ "Personality Classification",
+ className="d-inline-block mb-0",
+ style={
+ "color": "#2c3e50",
+ "fontWeight": "300",
+ },
+ ),
+ ],
+ className="d-flex align-items-center justify-content-center",
+ ),
+ html.P(
+ "Advanced AI-powered personality assessment platform using ensemble machine learning to analyze behavioral patterns and predict introversion-extraversion tendencies based on social, lifestyle, and digital behavior indicators.",
+ className="text-center text-muted mt-2 mb-0",
+ style={
+ "fontSize": "1.0rem",
+ "maxWidth": "800px",
+ "margin": "0 auto",
+ },
+ ),
+ ],
+ className="py-3",
+ )
+ ],
+ className="shadow-sm border-0",
+ style={"backgroundColor": "#ffffff"},
+ )
+ ]
+ )
+ ],
+ className="mb-4",
+ )
+ ],
+ fluid=True,
+ style={"maxWidth": "1400px", "margin": "0 auto"},
+ )
+
+
+def create_input_panel() -> dbc.Card:
+ """Create a clean, professional input panel."""
+ return dbc.Card(
+ [
+ dbc.CardHeader(
+ [
+ html.H4(
+ "Assessment Parameters",
+ className="mb-0 text-center",
+ style={"color": "#2c3e50", "fontWeight": "400"},
+ )
+ ],
+ style={"backgroundColor": "#ffffff", "border": "none"},
+ ),
+ dbc.CardBody(
+ [
+ # Social Behavior Section
+ html.H5(
+ [
+ html.I(
+ className="fas fa-users me-2",
+ style={"color": "#3498db"},
+ ),
+ "Social Behavior",
+ ],
+ className="section-title mb-4",
+ ),
+ create_enhanced_slider(
+ "time-spent-alone",
+ "Time Spent Alone (hours/day)",
+ 0,
+ 24,
+ 8,
+ "Less alone time",
+ "More alone time",
+ "slider-social",
+ ),
+ create_enhanced_slider(
+ "social-event-attendance",
+ "Social Event Attendance (events/month)",
+ 0,
+ 20,
+ 4,
+ "Fewer events",
+ "More events",
+ "slider-social",
+ ),
+ # Lifestyle Section
+ html.H5(
+ [
+ html.I(
+ className="fas fa-compass me-2",
+ style={"color": "#27ae60"},
+ ),
+ "Lifestyle",
+ ],
+ className="section-title mt-5 mb-4",
+ ),
+ create_enhanced_slider(
+ "going-outside",
+ "Going Outside Frequency (times/week)",
+ 0,
+ 15,
+ 5,
+ "Stay indoors",
+ "Go out frequently",
+ "slider-lifestyle",
+ ),
+ create_enhanced_slider(
+ "friends-circle-size",
+ "Friends Circle Size",
+ 0,
+ 50,
+ 12,
+ "Small circle",
+ "Large network",
+ "slider-lifestyle",
+ ),
+ # Digital Behavior Section
+ html.H5(
+ [
+ html.I(
+ className="fas fa-share-alt me-2",
+ style={"color": "#9b59b6"},
+ ),
+ "Digital Behavior",
+ ],
+ className="section-title mt-5 mb-4",
+ ),
+ create_enhanced_slider(
+ "post-frequency",
+ "Social Media Posts (per week)",
+ 0,
+ 20,
+ 3,
+ "Rarely post",
+ "Frequently post",
+ "slider-digital",
+ ),
+ # Psychological Assessment Section
+ html.H5(
+ [
+ html.I(
+ className="fas fa-mind-share me-2",
+ style={"color": "#e67e22"},
+ ),
+ "Psychological Assessment",
+ ],
+ className="section-title mt-5 mb-4",
+ ),
+ create_enhanced_dropdown(
+ "stage-fear",
+ "Do you have stage fear?",
+ [
+ {
+ "label": "No - I'm comfortable with public speaking",
+ "value": "No",
+ },
+ {
+ "label": "Yes - I avoid speaking in public",
+ "value": "Yes",
+ },
+ {
+ "label": "Sometimes - It depends on the situation",
+ "value": "Unknown",
+ },
+ ],
+ "No",
+ ),
+ create_enhanced_dropdown(
+ "drained-after-socializing",
+ "Do you feel drained after socializing?",
+ [
+ {
+ "label": "No - I feel energized by social interaction",
+ "value": "No",
+ },
+ {
+ "label": "Yes - I need time alone to recharge",
+ "value": "Yes",
+ },
+ {
+ "label": "It varies - Depends on the context",
+ "value": "Unknown",
+ },
+ ],
+ "No",
+ ),
+ # Analysis Button
+ html.Div(
+ [
+ dbc.Button(
+ [
+ html.I(className="fas fa-brain me-2"),
+ "Analyze Personality",
+ ],
+ id="predict-button",
+ color="primary",
+ size="lg",
+ className="predict-button px-5 py-3",
+ style={"fontSize": "1.1rem", "fontWeight": "500"},
+ )
+ ],
+ className="text-center mt-5",
+ ),
+ ],
+ style={"padding": "2rem"},
+ ),
+ ],
+ className="shadow-sm h-100",
+ style={"border": "none", "borderRadius": "15px"},
+ )
+
+
+def create_enhanced_slider(
+ slider_id: str,
+ label: str,
+ min_val: int,
+ max_val: int,
+ default: int,
+ intro_text: str,
+ extro_text: str,
+ css_class: str,
+) -> html.Div:
+ """Create an enhanced slider with personality hints."""
+ return html.Div(
+ [
+ html.Label(label, className="slider-label fw-bold"),
+ dcc.Slider(
+ id=slider_id,
+ min=min_val,
+ max=max_val,
+ step=1,
+ value=default,
+ marks={
+ min_val: {
+ "label": intro_text,
+ "style": {"color": "#3498db", "fontSize": "0.8rem"},
+ },
+ max_val: {
+ "label": extro_text,
+ "style": {"color": "#e74c3c", "fontSize": "0.8rem"},
+ },
+ },
+ tooltip={"placement": "bottom", "always_visible": True},
+ className=f"personality-slider {css_class}",
+ ),
+ ],
+ className="slider-container mb-3",
+ )
+
+
+def create_enhanced_dropdown(
+ dropdown_id: str, label: str, options: list, default: str
+) -> html.Div:
+ """Create an enhanced dropdown with better styling."""
+ return html.Div(
+ [
+ html.Label(label, className="dropdown-label fw-bold"),
+ dcc.Dropdown(
+ id=dropdown_id,
+ options=options,
+ value=default,
+ className="personality-dropdown",
+ ),
+ ],
+ className="dropdown-container mb-3",
+ )
+
+
+def format_prediction_result(result: dict[str, Any]) -> html.Div:
+ """Format prediction result for display.
+
+ Args:
+ result: Prediction result dictionary
+
+ Returns:
+ Formatted result component
+ """
+ prediction = result.get("prediction", "Unknown")
+ confidence = result.get("confidence", 0)
+ prob_extrovert = result.get("probability_extrovert", 0)
+ prob_introvert = result.get("probability_introvert", 0)
+ input_data = result.get("input_data", {})
+
+ # Determine confidence level
+ if confidence > 0.7:
+ confidence_color = "success"
+ confidence_badge = "High Confidence"
+ elif confidence > 0.5:
+ confidence_color = "warning"
+ confidence_badge = "Medium Confidence"
+ else:
+ confidence_color = "danger"
+ confidence_badge = "Low Confidence"
+
+ # Create enhanced results with Bootstrap components
+ return dbc.Card(
+ [
+ dbc.CardHeader(
+ [
+ html.H4(
+ "Analysis Results",
+ className="mb-0 text-center",
+ style={"color": "#2c3e50", "fontWeight": "400"},
+ )
+ ],
+ style={"backgroundColor": "#ffffff", "border": "none"},
+ ),
+ dbc.CardBody(
+ [
+ dbc.Row(
+ [
+ # Main Result
+ dbc.Col(
+ [
+ html.Div(
+ [
+ html.H2(
+                                    f"🧠 {prediction}",
+ className="personality-result text-center",
+ ),
+ html.P(
+ f"Confidence: {confidence:.1%}",
+ className="confidence-score text-center",
+ ),
+ dbc.Badge(
+ confidence_badge,
+ color=confidence_color,
+ className="mb-3",
+ ),
+ ],
+ className="text-center",
+ )
+ ],
+ md=6,
+ ),
+ # Confidence Bars
+ dbc.Col(
+ [
+ html.H5("Probability Breakdown"),
+ create_confidence_bars(
+ {
+ "Extrovert": prob_extrovert,
+ "Introvert": prob_introvert,
+ }
+ ),
+ ],
+ md=6,
+ ),
+ ]
+ ),
+ # Larger Radar Chart - Full Width
+ dbc.Row(
+ [
+ dbc.Col(
+ [
+ html.H5(
+ "Personality Dimensions",
+ className="text-center mb-3",
+ ),
+ html.Div(
+ [
+ dcc.Graph(
+ figure=create_personality_radar(
+ {
+ "Introvert": prob_introvert,
+ "Extrovert": prob_extrovert,
+ },
+ input_data,
+ ),
+ config={"displayModeBar": False},
+ className="personality-radar",
+ style={
+ "height": "450px",
+ "width": "100%",
+ },
+ )
+ ],
+ style={"padding": "0 20px"},
+ ),
+ ],
+ md=12,
+ className="text-center",
+ )
+ ],
+ className="mt-4",
+ ),
+ # Personality Insights
+ html.Hr(),
+ html.Div(
+ [
+ html.H5("Personality Insights"),
+ create_personality_insights(prediction, confidence),
+ ]
+ ),
+ # Metadata
+ html.Hr(),
+ html.Small(
+ [
+ f"Model: {result.get('model_name', 'Unknown')} | ",
+ f"Version: {result.get('model_version', 'Unknown')} | ",
+ f"Timestamp: {result.get('timestamp', 'Unknown')}",
+ ],
+ className="text-muted",
+ ),
+ ],
+ style={"padding": "2rem"},
+ ),
+ ],
+ className="shadow-sm h-100",
+ style={"border": "none", "borderRadius": "15px"},
+ )
+
+
+def create_confidence_bars(probabilities: dict) -> html.Div:
+ """Create animated confidence bars."""
+ bars = []
+ for personality, prob in probabilities.items():
+ color = "primary" if personality == "Introvert" else "danger"
+ bars.append(
+ html.Div(
+ [
+ html.Span(personality, className="personality-label"),
+ dbc.Progress(
+ value=prob * 100,
+ color=color,
+ className="confidence-bar mb-2",
+ animated=True,
+ striped=True,
+ ),
+ html.Span(f"{prob:.1%}", className="confidence-text"),
+ ],
+ className="confidence-row mb-2",
+ )
+ )
+ return html.Div(bars)
+
+
+def create_personality_insights(prediction: str, confidence: float) -> html.Div:
+ """Create personality insights based on prediction."""
+ insights = {
+ "Introvert": [
+            "💭 You likely process information internally before sharing",
+            "⚡ You recharge through quiet, solitary activities",
+            "👥 You prefer deep, meaningful conversations over small talk",
+            "🎯 You tend to think before speaking",
+ ],
+ "Extrovert": [
+            "🗣️ You likely think out loud and enjoy verbal processing",
+            "⚡ You gain energy from social interactions",
+            "👥 You enjoy meeting new people and large gatherings",
+            "🎯 You tend to speak spontaneously",
+ ],
+ }
+
+ prediction_insights = insights.get(prediction, ["Analysis in progress..."])
+
+ return html.Ul(
+ [html.Li(insight, className="insight-item") for insight in prediction_insights],
+ className="insights-list",
+ )
+
+
+def create_personality_radar(
+ probabilities: dict, input_data: dict[str, Any] | None = None
+) -> go.Figure:
+ """Create radar chart for personality visualization."""
+ categories = [
+ "Social Energy",
+ "Processing Style",
+ "Decision Making",
+ "Lifestyle",
+ "Communication",
+ ]
+
+ # Calculate values based on probabilities and input data
+ intro_tendency = probabilities.get("Introvert", 0.5)
+
+ # Map input data to personality dimensions (simplified)
+ if input_data:
+ social_energy = 1 - (input_data.get("Time_spent_Alone", 12) / 24)
+ processing_style = 1 - (input_data.get("Post_frequency", 10) / 20)
+ decision_making = 0.8 if input_data.get("Stage_fear_Yes", 0) else 0.3
+ lifestyle = 1 - (input_data.get("Going_outside", 7) / 15)
+ communication = 1 - (input_data.get("Friends_circle_size", 25) / 50)
+
+ values = [
+ social_energy,
+ processing_style,
+ decision_making,
+ lifestyle,
+ communication,
+ ]
+ else:
+ # Default values based on prediction
+ values = [intro_tendency] * len(categories)
+
+ fig = go.Figure()
+ fig.add_trace(
+ go.Scatterpolar(
+ r=values,
+ theta=categories,
+ fill="toself",
+ name="Your Profile",
+ line_color="#3498db" if intro_tendency > 0.5 else "#e74c3c",
+ )
+ )
+
+ fig.update_layout(
+ polar={
+ "radialaxis": {"visible": True, "range": [0, 1]},
+ "angularaxis": {"tickfont": {"size": 12}},
+ },
+ showlegend=False,
+ height=450,
+ font={"size": 12},
+ title="Personality Dimensions",
+ margin={"l": 80, "r": 80, "t": 60, "b": 80},
+ )
+
+ return fig
diff --git a/dash_app/src/model_loader.py b/dash_app/dashboard/model_loader.py
similarity index 99%
rename from dash_app/src/model_loader.py
rename to dash_app/dashboard/model_loader.py
index 82ebb90..008493e 100644
--- a/dash_app/src/model_loader.py
+++ b/dash_app/dashboard/model_loader.py
@@ -49,7 +49,7 @@ def _load_model(self) -> None:
for models_dir in models_paths:
if models_dir.exists():
# Look for saved models based on model name
- if self.model_name == "ensemble":
+ if self.model_name in ["ensemble", "ensemble_model"]:
model_file = models_dir / "ensemble_model.pkl"
metadata_file = models_dir / "ensemble_metadata.json"
else:
diff --git a/dash_app/main.py b/dash_app/main.py
index 84f73bb..81ce7d8 100644
--- a/dash_app/main.py
+++ b/dash_app/main.py
@@ -2,8 +2,15 @@
import argparse
import logging
+import sys
+from pathlib import Path
-from src import PersonalityClassifierApp
+# Add the project root to Python path
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+# Import after path modification
+from dash_app.dashboard.app import PersonalityClassifierApp # noqa: E402
def main():
diff --git a/dash_app/src/__init__.py b/dash_app/src/__init__.py
deleted file mode 100644
index 9e00603..0000000
--- a/dash_app/src/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""Package initialization for the Dash application."""
-
-from .app import PersonalityClassifierApp, create_app
-
-__all__ = ["PersonalityClassifierApp", "create_app"]
diff --git a/dash_app/src/layout.py b/dash_app/src/layout.py
deleted file mode 100644
index b42a0de..0000000
--- a/dash_app/src/layout.py
+++ /dev/null
@@ -1,604 +0,0 @@
-"""Layout components for the Dash application."""
-
-from __future__ import annotations
-
-from typing import Any
-
-from dash import dcc, html
-
-
-def create_layout(model_name: str, model_metadata: dict[str, Any]) -> html.Div:
- """Create the main layout for the Dash application.
-
- Args:
- model_name: Name of the model
- model_metadata: Model metadata dictionary
-
- Returns:
- Dash HTML layout
- """
- return html.Div(
- [
- # Header
- html.Div(
- [
- html.H1(
- "Personality Classification Dashboard",
- style={
- "textAlign": "center",
- "color": "#2c3e50",
- "marginBottom": "10px",
- },
- ),
- html.H3(
- f"Model: {model_name}",
- style={
- "textAlign": "center",
- "color": "#7f8c8d",
- "marginBottom": "30px",
- },
- ),
- ]
- ),
- # Model Status Section
- html.Div(
- [
- html.H3("Model Status", style={"color": "#34495e"}),
- html.Div(
- id="model-status",
- children=[create_status_cards(model_metadata)],
- style={"marginBottom": "30px"},
- ),
- ]
- ),
- # Prediction Section
- html.Div(
- [
- html.H3("Make Predictions", style={"color": "#34495e"}),
- # Input methods tabs (simplified to manual only)
- html.Div(
- style={
- "display": "none"
- }, # Hide tabs since we only have manual input
- children=[
- dcc.Tabs(
- id="input-tabs",
- value="manual",
- children=[
- dcc.Tab(label="Manual Input", value="manual"),
- ],
- )
- ],
- ),
- # Input content (always manual input)
- html.Div(
- id="input-content",
- style={"marginTop": "20px"},
- children=[create_manual_input()],
- ),
- # Predict button
- html.Div(
- [
- html.Button(
- "Predict",
- id="predict-button",
- style={
- "backgroundColor": "#3498db",
- "color": "white",
- "border": "none",
- "padding": "10px 20px",
- "fontSize": "16px",
- "borderRadius": "5px",
- "cursor": "pointer",
- "marginTop": "20px",
- },
- )
- ],
- style={"textAlign": "center"},
- ),
- # Results
- html.Div(id="prediction-results", style={"marginTop": "30px"}),
- ],
- style={"marginBottom": "30px"},
- ),
- # Prediction History Section
- html.Div(
- [
- html.H3("Prediction History", style={"color": "#34495e"}),
- html.Div(id="prediction-history"),
- # Auto-refresh toggle
- html.Div(
- [
- dcc.Checklist(
- id="auto-refresh-toggle",
- options=[
- {"label": "Auto-refresh (5s)", "value": "auto"}
- ],
- value=[],
- style={"marginTop": "10px"},
- ),
- dcc.Interval(
- id="interval-component",
- interval=5 * 1000, # in milliseconds
- n_intervals=0,
- disabled=True,
- ),
- ]
- ),
- ]
- ),
- ],
- style={"margin": "20px", "fontFamily": "Arial, sans-serif"},
- )
-
-
-def create_status_cards(model_metadata: dict[str, Any]) -> html.Div:
- """Create status cards showing model information.
-
- Args:
- model_metadata: Model metadata dictionary
-
- Returns:
- Div containing status cards
- """
- model_loaded = bool(model_metadata)
- status_color = "#27ae60" if model_loaded else "#e74c3c"
- status_text = "Loaded" if model_loaded else "Not Loaded"
-
- return html.Div(
- [
- # Model Status Card
- html.Div(
- [
- html.H4("Model Status", style={"margin": "0", "color": "#2c3e50"}),
- html.P(
- status_text,
- style={
- "margin": "5px 0",
- "color": status_color,
- "fontWeight": "bold",
- },
- ),
- html.P(
- f"Version: {model_metadata.get('version', 'Unknown')}",
- style={"margin": "5px 0", "color": "#7f8c8d"},
- ),
- html.P(
- f"Stage: {model_metadata.get('stage', 'Unknown')}",
- style={"margin": "5px 0", "color": "#7f8c8d"},
- ),
- ],
- style={
- "border": "1px solid #bdc3c7",
- "padding": "15px",
- "borderRadius": "5px",
- "width": "300px",
- "display": "inline-block",
- "margin": "10px",
- },
- ),
- # Prediction Stats Card (placeholder)
- html.Div(
- [
- html.H4(
- "Prediction Stats", style={"margin": "0", "color": "#2c3e50"}
- ),
- html.P(
- "Total Predictions: 0",
- style={"margin": "5px 0", "color": "#7f8c8d"},
- ),
- html.P(
- "Last Prediction: None",
- style={"margin": "5px 0", "color": "#7f8c8d"},
- ),
- ],
- style={
- "border": "1px solid #bdc3c7",
- "padding": "15px",
- "borderRadius": "5px",
- "width": "300px",
- "display": "inline-block",
- "margin": "10px",
- },
- ),
- ]
- )
-
-
-def create_manual_input() -> html.Div:
- """Create manual input form with actual personality features.
-
- Returns:
- Div containing manual input components
- """
- return html.Div(
- [
- html.P(
- "Enter your personality traits below:",
- style={"fontSize": "16px", "marginBottom": "20px", "color": "#2c3e50"},
- ),
- # Time spent alone
- html.Div(
- [
- html.Label(
- "Time Spent Alone (hours per day):",
- style={
- "display": "block",
- "fontWeight": "bold",
- "marginBottom": "5px",
- },
- ),
- dcc.Input(
- id="time-spent-alone",
- type="number",
- value=2.0,
- min=0,
- max=24,
- step=0.5,
- style={"margin": "5px", "width": "200px", "padding": "5px"},
- ),
- ],
- style={"marginBottom": "15px"},
- ),
- # Social event attendance
- html.Div(
- [
- html.Label(
- "Social Event Attendance (events per month):",
- style={
- "display": "block",
- "fontWeight": "bold",
- "marginBottom": "5px",
- },
- ),
- dcc.Input(
- id="social-event-attendance",
- type="number",
- value=4.0,
- min=0,
- max=30,
- step=1,
- style={"margin": "5px", "width": "200px", "padding": "5px"},
- ),
- ],
- style={"marginBottom": "15px"},
- ),
- # Going outside
- html.Div(
- [
- html.Label(
- "Going Outside (frequency per week):",
- style={
- "display": "block",
- "fontWeight": "bold",
- "marginBottom": "5px",
- },
- ),
- dcc.Input(
- id="going-outside",
- type="number",
- value=3.0,
- min=0,
- max=7,
- step=1,
- style={"margin": "5px", "width": "200px", "padding": "5px"},
- ),
- ],
- style={"marginBottom": "15px"},
- ),
- # Friends circle size
- html.Div(
- [
- html.Label(
- "Friends Circle Size:",
- style={
- "display": "block",
- "fontWeight": "bold",
- "marginBottom": "5px",
- },
- ),
- dcc.Input(
- id="friends-circle-size",
- type="number",
- value=8.0,
- min=0,
- max=50,
- step=1,
- style={"margin": "5px", "width": "200px", "padding": "5px"},
- ),
- ],
- style={"marginBottom": "15px"},
- ),
- # Post frequency
- html.Div(
- [
- html.Label(
- "Social Media Post Frequency (posts per week):",
- style={
- "display": "block",
- "fontWeight": "bold",
- "marginBottom": "5px",
- },
- ),
- dcc.Input(
- id="post-frequency",
- type="number",
- value=3.0,
- min=0,
- max=20,
- step=1,
- style={"margin": "5px", "width": "200px", "padding": "5px"},
- ),
- ],
- style={"marginBottom": "15px"},
- ),
- # Stage fear
- html.Div(
- [
- html.Label(
- "Do you have stage fear?",
- style={
- "display": "block",
- "fontWeight": "bold",
- "marginBottom": "5px",
- },
- ),
- dcc.Dropdown(
- id="stage-fear",
- options=[
- {"label": "No", "value": "No"},
- {"label": "Yes", "value": "Yes"},
- {"label": "Unknown", "value": "Unknown"},
- ],
- value="No",
- style={"width": "200px"},
- ),
- ],
- style={"marginBottom": "15px"},
- ),
- # Drained after socializing
- html.Div(
- [
- html.Label(
- "Do you feel drained after socializing?",
- style={
- "display": "block",
- "fontWeight": "bold",
- "marginBottom": "5px",
- },
- ),
- dcc.Dropdown(
- id="drained-after-socializing",
- options=[
- {"label": "No", "value": "No"},
- {"label": "Yes", "value": "Yes"},
- {"label": "Unknown", "value": "Unknown"},
- ],
- value="No",
- style={"width": "200px"},
- ),
- ],
- style={"marginBottom": "15px"},
- ),
- ],
- id="manual-inputs",
- style={
- "padding": "20px",
- "backgroundColor": "#f8f9fa",
- "borderRadius": "10px",
- "border": "1px solid #dee2e6",
- },
- )
-
-
-def format_prediction_result(result: dict[str, Any]) -> html.Div:
- """Format prediction result for display.
-
- Args:
- result: Prediction result dictionary
-
- Returns:
- Formatted result component
- """
- prediction = result.get("prediction", "Unknown")
- confidence = result.get("confidence", 0)
- prob_extrovert = result.get("probability_extrovert", 0)
- prob_introvert = result.get("probability_introvert", 0)
-
- # Create visual elements
- confidence_color = (
- "#27ae60" if confidence > 0.7 else "#f39c12" if confidence > 0.5 else "#e74c3c"
- )
-
- # Choose personality color
- personality_color = "#e74c3c" if prediction == "Extrovert" else "#3498db"
-
- elements = [
- html.H4(
- "Personality Classification Result",
- style={"color": "#2c3e50", "marginBottom": "15px"},
- ),
- # Main prediction with personality-specific styling
- html.Div(
- [
- html.H2(
- f"๐ง You are classified as: {prediction}",
- style={
- "color": personality_color,
- "margin": "10px 0",
- "textAlign": "center",
- "backgroundColor": "#ecf0f1",
- "padding": "15px",
- "borderRadius": "10px",
- "border": f"2px solid {personality_color}",
- },
- )
- ]
- ),
- # Confidence score
- html.Div(
- [
- html.P(
- f"Confidence Score: {confidence:.1%}",
- style={
- "fontSize": "18px",
- "color": confidence_color,
- "margin": "15px 0",
- "textAlign": "center",
- "fontWeight": "bold",
- },
- )
- ]
- ),
- ]
-
- # Add detailed probability breakdown
- if prob_extrovert is not None and prob_introvert is not None:
- elements.append(
- html.Div(
- [
- html.H5(
- "Detailed Probabilities:",
- style={"margin": "20px 0 10px 0", "color": "#2c3e50"},
- ),
- html.Div(
- [
- # Extrovert bar
- html.Div(
- [
- html.Span(
- "Extrovert: ",
- style={
- "fontWeight": "bold",
- "width": "100px",
- "display": "inline-block",
- },
- ),
- html.Div(
- style={
- "backgroundColor": "#e74c3c",
- "width": f"{prob_extrovert * 100}%",
- "height": "20px",
- "borderRadius": "10px",
- "display": "inline-block",
- "marginRight": "10px",
- "minWidth": "2px",
- }
- ),
- html.Span(
- f"{prob_extrovert:.1%}",
- style={"fontWeight": "bold"},
- ),
- ],
- style={
- "margin": "10px 0",
- "display": "flex",
- "alignItems": "center",
- },
- ),
- # Introvert bar
- html.Div(
- [
- html.Span(
- "Introvert: ",
- style={
- "fontWeight": "bold",
- "width": "100px",
- "display": "inline-block",
- },
- ),
- html.Div(
- style={
- "backgroundColor": "#3498db",
- "width": f"{prob_introvert * 100}%",
- "height": "20px",
- "borderRadius": "10px",
- "display": "inline-block",
- "marginRight": "10px",
- "minWidth": "2px",
- }
- ),
- html.Span(
- f"{prob_introvert:.1%}",
- style={"fontWeight": "bold"},
- ),
- ],
- style={
- "margin": "10px 0",
- "display": "flex",
- "alignItems": "center",
- },
- ),
- ],
- style={
- "backgroundColor": "#f8f9fa",
- "padding": "15px",
- "borderRadius": "8px",
- "border": "1px solid #dee2e6",
- },
- ),
- ]
- )
- )
-
- # Add personality description
- if prediction == "Extrovert":
- description = "๐ Extroverts typically enjoy social situations, feel energized by being around people, and tend to be outgoing and expressive."
- description_color = "#e74c3c"
- elif prediction == "Introvert":
- description = "๐ค Introverts typically prefer quieter environments, feel energized by alone time, and tend to be more reflective and reserved."
- description_color = "#3498db"
- else:
- description = "The model could not clearly determine your personality type."
- description_color = "#7f8c8d"
-
- elements.append(
- html.Div(
- [
- html.P(
- description,
- style={
- "fontSize": "14px",
- "color": description_color,
- "margin": "15px 0",
- "padding": "10px",
- "backgroundColor": "#ecf0f1",
- "borderRadius": "5px",
- "fontStyle": "italic",
- },
- )
- ]
- )
- )
-
- # Add metadata
- elements.append(
- html.Div(
- [
- html.Hr(style={"margin": "20px 0"}),
- html.P(
- f"Model: {result.get('model_name', 'Unknown')}",
- style={"color": "#7f8c8d", "margin": "5px 0", "fontSize": "12px"},
- ),
- html.P(
- f"Version: {result.get('model_version', 'Unknown')}",
- style={"color": "#7f8c8d", "margin": "5px 0", "fontSize": "12px"},
- ),
- html.P(
- f"Timestamp: {result.get('timestamp', 'Unknown')}",
- style={"color": "#7f8c8d", "margin": "5px 0", "fontSize": "12px"},
- ),
- ]
- )
- )
-
- return html.Div(
- elements,
- style={
- "border": "2px solid " + confidence_color,
- "padding": "20px",
- "borderRadius": "10px",
- "backgroundColor": "#ffffff",
- "boxShadow": "0 2px 4px rgba(0,0,0,0.1)",
- },
- )
diff --git a/docs/README.md b/docs/README.md
index e610682..ff5884e 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,281 +1,64 @@
# Documentation Index
-Welcome to the comprehensive documentation for the Six-Stack Personality Classification Pipeline. This documentation covers everything from basic usage to advanced deployment strategies.
-
-## ๐ Documentation Structure
-
-### ๐ฏ Core Guides
-
-#### [Technical Guide](technical-guide.md)
-
-**Deep technical dive into the architecture and algorithms**
-
-- Modular design principles and SOLID architecture
-- Algorithm implementation details for all 6 stacks
-- Ensemble strategy and out-of-fold prediction methodology
-- Data processing pipeline with external integration
-- Advanced preprocessing and feature engineering
-- Error handling, robustness, and reproducibility
-- Extension points for customization
-
-#### [API Reference](api-reference.md)
-
-**Complete module and function documentation**
-
-- All 8 core modules with detailed interfaces
-- Function signatures, parameters, and return types
-- Type hints and validation patterns
-- Error handling and exception classes
-- Usage examples for each component
-- Configuration options and enums
-
-### ๐ง Configuration and Tuning
-
-#### [Configuration Guide](configuration.md)
-
-**Comprehensive configuration reference**
-
-- Core parameters and reproducibility settings
-- Threading and parallelization configuration
-- Data augmentation method selection and tuning
-- Environment-specific configuration profiles
-- Validation and debugging strategies
-- Best practices for different scenarios
-
-#### [Performance Tuning Guide](performance-tuning.md)
-
-**Optimization strategies for speed, memory, and accuracy**
-
-- Speed optimization for development and production
-- Memory management for constrained environments
-- Accuracy optimization through advanced ensemble strategies
-- Threading and parallelization best practices
-- I/O optimization and caching strategies
-- Model-specific performance tuning
-- Monitoring and profiling techniques
-
-### ๐ค Advanced Features
-
-#### [Data Augmentation Guide](data-augmentation.md)
-
-**Advanced synthetic data generation strategies**
-
-- Adaptive augmentation method selection
-- SDV Copula, SMOTE, ADASYN, and basic methods
-- Quality control framework with multi-dimensional assessment
-- Diversity control and filtering pipeline
-- Configuration options and parameter tuning
-- Performance optimization and best practices
-- Troubleshooting and diagnostic tools
-
-### ๐ Deployment
-
-#### [Deployment Guide](deployment.md)
-
-**Production deployment instructions**
-
-- Local server deployment with systemd services
-- Docker containerization and Docker Compose
-- Kubernetes deployment with scaling and monitoring
-- Cloud platform deployment (AWS, GCP, Azure)
-- REST API service with FastAPI
-- Monitoring, logging, and security best practices
-- Backup, recovery, and troubleshooting
-
-## ๐ Getting Started Path
-
-### For New Users
-
-1. **Start with the main [README](../README.md)** for quick setup
-2. **Try the examples** in `examples/` directory
-3. **Read the [Configuration Guide](configuration.md)** for basic customization
-4. **Explore the [Technical Guide](technical-guide.md)** for deeper understanding
+
-### For Developers
+Welcome! This documentation covers all aspects of the Six-Stack Personality Classification Pipeline.
-1. **Review the [API Reference](api-reference.md)** for module interfaces
-2. **Study the [Technical Guide](technical-guide.md)** for architecture details
-3. **Follow the [Performance Tuning Guide](performance-tuning.md)** for optimization
-4. **Check the [Data Augmentation Guide](data-augmentation.md)** for advanced features
+## Main Guides
-### For DevOps/Deployment
+- [Technical Guide](technical-guide.md): Architecture, algorithms, and stacks
+- [API Reference](api-reference.md): Modules, functions, and usage
+- [Configuration Guide](configuration.md): All config options
+- [Performance Tuning](performance-tuning.md): Speed, memory, accuracy
+- [Data Augmentation](data-augmentation.md): Synthetic data strategies
+- [Deployment Guide](deployment.md): Docker, Compose, production
-1. **Read the [Deployment Guide](deployment.md)** for production setup
-2. **Configure monitoring** using the deployment examples
-3. **Set up CI/CD** following the containerization examples
-4. **Implement backup strategies** from the deployment guide
+## Quick Start
-## ๐ Quick Reference
+1. See [README](../README.md) for setup
+2. Try examples in `examples/`
+3. Read [Configuration Guide](configuration.md) for customization
+4. Explore [Technical Guide](technical-guide.md) for details
-### Configuration Quick Start
+## Quick Reference
+**Config:**
```python
-# Development (fast iteration)
-TESTING_MODE = True
+TESTING_MODE = True # Fast dev
N_TRIALS_STACK = 5
ENABLE_DATA_AUGMENTATION = False
-
-# Production (high accuracy)
+```
+**Production:**
+```python
TESTING_MODE = False
N_TRIALS_STACK = 100
AUGMENTATION_METHOD = "sdv_copula"
```
-
-### Performance Quick Wins
-
-```python
-# Speed optimization
-ThreadConfig.N_JOBS = 4
-N_TRIALS_STACK = 50
-AUGMENTATION_METHOD = "smote"
-
-# Memory optimization
-TESTING_SAMPLE_SIZE = 1000
-ThreadConfig.N_JOBS = 2
-ENABLE_DATA_AUGMENTATION = False
-```
-
-### Docker Quick Deploy
-
+**Docker:**
```bash
-# Build and run
+docker-compose up --build
+# or
docker build -t personality-classifier .
-docker run -d --name pc -p 8080:8080 personality-classifier
-
-# With Docker Compose
-docker-compose up -d
+docker run -p 8080:8080 personality-classifier
```
-## ๐ Finding What You Need
-
-### By Use Case
-
-| Use Case | Primary Guide | Supporting Docs |
-| ------------------------------ | ------------------------------------------- | ------------------------------------------- |
-| **Quick prototyping** | [README](../README.md) | [Configuration](configuration.md) |
-| **Understanding architecture** | [Technical Guide](technical-guide.md) | [API Reference](api-reference.md) |
-| **Optimizing performance** | [Performance Tuning](performance-tuning.md) | [Configuration](configuration.md) |
-| **Improving accuracy** | [Data Augmentation](data-augmentation.md) | [Technical Guide](technical-guide.md) |
-| **Production deployment** | [Deployment Guide](deployment.md) | [Performance Tuning](performance-tuning.md) |
-| **Custom development** | [API Reference](api-reference.md) | [Technical Guide](technical-guide.md) |
-
-### By Component
-
-| Component | Documentation |
-| ------------------ | ------------------------------------------------------------- |
-| **Config system** | [Configuration Guide](configuration.md) |
-| **Data loading** | [API Reference](api-reference.md#data_loaderpy) |
-| **Preprocessing** | [API Reference](api-reference.md#preprocessingpy) |
-| **Augmentation** | [Data Augmentation Guide](data-augmentation.md) |
-| **Model builders** | [API Reference](api-reference.md#model_builderspy) |
-| **Ensemble** | [Technical Guide](technical-guide.md#ensemble-strategy) |
-| **Optimization** | [API Reference](api-reference.md#optimizationpy) |
-| **Main pipeline** | [Technical Guide](technical-guide.md#architecture-philosophy) |
-
-### By Problem
-
-| Problem | Solution Location |
-| ------------------------ | ------------------------------------------------------------------------------------------------------------ |
-| **Slow training** | [Performance Tuning](performance-tuning.md#speed-optimization) |
-| **Memory issues** | [Performance Tuning](performance-tuning.md#memory-optimization) |
-| **Poor accuracy** | [Data Augmentation](data-augmentation.md), [Performance Tuning](performance-tuning.md#accuracy-optimization) |
-| **Configuration errors** | [Configuration Guide](configuration.md#validation-and-error-handling) |
-| **Deployment issues** | [Deployment Guide](deployment.md#troubleshooting) |
-| **Understanding code** | [API Reference](api-reference.md), [Technical Guide](technical-guide.md) |
-
-## ๐ ๏ธ Development Resources
-
-### Code Examples
-
-- **Basic usage**: `examples/minimal_test.py`
-- **Development workflow**: `examples/main_demo.py`
-- **Production pipeline**: `src/main_modular.py`
-- **Module testing**: `examples/test_modules.py`
-
-### Configuration Templates
-
-- **Development**: [Configuration Guide](configuration.md#development-presets)
-- **Production**: [Configuration Guide](configuration.md#production-server)
-- **Docker**: [Deployment Guide](deployment.md#docker-deployment)
-- **Kubernetes**: [Deployment Guide](deployment.md#kubernetes-deployment)
-
-### Monitoring and Debugging
-
-- **Performance monitoring**: [Performance Tuning](performance-tuning.md#monitoring-and-profiling)
-- **Structured logging**: [Deployment Guide](deployment.md#structured-logging)
-- **Quality diagnostics**: [Data Augmentation](data-augmentation.md#debugging-augmentation)
-
-## ๐ Advanced Topics
-
-### Research and Experimentation
-
-- **Adding new model stacks**: [Technical Guide](technical-guide.md#adding-new-model-stacks)
-- **Custom augmentation methods**: [Data Augmentation](data-augmentation.md#future-enhancements)
-- **Meta-learning approaches**: [Technical Guide](technical-guide.md#future-enhancements)
-
-### Production Optimization
-
-- **Auto-scaling strategies**: [Deployment Guide](deployment.md#kubernetes-deployment)
-- **A/B testing framework**: [Technical Guide](technical-guide.md#future-enhancements)
-- **Model versioning**: [Deployment Guide](deployment.md#api-service-deployment)
-
-### Integration Patterns
-
-- **REST API development**: [Deployment Guide](deployment.md#fastapi-rest-api)
-- **Batch processing**: [Deployment Guide](deployment.md#scheduled-training-with-cron)
-- **Real-time inference**: [Deployment Guide](deployment.md#api-service-deployment)
-
-## ๐ What's New
-
-### Latest Features (v2.0)
-
-- โ **Advanced data augmentation** with SDV Copula and quality control
-- โ **Centralized configuration** system with threading management
-- โ **Modular architecture** with 8 specialized modules
-- โ **Production-ready deployment** with Docker and Kubernetes support
-- โ **Comprehensive documentation** with guides for all use cases
-
-### Upcoming Features
-
-- ๐ **GPU acceleration** for neural network stacks
-- ๐ **AutoML integration** for automatic hyperparameter tuning
-- ๐ **Distributed training** support for large datasets
-- ๐ **Model interpretability** tools and dashboards
-
-## ๐ฌ Support and Contributing
-
-### Getting Help
-
-1. **Check this documentation** for comprehensive guides
-2. **Review examples** in the `examples/` directory
-3. **Search issues** in the repository
-4. **Create new issue** with detailed problem description
-
-### Contributing
-
-1. **Read the [README](../README.md#contributing)** for contribution guidelines
-2. **Focus on modular development** using the established architecture
-3. **Add tests** for new features in the `examples/` directory
-4. **Update documentation** for significant changes
-
-### Community
-
-- **Repository**: [GitHub Repository Link]
-- **Issues**: For bug reports and feature requests
-- **Discussions**: For questions and community support
+## ๏ธ Resources
----
+- Code: `src/main_modular.py`, `examples/`
+- Config templates: [Configuration Guide](configuration.md)
+- Monitoring: [Performance Tuning](performance-tuning.md)
+- Deployment: [Deployment Guide](deployment.md)
-_This documentation is continuously updated. For the latest information, check the repository and individual guide timestamps._
+## Latest Features
-## ๐ Documentation Checklist
+- Advanced SDV Copula augmentation
+- Centralized config system
+- Modular architecture
+- Dockerized deployment
+- Comprehensive documentation
-When working with the pipeline, use this checklist to find the right documentation:
+## Help & Contributing
-- [ ] **New to the project?** โ Start with [README](../README.md)
-- [ ] **Need to configure settings?** โ [Configuration Guide](configuration.md)
-- [ ] **Want to understand the code?** โ [API Reference](api-reference.md)
-- [ ] **Looking to optimize performance?** โ [Performance Tuning](performance-tuning.md)
-- [ ] **Need better accuracy?** โ [Data Augmentation](data-augmentation.md)
-- [ ] **Ready for production?** โ [Deployment Guide](deployment.md)
-- [ ] **Want deep technical details?** โ [Technical Guide](technical-guide.md)
+- Review guides and examples
+- Search or create issues in the repo
+- See [README](../README.md#contributing) for contribution steps
diff --git a/docs/api-reference.md b/docs/api-reference.md
index bb04d89..ee82db2 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -1,63 +1,40 @@
# API Reference - Six-Stack Personality Classification Pipeline
-
-## Module Overview
-
-The pipeline consists of 8 core modules, each with well-defined interfaces and responsibilities.
-
-## config.py
-
-### Configuration Management
-
-#### Global Constants
-
-```python
-RND: int = 42 # Global random seed
-N_SPLITS: int = 5 # Cross-validation folds
-N_TRIALS_STACK: int = 15 # Optuna trials per stack
-N_TRIALS_BLEND: int = 200 # Ensemble blending trials
-LOG_LEVEL: str = "INFO" # Logging level
-```
-
-#### Threading Configuration
-
-```python
-class ThreadConfig(Enum):
- """Centralized threading configuration."""
- N_JOBS: int = 4 # Parallel jobs for sklearn
- THREAD_COUNT: int = 4 # Thread count for XGB/LGB
-```
-
-#### Data Augmentation Configuration
-
-```python
-ENABLE_DATA_AUGMENTATION: bool = True
-AUGMENTATION_METHOD: str = "sdv_copula"
-AUGMENTATION_RATIO: float = 0.05
-DIVERSITY_THRESHOLD: float = 0.95
-QUALITY_THRESHOLD: float = 0.7
-```
-
-#### Functions
-
-```python
-def setup_logging() -> None:
- """Initialize structured logging configuration."""
-
-def get_logger(name: str) -> logging.Logger:
- """Get configured logger instance."""
-```
-
-## data_loader.py
-
-### Data Loading and External Integration
-
-#### Primary Functions
-
-```python
-def load_data_with_external_merge() -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
- """
- Load training/test data with external dataset merge using TOP-4 strategy.
-
+## Modules & Functions
+
+**config.py**
+- RND: int = 42
+- N_SPLITS: int = 5
+- N_TRIALS_STACK: int = 15
+- N_TRIALS_BLEND: int = 200
+- LOG_LEVEL: str = "INFO"
+- ENABLE_DATA_AUGMENTATION: bool = True
+- AUGMENTATION_METHOD: str = "sdv_copula"
+- AUGMENTATION_RATIO: float = 0.05
+- DIVERSITY_THRESHOLD: float = 0.95
+- QUALITY_THRESHOLD: float = 0.7
+- class ThreadConfig(Enum): N_JOBS, THREAD_COUNT
+- setup_logging(), get_logger(name)
+
+**data_loader.py**
+- load_data_with_external_merge() -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]
+
+**preprocessing.py**
+- preprocess_data(df) -> pd.DataFrame
+
+**data_augmentation.py**
+- augment_data(X, y, method, ratio) -> pd.DataFrame
+
+**model_builders.py**
+- build_stack(stack_id, X, y) -> model
+
+**ensemble.py**
+- blend_predictions(preds_list) -> np.ndarray
+
+**optimization.py**
+- optimize_hyperparameters(model, X, y) -> dict
+
+**utils.py**
+- Utility functions for metrics, logging, etc.
Returns:
tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
(train_df, test_df, submission_template)
diff --git a/docs/architecture.md b/docs/architecture.md
index f3e1d0d..a530c85 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -1,52 +1,21 @@
# Architecture Documentation
-## Overview
-
-This project implements a modular personality classification pipeline with ensemble learning and hyperparameter optimization.
-
-## Component Architecture
-
-### Core Modules (`src/modules/`)
-
-1. **config.py** - Configuration management and logging setup
-2. **data_loader.py** - Data loading with external dataset merging
-3. **preprocessing.py** - Feature engineering and data preprocessing
-4. **data_augmentation.py** - Data augmentation strategies
-5. **model_builders.py** - Model construction for different stacks
-6. **ensemble.py** - Out-of-fold predictions and ensemble methods
-7. **optimization.py** - Optuna hyperparameter optimization utilities
-8. **utils.py** - General utility functions
-
-### Execution Scripts
-
-- **src/main_modular.py** - Main production pipeline
-- **examples/main_final.py** - Lightweight working example
-- **examples/main_demo.py** - Demo with simplified models
-- **examples/test_modules.py** - Module testing script
-- **examples/minimal_test.py** - Import verification
-
-## Data Flow
-
-1. **Data Loading** โ External dataset merge โ Feature extraction
-2. **Preprocessing** โ Feature engineering โ Data augmentation
-3. **Model Training** โ 6 specialized stacks with Optuna optimization
-4. **Ensemble** โ Out-of-fold predictions โ Blend optimization
-5. **Pseudo-labeling** โ Conservative high-confidence labeling
-6. **Final Prediction** โ Weighted ensemble โ Submission generation
-
-## Stack Configurations
-
-- **Stack A**: Traditional ML (narrow hyperparameters)
-- **Stack B**: Traditional ML (wide hyperparameters)
-- **Stack C**: XGBoost + CatBoost specialized
-- **Stack D**: Sklearn ensemble models
-- **Stack E**: Neural network models
-- **Stack F**: Noisy label training
-
-## Performance Features
-
-- Memory-efficient processing
-- CPU-optimized configurations
-- Robust error handling with timeouts
-- Modular testing capabilities
-- Comprehensive logging
+## Architecture Overview
+
+- Modular pipeline: 8 core modules in `src/modules/`
+- Main pipeline: `src/main_modular.py`
+- Dashboard: `dash_app/` (Dash, Docker)
+- Model stacks: 6 specialized ensembles (A-F)
+- Data flow: Load → Preprocess → Augment → Train → Ensemble → Predict
+
+## Stacks
+- A: Traditional ML (narrow)
+- B: Traditional ML (wide)
+- C: XGBoost/CatBoost
+- D: Sklearn ensemble
+- E: Neural networks
+- F: Noise-robust
+
+## Key Features
+- Efficient, reproducible, and testable
+- Full logging and error handling
diff --git a/docs/configuration.md b/docs/configuration.md
deleted file mode 100644
index 423ebee..0000000
--- a/docs/configuration.md
+++ /dev/null
@@ -1,544 +0,0 @@
-# Configuration Guide
-
-## Overview
-
-The Six-Stack Personality Classification Pipeline provides extensive configuration options through the centralized `src/modules/config.py` file. This guide covers all configuration parameters, their purposes, and best practices for tuning.
-
-## Configuration Architecture
-
-### Centralized Configuration
-
-All configuration is managed through a single module to ensure:
-
-- **Consistency** across all components
-- **Easy maintenance** and updates
-- **Environment-specific** settings
-- **Type safety** with enums and validation
-
-### Configuration Categories
-
-1. **Core Parameters** - Basic pipeline settings
-2. **Threading Configuration** - Parallel processing control
-3. **Data Augmentation** - Synthetic data generation
-4. **Model Training** - Algorithm-specific settings
-5. **Development** - Testing and debugging options
-6. **Logging** - Output and monitoring control
-
-## Core Parameters
-
-### Reproducibility Settings
-
-```python
-# Global random seed for reproducibility
-RND: int = 42
-
-# Description: Controls all random number generation across the pipeline
-# Impact: Ensures reproducible results across runs
-# Tuning: Change only when you need different random behavior
-# Valid Range: Any integer (0-2^31)
-```
-
-### Cross-Validation Configuration
-
-```python
-# Number of stratified folds for cross-validation
-N_SPLITS: int = 5
-
-# Description: Controls k-fold cross-validation splitting
-# Impact: More folds = more reliable estimates but longer training
-# Tuning: 3-10 folds typically, 5 is standard
-# Memory Impact: Linear increase with more folds
-```
-
-### Hyperparameter Optimization
-
-```python
-# Optuna trials per individual stack
-N_TRIALS_STACK: int = 15
-
-# Description: Number of hyperparameter combinations to try per stack
-# Impact: More trials = better optimization but longer training
-# Tuning Guidelines:
-# - Development: 5-15 trials
-# - Production: 50-200 trials
-# - Competition: 500+ trials
-# Time Impact: Linear increase with trial count
-
-# Ensemble blending optimization trials
-N_TRIALS_BLEND: int = 200
-
-# Description: Trials for optimizing ensemble weights
-# Impact: Critical for final performance, usually converges quickly
-# Tuning: 100-500 trials, diminishing returns after 200
-```
-
-## Threading Configuration
-
-### Thread Management Enum
-
-```python
-class ThreadConfig(Enum):
- """Centralized threading configuration for all models."""
-
- N_JOBS: int = 4 # sklearn parallel jobs
- THREAD_COUNT: int = 4 # XGBoost/LightGBM threads
-```
-
-### Optimization Guidelines
-
-#### System-Specific Tuning
-
-```python
-# For development machines (4-8 cores)
-N_JOBS = 2
-THREAD_COUNT = 2
-
-# For production servers (16+ cores)
-N_JOBS = 8
-THREAD_COUNT = 8
-
-# For memory-constrained environments
-N_JOBS = 1
-THREAD_COUNT = 1
-
-# Auto-detection approach
-import multiprocessing
-optimal_threads = min(multiprocessing.cpu_count(), 8)
-```
-
-#### Performance vs Resource Trade-offs
-
-| Setting | Training Speed | Memory Usage | CPU Usage |
-| ----------- | -------------- | ------------ | --------- |
-| 1 thread | Slowest | Lowest | Low |
-| 2-4 threads | Moderate | Moderate | Medium |
-| 8+ threads | Fastest | Highest | High |
-
-## Data Augmentation Configuration
-
-### Main Augmentation Settings
-
-```python
-# Enable/disable data augmentation globally
-ENABLE_DATA_AUGMENTATION: bool = True
-
-# Augmentation method selection
-AUGMENTATION_METHOD: str = "sdv_copula"
-# Options: "auto", "sdv_copula", "smote", "adasyn", "basic"
-
-# Augmentation ratio (fraction of original dataset)
-AUGMENTATION_RATIO: float = 0.05 # 5% additional synthetic data
-```
-
-### Method Selection Guide
-
-#### "auto" (Recommended)
-
-- **Best for**: Most use cases
-- **Behavior**: Automatically selects optimal method based on data characteristics
-- **Fallback**: Always provides a working solution
-
-#### "sdv_copula"
-
-- **Best for**: Large datasets with complex distributions
-- **Pros**: High-quality synthetic data, preserves correlations
-- **Cons**: Computationally intensive, requires more memory
-- **Use when**: Dataset >5K samples, complex feature interactions
-
-#### "smote"
-
-- **Best for**: Small to medium datasets with class imbalance
-- **Pros**: Fast, well-tested, handles imbalance well
-- **Cons**: May create unrealistic edge cases
-- **Use when**: Dataset <5K samples, clear class imbalance
-
-#### "adasyn"
-
-- **Best for**: Severely imbalanced datasets
-- **Pros**: Adaptive to difficult examples, improved boundary learning
-- **Cons**: Sensitive to noise, may overfit to outliers
-- **Use when**: Extreme imbalance (>90% majority class)
-
-#### "basic"
-
-- **Best for**: High-categorical datasets or fallback
-- **Pros**: Fast, simple, always works
-- **Cons**: Lower quality, limited sophistication
-- **Use when**: Many categorical features, quick prototyping
-
-### Quality Control Parameters
-
-```python
-# Quality filtering threshold (0-1, higher = stricter)
-QUALITY_THRESHOLD: float = 0.7
-
-# Diversity requirement (0-1, higher = more diverse)
-DIVERSITY_THRESHOLD: float = 0.95
-
-# Method-specific parameters
-SDV_EPOCHS: int = 100 # SDV training epochs (5 in testing)
-SMOTE_K_NEIGHBORS: int = 5 # k for SMOTE (auto-adjusted)
-BASIC_NOISE_FACTOR: float = 0.1 # Noise factor for basic method
-```
-
-### Advanced Augmentation Tuning
-
-#### Quality Threshold Tuning
-
-```python
-# Conservative (high quality, fewer samples)
-QUALITY_THRESHOLD = 0.8
-
-# Balanced (moderate quality, moderate samples)
-QUALITY_THRESHOLD = 0.7
-
-# Aggressive (lower quality, more samples)
-QUALITY_THRESHOLD = 0.6
-
-# Development/testing (relaxed quality)
-QUALITY_THRESHOLD = 0.5
-```
-
-#### Ratio Optimization Strategy
-
-```python
-# Start conservative and increase
-AUGMENTATION_RATIOS = [0.02, 0.05, 0.10, 0.15, 0.20]
-
-# Monitor cross-validation scores
-for ratio in AUGMENTATION_RATIOS:
- AUGMENTATION_RATIO = ratio
- cv_score = evaluate_pipeline()
- if cv_score < previous_best:
- break # Diminishing returns detected
-```
-
-## Model Training Configuration
-
-### Label Noise for Robustness
-
-```python
-# Label noise rate for Stack F (noise-robust training)
-LABEL_NOISE_RATE: float = 0.02 # 2% of labels randomly flipped
-
-# Description: Improves generalization by training on noisy labels
-# Impact: Better robustness to annotation errors
-# Tuning Range: 0.01-0.05 (1-5%)
-# Warning: Too much noise degrades performance
-```
-
-### Timeout and Resource Limits
-
-```python
-# Training timeout per stack (seconds)
-STACK_TIMEOUT: int = 1800 # 30 minutes
-
-# Memory limit warning threshold (GB)
-MEMORY_WARNING_THRESHOLD: float = 8.0
-
-# Early stopping patience for neural networks
-EARLY_STOPPING_PATIENCE: int = 10
-```
-
-## Development and Testing
-
-### Testing Mode Configuration
-
-```python
-# Enable reduced dataset for faster development
-TESTING_MODE: bool = True
-
-# Sample size in testing mode
-TESTING_SAMPLE_SIZE: int = 1000
-
-# Reduced trials in testing mode
-TESTING_N_TRIALS_STACK: int = 5
-TESTING_N_TRIALS_BLEND: int = 50
-
-# Fast augmentation in testing
-TESTING_SDV_EPOCHS: int = 5
-```
-
-### Development Presets
-
-```python
-# Quick development preset
-def configure_for_development():
- global TESTING_MODE, N_TRIALS_STACK, ENABLE_DATA_AUGMENTATION
- TESTING_MODE = True
- N_TRIALS_STACK = 5
- ENABLE_DATA_AUGMENTATION = False
- logger.info("Configured for rapid development")
-
-# Full production preset
-def configure_for_production():
- global TESTING_MODE, N_TRIALS_STACK, N_TRIALS_BLEND
- TESTING_MODE = False
- N_TRIALS_STACK = 100
- N_TRIALS_BLEND = 300
- logger.info("Configured for production run")
-```
-
-## Logging Configuration
-
-### Log Level Settings
-
-```python
-# Logging level
-LOG_LEVEL: str = "INFO"
-
-# Options: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
-# DEBUG: Very detailed information for debugging
-# INFO: General information about progress (recommended)
-# WARNING: Important warnings and issues
-# ERROR: Only error messages
-```
-
-### Advanced Logging Configuration
-
-```python
-# Log file configuration
-LOG_FILE: str = "personality_classifier.log"
-LOG_MAX_SIZE: int = 10 * 1024 * 1024 # 10MB
-LOG_BACKUP_COUNT: int = 5
-
-# Performance logging
-ENABLE_PERFORMANCE_LOGGING: bool = True
-LOG_MEMORY_USAGE: bool = True
-LOG_TIMING_INFO: bool = True
-
-# Progress bar configuration
-SHOW_PROGRESS_BARS: bool = True
-PROGRESS_BAR_STYLE: str = "tqdm" # "tqdm" or "simple"
-```
-
-## Environment-Specific Configuration
-
-### Configuration Profiles
-
-#### Local Development
-
-```python
-# config_development.py
-TESTING_MODE = True
-N_TRIALS_STACK = 5
-N_TRIALS_BLEND = 50
-ENABLE_DATA_AUGMENTATION = False
-LOG_LEVEL = "DEBUG"
-ThreadConfig.N_JOBS = 2
-ThreadConfig.THREAD_COUNT = 2
-```
-
-#### CI/CD Pipeline
-
-```python
-# config_ci.py
-TESTING_MODE = True
-N_TRIALS_STACK = 3
-N_TRIALS_BLEND = 20
-ENABLE_DATA_AUGMENTATION = False
-LOG_LEVEL = "WARNING"
-ThreadConfig.N_JOBS = 1
-ThreadConfig.THREAD_COUNT = 1
-```
-
-#### Production Server
-
-```python
-# config_production.py
-TESTING_MODE = False
-N_TRIALS_STACK = 100
-N_TRIALS_BLEND = 300
-ENABLE_DATA_AUGMENTATION = True
-AUGMENTATION_METHOD = "sdv_copula"
-LOG_LEVEL = "INFO"
-ThreadConfig.N_JOBS = 8
-ThreadConfig.THREAD_COUNT = 8
-```
-
-### Environment Variable Integration
-
-```python
-import os
-
-# Override with environment variables
-N_TRIALS_STACK = int(os.getenv('PERSONALITY_TRIALS_STACK', N_TRIALS_STACK))
-TESTING_MODE = os.getenv('PERSONALITY_TESTING_MODE', 'false').lower() == 'true'
-LOG_LEVEL = os.getenv('PERSONALITY_LOG_LEVEL', LOG_LEVEL)
-
-# Docker environment detection
-if os.getenv('RUNNING_IN_DOCKER'):
- ThreadConfig.N_JOBS = min(ThreadConfig.N_JOBS, 4)
- MEMORY_WARNING_THRESHOLD = 2.0 # Lower threshold in containers
-```
-
-## Performance Tuning Guidelines
-
-### Memory Optimization
-
-```python
-# For systems with <8GB RAM
-TESTING_MODE = True
-TESTING_SAMPLE_SIZE = 500
-ThreadConfig.N_JOBS = 1
-ENABLE_DATA_AUGMENTATION = False
-
-# For systems with 8-16GB RAM (recommended)
-TESTING_SAMPLE_SIZE = 1000
-ThreadConfig.N_JOBS = 2
-AUGMENTATION_RATIO = 0.03
-
-# For systems with >16GB RAM
-ThreadConfig.N_JOBS = 4
-AUGMENTATION_RATIO = 0.05
-N_TRIALS_STACK = 50
-```
-
-### Speed Optimization
-
-```python
-# Fastest configuration (for quick iteration)
-TESTING_MODE = True
-N_TRIALS_STACK = 3
-N_TRIALS_BLEND = 20
-ENABLE_DATA_AUGMENTATION = False
-SHOW_PROGRESS_BARS = False
-
-# Balanced configuration (development)
-N_TRIALS_STACK = 15
-N_TRIALS_BLEND = 100
-AUGMENTATION_METHOD = "smote" # Faster than SDV
-
-# Quality-focused configuration (production)
-N_TRIALS_STACK = 100
-N_TRIALS_BLEND = 300
-AUGMENTATION_METHOD = "sdv_copula"
-```
-
-### GPU Configuration (Future)
-
-```python
-# GPU settings (when available)
-USE_GPU: bool = False
-GPU_MEMORY_FRACTION: float = 0.8
-ENABLE_MIXED_PRECISION: bool = False
-
-# GPU-specific model settings
-GPU_BATCH_SIZE: int = 64
-GPU_N_ESTIMATORS_FACTOR: float = 2.0 # Increase for GPU
-```
-
-## Validation and Error Handling
-
-### Configuration Validation
-
-```python
-def validate_configuration():
- """Validate configuration parameters."""
- assert 0 < AUGMENTATION_RATIO <= 1.0, "Invalid augmentation ratio"
- assert N_SPLITS >= 2, "Need at least 2 CV folds"
- assert 0 <= LABEL_NOISE_RATE <= 0.2, "Label noise rate too high"
- assert ThreadConfig.N_JOBS >= 1, "Need at least 1 job"
-
- if TESTING_MODE and N_TRIALS_STACK > 20:
- logger.warning("High trial count in testing mode may be slow")
-
- if not ENABLE_DATA_AUGMENTATION and AUGMENTATION_RATIO > 0:
- logger.warning("Augmentation ratio set but augmentation disabled")
-```
-
-### Configuration Debugging
-
-```python
-def log_configuration():
- """Log current configuration for debugging."""
- logger.info("Configuration Summary:")
- logger.info(f" Mode: {'Testing' if TESTING_MODE else 'Production'}")
- logger.info(f" Trials per stack: {N_TRIALS_STACK}")
- logger.info(f" Augmentation: {AUGMENTATION_METHOD if ENABLE_DATA_AUGMENTATION else 'Disabled'}")
- logger.info(f" Threading: {ThreadConfig.N_JOBS} jobs, {ThreadConfig.THREAD_COUNT} threads")
- logger.info(f" Random seed: {RND}")
-```
-
-## Configuration Best Practices
-
-### 1. Start Conservative
-
-- Begin with default settings
-- Use testing mode for development
-- Gradually increase complexity
-
-### 2. Monitor Resources
-
-- Watch memory usage during training
-- Monitor CPU utilization
-- Adjust threading based on available resources
-
-### 3. Validate Changes
-
-- Test configuration changes on small datasets first
-- Compare cross-validation scores
-- Ensure reproducibility with fixed seeds
-
-### 4. Document Customizations
-
-- Comment configuration changes
-- Track performance impacts
-- Maintain environment-specific configs
-
-### 5. Use Version Control
-
-- Track configuration changes
-- Tag configurations with results
-- Maintain separate configs for different environments
-
-## Troubleshooting Common Issues
-
-### Memory Issues
-
-```python
-# Reduce memory usage
-TESTING_MODE = True
-ThreadConfig.N_JOBS = 1
-ENABLE_DATA_AUGMENTATION = False
-TESTING_SAMPLE_SIZE = 500
-```
-
-### Slow Training
-
-```python
-# Speed up training
-N_TRIALS_STACK = 5
-N_TRIALS_BLEND = 50
-AUGMENTATION_METHOD = "basic"
-SHOW_PROGRESS_BARS = False
-```
-
-### Poor Performance
-
-```python
-# Increase optimization
-N_TRIALS_STACK = 100
-N_TRIALS_BLEND = 300
-AUGMENTATION_METHOD = "sdv_copula"
-AUGMENTATION_RATIO = 0.08
-```
-
-### Reproducibility Issues
-
-```python
-# Ensure reproducibility
-# Set fixed seed
-RND = 42
-
-# Single-threaded for determinism
-ThreadConfig.N_JOBS = 1
-ThreadConfig.THREAD_COUNT = 1
-
-# Disable random augmentation
-ENABLE_DATA_AUGMENTATION = False
-```
-
----
-
-_This configuration guide covers all current options. For the latest parameters and features, check the source code in `src/modules/config.py`._
diff --git a/docs/data-augmentation.md b/docs/data-augmentation.md
index 9ad61c8..df8b39f 100644
--- a/docs/data-augmentation.md
+++ b/docs/data-augmentation.md
@@ -1,63 +1,21 @@
# Data Augmentation Guide
-## Overview
+## Data Augmentation Guide
-The Six-Stack Personality Classification Pipeline features an advanced, adaptive data augmentation system designed to improve model generalization and performance through high-quality synthetic data generation.
-
-## Architecture
-
-### Adaptive Strategy Selection
-
-The pipeline automatically selects the optimal augmentation method based on dataset characteristics:
-
-```python
-def analyze_data_characteristics(X, y):
- """Analyze dataset to determine optimal augmentation strategy."""
- return {
- 'n_samples': len(X),
- 'n_features': X.shape[1],
- 'class_balance_ratio': min(y.value_counts()) / max(y.value_counts()),
- 'categorical_ratio': (X.dtypes == 'object').sum() / len(X.columns),
- 'feature_complexity': calculate_feature_complexity(X),
- 'is_small_dataset': len(X) < 1000,
- 'is_imbalanced': min(y.value_counts()) / max(y.value_counts()) < 0.3,
- 'is_highly_categorical': (X.dtypes == 'object').sum() / len(X.columns) > 0.5
- }
-```
+### Strategy
+- Adaptive selection based on dataset size, balance, and feature types
### Decision Matrix
-
-| Dataset Characteristics | Recommended Method | Rationale |
-| -------------------------------- | ------------------ | --------------------------------- |
-| Small datasets (<1K samples) | SMOTE | Fast, proven for small data |
-| Severe imbalance (<30% minority) | ADASYN | Adaptive sampling for minorities |
-| High categorical (>50%) | Basic | Simple methods for categorical |
-| Complex numerical data | SDV Copula | Preserves complex distributions |
-| Large balanced datasets | SDV Copula | Best quality for complex patterns |
-
-## Augmentation Methods
-
-### 1. SDV Copula (Recommended)
-
-**Best for**: Large datasets with complex feature distributions
-
-#### Features
-
-- **Gaussian Copula modeling** for complex dependency structures
-- **Marginal distribution preservation** for each feature
-- **Correlation structure maintenance** across features
-- **Fast training mode** for development/testing
-
-#### Implementation
-
-```python
-def sdv_copula_augmentation(X, y, n_samples):
- """Generate synthetic data using SDV Gaussian Copula."""
- # Combine features and target
- data = X.copy()
- data['target'] = y
-
- # Configure copula synthesizer
+| Data Type | Method |
+|-------------------|---------------|
+| Small/Imbalanced | SMOTE/ADASYN |
+| High Categorical | Basic |
+| Complex Numeric | SDV Copula |
+
+### Main Method
+**SDV Copula** (recommended):
+- Preserves feature distributions and correlations
+- Fast mode for development
synthesizer = GaussianCopula(
enforce_rounding=True,
enforce_min_max_values=True
diff --git a/docs/deployment.md b/docs/deployment.md
deleted file mode 100644
index 431f90d..0000000
--- a/docs/deployment.md
+++ /dev/null
@@ -1,846 +0,0 @@
-# Deployment Guide
-
-## Overview
-
-This guide covers deploying the Six-Stack Personality Classification Pipeline using modern containerization and orchestration technologies. The focus is on **Docker containerization** and **Kubernetes orchestration** with a **Dash web application** for interactive model serving.
-
-## Deployment Strategy
-
-### Core Technologies
-
-- **๐ณ Docker**: Containerization for consistent environments
-- **โธ๏ธ Kubernetes**: Container orchestration and scaling
-- **๐ Dash**: Interactive web application for model inference
-- **๐ Monitoring**: Prometheus and Grafana integration
-
-### Architecture Overview
-
-```
-โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโโโโ
-โ Dash Web App โโโโโถโ ML Pipeline โโโโโถโ Data Store โ
-โ (Port 8050) โ โ (Containers) โ โ (Volumes) โ
-โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโโโโ
- โ โ โ
- โผ โผ โผ
-โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโโโโ
-โ Load Balancer โ โ Kubernetes โ โ Monitoring โ
-โ (Ingress) โ โ Cluster โ โ (Prometheus) โ
-โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโโโโ
-```
-
-## ๐ณ Docker Deployment
-
-### Prerequisites
-
-```bash
-# System requirements
-- Docker 20.10+
-- Docker Compose 2.0+
-- 8GB+ RAM available for containers
-- 4+ CPU cores
-- 20GB+ disk space
-
-# Install Docker (Ubuntu/Debian)
-curl -fsSL https://get.docker.com -o get-docker.sh
-sudo sh get-docker.sh
-sudo usermod -aG docker $USER
-
-# Install Docker Compose
-sudo apt install docker-compose-plugin
-```
-
-### Dockerfile
-
-```dockerfile
-# Multi-stage build for optimal image size
-FROM python:3.11-slim as builder
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y \
- gcc \
- g++ \
- && rm -rf /var/lib/apt/lists/*
-
-# Install uv
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
-
-# Set working directory
-WORKDIR /app
-
-# Copy dependency files
-COPY pyproject.toml uv.lock ./
-
-# Install dependencies
-RUN uv sync --no-dev --frozen
-
-# Production stage
-FROM python:3.11-slim
-
-# Install runtime dependencies
-RUN apt-get update && apt-get install -y \
- && rm -rf /var/lib/apt/lists/*
-
-# Create non-root user
-RUN useradd --create-home --shell /bin/bash personality
-
-# Set working directory
-WORKDIR /app
-
-# Copy virtual environment from builder
-COPY --from=builder /app/.venv /app/.venv
-
-# Copy source code
-COPY src/ src/
-COPY data/ data/
-COPY examples/ examples/
-
-# Set ownership
-RUN chown -R personality:personality /app
-
-# Switch to non-root user
-USER personality
-
-# Set environment variables
-ENV PATH="/app/.venv/bin:$PATH"
-ENV PYTHONPATH="/app/src"
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
- CMD python -c "import src.modules.config; print('OK')" || exit 1
-
-# Default command
-CMD ["python", "src/main_modular.py"]
-```
-
-### Pipeline Dockerfile
-
-```dockerfile
-# Dockerfile.pipeline - ML Training Pipeline
-FROM python:3.11-slim as builder
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y \
- gcc \
- g++ \
- && rm -rf /var/lib/apt/lists/*
-
-# Install uv
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
-
-# Set working directory
-WORKDIR /app
-
-# Copy dependency files
-COPY pyproject.toml uv.lock ./
-
-# Install dependencies
-RUN uv sync --no-dev --frozen
-
-# Production stage
-FROM python:3.11-slim
-
-# Install runtime dependencies
-RUN apt-get update && apt-get install -y \
- && rm -rf /var/lib/apt/lists/*
-
-# Create non-root user
-RUN useradd --create-home --shell /bin/bash personality
-
-# Set working directory
-WORKDIR /app
-
-# Copy virtual environment from builder
-COPY --from=builder /app/.venv /app/.venv
-
-# Copy source code
-COPY src/ src/
-COPY data/ data/
-
-# Create model artifacts directory
-RUN mkdir -p models best_params submissions logs
-
-# Set ownership
-RUN chown -R personality:personality /app
-
-# Switch to non-root user
-USER personality
-
-# Set environment variables
-ENV PATH="/app/.venv/bin:$PATH"
-ENV PYTHONPATH="/app/src"
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
- CMD python -c "import src.modules.config; print('OK')" || exit 1
-
-# Default command
-CMD ["python", "src/main_modular.py"]
-```
-
-### Dash Application Dockerfile
-
-```dockerfile
-# Dockerfile.dash - Interactive Dash Application
-FROM python:3.11-slim
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y \
- gcc \
- g++ \
- && rm -rf /var/lib/apt/lists/*
-
-# Install uv
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
-
-# Set working directory
-WORKDIR /app
-
-# Copy dependency files (include dash dependencies)
-COPY pyproject.toml uv.lock ./
-COPY requirements-dash.txt ./
-
-# Install dependencies
-RUN uv sync --no-dev --frozen
-RUN uv pip install -r requirements-dash.txt
-
-# Copy application code
-COPY src/ src/
-COPY dash_app/ dash_app/
-
-# Create non-root user
-RUN useradd --create-home --shell /bin/bash dashuser
-RUN chown -R dashuser:dashuser /app
-
-# Switch to non-root user
-USER dashuser
-
-# Set environment variables
-ENV PATH="/app/.venv/bin:$PATH"
-ENV PYTHONPATH="/app/src"
-
-# Expose Dash port
-EXPOSE 8050
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
- CMD curl -f http://localhost:8050/ || exit 1
-
-# Run Dash application
-CMD ["python", "dash_app/app.py"]
-```
-
-### Dash Requirements
-
-```txt
-# requirements-dash.txt
-dash>=2.14.0
-dash-bootstrap-components>=1.5.0
-plotly>=5.17.0
-pandas>=2.1.0
-numpy>=1.24.0
-gunicorn>=21.2.0
-```
-
-### Multi-Service Docker Compose
-
-```yaml
-# docker-compose.yml
-version: "3.8"
-
-services:
- # ML Pipeline Service
- ml-pipeline:
- build:
- context: .
- dockerfile: Dockerfile.pipeline
- container_name: personality-ml-pipeline
- restart: unless-stopped
-
- deploy:
- resources:
- limits:
- memory: 8G
- cpus: "4"
- reservations:
- memory: 2G
- cpus: "1"
-
- environment:
- - PERSONALITY_LOG_LEVEL=INFO
- - PERSONALITY_TESTING_MODE=false
- - RUNNING_IN_DOCKER=true
-
- volumes:
- - ./data:/app/data:ro
- - ./best_params:/app/best_params
- - ./submissions:/app/submissions
- - ./logs:/app/logs
- - model-artifacts:/app/models
-
- networks:
- - personality-net
-
- # Dash Web Application
- dash-app:
- build:
- context: .
- dockerfile: Dockerfile.dash
- container_name: personality-dash-app
- restart: unless-stopped
- ports:
- - "8050:8050"
-
- depends_on:
- - ml-pipeline
-
- environment:
- - DASH_HOST=0.0.0.0
- - DASH_PORT=8050
- - MODEL_PATH=/app/models
-
- volumes:
- - model-artifacts:/app/models:ro
- - ./data:/app/data:ro
-
- networks:
- - personality-net
-
- # Monitoring with Prometheus
- prometheus:
- image: prom/prometheus:latest
- container_name: prometheus
- ports:
- - "9090:9090"
- volumes:
- - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
- - prometheus-data:/prometheus
- networks:
- - personality-net
-
- # Visualization with Grafana
- grafana:
- image: grafana/grafana:latest
- container_name: grafana
- ports:
- - "3000:3000"
- environment:
- - GF_SECURITY_ADMIN_PASSWORD=admin
- volumes:
- - grafana-storage:/var/lib/grafana
- - ./monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards
- networks:
- - personality-net
-
-volumes:
- model-artifacts:
- prometheus-data:
- grafana-storage:
-
-networks:
- personality-net:
- driver: bridge
-```
-
-### Build and Deploy with Docker Compose
-
-```bash
-# Build all images
-docker-compose build
-
-# Start all services
-docker-compose up -d
-
-# View logs
-docker-compose logs -f dash-app
-docker-compose logs -f ml-pipeline
-
-# Scale pipeline instances
-docker-compose up --scale ml-pipeline=3 -d
-
-# Stop all services
-docker-compose down
-
-# Clean up (removes containers, networks, and volumes)
-docker-compose down -v
-```
-
-## โธ๏ธ Kubernetes Deployment
-
-### ML Pipeline Deployment
-
-````yaml
-# k8s/ml-pipeline-deployment.yaml
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: ml-pipeline
- labels:
- app: ml-pipeline
-spec:
- replicas: 2
- selector:
- matchLabels:
- app: ml-pipeline
- template:
- metadata:
- labels:
- app: ml-pipeline
- spec:
- containers:
- - name: ml-pipeline
- image: personality-ml-pipeline:latest
-
- resources:
- requests:
- memory: "2Gi"
- cpu: "500m"
- limits:
- memory: "8Gi"
- cpu: "2000m"
-
- env:
- - name: PERSONALITY_LOG_LEVEL
- value: "INFO"
- - name: RUNNING_IN_KUBERNETES
- value: "true"
-
- volumeMounts:
- - name: data-volume
- mountPath: /app/data
- readOnly: true
- - name: model-artifacts
- mountPath: /app/models
- - name: logs-volume
- mountPath: /app/logs
-
- livenessProbe:
- exec:
- command:
- - python
- - -c
- - "import src.modules.config; print('OK')"
- initialDelaySeconds: 60
- periodSeconds: 30
-
- readinessProbe:
- exec:
- command:
- - python
- - -c
- - "import src.modules.config; print('OK')"
- initialDelaySeconds: 30
- periodSeconds: 10
-
- volumes:
- - name: data-volume
- configMap:
- name: training-data
- - name: model-artifacts
- persistentVolumeClaim:
- claimName: model-artifacts-pvc
- - name: logs-volume
- persistentVolumeClaim:
- claimName: logs-pvc
-
----
-### Dash Application Deployment
-```yaml
-# k8s/dash-app-deployment.yaml
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: dash-app
- labels:
- app: dash-app
-spec:
- replicas: 3
- selector:
- matchLabels:
- app: dash-app
- template:
- metadata:
- labels:
- app: dash-app
- spec:
- containers:
- - name: dash-app
- image: personality-dash-app:latest
- ports:
- - containerPort: 8050
-
- resources:
- requests:
- memory: "1Gi"
- cpu: "200m"
- limits:
- memory: "4Gi"
- cpu: "1000m"
-
- env:
- - name: DASH_HOST
- value: "0.0.0.0"
- - name: DASH_PORT
- value: "8050"
- - name: MODEL_PATH
- value: "/app/models"
-
- volumeMounts:
- - name: model-artifacts
- mountPath: /app/models
- readOnly: true
- - name: data-volume
- mountPath: /app/data
- readOnly: true
-
- livenessProbe:
- httpGet:
- path: /
- port: 8050
- initialDelaySeconds: 30
- periodSeconds: 10
-
- readinessProbe:
- httpGet:
- path: /
- port: 8050
- initialDelaySeconds: 15
- periodSeconds: 5
-
- volumes:
- - name: model-artifacts
- persistentVolumeClaim:
- claimName: model-artifacts-pvc
- - name: data-volume
- configMap:
- name: training-data
-````
-
-### Services and Ingress
-
-```yaml
-# k8s/services.yaml
-apiVersion: v1
-kind: Service
-metadata:
- name: dash-app-service
-spec:
- selector:
- app: dash-app
- ports:
- - protocol: TCP
- port: 80
- targetPort: 8050
- type: ClusterIP
-
----
-apiVersion: v1
-kind: Service
-metadata:
- name: ml-pipeline-service
-spec:
- selector:
- app: ml-pipeline
- ports:
- - protocol: TCP
- port: 80
- targetPort: 8080
- type: ClusterIP
-
----
-# k8s/ingress.yaml
-apiVersion: networking.k8s.io/v1
-kind: Ingress
-metadata:
- name: personality-classifier-ingress
- annotations:
- nginx.ingress.kubernetes.io/rewrite-target: /
- nginx.ingress.kubernetes.io/ssl-redirect: "true"
- cert-manager.io/cluster-issuer: "letsencrypt-prod"
-spec:
- tls:
- - hosts:
- - personality.yourdomain.com
- secretName: personality-tls
- rules:
- - host: personality.yourdomain.com
- http:
- paths:
- - path: /
- pathType: Prefix
- backend:
- service:
- name: dash-app-service
- port:
- number: 80
- - path: /api
- pathType: Prefix
- backend:
- service:
- name: ml-pipeline-service
- port:
- number: 80
-```
-
-### Persistent Storage
-
-```yaml
-# k8s/storage.yaml
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- name: model-artifacts-pvc
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 10Gi
- storageClassName: fast-ssd
-
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- name: logs-pvc
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 5Gi
- storageClassName: standard
-
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: training-data
-data:
- # Add your training data files here
- # or mount from external storage
-```
-
-### Deploy to Kubernetes
-
-```bash
-# Build and push images to registry
-docker build -f Dockerfile.pipeline -t your-registry/personality-ml-pipeline:latest .
-docker build -f Dockerfile.dash -t your-registry/personality-dash-app:latest .
-
-docker push your-registry/personality-ml-pipeline:latest
-docker push your-registry/personality-dash-app:latest
-
-# Create namespace
-kubectl create namespace personality-classifier
-
-# Apply storage resources
-kubectl apply -f k8s/storage.yaml -n personality-classifier
-
-# Apply deployments
-kubectl apply -f k8s/ml-pipeline-deployment.yaml -n personality-classifier
-kubectl apply -f k8s/dash-app-deployment.yaml -n personality-classifier
-
-# Apply services and ingress
-kubectl apply -f k8s/services.yaml -n personality-classifier
-kubectl apply -f k8s/ingress.yaml -n personality-classifier
-
-# Check deployment status
-kubectl get all -n personality-classifier
-kubectl get pvc -n personality-classifier
-
-# View logs
-kubectl logs -f deployment/ml-pipeline -n personality-classifier
-kubectl logs -f deployment/dash-app -n personality-classifier
-
-# Scale deployments
-kubectl scale deployment dash-app --replicas=5 -n personality-classifier
-kubectl scale deployment ml-pipeline --replicas=3 -n personality-classifier
-
-# Port forward for local access (development)
-kubectl port-forward service/dash-app-service 8050:80 -n personality-classifier
-```
-
-## ๐ง Production Best Practices
-
-### Security Considerations
-
-```bash
-# Use secrets for sensitive configuration
-kubectl create secret generic model-secrets \
- --from-literal=api-key=your-api-key \
- --from-literal=db-password=your-password \
- -n personality-classifier
-
-# Apply security contexts
-securityContext:
- runAsNonRoot: true
- runAsUser: 1000
- fsGroup: 2000
- capabilities:
- drop:
- - ALL
-```
-
-### Backup Strategy
-
-```bash
-#!/bin/bash
-# backup.sh - Automated backup script
-
-# Backup model artifacts
-kubectl exec deployment/ml-pipeline -n personality-classifier -- \
- tar -czf /tmp/models-backup-$(date +%Y%m%d).tar.gz /app/models
-
-# Copy to persistent storage
-kubectl cp personality-classifier/ml-pipeline-pod:/tmp/models-backup-$(date +%Y%m%d).tar.gz \
- ./backups/models-backup-$(date +%Y%m%d).tar.gz
-
-# Upload to cloud storage (optional)
-aws s3 cp ./backups/models-backup-$(date +%Y%m%d).tar.gz \
- s3://your-backup-bucket/models/
-
-# Rotate old backups (keep last 30 days)
-find ./backups -name "models-backup-*.tar.gz" -mtime +30 -delete
-```
-
-### Health Checks and Monitoring
-
-```yaml
-# k8s/monitoring.yaml
-apiVersion: v1
-kind: Service
-metadata:
- name: prometheus-service
-spec:
- selector:
- app: prometheus
- ports:
- - port: 9090
- targetPort: 9090
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: prometheus
-spec:
- replicas: 1
- selector:
- matchLabels:
- app: prometheus
- template:
- metadata:
- labels:
- app: prometheus
- spec:
- containers:
- - name: prometheus
- image: prom/prometheus:latest
- ports:
- - containerPort: 9090
- volumeMounts:
- - name: prometheus-config
- mountPath: /etc/prometheus
- volumes:
- - name: prometheus-config
- configMap:
- name: prometheus-config
-```
-
-## ๐ Quick Deployment Commands
-
-### Local Development
-
-```bash
-# Quick start with Docker Compose
-git clone
-cd Personality-classification
-
-# Build and start all services
-docker-compose up --build -d
-
-# Access Dash application
-open http://localhost:8050
-
-# View logs
-docker-compose logs -f dash-app
-```
-
-### Production Deployment
-
-```bash
-# Build and push images
-docker build -f Dockerfile.pipeline -t your-registry/ml-pipeline:v1.0 .
-docker build -f Dockerfile.dash -t your-registry/dash-app:v1.0 .
-docker push your-registry/ml-pipeline:v1.0
-docker push your-registry/dash-app:v1.0
-
-# Deploy to Kubernetes
-kubectl create namespace personality-classifier
-kubectl apply -f k8s/ -n personality-classifier
-
-# Verify deployment
-kubectl get all -n personality-classifier
-kubectl logs -f deployment/dash-app -n personality-classifier
-```
-
-## ๐ Troubleshooting
-
-### Common Issues
-
-#### Container Memory Issues
-
-```bash
-# Check memory usage
-kubectl top pods -n personality-classifier
-
-# Increase memory limits in deployment
-resources:
- limits:
- memory: "16Gi" # Increase from 8Gi
-```
-
-#### Model Loading Problems
-
-```bash
-# Check persistent volumes
-kubectl get pvc -n personality-classifier
-
-# Verify model artifacts
-kubectl exec -it deployment/ml-pipeline -n personality-classifier -- ls -la /app/models
-```
-
-#### Dash Application Not Starting
-
-```bash
-# Check logs
-kubectl logs deployment/dash-app -n personality-classifier
-
-# Test local connectivity
-kubectl port-forward service/dash-app-service 8050:80 -n personality-classifier
-```
-
-#### Network Connectivity Issues
-
-```bash
-# Test service connectivity
-kubectl exec -it deployment/dash-app -n personality-classifier -- \
- curl http://ml-pipeline-service
-
-# Check ingress status
-kubectl get ingress -n personality-classifier
-kubectl describe ingress personality-classifier-ingress -n personality-classifier
-```
-
----
-
-## ๐ Additional Resources
-
-- **Docker Documentation**: [docs.docker.com](https://docs.docker.com)
-- **Kubernetes Documentation**: [kubernetes.io/docs](https://kubernetes.io/docs)
-- **Dash Documentation**: [dash.plotly.com](https://dash.plotly.com)
-- **Prometheus Monitoring**: [prometheus.io/docs](https://prometheus.io/docs)
-
----
-
-_This deployment guide focuses on containerized deployment with Docker and Kubernetes orchestration. For specific platform requirements or custom deployments, consult the platform documentation or create an issue in the repository._
diff --git a/docs/images/Dash_example1.png b/docs/images/Dash_example1.png
deleted file mode 100644
index c721f52..0000000
Binary files a/docs/images/Dash_example1.png and /dev/null differ
diff --git a/docs/images/Dash_example2.png b/docs/images/Dash_example2.png
deleted file mode 100644
index 1d3db17..0000000
Binary files a/docs/images/Dash_example2.png and /dev/null differ
diff --git a/docs/images/personality_classification_app.mp4 b/docs/images/personality_classification_app.mp4
new file mode 100644
index 0000000..af5c2ed
Binary files /dev/null and b/docs/images/personality_classification_app.mp4 differ
diff --git a/docs/mlops-infrastructure.md b/docs/mlops-infrastructure.md
deleted file mode 100644
index 9425192..0000000
--- a/docs/mlops-infrastructure.md
+++ /dev/null
@@ -1,504 +0,0 @@
-# MLOps Infrastructure Documentation
-
-## Overview
-
-This document describes the comprehensive MLOps (Machine Learning Operations) infrastructure implemented for the personality classification project. The MLOps system provides end-to-end lifecycle management for machine learning models, from development to production deployment and monitoring.
-
-## Architecture
-
-### Components
-
-1. **Experiment Tracking** (`ExperimentTracker`)
- - MLflow-based experiment tracking
- - Parameter and metric logging
- - Model artifacts management
- - Experiment comparison and analysis
-
-2. **Model Registry** (`ModelRegistry`)
- - Centralized model versioning
- - Model stage management (Development, Staging, Production)
- - Model lineage tracking
- - Automated model promotion workflows
-
-3. **Data Validation** (`DataValidator`)
- - Comprehensive data quality checks
- - Data drift detection
- - Schema validation
- - Statistical profiling
-
-4. **Model Monitoring** (`ModelMonitor`)
- - Real-time performance tracking
- - Data drift detection
- - Performance degradation alerts
- - Prediction logging and analysis
-
-5. **Model Serving** (`ModelServer`)
- - HTTP API for model inference
- - Batch prediction support
- - Model versioning in production
- - Health checks and monitoring
-
-6. **MLOps Pipeline** (`MLOpsPipeline`)
- - Integrated workflow orchestration
- - End-to-end pipeline automation
- - Cross-component coordination
-
-## Getting Started
-
-### Prerequisites
-
-```bash
-# Install MLOps dependencies
-pip install mlflow flask joblib
-
-# Or install with all dependencies
-pip install -e ".[dev]"
-```
-
-### Basic Usage
-
-```python
-from src.mlops import MLOpsPipeline
-
-# Initialize MLOps pipeline
-mlops = MLOpsPipeline(
- experiment_name="personality_classification",
- model_name="personality_model"
-)
-
-# Validate data
-validation_results = mlops.validate_and_track_data(train_data, test_data)
-
-# Train and track model
-training_results = mlops.train_and_track_model(
- model=your_model,
- X_train=X_train,
- y_train=y_train,
- X_test=X_test,
- y_test=y_test,
- model_params={"param1": "value1"},
- register_model=True
-)
-
-# Promote model to production
-mlops.promote_model(model_version="1", stage="Production")
-
-# Monitor production model
-monitoring_results = mlops.monitor_production_model(
- prediction_data=recent_predictions,
- reference_data=reference_dataset
-)
-```
-
-## Detailed Component Guide
-
-### 1. Experiment Tracking
-
-The `ExperimentTracker` provides comprehensive experiment management using MLflow.
-
-#### Key Features:
-- **Parameter Logging**: Hyperparameters, model configurations
-- **Metric Tracking**: Performance metrics, custom metrics
-- **Artifact Storage**: Models, plots, datasets
-- **Run Comparison**: Side-by-side experiment comparison
-
-#### Example Usage:
-```python
-tracker = ExperimentTracker("my_experiment")
-
-with tracker.start_run("model_training"):
- # Log parameters
- tracker.log_params({"learning_rate": 0.01, "batch_size": 32})
-
- # Train model
- model.fit(X_train, y_train)
-
- # Log metrics
- tracker.log_metrics({"accuracy": 0.95, "f1_score": 0.93})
-
- # Log model
- tracker.log_model(model, "model")
-
- # Log confusion matrix
- tracker.log_confusion_matrix(y_true, y_pred)
-```
-
-### 2. Model Registry
-
-The `ModelRegistry` manages model versions and deployment stages.
-
-#### Model Stages:
-- **None**: Initial registration
-- **Staging**: Testing and validation
-- **Production**: Live deployment
-- **Archived**: Deprecated models
-
-#### Example Usage:
-```python
-registry = ModelRegistry()
-
-# Register model
-model_version = registry.register_model(
- model_uri="runs:/run_id/model",
- name="personality_model",
- description="Random Forest classifier"
-)
-
-# Promote to production
-registry.promote_model("personality_model", "1", "Production")
-
-# Load production model
-model = registry.load_model("personality_model", stage="Production")
-```
-
-### 3. Data Validation
-
-The `DataValidator` ensures data quality and consistency.
-
-#### Validation Checks:
-- **Missing Data**: Null values, completeness
-- **Data Types**: Schema consistency
-- **Duplicates**: Row-level duplicates
-- **Outliers**: Statistical outlier detection
-- **Distributions**: Class balance, feature distributions
-- **Data Drift**: Distribution changes over time
-
-#### Example Usage:
-```python
-validator = DataValidator()
-
-# Validate dataset
-results = validator.validate_dataset(df, "train_data")
-
-# Check data quality score
-score = validator.get_data_quality_score("train_data")
-
-# Validate train/test split
-split_results = validator.validate_train_test_split(
- X_train, X_test, y_train, y_test
-)
-```
-
-### 4. Model Monitoring
-
-The `ModelMonitor` tracks model performance in production.
-
-#### Monitoring Capabilities:
-- **Performance Metrics**: Accuracy, F1-score, precision, recall
-- **Data Drift Detection**: Feature distribution changes
-- **Prediction Logging**: Request/response tracking
-- **Alerting**: Automatic issue detection
-- **Dashboard Data**: Real-time monitoring metrics
-
-#### Example Usage:
-```python
-monitor = ModelMonitor("personality_model")
-
-# Log predictions
-monitor.log_prediction(
- prediction=pred,
- features=input_features,
- confidence=confidence_score,
- actual=actual_value
-)
-
-# Calculate performance metrics
-metrics = monitor.calculate_performance_metrics(window_hours=24)
-
-# Detect data drift
-drift_results = monitor.detect_data_drift(reference_data)
-```
-
-### 5. Model Serving
-
-The `ModelServer` provides an interactive Dash-based dashboard for model inference and monitoring.
-
-#### Dashboard Features:
-- **๐ Interactive Dashboard**: Modern web-based interface
-- **๐ฎ Multiple Input Methods**: Manual forms, JSON input, file upload
-- **๐ Real-time Monitoring**: Live prediction history and statistics
-- **๐จ Beautiful UI**: Professional styling with confidence visualization
-- **๐ Auto-refresh**: Live updates of prediction history
-
-#### Example Usage:
-```python
-# Create interactive dashboard server
-server = ModelServer(
- model_name="personality_model",
- model_stage="Production",
- port=8050
-)
-
-# Run dashboard server
-server.run()
-# Access at http://localhost:8050
-```
-
-#### Dashboard Components:
-- **Model Status Cards**: Real-time model health and statistics
-- **Prediction Interface**: Multiple input methods with validation
-- **Results Visualization**: Confidence scores and probability distributions
-- **History Table**: Searchable prediction history with timestamps
-
-#### API Examples:
-```bash
-# Health check
-curl http://localhost:5000/health
-
-# Single prediction
-curl -X POST http://localhost:5000/predict \
- -H "Content-Type: application/json" \
- -d '{"features": {"feature1": 1.0, "feature2": 2.0}}'
-
-# Batch prediction
-curl -X POST http://localhost:5000/predict/batch \
- -H "Content-Type: application/json" \
- -d '{"instances": [{"feature1": 1.0}, {"feature1": 2.0}]}'
-```
-
-## Deployment Patterns
-
-### 1. Local Development
-```python
-# Run MLOps demo
-python examples/mlops_demo.py
-
-# Start MLflow UI
-mlflow ui
-
-# Start model server
-python -m src.mlops.serving --model-name personality_model
-```
-
-### 2. Docker Deployment
-```dockerfile
-FROM python:3.11-slim
-
-COPY requirements.txt .
-RUN pip install -r requirements.txt
-
-COPY src/ /app/src/
-WORKDIR /app
-
-EXPOSE 5000
-CMD ["python", "-m", "src.mlops.serving", "--model-name", "personality_model"]
-```
-
-### 3. Kubernetes Deployment
-```yaml
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: personality-model-server
-spec:
- replicas: 3
- selector:
- matchLabels:
- app: personality-model-server
- template:
- metadata:
- labels:
- app: personality-model-server
- spec:
- containers:
- - name: model-server
- image: personality-model:latest
- ports:
- - containerPort: 5000
- env:
- - name: MLFLOW_TRACKING_URI
- value: "http://mlflow-server:5000"
-```
-
-## CI/CD Integration
-
-### GitHub Actions Workflow
-```yaml
-name: MLOps Pipeline
-
-on:
- push:
- branches: [main]
- pull_request:
- branches: [main]
-
-jobs:
- data-validation:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - name: Setup Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.11'
- - name: Install dependencies
- run: pip install -e ".[dev]"
- - name: Validate data
- run: python scripts/validate_data.py
-
- model-training:
- needs: data-validation
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - name: Train model
- run: python scripts/train_model.py
- - name: Register model
- run: python scripts/register_model.py
-
- model-deployment:
- needs: model-training
- runs-on: ubuntu-latest
- if: github.ref == 'refs/heads/main'
- steps:
- - name: Deploy to staging
- run: python scripts/deploy_model.py --stage staging
- - name: Run integration tests
- run: python scripts/test_model_api.py
- - name: Promote to production
- run: python scripts/promote_model.py --stage production
-```
-
-## Monitoring and Alerting
-
-### Setting Up Alerts
-```python
-# Configure monitoring thresholds
-monitor = ModelMonitor("personality_model")
-
-# Set up performance degradation alerts
-baseline_metrics = {"accuracy": 0.85, "f1_score": 0.83}
-degradation_results = monitor.detect_performance_degradation(
- baseline_metrics,
- degradation_threshold=0.05 # 5% degradation threshold
-)
-
-# Set up data drift alerts
-drift_results = monitor.detect_data_drift(
- reference_data,
- drift_threshold=0.1 # 10% drift threshold
-)
-```
-
-### Dashboard Integration
-```python
-# Get dashboard data
-dashboard_data = monitor.get_monitoring_dashboard_data(hours=24)
-
-# Generate monitoring report
-report = monitor.generate_monitoring_report()
-```
-
-## Best Practices
-
-### 1. Experiment Organization
-- Use descriptive experiment names
-- Tag experiments with metadata
-- Document parameter choices
-- Compare similar experiments
-
-### 2. Model Versioning
-- Semantic versioning for models
-- Clear version descriptions
-- Tag models with deployment info
-- Maintain model lineage
-
-### 3. Data Quality
-- Validate all data inputs
-- Monitor for drift continuously
-- Set quality thresholds
-- Automate data checks
-
-### 4. Monitoring
-- Log all predictions
-- Track performance metrics
-- Set up alerting thresholds
-- Regular monitoring reviews
-
-### 5. Security
-- Secure MLflow tracking server
-- API authentication/authorization
-- Data privacy compliance
-- Audit trail maintenance
-
-## Troubleshooting
-
-### Common Issues
-
-1. **MLflow Connection Errors**
- ```python
- # Check MLflow server status
- import mlflow
- print(mlflow.get_tracking_uri())
- ```
-
-2. **Model Loading Issues**
- ```python
- # Verify model exists
- registry = ModelRegistry()
- models = registry.list_models()
- print([m.name for m in models])
- ```
-
-3. **Data Validation Failures**
- ```python
- # Check validation details
- validator = DataValidator()
- results = validator.validate_dataset(df)
- print(results['missing_data'])
- ```
-
-4. **Monitoring Data Issues**
- ```python
- # Check monitoring logs
- monitor = ModelMonitor("model_name")
- dashboard = monitor.get_monitoring_dashboard_data()
- print(f"Total predictions: {dashboard['total_predictions']}")
- ```
-
-## Performance Optimization
-
-### 1. MLflow Optimization
-- Use artifact stores (S3, Azure Blob)
-- Configure database backend
-- Enable model caching
-
-### 2. Serving Optimization
-- Use model serialization (joblib, pickle)
-- Implement request batching
-- Add response caching
-
-### 3. Monitoring Optimization
-- Aggregate metrics efficiently
-- Use sampling for large volumes
-- Implement data retention policies
-
-## Future Enhancements
-
-1. **Advanced Monitoring**
- - A/B testing framework
- - Feature importance tracking
- - Bias detection and mitigation
-
-2. **Automated Workflows**
- - Auto-retaining on drift
- - Automated model selection
- - Self-healing deployments
-
-3. **Integration Enhancements**
- - Kubernetes operators
- - Stream processing integration
- - Multi-cloud deployment
-
-4. **Observability**
- - Distributed tracing
- - Custom metrics collection
- - Performance profiling
-
-## Support and Resources
-
-- **Documentation**: See `/docs` directory
-- **Examples**: See `/examples` directory
-- **Issues**: GitHub Issues
-- **MLflow Docs**: https://mlflow.org/docs/latest/
-- **Flask Docs**: https://flask.palletsprojects.com/
diff --git a/docs/mlops-integration-summary.md b/docs/mlops-integration-summary.md
deleted file mode 100644
index 5fc9145..0000000
--- a/docs/mlops-integration-summary.md
+++ /dev/null
@@ -1,225 +0,0 @@
-# MLOps Integration for Six-Stack Personality Classification Pipeline
-
-## Overview
-
-The Six-Stack Personality Classification Pipeline has been enhanced with comprehensive MLOps infrastructure that seamlessly integrates with the existing modular architecture. This integration provides production-ready capabilities while maintaining backward compatibility.
-
-## Integration Features
-
-### Backward Compatibility
-- The pipeline works exactly as before when MLOps components are not available
-- Graceful degradation: MLOps failures don't break the core pipeline
-- Optional enable/disable flag for MLOps functionality
-
-### MLOps Components Integrated
-
-#### 1. **Experiment Tracking** (MLflow)
-- Automatic experiment creation and run tracking
-- Parameter logging (hyperparameters, configuration)
-- Metrics logging (CV scores, ensemble weights, performance metrics)
-- Artifact logging (models, predictions, metadata)
-
-#### 2. **Data Validation**
-- Training and test data quality checks
-- Schema validation and data drift detection
-- Automated data profiling and anomaly detection
-- Statistical validation of feature distributions
-
-#### 3. **Model Registry**
-- Automatic model registration with versioning
-- Model staging (Staging → Production)
-- Model lineage tracking
-- Easy model loading and deployment
-
-#### 4. **Model Monitoring**
-- Prediction monitoring and drift detection
-- Performance tracking over time
-- Alert generation for model degradation
-- Dashboard-ready metrics collection
-
-#### 5. **Serving Infrastructure**
-- REST API for model inference
-- Batch prediction capabilities
-- Health checks and model reloading
-- Scalable deployment ready
-
-## Usage
-
-### Basic Usage (No Changes Required)
-```python
-# Existing code works exactly the same
-from src.main_modular import main
-
-if __name__ == "__main__":
- main()
-```
-
-### With MLOps Enabled
-```python
-# MLOps is automatically enabled if components are available
-# No code changes needed - everything is handled internally
-from src.main_modular import main
-
-if __name__ == "__main__":
- main() # Now includes MLOps tracking, validation, monitoring
-```
-
-### Customizing MLOps Behavior
-```python
-from src.main_modular import MLOpsIntegration
-
-# Create custom MLOps configuration
-mlops = MLOpsIntegration(enable_mlops=True)
-
-# Use in your own workflows
-mlops.start_experiment("custom_experiment")
-mlops.log_parameters({"custom_param": "value"})
-mlops.log_metrics({"custom_metric": 0.95})
-mlops.end_experiment()
-```
-
-## Key Benefits
-
-### **Production Ready**
-- **Experiment Tracking**: Full visibility into model training and performance
-- **Reproducibility**: All parameters, metrics, and artifacts are tracked
-- **Model Versioning**: Automatic versioning with promotion workflows
-- **Monitoring**: Real-time performance and drift monitoring
-
-### **Developer Friendly**
-- **Zero Breaking Changes**: Existing code continues to work
-- **Gradual Adoption**: Enable MLOps features incrementally
-- **Error Handling**: Robust error handling prevents MLOps issues from breaking training
-- **Logging**: Comprehensive logging for debugging and monitoring
-
-### **Data Science Workflow**
-- **Experiment Comparison**: Compare different runs and configurations
-- **Model Selection**: Track which models perform best
-- **Performance Tracking**: Monitor model performance over time
-- **Data Quality**: Automated data validation and drift detection
-
-## Technical Implementation
-
-### Code Structure
-```
-src/
-├── main_modular.py          # Enhanced with MLOpsIntegration class
-├── mlops/                   # MLOps infrastructure
-│   ├── __init__.py
-│   ├── experiment_tracking.py
-│   ├── data_validation.py
-│   ├── model_registry.py
-│   ├── monitoring.py
-│   ├── serving.py
-│   └── pipeline.py
-└── modules/
-    ├── config.py            # Enhanced with MLOps config
-    └── ...                  # Existing modules unchanged
-```
-
-### Integration Points
-
-1. **Data Loading**: Automatic data validation after loading
-2. **Training**: Experiment tracking throughout the training process
-3. **Model Building**: Parameter and metric logging for each stack
-4. **Ensemble**: Ensemble weights and performance tracking
-5. **Prediction**: Model registration and monitoring setup
-
-### Error Handling Strategy
-- **Graceful Degradation**: MLOps failures log warnings but don't stop training
-- **Optional Dependencies**: Pipeline works without MLOps dependencies
-- **Comprehensive Logging**: All MLOps operations are logged for debugging
-
-## Configuration
-
-### Environment Variables
-```bash
-# MLflow Configuration
-export MLFLOW_TRACKING_URI="sqlite:///mlflow.db"
-export MLFLOW_EXPERIMENT_NAME="six_stack_personality"
-
-# Model Registry
-export MODEL_REGISTRY_NAME="six_stack_ensemble"
-```
-
-### Config Options
-```python
-# In modules/config.py
-ENABLE_MLOPS = True
-MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"
-MLFLOW_EXPERIMENT_NAME = "six_stack_personality"
-MODEL_REGISTRY_NAME = "six_stack_ensemble"
-```
-
-## Monitoring and Observability
-
-### Metrics Tracked
-- **Training Metrics**: CV scores for each stack, ensemble performance
-- **Data Metrics**: Data quality scores, drift detection results
-- **Model Metrics**: Registration success, version numbers
-- **Pipeline Metrics**: Execution time, success/failure rates
-
-### Dashboards Available
-- **Experiment Tracking**: MLflow UI for experiment comparison
-- **Model Performance**: Real-time performance monitoring
-- **Data Quality**: Data drift and quality dashboards
-- **System Health**: Pipeline execution and error monitoring
-
-## Deployment
-
-### Local Development
-```bash
-# Start MLflow UI
-mlflow ui --backend-store-uri sqlite:///mlflow.db
-
-# Run pipeline with MLOps
-python src/main_modular.py
-```
-
-### Production Deployment
-```bash
-# Set up MLflow tracking server
-mlflow server --backend-store-uri postgresql://user:pass@host/db \
- --default-artifact-root s3://mlflow-artifacts/
-
-# Deploy model serving API
-python -m mlops.serving --model-name six_stack_ensemble --port 8080
-```
-
-## Testing
-
-```bash
-# Test MLOps integration
-python test_mlops_integration.py
-
-# Test individual components
-python -m pytest src/mlops/tests/
-```
-
-## Future Enhancements
-
-### Planned Features
-- **A/B Testing**: Framework for model A/B testing
-- **Auto-retraining**: Triggered retraining based on drift detection
-- **Multi-environment**: Support for dev/staging/prod environments
-- **Advanced Monitoring**: More sophisticated performance metrics
-- **CI/CD Integration**: Automated model validation and deployment
-
-### Extension Points
-- **Custom Validators**: Easy to add domain-specific data validators
-- **Custom Metrics**: Framework for custom monitoring metrics
-- **Plugin Architecture**: Support for different MLOps backends
-- **Integration APIs**: Easy integration with other ML platforms
-
-## Summary
-
-The MLOps integration transforms the Six-Stack Personality Classification Pipeline into a production-ready, enterprise-grade machine learning system while maintaining the simplicity and modularity of the original design. The integration provides:
-
-✅ **Complete MLOps Infrastructure**
-✅ **Zero Breaking Changes**
-✅ **Production Ready**
-✅ **Comprehensive Monitoring**
-✅ **Easy Deployment**
-✅ **Excellent Documentation**
-
-This implementation demonstrates advanced MLOps skills and provides a solid foundation for scaling machine learning operations in production environments.
diff --git a/docs/performance-tuning.md b/docs/performance-tuning.md
index aa3ce58..2558a98 100644
--- a/docs/performance-tuning.md
+++ b/docs/performance-tuning.md
@@ -1,60 +1,24 @@
# Performance Tuning Guide
-## Overview
+## Performance Tuning Guide
-This guide provides comprehensive strategies for optimizing the Six-Stack Personality Classification Pipeline performance across different dimensions: speed, memory usage, accuracy, and resource utilization.
-
-## Performance Dimensions
-
-### 1. Training Speed
-
-- Hyperparameter optimization trials
-- Data augmentation complexity
-- Threading configuration
-- Model complexity
-
-### 2. Memory Efficiency
-
-- Dataset size management
-- Model memory footprint
-- Parallel processing overhead
-- Synthetic data generation
-
-### 3. Prediction Accuracy
-
-- Ensemble optimization
-- Cross-validation strategy
-- Feature engineering
-- Model diversity
-
-### 4. Resource Utilization
-
-- CPU core usage
-- Memory allocation
-- I/O optimization
-- Caching strategies
-
-## Speed Optimization
-
-### Quick Development Setup
+### Key Levers
+- Training speed: TESTING_MODE, N_TRIALS_STACK, N_TRIALS_BLEND
+- Memory: TESTING_SAMPLE_SIZE, ENABLE_DATA_AUGMENTATION
+- Accuracy: Ensemble optimization, feature engineering
+- Resource: N_JOBS, THREAD_COUNT
+### Recommended Settings
+**Fast Dev:**
```python
-# Ultra-fast configuration for development iteration
TESTING_MODE = True
TESTING_SAMPLE_SIZE = 500
N_TRIALS_STACK = 3
N_TRIALS_BLEND = 20
ENABLE_DATA_AUGMENTATION = False
-SHOW_PROGRESS_BARS = False
-
-# Expected time: 2-3 minutes
-# Accuracy trade-off: 2-3% lower than production
```
-
-### Balanced Development Setup
-
+**Balanced:**
```python
-# Moderate speed with reasonable accuracy
TESTING_MODE = True
TESTING_SAMPLE_SIZE = 1000
N_TRIALS_STACK = 10
diff --git a/docs/pre-commit-guide.md b/docs/pre-commit-guide.md
deleted file mode 100644
index 7a89f09..0000000
--- a/docs/pre-commit-guide.md
+++ /dev/null
@@ -1,162 +0,0 @@
-# Pre-commit Setup Guide
-
-This project uses [pre-commit](https://pre-commit.com/) to ensure code quality and consistency.
-
-## Installation
-
-Pre-commit is automatically installed when you run:
-
-```bash
-make setup-env
-# or
-make install-dev
-```
-
-To manually install pre-commit hooks:
-
-```bash
-make pre-commit-install
-# or
-uv run pre-commit install
-```
-
-## Usage
-
-### Automatic (Recommended)
-
-Pre-commit will automatically run on every `git commit`. If any checks fail, the commit will be blocked until issues are fixed.
-
-### Manual Execution
-
-Run on staged files only:
-
-```bash
-make pre-commit-run
-# or
-uv run pre-commit run
-```
-
-Run on all files:
-
-```bash
-make pre-commit-all
-# or
-uv run pre-commit run --all-files
-```
-
-## Configured Hooks
-
-### Code Formatting
-
-- **Black**: Python code formatter
-- **isort**: Import sorting
-- **Ruff**: Fast Python linter and formatter
-- **Prettier**: Markdown, YAML, JSON formatting
-
-### Code Quality
-
-- **Ruff**: Comprehensive Python linting
-- **Bandit**: Security vulnerability scanner
-- **MyPy**: Static type checking (optional)
-
-### Documentation
-
-- **Pydocstyle**: Docstring style checking (Google convention)
-
-### General
-
-- **Trailing whitespace removal**
-- **End-of-file fixing**
-- **Large file detection**
-- **Merge conflict detection**
-- **YAML/TOML/JSON validation**
-
-### Jupyter Notebooks
-
-- **nbstripout**: Remove notebook outputs
-- **nbqa**: Apply formatters to notebooks
-
-## Configuration
-
-Pre-commit configuration is in `.pre-commit-config.yaml`.
-
-Tool-specific configurations are in `pyproject.toml`:
-
-- `[tool.black]`
-- `[tool.isort]`
-- `[tool.ruff]`
-- `[tool.bandit]`
-- `[tool.pydocstyle]`
-- `[tool.mypy]`
-
-## Bypassing Hooks
-
-In emergency situations, you can bypass pre-commit:
-
-```bash
-git commit --no-verify -m "Emergency fix"
-```
-
-**Note**: This should be used sparingly and issues should be fixed in follow-up commits.
-
-## Troubleshooting
-
-### Hook Installation Issues
-
-```bash
-# Reinstall hooks
-uv run pre-commit clean
-uv run pre-commit install
-```
-
-### Update Hook Versions
-
-```bash
-uv run pre-commit autoupdate
-```
-
-### Skip Specific Hooks
-
-```bash
-SKIP=mypy git commit -m "Skip MyPy for this commit"
-```
-
-## IDE Integration
-
-Most IDEs can be configured to run these tools automatically:
-
-### VS Code
-
-Install extensions:
-
-- Python
-- Black Formatter
-- isort
-- Ruff
-- Prettier
-
-### PyCharm
-
-Enable:
-
-- Black integration
-- isort integration
-- Pre-commit integration plugin
-
-## Makefile Targets
-
-The following Makefile targets are available for code quality:
-
-```bash
-make format # Format code with ruff
-make lint # Lint code with ruff
-make check # Run linting and formatting checks
-make fix # Auto-fix issues
-make pre-commit-install # Install pre-commit hooks
-make pre-commit-run # Run on staged files
-make pre-commit-all # Run on all files
-```
-
-## CI/CD Integration
-
-Pre-commit runs automatically on GitHub Actions and other CI platforms. Some intensive hooks (like MyPy) are skipped on CI for performance.
diff --git a/docs/technical-guide.md b/docs/technical-guide.md
index bd59c4c..ee00f70 100644
--- a/docs/technical-guide.md
+++ b/docs/technical-guide.md
@@ -10,10 +10,6 @@ This document provides a deep technical dive into the Six-Stack Personality Clas
The pipeline follows **SOLID principles** and **separation of concerns**:
-- **Single Responsibility**: Each module has one clear purpose
-- **Open/Closed**: Easy to extend without modifying existing code
-- **Dependency Inversion**: High-level modules don't depend on low-level details
-- **Interface Segregation**: Clean, focused interfaces between modules
### Core Architecture Pattern
@@ -42,10 +38,6 @@ Each stack is designed to capture different aspects of the data:
#### Stack A: Gradient Boosting Core (Narrow)
-- **Purpose**: Stable baseline with conservative hyperparameters
-- **Models**: XGBoost, LightGBM, CatBoost
-- **Meta-learner**: Adaptive (Logistic Regression, Ridge, or XGBoost)
-- **Search Space**: Conservative ranges (500-1000 estimators)
```python
# Example hyperparameter ranges for Stack A
@@ -58,6 +50,25 @@ xgb_params = {
}
```
+# Technical Guide
+
+## Philosophy
+- Modular, SOLID design
+- Separation of concerns: data, processing, models, config, ensemble, utils
+
+## Stacks
+- A: Stable baseline (XGBoost, LightGBM, CatBoost)
+- B: Wide search
+- C: XGBoost/CatBoost specialists
+- D: Sklearn ensemble
+- E: Neural networks
+- F: Noise-robust
+
+## Advanced Features
+- Optuna hyperparameter optimization
+- SDV Copula data augmentation
+- Out-of-fold ensemble blending
+
#### Stack B: Gradient Boosting Core (Wide)
- **Purpose**: Broader exploration of hyperparameter space
diff --git a/pre-commit-output.txt b/pre-commit-output.txt
deleted file mode 100644
index 67cd458..0000000
--- a/pre-commit-output.txt
+++ /dev/null
@@ -1,242 +0,0 @@
- Building personality-classification @ file:///Users/jv/Documents/GitHub/Personality-classification
- Built personality-classification @ file:///Users/jv/Documents/GitHub/Personality-classification
-Uninstalled 1 package in 0.99ms
-Installed 1 package in 2ms
-trim trailing whitespace.................................................Passed
-fix end of files.........................................................Passed
-check yaml...........................................(no files to check)Skipped
-check toml...............................................................Passed
-check json...........................................(no files to check)Skipped
-check for added large files..............................................Passed
-check for case conflicts.................................................Passed
-check for merge conflicts................................................Passed
-debug statements (python)................................................Passed
-check docstring is first.................................................Passed
-check that executables have shebangs.................(no files to check)Skipped
-check that scripts with shebangs are executable..........................Failed
-- hook id: check-shebang-scripts-are-executable
-- exit code: 1
-
-src/six_stack_personality_classifier.py: has a shebang but is not marked executable!
- If it is supposed to be executable, try: `chmod +x src/six_stack_personality_classifier.py`
- If on Windows, you may also need to: `git add --chmod=+x src/six_stack_personality_classifier.py`
- If it not supposed to be executable, double-check its shebang is wanted.
-
-src/main_modular.py: has a shebang but is not marked executable!
- If it is supposed to be executable, try: `chmod +x src/main_modular.py`
- If on Windows, you may also need to: `git add --chmod=+x src/main_modular.py`
- If it not supposed to be executable, double-check its shebang is wanted.
-
-mixed line ending........................................................Passed
-fix utf-8 byte order marker..............................................Passed
-black....................................................................Passed
-isort....................................................................Failed
-- hook id: isort
-- files were modified by this hook
-
-Fixing /Users/jv/Documents/GitHub/Personality-classification/src/modules/config.py
-Fixing /Users/jv/Documents/GitHub/Personality-classification/src/modules/data_augmentation.py
-Fixing /Users/jv/Documents/GitHub/Personality-classification/src/modules/data_loader.py
-Fixing /Users/jv/Documents/GitHub/Personality-classification/src/modules/ensemble.py
-Fixing /Users/jv/Documents/GitHub/Personality-classification/src/modules/model_builders.py
-Fixing /Users/jv/Documents/GitHub/Personality-classification/src/modules/optimization.py
-Fixing /Users/jv/Documents/GitHub/Personality-classification/src/modules/preprocessing.py
-Fixing /Users/jv/Documents/GitHub/Personality-classification/src/six_stack_personality_classifier.py
-
-ruff.....................................................................Failed
-- hook id: ruff
-- exit code: 1
-
-src/modules/data_augmentation.py:431:5: PLR0912 Too many branches (18 > 12)
-src/modules/data_augmentation.py:609:5: PLR0912 Too many branches (16 > 12)
-src/modules/optimization.py:109:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-src/modules/optimization.py:151:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-src/modules/optimization.py:193:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-src/modules/optimization.py:235:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-src/modules/optimization.py:282:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-src/modules/preprocessing.py:16:5: PLR0912 Too many branches (20 > 12)
-src/modules/preprocessing.py:320:5: PLR0912 Too many branches (19 > 12)
-src/six_stack_personality_classifier.py:231:5: PLR0912 Too many branches (18 > 12)
-src/six_stack_personality_classifier.py:530:5: PLR0912 Too many branches (20 > 12)
-src/six_stack_personality_classifier.py:840:5: PLR0912 Too many branches (19 > 12)
-src/six_stack_personality_classifier.py:1668:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-src/six_stack_personality_classifier.py:1710:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-src/six_stack_personality_classifier.py:1752:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-src/six_stack_personality_classifier.py:1794:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-src/six_stack_personality_classifier.py:1841:13: B904 Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
-Found 17 errors.
-
-ruff-format..............................................................Passed
-bandit...................................................................Passed
-pydocstyle...............................................................Failed
-- hook id: pydocstyle
-- exit code: 1
-
-src/modules/data_loader.py:1 at module level:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/modules/data_loader.py:1 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/modules/data_loader.py:13 in public function `load_data_with_external_merge`:
- D205: 1 blank line required between summary line and description (found 0)
-src/modules/data_loader.py:13 in public function `load_data_with_external_merge`:
- D212: Multi-line docstring summary should start at the first line
-src/main_modular.py:2 at module level:
- D205: 1 blank line required between summary line and description (found 0)
-src/main_modular.py:2 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/main_modular.py:2 at module level:
- D415: First line should end with a period, question mark, or exclamation point (not ')')
-src/modules/model_builders.py:1 at module level:
- D205: 1 blank line required between summary line and description (found 0)
-src/modules/model_builders.py:1 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/modules/model_builders.py:32 in public function `build_stack`:
- D202: No blank lines allowed after function docstring (found 1)
-src/modules/model_builders.py:32 in public function `build_stack`:
- D415: First line should end with a period, question mark, or exclamation point (not 'y')
-src/modules/model_builders.py:177 in public function `build_stack_c`:
- D202: No blank lines allowed after function docstring (found 1)
-src/modules/__init__.py:1 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/modules/__init__.py:1 at module level:
- D415: First line should end with a period, question mark, or exclamation point (not 's')
-src/modules/preprocessing.py:1 at module level:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/modules/preprocessing.py:1 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/modules/preprocessing.py:19 in public function `prep`:
- D212: Multi-line docstring summary should start at the first line
-src/modules/preprocessing.py:71 in private nested function `fill_missing_by_quantile_group`:
- D415: First line should end with a period, question mark, or exclamation point (not ')')
-src/modules/preprocessing.py:226 in public function `add_pseudo_labeling_conservative`:
- D212: Multi-line docstring summary should start at the first line
-src/modules/preprocessing.py:328 in public function `create_domain_balanced_dataset`:
- D205: 1 blank line required between summary line and description (found 0)
-src/modules/preprocessing.py:328 in public function `create_domain_balanced_dataset`:
- D212: Multi-line docstring summary should start at the first line
-src/six_stack_personality_classifier.py:2 at module level:
- D205: 1 blank line required between summary line and description (found 0)
-src/six_stack_personality_classifier.py:2 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/six_stack_personality_classifier.py:2 at module level:
- D415: First line should end with a period, question mark, or exclamation point (not 't')
-src/six_stack_personality_classifier.py:93 in public function `load_data_with_external_merge`:
- D205: 1 blank line required between summary line and description (found 0)
-src/six_stack_personality_classifier.py:93 in public function `load_data_with_external_merge`:
- D212: Multi-line docstring summary should start at the first line
-src/six_stack_personality_classifier.py:168 in public function `simple_mixed_augmentation`:
- D202: No blank lines allowed after function docstring (found 1)
-src/six_stack_personality_classifier.py:168 in public function `simple_mixed_augmentation`:
- D415: First line should end with a period, question mark, or exclamation point (not 's')
-src/six_stack_personality_classifier.py:232 in public function `sdv_augmentation`:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/six_stack_personality_classifier.py:232 in public function `sdv_augmentation`:
- D212: Multi-line docstring summary should start at the first line
-src/six_stack_personality_classifier.py:232 in public function `sdv_augmentation`:
- D415: First line should end with a period, question mark, or exclamation point (not 'g')
-src/six_stack_personality_classifier.py:368 in public function `smotenc_augmentation`:
- D415: First line should end with a period, question mark, or exclamation point (not 'a')
-src/six_stack_personality_classifier.py:420 in public function `apply_data_augmentation`:
- D415: First line should end with a period, question mark, or exclamation point (not 'd')
-src/six_stack_personality_classifier.py:465 in public function `augment_data_conservative`:
- D205: 1 blank line required between summary line and description (found 0)
-src/six_stack_personality_classifier.py:465 in public function `augment_data_conservative`:
- D212: Multi-line docstring summary should start at the first line
-src/six_stack_personality_classifier.py:533 in public function `prep`:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/six_stack_personality_classifier.py:533 in public function `prep`:
- D212: Multi-line docstring summary should start at the first line
-src/six_stack_personality_classifier.py:576 in private nested function `fill_missing_by_quantile_group`:
- D415: First line should end with a period, question mark, or exclamation point (not ')')
-src/six_stack_personality_classifier.py:713 in public function `add_label_noise`:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/six_stack_personality_classifier.py:713 in public function `add_label_noise`:
- D212: Multi-line docstring summary should start at the first line
-src/six_stack_personality_classifier.py:746 in public function `add_pseudo_labeling_conservative`:
- D212: Multi-line docstring summary should start at the first line
-src/six_stack_personality_classifier.py:848 in public function `create_domain_balanced_dataset`:
- D205: 1 blank line required between summary line and description (found 0)
-src/six_stack_personality_classifier.py:848 in public function `create_domain_balanced_dataset`:
- D212: Multi-line docstring summary should start at the first line
-src/six_stack_personality_classifier.py:1049 in public function `build_stack`:
- D202: No blank lines allowed after function docstring (found 1)
-src/six_stack_personality_classifier.py:1049 in public function `build_stack`:
- D415: First line should end with a period, question mark, or exclamation point (not 'n')
-src/six_stack_personality_classifier.py:1953 in public function `main`:
- D415: First line should end with a period, question mark, or exclamation point (not 'n')
-src/modules/config.py:1 at module level:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/modules/config.py:1 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/modules/ensemble.py:1 at module level:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/modules/ensemble.py:1 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/modules/data_augmentation.py:1 at module level:
- D205: 1 blank line required between summary line and description (found 0)
-src/modules/data_augmentation.py:1 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/modules/data_augmentation.py:368 in public function `simple_mixed_augmentation`:
- D202: No blank lines allowed after function docstring (found 1)
-src/modules/data_augmentation.py:612 in public function `apply_data_augmentation`:
- D202: No blank lines allowed after function docstring (found 1)
-src/modules/optimization.py:1 at module level:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/modules/optimization.py:1 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/modules/optimization.py:58 in public function `add_label_noise`:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/modules/optimization.py:58 in public function `add_label_noise`:
- D212: Multi-line docstring summary should start at the first line
-src/modules/utils.py:1 at module level:
- D200: One-line docstring should fit on one line with quotes (found 3)
-src/modules/utils.py:1 at module level:
- D212: Multi-line docstring summary should start at the first line
-src/modules/utils.py:11 in public function `add_label_noise`:
- D212: Multi-line docstring summary should start at the first line
-
-mypy.....................................................................Failed
-- hook id: mypy
-- exit code: 1
-
-src/main_modular.py:77: error: Function "builtins.callable" is not valid as a type [valid-type]
-src/main_modular.py:77: note: Perhaps you meant "typing.Callable" instead of "callable"?
-src/main_modular.py:249: error: Function "builtins.callable" is not valid as a type [valid-type]
-src/main_modular.py:249: note: Perhaps you meant "typing.Callable" instead of "callable"?
-src/main_modular.py:272: error: Function "builtins.callable" is not valid as a type [valid-type]
-src/main_modular.py:272: note: Perhaps you meant "typing.Callable" instead of "callable"?
-src/main_modular.py:355: error: Function "builtins.callable" is not valid as a type [valid-type]
-src/main_modular.py:355: note: Perhaps you meant "typing.Callable" instead of "callable"?
-src/main_modular.py:364: error: callable? not callable [misc]
-src/main_modular.py:372: error: callable? not callable [misc]
-src/main_modular.py:388: error: "object" has no attribute "inverse_transform" [attr-defined]
-src/main_modular.py:401: error: Function "builtins.callable" is not valid as a type [valid-type]
-src/main_modular.py:401: note: Perhaps you meant "typing.Callable" instead of "callable"?
-src/main_modular.py:419: error: callable? not callable [misc]
-src/main_modular.py:427: error: callable? not callable [misc]
-Found 10 errors in 1 file (checked 11 source files)
-
-nbqa-black...............................................................Passed
-nbqa-isort...............................................................Passed
-nbqa-ruff................................................................Passed
-nbstripout...........................................(no files to check)Skipped
-prettier.................................................................Failed
-- hook id: prettier
-- files were modified by this hook
-
-.github/AUTHORS.md
-.github/ISSUE_TEMPLATE/issue_template.md
-docs/configuration.md
-docs/deployment.md
-.github/CONTRIBUTORS.md
-README.md
-docs/api-reference.md
-docs/technical-guide.md
-.github/pull_request_template.md
-docs/README.md
-docs/performance-tuning.md
-src/modules/README.md
-docs/data-augmentation.md
-
-CRLF end-lines remover...................................................Passed
-Tabs remover.............................................................Passed
diff --git a/pyproject.toml b/pyproject.toml
index 936f90f..acbfb3a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,27 +24,23 @@ dependencies = [
"numpy>=1.24.0,<2.0.0",
"pandas>=2.0.0,<3.0.0",
"scikit-learn>=1.3.0,<1.6.0",
-
# Advanced ML models (gradient boosting)
"catboost>=1.2.0,<2.0.0",
"lightgbm>=4.0.0,<5.0.0",
"xgboost>=2.0.0,<3.0.0",
-
# Statistical computing and preprocessing
"scipy>=1.11.0,<2.0.0",
- "imbalanced-learn>=0.11.0,<1.0.0", # For SMOTE data augmentation
-
+ "imbalanced-learn>=0.11.0,<1.0.0", # For SMOTE data augmentation
# Hyperparameter optimization
"optuna>=3.4.0,<4.0.0",
-
# Data augmentation and synthetic data generation
- "sdv>=1.24.0,<2.0.0", # For advanced synthetic data
-
+ "sdv>=1.24.0,<2.0.0", # For advanced synthetic data
# Model serialization and utilities
"joblib>=1.3.0,<2.0.0",
-
# Web application framework
"dash>=2.14.0,<3.0.0",
+ "dash-bootstrap-components>=1.7.1",
+ "plotly>=5.24.1",
]
[project.optional-dependencies]
@@ -172,3 +168,20 @@ skips = ["B101", "B601"] # Skip assert_used and shell_injection in paramiko
[tool.pydocstyle]
convention = "google"
add-ignore = ["D100", "D104", "D105"] # Allow missing docstrings for modules, packages, magic methods
+
+# MyPy configuration (type checking)
+[tool.mypy]
+python_version = "3.11"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = false
+ignore_missing_imports = true
+no_strict_optional = true
+explicit_package_bases = true
+namespace_packages = true
+exclude = [
+ "tests/",
+ "scripts/",
+ "examples/",
+ "catboost_info/",
+]
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..c84f2e4
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1 @@
+"""Main source package for personality classification pipeline."""
diff --git a/src/modules/config.py b/src/modules/config.py
index 1406b76..b023117 100644
--- a/src/modules/config.py
+++ b/src/modules/config.py
@@ -23,7 +23,7 @@ class Paths(Enum):
TRAIN_CSV = DATA_DIR / "train.csv"
TEST_CSV = DATA_DIR / "test.csv"
SAMPLE_SUBMISSION_CSV = DATA_DIR / "sample_submission.csv"
- PERSONALITY_DATASET_CSV = DATA_DIR / "personality_dataset.csv"
+ PERSONALITY_DATASET_CSV = DATA_DIR / "personality_datasert.csv"
# Log files
PERSONALITY_CLASSIFIER_LOG = BASE_DIR / "personality_classifier.log"
diff --git a/src/modules/data_loader.py b/src/modules/data_loader.py
index f2e2a74..07aba89 100644
--- a/src/modules/data_loader.py
+++ b/src/modules/data_loader.py
@@ -2,6 +2,7 @@
import pandas as pd
+from .config import Paths
from .utils import get_logger
logger = get_logger(__name__)
@@ -17,17 +18,18 @@ def load_data_with_external_merge():
"""
logger.info("๐ Loading data with advanced merge strategy...")
- # Load original datasets
- df_tr = pd.read_csv("./data/train.csv")
- df_te = pd.read_csv("./data/test.csv")
- submission = pd.read_csv("./data/sample_submission.csv")
+ # Use Paths enum from config.py for all file paths
+ df_tr = pd.read_csv(Paths.TRAIN_CSV.value)
+ df_te = pd.read_csv(Paths.TEST_CSV.value)
+ submission = pd.read_csv(Paths.SAMPLE_SUBMISSION_CSV.value)
logger.info(f"Original train shape: {df_tr.shape}")
logger.info(f"Original test shape: {df_te.shape}")
# Load external dataset using advanced merge strategy
+
try:
- df_external = pd.read_csv("./data/personality_datasert.csv")
+ df_external = pd.read_csv(Paths.PERSONALITY_DATASET_CSV.value)
logger.info(f"External dataset shape: {df_external.shape}")
# Rename Personality column to match_p for clarity
@@ -52,7 +54,6 @@ def load_data_with_external_merge():
logger.info(f"External dataset shape after deduplication: {df_external.shape}")
# Merge with training and test data to create match_p feature
- # This adds the match_p column as a new feature
df_tr = df_tr.merge(df_external, how="left", on=merge_cols)
df_te = df_te.merge(df_external, how="left", on=merge_cols)
diff --git a/tests/conftest.py b/tests/conftest.py
index d73635b..a00fd36 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -22,11 +22,13 @@
# Import Dash app components for testing
try:
- from dash_app.src import PersonalityClassifierApp
+ from dash_app.dashboard.app import PersonalityClassifierApp
+ from dash_app.dashboard.model_loader import ModelLoader
DASH_AVAILABLE = True
except ImportError:
PersonalityClassifierApp = None
+ ModelLoader = None
DASH_AVAILABLE = False
@@ -187,6 +189,40 @@ def mock_environment_variables():
os.environ.update(original_env)
+@pytest.fixture
+def mock_model_file(temp_dir):
+ """Create a mock model file for testing."""
+ model_file = temp_dir / "test_model.pkl"
+ model_file.write_text("mock_model_data")
+ return str(model_file)
+
+
+@pytest.fixture
+def sample_prediction_data():
+ """Sample input data for dashboard predictions."""
+ return {
+ "time_alone": 3.0,
+ "social_events": 2.0,
+ "going_outside": 4.0,
+ "friends_size": 3.0,
+ "post_freq": 2.0,
+ "stage_fear": 1.0,
+ "drained_social": 2.0,
+ }
+
+
+@pytest.fixture
+def sample_prediction_probabilities():
+ """Sample prediction probabilities for testing."""
+ return {
+ "Extroversion": 0.8,
+ "Agreeableness": 0.6,
+ "Conscientiousness": 0.7,
+ "Neuroticism": 0.4,
+ "Openness": 0.9,
+ }
+
+
# Custom assertions for ML testing
def assert_model_performance(y_true, y_pred, min_accuracy: float = 0.5):
"""Assert that model performance meets minimum requirements."""
diff --git a/tests/dash_app/test_callbacks.py b/tests/dash_app/test_callbacks.py
new file mode 100644
index 0000000..eaf2974
--- /dev/null
+++ b/tests/dash_app/test_callbacks.py
@@ -0,0 +1,237 @@
+"""Tests for dashboard callback functions."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+from dash import Dash
+
+from dash_app.dashboard.callbacks import register_callbacks
+
+
+class TestCallbackRegistration:
+ """Test suite for callback registration."""
+
+ def test_register_callbacks_success(self):
+ """Test successful callback registration."""
+ # Create mock objects
+ mock_app = MagicMock(spec=Dash)
+ mock_model_loader = MagicMock()
+ prediction_history = []
+
+ # Should not raise any exceptions
+ register_callbacks(mock_app, mock_model_loader, prediction_history)
+
+ # Verify that callbacks were registered (app.callback should be called)
+ assert mock_app.callback.called
+
+ def test_register_callbacks_with_history(self):
+ """Test callback registration with existing prediction history."""
+ mock_app = MagicMock(spec=Dash)
+ mock_model_loader = MagicMock()
+ prediction_history = [
+ {"timestamp": "2025-01-15", "prediction": {"Extroversion": 0.8}}
+ ]
+
+ register_callbacks(mock_app, mock_model_loader, prediction_history)
+ assert mock_app.callback.called
+
+
+class TestPredictionCallback:
+ """Test suite for prediction callback functionality."""
+
+ @pytest.fixture
+ def mock_setup(self):
+ """Set up mocks for testing prediction callback."""
+ mock_app = MagicMock(spec=Dash)
+ mock_model_loader = MagicMock()
+ prediction_history = []
+
+ # Configure mock model loader
+ mock_model_loader.predict.return_value = {
+ "Extroversion": 0.8,
+ "Agreeableness": 0.6,
+ "Conscientiousness": 0.7,
+ "Neuroticism": 0.4,
+ "Openness": 0.9,
+ }
+
+ return mock_app, mock_model_loader, prediction_history
+
+ def test_prediction_callback_registration(self, mock_setup):
+ """Test that prediction callback is properly registered."""
+ mock_app, mock_model_loader, prediction_history = mock_setup
+
+ register_callbacks(mock_app, mock_model_loader, prediction_history)
+
+ # Verify callback was registered
+ assert mock_app.callback.called
+ # Should have at least one callback call for the prediction
+ assert mock_app.callback.call_count >= 1
+
+ def test_prediction_with_valid_inputs(self, mock_setup):
+ """Test prediction callback with valid input values."""
+ mock_app, mock_model_loader, prediction_history = mock_setup
+
+ # Register callbacks
+ register_callbacks(mock_app, mock_model_loader, prediction_history)
+
+ # Get the registered callback function
+ callback_calls = mock_app.callback.call_args_list
+ assert len(callback_calls) > 0
+
+ # Find the prediction callback (it should be the one with most State parameters)
+ prediction_callback = None
+ for call in callback_calls:
+ args, kwargs = call
+ if len(args) >= 2: # Output, Input, State...
+ prediction_callback = args
+ break
+
+ assert prediction_callback is not None
+
+ def test_model_loader_integration(self, mock_setup):
+ """Test integration with model loader."""
+ mock_app, mock_model_loader, prediction_history = mock_setup
+
+ register_callbacks(mock_app, mock_model_loader, prediction_history)
+
+ # Verify model_loader is passed to the callback registration
+ assert mock_app.callback.called
+
+
+class TestCallbackErrorHandling:
+ """Test error handling in callbacks."""
+
+ def test_callback_with_none_model_loader(self):
+ """Test callback registration with None model loader."""
+ mock_app = MagicMock(spec=Dash)
+ prediction_history = []
+
+ # Should handle None model_loader gracefully
+ register_callbacks(mock_app, None, prediction_history)
+ assert mock_app.callback.called
+
+ def test_callback_with_none_history(self):
+ """Test callback registration with None prediction history."""
+ mock_app = MagicMock(spec=Dash)
+ mock_model_loader = MagicMock()
+
+ # Should handle None prediction_history gracefully
+ register_callbacks(mock_app, mock_model_loader, None)
+ assert mock_app.callback.called
+
+ def test_callback_with_invalid_app(self):
+ """Test callback registration with invalid app object."""
+ mock_model_loader = MagicMock()
+ prediction_history = []
+
+ # Should handle invalid app object
+ with pytest.raises(AttributeError):
+ register_callbacks("invalid_app", mock_model_loader, prediction_history)
+
+
+class TestCallbackInputValidation:
+ """Test input validation in callbacks."""
+
+ @pytest.fixture
+ def callback_function_mock(self):
+ """Mock the actual callback function for testing."""
+ with patch("dash_app.dashboard.callbacks.register_callbacks") as mock_register:
+ # Create a mock prediction function
+ def mock_prediction_callback(
+ n_clicks,
+ time_alone,
+ social_events,
+ going_outside,
+ friends_size,
+ post_freq,
+ stage_fear,
+ drained_social,
+ ):
+ # Simulate input validation
+ if n_clicks is None or n_clicks == 0:
+ return "No prediction made"
+
+ # Validate input ranges
+ inputs = [
+ time_alone,
+ social_events,
+ going_outside,
+ friends_size,
+ post_freq,
+ stage_fear,
+ drained_social,
+ ]
+
+ if any(x is None for x in inputs):
+ return "Invalid input: None values"
+
+ if any(not isinstance(x, int | float) for x in inputs):
+ return "Invalid input: Non-numeric values"
+
+ return "Valid prediction"
+
+ mock_register.return_value = mock_prediction_callback
+ yield mock_prediction_callback
+
+ def test_callback_with_none_clicks(self, callback_function_mock):
+ """Test callback behavior with no button clicks."""
+ result = callback_function_mock(None, 3.0, 2.0, 4.0, 3.0, 2.0, 1.0, 2.0)
+ assert result == "No prediction made"
+
+ def test_callback_with_zero_clicks(self, callback_function_mock):
+ """Test callback behavior with zero button clicks."""
+ result = callback_function_mock(0, 3.0, 2.0, 4.0, 3.0, 2.0, 1.0, 2.0)
+ assert result == "No prediction made"
+
+ def test_callback_with_none_inputs(self, callback_function_mock):
+ """Test callback behavior with None input values."""
+ result = callback_function_mock(1, None, 2.0, 4.0, 3.0, 2.0, 1.0, 2.0)
+ assert result == "Invalid input: None values"
+
+ def test_callback_with_invalid_inputs(self, callback_function_mock):
+ """Test callback behavior with invalid input types."""
+ result = callback_function_mock(1, "invalid", 2.0, 4.0, 3.0, 2.0, 1.0, 2.0)
+ assert result == "Invalid input: Non-numeric values"
+
+ def test_callback_with_valid_inputs(self, callback_function_mock):
+ """Test callback behavior with valid inputs."""
+ result = callback_function_mock(1, 3.0, 2.0, 4.0, 3.0, 2.0, 1.0, 2.0)
+ assert result == "Valid prediction"
+
+
+class TestCallbackHistoryManagement:
+ """Test prediction history management in callbacks."""
+
+ def test_history_updates_after_prediction(self):
+ """Test that prediction history is updated after successful prediction."""
+ mock_app = MagicMock(spec=Dash)
+ mock_model_loader = MagicMock()
+ prediction_history = []
+
+ # Configure mock to return a prediction
+ mock_model_loader.predict.return_value = {
+ "Extroversion": 0.8,
+ "Agreeableness": 0.6,
+ }
+
+ register_callbacks(mock_app, mock_model_loader, prediction_history)
+
+ # Verify that the history list reference is maintained
+ assert isinstance(prediction_history, list)
+
+ def test_history_size_limit(self):
+ """Test that prediction history respects size limits."""
+ # This would test if there's a maximum history size implementation
+ prediction_history = [{"test": f"prediction_{i}"} for i in range(1000)]
+ mock_app = MagicMock(spec=Dash)
+ mock_model_loader = MagicMock()
+
+ register_callbacks(mock_app, mock_model_loader, prediction_history)
+
+ # The function should handle large histories gracefully
+ assert isinstance(prediction_history, list)
+
+
+if __name__ == "__main__":
+ pytest.main([__file__])
diff --git a/tests/dash_app/test_dash_application.py b/tests/dash_app/test_dash_application.py
new file mode 100644
index 0000000..4fdab43
--- /dev/null
+++ b/tests/dash_app/test_dash_application.py
@@ -0,0 +1,318 @@
+"""Tests for the main Dash application class."""
+
+from unittest.mock import MagicMock, patch
+
+import dash
+import pytest
+
+from dash_app.dashboard.app import PersonalityClassifierApp, create_app
+
+
+class TestPersonalityClassifierApp:
+ """Test suite for PersonalityClassifierApp class."""
+
+ def test_app_initialization_default_params(self):
+ """Test app initialization with default parameters."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ assert app.model_name == "test_model"
+ assert app.model_version is None
+ assert app.model_stage == "Production"
+ assert app.host == "127.0.0.1"
+ assert app.port == 8050
+
+ def test_app_initialization_custom_params(self):
+ """Test app initialization with custom parameters."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(
+ model_name="custom_model",
+ model_version="v1.0",
+ model_stage="Staging",
+ host="0.0.0.0",
+ port=9000,
+ )
+
+ assert app.model_name == "custom_model"
+ assert app.model_version == "v1.0"
+ assert app.model_stage == "Staging"
+ assert app.host == "0.0.0.0"
+ assert app.port == 9000
+
+ def test_app_has_dash_instance(self):
+ """Test that app creates a Dash instance."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ assert hasattr(app, "app")
+ assert isinstance(app.app, dash.Dash)
+
+ def test_app_title_configuration(self):
+ """Test that app title is configured correctly."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ assert "test_model" in app.app.title
+
+ def test_app_layout_is_set(self):
+ """Test that app layout is properly set."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.return_value = MagicMock()
+
+ with patch("dash_app.dashboard.app.create_layout") as mock_layout:
+ mock_layout.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ assert app.app.layout is not None
+
+ def test_app_callbacks_registration(self):
+ """Test that callbacks are registered."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.return_value = MagicMock()
+
+ with patch("dash_app.dashboard.app.register_callbacks") as mock_callbacks:
+ PersonalityClassifierApp(model_name="test_model")
+
+ # Verify register_callbacks was called
+ mock_callbacks.assert_called_once()
+
+ def test_app_prediction_history_initialization(self):
+ """Test that prediction history is initialized."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ assert hasattr(app, "prediction_history")
+ assert isinstance(app.prediction_history, list)
+ assert len(app.prediction_history) == 0
+
+ def test_get_app_method(self):
+ """Test the get_app method."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+ dash_app = app.get_app()
+
+ assert isinstance(dash_app, dash.Dash)
+ assert dash_app is app.app
+
+
+class TestAppRunning:
+ """Test suite for app running functionality."""
+
+ def test_app_run_method_exists(self):
+ """Test that run method exists."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ assert hasattr(app, "run")
+ assert callable(app.run)
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_app_run_with_debug_false(self, mock_loader):
+ """Test app running with debug=False."""
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ # Mock the Dash app's run_server method
+ app.app.run_server = MagicMock()
+
+ app.run(debug=False)
+
+ # Verify run_server was called with correct parameters
+ app.app.run_server.assert_called_once_with(
+ host="127.0.0.1", port=8050, debug=False
+ )
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_app_run_with_debug_true(self, mock_loader):
+ """Test app running with debug=True."""
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+ app.app.run_server = MagicMock()
+
+ app.run(debug=True)
+
+ app.app.run_server.assert_called_once_with(
+ host="127.0.0.1", port=8050, debug=True
+ )
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_app_run_with_custom_host_port(self, mock_loader):
+ """Test app running with custom host and port."""
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(
+ model_name="test_model", host="0.0.0.0", port=9000
+ )
+ app.app.run_server = MagicMock()
+
+ app.run()
+
+ app.app.run_server.assert_called_once_with(
+ host="0.0.0.0", port=9000, debug=False
+ )
+
+
+class TestCreateAppFunction:
+ """Test suite for the create_app function."""
+
+ def test_create_app_function_exists(self):
+ """Test that create_app function exists."""
+ assert callable(create_app)
+
+ @patch("dash_app.dashboard.app.PersonalityClassifierApp")
+ def test_create_app_with_default_params(self, mock_app_class):
+ """Test create_app function with default parameters."""
+ mock_instance = MagicMock()
+ mock_app_class.return_value = mock_instance
+
+ result = create_app("test_model")
+
+ mock_app_class.assert_called_once_with(
+ model_name="test_model", model_version=None, model_stage="Production"
+ )
+ assert result == mock_instance.get_app.return_value
+
+ @patch("dash_app.dashboard.app.PersonalityClassifierApp")
+ def test_create_app_with_custom_params(self, mock_app_class):
+ """Test create_app function with custom parameters."""
+ mock_instance = MagicMock()
+ mock_app_class.return_value = mock_instance
+
+ result = create_app(
+ model_name="custom_model", model_version="v2.0", model_stage="Staging"
+ )
+
+ mock_app_class.assert_called_once_with(
+ model_name="custom_model", model_version="v2.0", model_stage="Staging"
+ )
+ assert result == mock_instance.get_app.return_value
+
+
+class TestAppErrorHandling:
+ """Test error handling in app initialization and running."""
+
+ def test_app_with_invalid_model_name(self):
+ """Test app initialization with invalid model name."""
+ with patch("dash_app.dashboard.app.ModelLoader") as mock_loader:
+ mock_loader.side_effect = FileNotFoundError("Model not found")
+
+ with pytest.raises(FileNotFoundError):
+ PersonalityClassifierApp(model_name="nonexistent_model")
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_app_with_model_loading_error(self, mock_loader):
+ """Test app behavior when model loading fails."""
+ mock_loader.side_effect = OSError("Model loading failed")
+
+ with pytest.raises(OSError): # More specific exception
+ PersonalityClassifierApp(model_name="test_model")
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_app_run_server_error(self, mock_loader):
+ """Test app behavior when run_server fails."""
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+ app.app.run_server = MagicMock(side_effect=OSError("Server start failed"))
+
+ with pytest.raises(OSError):
+ app.run()
+
+
+class TestAppIntegration:
+ """Integration tests for the complete app."""
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_full_app_initialization_workflow(self, mock_loader):
+ """Test complete app initialization workflow."""
+ # Setup mock model loader
+ mock_model = MagicMock()
+ mock_model.predict.return_value = {
+ "Extroversion": 0.8,
+ "Agreeableness": 0.6,
+ "Conscientiousness": 0.7,
+ "Neuroticism": 0.4,
+ "Openness": 0.9,
+ }
+ mock_loader.return_value = mock_model
+
+ # Initialize app
+ app = PersonalityClassifierApp(model_name="ensemble_model")
+
+ # Verify all components are properly set up
+ assert app.model_name == "ensemble_model"
+ assert isinstance(app.app, dash.Dash)
+ assert app.app.layout is not None
+ assert isinstance(app.prediction_history, list)
+
+ # Verify model loader was called
+ mock_loader.assert_called_once()
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_app_with_real_model_path(self, mock_loader):
+ """Test app with realistic model path."""
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="models/ensemble_model.pkl")
+
+ assert app.model_name == "models/ensemble_model.pkl"
+ # Verify model loader was called with the path
+ mock_loader.assert_called_once()
+
+
+class TestAppConfiguration:
+ """Test app configuration and settings."""
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_app_external_stylesheets(self, mock_loader):
+ """Test that external stylesheets are properly configured."""
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ # Check that the app has external stylesheets configured
+ # Since Dash doesn't expose external_stylesheets directly, we check the config
+ assert hasattr(app.app, "config")
+ # Verify the app was created with stylesheets (implicit test)
+ assert app.app is not None
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_app_suppress_callback_exceptions(self, mock_loader):
+ """Test that callback exceptions are properly configured."""
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ # Should suppress callback exceptions for dynamic layouts
+ assert app.app.config.suppress_callback_exceptions is True
+
+ @patch("dash_app.dashboard.app.ModelLoader")
+ def test_app_logging_configuration(self, mock_loader):
+ """Test that logging is properly configured."""
+ mock_loader.return_value = MagicMock()
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ assert hasattr(app, "logger")
+ assert app.logger is not None
+
+
+if __name__ == "__main__":
+ pytest.main([__file__])
diff --git a/tests/dash_app/test_dashboard_functional.py b/tests/dash_app/test_dashboard_functional.py
new file mode 100644
index 0000000..5c1a5dc
--- /dev/null
+++ b/tests/dash_app/test_dashboard_functional.py
@@ -0,0 +1,193 @@
+"""Simplified functional tests for dashboard components."""
+
+from unittest.mock import MagicMock, patch
+
+import dash_bootstrap_components as dbc
+import pytest
+
+from dash_app.dashboard.app import PersonalityClassifierApp, create_app
+from dash_app.dashboard.layout import create_layout, create_professional_header
+from dash_app.dashboard.model_loader import ModelLoader
+
+
+class TestDashboardFunctionality:
+ """Test the actual dashboard functionality."""
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_app_initialization(self, mock_load_model):
+ """Test that the app initializes correctly."""
+ mock_load_model.return_value = None
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ assert app.model_name == "test_model"
+ assert app.host == "127.0.0.1"
+ assert app.port == 8050
+ assert app.app is not None
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_app_with_custom_params(self, mock_load_model):
+ """Test app with custom parameters."""
+ mock_load_model.return_value = None
+
+ app = PersonalityClassifierApp(
+ model_name="custom_model", model_version="v1.0", host="0.0.0.0", port=9000
+ )
+
+ assert app.model_name == "custom_model"
+ assert app.model_version == "v1.0"
+ assert app.host == "0.0.0.0"
+ assert app.port == 9000
+
+ def test_create_app_function(self):
+ """Test the create_app factory function."""
+ with patch("dash_app.dashboard.app.PersonalityClassifierApp") as mock_app:
+ mock_instance = MagicMock()
+ mock_app.return_value = mock_instance
+
+ create_app("test_model")
+
+ mock_app.assert_called_once_with(
+ model_name="test_model", model_version=None, model_stage="Production"
+ )
+
+ def test_layout_creation(self):
+ """Test layout creation."""
+ model_name = "test_model"
+ model_metadata = {"version": "1.0"}
+
+ layout = create_layout(model_name, model_metadata)
+
+ assert layout is not None
+
+ def test_professional_header_creation(self):
+ """Test professional header creation."""
+ header = create_professional_header()
+
+ # The function returns a dbc.Container, not html.Div
+ assert isinstance(header, dbc.Container)
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_model_loader_initialization(self, mock_load_model):
+ """Test model loader initialization."""
+ mock_load_model.return_value = None
+
+ loader = ModelLoader("test_model")
+
+ assert loader.model_name == "test_model"
+ assert loader.model_stage == "Production"
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_app_has_prediction_history(self, mock_load_model):
+ """Test that app has prediction history."""
+ mock_load_model.return_value = None
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ assert hasattr(app, "prediction_history")
+ assert isinstance(app.prediction_history, list)
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_app_has_callback_registration(self, mock_load_model):
+ """Test that callbacks are registered."""
+ mock_load_model.return_value = None
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ # Check that the app has callbacks registered
+ assert hasattr(app.app, "callback_map")
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_app_run_method(self, mock_load_model):
+ """Test app run method."""
+ mock_load_model.return_value = None
+
+ app = PersonalityClassifierApp(model_name="test_model")
+ app.app.run_server = MagicMock()
+
+ app.run(debug=True)
+
+ app.app.run_server.assert_called_once_with(
+ host="127.0.0.1", port=8050, debug=True
+ )
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_get_app_method(self, mock_load_model):
+ """Test get_app method."""
+ mock_load_model.return_value = None
+
+ app = PersonalityClassifierApp(model_name="test_model")
+ dash_app = app.get_app()
+
+ assert dash_app is app.app
+
+
+class TestModelLoaderFunctionality:
+ """Test model loader functionality."""
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_model_loader_attributes(self, mock_load_model):
+ """Test model loader has correct attributes."""
+ mock_load_model.return_value = None
+
+ loader = ModelLoader("test_model", "v1.0", "Staging")
+
+ assert loader.model_name == "test_model"
+ assert loader.model_version == "v1.0"
+ assert loader.model_stage == "Staging"
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_model_loader_has_model_attribute(self, mock_load_model):
+ """Test that model loader has model attribute."""
+ mock_load_model.return_value = None
+
+ loader = ModelLoader("test_model")
+
+ assert hasattr(loader, "model")
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_model_loader_has_metadata(self, mock_load_model):
+ """Test that model loader has metadata."""
+ mock_load_model.return_value = None
+
+ loader = ModelLoader("test_model")
+
+ assert hasattr(loader, "model_metadata")
+ assert isinstance(loader.model_metadata, dict)
+
+
+class TestIntegrationWorkflow:
+ """Test integration workflow."""
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_complete_app_creation_workflow(self, mock_load_model):
+ """Test complete app creation workflow."""
+ mock_load_model.return_value = None
+
+ # Create app
+ app = PersonalityClassifierApp(model_name="ensemble_model")
+
+ # Verify all components are set up
+ assert app.model_name == "ensemble_model"
+ assert app.app is not None
+ assert app.app.layout is not None
+ assert app.model_loader is not None
+ assert isinstance(app.prediction_history, list)
+
+ @patch("dash_app.dashboard.model_loader.ModelLoader._load_model")
+ def test_app_scalability(self, mock_load_model):
+ """Test that multiple apps can be created."""
+ mock_load_model.return_value = None
+
+ apps = []
+ for i in range(3):
+ app = PersonalityClassifierApp(model_name=f"model_{i}")
+ apps.append(app)
+
+ assert len(apps) == 3
+ for app in apps:
+ assert app.app is not None
+
+
+if __name__ == "__main__":
+ pytest.main([__file__])
diff --git a/tests/dash_app/test_integration.py b/tests/dash_app/test_integration.py
new file mode 100644
index 0000000..7b5a71b
--- /dev/null
+++ b/tests/dash_app/test_integration.py
@@ -0,0 +1,245 @@
+"""Integration tests for the complete dashboard pipeline."""
+
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from dash_app.dashboard.app import PersonalityClassifierApp
+
+
+class TestDashboardIntegration:
+ """Integration tests for the complete dashboard workflow."""
+
+ @pytest.fixture
+ def temp_model_file(self):
+ """Create a temporary model file for testing."""
+ with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as f:
+ temp_path = f.name
+ yield temp_path
+ # Cleanup
+ Path(temp_path).unlink(missing_ok=True)
+
+ @patch("joblib.load")
+ def test_complete_dashboard_workflow(self, mock_joblib_load, temp_model_file):
+ """Test complete dashboard workflow from initialization to prediction."""
+ # Setup mock model
+ mock_model = MagicMock()
+ mock_model.predict_proba.return_value = [
+ [0.2, 0.8, 0.4, 0.6, 0.3, 0.7, 0.6, 0.4, 0.1, 0.9]
+ ]
+ mock_joblib_load.return_value = mock_model
+
+ # Initialize dashboard with mock model
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+
+ app = PersonalityClassifierApp(
+ model_name="ensemble", host="127.0.0.1", port=8050
+ )
+
+ # Verify app is properly initialized
+ assert app.model_name == "ensemble"
+ assert app.host == "127.0.0.1"
+ assert app.port == 8050
+ assert app.app is not None
+ assert app.app.layout is not None
+
+ def test_dashboard_with_invalid_model_path(self):
+ """Test dashboard behavior with invalid model path."""
+ # PersonalityClassifierApp doesn't raise FileNotFoundError - it creates dummy models
+ app = PersonalityClassifierApp(model_name="nonexistent_model")
+ assert app.model_name == "nonexistent_model"
+ assert app.app is not None
+
+ @patch("joblib.load")
+ def test_dashboard_layout_rendering(self, mock_joblib_load, temp_model_file):
+ """Test that dashboard layout renders correctly."""
+ mock_model = MagicMock()
+ mock_joblib_load.return_value = mock_model
+
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ # Verify layout components exist
+ layout = app.app.layout
+ assert layout is not None
+
+ @patch("joblib.load")
+ def test_dashboard_callbacks_registration(self, mock_joblib_load, temp_model_file):
+ """Test that dashboard callbacks are properly registered."""
+ mock_model = MagicMock()
+ mock_joblib_load.return_value = mock_model
+
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ # Verify that callbacks are registered (app should have callback registry)
+ assert hasattr(app.app, "callback_map")
+
+
+class TestDashboardErrorRecovery:
+ """Test dashboard error recovery and graceful degradation."""
+
+ @patch("joblib.load")
+ def test_dashboard_with_corrupted_model(self, mock_joblib_load):
+ """Test dashboard behavior with corrupted model."""
+ mock_joblib_load.side_effect = OSError("Corrupted model file")
+
+ # PersonalityClassifierApp handles corrupted models gracefully with dummy fallback
+ app = PersonalityClassifierApp(model_name="corrupted_model")
+ assert app.model_name == "corrupted_model"
+ assert app.app is not None
+
+ @patch("joblib.load")
+ def test_dashboard_handles_prediction_errors(self, mock_joblib_load):
+ """Test dashboard handles prediction errors gracefully."""
+ # Setup mock model that fails during prediction
+ mock_model = MagicMock()
+ mock_model.predict_proba.side_effect = ValueError("Prediction failed")
+ mock_joblib_load.return_value = mock_model
+
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+
+ # Should initialize successfully even if model has issues
+ app = PersonalityClassifierApp(model_name="test_model")
+ assert app is not None
+
+
+class TestDashboardPerformance:
+ """Test dashboard performance and resource usage."""
+
+ @patch("joblib.load")
+ def test_dashboard_memory_usage(self, mock_joblib_load):
+ """Test that dashboard doesn't create memory leaks."""
+ mock_model = MagicMock()
+ mock_joblib_load.return_value = mock_model
+
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+
+ # Create multiple app instances
+ apps = []
+ for i in range(5):
+ app = PersonalityClassifierApp(model_name=f"test_model_{i}")
+ apps.append(app)
+
+ # Each should be independent
+ assert len(apps) == 5
+ for app in apps:
+ assert app.app is not None
+
+ @patch("joblib.load")
+ def test_dashboard_startup_time(self, mock_joblib_load):
+ """Test dashboard startup performance."""
+ mock_model = MagicMock()
+ mock_joblib_load.return_value = mock_model
+
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ # Verify that startup is reasonably fast
+ assert app.app is not None
+
+
+class TestDashboardConfiguration:
+ """Test dashboard configuration options."""
+
+ @patch("joblib.load")
+ def test_dashboard_custom_configuration(self, mock_joblib_load):
+ """Test dashboard with custom configuration."""
+ mock_model = MagicMock()
+ mock_joblib_load.return_value = mock_model
+
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+
+ app = PersonalityClassifierApp(
+ model_name="custom_model",
+ model_version="v2.0",
+ model_stage="Staging",
+ host="0.0.0.0",
+ port=9000,
+ )
+
+ assert app.model_name == "custom_model"
+ assert app.model_version == "v2.0"
+ assert app.model_stage == "Staging"
+ assert app.host == "0.0.0.0"
+ assert app.port == 9000
+
+ @patch("joblib.load")
+ def test_dashboard_environment_variables(self, mock_joblib_load):
+ """Test dashboard respects environment configuration."""
+ mock_model = MagicMock()
+ mock_joblib_load.return_value = mock_model
+
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+ # Test with environment-like configuration
+ with patch.dict("os.environ", {"DASH_HOST": "0.0.0.0", "DASH_PORT": "9000"}):
+ app = PersonalityClassifierApp(model_name="test_model")
+
+            # App ignores DASH_HOST/DASH_PORT env vars and falls back to its defaults
+ assert app.host == "127.0.0.1" # Default value
+ assert app.port == 8050 # Default value
+
+
+class TestDashboardScalability:
+ """Test dashboard scalability and concurrent usage."""
+
+ @patch("joblib.load")
+ def test_dashboard_concurrent_initialization(self, mock_joblib_load):
+ """Test multiple dashboard instances can be created concurrently."""
+ mock_model = MagicMock()
+ mock_joblib_load.return_value = mock_model
+
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+
+ # Test creating multiple app instances
+ apps = []
+ for i in range(3):
+ app = PersonalityClassifierApp(model_name=f"model_{i}")
+ apps.append(app)
+
+ # All should succeed
+ assert len(apps) == 3
+ for app in apps:
+ assert isinstance(app, PersonalityClassifierApp)
+
+ @patch("joblib.load")
+ def test_dashboard_prediction_history_management(self, mock_joblib_load):
+ """Test prediction history management under load."""
+ mock_model = MagicMock()
+ mock_joblib_load.return_value = mock_model
+
+ with patch("dash_app.dashboard.model_loader.Path.exists") as mock_exists:
+ mock_exists.return_value = True
+
+ app = PersonalityClassifierApp(model_name="test_model")
+
+ # Simulate adding many predictions to history
+ for i in range(100):
+ app.prediction_history.append(
+ {
+                    "timestamp": f"2025-01-15T{i // 60:02d}:{i % 60:02d}:00",
+ "prediction": {"Extroversion": 0.8},
+ }
+ )
+
+ assert len(app.prediction_history) == 100
+ # History should be manageable even with many entries
+ assert isinstance(app.prediction_history, list)
+
+
+if __name__ == "__main__":
+ pytest.main([__file__])
diff --git a/tests/dash_app/test_layout_components.py b/tests/dash_app/test_layout_components.py
new file mode 100644
index 0000000..86a871b
--- /dev/null
+++ b/tests/dash_app/test_layout_components.py
@@ -0,0 +1,205 @@
+"""Tests for dashboard layout components."""
+
+import dash_bootstrap_components as dbc
+import plotly.graph_objects as go
+import pytest
+from dash import html
+
+from dash_app.dashboard.layout import (
+ create_input_panel,
+ create_layout,
+ create_personality_radar,
+ create_professional_header,
+ format_prediction_result,
+)
+
+
+class TestLayoutComponents:
+ """Test suite for layout components."""
+
+ def test_create_professional_header(self):
+ """Test professional header creation."""
+ header = create_professional_header()
+
+ # The header returns a dbc.Container, not html.Div
+ assert isinstance(header, dbc.Container)
+ # Check for required styling
+ assert hasattr(header, "style")
+ # Check for children components
+ assert hasattr(header, "children")
+
+ def test_create_input_panel(self):
+ """Test input panel creation."""
+ panel = create_input_panel()
+
+ assert isinstance(panel, dbc.Card)
+ # Should have card header and body
+ assert hasattr(panel, "children")
+
+ def test_create_layout_structure(self):
+ """Test main layout structure."""
+ model_name = "test_model"
+ model_metadata = {"version": "1.0", "created": "2025-01-01"}
+
+ layout = create_layout(model_name, model_metadata)
+
+ assert isinstance(layout, html.Div)
+ assert hasattr(layout, "children")
+ assert len(layout.children) >= 2 # Header + Content
+
+
+class TestPersonalityRadar:
+ """Test suite for personality radar chart."""
+
+ def test_create_personality_radar_with_valid_data(self):
+ """Test radar chart creation with valid probability data."""
+ probabilities = {
+ "Extroversion": 0.8,
+ "Agreeableness": 0.6,
+ "Conscientiousness": 0.7,
+ "Neuroticism": 0.4,
+ "Openness": 0.9,
+ }
+
+ fig = create_personality_radar(probabilities)
+
+ assert isinstance(fig, go.Figure)
+ assert len(fig.data) > 0
+ assert fig.data[0].type == "scatterpolar"
+
+ def test_create_personality_radar_with_input_data(self):
+ """Test radar chart creation with input data included."""
+ probabilities = {
+ "Extroversion": 0.8,
+ "Agreeableness": 0.6,
+ "Conscientiousness": 0.7,
+ "Neuroticism": 0.4,
+ "Openness": 0.9,
+ }
+ input_data = {"time_alone": 3.0, "social_events": 2.0}
+
+ fig = create_personality_radar(probabilities, input_data)
+
+ assert isinstance(fig, go.Figure)
+ assert len(fig.data) > 0
+
+ def test_create_personality_radar_empty_data(self):
+ """Test radar chart with empty probability data."""
+ probabilities = {}
+
+ fig = create_personality_radar(probabilities)
+
+ assert isinstance(fig, go.Figure)
+ # Should handle empty data gracefully
+
+ def test_create_personality_radar_invalid_values(self):
+ """Test radar chart with invalid probability values."""
+ probabilities = {
+ "Extroversion": 1.5, # Invalid: > 1.0
+ "Agreeableness": -0.1, # Invalid: < 0.0
+ "Conscientiousness": 0.7,
+ }
+
+ # Should not raise an exception
+ fig = create_personality_radar(probabilities)
+ assert isinstance(fig, go.Figure)
+
+
+class TestPredictionFormatting:
+ """Test suite for prediction result formatting."""
+
+ def test_format_prediction_result_valid(self):
+ """Test formatting of valid prediction results."""
+ result_dict = {
+ "probabilities": {
+ "Extroversion": 0.8,
+ "Agreeableness": 0.6,
+ "Conscientiousness": 0.7,
+ "Neuroticism": 0.4,
+ "Openness": 0.9,
+ },
+ "input_data": {
+ "time_alone": 3.0,
+ "social_events": 2.0,
+ "going_outside": 4.0,
+ "friends_size": 3.0,
+ "post_freq": 2.0,
+ "stage_fear": 1.0,
+ "drained_social": 2.0,
+ },
+ }
+
+ result = format_prediction_result(result_dict)
+
+ assert isinstance(result, dbc.Card)
+ # Should contain formatted components
+ assert hasattr(result, "children")
+
+ def test_format_prediction_result_missing_data(self):
+ """Test formatting with missing input data."""
+ result_dict = {"probabilities": {"Extroversion": 0.8, "Agreeableness": 0.6}}
+
+ # Should handle missing input data gracefully
+ result = format_prediction_result(result_dict)
+ assert isinstance(result, dbc.Card)
+
+
+class TestLayoutIntegration:
+ """Integration tests for layout components."""
+
+ def test_layout_with_mock_model_metadata(self):
+ """Test layout creation with realistic model metadata."""
+ model_name = "six_stack_ensemble"
+ model_metadata = {
+ "model_type": "ensemble",
+ "version": "1.0.0",
+ "created_date": "2025-01-15",
+ "accuracy": 0.92,
+ "features": [
+ "time_alone",
+ "social_events",
+ "going_outside",
+ "friends_size",
+ "post_freq",
+ "stage_fear",
+ "drained_social",
+ ],
+ }
+
+ layout = create_layout(model_name, model_metadata)
+
+ assert isinstance(layout, html.Div)
+ # Verify structure contains expected components
+ assert len(layout.children) >= 2
+
+ def test_layout_responsiveness(self):
+ """Test that layout components have responsive classes."""
+ layout = create_layout("test", {})
+
+ # Check for Bootstrap responsive classes in the layout
+ layout_str = str(layout)
+        assert "container" in layout_str.lower()
+
+
+class TestLayoutEdgeCases:
+ """Test edge cases for layout components."""
+
+ def test_empty_model_name(self):
+ """Test layout creation with empty model name."""
+ layout = create_layout("", {})
+ assert isinstance(layout, html.Div)
+
+ def test_none_model_metadata(self):
+        """Test layout creation with an empty metadata dict (None-equivalent)."""
+ layout = create_layout("test_model", {})
+ assert isinstance(layout, html.Div)
+
+ def test_large_model_metadata(self):
+ """Test layout with extensive metadata."""
+ large_metadata = {f"param_{i}": f"value_{i}" for i in range(100)}
+ layout = create_layout("test_model", large_metadata)
+ assert isinstance(layout, html.Div)
+
+
+if __name__ == "__main__":
+ pytest.main([__file__])
diff --git a/tests/dash_app/test_model_loader.py b/tests/dash_app/test_model_loader.py
new file mode 100644
index 0000000..53dfd76
--- /dev/null
+++ b/tests/dash_app/test_model_loader.py
@@ -0,0 +1,138 @@
+"""Tests for dashboard model loader."""
+
+import pytest
+
+from dash_app.dashboard.model_loader import ModelLoader
+
+
+class TestModelLoader:
+ """Test suite for ModelLoader class."""
+
+ def test_model_loader_initialization(self):
+ """Test ModelLoader initialization."""
+ loader = ModelLoader(
+ model_name="test_model", model_version="1.0", model_stage="Testing"
+ )
+ assert loader.model_name == "test_model"
+ assert loader.model_version == "1.0"
+ assert loader.model_stage == "Testing"
+ # Model should be loaded (either real model or dummy)
+ assert loader.model is not None
+
+ def test_model_loader_with_ensemble_name(self):
+ """Test ModelLoader with ensemble model name."""
+ loader = ModelLoader(model_name="ensemble")
+ assert loader.model_name == "ensemble"
+ assert loader.is_loaded() is True
+
+ def test_model_loader_get_metadata(self):
+ """Test model metadata retrieval."""
+ loader = ModelLoader(model_name="test_model")
+ metadata = loader.get_metadata()
+ assert isinstance(metadata, dict)
+ assert "version" in metadata
+ assert "stage" in metadata
+
+ def test_model_loader_is_loaded(self):
+ """Test model loading status check."""
+ loader = ModelLoader(model_name="test_model")
+ assert loader.is_loaded() is True
+
+ def test_model_loader_str_representation(self):
+ """Test string representation of ModelLoader."""
+ loader = ModelLoader(model_name="test_model")
+ # Just check that it doesn't raise an error
+ str_repr = repr(loader)
+ assert isinstance(str_repr, str)
+
+
+class TestModelPrediction:
+ """Test suite for model prediction functionality."""
+
+ @pytest.fixture
+ def model_loader(self):
+ """Create a ModelLoader for testing predictions."""
+ return ModelLoader(model_name="test_model")
+
+ def test_model_prediction_success(self, model_loader):
+ """Test successful model prediction."""
+ input_data = {
+ "Time_spent_Alone": 3.0,
+ "Social_event_attendance": 2.0,
+ "Going_outside": 4.0,
+ "Friends_circle_size": 3.0,
+ "Post_frequency": 2.0,
+ "Stage_fear_No": 1,
+ "Stage_fear_Unknown": 0,
+ "Stage_fear_Yes": 0,
+ "Drained_after_socializing_No": 1,
+ "Drained_after_socializing_Unknown": 0,
+ "Drained_after_socializing_Yes": 0,
+ "match_p_Extrovert": 0,
+ "match_p_Introvert": 0,
+ "match_p_Unknown": 1,
+ }
+
+ result = model_loader.predict(input_data)
+
+ assert isinstance(result, dict)
+ assert "prediction" in result
+ assert "confidence" in result
+ assert result["model_name"] == "test_model"
+
+ def test_model_prediction_with_missing_features(self, model_loader):
+ """Test prediction with missing input features."""
+ input_data = {
+ "Time_spent_Alone": 3.0,
+ "Social_event_attendance": 2.0,
+ # Missing other features - should be handled by default values
+ }
+
+ result = model_loader.predict(input_data)
+ assert isinstance(result, dict)
+ assert "prediction" in result
+
+ def test_model_prediction_with_invalid_input(self, model_loader):
+ """Test prediction with invalid input data."""
+ invalid_input = "invalid_input"
+
+ with pytest.raises((ValueError, TypeError, AttributeError)):
+ model_loader.predict(invalid_input)
+
+ def test_model_prediction_empty_input(self, model_loader):
+ """Test prediction with empty input."""
+ empty_input = {}
+
+ # Should handle empty input with default values
+ result = model_loader.predict(empty_input)
+ assert isinstance(result, dict)
+ assert "prediction" in result
+
+
+class TestModelLoaderEdgeCases:
+ """Test edge cases for ModelLoader."""
+
+ def test_model_loader_with_dummy_fallback(self):
+ """Test ModelLoader creates dummy model when no real model found."""
+ # Use a model name that won't exist
+ loader = ModelLoader(model_name="nonexistent_model")
+
+ # Should still be loaded (with dummy model)
+ assert loader.is_loaded() is True
+ assert loader.model is not None
+
+ # Metadata should indicate dummy model
+ metadata = loader.get_metadata()
+ assert metadata.get("version") == "dummy"
+
+ def test_model_loader_ensemble_vs_stack(self):
+ """Test different model name patterns."""
+ ensemble_loader = ModelLoader(model_name="ensemble")
+ stack_loader = ModelLoader(model_name="A") # Stack A
+
+ assert ensemble_loader.model_name == "ensemble"
+ assert stack_loader.model_name == "A"
+
+ # Both should be loaded
+ assert ensemble_loader.is_loaded()
+ assert stack_loader.is_loaded()
diff --git a/uv.lock b/uv.lock
index e1a8f9f..45a3513 100644
--- a/uv.lock
+++ b/uv.lock
@@ -834,6 +834,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/72/ef/d46131f4817f18b329e4fb7c53ba1d31774239d91266a74bccdc932708cc/dash-2.18.2-py3-none-any.whl", hash = "sha256:0ce0479d1bc958e934630e2de7023b8a4558f23ce1f9f5a4b34b65eb3903a869", size = 7792658, upload-time = "2024-11-04T21:12:56.592Z" },
]
+[[package]]
+name = "dash-bootstrap-components"
+version = "1.7.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "dash" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/68/fa/f702d729a4b788293b796dc92f3d529909641de1e2e13f967211169b807a/dash_bootstrap_components-1.7.1.tar.gz", hash = "sha256:30d48340d6dc89831d6c06e400cd4236f0d5363562c05b2a922f21545695a082", size = 136579, upload-time = "2025-01-16T07:11:28.74Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c3/87/4db3b56e9a6813d413a0f20e053aa163d652babb629a8bf7b871af4a075f/dash_bootstrap_components-1.7.1-py3-none-any.whl", hash = "sha256:5e8eae7ee1d013f69e272c68c1015b53ab71802460152088f33fffa90d245199", size = 229294, upload-time = "2025-01-16T07:11:24.635Z" },
+]
+
[[package]]
name = "dash-core-components"
version = "2.0.0"
@@ -2474,12 +2486,14 @@ source = { editable = "." }
dependencies = [
{ name = "catboost" },
{ name = "dash" },
+ { name = "dash-bootstrap-components" },
{ name = "imbalanced-learn" },
{ name = "joblib" },
{ name = "lightgbm" },
{ name = "numpy" },
{ name = "optuna" },
{ name = "pandas" },
+ { name = "plotly" },
{ name = "scikit-learn" },
{ name = "scipy" },
{ name = "sdv" },
@@ -2521,6 +2535,7 @@ requires-dist = [
{ name = "bandit", marker = "extra == 'dev'", specifier = ">=1.7.0,<2.0.0" },
{ name = "catboost", specifier = ">=1.2.0,<2.0.0" },
{ name = "dash", specifier = ">=2.14.0,<3.0.0" },
+ { name = "dash-bootstrap-components", specifier = ">=1.7.1" },
{ name = "h2o", marker = "extra == 'automl'", specifier = ">=3.44.0,<4.0.0" },
{ name = "imbalanced-learn", specifier = ">=0.11.0,<1.0.0" },
{ name = "joblib", specifier = ">=1.3.0,<2.0.0" },
@@ -2529,6 +2544,7 @@ requires-dist = [
{ name = "numpy", specifier = ">=1.24.0,<2.0.0" },
{ name = "optuna", specifier = ">=3.4.0,<4.0.0" },
{ name = "pandas", specifier = ">=2.0.0,<3.0.0" },
+ { name = "plotly", specifier = ">=5.24.1" },
{ name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.3.0,<4.0.0" },
{ name = "pydocstyle", marker = "extra == 'dev'", specifier = ">=6.3.0,<7.0.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=7.4.0,<8.0.0" },