diff --git a/solo_server/advanced_cmd.py b/solo_server/advanced_cmd.py new file mode 100644 index 0000000..0ece57b --- /dev/null +++ b/solo_server/advanced_cmd.py @@ -0,0 +1,51 @@ +import typer +from rich.console import Console +from rich.panel import Panel + +app = typer.Typer(help="CLI for Advanced Model Operations and Model Export/Optimization") +console = Console() + +# ------------------------------- +# Advanced Model Operations Group +# ------------------------------- +advanced_app = typer.Typer(help="Commands for benchmarking, profiling, and stress testing your model.") +app.add_typer(advanced_app, name="advanced") + +@advanced_app.command("benchmark") +def benchmark(): + """Run performance benchmarks on the model.""" + console.print(Panel("Benchmark command executed", title="Benchmark", style="blue")) + +@advanced_app.command("profile") +def profile(): + """Profile model resource usage.""" + console.print(Panel("Profile command executed", title="Profile", style="blue")) + +@advanced_app.command("stress-test") +def stress_test(): + """Stress test the model and server under high-load conditions.""" + console.print(Panel("Stress-Test command executed", title="Stress Test", style="blue")) + +# ------------------------------- +# Model Export & Optimization Group +# ------------------------------- +optimization_app = typer.Typer(help="Commands for exporting, quantizing, and fine-tuning the model.") +app.add_typer(optimization_app, name="optimization") + +@optimization_app.command("export") +def export_model(): + """Export the model to various formats (e.g., ONNX, TensorRT, CoreML).""" + console.print(Panel("Export command executed", title="Export", style="green")) + +@optimization_app.command("quantize") +def quantize(): + """Apply quantization to reduce model size and improve efficiency.""" + console.print(Panel("Quantize command executed", title="Quantize", style="green")) + +@optimization_app.command("finetune") +def finetune(): + """Fine-tune the model on custom datasets with specified hyperparameters.""" + console.print(Panel("Finetune command executed", title="Finetune", style="green")) + +if __name__ == "__main__": + app() diff --git a/solo_server/commands/query.py b/solo_server/commands/query.py new file mode 100644 index 0000000..ab5a5e2 --- /dev/null +++ b/solo_server/commands/query.py @@ -0,0 +1,68 @@ +import sys +import typer +import requests +from litgpt import LLM +from rich.console import Console + +console = Console() + +CORE_SERVER_PORT = 5070 # Change this if your core server runs on a different port +CORE_SERVER_URL = f"http://localhost:{CORE_SERVER_PORT}/generate" + +def redirect_to_core_server(query: str, port: int = CORE_SERVER_PORT) -> None: + """ + Redirect the given query to the core server via an HTTP POST request. + """ + url = f"http://localhost:{port}/generate" + try: + response = requests.post(url, json={"prompt": query}) + response.raise_for_status() + console.print("[success]Response from core server:[/success]") + console.print(response.text) + except Exception as e: + console.print(f"[warning]Error redirecting to core server: {e}[/warning]") + +def query_llm(query: str) -> None: + """ + If the query exceeds 9000 characters, show an error. + Otherwise, load the model and generate a response. + """ + if len(query) > 9000: + typer.echo("Error: Your query exceeds the maximum allowed length of 9000 characters. 
It's over 9000!") + raise typer.Exit(1) + + # Load the model and generate a response while showing a spinner + llm = LLM.load("Qwen/Qwen2.5-1.5B-Instruct") + with console.status("Generating response...", spinner="dots"): + response = llm.generate(query) + typer.echo(response) + +def interactive_mode(): + console.print("Interactive Mode (type 'exit' or 'quit' to end):", style="bold green") + while True: + query_text = input(">> ").strip() + if query_text.lower() in ("exit", "quit"): + break + # If the query starts with "solo @@", redirect to the core server + if query_text.startswith("solo @@"): + # Remove the "solo @@" prefix before sending the query + core_query = query_text[len("solo @@"):].strip() + redirect_to_core_server(core_query) + else: + query_llm(query_text) + +if __name__ == "__main__": + # If invoked with "@@" as the first argument, treat the rest as the query. + # Otherwise, launch interactive mode. + if len(sys.argv) > 1 and sys.argv[1] == "@@": + if len(sys.argv) > 2: + query_text = " ".join(sys.argv[2:]).strip() + else: + typer.echo("Enter your query (end with EOF / Ctrl-D):") + query_text = sys.stdin.read().strip() + # If the query starts with "solo @@", remove that prefix. + if query_text.startswith("solo @@"): + query_text = query_text[len("solo @@"):].strip() + redirect_to_core_server(query_text) + else: + interactive_mode() diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml new file mode 100644 index 0000000..ded92a2 --- /dev/null +++ b/solo_server/ensemble.yaml @@ -0,0 +1,20 @@ +advanced_modules: true +checkpoint_dir: checkpoints/HuggingFaceTB/SmolLM2-1.7B-Instruct +devices: 1 +hardware: + category: High Performance + cpu_cores: 8 + cpu_model: Intel i7 + gpu_memory: 4 + memory_gb: 16 +max_new_tokens: 50 +model_choice: null +module_pack: robotics +port: 5070 +precision: null +quantize: null +selected_model: HuggingFaceTB/SmolLM2-1.7B-Instruct +stream: false +temperature: 0.8 +top_k: 50 +top_p: 1.0 diff --git a/solo_server/explorative_cmd.py b/solo_server/explorative_cmd.py new file mode 100644 index 0000000..93d5c29 --- /dev/null +++ b/solo_server/explorative_cmd.py @@ -0,0 +1,155 @@ +import typer +from rich.console import Console +from rich.panel import Panel + +app = typer.Typer(help="Solo CLI - A comprehensive tool for model management and server operations.") +console = Console() + +# --------------------------------- +# Setup Commands Group +# --------------------------------- +setup_app = typer.Typer(help="Commands for initializing and setting up the environment.") +app.add_typer(setup_app, name="setup") + +@setup_app.command("full") +def full_setup(): + """Run full server setup.""" + console.print(Panel("Full Setup executed", title="Setup", style="green")) + +@setup_app.command("init") +def init(): + """Reinitialize core components.""" + console.print(Panel("Init executed", title="Init", style="green")) + +# --------------------------------- +# Model Management Group +# --------------------------------- +model_app = typer.Typer(help="Manage model downloads, updates, and tests.") +app.add_typer(model_app, name="model") + +@model_app.command("download") +def download_model(): + """Download or update the model.""" + console.print(Panel("Download executed", title="Download", style="green")) + +@model_app.command("update") +def update_model(): + """Update the model to the latest version.""" + console.print(Panel("Update Model executed", title="Update Model", style="green")) + +@model_app.command("test") +def test_model(): + """Test the downloaded 
model with a sample prompt.""" + console.print(Panel("Test executed", title="Test", style="green")) + +# --------------------------------- +# Query & Interaction Group +# --------------------------------- +query_app = typer.Typer(help="Handle one-off queries or launch interactive mode.") +app.add_typer(query_app, name="query") + +@query_app.command("ask") +def ask(query: str = typer.Argument(..., help="Query for the model")): + """Send a query to the model.""" + # Check for "solo @@" prefix and adjust query if necessary + if query.startswith("solo @@"): + query = query[len("solo @@"):].strip() + console.print(Panel(f"Query: {query}", title="Query", style="green")) + +@query_app.command("interactive") +def interactive(): + """Launch interactive query mode.""" + console.print(Panel("Interactive mode launched", title="Interactive", style="green")) + # Add interactive loop logic here if desired + +# --------------------------------- +# Server Management Group +# --------------------------------- +server_app = typer.Typer(help="Commands for managing the model server.") +app.add_typer(server_app, name="server") + +@server_app.command("start") +def start_server(): + """Start or restart the model server.""" + console.print(Panel("Server started", title="Server", style="green")) + +@server_app.command("restart") +def restart_server(): + """Restart the server gracefully.""" + console.print(Panel("Server restarted", title="Restart", style="green")) + +@server_app.command("stop") +def stop_server(): + """Stop the running server.""" + console.print(Panel("Server stopped", title="Stop", style="green")) + +# --------------------------------- +# Diagnostics & Monitoring Group +# --------------------------------- +diag_app = typer.Typer(help="Commands for diagnostics and monitoring.") +app.add_typer(diag_app, name="diagnostics") + +@diag_app.command("status") +def status(): + """Display the current server status.""" + console.print(Panel("Status executed", title="Status", style="green")) + +@diag_app.command("logs") +def logs(): + """Display recent logs.""" + console.print(Panel("Logs executed", title="Logs", style="green")) + +@diag_app.command("health") +def healthcheck(): + """Perform a health check of the server.""" + console.print(Panel("Health check executed", title="Healthcheck", style="green")) + +@diag_app.command("diagnose") +def diagnose(): + """Run diagnostics to troubleshoot issues.""" + console.print(Panel("Diagnose executed", title="Diagnose", style="green")) + +# --------------------------------- +# Maintenance Group +# --------------------------------- +maint_app = typer.Typer(help="Maintenance and update commands.") +app.add_typer(maint_app, name="maintenance") + +@maint_app.command("update") +def update_cli(): + """Update the CLI or associated modules.""" + console.print(Panel("CLI Update executed", title="Update", style="green")) + +@maint_app.command("backup") +def backup(): + """Create backups of configuration and checkpoints.""" + console.print(Panel("Backup executed", title="Backup", style="green")) + +@maint_app.command("restore") +def restore(): + """Restore a backup configuration or model checkpoint.""" + console.print(Panel("Restore executed", title="Restore", style="green")) + +# --------------------------------- +# Configuration Group +# --------------------------------- +config_app = typer.Typer(help="View or modify configuration settings.") +app.add_typer(config_app, name="config") + +@config_app.command("set") +def set_config(): + """Set configuration parameters.""" + 
console.print(Panel("Config set executed", title="Config Set", style="green")) + +@config_app.command("info") +def config_info(): + """Display current configuration info.""" + console.print(Panel("Config info executed", title="Config Info", style="green")) + +@config_app.command("version") +def version(): + """Display the CLI version.""" + console.print(Panel("Version executed", title="Version", style="green")) + +if __name__ == "__main__": + app() diff --git a/solo_server/main.py b/solo_server/main.py index 3c7f76b..05acc6a 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,197 +1,344 @@ -import os -import json -import typer +import time import subprocess -import shutil +import socket +import sys +import typer import click -import sys - -from enum import Enum -from solo_server.config import CONFIG_PATH -from solo_server.utils.docker_utils import start_docker_engine -from solo_server.utils.hardware import detect_hardware, display_hardware_info, recommended_server -from solo_server.utils.nvidia import check_nvidia_toolkit, install_nvidia_toolkit_linux, install_nvidia_toolkit_windows -from solo_server.simple_setup import run_command, detect_gpu +import yaml +from pathlib import Path +from tqdm import tqdm from rich.console import Console from rich.panel import Panel +from rich.theme import Theme +from rich import box + +import commands.query + +app = typer.Typer( + help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, advanced module loading, and query redirection." +) + +# Google-inspired theme +google_theme = Theme({ + "header": "bold #4285F4", + "info": "bold #4285F4", + "warning": "bold #DB4437", + "success": "bold #0F9D58", + "panel.border": "bright_blue", + "panel.title": "bold white" +}) +console = Console(theme=google_theme) + +# Hard-coded model and starting port +MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct" +START_PORT = 5070 + +def print_banner(): + """Display a header banner for the Solo Server CLI.""" + banner_text = """ + ___ _ __ __ _ + / _ \\(_)___ ___ / /___ / /_(_) + / , _/ / _ \\/ -_) / / __/ / __/ / + /_/|_/_/ .__/\\__/ /_/\\__/ \\__/_/ + /_/ + """ + console.print(Panel(banner_text, style="header", border_style="panel.border", title="SOLO SERVER INIT", box=box.DOUBLE)) + +def detect_hardware(): + """ + Dummy hardware detection function. + Replace with your actual hardware detection logic. + """ + cpu_model = "Intel i7" + cpu_cores = 8 + memory_gb = 16 # Example value + gpu_memory = 4 # Example value (in GB) + return cpu_model, cpu_cores, memory_gb, gpu_memory + +def get_hardware_category(memory_gb: float) -> str: + if memory_gb < 8: + return "Fresh Adopter" + elif memory_gb < 16: + return "Mid Range" + elif memory_gb < 32: + return "High Performance" + else: + return "Maestro" + +def simulate_model_download(model: str, sleep_time: int = 3) -> str: + """ + Simulate model download with a progress bar. + (sleep_time is in seconds; e.g., 3 sec ~ 0.05 mins) + """ + for _ in tqdm(range(sleep_time), + desc="Downloading model (est. {:.2f} mins)".format(sleep_time/60), + unit="sec", total=sleep_time): + time.sleep(1) + return f"[success]Model {model} download complete.[/success]" + +def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: + """ + Ask user to confirm core initialization. 
+ """ + init_prompt = ( + "Continue to solo core initialization?\n" + "Yes: Proceed with full initialization and model setup\n" + "No: Exit setup" + ) + console.print(Panel(init_prompt, title="Core Initialization", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + return confirm_fn("", default=True) + +def test_downloaded_model(model: str, run_subprocess_fn=subprocess.run) -> str: + """ + Prompt the user for a test prompt (defaulting to 'solo @@ test') and use the LitGPT CLI + to generate sample output from the downloaded model. + A progress bar shows the testing duration. + """ + test_prompt = typer.prompt("Enter test prompt", default="solo @@ test") + console.print(f"[info]Testing model {model} with prompt: '{test_prompt}'[/info]") + for _ in tqdm(range(5), desc="Testing model (est. 0.08 mins)", unit="sec", total=5): + time.sleep(1) + try: + # Assuming the LitGPT CLI provides a generate command. + cmd = ["litgpt", "generate", model, "--prompt", test_prompt] + result = run_subprocess_fn(cmd, check=True, capture_output=True, text=True) + output = result.stdout.strip() + console.print(f"[success]Test generation output:[/success]\n{output}") + return output + except subprocess.CalledProcessError as e: + error_output = e.stderr.strip() if e.stderr else str(e) + console.print(f"[warning]Test generation failed: {error_output}[/warning]") + return "" + +def prompt_advanced_modules(confirm_fn=typer.confirm, prompt_fn=typer.prompt) -> (bool, str): + """ + Ask user if they want to load advanced modules and select a vertical. + New verticals include: secure enterprise, healthcare, robotics, and lean ensemble. + Returns a tuple (advanced_modules, module_pack) + """ + adv_prompt = ( + "Load advanced modules?\n" + "Yes: Load additional functionalities for a vertical\n" + "No: Skip advanced modules" + ) + console.print(Panel(adv_prompt, title="Advanced Modules", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + advanced_modules = confirm_fn("", default=True) + module_pack = None + if advanced_modules: + module_pack_info = ( + "Choose advanced vertical:\n" + "secure enterprise - Modules for security and compliance\n" + "healthcare - Modules for healthcare applications\n" + "robotics - Modules for robotics integration\n" + "lean ensemble - A lean set of general modules\n" + "Enter your choice:" + ) + console.print(Panel(module_pack_info, title="Vertical Options", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + module_pack = prompt_fn("", type=click.Choice(["secure enterprise", "healthcare", "robotics", "lean ensemble"], case_sensitive=False), default="lean ensemble") + return advanced_modules, module_pack + +def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): + """ + Build an ensemble of Docker images for the selected vertical. + Uses the path: commands/containers/ (relative to main.py). + A tqdm progress bar shows the estimated duration. + """ + # New advanced module packs for different verticals + advanced_module_packs = { + "secure enterprise": ["auth", "data-encryption", "audit-log"], + "healthcare": ["hl7", "fhir-connector", "secure-patient"], + "robotics": ["ros", "le-robot", "robotics-core"], + "lean ensemble": ["microservice", "edge-ai", "light-transformers"] + } + modules = advanced_module_packs.get(module_pack.lower(), []) + if not modules: + console.print(f"[warning]No modules found for vertical '{module_pack}'.[/warning]") + return + + for module in tqdm(modules, desc="Building Docker images (est. 
2 mins/module)", unit="module", total=len(modules)): + build_path = Path("commands") / "containers" / module + if not build_path.exists(): + console.print(f"[warning]Path {build_path} does not exist. Skipping module {module}.[/warning]") + continue + console.print(f"[info]Building Docker image for module:[/info] {module}") + image_tag = module.lower().replace(' ', '-') + try: + run_subprocess_fn( + ["docker", "build", "-t", f"ensemble/{image_tag}", str(build_path)], + check=True, + capture_output=True + ) + console.print(f"[success]Successfully built image for:[/success] {module}") + except subprocess.CalledProcessError as e: + console.print(f"[warning]Docker build failed for module {module}: {e}[/warning]") + +def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml") -> str: + """ + Save setup information to a YAML file. + """ + with open(filename, "w") as f: + yaml.dump(setup_info, f) + return f"[success]Setup information saved to {filename}.[/success]" -class ServerType(str, Enum): - OLLAMA = "Ollama" - VLLM = "vLLM" - LLAMACPP = "Llama.cpp" +def get_available_port(start_port: int) -> int: + """ + Return the first available port starting from start_port. + """ + port = start_port + while True: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + try: + s.bind(("", port)) + return port + except OSError: + port += 1 + +def serve_model(model: str, port: int, run_subprocess_fn=subprocess.run) -> (str, int): + """ + Serve the model using the LitGPT CLI syntax. + If the given port is in use, automatically increment to the next available port. + Returns a tuple of the success message and the port used. + """ + available_port = get_available_port(port) + try: + cmd = ["litgpt", "serve", model, "--port", str(available_port)] + run_subprocess_fn(cmd, check=True, capture_output=True, text=True) + success_msg = f"[success]Server started on port {available_port} with model: {model}[/success]" + test_curl = f"curl http://localhost:{available_port}/" + console.print(f"[info]You can test the server with: {test_curl}[/info]") + return success_msg, available_port + except subprocess.CalledProcessError as e: + error_output = e.stderr.strip() if e.stderr else str(e) + console.print(f"ERROR: {error_output}") + return f"[warning]Failed to start server: {e}[/warning]", available_port + +def get_hardware_info() -> dict: + """ + Get hardware information and categorization. 
+ """ + cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() + hardware_category = get_hardware_category(memory_gb) + return { + "cpu_model": cpu_model, + "cpu_cores": cpu_cores, + "memory_gb": memory_gb, + "gpu_memory": gpu_memory, + "category": hardware_category + } -def setup(): - """Interactive setup for Solo Server environment""" - # Display hardware info - display_hardware_info(typer) - cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, gpu_memory, compute_backend, os_name = detect_hardware() +@app.command() +def setup( + model_choice: str = typer.Option( + None, + "--model", + "-m", + help="Optional model choice (ignored in this setup; always uses HuggingFaceTB/SmolLM2-1.7B-Instruct)" + ) +): + """Run the full solo server setup.""" + console.print("\n") + print_banner() + console.print("\n") + + # Step 1: Hardware Detection & Categorization + console.print("[info]Detecting hardware...[/info]") + hardware_info = get_hardware_info() + hardware_info_str = ( + f"CPU: {hardware_info['cpu_model']} ({hardware_info['cpu_cores']} cores)\n" + f"Memory: {hardware_info['memory_gb']} GB\n" + f"GPU Memory: {hardware_info['gpu_memory']} GB\n" + f"Category: {hardware_info['category']}" + ) + console.print(Panel(hardware_info_str, title="Hardware Info", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + + # Step 2: Core Initialization Prompt + if not prompt_core_initialization(): + console.print("[warning]Exiting setup.[/warning]") + raise typer.Exit() + + console.print("\n") + + # Step 3: Model Download Simulation (always uses the specified model) + download_message = simulate_model_download(MODEL) + console.print(download_message) + + console.print("\n") + + # NEW STEP: Test the downloaded model using the solo @@ structure + console.print(Panel("Testing downloaded model...", title="Test Model", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + test_output = test_downloaded_model(MODEL) + + console.print("\n") - typer.echo("\nStarting Solo Server Setup...\n") - gpu = detect_gpu() - if gpu: - print("💻 Solo Sighting: GPU detected ->", gpu) - device_arg = "1" + # Step 4: Advanced Modules Prompt (optional) + advanced_modules, module_pack = prompt_advanced_modules() + if advanced_modules: + console.print(f"[info]Vertical selected: {module_pack}[/info]") else: - print("😎 Solo Mode: No GPU found; rocking CPU mode!") - device_arg = "0" + console.print("[info]Skipping advanced modules.[/info]") - # Ask for installation type - install_type = typer.prompt("Choose installation type:", type=click.Choice(['simple', 'advanced'], case_sensitive=False)) - typer.echo(f"Selected installation type: {install_type}") - - if install_type == "simple": - # Define port to use - port = "5070" - device_arg = "0" - accelerator_arg = "cpu" - - console = Console() - console.print("Solo setup: Installing optimal inference engine, hold tight...") - run_command(["litgpt", "download", "HuggingFaceTB/SmolLM2-135M-Instruct"], - spinner_message="Solo download in progress: Grabbing lightest model...") - console.print("\n") - - - console.print(Panel.fit( - f"🎉 LIVE: solo server is now live!\n" - f"🔗 Swagger docs available at: http://localhost:{port}/docs", - title="Solo Server", border_style="blue")) - console.print( - f"curl -X POST http://127.0.0.1:{port}/predict -H 'Content-Type: application/json' -d '{{\"prompt\": \"hello Solo\"}}'") - - command = [ - "litgpt", - "serve", - "HuggingFaceTB/SmolLM2-135M-Instruct", - "--port", port, - "--devices", device_arg, - "--accelerator", 
accelerator_arg - ] - - process = subprocess.Popen(command) - print(f"Command is running in the background with PID: {process.pid}") + console.print("\n") + + # Step 5: Save Setup Information to YAML and print config details + setup_info = { + "checkpoint_dir": str(Path("checkpoints") / MODEL), + "devices": 1, + "max_new_tokens": 50, + "port": START_PORT, # initial port, actual port may change + "precision": None, + "quantize": None, + "stream": False, + "temperature": 0.8, + "top_k": 50, + "top_p": 1.0, + "selected_model": MODEL, + "hardware": hardware_info, + "advanced_modules": advanced_modules, + "module_pack": module_pack, + "model_choice": model_choice + } + save_message = save_setup_info(setup_info) + console.print(save_message) + console.print(setup_info) + + # Step 6: Docker Ensemble Build for Advanced Modules (if enabled) + if advanced_modules and module_pack: + console.print(Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + build_docker_ensemble(module_pack) + + console.print("\n") + console.print(Panel("Solo core initialization complete!", title="Setup Complete", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + console.print("\n") + + # Step 7: Serve the Model using LitGPT CLI syntax and capture errors gracefully + console.print(Panel(f"Starting server with model: {MODEL}", title="Server", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + server_message, used_port = serve_model(MODEL, port=START_PORT) + console.print(server_message) + +@app.command() +def query(query: str = typer.Argument( + None, + help="Query for the LLM. If omitted, interactive mode is launched." +)): + """ + Redirect queries to the appropriate functions in query.py. + If a query is provided, it is processed; otherwise, interactive mode is launched. + If the query starts with 'solo @@', the prefix is stripped and the core server is used. + """ + try: + from commands.query import query_llm, redirect_to_core_server, interactive_mode + except ModuleNotFoundError: + console.print("[warning]Module 'query' not found. Please ensure query.py is in the same directory.[/warning]") + raise typer.Exit(1) + + if query is None: + interactive_mode() else: - # Original code - recmd_server = recommended_server(memory_gb, gpu_vendor, gpu_memory) - - def server_type_prompt(value: str) -> ServerType: - normalized_value = value.lower() - for server in ServerType: - if server.value.lower() == normalized_value: - return server - raise typer.BadParameter(f"Invalid server type: {value}") - - server_choice = typer.prompt( - "\nChoose server", - type=server_type_prompt, - default=recmd_server, - ) - - # GPU Configuration - use_gpu = False - if gpu_vendor in ["NVIDIA", "AMD", "Intel", "Apple Silicon"]: - use_gpu = True - if use_gpu and gpu_vendor == "NVIDIA": - if not check_nvidia_toolkit(os_name): - if typer.confirm("NVIDIA GPU Detected, but GPU drivers not found. 
Install now?", default=True): - if os_name == "Linux": - try: - install_nvidia_toolkit_linux() - except subprocess.CalledProcessError as e: - typer.echo(f"Failed to install NVIDIA toolkit: {e}", err=True) - use_gpu = False - elif os_name == "Windows": - try: - install_nvidia_toolkit_windows() - except subprocess.CalledProcessError as e: - typer.echo(f"Failed to install NVIDIA toolkit: {e}", err=True) - use_gpu = False - else: - typer.echo("Falling back to CPU inference.") - use_gpu = False - - # Save GPU configuration to config file - config = {} - if os.path.exists(CONFIG_PATH): - with open(CONFIG_PATH, 'r') as f: - config = json.load(f) - config['hardware'] = {'use_gpu': use_gpu} - with open(CONFIG_PATH, 'w') as f: - json.dump(config, f, indent=4) - - # Docker Engine Check for Docker-based servers - if server_choice in [ServerType.OLLAMA, ServerType.VLLM]: - # Check Docker installation - docker_path = shutil.which("docker") - if not docker_path: - typer.echo("Docker is not installed or not in the system PATH. Please install Docker first.\n", err=True) - typer.secho("Install Here: https://docs.docker.com/get-docker/", fg=typer.colors.GREEN) - raise typer.Exit(code=1) - - - try: - subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20) - except subprocess.CalledProcessError: - typer.echo("Docker daemon is not running. Attempting to start Docker...", err=True) - if not start_docker_engine(os_name): - raise typer.Exit(code=1) - # Re-check if Docker is running - try: - subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20) - except subprocess.CalledProcessError: - typer.echo("Try restarting the terminal with admin privileges and close any instances of podman.", err=True) - raise typer.Exit(code=1) - - - - # Server setup - try: - if server_choice == ServerType.VLLM: - # pull the appropriate vLLM image - typer.echo("Pulling vLLM image...") - if gpu_vendor == "NVIDIA" and use_gpu: - subprocess.run(["docker", "pull", "vllm/vllm-openai:latest"], check=True) - elif gpu_vendor == "AMD" and use_gpu: - subprocess.run(["docker", "pull", "rocm/vllm"], check=True) - elif cpu_model and "Apple" in cpu_model: - subprocess.run(["docker", "pull", "getsolo/vllm-arm"], check=True) - elif cpu_model and any(vendor in cpu_model for vendor in ["Intel", "AMD"]): - subprocess.run(["docker", "pull", "getsolo/vllm-cpu"], check=True) - else: - typer.echo("vLLM currently does not support your machine", err=True) - return False - - typer.secho( - "Solo server vLLM setup complete! Use 'solo serve -s vllm -m MODEL_NAME' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - - elif server_choice == ServerType.OLLAMA: - # Just pull the Ollama image - typer.echo("Pulling Ollama image...") - if gpu_vendor == "AMD" and use_gpu: - subprocess.run(["docker", "pull", "ollama/ollama-rocm"], check=True) - else: - subprocess.run(["docker", "pull", "ollama/ollama"], check=True) - - typer.secho( - "Solo server ollama setup complete! \nUse 'solo serve -s ollama -m MODEL_NAME' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - - elif server_choice == ServerType.LLAMACPP: - from solo_server.utils.server_utils import setup_llama_cpp_server - setup_success = setup_llama_cpp_server(use_gpu, gpu_vendor, os_name, install_only=True) - if setup_success: - typer.secho( - "Solo server llama.cpp setup complete! 
Use 'solo serve -s llama.cpp -m MODEL_PATH' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - else: - typer.echo("Failed to setup llama.cpp", err=True) - except Exception as e: - typer.echo(f"\nSetup failed: {e}", err=True) - raise typer.Exit(code=1) + if query.startswith("solo @@"): + core_query = query[len("solo @@"):].strip() + redirect_to_core_server(core_query) + else: + query_llm(query) if __name__ == "__main__": - typer.run(setup) \ No newline at end of file + app()
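The "solo @@" prefix marks a query that should be routed to the already-running core server rather than loading a local model; the same strip-and-forward pattern appears in commands/query.py (interactive_mode and the __main__ entry point), in main.py's query command, and in explorative_cmd.py's ask. A shared helper is not part of this patch; a minimal sketch of the convention, with the helper name chosen purely for illustration:

from typing import Tuple

CORE_PREFIX = "solo @@"

def split_core_prefix(raw: str) -> Tuple[bool, str]:
    """Return (route_to_core_server, cleaned_query) for a raw user query.

    Mirrors the prefix handling repeated in commands/query.py and main.py:
    "solo @@ hello" -> (True, "hello"), "hello" -> (False, "hello").
    """
    raw = raw.strip()
    if raw.startswith(CORE_PREFIX):
        return True, raw[len(CORE_PREFIX):].strip()
    return False, raw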
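End to end, a routed query becomes an HTTP POST: redirect_to_core_server() in commands/query.py sends {"prompt": ...} as JSON to http://localhost:5070/generate, where 5070 matches START_PORT in main.py. A minimal client sketch of that contract follows; the /generate path is an assumption carried over from query.py and should be checked against whatever route the litgpt-served model actually exposes (the removed simple-setup path in the old main.py advertised /predict), and the port may differ if serve_model() fell back to another free port:

import requests

def ask_core_server(prompt: str, port: int = 5070, timeout: float = 60.0) -> str:
    """POST a prompt to the core server, mirroring redirect_to_core_server()."""
    url = f"http://localhost:{port}/generate"  # path assumed from commands/query.py
    response = requests.post(url, json={"prompt": prompt}, timeout=timeout)
    response.raise_for_status()
    return response.text

if __name__ == "__main__":
    print(ask_core_server("hello Solo"))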
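setup() persists its run configuration with save_setup_info(), and the ensemble.yaml added in this diff shows the resulting keys (selected_model, port, hardware, module_pack, and so on). The patch does not include a reader; a minimal loader sketch, assuming the file sits in the current working directory:

from pathlib import Path

import yaml

def load_setup_info(path: str = "ensemble.yaml") -> dict:
    """Read back the configuration written by save_setup_info() in main.py."""
    config_file = Path(path)
    if not config_file.exists():
        return {}
    with config_file.open() as f:
        return yaml.safe_load(f) or {}

if __name__ == "__main__":
    info = load_setup_info()
    print(info.get("selected_model"), info.get("port"), info.get("module_pack"))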
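The hardware tiers reported during setup come from get_hardware_category(): under 8 GB of RAM is "Fresh Adopter", under 16 GB "Mid Range", under 32 GB "High Performance", and anything larger "Maestro". Those boundaries are easy to pin down with a small test; a pytest sketch, assuming main.py is importable as main (importing it also pulls in litgpt via commands.query):

import pytest

from main import get_hardware_category  # assumes tests run from solo_server/

@pytest.mark.parametrize(
    "memory_gb, expected",
    [
        (4, "Fresh Adopter"),       # below the 8 GB boundary
        (8, "Mid Range"),           # first value at or above 8 GB
        (16, "High Performance"),   # first value at or above 16 GB
        (32, "Maestro"),            # 32 GB and up
    ],
)
def test_get_hardware_category(memory_gb, expected):
    assert get_hardware_category(memory_gb) == expected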