diff --git a/solo_server/advanced_cmd.py b/solo_server/advanced_cmd.py new file mode 100644 index 0000000..0ece57b --- /dev/null +++ b/solo_server/advanced_cmd.py @@ -0,0 +1,51 @@ +import typer +from rich.console import Console +from rich.panel import Panel + +app = typer.Typer(help="CLI for Advanced Model Operations and Model Export/Optimization") +console = Console() + +# ------------------------------- +# Advanced Model Operations Group +# ------------------------------- +advanced_app = typer.Typer(help="Commands for benchmarking, profiling, and stress testing your model.") +app.add_typer(advanced_app, name="advanced") + +@advanced_app.command("benchmark") +def benchmark(): + """Run performance benchmarks on the model.""" + console.print(Panel("Benchmark command executed", title="Benchmark", style="blue")) + +@advanced_app.command("profile") +def profile(): + """Profile model resource usage.""" + console.print(Panel("Profile command executed", title="Profile", style="blue")) + +@advanced_app.command("stress-test") +def stress_test(): + """Stress test the model and server under high-load conditions.""" + console.print(Panel("Stress-Test command executed", title="Stress Test", style="blue")) + +# ------------------------------- +# Model Export & Optimization Group +# ------------------------------- +optimization_app = typer.Typer(help="Commands for exporting, quantizing, and fine-tuning the model.") +app.add_typer(optimization_app, name="optimization") + +@optimization_app.command("export") +def export_model(): + """Export the model to various formats (e.g., ONNX, TensorRT, CoreML).""" + console.print(Panel("Export command executed", title="Export", style="green")) + +@optimization_app.command("quantize") +def quantize(): + """Apply quantization to reduce model size and improve efficiency.""" + console.print(Panel("Quantize command executed", title="Quantize", style="green")) + +@optimization_app.command("finetune") +def finetune(): + """Fine-tune the model on custom datasets with specified hyperparameters.""" + console.print(Panel("Finetune command executed", title="Finetune", style="green")) + +if __name__ == "__main__": + app() diff --git a/solo_server/commands/query.py b/solo_server/commands/query.py new file mode 100644 index 0000000..ab5a5e2 --- /dev/null +++ b/solo_server/commands/query.py @@ -0,0 +1,68 @@ +import sys +import typer +import requests +from litgpt import LLM +from rich.console import Console + +console = Console() + +CORE_SERVER_PORT = 5070 # Change this if your core server runs on a different port +CORE_SERVER_URL = f"http://localhost:{CORE_SERVER_PORT}/generate" + +def redirect_to_core_server(query: str, port: int = CORE_SERVER_PORT) -> None: + """ + Redirect the given query to the core server via an HTTP POST request. + """ + url = f"http://localhost:{port}/generate" + try: + response = requests.post(url, json={"prompt": query}) + response.raise_for_status() + console.print("[success]Response from core server:[/success]") + console.print(response.text) + except Exception as e: + console.print(f"[warning]Error redirecting to core server: {e}[/warning]") + +def query_llm(query: str) -> None: + """ + If the query exceeds 9000 characters, show an error. + Otherwise, load the model and generate a response. + """ + if len(query) > 9000: + typer.echo("Error: Your query exceeds the maximum allowed length of 9000 characters. 
It's over 9000!") + raise typer.Exit(1) + + # Load the model and generate a response while showing a spinner + llm = LLM.load("Qwen/Qwen2.5-1.5B-Instruct") + with console.status("Generating response...", spinner="dots"): + response = llm.generate(query) + typer.echo(response) + +def interactive_mode(): + console.print("Interactive Mode (type 'exit' or 'quit' to end):", style="bold green") + while True: + query_text = input(">> ").strip() + if query_text.lower() in ("exit", "quit"): + break + # If the query starts with "solo @@", redirect to the core server + if query_text.startswith("solo @@"): + # Remove the "solo @@" prefix before sending the query + core_query = query_text[len("solo @@"):].strip() + redirect_to_core_server(core_query) + else: + query_llm(query_text) + +if __name__ == "__main__": + # If invoked with "@@" as the first argument, treat the rest as the query. + # Otherwise, launch interactive mode. + if len(sys.argv) > 1 and sys.argv[1] == "@@": + if len(sys.argv) > 2: + query_text = " ".join(sys.argv[2:]).strip() + else: + typer.echo("Enter your query (end with EOF / Ctrl-D):") + query_text = sys.stdin.read().strip() + # If the query starts with "solo @@", remove that prefix. + if query_text.startswith("solo @@"): + query_text = query_text[len("solo @@"):].strip() + redirect_to_core_server(query_text) + else: + interactive_mode() diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml new file mode 100644 index 0000000..ded92a2 --- /dev/null +++ b/solo_server/ensemble.yaml @@ -0,0 +1,20 @@ +advanced_modules: true +checkpoint_dir: checkpoints/HuggingFaceTB/SmolLM2-1.7B-Instruct +devices: 1 +hardware: + category: High Performance + cpu_cores: 8 + cpu_model: Intel i7 + gpu_memory: 4 + memory_gb: 16 +max_new_tokens: 50 +model_choice: null +module_pack: robotics +port: 5070 +precision: null +quantize: null +selected_model: HuggingFaceTB/SmolLM2-1.7B-Instruct +stream: false +temperature: 0.8 +top_k: 50 +top_p: 1.0 diff --git a/solo_server/explorative_cmd.py b/solo_server/explorative_cmd.py new file mode 100644 index 0000000..93d5c29 --- /dev/null +++ b/solo_server/explorative_cmd.py @@ -0,0 +1,155 @@ +import typer +from rich.console import Console +from rich.panel import Panel + +app = typer.Typer(help="Solo CLI - A comprehensive tool for model management and server operations.") +console = Console() + +# --------------------------------- +# Setup Commands Group +# --------------------------------- +setup_app = typer.Typer(help="Commands for initializing and setting up the environment.") +app.add_typer(setup_app, name="setup") + +@setup_app.command("full") +def full_setup(): + """Run full server setup.""" + console.print(Panel("Full Setup executed", title="Setup", style="green")) + +@setup_app.command("init") +def init(): + """Reinitialize core components.""" + console.print(Panel("Init executed", title="Init", style="green")) + +# --------------------------------- +# Model Management Group +# --------------------------------- +model_app = typer.Typer(help="Manage model downloads, updates, and tests.") +app.add_typer(model_app, name="model") + +@model_app.command("download") +def download_model(): + """Download or update the model.""" + console.print(Panel("Download executed", title="Download", style="green")) + +@model_app.command("update") +def update_model(): + """Update the model to the latest version.""" + console.print(Panel("Update Model executed", title="Update Model", style="green")) + +@model_app.command("test") +def test_model(): + """Test the downloaded 
model with a sample prompt.""" + console.print(Panel("Test executed", title="Test", style="green")) + +# --------------------------------- +# Query & Interaction Group +# --------------------------------- +query_app = typer.Typer(help="Handle one-off queries or launch interactive mode.") +app.add_typer(query_app, name="query") + +@query_app.command("ask") +def ask(query: str = typer.Argument(..., help="Query for the model")): + """Send a query to the model.""" + # Check for "solo @@" prefix and adjust query if necessary + if query.startswith("solo @@"): + query = query[len("solo @@"):].strip() + console.print(Panel(f"Query: {query}", title="Query", style="green")) + +@query_app.command("interactive") +def interactive(): + """Launch interactive query mode.""" + console.print(Panel("Interactive mode launched", title="Interactive", style="green")) + # Add interactive loop logic here if desired + +# --------------------------------- +# Server Management Group +# --------------------------------- +server_app = typer.Typer(help="Commands for managing the model server.") +app.add_typer(server_app, name="server") + +@server_app.command("start") +def start_server(): + """Start or restart the model server.""" + console.print(Panel("Server started", title="Server", style="green")) + +@server_app.command("restart") +def restart_server(): + """Restart the server gracefully.""" + console.print(Panel("Server restarted", title="Restart", style="green")) + +@server_app.command("stop") +def stop_server(): + """Stop the running server.""" + console.print(Panel("Server stopped", title="Stop", style="green")) + +# --------------------------------- +# Diagnostics & Monitoring Group +# --------------------------------- +diag_app = typer.Typer(help="Commands for diagnostics and monitoring.") +app.add_typer(diag_app, name="diagnostics") + +@diag_app.command("status") +def status(): + """Display the current server status.""" + console.print(Panel("Status executed", title="Status", style="green")) + +@diag_app.command("logs") +def logs(): + """Display recent logs.""" + console.print(Panel("Logs executed", title="Logs", style="green")) + +@diag_app.command("health") +def healthcheck(): + """Perform a health check of the server.""" + console.print(Panel("Health check executed", title="Healthcheck", style="green")) + +@diag_app.command("diagnose") +def diagnose(): + """Run diagnostics to troubleshoot issues.""" + console.print(Panel("Diagnose executed", title="Diagnose", style="green")) + +# --------------------------------- +# Maintenance Group +# --------------------------------- +maint_app = typer.Typer(help="Maintenance and update commands.") +app.add_typer(maint_app, name="maintenance") + +@maint_app.command("update") +def update_cli(): + """Update the CLI or associated modules.""" + console.print(Panel("CLI Update executed", title="Update", style="green")) + +@maint_app.command("backup") +def backup(): + """Create backups of configuration and checkpoints.""" + console.print(Panel("Backup executed", title="Backup", style="green")) + +@maint_app.command("restore") +def restore(): + """Restore a backup configuration or model checkpoint.""" + console.print(Panel("Restore executed", title="Restore", style="green")) + +# --------------------------------- +# Configuration Group +# --------------------------------- +config_app = typer.Typer(help="View or modify configuration settings.") +app.add_typer(config_app, name="config") + +@config_app.command("set") +def set_config(): + """Set configuration parameters.""" + 
console.print(Panel("Config set executed", title="Config Set", style="green")) + +@config_app.command("info") +def config_info(): + """Display current configuration info.""" + console.print(Panel("Config info executed", title="Config Info", style="green")) + +@config_app.command("version") +def version(): + """Display the CLI version.""" + console.print(Panel("Version executed", title="Version", style="green")) + +if __name__ == "__main__": + app() diff --git a/solo_server/main.py b/solo_server/main.py index 3c7f76b..05acc6a 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,197 +1,344 @@ -import os -import json -import typer +import time import subprocess -import shutil +import socket +import sys +import typer import click -import sys - -from enum import Enum -from solo_server.config import CONFIG_PATH -from solo_server.utils.docker_utils import start_docker_engine -from solo_server.utils.hardware import detect_hardware, display_hardware_info, recommended_server -from solo_server.utils.nvidia import check_nvidia_toolkit, install_nvidia_toolkit_linux, install_nvidia_toolkit_windows -from solo_server.simple_setup import run_command, detect_gpu +import yaml +from pathlib import Path +from tqdm import tqdm from rich.console import Console from rich.panel import Panel +from rich.theme import Theme +from rich import box + +import commands.query + +app = typer.Typer( + help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, advanced module loading, and query redirection." +) + +# Google-inspired theme +google_theme = Theme({ + "header": "bold #4285F4", + "info": "bold #4285F4", + "warning": "bold #DB4437", + "success": "bold #0F9D58", + "panel.border": "bright_blue", + "panel.title": "bold white" +}) +console = Console(theme=google_theme) + +# Hard-coded model and starting port +MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct" +START_PORT = 5070 + +def print_banner(): + """Display a header banner for the Solo Server CLI.""" + banner_text = """ + ___ _ __ __ _ + / _ \\(_)___ ___ / /___ / /_(_) + / , _/ / _ \\/ -_) / / __/ / __/ / + /_/|_/_/ .__/\\__/ /_/\\__/ \\__/_/ + /_/ + """ + console.print(Panel(banner_text, style="header", border_style="panel.border", title="SOLO SERVER INIT", box=box.DOUBLE)) + +def detect_hardware(): + """ + Dummy hardware detection function. + Replace with your actual hardware detection logic. + """ + cpu_model = "Intel i7" + cpu_cores = 8 + memory_gb = 16 # Example value + gpu_memory = 4 # Example value (in GB) + return cpu_model, cpu_cores, memory_gb, gpu_memory + +def get_hardware_category(memory_gb: float) -> str: + if memory_gb < 8: + return "Fresh Adopter" + elif memory_gb < 16: + return "Mid Range" + elif memory_gb < 32: + return "High Performance" + else: + return "Maestro" + +def simulate_model_download(model: str, sleep_time: int = 3) -> str: + """ + Simulate model download with a progress bar. + (sleep_time is in seconds; e.g., 3 sec ~ 0.05 mins) + """ + for _ in tqdm(range(sleep_time), + desc="Downloading model (est. {:.2f} mins)".format(sleep_time/60), + unit="sec", total=sleep_time): + time.sleep(1) + return f"[success]Model {model} download complete.[/success]" + +def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: + """ + Ask user to confirm core initialization. 
+ """ + init_prompt = ( + "Continue to solo core initialization?\n" + "Yes: Proceed with full initialization and model setup\n" + "No: Exit setup" + ) + console.print(Panel(init_prompt, title="Core Initialization", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + return confirm_fn("", default=True) + +def test_downloaded_model(model: str, run_subprocess_fn=subprocess.run) -> str: + """ + Prompt the user for a test prompt (defaulting to 'solo @@ test') and use the LitGPT CLI + to generate sample output from the downloaded model. + A progress bar shows the testing duration. + """ + test_prompt = typer.prompt("Enter test prompt", default="solo @@ test") + console.print(f"[info]Testing model {model} with prompt: '{test_prompt}'[/info]") + for _ in tqdm(range(5), desc="Testing model (est. 0.08 mins)", unit="sec", total=5): + time.sleep(1) + try: + # Assuming the LitGPT CLI provides a generate command. + cmd = ["litgpt", "generate", model, "--prompt", test_prompt] + result = run_subprocess_fn(cmd, check=True, capture_output=True, text=True) + output = result.stdout.strip() + console.print(f"[success]Test generation output:[/success]\n{output}") + return output + except subprocess.CalledProcessError as e: + error_output = e.stderr.strip() if e.stderr else str(e) + console.print(f"[warning]Test generation failed: {error_output}[/warning]") + return "" + +def prompt_advanced_modules(confirm_fn=typer.confirm, prompt_fn=typer.prompt) -> (bool, str): + """ + Ask user if they want to load advanced modules and select a vertical. + New verticals include: secure enterprise, healthcare, robotics, and lean ensemble. + Returns a tuple (advanced_modules, module_pack) + """ + adv_prompt = ( + "Load advanced modules?\n" + "Yes: Load additional functionalities for a vertical\n" + "No: Skip advanced modules" + ) + console.print(Panel(adv_prompt, title="Advanced Modules", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + advanced_modules = confirm_fn("", default=True) + module_pack = None + if advanced_modules: + module_pack_info = ( + "Choose advanced vertical:\n" + "secure enterprise - Modules for security and compliance\n" + "healthcare - Modules for healthcare applications\n" + "robotics - Modules for robotics integration\n" + "lean ensemble - A lean set of general modules\n" + "Enter your choice:" + ) + console.print(Panel(module_pack_info, title="Vertical Options", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + module_pack = prompt_fn("", type=click.Choice(["secure enterprise", "healthcare", "robotics", "lean ensemble"], case_sensitive=False), default="lean ensemble") + return advanced_modules, module_pack + +def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): + """ + Build an ensemble of Docker images for the selected vertical. + Uses the path: commands/containers/ (relative to main.py). + A tqdm progress bar shows the estimated duration. + """ + # New advanced module packs for different verticals + advanced_module_packs = { + "secure enterprise": ["auth", "data-encryption", "audit-log"], + "healthcare": ["hl7", "fhir-connector", "secure-patient"], + "robotics": ["ros", "le-robot", "robotics-core"], + "lean ensemble": ["microservice", "edge-ai", "light-transformers"] + } + modules = advanced_module_packs.get(module_pack.lower(), []) + if not modules: + console.print(f"[warning]No modules found for vertical '{module_pack}'.[/warning]") + return + + for module in tqdm(modules, desc="Building Docker images (est. 
2 mins/module)", unit="module", total=len(modules)): + build_path = Path("commands") / "containers" / module + if not build_path.exists(): + console.print(f"[warning]Path {build_path} does not exist. Skipping module {module}.[/warning]") + continue + console.print(f"[info]Building Docker image for module:[/info] {module}") + image_tag = module.lower().replace(' ', '-') + try: + run_subprocess_fn( + ["docker", "build", "-t", f"ensemble/{image_tag}", str(build_path)], + check=True, + capture_output=True + ) + console.print(f"[success]Successfully built image for:[/success] {module}") + except subprocess.CalledProcessError as e: + console.print(f"[warning]Docker build failed for module {module}: {e}[/warning]") + +def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml") -> str: + """ + Save setup information to a YAML file. + """ + with open(filename, "w") as f: + yaml.dump(setup_info, f) + return f"[success]Setup information saved to {filename}.[/success]" -class ServerType(str, Enum): - OLLAMA = "Ollama" - VLLM = "vLLM" - LLAMACPP = "Llama.cpp" +def get_available_port(start_port: int) -> int: + """ + Return the first available port starting from start_port. + """ + port = start_port + while True: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + try: + s.bind(("", port)) + return port + except OSError: + port += 1 + +def serve_model(model: str, port: int, run_subprocess_fn=subprocess.run) -> (str, int): + """ + Serve the model using the LitGPT CLI syntax. + If the given port is in use, automatically increment to the next available port. + Returns a tuple of the success message and the port used. + """ + available_port = get_available_port(port) + try: + cmd = ["litgpt", "serve", model, "--port", str(available_port)] + run_subprocess_fn(cmd, check=True, capture_output=True, text=True) + success_msg = f"[success]Server started on port {available_port} with model: {model}[/success]" + test_curl = f"curl http://localhost:{available_port}/" + console.print(f"[info]You can test the server with: {test_curl}[/info]") + return success_msg, available_port + except subprocess.CalledProcessError as e: + error_output = e.stderr.strip() if e.stderr else str(e) + console.print(f"ERROR: {error_output}") + return f"[warning]Failed to start server: {e}[/warning]", available_port + +def get_hardware_info() -> dict: + """ + Get hardware information and categorization. 
+ """ + cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() + hardware_category = get_hardware_category(memory_gb) + return { + "cpu_model": cpu_model, + "cpu_cores": cpu_cores, + "memory_gb": memory_gb, + "gpu_memory": gpu_memory, + "category": hardware_category + } -def setup(): - """Interactive setup for Solo Server environment""" - # Display hardware info - display_hardware_info(typer) - cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, gpu_memory, compute_backend, os_name = detect_hardware() +@app.command() +def setup( + model_choice: str = typer.Option( + None, + "--model", + "-m", + help="Optional model choice (ignored in this setup; always uses HuggingFaceTB/SmolLM2-1.7B-Instruct)" + ) +): + """Run the full solo server setup.""" + console.print("\n") + print_banner() + console.print("\n") + + # Step 1: Hardware Detection & Categorization + console.print("[info]Detecting hardware...[/info]") + hardware_info = get_hardware_info() + hardware_info_str = ( + f"CPU: {hardware_info['cpu_model']} ({hardware_info['cpu_cores']} cores)\n" + f"Memory: {hardware_info['memory_gb']} GB\n" + f"GPU Memory: {hardware_info['gpu_memory']} GB\n" + f"Category: {hardware_info['category']}" + ) + console.print(Panel(hardware_info_str, title="Hardware Info", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + + # Step 2: Core Initialization Prompt + if not prompt_core_initialization(): + console.print("[warning]Exiting setup.[/warning]") + raise typer.Exit() + + console.print("\n") + + # Step 3: Model Download Simulation (always uses the specified model) + download_message = simulate_model_download(MODEL) + console.print(download_message) + + console.print("\n") + + # NEW STEP: Test the downloaded model using the solo @@ structure + console.print(Panel("Testing downloaded model...", title="Test Model", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + test_output = test_downloaded_model(MODEL) + + console.print("\n") - typer.echo("\nStarting Solo Server Setup...\n") - gpu = detect_gpu() - if gpu: - print("💻 Solo Sighting: GPU detected ->", gpu) - device_arg = "1" + # Step 4: Advanced Modules Prompt (optional) + advanced_modules, module_pack = prompt_advanced_modules() + if advanced_modules: + console.print(f"[info]Vertical selected: {module_pack}[/info]") else: - print("😎 Solo Mode: No GPU found; rocking CPU mode!") - device_arg = "0" + console.print("[info]Skipping advanced modules.[/info]") - # Ask for installation type - install_type = typer.prompt("Choose installation type:", type=click.Choice(['simple', 'advanced'], case_sensitive=False)) - typer.echo(f"Selected installation type: {install_type}") - - if install_type == "simple": - # Define port to use - port = "5070" - device_arg = "0" - accelerator_arg = "cpu" - - console = Console() - console.print("Solo setup: Installing optimal inference engine, hold tight...") - run_command(["litgpt", "download", "HuggingFaceTB/SmolLM2-135M-Instruct"], - spinner_message="Solo download in progress: Grabbing lightest model...") - console.print("\n") - - - console.print(Panel.fit( - f"🎉 LIVE: solo server is now live!\n" - f"🔗 Swagger docs available at: http://localhost:{port}/docs", - title="Solo Server", border_style="blue")) - console.print( - f"curl -X POST http://127.0.0.1:{port}/predict -H 'Content-Type: application/json' -d '{{\"prompt\": \"hello Solo\"}}'") - - command = [ - "litgpt", - "serve", - "HuggingFaceTB/SmolLM2-135M-Instruct", - "--port", port, - "--devices", device_arg, - "--accelerator", 
accelerator_arg - ] - - process = subprocess.Popen(command) - print(f"Command is running in the background with PID: {process.pid}") + console.print("\n") + + # Step 5: Save Setup Information to YAML and print config details + setup_info = { + "checkpoint_dir": str(Path("checkpoints") / MODEL), + "devices": 1, + "max_new_tokens": 50, + "port": START_PORT, # initial port, actual port may change + "precision": None, + "quantize": None, + "stream": False, + "temperature": 0.8, + "top_k": 50, + "top_p": 1.0, + "selected_model": MODEL, + "hardware": hardware_info, + "advanced_modules": advanced_modules, + "module_pack": module_pack, + "model_choice": model_choice + } + save_message = save_setup_info(setup_info) + console.print(save_message) + console.print(setup_info) + + # Step 6: Docker Ensemble Build for Advanced Modules (if enabled) + if advanced_modules and module_pack: + console.print(Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + build_docker_ensemble(module_pack) + + console.print("\n") + console.print(Panel("Solo core initialization complete!", title="Setup Complete", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + console.print("\n") + + # Step 7: Serve the Model using LitGPT CLI syntax and capture errors gracefully + console.print(Panel(f"Starting server with model: {MODEL}", title="Server", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + server_message, used_port = serve_model(MODEL, port=START_PORT) + console.print(server_message) + +@app.command() +def query(query: str = typer.Argument( + None, + help="Query for the LLM. If omitted, interactive mode is launched." +)): + """ + Redirect queries to the appropriate functions in query.py. + If a query is provided, it is processed; otherwise, interactive mode is launched. + If the query starts with 'solo @@', the prefix is stripped and the core server is used. + """ + try: + from commands.query import query_llm, redirect_to_core_server, interactive_mode + except ModuleNotFoundError: + console.print("[warning]Module 'query' not found. Please ensure query.py is in the same directory.[/warning]") + raise typer.Exit(1) + + if query is None: + interactive_mode() else: - # Original code - recmd_server = recommended_server(memory_gb, gpu_vendor, gpu_memory) - - def server_type_prompt(value: str) -> ServerType: - normalized_value = value.lower() - for server in ServerType: - if server.value.lower() == normalized_value: - return server - raise typer.BadParameter(f"Invalid server type: {value}") - - server_choice = typer.prompt( - "\nChoose server", - type=server_type_prompt, - default=recmd_server, - ) - - # GPU Configuration - use_gpu = False - if gpu_vendor in ["NVIDIA", "AMD", "Intel", "Apple Silicon"]: - use_gpu = True - if use_gpu and gpu_vendor == "NVIDIA": - if not check_nvidia_toolkit(os_name): - if typer.confirm("NVIDIA GPU Detected, but GPU drivers not found. 
Install now?", default=True): - if os_name == "Linux": - try: - install_nvidia_toolkit_linux() - except subprocess.CalledProcessError as e: - typer.echo(f"Failed to install NVIDIA toolkit: {e}", err=True) - use_gpu = False - elif os_name == "Windows": - try: - install_nvidia_toolkit_windows() - except subprocess.CalledProcessError as e: - typer.echo(f"Failed to install NVIDIA toolkit: {e}", err=True) - use_gpu = False - else: - typer.echo("Falling back to CPU inference.") - use_gpu = False - - # Save GPU configuration to config file - config = {} - if os.path.exists(CONFIG_PATH): - with open(CONFIG_PATH, 'r') as f: - config = json.load(f) - config['hardware'] = {'use_gpu': use_gpu} - with open(CONFIG_PATH, 'w') as f: - json.dump(config, f, indent=4) - - # Docker Engine Check for Docker-based servers - if server_choice in [ServerType.OLLAMA, ServerType.VLLM]: - # Check Docker installation - docker_path = shutil.which("docker") - if not docker_path: - typer.echo("Docker is not installed or not in the system PATH. Please install Docker first.\n", err=True) - typer.secho("Install Here: https://docs.docker.com/get-docker/", fg=typer.colors.GREEN) - raise typer.Exit(code=1) - - - try: - subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20) - except subprocess.CalledProcessError: - typer.echo("Docker daemon is not running. Attempting to start Docker...", err=True) - if not start_docker_engine(os_name): - raise typer.Exit(code=1) - # Re-check if Docker is running - try: - subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20) - except subprocess.CalledProcessError: - typer.echo("Try restarting the terminal with admin privileges and close any instances of podman.", err=True) - raise typer.Exit(code=1) - - - - # Server setup - try: - if server_choice == ServerType.VLLM: - # pull the appropriate vLLM image - typer.echo("Pulling vLLM image...") - if gpu_vendor == "NVIDIA" and use_gpu: - subprocess.run(["docker", "pull", "vllm/vllm-openai:latest"], check=True) - elif gpu_vendor == "AMD" and use_gpu: - subprocess.run(["docker", "pull", "rocm/vllm"], check=True) - elif cpu_model and "Apple" in cpu_model: - subprocess.run(["docker", "pull", "getsolo/vllm-arm"], check=True) - elif cpu_model and any(vendor in cpu_model for vendor in ["Intel", "AMD"]): - subprocess.run(["docker", "pull", "getsolo/vllm-cpu"], check=True) - else: - typer.echo("vLLM currently does not support your machine", err=True) - return False - - typer.secho( - "Solo server vLLM setup complete! Use 'solo serve -s vllm -m MODEL_NAME' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - - elif server_choice == ServerType.OLLAMA: - # Just pull the Ollama image - typer.echo("Pulling Ollama image...") - if gpu_vendor == "AMD" and use_gpu: - subprocess.run(["docker", "pull", "ollama/ollama-rocm"], check=True) - else: - subprocess.run(["docker", "pull", "ollama/ollama"], check=True) - - typer.secho( - "Solo server ollama setup complete! \nUse 'solo serve -s ollama -m MODEL_NAME' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - - elif server_choice == ServerType.LLAMACPP: - from solo_server.utils.server_utils import setup_llama_cpp_server - setup_success = setup_llama_cpp_server(use_gpu, gpu_vendor, os_name, install_only=True) - if setup_success: - typer.secho( - "Solo server llama.cpp setup complete! 
Use 'solo serve -s llama.cpp -m MODEL_PATH' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - else: - typer.echo("Failed to setup llama.cpp", err=True) - except Exception as e: - typer.echo(f"\nSetup failed: {e}", err=True) - raise typer.Exit(code=1) + if query.startswith("solo @@"): + core_query = query[len("solo @@"):].strip() + redirect_to_core_server(core_query) + else: + query_llm(query) if __name__ == "__main__": - typer.run(setup) \ No newline at end of file + app()
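The "solo @@" prefix marks a query that should be routed to the already-running core server rather than loading a local model; the same strip-and-forward pattern appears in commands/query.py (interactive_mode and the __main__ entry point), in main.py's query command, and in explorative_cmd.py's ask. A shared helper is not part of this patch; a minimal sketch of the convention, with the helper name chosen purely for illustration:

from typing import Tuple

CORE_PREFIX = "solo @@"

def split_core_prefix(raw: str) -> Tuple[bool, str]:
    """Return (route_to_core_server, cleaned_query) for a raw user query.

    Mirrors the prefix handling repeated in commands/query.py and main.py:
    "solo @@ hello" -> (True, "hello"), "hello" -> (False, "hello").
    """
    raw = raw.strip()
    if raw.startswith(CORE_PREFIX):
        return True, raw[len(CORE_PREFIX):].strip()
    return False, raw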
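End to end, a routed query becomes an HTTP POST: redirect_to_core_server() in commands/query.py sends {"prompt": ...} as JSON to http://localhost:5070/generate, where 5070 matches START_PORT in main.py. A minimal client sketch of that contract follows; the /generate path is an assumption carried over from query.py and should be checked against whatever route the litgpt-served model actually exposes (the removed simple-setup path in the old main.py advertised /predict), and the port may differ if serve_model() fell back to another free port:

import requests

def ask_core_server(prompt: str, port: int = 5070, timeout: float = 60.0) -> str:
    """POST a prompt to the core server, mirroring redirect_to_core_server()."""
    url = f"http://localhost:{port}/generate"  # path assumed from commands/query.py
    response = requests.post(url, json={"prompt": prompt}, timeout=timeout)
    response.raise_for_status()
    return response.text

if __name__ == "__main__":
    print(ask_core_server("hello Solo"))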
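setup() persists its run configuration with save_setup_info(), and the ensemble.yaml added in this diff shows the resulting keys (selected_model, port, hardware, module_pack, and so on). The patch does not include a reader; a minimal loader sketch, assuming the file sits in the current working directory:

from pathlib import Path

import yaml

def load_setup_info(path: str = "ensemble.yaml") -> dict:
    """Read back the configuration written by save_setup_info() in main.py."""
    config_file = Path(path)
    if not config_file.exists():
        return {}
    with config_file.open() as f:
        return yaml.safe_load(f) or {}

if __name__ == "__main__":
    info = load_setup_info()
    print(info.get("selected_model"), info.get("port"), info.get("module_pack"))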
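The hardware tiers reported during setup come from get_hardware_category(): under 8 GB of RAM is "Fresh Adopter", under 16 GB "Mid Range", under 32 GB "High Performance", and anything larger "Maestro". Those boundaries are easy to pin down with a small test; a pytest sketch, assuming main.py is importable as main (importing it also pulls in litgpt via commands.query):

import pytest

from main import get_hardware_category  # assumes tests run from solo_server/

@pytest.mark.parametrize(
    "memory_gb, expected",
    [
        (4, "Fresh Adopter"),       # below the 8 GB boundary
        (8, "Mid Range"),           # first value at or above 8 GB
        (16, "High Performance"),   # first value at or above 16 GB
        (32, "Maestro"),            # 32 GB and up
    ],
)
def test_get_hardware_category(memory_gb, expected):
    assert get_hardware_category(memory_gb) == expected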