From 0f516477e9afc433744afd25064a2ad8360a9286 Mon Sep 17 00:00:00 2001
From: Zeeshaan Mohammed
Date: Tue, 4 Feb 2025 20:13:31 -0800
Subject: [PATCH 1/4] updated solo-server

---
 README.md                      |  78 ++++++++++--
 setup.py                       |   7 +-
 solo_server/cli.py             |  11 +-
 solo_server/commands/pull.py   |  26 ----
 solo_server/commands/run.py    |  26 ++++
 solo_server/commands/serve.py  |  56 ++++++---
 solo_server/commands/status.py |   8 +-
 solo_server/commands/stop.py   |  29 ++++-
 solo_server/setup.py           | 224 ++++++++++++---------------------
 solo_server/utils/__init__.py  |   1 +
 solo_server/utils/hardware.py  |  75 +++++++++++
 11 files changed, 328 insertions(+), 213 deletions(-)
 delete mode 100644 solo_server/commands/pull.py
 create mode 100644 solo_server/commands/run.py
 create mode 100644 solo_server/utils/__init__.py
 create mode 100644 solo_server/utils/hardware.py

diff --git a/README.md b/README.md
index 8496ae4..bbdae23 100644
--- a/README.md
+++ b/README.md
@@ -20,13 +20,13 @@ Solo Server is a lightweight platform that enables users to manage and monitor A
 
 ## Features
 - **Seamless Setup:** Manage your on-device AI with a simple CLI and HTTP servers
-- **Open Model Registry:** Pull models from registries like Hugging Face and Ollama
+- **Open Model Registry:** Pull models from registries like Ollama & Hugging Face
 - **Lean Load Testing:** Built-in commands to benchmark endpoints
 - **Cross-Platform Compatibility:** Deploy AI models effortlessly on your hardware
 - **Configurable Framework:** Auto-detects hardware (CPU, GPU, RAM) and sets configs
 
 ## Supported Models
-Solo Server supports **multiple model sources**, including **Ollama, Hugging Face, and Ramalama**.
+Solo Server supports **multiple model sources**, including **Ollama & Hugging Face**.
 
 | **Model Name**         | **Source**                                                |
 |------------------------|----------------------------------------------------------|
@@ -39,7 +39,7 @@ Solo Server supports **multiple model sources**, including **Ollama, Hugging Fac
 | **Mistral 7B v3**      | `hf://MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF`        |
 | **Hermes 2 Pro**       | `hf://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF`          |
 | **Cerebrum 1.0 7B**    | `hf://froggeric/Cerebrum-1.0-7b-GGUF`                     |
-| **Dragon Mistral 7B**  | `hf://llmware/dragon-mistral-7b-v0`
+| **Dragon Mistral 7B**  | `hf://llmware/dragon-mistral-7b-v0`                       |
 
 ## Table of Contents
@@ -52,6 +52,12 @@
 
 ## Installation
 
+### **๐Ÿ”น Prerequisites**
+
+- **๐Ÿ‹ Docker:** Required for containerization
+  - [Install Docker](https://docs.docker.com/get-docker/)
+  - Ensure Docker daemon is running
+
 ### **๐Ÿ”น Install via PyPI**
 ```sh
 pip install solo-server
 ```
@@ -65,22 +71,39 @@ Creates an isolated environment using `uv` for performance and stability.
 
 Run the **interactive setup** to configure Solo Server:
 ```sh
-solo setup
+solo start
 ```
 
 ### **๐Ÿ”น Setup Features**
 โœ”๏ธ **Detects CPU, GPU, RAM** for **hardware-optimized execution**
 โœ”๏ธ **Auto-configures `solo.conf` with optimal settings**
 โœ”๏ธ **Requests API keys for Ngrok and Replicate**
 โœ”๏ธ **Recommends the compute backend OCI (CUDA, HIP, SYCL, Vulkan, CPU, Metal)**
 
 ---
 
+**Example Output:**
+```sh
+๐Ÿ–ฅ๏ธ System Information
+Operating System: Windows
+CPU: AMD64 Family 23 Model 96 Stepping 1, AuthenticAMD
+CPU Cores: 8
+Memory: 15.42GB
+GPU: NVIDIA
+GPU Model: NVIDIA GeForce GTX 1660 Ti
+GPU Memory: 6.0GB
+Compute Backend: CUDA
+
+๐Ÿš€ Setting up Solo Server...
+โœ… Solo server is ready!
+``` + +--- + ## **Commands** -### **1๏ธโƒฃ Pull a Model** +### **1๏ธโƒฃ Pull & Run a Model** ```sh -solo pull llama3 +solo run llama3.2 ``` - --- @@ -96,6 +119,39 @@ http://127.0.0.1:5070 #SOLO_SERVER_PORT --- +## Diagram + +``` ++-------------------+ +| | +| solo run llama3.2 | +| | ++---------+---------+ + | + | + | +------------------+ +----------------------+ + | | Pull inferencing | | Pull model layer | + +-----------| runtime (cuda) |---------->| llama3.2 | + +------------------+ +----------------------+ + | Repo options | + ++-----------+--------++ + | | | + v v v + +----------+ +----------+ +-------------+ + | Ollama | | vLLM | | HuggingFace | + | Registry | | registry | | Registry | + +-----+------+---+------+-++------------+ + | | | + v v v + +---------------------+ + | Start with | + | cuda runtime | + | and | + | llama3.2 | + +---------------------+ +``` +--- + ### **3๏ธโƒฃ Benchmark a Model** ```sh solo benchmark llama3 @@ -148,12 +204,12 @@ solo status ### **5๏ธโƒฃ Stop a Model** ```sh -solo stop llama3 +solo stop ``` **Example Output:** ```sh -Stopping llama3... -llama3 stopped successfully. +๐Ÿ›‘ Stopping Solo Server... +โœ… Solo server stopped successfully. ``` --- diff --git a/setup.py b/setup.py index 3c66570..4206c92 100644 --- a/setup.py +++ b/setup.py @@ -11,11 +11,14 @@ description="AIOps for the Physical World.", long_description=long_description, long_description_content_type="text/markdown", - url="https://github.com/AIEngineersDev/solo-server", + url="https://github.com/GetSoloTech/solo-server", packages=find_packages(include=["solo_server", "solo_server.*"]), include_package_data=True, install_requires=[ "typer", + "GPUtil", + "psutil", + "requests", ], extras_require={ "dev": ["pytest", "black", "isort"], @@ -23,7 +26,7 @@ python_requires=">=3.8", entry_points={ "console_scripts": [ - "solo-server=solo_server.cli:app", + "solo=solo_server.cli:app", ], }, ) \ No newline at end of file diff --git a/solo_server/cli.py b/solo_server/cli.py index 0d99c03..bd07026 100644 --- a/solo_server/cli.py +++ b/solo_server/cli.py @@ -1,15 +1,14 @@ import typer -from .commands import pull, serve, stop, status, benchmark -from .setup import interactive_setup +from .commands import run, serve, stop, status +from .setup import start app = typer.Typer() # Commands -app.command()(pull.pull) -app.command()(serve.serve) +app.command()(run.run) app.command()(stop.stop) app.command()(status.status) -app.command()(benchmark.benchmark) -app.command()(interactive_setup) +app.command()(serve.serve) +app.command()(start) if __name__ == "__main__": app() diff --git a/solo_server/commands/pull.py b/solo_server/commands/pull.py deleted file mode 100644 index ef84bab..0000000 --- a/solo_server/commands/pull.py +++ /dev/null @@ -1,26 +0,0 @@ -import typer -import subprocess - -def pull(model: str): - """ - Pulls a model using Ramalama registry. 
- """ - typer.echo(f"๐Ÿ”„ Pulling model {model} from Ramalama registry...") - - try: - command = ["ramalama", "pull", model] - process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - - # Progress tracking - for line in process.stdout: - typer.echo(line.strip()) - - process.wait() - - if process.returncode == 0: - typer.echo(f"โœ… Model {model} pulled successfully.") - else: - typer.echo(f"โŒ Failed to pull model {model}.", err=True) - - except Exception as e: - typer.echo(f"โš ๏ธ Error pulling model {model}: {e}", err=True) diff --git a/solo_server/commands/run.py b/solo_server/commands/run.py new file mode 100644 index 0000000..259b566 --- /dev/null +++ b/solo_server/commands/run.py @@ -0,0 +1,26 @@ +import typer +import subprocess + +def run(model: str): + """ + Serves a model using Ollama and enables interactive chat. + """ + typer.echo(f"๐Ÿš€ Starting model {model}...") + + # Check if Docker container is running + try: + check_cmd = ["docker", "ps", "-q", "-f", "name=ollama"] + if not subprocess.run(check_cmd, capture_output=True, text=True).stdout: + typer.echo("โŒ Solo server is not active. Please run 'solo setup' first.", err=True) + return + + command = ["docker", "exec", "-it", "ollama", "ollama", "run", model] + + # Use subprocess.run with shell=True for interactive terminal + process = subprocess.run( + " ".join(command), + shell=True, + text=True + ) + except subprocess.CalledProcessError as e: + typer.echo(f"โŒ An error occurred: {e}", err=True) diff --git a/solo_server/commands/serve.py b/solo_server/commands/serve.py index c669dcc..d4eb7e0 100644 --- a/solo_server/commands/serve.py +++ b/solo_server/commands/serve.py @@ -1,20 +1,46 @@ +import requests +import json import typer -import subprocess -def serve(name: str, model: str): - """ - Serves a model using Ramalama. 
- """ - typer.echo(f"๐Ÿš€ Starting model {model} as {name}...") +def serve( + model: str = typer.Option("llama3.2", "--model", "-m", help="Model to use"), + input: str = typer.Option("Hello", "--input", "-i", help="Input text for inference"), + stream: bool = typer.Option(False, "--stream", "-s", help="Enable streaming mode") +): + # API Endpoint + url = "http://localhost:11434/api/chat" - try: - command = ["ramalama", "serve", model] - process = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + # Chat request payload + data = { + "model": model, + "messages": [ + { + "role": "user", + "content": input + } + ], + "stream": stream # Set to True for streaming + } - typer.echo(f"โœ… Model {model} is now running as {name}.") - typer.echo(f"๐ŸŒ Access the UI at: http://127.0.0.1:5070") + if data["stream"] == False: + # Sending POST request + response = requests.post(url, json=data) + # Check if response is valid JSON + try: + response_json = response.json() + if "message" in response_json and "content" in response_json["message"]: + print("Assistant Response:", response_json["message"]["content"]) + else: + print("Unexpected Response:", json.dumps(response_json, indent=2)) + except json.JSONDecodeError: + print("Error: API did not return valid JSON.") + print("Raw Response:", response.text) - except subprocess.CalledProcessError as e: - typer.echo(f"โŒ Failed to serve model {model}: {e.stderr}", err=True) - except Exception as e: - typer.echo(f"โš ๏ธ Unexpected error: {e}", err=True) + + else: + with requests.post(url, json=data, stream=True) as response: + for line in response.iter_lines(): + if line: + json_obj = json.loads(line) + if "message" in json_obj and "content" in json_obj["message"]: + print(json_obj["message"]["content"], end="", flush=True) # Streaming output diff --git a/solo_server/commands/status.py b/solo_server/commands/status.py index f5a8512..d87e78c 100644 --- a/solo_server/commands/status.py +++ b/solo_server/commands/status.py @@ -1,10 +1,12 @@ import typer import subprocess +from solo_server.utils.hardware import display_hardware_info app = typer.Typer() @app.command() def status(): - """Check running models.""" - typer.echo("Checking running model containers...") - subprocess.run(["podman", "ps", "--filter", "name=solo-container"], check=True) + """Check running models and system status.""" + display_hardware_info(typer) + typer.echo("\n๐Ÿ” Running Models:") + subprocess.run(["docker", "ps"], check=True) diff --git a/solo_server/commands/stop.py b/solo_server/commands/stop.py index f0f576c..46eee66 100644 --- a/solo_server/commands/stop.py +++ b/solo_server/commands/stop.py @@ -1,17 +1,34 @@ import typer import subprocess -def stop(name: str): +def stop(name: str = ""): """ - Stops a running model container using Ramalama. + Stops the Ollama Docker container and any running models. 
""" - typer.echo(f"๐Ÿ›‘ Stopping {name} using Ramalama...") + typer.echo("๐Ÿ›‘ Stopping Solo Server...") try: - subprocess.run(["ramalama", "stop", name], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - typer.echo(f"โœ… {name} stopped successfully.") + # Stop the Docker container + subprocess.run( + ["docker", "stop", "ollama"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + typer.echo("โœ… Solo server stopped successfully.") + + # # Remove the container + # subprocess.run( + # ["docker", "rm", "ollama"], + # check=True, + # stdout=subprocess.PIPE, + # stderr=subprocess.PIPE, + # text=True + # ) + # typer.echo("๐Ÿ—‘๏ธ Ollama container removed.") except subprocess.CalledProcessError as e: - typer.echo(f"โŒ Failed to stop {name}: {e.stderr}", err=True) + typer.echo(f"โŒ Failed to stop Solo Server: {e.stderr}", err=True) except Exception as e: typer.echo(f"โš ๏ธ Unexpected error: {e}", err=True) diff --git a/solo_server/setup.py b/solo_server/setup.py index 73aeecb..5d37e04 100644 --- a/solo_server/setup.py +++ b/solo_server/setup.py @@ -1,151 +1,87 @@ import typer -import os -import configparser -import platform import subprocess +import shutil +import time +from .utils.hardware import display_hardware_info -CONFIG_FILE = os.path.expanduser("~/.solo/solo.conf") +def start(): -def detect_hardware(): - """ - Detects system hardware (CPU, GPU, RAM) and suggests optimal configurations. - """ - typer.echo("๐Ÿ–ฅ๏ธ Detecting hardware specifications...") - - # Detect CPU - cpu_model = "Unknown" - cpu_cores = os.cpu_count() or 1 - - if platform.system() == "Windows": - cpu_model = platform.processor() - elif platform.system() == "Linux": - try: - cpu_model = subprocess.check_output("lscpu | grep 'Model name'", shell=True, text=True).split(":")[1].strip() - except: - cpu_model = "Unknown Linux CPU" - elif platform.system() == "Darwin": - try: - cpu_model = subprocess.check_output("sysctl -n machdep.cpu.brand_string", shell=True, text=True).strip() - except: - cpu_model = "Unknown Mac CPU" - - # Detect RAM - memory_gb = "Unknown" - if platform.system() == "Windows": - try: - memory_gb = int(subprocess.check_output("wmic ComputerSystem get TotalPhysicalMemory", shell=True, text=True).split("\n")[1].strip()) // (1024**3) - except: - memory_gb = "Unknown" - elif platform.system() == "Linux": - try: - memory_gb = int(subprocess.check_output("free -g | awk '/^Mem:/{print $2}'", shell=True, text=True).strip()) - except: - memory_gb = "Unknown" - elif platform.system() == "Darwin": - try: - memory_gb = int(subprocess.check_output("sysctl -n hw.memsize", shell=True, text=True)) // (1024**3) - except: - memory_gb = "Unknown" - - # Detect GPU - gpu_vendor = "None" - gpu_model = "None" - - if platform.system() == "Windows": - try: - gpu_info = subprocess.check_output("wmic path win32_VideoController get Name", shell=True, text=True).split("\n")[1].strip() - if "NVIDIA" in gpu_info: - gpu_vendor = "NVIDIA" - elif "AMD" in gpu_info: - gpu_vendor = "AMD" - elif "Intel" in gpu_info: - gpu_vendor = "Intel" - gpu_model = gpu_info - except: - gpu_vendor = "Unknown" - gpu_model = "Unknown" - elif platform.system() == "Linux": - try: - if subprocess.run("nvidia-smi", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).returncode == 0: - gpu_vendor = "NVIDIA" - gpu_model = subprocess.check_output("nvidia-smi --query-gpu=name --format=csv,noheader", shell=True, text=True).split("\n")[0] - elif subprocess.run("rocm-smi", shell=True, 
stdout=subprocess.PIPE, stderr=subprocess.PIPE).returncode == 0: - gpu_vendor = "AMD" - gpu_model = subprocess.check_output("rocm-smi --showproductname | awk -F ': ' '{print $2}'", shell=True, text=True).strip() - elif subprocess.run("lspci | grep -i vga", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).returncode == 0: - gpu_vendor = "Intel" - gpu_model = subprocess.check_output("lspci | grep -i vga | awk -F ': ' '{print $2}'", shell=True, text=True).strip() - except: - gpu_vendor = "Unknown" - gpu_model = "Unknown" - elif platform.system() == "Darwin": - try: - gpu_vendor = "Apple Silicon" - gpu_model = "Integrated GPU" - except: - gpu_vendor = "Unknown" - gpu_model = "Unknown" - - typer.echo(f"๐Ÿ–ฅ๏ธ CPU: {cpu_model} ({cpu_cores} cores)") - typer.echo(f"๐Ÿ’พ RAM: {memory_gb} GB") - typer.echo(f"๐ŸŽฎ GPU: {gpu_vendor} - {gpu_model}") - - # Recommend Compute Backend - if gpu_vendor == "NVIDIA": - compute_backend = "CUDA" - elif gpu_vendor == "AMD": - compute_backend = "HIP" - elif gpu_vendor == "Intel": - compute_backend = "SYCL" - elif gpu_vendor == "Apple Silicon": - compute_backend = "Metal" - else: - compute_backend = "CPU" - - typer.echo(f"โš™๏ธ Recommended Compute Backend: {compute_backend}") - - return cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, compute_backend - -def interactive_setup(): - """ - Runs an interactive setup to configure Solo CLI with hardware detection. - """ - typer.echo("๐Ÿ”ง Welcome to Solo Setup!") - typer.echo("Let'sconfigure your settings and API keys.") - - # Ensure config directory exists - os.makedirs(os.path.dirname(CONFIG_FILE), exist_ok=True) - - config = configparser.ConfigParser() - config["DEFAULT"] = {} - - # Detect hardware - cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, compute_backend = detect_hardware() - - # User Inputs - config["DEFAULT"]["MODEL_REGISTRY"] = typer.prompt("๐ŸŒ Model registry (ramalama/ollama)", default="ramalama") - config["DEFAULT"]["MODEL_PATH"] = typer.prompt("๐Ÿ“‚ Model storage path", default=os.path.expanduser("~/solo/models")) - config["DEFAULT"]["COMPUTE_BACKEND"] = typer.prompt(f"โš™๏ธ Compute backend (CPU/CUDA/HIP/SYCL/Vulkan) [Recommended: {compute_backend}]", default=compute_backend) - config["DEFAULT"]["SERVER_PORT"] = typer.prompt("๐ŸŒ Server port", default="5070") - config["DEFAULT"]["LOG_LEVEL"] = typer.prompt("๐Ÿ” Logging level (INFO/DEBUG/ERROR)", default="INFO") - - # API Keys - typer.echo("๐Ÿ”‘ Enter API keys (leave blank to skip).") - config["DEFAULT"]["NGROK_API_KEY"] = typer.prompt("Ngrok API Key", default="", show_default=False) - config["DEFAULT"]["REPLICATE_API_KEY"] = typer.prompt("Replicate API Key", default="", show_default=False) + """Setup solo-server environment.""" - # Store detected hardware details - config["DEFAULT"]["CPU_MODEL"] = cpu_model - config["DEFAULT"]["CPU_CORES"] = str(cpu_cores) - config["DEFAULT"]["MEMORY_GB"] = str(memory_gb) - config["DEFAULT"]["GPU_VENDOR"] = gpu_vendor - config["DEFAULT"]["GPU_MODEL"] = gpu_model - - # Save to file - with open(CONFIG_FILE, "w") as configfile: - config.write(configfile) - - typer.echo("โœ… Setup complete! Run `solo --help` to get started.") + display_hardware_info(typer) + typer.echo("\n๐Ÿš€ Setting up Solo Server...") + + if not shutil.which("docker"): + typer.echo("โŒ Docker is not installed. 
Please install Docker first.", err=True) + return + + try: + # Check if Docker daemon is running + subprocess.run(["docker", "info"], check=True, capture_output=True) + + # Check if container exists (running or stopped) + container_exists = subprocess.run( + ["docker", "ps", "-aq", "-f", "name=ollama"], + capture_output=True, + text=True + ).stdout.strip() + + if container_exists: + # Check if container is running + check_cmd = ["docker", "ps", "-q", "-f", "name=ollama"] + is_running = subprocess.run(check_cmd, capture_output=True, text=True).stdout.strip() + if not is_running: + subprocess.run(["docker", "start", "ollama"], check=True, capture_output=True) + else: + # Pull Ollama image + typer.echo("๐Ÿ“ฅ Pulling Ollama Docker image...") + subprocess.run(["docker", "pull", "ollama/ollama"], check=True) + + # Check if port is available + try: + subprocess.run( + ["docker", "run", "--rm", "-p", "11434:11434", "alpine", "true"], + check=True, + capture_output=True + ) + except subprocess.CalledProcessError: + typer.echo("โŒ Port 11434 is already in use", err=True) + return + + # Start Ollama container + typer.echo("๐Ÿš€ Starting Solo Server...") + subprocess.run([ + "docker", "run", "-d", + "--name", "solo", + "-v", "ollama:/root/.ollama", + "-p", "11434:11434", + "ollama/ollama" + ], check=True) + + # Wait for container to be ready with timeout + timeout = 30 + start_time = time.time() + while time.time() - start_time < timeout: + try: + subprocess.run( + ["docker", "exec", "ollama", "ollama", "list"], + check=True, + stdout=subprocess.DEVNULL # Only suppress stdout + ) + typer.echo("โœ… Solo server is ready!") + return + except subprocess.CalledProcessError: + time.sleep(1) + + typer.echo("โŒ Solo server failed to start within timeout", err=True) + + except subprocess.CalledProcessError as e: + typer.echo(f"โŒ Docker command failed: {e}", err=True) + # Cleanup on failure + if container_exists: + subprocess.run(["docker", "stop", "ollama"], check=False) + except Exception as e: + typer.echo(f"โŒ Unexpected error: {e}", err=True) if __name__ == "__main__": - interactive_setup() + start() diff --git a/solo_server/utils/__init__.py b/solo_server/utils/__init__.py new file mode 100644 index 0000000..62da138 --- /dev/null +++ b/solo_server/utils/__init__.py @@ -0,0 +1 @@ +# solo_server/__init__.py diff --git a/solo_server/utils/hardware.py b/solo_server/utils/hardware.py new file mode 100644 index 0000000..e4d6691 --- /dev/null +++ b/solo_server/utils/hardware.py @@ -0,0 +1,75 @@ +import platform +import psutil +import GPUtil +import subprocess +from typing import Tuple + +def detect_hardware() -> Tuple[str, int, float, str, str, float, str, str]: + #OS Info + os = platform.system() + + # CPU Info + cpu_model = "Unknown" + cpu_cores = psutil.cpu_count(logical=False) + + if os == "Windows": + cpu_model = platform.processor() + elif os == "Linux": + try: + cpu_model = subprocess.check_output("lscpu | grep 'Model name'", shell=True, text=True).split(":")[1].strip() + except: + cpu_model = "Unknown Linux CPU" + elif platform.system() == "Darwin": + try: + cpu_model = subprocess.check_output("sysctl -n machdep.cpu.brand_string", shell=True, text=True).strip() + except: + cpu_model = "Unknown Mac CPU" + + # Memory Info + memory_gb = round(psutil.virtual_memory().total / (1024**3), 2) + + # GPU Info + gpu_vendor = "None" + gpu_model = "None" + compute_backend = "CPU" + try: + gpus = GPUtil.getGPUs() + if gpus: + gpu = gpus[0] # Get first GPU + gpu_model = gpu.name + gpu_memory = 
round(gpu.memoryTotal, 2) # GPU memory in GB + if "NVIDIA" in gpu_model: + gpu_vendor = "NVIDIA" + compute_backend = "CUDA" + elif "AMD" in gpu_model: + gpu_vendor = "AMD" + compute_backend = "HIP" + elif "Intel" in gpu_model: + gpu_vendor = "Intel" + compute_backend = "OpenCL" + elif "Apple Silicon" in gpu_model: + gpu_vendor = "Apple Silicon" + compute_backend = "Metal" + else: + gpu_vendor = "Unknown" + compute_backend = "CPU" + except: + gpu_memory = 0.0 + pass + + return cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, gpu_memory, compute_backend, os + +def display_hardware_info(typer): + cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, gpu_memory, compute_backend, os = detect_hardware() + + typer.echo("------------------------------->") + typer.echo("๐Ÿ–ฅ๏ธ System Information") + typer.echo(f"Operating System: {os}") + typer.echo(f"CPU: {cpu_model}") + typer.echo(f"CPU Cores: {cpu_cores}") + typer.echo(f"Memory: {memory_gb}GB") + typer.echo(f"GPU: {gpu_vendor}") + typer.echo(f"GPU Model: {gpu_model}") + typer.echo(f'GPU Memory: {gpu_memory}GB') + typer.echo(f"Compute Backend: {compute_backend}") + \ No newline at end of file From fa1261d63a84142f85b6e3ad9bc82af4b6702643 Mon Sep 17 00:00:00 2001 From: Zeeshaan Mohammed Date: Tue, 4 Feb 2025 20:16:16 -0800 Subject: [PATCH 2/4] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bbdae23..299729a 100644 --- a/README.md +++ b/README.md @@ -127,8 +127,8 @@ http://127.0.0.1:5070 #SOLO_SERVER_PORT | solo run llama3.2 | | | +---------+---------+ - | - | + | + | | +------------------+ +----------------------+ | | Pull inferencing | | Pull model layer | +-----------| runtime (cuda) |---------->| llama3.2 | From 93bca1a1116e886aa42d74b83feececd111a3671 Mon Sep 17 00:00:00 2001 From: Zeeshaan Mohammed Date: Wed, 5 Feb 2025 18:42:29 -0800 Subject: [PATCH 3/4] nvidia cuda support --- setup.py | 3 +- solo_server/cli.py | 5 +- solo_server/commands/run.py | 6 +- solo_server/commands/status.py | 38 ++++++- solo_server/commands/stop.py | 2 +- solo_server/setup.py | 87 ----------------- solo_server/start.py | 174 +++++++++++++++++++++++++++++++++ 7 files changed, 218 insertions(+), 97 deletions(-) delete mode 100644 solo_server/setup.py create mode 100644 solo_server/start.py diff --git a/setup.py b/setup.py index 4206c92..167151d 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,8 @@ "typer", "GPUtil", "psutil", - "requests", + "requests", + "tabulate", ], extras_require={ "dev": ["pytest", "black", "isort"], diff --git a/solo_server/cli.py b/solo_server/cli.py index bd07026..b7b3a3e 100644 --- a/solo_server/cli.py +++ b/solo_server/cli.py @@ -1,13 +1,12 @@ import typer -from .commands import run, serve, stop, status -from .setup import start +from .commands import run, stop, status +from .start import start app = typer.Typer() # Commands app.command()(run.run) app.command()(stop.stop) app.command()(status.status) -app.command()(serve.serve) app.command()(start) if __name__ == "__main__": diff --git a/solo_server/commands/run.py b/solo_server/commands/run.py index 259b566..79a8972 100644 --- a/solo_server/commands/run.py +++ b/solo_server/commands/run.py @@ -9,12 +9,12 @@ def run(model: str): # Check if Docker container is running try: - check_cmd = ["docker", "ps", "-q", "-f", "name=ollama"] + check_cmd = ["docker", "ps", "-q", "-f", "name=solo"] if not subprocess.run(check_cmd, capture_output=True, text=True).stdout: - typer.echo("โŒ Solo server is not active. 
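The `serve` command added in this patch is a thin client over Ollama's `/api/chat` endpoint. A minimal sketch of the same non-streaming request made outside the CLI, assuming the container from `solo start` is listening on the default `11434` port and a model such as `llama3.2` has already been pulled:

```python
import requests

# Mirror of the non-streaming request serve.py sends to the local Ollama API.
# Assumes `solo start` has completed; the model name is illustrative.
response = requests.post(
    "http://localhost:11434/api/chat",
    json={
        "model": "llama3.2",
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": False,
    },
    timeout=60,
)
response.raise_for_status()
print(response.json()["message"]["content"])
```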
From fa1261d63a84142f85b6e3ad9bc82af4b6702643 Mon Sep 17 00:00:00 2001
From: Zeeshaan Mohammed
Date: Tue, 4 Feb 2025 20:16:16 -0800
Subject: [PATCH 2/4] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index bbdae23..299729a 100644
--- a/README.md
+++ b/README.md
@@ -127,8 +127,8 @@ http://127.0.0.1:5070 #SOLO_SERVER_PORT
 | solo run llama3.2 |
 |                   |
 +---------+---------+
-          |
-          |
+          |
+          |
           |        +------------------+        +----------------------+
           |        | Pull inferencing |        | Pull model layer     |
 +-----------| runtime (cuda) |---------->| llama3.2 |
From 93bca1a1116e886aa42d74b83feececd111a3671 Mon Sep 17 00:00:00 2001
From: Zeeshaan Mohammed
Date: Wed, 5 Feb 2025 18:42:29 -0800
Subject: [PATCH 3/4] nvidia cuda support

---
 setup.py                       |   3 +-
 solo_server/cli.py             |   5 +-
 solo_server/commands/run.py    |   6 +-
 solo_server/commands/status.py |  38 ++++++-
 solo_server/commands/stop.py   |   2 +-
 solo_server/setup.py           |  87 -----------------
 solo_server/start.py           | 174 +++++++++++++++++++++++++++++++++
 7 files changed, 218 insertions(+), 97 deletions(-)
 delete mode 100644 solo_server/setup.py
 create mode 100644 solo_server/start.py

diff --git a/setup.py b/setup.py
index 4206c92..167151d 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,8 @@
         "typer",
         "GPUtil",
         "psutil",
-        "requests",
+        "requests",
+        "tabulate",
     ],
     extras_require={
         "dev": ["pytest", "black", "isort"],
diff --git a/solo_server/cli.py b/solo_server/cli.py
index bd07026..b7b3a3e 100644
--- a/solo_server/cli.py
+++ b/solo_server/cli.py
@@ -1,13 +1,12 @@
 import typer
-from .commands import run, serve, stop, status
-from .setup import start
+from .commands import run, stop, status
+from .start import start
 
 app = typer.Typer()
 
 # Commands
 app.command()(run.run)
 app.command()(stop.stop)
 app.command()(status.status)
-app.command()(serve.serve)
 app.command()(start)
 
 if __name__ == "__main__":
     app()
diff --git a/solo_server/commands/run.py b/solo_server/commands/run.py
index 259b566..79a8972 100644
--- a/solo_server/commands/run.py
+++ b/solo_server/commands/run.py
@@ -9,12 +9,12 @@ def run(model: str):
 
     # Check if Docker container is running
     try:
-        check_cmd = ["docker", "ps", "-q", "-f", "name=ollama"]
+        check_cmd = ["docker", "ps", "-q", "-f", "name=solo"]
         if not subprocess.run(check_cmd, capture_output=True, text=True).stdout:
-            typer.echo("โŒ Solo server is not active. Please run 'solo setup' first.", err=True)
+            typer.echo("โŒ Solo server is not active. Please start solo server first.", err=True)
             return
 
-        command = ["docker", "exec", "-it", "ollama", "ollama", "run", model]
+        command = ["docker", "exec", "-it", "solo", "ollama", "run", model]
 
         # Use subprocess.run with shell=True for interactive terminal
         process = subprocess.run(
diff --git a/solo_server/commands/status.py b/solo_server/commands/status.py
index d87e78c..2e2c7ec 100644
--- a/solo_server/commands/status.py
+++ b/solo_server/commands/status.py
@@ -1,6 +1,8 @@
 import typer
 import subprocess
 from solo_server.utils.hardware import display_hardware_info
+from tabulate import tabulate
+import json
 
 app = typer.Typer()
 
@@ -8,5 +10,37 @@ app = typer.Typer()
 @app.command()
 def status():
     """Check running models and system status."""
     display_hardware_info(typer)
-    typer.echo("\n๐Ÿ” Running Models:")
-    subprocess.run(["docker", "ps"], check=True)
+
+    # Check for running solo container
+    container_result = subprocess.run(["docker", "ps", "-f", "name=solo", "--format", "{{json .}}"],
+                                      capture_output=True, text=True, check=True)
+
+    if container_result.stdout.strip():
+        # Container is running, show available models
+        typer.echo("\n๐Ÿ” Available Models:")
+        models_result = subprocess.run(["docker", "exec", "solo", "ollama", "list"],
+                                       capture_output=True, text=True, check=True)
+        models = []
+        for line in models_result.stdout.strip().split('\n'):
+            parts = line.split()
+            if len(parts) >= 7:
+                size = f"{parts[2]} {parts[3]}"
+                modified = f"{parts[4]} {parts[5]} {parts[6]}"
+                models.append([parts[0], parts[1], size, modified])
+
+        if models:
+            print(tabulate(models, headers=['NAME', 'ID', 'SIZE', 'MODIFIED'], tablefmt='grid'))
+
+    # Show running containers section (will be empty if none running)
+    typer.echo("\n๐Ÿ” Running Containers:")
+    containers = []
+    if container_result.stdout.strip():
+        for line in container_result.stdout.strip().split('\n'):
+            container = json.loads(line)
+            containers.append([
+                container['Names'],
+                container['Status'],
+                container['Ports']
+            ])
+
+    print(tabulate(containers, headers=['NAME', 'STATUS', 'PORTS'], tablefmt='grid'))
diff --git a/solo_server/commands/stop.py b/solo_server/commands/stop.py
index 46eee66..e9486a8 100644
--- a/solo_server/commands/stop.py
+++ b/solo_server/commands/stop.py
@@ -10,7 +10,7 @@ def stop(name: str = ""):
     try:
         # Stop the Docker container
         subprocess.run(
-            ["docker", "stop", "ollama"],
+            ["docker", "stop", "solo"],
             check=True,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
diff --git a/solo_server/setup.py b/solo_server/setup.py
deleted file mode 100644
index 5d37e04..0000000
--- a/solo_server/setup.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import typer
-import subprocess
-import shutil
-import time
-from .utils.hardware import display_hardware_info
-
-def start():
-
-    """Setup solo-server environment."""
-
-    display_hardware_info(typer)
-    typer.echo("\n๐Ÿš€ Setting up Solo Server...")
-
-    if not shutil.which("docker"):
-        typer.echo("โŒ Docker is not installed. Please install Docker first.", err=True)
-        return
-
-    try:
-        # Check if Docker daemon is running
-        subprocess.run(["docker", "info"], check=True, capture_output=True)
-
-        # Check if container exists (running or stopped)
-        container_exists = subprocess.run(
-            ["docker", "ps", "-aq", "-f", "name=ollama"],
-            capture_output=True,
-            text=True
-        ).stdout.strip()
-
-        if container_exists:
-            # Check if container is running
-            check_cmd = ["docker", "ps", "-q", "-f", "name=ollama"]
-            is_running = subprocess.run(check_cmd, capture_output=True, text=True).stdout.strip()
-            if not is_running:
-                subprocess.run(["docker", "start", "ollama"], check=True, capture_output=True)
-        else:
-            # Pull Ollama image
-            typer.echo("๐Ÿ“ฅ Pulling Ollama Docker image...")
-            subprocess.run(["docker", "pull", "ollama/ollama"], check=True)
-
-            # Check if port is available
-            try:
-                subprocess.run(
-                    ["docker", "run", "--rm", "-p", "11434:11434", "alpine", "true"],
-                    check=True,
-                    capture_output=True
-                )
-            except subprocess.CalledProcessError:
-                typer.echo("โŒ Port 11434 is already in use", err=True)
-                return
-
-            # Start Ollama container
-            typer.echo("๐Ÿš€ Starting Solo Server...")
-            subprocess.run([
-                "docker", "run", "-d",
-                "--name", "solo",
-                "-v", "ollama:/root/.ollama",
-                "-p", "11434:11434",
-                "ollama/ollama"
-            ], check=True)
-
-        # Wait for container to be ready with timeout
-        timeout = 30
-        start_time = time.time()
-        while time.time() - start_time < timeout:
-            try:
-                subprocess.run(
-                    ["docker", "exec", "ollama", "ollama", "list"],
-                    check=True,
-                    stdout=subprocess.DEVNULL  # Only suppress stdout
-                )
-                typer.echo("โœ… Solo server is ready!")
-                return
-            except subprocess.CalledProcessError:
-                time.sleep(1)
-
-        typer.echo("โŒ Solo server failed to start within timeout", err=True)
-
-    except subprocess.CalledProcessError as e:
-        typer.echo(f"โŒ Docker command failed: {e}", err=True)
-        # Cleanup on failure
-        if container_exists:
-            subprocess.run(["docker", "stop", "ollama"], check=False)
-    except Exception as e:
-        typer.echo(f"โŒ Unexpected error: {e}", err=True)
-
-if __name__ == "__main__":
-    start()
diff --git a/solo_server/start.py b/solo_server/start.py
new file mode 100644
index 0000000..479361c
--- /dev/null
+++ b/solo_server/start.py
@@ -0,0 +1,174 @@
+import typer
+import subprocess
+import shutil
+import time
+from .utils.hardware import detect_hardware, display_hardware_info
+
+def check_nvidia_toolkit() -> bool:
+    """
+    Checks if Docker can actually run a GPU container using the NVIDIA runtime.
+    """
+    try:
+        test_cmd = [
+            "docker", "run", "--rm", "--gpus", "all",
+            "nvidia/cuda:11.0.3-base-ubuntu20.04", "nvidia-smi"
+        ]
+        subprocess.run(test_cmd, check=True, capture_output=True, text=True)
+        return True
+    except subprocess.CalledProcessError:
+        return False
+
+
+def install_nvidia_toolkit_linux():
+    """
+    Installs the NVIDIA Container Toolkit on Linux (Debian & Ubuntu).
+    """
+    typer.echo("Configuring the repository")
+    try:
+        subprocess.run("curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg", shell=True, check=True)
+        subprocess.run("curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list", shell=True, check=True)
+        subprocess.run("sudo apt-get update", shell=True, check=True)
+
+        typer.echo("Installing Nvidia Container Toolkit")
+        subprocess.run("sudo apt-get install -y nvidia-container-toolkit", shell=True, check=True)
+        subprocess.run("sudo nvidia-ctk runtime configure --runtime=docker", shell=True, check=True)
+        subprocess.run("sudo systemctl restart docker", shell=True, check=True)
+
+        typer.echo("NVIDIA Container Toolkit installed successfully on Linux.")
+    except subprocess.CalledProcessError as e:
+        typer.echo(f"Failed to install NVIDIA Container Toolkit on Linux. Error: {e}", err=True)
+
+
+def install_nvidia_toolkit_windows():
+    """
+    Provide a structured step-by-step guide for Windows users to configure
+    their system for NVIDIA GPU support, including driver & CUDA installation.
+    """
+    # Step-by-step instructions
+    typer.secho("\n========================================", fg=typer.colors.CYAN)
+    typer.secho("        Windows NVIDIA GPU Setup        ", fg=typer.colors.CYAN, bold=True)
+    typer.secho("========================================\n", fg=typer.colors.CYAN)
+
+    typer.echo("Follow these steps to enable NVIDIA GPU support on Windows:\n")
+
+    steps = [
+        ("Step 1: Install or Update NVIDIA Drivers", "https://www.nvidia.com/Download/index.aspx"),
+        ("Step 2: Install the NVIDIA CUDA Toolkit", "https://developer.nvidia.com/cuda-downloads")
+    ]
+    for step_num, (step_title, link) in enumerate(steps, start=1):
+        typer.secho(f"{step_title}", fg=typer.colors.BRIGHT_GREEN)
+        typer.echo(f"   Link: {link}\n")
+
+    typer.echo("Once you've completed the above steps:")
+    typer.echo(" - Ensure Docker Desktop is installed and running.")
+    typer.echo(" - Enable 'Use the WSL 2 based engine' in Docker Desktop settings.\n")
+
+    typer.secho("โš ๏ธ Please restart Solo Server after installing the required tools.", fg=typer.colors.YELLOW)
+    raise typer.Exit(1)
+
+def start():
+
+    """Setup solo-server environment."""
+
+    display_hardware_info(typer)
+    typer.echo("\n๐Ÿš€ Setting up Solo Server...")
+
+    if not shutil.which("docker"):
+        typer.echo(
+            "โŒ Docker is not installed. Please install Docker first.\n"
+            "Link: https://docs.docker.com/get-docker/\n",
+            err=True
+        )
+        raise typer.Exit(code=1)
+
+    try:
+        # Check if Docker daemon is running
+        subprocess.run(["docker", "info"], check=True, capture_output=True)
+        cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, gpu_memory, compute_backend, os = detect_hardware()
+        use_gpu = False
+
+        if gpu_vendor == "NVIDIA":
+            if check_nvidia_toolkit():
+                typer.echo("โœ… NVIDIA Docker Toolkit is already installed.\n")
+                use_gpu = True
+            else:
+                if typer.confirm("NVIDIA GPU detected but Toolkit is not installed. Do you want to install it?", default=False):
+                    if os == "Linux":
+                        install_nvidia_toolkit_linux()
+                    elif os == "Windows":
+                        install_nvidia_toolkit_windows()
+                    else:
+                        typer.echo("Unsupported OS for automated NVIDIA toolkit installation.")
+                else:
+                    typer.echo("โš ๏ธ Falling back to CPU.\n")
+
+        # Check if container exists (running or stopped)
+        container_exists = subprocess.run(
+            ["docker", "ps", "-aq", "-f", "name=solo"],
+            capture_output=True,
+            text=True
+        ).stdout.strip()
+
+        if container_exists:
+            # Check if container is running
+            check_cmd = ["docker", "ps", "-q", "-f", "name=solo"]
+            is_running = subprocess.run(check_cmd, capture_output=True, text=True).stdout.strip()
+            if not is_running:
+                subprocess.run(["docker", "start", "solo"], check=True, capture_output=True)
+        else:
+            # Pull Ollama image
+            typer.echo("๐Ÿ“ฅ Pulling Ollama Registry...")
+            subprocess.run(["docker", "pull", "ollama/ollama"], check=True)
+
+            # Check if port is available
+            try:
+                subprocess.run(
+                    ["docker", "run", "--rm", "-p", "11434:11434", "alpine", "true"],
+                    check=True,
+                    capture_output=True
+                )
+            except subprocess.CalledProcessError:
+                typer.echo("โŒ Port 11434 is already in use", err=True)
+                return
+
+            # Start Ollama container
+            docker_run_cmd = ["docker", "run", "-d", "--name", "solo", "-v", "ollama:/root/.ollama", "-p", "11434:11434"]
+            if use_gpu:
+                docker_run_cmd += ["--gpus", "all"]
+            docker_run_cmd.append("ollama/ollama")
+
+            typer.echo("๐Ÿš€ Starting Solo Server...")
+            subprocess.run(docker_run_cmd, check=True)
+
+        # Wait for container to be ready with timeout
+        timeout = 30
+        start_time = time.time()
+        while time.time() - start_time < timeout:
+            try:
+                subprocess.run(
+                    ["docker", "exec", "solo", "ollama", "list"],
+                    check=True,
+                    stdout=subprocess.DEVNULL  # Only suppress stdout
+                )
+                typer.secho(
+                    "โœ… Solo server is ready!\nYou can now access the UI at: https://solo-chatbot.vercel.app/",
+                    fg=typer.colors.BRIGHT_CYAN,
+                    bold=True
+                )
+
+                return
+            except subprocess.CalledProcessError:
+                time.sleep(1)
+
+        typer.echo("โŒ Solo server failed to start within timeout", err=True)
+
+    except subprocess.CalledProcessError as e:
+        typer.echo(f"โŒ Docker command failed: {e}", err=True)
+        # Cleanup on failure
+        if container_exists:
+            subprocess.run(["docker", "stop", "solo"], check=False)
+    except Exception as e:
+        typer.echo(f"โŒ Unexpected error: {e}", err=True)
+
+if __name__ == "__main__":
+    start()
From cc90f60d2331b621f820c5478efb2bf47c6049d4 Mon Sep 17 00:00:00 2001
From: Zeeshaan Mohammed
Date: Thu, 6 Feb 2025 09:40:34 -0800
Subject: [PATCH 4/4] updated hardware.py

---
 solo_server/utils/hardware.py | 48 +++++++++++++++++------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/solo_server/utils/hardware.py b/solo_server/utils/hardware.py
index e4d6691..6002dfe 100644
--- a/solo_server/utils/hardware.py
+++ b/solo_server/utils/hardware.py
@@ -32,31 +32,29 @@ def detect_hardware() -> Tuple[str, int, float, str, str, float, str, str]:
     gpu_vendor = "None"
     gpu_model = "None"
     compute_backend = "CPU"
-    try:
-        gpus = GPUtil.getGPUs()
-        if gpus:
-            gpu = gpus[0]  # Get first GPU
-            gpu_model = gpu.name
-            gpu_memory = round(gpu.memoryTotal / 1024, 2)  # GPUtil reports MB; convert to GB
-            if "NVIDIA" in gpu_model:
-                gpu_vendor = "NVIDIA"
-                compute_backend = "CUDA"
-            elif "AMD" in gpu_model:
-                gpu_vendor = "AMD"
-                compute_backend = "HIP"
-            elif "Intel" in gpu_model:
-                gpu_vendor = "Intel"
-                compute_backend = "OpenCL"
-            elif "Apple Silicon" in gpu_model:
-                gpu_vendor = "Apple Silicon"
-                compute_backend = "Metal"
-            else:
-                gpu_vendor = "Unknown"
-                compute_backend = "CPU"
-    except:
-        gpu_memory = 0.0
-        pass
-
+    gpu_memory = 0
+
+    gpus = GPUtil.getGPUs()
+    if gpus:
+        gpu = gpus[0]  # Get first GPU
+        gpu_model = gpu.name
+        gpu_memory = round(gpu.memoryTotal / 1024, 2)  # GPUtil reports MB; convert to GB
+        if "NVIDIA" in gpu_model:
+            gpu_vendor = "NVIDIA"
+            compute_backend = "CUDA"
+        elif "AMD" in gpu_model:
+            gpu_vendor = "AMD"
+            compute_backend = "HIP"
+        elif "Intel" in gpu_model:
+            gpu_vendor = "Intel"
+            compute_backend = "OpenCL"
+        elif "Apple Silicon" in gpu_model:
+            gpu_vendor = "Apple Silicon"
+            compute_backend = "Metal"
+        else:
+            gpu_vendor = "Unknown"
+            compute_backend = "CPU"
+
     return cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, gpu_memory, compute_backend, os
 
 def display_hardware_info(typer):