78 changes: 67 additions & 11 deletions README.md
@@ -20,13 +20,13 @@ Solo Server is a lightweight platform that enables users to manage and monitor A
## Features

- **Seamless Setup:** Manage your on-device AI with a simple CLI and HTTP servers
- **Open Model Registry:** Pull models from registries like Hugging Face and Ollama
- **Open Model Registry:** Pull models from registries like Ollama & Hugging Face
- **Lean Load Testing:** Built-in commands to benchmark endpoints
- **Cross-Platform Compatibility:** Deploy AI models effortlessly on your hardware
- **Configurable Framework:** Auto-detects hardware (CPU, GPU, RAM) and sets configs

## Supported Models
Solo Server supports **multiple model sources**, including **Ollama, Hugging Face, and Ramalama**.
Solo Server supports **multiple model sources**, including **Ollama & Hugging Face**.

| **Model Name** | **Source** |
|------------------------|----------------------------------------------------------|
@@ -39,7 +39,7 @@ Solo Server supports **multiple model sources**, including **Ollama, Hugging Fac
| **Mistral 7B v3** | `hf://MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF` |
| **Hermes 2 Pro** | `hf://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF` |
| **Cerebrum 1.0 7B** | `hf://froggeric/Cerebrum-1.0-7b-GGUF` |
| **Dragon Mistral 7B** | `hf://llmware/dragon-mistral-7b-v0`
| **Dragon Mistral 7B** | `hf://llmware/dragon-mistral-7b-v0` |

## Table of Contents

@@ -52,6 +52,12 @@ Solo Server supports **multiple model sources**, including **Ollama, Hugging Fac

## Installation

### **🔹 Prerequisites**

- **🐋 Docker:** Required for containerization
  - [Install Docker](https://docs.docker.com/get-docker/)
  - Ensure the Docker daemon is running (a quick preflight check is sketched below)
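
Before `solo start`, it can save a step to confirm the daemon is actually reachable. A minimal preflight sketch — not part of Solo Server itself; it assumes only the Python standard library and a Docker CLI on `PATH`:

```python
# Quick preflight: verify the Docker CLI exists and the daemon responds.
# `docker info` exits non-zero when the daemon is not running.
import shutil
import subprocess

if shutil.which("docker") is None:
    raise SystemExit("❌ Docker CLI not found on PATH")
try:
    subprocess.run(["docker", "info"], check=True, capture_output=True)
    print("✅ Docker daemon is running")
except subprocess.CalledProcessError:
    raise SystemExit("❌ Docker is installed but the daemon is not running")
```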

### **🔹 Install via PyPI**
```sh
pip install solo-server
@@ -65,22 +71,39 @@ Creates an isolated environment using `uv` for performance and stability.

Run the **interactive setup** to configure Solo Server:
```sh
solo setup
solo start
```
### **🔹 Setup Features**
✔️ **Detects CPU, GPU, RAM** for **hardware-optimized execution**
✔️ **Auto-configures `solo.conf` with optimal settings**
✔️ **Requests API keys for Ngrok and Replicatea**
✔️ **Requests API keys for Ngrok and Replicate**
✔️ **Recommends an OCI compute backend image (CUDA, HIP, SYCL, Vulkan, CPU, Metal)** (see the sketch below)
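
A minimal sketch of the kind of hardware probe behind these steps, using `psutil` and `GPUtil` (both declared in `setup.py` in this PR); the function name and the backend-selection rule are illustrative assumptions, not Solo Server's actual implementation:

```python
# Illustrative hardware probe; collects the CPU/GPU/RAM facts shown in
# the example output below and picks a compute backend from them.
import platform

import GPUtil
import psutil

def detect_hardware() -> dict:
    """Gather basic system facts for backend selection."""
    gpus = GPUtil.getGPUs()
    info = {
        "os": platform.system(),
        "cpu": platform.processor(),
        "cpu_cores": psutil.cpu_count(logical=False),
        "memory_gb": round(psutil.virtual_memory().total / 1024**3, 2),
        "gpu_model": gpus[0].name if gpus else None,
        "gpu_memory_mb": gpus[0].memoryTotal if gpus else None,  # GPUtil reports MB
    }
    # Hypothetical rule: prefer CUDA when an NVIDIA GPU is visible,
    # otherwise fall back to plain CPU execution.
    info["compute_backend"] = "CUDA" if gpus else "CPU"
    return info

if __name__ == "__main__":
    for key, value in detect_hardware().items():
        print(f"{key}: {value}")
```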

---

**Example Output:**
```sh
🖥️ System Information
Operating System: Windows
CPU: AMD64 Family 23 Model 96 Stepping 1, AuthenticAMD
CPU Cores: 8
Memory: 15.42GB
GPU: NVIDIA
GPU Model: NVIDIA GeForce GTX 1660 Ti
GPU Memory: 6144.0MB
Compute Backend: CUDA

🚀 Setting up Solo Server...
✅ Solo server is ready!
```

---

## **Commands**
### **1️⃣ Pull a Model**
### **1️⃣ Pull & Run a Model**
```sh
solo pull llama3
solo run llama3.2
```


---

@@ -96,6 +119,39 @@ http://127.0.0.1:5070 #SOLO_SERVER_PORT

---

## Diagram

```
+-------------------+
| solo run llama3.2 |
+---------+---------+
          |
          v
+------------------+        +------------------+
| Pull inferencing |        | Pull model layer |
| runtime (cuda)   |------->| llama3.2         |
+--------+---------+        +------------------+
         |
         v
+--------------------------------------+
|             Repo options             |
+-----------+------------+-------------+
      |           |             |
      v           v             v
+----------+ +----------+ +-------------+
|  Ollama  | |   vLLM   | | HuggingFace |
| Registry | | Registry | |  Registry   |
+----+-----+ +----+-----+ +------+------+
     |            |              |
     v            v              v
     +---------------------------+
     |         Start with        |
     | cuda runtime and llama3.2 |
     +---------------------------+
```
---

### **3️⃣ Benchmark a Model**
```sh
solo benchmark llama3
@@ -148,12 +204,12 @@ solo status

### **5️⃣ Stop a Model**
```sh
solo stop llama3
solo stop
```
**Example Output:**
```sh
Stopping llama3...
llama3 stopped successfully.
🛑 Stopping Solo Server...
✅ Solo server stopped successfully.
```

---
8 changes: 6 additions & 2 deletions setup.py
@@ -11,19 +11,23 @@
    description="AIOps for the Physical World.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/AIEngineersDev/solo-server",
    url="https://github.com/GetSoloTech/solo-server",
    packages=find_packages(include=["solo_server", "solo_server.*"]),
    include_package_data=True,
    install_requires=[
        "typer",
        "GPUtil",
        "psutil",
        "requests",
        "tabulate",
    ],
    extras_require={
        "dev": ["pytest", "black", "isort"],
    },
    python_requires=">=3.8",
    entry_points={
        "console_scripts": [
            "solo-server=solo_server.cli:app",
            "solo=solo_server.cli:app",
        ],
    },
)
10 changes: 4 additions & 6 deletions solo_server/cli.py
@@ -1,15 +1,13 @@
import typer
from .commands import pull, serve, stop, status, benchmark
from .setup import interactive_setup
from .commands import run, stop, status
from .start import start
app = typer.Typer()

# Commands
app.command()(pull.pull)
app.command()(serve.serve)
app.command()(run.run)
app.command()(stop.stop)
app.command()(status.status)
app.command()(benchmark.benchmark)
app.command()(interactive_setup)
app.command()(start)

if __name__ == "__main__":
    app()
26 changes: 0 additions & 26 deletions solo_server/commands/pull.py

This file was deleted.

26 changes: 26 additions & 0 deletions solo_server/commands/run.py
@@ -0,0 +1,26 @@
import typer
import subprocess

def run(model: str):
    """
    Serves a model using Ollama and enables interactive chat.
    """
    typer.echo(f"🚀 Starting model {model}...")

    # Check if the solo Docker container is running
    try:
        check_cmd = ["docker", "ps", "-q", "-f", "name=solo"]
        if not subprocess.run(check_cmd, capture_output=True, text=True).stdout:
            typer.echo("❌ Solo server is not active. Please start solo server first.", err=True)
            return

        command = ["docker", "exec", "-it", "solo", "ollama", "run", model]

        # Run attached to the terminal so the chat session stays interactive;
        # check=True makes a non-zero exit raise CalledProcessError
        subprocess.run(command, check=True, text=True)
    except subprocess.CalledProcessError as e:
        typer.echo(f"❌ An error occurred: {e}", err=True)
56 changes: 41 additions & 15 deletions solo_server/commands/serve.py
@@ -1,20 +1,46 @@
import requests
import json
import typer
import subprocess

def serve(name: str, model: str):
    """
    Serves a model using Ramalama.
    """
    typer.echo(f"🚀 Starting model {model} as {name}...")
def serve(
    model: str = typer.Option("llama3.2", "--model", "-m", help="Model to use"),
    input: str = typer.Option("Hello", "--input", "-i", help="Input text for inference"),
    stream: bool = typer.Option(False, "--stream", "-s", help="Enable streaming mode")
):
    # API Endpoint
    url = "http://localhost:11434/api/chat"

    try:
        command = ["ramalama", "serve", model]
        process = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    # Chat request payload
    data = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": input
            }
        ],
        "stream": stream  # Set to True for streaming
    }

        typer.echo(f"✅ Model {model} is now running as {name}.")
        typer.echo(f"🌐 Access the UI at: http://127.0.0.1:5070")
    if data["stream"] == False:
        # Sending POST request
        response = requests.post(url, json=data)
        # Check if response is valid JSON
        try:
            response_json = response.json()
            if "message" in response_json and "content" in response_json["message"]:
                print("Assistant Response:", response_json["message"]["content"])
            else:
                print("Unexpected Response:", json.dumps(response_json, indent=2))
        except json.JSONDecodeError:
            print("Error: API did not return valid JSON.")
            print("Raw Response:", response.text)

    except subprocess.CalledProcessError as e:
        typer.echo(f"❌ Failed to serve model {model}: {e.stderr}", err=True)
    except Exception as e:
        typer.echo(f"⚠️ Unexpected error: {e}", err=True)

    else:
        with requests.post(url, json=data, stream=True) as response:
            for line in response.iter_lines():
                if line:
                    json_obj = json.loads(line)
                    if "message" in json_obj and "content" in json_obj["message"]:
                        print(json_obj["message"]["content"], end="", flush=True)  # Streaming output
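
Since the new `serve` is a thin wrapper over Ollama's `/api/chat` endpoint, its non-streaming request can be reproduced directly. A minimal sketch, assuming an Ollama server is listening on `localhost:11434` with `llama3.2` already pulled:

```python
# Mirror of serve()'s non-streaming request to Ollama's chat API.
import requests

payload = {
    "model": "llama3.2",
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": False,
}
resp = requests.post("http://localhost:11434/api/chat", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["message"]["content"])
```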
42 changes: 39 additions & 3 deletions solo_server/commands/status.py
@@ -1,10 +1,46 @@
import typer
import subprocess
from solo_server.utils.hardware import display_hardware_info
from tabulate import tabulate
import json

app = typer.Typer()

@app.command()
def status():
    """Check running models."""
    typer.echo("Checking running model containers...")
    subprocess.run(["podman", "ps", "--filter", "name=solo-container"], check=True)
    """Check running models and system status."""
    display_hardware_info(typer)

    # Check for running solo container
    container_result = subprocess.run(["docker", "ps", "-f", "name=solo", "--format", "{{json .}}"],
                                      capture_output=True, text=True, check=True)

    if container_result.stdout.strip():
        # Container is running, show available models
        typer.echo("\n🔍 Available Models:")
        models_result = subprocess.run(["docker", "exec", "solo", "ollama", "list"],
                                       capture_output=True, text=True, check=True)
        models = []
        for line in models_result.stdout.strip().split('\n'):
            parts = line.split()
            if len(parts) >= 7:
                size = f"{parts[2]} {parts[3]}"
                modified = f"{parts[4]} {parts[5]} {parts[6]}"
                models.append([parts[0], parts[1], size, modified])

        if models:
            print(tabulate(models, headers=['NAME', 'ID', 'SIZE', 'MODIFIED'], tablefmt='grid'))

    # Show running containers section (will be empty if none running)
    typer.echo("\n🔍 Running Containers:")
    containers = []
    if container_result.stdout.strip():
        for line in container_result.stdout.strip().split('\n'):
            container = json.loads(line)
            containers.append([
                container['Names'],
                container['Status'],
                container['Ports']
            ])

    print(tabulate(containers, headers=['NAME', 'STATUS', 'PORTS'], tablefmt='grid'))
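
For reference, the seven-token row shape the parser above expects from `ollama list` (the values here are hypothetical; the header row has fewer than seven tokens, so the `len(parts) >= 7` check skips it):

```python
# Hypothetical `ollama list` row, split the way status() splits it.
line = "llama3.2:latest a80c4f17acd5 2.0 GB 3 days ago"
parts = line.split()
name, model_id = parts[0], parts[1]
size = f"{parts[2]} {parts[3]}"                  # "2.0 GB"
modified = f"{parts[4]} {parts[5]} {parts[6]}"   # "3 days ago"
print(name, model_id, size, modified)
```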
29 changes: 23 additions & 6 deletions solo_server/commands/stop.py
@@ -1,17 +1,34 @@
import typer
import subprocess

def stop(name: str):
def stop(name: str = ""):
    """
    Stops a running model container using Ramalama.
    Stops the Ollama Docker container and any running models.
    """
    typer.echo(f"🛑 Stopping {name} using Ramalama...")
    typer.echo("🛑 Stopping Solo Server...")

    try:
        subprocess.run(["ramalama", "stop", name], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        typer.echo(f"✅ {name} stopped successfully.")
        # Stop the Docker container
        subprocess.run(
            ["docker", "stop", "solo"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        typer.echo("✅ Solo server stopped successfully.")

        # # Remove the container
        # subprocess.run(
        #     ["docker", "rm", "ollama"],
        #     check=True,
        #     stdout=subprocess.PIPE,
        #     stderr=subprocess.PIPE,
        #     text=True
        # )
        # typer.echo("🗑️ Ollama container removed.")

    except subprocess.CalledProcessError as e:
        typer.echo(f"❌ Failed to stop {name}: {e.stderr}", err=True)
        typer.echo(f"❌ Failed to stop Solo Server: {e.stderr}", err=True)
    except Exception as e:
        typer.echo(f"⚠️ Unexpected error: {e}", err=True)