From 2b3afd87e9163e2001e9c30e46b1b7806ecc6417 Mon Sep 17 00:00:00 2001
From: ddiddi
Date: Thu, 6 Mar 2025 13:41:38 -0800
Subject: [PATCH 1/3] feat: add ability to get aid from base solo coder

---
 solo_server/commands/aid.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 solo_server/commands/aid.py

diff --git a/solo_server/commands/aid.py b/solo_server/commands/aid.py
new file mode 100644
index 0000000..702fae2
--- /dev/null
+++ b/solo_server/commands/aid.py
@@ -0,0 +1,29 @@
+import sys
+import subprocess
+import typer
+
+def aid(query: str):
+    # Check if docker is running
+    try:
+        subprocess.run(["docker", "ps"], capture_output=True, check=True)
+    except subprocess.CalledProcessError:
+        typer.echo("Solo server not running. Please start solo-server first.")
+        return
+
+    # Execute the query in the solo-ollama container.
+    try:
+        result = subprocess.run(
+            ["docker", "exec", "solo-ollama", "ollama", "ask", query],
+            capture_output=True, text=True, check=True
+        )
+        typer.echo(result.stdout)
+    except subprocess.CalledProcessError:
+        typer.echo("Failed to get response from the LLM.")
+
+if __name__ == "__main__":
+    # If invoked with ">>" as the first argument, join the remaining tokens as the query.
+    if len(sys.argv) > 1 and sys.argv[1] == ">>":
+        query_text = " ".join(sys.argv[2:])
+        aid(query_text)
+    else:
+        typer.echo("Usage: solo >> <query>")

From eea8e2edabd73de3390417a0cc65b1a522a54908 Mon Sep 17 00:00:00 2001
From: ddiddi
Date: Thu, 6 Mar 2025 13:57:50 -0800
Subject: [PATCH 2/3] add @@ token for prompts

---
 solo_server/commands/aid.py | 54 ++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/solo_server/commands/aid.py b/solo_server/commands/aid.py
index 702fae2..60b94a9 100644
--- a/solo_server/commands/aid.py
+++ b/solo_server/commands/aid.py
@@ -1,29 +1,39 @@
 import sys
-import subprocess
 import typer
+from litgpt import LLM
+from rich.console import Console
 
-def aid(query: str):
-    # Check if docker is running
-    try:
-        subprocess.run(["docker", "ps"], capture_output=True, check=True)
-    except subprocess.CalledProcessError:
-        typer.echo("Solo server not running. Please start solo-server first.")
-        return
+console = Console()
+
+def query_llm(query: str):
+    # Check if the query exceeds 9000 characters
+    if len(query) > 9000:
+        typer.echo("Error: Your query exceeds the maximum allowed length of 9000 characters. It's over 9000!")
+        raise typer.Exit(1)
 
-    # Execute the query in the solo-ollama container.
-    try:
-        result = subprocess.run(
-            ["docker", "exec", "solo-ollama", "ollama", "ask", query],
-            capture_output=True, text=True, check=True
-        )
-        typer.echo(result.stdout)
-    except subprocess.CalledProcessError:
-        typer.echo("Failed to get response from the LLM.")
+    # Load the model and generate a response while showing a spinner
+    llm = LLM.load("Qwen/Qwen2.5-1.5B-Instruct")
+    with console.status("Generating response...", spinner="dots"):
+        response = llm.generate(query)
+    typer.echo(response)
+
+def interactive_mode():
+    console.print("Interactive Mode (type 'exit' or 'quit' to end):", style="bold green")
+    while True:
+        query_text = input(">> ")
+        if query_text.lower() in ("exit", "quit"):
+            break
+        query_llm(query_text)
 
 if __name__ == "__main__":
-    # If invoked with ">>" as the first argument, join the remaining tokens as the query.
-    if len(sys.argv) > 1 and sys.argv[1] == ">>":
-        query_text = " ".join(sys.argv[2:])
-        aid(query_text)
+    # If invoked with "@@" as the first argument, treat the rest as the query.
+    # Otherwise, launch interactive mode.
+    if len(sys.argv) > 1 and sys.argv[1] == "@@":
+        if len(sys.argv) > 2:
+            query_text = " ".join(sys.argv[2:])
+        else:
+            typer.echo("Enter your query (end with EOF / Ctrl-D):")
+            query_text = sys.stdin.read().strip()
+        query_llm(query_text)
     else:
-        typer.echo("Usage: solo >> <query>")
+        interactive_mode()

From 3b76bad4497d2e46a4031df0486e9d4cd3b9afa7 Mon Sep 17 00:00:00 2001
From: ddiddi
Date: Thu, 6 Mar 2025 14:03:19 -0800
Subject: [PATCH 3/3] add solo active ensemble file

---
 solo_server/solo.ensemble.yaml | 46 ++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 solo_server/solo.ensemble.yaml

diff --git a/solo_server/solo.ensemble.yaml b/solo_server/solo.ensemble.yaml
new file mode 100644
index 0000000..23503e0
--- /dev/null
+++ b/solo_server/solo.ensemble.yaml
@@ -0,0 +1,46 @@
+system_information:
+  operating_system: "Windows"
+  cpu: "AMD64 Family 23 Model 96 Stepping 1, AuthenticAMD"
+  cpu_cores: 8
+  memory: "15.42GB"
+  gpu:
+    vendor: "NVIDIA"
+    model: "NVIDIA GeForce GTX 1660 Ti"
+    memory: "6144MB"
+  compute_backend: "CUDA"
+
+server_options:
+  - name: "Ollama"
+    recommended: true
+    details: "Optimized for systems with NVIDIA GPUs and CUDA support. (Recommended for your system.)"
+  - name: "vLLM"
+    recommended: false
+    details: "High-performance inference engine, best suited for Linux environments."
+  - name: "Llama.cpp"
+    recommended: false
+    details: "Lightweight and cross-platform; runs efficiently on CPU-only systems."
+  - name: "LitGPT"
+    recommended: false
+    details: "Lightning AI's PyTorch-based implementation."
+
+default_server: "Ollama"
+
+models:
+  solo-core-model:
+    model: "Qwen/Qwen2.5-1.5B-Instruct"
+    description: "Primary general-purpose model."
+  coding:
+    model: "qwen2.5-3b-coder"
+    description: "Optimized for code generation and programming tasks."
+  chat:
+    model: "deepseekr1-instruct-distill"
+    description: "Fine-tuned for conversational and chat applications."
+  robots:
+    model: "ottonomy-distill"
+    description: "Targeted for robotics and automation-related tasks."
+  healthcare_classification:
+    model: "palm"
+    description: "Optimized for healthcare data classification and analysis."
+  general:
+    model: "Qwen/Qwen2.5-1.5B-Instruct"
+    description: "Primary general-purpose model."
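
Note: after the second patch, aid.py drives query_llm() and interactive_mode() through a bare sys.argv check in its __main__ block, even though the module already depends on typer. Below is a minimal sketch of how the same helpers could instead be registered as a Typer command; the app object, the aid command name, the argument help text, and the aid_cli.py filename are illustrative assumptions and are not part of these patches.

# Hypothetical wiring sketch (not part of the patches above): exposes the patched
# query_llm()/interactive_mode() helpers through a Typer command instead of raw sys.argv.
from typing import Optional

import typer

from solo_server.commands.aid import interactive_mode, query_llm

app = typer.Typer()

@app.command()
def aid(query: Optional[str] = typer.Argument(None, help="Prompt for the local model")) -> None:
    """Answer a one-shot prompt, or start interactive mode when no prompt is given."""
    if query:
        query_llm(query)       # single prompt: generate one response and exit
    else:
        interactive_mode()     # no argument: drop into the '>>' REPL loop

if __name__ == "__main__":
    app()

Saved as the hypothetical aid_cli.py, running "python aid_cli.py 'explain decorators'" would answer once, while "python aid_cli.py" would open the interactive loop, mirroring the behaviour the second patch attaches to the "@@" token.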