diff --git a/solo_server/Dockerfile.finetune b/solo_server/Dockerfile.finetune
new file mode 100644
index 0000000..230d1b5
--- /dev/null
+++ b/solo_server/Dockerfile.finetune
@@ -0,0 +1,29 @@
+FROM pytorch/pytorch:2.1.1-cuda12.1-cudnn8-runtime
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    gcc \
+    g++ \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python packages
+RUN pip install --no-cache-dir \
+    datasets \
+    peft \
+    typer \
+    requests \
+    transformers \
+    triton \
+    bitsandbytes \
+    trl \
+    accelerate \
+    unsloth_zoo \
+    "unsloth[cu121-torch221] @ git+https://github.com/unslothai/unsloth.git"
+
+# Set working directory
+WORKDIR /app
+
+# Copy the application code
+COPY . .
diff --git a/solo_server/cli.py b/solo_server/cli.py
index 343cad8..5e251a9 100644
--- a/solo_server/cli.py
+++ b/solo_server/cli.py
@@ -1,8 +1,11 @@
 import typer
 from solo_server.commands import run, stop, status, benchmark, download_hf as download
+from solo_server.commands import finetune
 from solo_server.main import setup
 
 app = typer.Typer()
+finetune_app = typer.Typer()
+app.add_typer(finetune_app, name="finetune")
 
 # Commands
 app.command()(run.run)
@@ -12,6 +15,11 @@
 app.command()(benchmark.benchmark)
 app.command()(setup)
 
+# Finetune commands
+finetune_app.command(name="gen")(finetune.gen)
+finetune_app.command(name="status")(finetune.status)
+finetune_app.command(name="download")(finetune.download)
+finetune_app.command(name="run")(finetune.run)
 
 if __name__ == "__main__":
     app()
diff --git a/solo_server/commands/finetune.py b/solo_server/commands/finetune.py
new file mode 100644
index 0000000..3f83d6e
--- /dev/null
+++ b/solo_server/commands/finetune.py
@@ -0,0 +1,302 @@
+import typer
+import requests
+import json
+from typing import Optional
+from pathlib import Path
+import subprocess
+import os
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+from rich.text import Text
+from rich.box import ROUNDED
+from solo_server.config import CONFIG_PATH
+
+BASE_URL = "https://api.starfishdata.ai/v1"
+
+def get_starfish_api_key() -> str:
+    """Get Starfish API key from environment or config file"""
+    # First check environment variable
+    api_key = os.getenv('STARFISH_API_KEY', '')
+
+    if not api_key:  # If not in env, try config file
+        if os.path.exists(CONFIG_PATH):
+            with open(CONFIG_PATH, 'r') as f:
+                config = json.load(f)
+            api_key = config.get('starfish', {}).get('api_key', '')
+
+    if not api_key:
+        if os.name in ["Linux", "Windows"]:
+            typer.echo("Use Ctrl + Shift + V to paste your token.")
+        api_key = typer.prompt("Please enter your Starfish API key")
+
+        # Save token if provided
+        if api_key:
+            if os.path.exists(CONFIG_PATH):
+                with open(CONFIG_PATH, 'r') as f:
+                    config = json.load(f)
+            else:
+                config = {}
+
+            config['starfish'] = {'api_key': api_key}
+            with open(CONFIG_PATH, 'w') as f:
+                json.dump(config, f, indent=4)
+
+    return api_key
+
+def gen(
+    prompt: str,
+    num_records: Optional[int] = typer.Option(100, "--num-records", "-n", help="Number of records to generate"),
+    model: Optional[str] = typer.Option("gpt-4o-mini-2024-07-18", "--model", "-m", help="Model to use for generation")
+):
+    """
+    Generate synthetic data using StarfishData API.
+
+    Example:
+        solo finetune gen "Generate customer service conversations about product returns"
+    """
+    api_key = get_starfish_api_key()
+    if not api_key:
+        typer.echo("❌ Starfish API key is required", err=True)
+        raise typer.Exit(1)
+
+    data = {
+        "prompt": prompt,
+        "numOfRecords": num_records,
+        "model": model
+    }
+
+    headers = {
+        'Content-Type': 'application/json',
+        'x-api-key': api_key
+    }
+
+    try:
+        response = requests.post(
+            f'{BASE_URL}/generateData',
+            headers=headers,
+            data=json.dumps(data)
+        )
+        response.raise_for_status()
+
+        result = response.json()
+        console = Console()
+
+        # Create a table
+        table = Table(show_header=False, box=ROUNDED)
+        table.add_column("Key", style="cyan")
+        table.add_column("Value", style="green")
+
+        table.add_row("Job ID", result.get('jobId'))
+        table.add_row("Project ID", result.get('projectId'))
+
+        # Create a panel with success message and table
+        content = [
+            Text("✅ Successfully started data generation", style="bold green"),
+            "",  # Empty line
+            Text("Available commands:", style="yellow"),
+            Text(f"• Check status: solo finetune status {result.get('jobId')}", style="blue"),
+            Text(f"• Download data: solo finetune download {result.get('projectId')}", style="blue")
+        ]
+
+        panel = Panel(
+            "\n".join(str(item) for item in content),
+            title="[bold magenta]Generation Details[/]",
+            border_style="bright_blue"
+        )
+        console.print(panel)
+    except requests.exceptions.RequestException as e:
+        typer.echo(f"❌ Error: {str(e)}", err=True)
+
+def status(job_id: str):
+    """
+    Check the status of a data generation job.
+
+    Example:
+        solo finetune status "job-123-456"
+    """
+    api_key = get_starfish_api_key()
+    if not api_key:
+        typer.echo("❌ Starfish API key is required", err=True)
+        raise typer.Exit(1)
+
+    headers = {
+        'Content-Type': 'application/json',
+        'x-api-key': api_key
+    }
+
+    data = {
+        "jobId": job_id
+    }
+
+    try:
+        response = requests.post(
+            f'{BASE_URL}/jobStatus',
+            headers=headers,
+            data=json.dumps(data)
+        )
+        response.raise_for_status()
+
+        result = response.json()
+        status = result.get('status', 'UNKNOWN')
+        typer.echo(f"📊 Data generation status: {status}")
+
+        if status == "COMPLETE":
+            typer.echo("✅ Data generation completed. You can now download the data")
+        elif status == "FAILED":
+            typer.echo(f"❌ Error: {result.get('error')}")
+    except requests.exceptions.RequestException as e:
+        typer.echo(f"❌ Error: {str(e)}", err=True)
+
+def download(
+    project_id: str,
+    output: Optional[str] = typer.Option("data.json", "--output", "-o", help="Output file path")
+):
+    """
+    Download generated data for a project.
+
+    Example:
+        solo finetune download "project-123-456" --output my_data.json
+    """
+    api_key = get_starfish_api_key()
+    if not api_key:
+        typer.echo("❌ Starfish API key is required", err=True)
+        raise typer.Exit(1)
+
+    headers = {
+        'Content-Type': 'application/json',
+        'x-api-key': api_key
+    }
+
+    data = {
+        "projectId": project_id
+    }
+
+    try:
+        response = requests.post(
+            f'{BASE_URL}/data',
+            headers=headers,
+            data=json.dumps(data)
+        )
+        response.raise_for_status()
+
+        result = response.json()
+
+        # Save the data to a file
+        with open(output, 'w') as f:
+            json.dump(result, f, indent=2)
+
+        typer.echo(f"✅ Successfully downloaded data to {output}")
+        typer.echo(f"📊 Number of records: {len(result['data'])}")
+    except requests.exceptions.RequestException as e:
+        typer.echo(f"❌ Error: {str(e)}", err=True)
+    except IOError as e:
+        typer.echo(f"❌ Error writing to file: {str(e)}", err=True)
+
+def run(
+    data_path: str = typer.Argument(..., help="Path to the JSON data file"),
+    output_dir: str = typer.Option("./finetuned_model", "--output-dir", "-o", help="Directory to save the finetuned model"),
+    batch_size: int = typer.Option(2, "--batch-size", "-b", help="Training batch size"),
+    epochs: int = typer.Option(1, "--epochs", "-e", help="Number of training epochs"),
+    learning_rate: float = typer.Option(2e-4, "--learning-rate", "-lr", help="Learning rate"),
+    lora_r: int = typer.Option(2, "--lora-r", help="LoRA attention dimension"),
+    lora_alpha: int = typer.Option(8, "--lora-alpha", help="LoRA alpha parameter"),
+    lora_dropout: float = typer.Option(0.05, "--lora-dropout", help="LoRA dropout value"),
+    rebuild_image: bool = typer.Option(False, "--rebuild-image", help="Force rebuild the Docker image"),
+):
+    """
+    Finetune a model on generated data using unsloth with LoRA in a Docker container.
+
+    Example:
+        solo finetune run data.json --output-dir ./my_model --batch-size 8
+    """
+    try:
+        # Convert paths to absolute paths
+        data_path = os.path.abspath(data_path)
+        output_dir = os.path.abspath(output_dir)
+
+        # Ensure output directory exists
+        os.makedirs(output_dir, exist_ok=True)
+
+        # Check if container exists (running or stopped)
+        container_exists = subprocess.run(
+            ["docker", "ps", "-aq", "-f", "name=solo-finetune"],
+            capture_output=True,
+            text=True
+        ).stdout.strip()
+
+        if container_exists:
+            # Check if container is running
+            is_running = subprocess.run(
+                ["docker", "ps", "-q", "-f", "name=solo-finetune"],
+                capture_output=True,
+                text=True
+            ).stdout.strip()
+
+            if is_running:
+                typer.echo("✅ Finetune container is already running")
+            else:
+                subprocess.run(["docker", "start", "solo-finetune"], check=True)
+        else:
+            # Check if image exists
+            docker_finetune = "getsolo/finetune:latest"
+            image_exists = subprocess.run(
+                ["docker", "images", "-q", docker_finetune],
+                capture_output=True,
+                text=True
+            ).stdout.strip()
+
+            if not image_exists or rebuild_image:
+                typer.echo("📥 Pulling finetune image...")
+                try:
+                    subprocess.run(["docker", "pull", docker_finetune], check=True)
+                except subprocess.CalledProcessError as e:
+                    typer.echo(f"❌ Error: {str(e)}", err=True)
+                    raise typer.Exit(1)
+
+            # Prepare arguments for the training script
+            training_args = {
+                "data_path": "/app/data.json",
+                "output_dir": "/app/output",
+                "epochs": epochs,
+                "batch_size": batch_size,
+                "learning_rate": learning_rate,
+                "lora_r": lora_r,
+                "lora_alpha": lora_alpha,
+                "lora_dropout": lora_dropout,
+            }
+
+            # Convert arguments to command line format
+            args_list = []
+            for key, value in training_args.items():
+                args_list.extend([f"--{key.replace('_', '-')}", str(value)])
+
+            # Run the finetuning command in the container
+            docker_cmd = [
+                "docker", "run",
+                "--name", "solo-finetune",
+                "--gpus", "all",  # Enable GPU access
+                "-v", f"{data_path}:/app/data.json:ro",  # Mount data file
+                "-v", f"{output_dir}:/app/output",  # Mount output directory
+                docker_finetune,
+                "python", "./finetune_script.py",
+                *args_list
+            ]
+
+            typer.echo("🚀 Starting finetuning process...")
+            subprocess.run(docker_cmd, check=True)
+
+            typer.echo("✅ Finetuning completed successfully!")
+            typer.echo(f"📁 Model saved to: {output_dir}")
+            typer.echo(f"📁 GGUF model converted and saved to {os.path.join(output_dir, 'gguf')}")
+
+    except subprocess.CalledProcessError as e:
+        typer.echo(f"❌ Error during Docker operation: {str(e)}", err=True)
+        raise typer.Exit(1)
+    except Exception as e:
+        typer.echo(f"❌ Error: {str(e)}", err=True)
+        raise typer.Exit(1)
+
+
+
+
diff --git a/solo_server/data.json b/solo_server/data.json
new file mode 100644
index 0000000..559bfbe
--- /dev/null
+++ b/solo_server/data.json
@@ -0,0 +1,56 @@
+{
+  "data": [
+    {
+      "id": "ab50896d894d4154ba5c4c1d73e5d93a",
+      "data": "{\"question\":\"What is a popular food item at the Gilroy Garlic Festival?\",\"answer\":\"Garlic ice cream is a unique and popular food item served at the Gilroy Garlic Festival.\"}",
+      "topic": "California cities"
+    },
+    {
+      "id": "942e67de3d9c437ea4793f9b8ade2ee4",
+      "data": "{\"question\":\"What major highways run through Gilroy?\",\"answer\":\"Major highways include U.S. Route 101 and California State Route 152.\"}",
+      "topic": "California cities"
+    },
+    {
+      "id": "e66c4448d43142c6b3275090e7c04c8e",
+      "data": "{\"question\":\"What nearby cities are close to Gilroy?\",\"answer\":\"Nearby cities include Morgan Hill to the north and Hollister to the south.\"}",
+      "topic": "California cities"
+    },
+    {
+      "id": "7a8f6e29d0e24b12872added76b68fc0",
+      "data": "{\"question\":\"Does Gilroy have any parks?\",\"answer\":\"Yes, Gilroy has several parks, including Christmas Hill Park and Gilroy Sports Park.\"}",
+      "topic": "California cities"
+    },
+    {
+      "id": "94695387eb8f42ad9dea10cd498f7343",
+      "data": "{\"question\":\"When is the Gilroy Garlic Festival held?\",\"answer\":\"The Gilroy Garlic Festival is typically held in late July each year.\"}",
+      "topic": "California cities"
+    },
+    {
+      "id": "5bc6d4f763c74fada8885cd5a3a0eb8f",
+      "data": "{\"question\":\"What is the climate like in Gilroy?\",\"answer\":\"Gilroy has a Mediterranean climate, characterized by hot, dry summers and mild, wet winters.\"}",
+      "topic": "California cities"
+    },
+    {
+      "id": "a9a29fb13f8b4b9ea84ccfd3c0b89cba",
+      "data": "{\"question\":\"What kind of agriculture is prominent in Gilroy?\",\"answer\":\"Gilroy is known for its garlic production, as well as other crops like strawberries and wine grapes.\"}",
+      "topic": "California cities"
+    },
+    {
+      "id": "d9890944793442338d445089408cbc0f",
+      "data": "{\"question\":\"What are popular attractions in Gilroy, California?\",\"answer\":\"Popular attractions in Gilroy include Gilroy Gardens Family Theme Park and the historic Gilroy Museum.\"}",
+      "topic": "California cities"
+    },
+    {
+      "id": "6b59224a01244b76b8f9e736cdf8a103",
+      "data": "{\"question\":\"What is Gilroy known for?\",\"answer\":\"Gilroy is known as the 'Garlic Capital of the World' and hosts the annual Gilroy Garlic Festival.\"}",
+      "topic": "California cities"
+    },
+    {
+      "id": "cde50fba0577480bb5f223ffced04781",
+      "data": "{\"question\":\"What is the population of Gilroy, California?\",\"answer\":\"As of the 2020 census, the population of Gilroy is approximately 60,000.\"}",
+      "topic": "California cities"
+    }
+  ],
+  "nextToken": null,
+  "success": true
+}
diff --git a/solo_server/finetune_script.py b/solo_server/finetune_script.py
new file mode 100644
index 0000000..ed9cf9e
--- /dev/null
+++ b/solo_server/finetune_script.py
@@ -0,0 +1,196 @@
+import json
+from datasets import Dataset
+from unsloth import FastLanguageModel, is_bfloat16_supported, standardize_sharegpt
+from pathlib import Path
+import typer
+from peft import LoraConfig, TaskType
+from trl import SFTTrainer
+from transformers import TrainingArguments
+import torch
+
+def run_training(
+    data_path: str,
+    output_dir: str,
+    epochs: int,
+    batch_size: int,
+    learning_rate: float,
+    lora_r: int,
+    lora_alpha: int,
+    lora_dropout: float,
+):
+    """Run the finetuning process"""
+
+    # Check GPU compatibility
+    if torch.cuda.is_available():
+        gpu_name = torch.cuda.get_device_name()
+        compute_capability = torch.cuda.get_device_capability()
+        print(f"Found GPU: {gpu_name} with compute capability {compute_capability}")
+
+        # Use 8-bit quantization for older GPUs
+        use_4bit = compute_capability[0] >= 8  # Use 4-bit only for Ampere (8.0) and newer
+    else:
+        print("No GPU found, using CPU mode")
+        use_4bit = False
+
+    try:
+        print("Initializing model and tokenizer...")
+        # Initialize model with appropriate quantization
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name="unsloth/DeepSeek-R1-Distill-Qwen-1.5B",
+            max_seq_length=2048,
+            dtype=None,
+            load_in_4bit=use_4bit,  # Use 4-bit quantization only for compatible GPUs
+            load_in_8bit=not use_4bit,  # Use 8-bit quantization for older GPUs
+        )
+        print("Model and tokenizer initialized successfully")
+
+    except Exception as e:
+        print(f"Error initializing model: {str(e)}")
+        raise
+
+    try:
+        print("Applying PEFT configuration...")
+        model = FastLanguageModel.get_peft_model(
+            model,
+            r=lora_r,
+            target_modules=[
+                "q_proj", "k_proj", "v_proj", "o_proj",
+                "gate_proj", "up_proj", "down_proj",
+            ],
+            lora_alpha=lora_alpha,
+            lora_dropout=lora_dropout,
+            use_gradient_checkpointing="unsloth",
+            use_rslora=False,
+            random_state=3407,
+        )
+        print("PEFT configuration applied successfully")
+
+    except Exception as e:
+        print(f"Error applying PEFT configuration: {str(e)}")
+        raise
+
+    with open(data_path) as f:
+        raw_data = json.load(f)
+
+    dataset = prepare_dataset(raw_data, tokenizer)
+
+    # Training arguments
+    training_args = TrainingArguments(
+        output_dir=output_dir,
+        num_train_epochs=epochs,
+        per_device_train_batch_size=batch_size,
+        gradient_accumulation_steps=4,
+        learning_rate=learning_rate,
+        logging_steps=10,
+        fp16=not is_bfloat16_supported(),
+        bf16=is_bfloat16_supported(),
+        warmup_ratio=0.03,
+        weight_decay=0.01,
+        optim="adamw_8bit",
+        lr_scheduler_type="linear",
+        seed=3407,
+        report_to="none",
+    )
+
+    # Initialize SFT trainer
+    trainer = SFTTrainer(
+        model=model,
+        tokenizer=tokenizer,
+        train_dataset=dataset,
+        dataset_text_field="text",
+        max_seq_length=2048,
+        dataset_num_proc=2,
+        args=training_args,
+        packing=False,
+    )
+
+    # Train
+    trainer.train()
+
+    # Save the model
+    print("Saving model...")
+    try:
+        merged_path = Path(output_dir) / "merged_model"
+        print("Merging and saving full model...")
+        model.save_pretrained_merged(
+            merged_path,
+            tokenizer,
+            save_method="merged_16bit",
+        )
+        print(f"✓ Saved merged model to {merged_path}")
+    except Exception as e:
+        print(f"Warning: Could not save merged model: {e}")
+        print("Continuing with GGUF conversion...")
+
+    # Save GGUF version
+    try:
+        gguf_path = Path(output_dir) / "gguf"
+        gguf_path.mkdir(exist_ok=True)
+        print("Converting model to GGUF format...")
+
+        # Use the adapter model for GGUF conversion
+        model.save_pretrained_gguf(
+            str(gguf_path / "model"),
+            tokenizer,
+            quantization_method="q4_k_m",
+        )
+    except Exception as e:
+        print(f"Warning: Could not save GGUF model: {e}")
+
+    print("Training and saving completed!")
+    print(tokenizer._ollama_modelfile)
+    print(tokenizer._ollama_modelfile.read())
+
+
+def format_instruction(question: str, answer: str) -> str:
+    """Format a single Q&A pair into instruction format"""
+    return f"""You are a helpful assistant. Based on the following question, provide a relevant answer:
+
+### Question:
+{question}
+
+### Response:
+{answer}"""
+
+def prepare_dataset(raw_data: dict, tokenizer):
+    """Prepare dataset from raw data"""
+    formatted_data = []
+
+    for item in raw_data["data"]:
+        data_dict = json.loads(item["data"])
+        formatted_text = format_instruction(
+            data_dict["question"],
+            data_dict["answer"]
+        )
+        formatted_data.append({"text": formatted_text + tokenizer.eos_token})
+    # Create dataset
+    dataset = Dataset.from_list(formatted_data)
+
+    return dataset
+
+if __name__ == "__main__":
+    app = typer.Typer()
+
+    @app.command()
+    def main(
+        data_path: str = typer.Option(..., "--data-path", help="Path to the JSON data file"),
+        output_dir: str = typer.Option(..., "--output-dir", help="Directory to save the model"),
+        epochs: int = typer.Option(..., "--epochs", help="Number of training epochs"),
+        batch_size: int = typer.Option(..., "--batch-size", help="Training batch size"),
+        learning_rate: float = typer.Option(..., "--learning-rate", help="Learning rate"),
+        lora_r: int = typer.Option(..., "--lora-r", help="LoRA attention dimension"),
+        lora_alpha: int = typer.Option(..., "--lora-alpha", help="LoRA alpha parameter"),
+        lora_dropout: float = typer.Option(..., "--lora-dropout", help="LoRA dropout value"),
+    ):
+        run_training(
+            data_path=data_path,
+            output_dir=output_dir,
+            epochs=epochs,
+            batch_size=batch_size,
+            learning_rate=learning_rate,
+            lora_r=lora_r,
+            lora_alpha=lora_alpha,
+            lora_dropout=lora_dropout,
+        )
+
+    app()
\ No newline at end of file
diff --git a/solo_server/main.py b/solo_server/main.py
index 28c457b..177bebc 100644
--- a/solo_server/main.py
+++ b/solo_server/main.py
@@ -6,7 +6,7 @@
 from enum import Enum
 from pathlib import Path
 from solo_server.utils.docker_utils import start_docker_engine
-from solo_server.utils.hardware import detect_hardware, display_hardware_info
+from solo_server.utils.hardware import detect_hardware, display_hardware_info, recommended_server
 from solo_server.utils.nvidia import check_nvidia_toolkit, install_nvidia_toolkit_linux, install_nvidia_toolkit_windows
 from solo_server.utils.server_utils import setup_vllm_server, setup_ollama_server, setup_llama_cpp_server
 
@@ -28,6 +28,8 @@ def setup():
     typer.echo("📊 Available Server Options:")
     for server in ServerType:
         typer.echo(f" • {server.value}")
+
+    recmd_server = recommended_server(memory_gb, gpu_vendor, gpu_memory)
 
     def server_type_prompt(value: str) -> ServerType:
         normalized_value = value.lower()
@@ -39,20 +41,17 @@
     server_choice = typer.prompt(
         "\nChoose server",
         type=server_type_prompt,
-        default="ollama",
+        default=recmd_server,
     )
     # GPU Configuration
     use_gpu = False
-    if gpu_vendor in ["NVIDIA", "AMD", "Intel", "Apple Silicon"]:
-        use_gpu = typer.confirm(
-            f"\n🎮 {gpu_vendor} GPU detected ({gpu_model}). Use GPU acceleration?",
-            default=True
-        )
+    if gpu_vendor in ["NVIDIA", "AMD", "Intel", "Apple Silicon"]:
+        use_gpu = True
 
     if use_gpu and gpu_vendor == "NVIDIA":
         if not check_nvidia_toolkit(os_name):
-            if typer.confirm("NVIDIA toolkit not found. Install now?", default=True):
+            if typer.confirm("NVIDIA GPU detected, but GPU drivers not found. Install now?", default=True):
                 if os_name == "Linux":
                     try:
                         install_nvidia_toolkit_linux()
                     except subprocess.CalledProcessError as e:
@@ -76,50 +76,35 @@ def server_type_prompt(value: str) -> ServerType:
         typer.echo("❌ Docker is not installed or not in the system PATH. Please install Docker first.\n", err=True)
         typer.secho("Install Here: https://docs.docker.com/get-docker/", fg=typer.colors.GREEN)
         raise typer.Exit(code=1)
-    else:
+
+
+    try:
+        subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20)
+    except subprocess.CalledProcessError:
+        typer.echo("Docker daemon is not running. Attempting to start Docker...", err=True)
+        if not start_docker_engine(os_name):
+            raise typer.Exit(code=1)
+        # Re-check if Docker is running
         try:
-            subprocess.run([docker_path, "info"], check=True, capture_output=True, timeout=30)
+            subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20)
         except subprocess.CalledProcessError:
-            typer.echo("Docker daemon is not running. Attempting to start Docker...", err=True)
-            if not start_docker_engine(os_name):
-                raise typer.Exit(code=1)
-            # Re-check if Docker is running
-            try:
-                subprocess.run([docker_path, "info"], check=True, capture_output=True)
-            except subprocess.CalledProcessError:
-                typer.echo("Try running the terminal with admin privileges.", err=True)
-                raise typer.Exit(code=1)
+            typer.echo("Try restarting the terminal with admin privileges and close any instances of podman.", err=True)
+            raise typer.Exit(code=1)
+
     # Server setup
     try:
         if server_choice == ServerType.VLLM:
             setup_success = setup_vllm_server(use_gpu, cpu_model, gpu_vendor)
             if setup_success:
-                def is_port_in_use(port: int) -> bool:
-                    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-                        return s.connect_ex(('localhost', port)) == 0
-
-                # Wait for the port to be in use
-                port = 8000
-                timeout = 60  # seconds
-                start_time = time.time()
-                while time.time() - start_time < timeout:
-                    if is_port_in_use(port):
-                        typer.secho(
-                            f"Access the API at: http://localhost:{port}",
-                            fg=typer.colors.BLUE
-                        )
-                        typer.secho(
-                            "If you experience issues, check docker logs with 'docker logs solo-vllm'\n",
+                typer.secho(
+                    "Access the API at: http://localhost:8000\n",
+                    fg=typer.colors.BLUE
+                )
+                typer.secho(
+                    "If you experience any issues, check docker logs with 'docker logs solo-vllm'\n",
                     fg=typer.colors.YELLOW
                 )
-                        break
-                    time.sleep(1)
-                else:
-                    typer.secho(
-                        f"Port {port} is not listening after {timeout} seconds. Please check docker logs for for more information.\n",
-                        fg=typer.colors.RED
-                    )
 
         elif server_choice == ServerType.OLLAMA:
             setup_success = setup_ollama_server(use_gpu, gpu_vendor)
@@ -155,8 +140,7 @@ def is_port_in_use(port: int) -> bool:
             typer.secho("\n✅ Custom API configuration saved!", fg=typer.colors.BRIGHT_GREEN)
 
     except Exception as e:
-        typer.echo(f"\n❌ Unexpected error: {e}", err=True)
-        typer.echo("Please check docker logs for more information.", err=True)
+        typer.echo(f"\n❌ Setup failed: {e}", err=True)
         raise typer.Exit(code=1)
 
 if __name__ == "__main__":
diff --git a/solo_server/utils/docker_utils.py b/solo_server/utils/docker_utils.py
index 51088ac..5a36b45 100644
--- a/solo_server/utils/docker_utils.py
+++ b/solo_server/utils/docker_utils.py
@@ -52,7 +52,7 @@ def start_docker_engine(os_name):
         subprocess.run(["open", "/Applications/Docker.app"], check=True, capture_output=True)
 
     # Wait for Docker to start
-    timeout = 60
+    timeout = 30
     start_time = time.time()
     while time.time() - start_time < timeout:
         try:
diff --git a/solo_server/utils/hardware.py b/solo_server/utils/hardware.py
index 516780b..5c3db76 100644
--- a/solo_server/utils/hardware.py
+++ b/solo_server/utils/hardware.py
@@ -1,6 +1,7 @@
 import platform
 import psutil
 import GPUtil
+import typer
 import subprocess
 import os
 import json
@@ -64,6 +65,26 @@ def detect_hardware() -> Tuple[str, int, float, str, str, float, str, str]:
 
     return cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, gpu_memory, compute_backend, os_name
 
+def recommended_server(memory_gb, gpu_vendor, gpu_memory) -> str:
+    """
+    Determines the recommended server based on hardware specifications.
+    Returns the recommended server type after displaying the recommendation.
+    """
+    # vLLM recommendation criteria
+    if (gpu_vendor in ["NVIDIA", "AMD", "Intel"] and gpu_memory >= 8) and (memory_gb >= 16):
+        typer.echo("\n✨ vLLM is recommended for your system")
+        return "vLLM"
+
+    # Ollama recommendation criteria
+    elif (gpu_vendor in ["NVIDIA", "AMD"] and gpu_memory >= 6) or (memory_gb >= 16):
+        typer.echo("\n✨ Ollama is recommended for your system")
+        return "ollama"
+
+    # Llama.cpp recommendation criteria
+    else:
+        typer.echo("\n✨ Llama.cpp is recommended for your system")
+        return "llama.cpp"
+
 def display_hardware_info(typer):
 
     # Check if system info exists in config file
@@ -127,3 +148,15 @@ def display_hardware_info(typer):
         title="[bold cyan]System Information[/]"
     )
     console.print(panel)
+
+    # After displaying the hardware panel, show the recommendation
+    recommended_server, reasoning = get_recommended_server()
+    typer.secho(
+        "\n💡 Recommended Server:",
+        fg=typer.colors.BRIGHT_CYAN,
+        bold=True
+    )
+    typer.secho(
+        f"► {recommended_server}: {reasoning}",
+        fg=typer.colors.BRIGHT_GREEN
+    )
diff --git a/solo_server/utils/hf_utils.py b/solo_server/utils/hf_utils.py
index 530582a..ebd7d82 100644
--- a/solo_server/utils/hf_utils.py
+++ b/solo_server/utils/hf_utils.py
@@ -20,4 +20,3 @@ def get_available_models(repo_id: str, suffix: list[str] | str = ".gguf") -> lis
     except Exception as e:
         typer.echo(f"Error fetching models from {repo_id}: {e}")
         return []
-
diff --git a/solo_server/utils/server_utils.py b/solo_server/utils/server_utils.py
index b5bfad1..247fe69 100644
--- a/solo_server/utils/server_utils.py
+++ b/solo_server/utils/server_utils.py
@@ -6,12 +6,10 @@ import subprocess
 
 from solo_server.config import CONFIG_PATH
 from solo_server.utils.nvidia import is_cuda_toolkit_installed
-from solo_server.utils.llama_cpp_utils import start_llama_cpp_server, is_uv_available
+from solo_server.utils.llama_cpp_utils import is_uv_available, start_llama_cpp_server
 
 def setup_vllm_server(gpu_enabled: bool, cpu: str = None, gpu_vendor: str = None, os_name:str = None, port: int = 8000):
     """Setup vLLM server with Docker"""
-    typer.echo("\n🔧 Setting up vLLM server...")
-
     # Initialize container_exists flag
     container_exists = False
     try:
@@ -30,7 +28,7 @@ def setup_vllm_server(gpu_enabled: bool, cpu: str = None, gpu_vendor: str = None
             typer.echo("✅ vLLM server is already setup!")
             return True
         else:
-            remove_container = typer.confirm("vLLM server already exists. Do you want to run with a new model?", default=True)
+            remove_container = typer.confirm("vLLM server already exists. Do you want to run with a new model?", default=False)
             if remove_container:
                 subprocess.run(["docker", "rm", "solo-vllm"], check=True, capture_output=True)
             else:
@@ -47,6 +45,11 @@ def setup_vllm_server(gpu_enabled: bool, cpu: str = None, gpu_vendor: str = None
             subprocess.run(["docker", "pull", "rocm/vllm"], check=True)
         elif cpu == "Apple":
             subprocess.run(["docker", "pull", "getsolo/vllm-arm"], check=True)
+        elif cpu in ["Intel", "AMD"]:
+            subprocess.run(["docker", "pull", "getsolo/vllm-cpu"], check=True)
+        else:
+            typer.echo("❌ vLLM currently does not support your machine", err=True)
+            return False
 
     # Check if port is available
     try:
@@ -68,8 +71,8 @@ def setup_vllm_server(gpu_enabled: bool, cpu: str = None, gpu_vendor: str = None
         with open(CONFIG_PATH, 'r') as f:
             config = json.load(f)
         hf_token = config.get('hugging_face', {}).get('token', '')
-
-    if not hf_token:  # If not in config file, prompt user
+
+    if not hf_token:
         if os_name in ["Linux", "Windows"]:
             typer.echo("Use Ctrl + Shift + V to paste your token.")
         hf_token = typer.prompt("Please add your HuggingFace token (Recommended)")
@@ -121,6 +124,8 @@ def setup_vllm_server(gpu_enabled: bool, cpu: str = None, gpu_vendor: str = None
 
         elif cpu == "Apple":
             docker_run_cmd.append("getsolo/vllm-arm")
+        elif cpu in ["Intel", "AMD"]:
+            docker_run_cmd.append("getsolo/vllm-cpu")
         else:
             typer.echo("❌ Solo server vLLM currently do not support your machine", err=True)
             return False
@@ -128,7 +133,7 @@ def setup_vllm_server(gpu_enabled: bool, cpu: str = None, gpu_vendor: str = None
     # Ask user for model name
     default_model = "meta-llama/Llama-3.2-1B-Instruct"
     model_name = typer.prompt(f"Enter the model name", default=default_model)
-    
+
     # Add the model argument and additional parameters
     docker_run_cmd.append("--model")
     docker_run_cmd.append(model_name)
@@ -141,6 +146,20 @@ def setup_vllm_server(gpu_enabled: bool, cpu: str = None, gpu_vendor: str = None
 
         typer.echo("🚀 Starting vLLM server...")
         subprocess.run(docker_run_cmd, check=True, capture_output=True)
+        # Check docker logs for any errors
+        try:
+            logs = subprocess.run(
+                ["docker", "logs", "solo-vllm"],
+                capture_output=True,
+                text=True,
+                check=True
+            )
+            if logs.stderr:
+                typer.echo(f"⚠️ Server logs show errors:\n{logs.stderr}", err=True)
+            if logs.stdout:
+                typer.echo(f"Server logs:\n{logs.stdout}")
+        except subprocess.CalledProcessError as e:
+            typer.echo(f"❌ Failed to fetch docker logs: {e}", err=True)
 
         # Wait for container to be ready with timeout
         timeout = 30
@@ -197,7 +216,6 @@ def setup_ollama_server(gpu_enabled: bool = False, gpu_vendor: str = None, port:
         else:
             subprocess.run(["docker", "start", "solo-ollama"], check=True, capture_output=True)
     else:
-        typer.echo("\n🔧 Setting up Ollama server...")
         # Pull Ollama image
         typer.echo("📥 Pulling Ollama Registry...")
         subprocess.run(["docker", "pull", "ollama/ollama"], check=True)
@@ -268,7 +286,13 @@ def setup_llama_cpp_server(gpu_enabled: bool, gpu_vendor: str = None, os_name: s
         gpu_vendor (str, optional): The GPU vendor (e.g., NVIDIA, AMD, Apple Silicon).
         os_name (str, optional): The name of the operating system.
     """
-    typer.echo("\n🔧 Setting up llama_cpp server...")
+    # Check if llama-cpp-python is already installed
+    try:
+        import llama_cpp
+        typer.echo("✅ llama.cpp server is already installed")
+        return start_llama_cpp_server(os_name)
+    except ImportError:
+        typer.echo("Installing llama.cpp server...")
 
     # Check if llama-cpp-python is already installed
     try:
@@ -280,7 +304,6 @@ def setup_llama_cpp_server(gpu_enabled: bool, gpu_vendor: str = None, os_name: s
 
     # Set CMAKE_ARGS based on hardware and OS
     cmake_args = []
-
    if gpu_enabled:
         if gpu_vendor == "NVIDIA":
             if not is_cuda_toolkit_installed():
@@ -292,7 +315,7 @@ def setup_llama_cpp_server(gpu_enabled: bool, gpu_vendor: str = None, os_name: s
             cmake_args.append("-DGGML_HIPBLAS=on")
         elif gpu_vendor == "Apple Silicon":
             cmake_args.append("-DGGML_METAL=on")
-    
+
     cmake_args_str = " ".join(cmake_args)
 
     try:
@@ -309,16 +332,15 @@ def setup_llama_cpp_server(gpu_enabled: bool, gpu_vendor: str = None, os_name: s
 
             installer_cmd = [sys.executable, "-m", "pip", "install", "--no-cache-dir", "llama-cpp-python[server]"]
 
         subprocess.check_call(installer_cmd, env=env)
-
         try:
             if start_llama_cpp_server(os_name):
-                typer.echo("\n ✅ llama-cpp server is ready!")
+                typer.echo("\n ✅ llama.cpp server is ready!")
+                return True
         except Exception as e:
-            typer.echo(f"❌ Failed to start llama_cpp server: {e}", err=True)
+            typer.echo(f"❌ Failed to start llama.cpp server: {e}", err=True)
             return False
-        return True
 
     except subprocess.CalledProcessError as e:
-        typer.echo(f"❌ Failed to setup llama_cpp_python server: {e}", err=True)
+        typer.echo(f"❌ Failed to setup llama.cpp server: {e}", err=True)
         return False