From a117a88c0198b97135953db2f33bf37e1fa78729 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Tue, 17 Dec 2024 16:25:16 -0500
Subject: [PATCH 01/28] add to readme

---
 README.md | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/README.md b/README.md
index 9612118..6b70606 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,13 @@ Alternatively to build from source, clone this repo then inside the project's ba
 pip install . 
 ```
 
+### Un-installation
+
+To uninstall `centml`, simply run:
+```bash
+pip uninstall centml
+```
+
 ### CLI
 Once installed, use the centml CLI tool with the following command:
 ```bash
@@ -85,3 +92,30 @@ To run all the tests, use:
 ```bash
 pytest
 ```
+
+### Common Issues
+
+- **`SSL` certificate on `macOS`**
+
+    Sometimes you will see errors when using commands such as `centml cluster [CMD]`, with output that looks like:
+
+    ```logs
+
+    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/urllib3/util/retry.py", line 519, in increment
+
+    raise MaxRetryError(_pool, url, reason) from reason  # type: ignore[arg-type]
+
+    urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.centml.com', port=443):
+
+    Max retries exceeded with url: /deployments
+
+    (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))
+    ```
+
+    **Solution**:
+    To fix this issue, navigate to your `python` installation directory and run the `Install Certificates.command` file located there.
+
+    For example, if you are using `python3.10`, the file path would be:
+    ```
+    /Applications/Python 3.10/Install Certificates.command
+    ```
\ No newline at end of file

From 820a0dd844a5fb8594e099019dbe56503e777625 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Wed, 18 Dec 2024 10:34:59 -0500
Subject: [PATCH 02/28] add version message

---
 centml/cli/main.py | 18 ++++++++++++++++++
 setup.py           |  2 +-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/centml/cli/main.py b/centml/cli/main.py
index 64dfa64..42b9378 100644
--- a/centml/cli/main.py
+++ b/centml/cli/main.py
@@ -5,6 +5,24 @@
 
 
 @click.group()
+# this is the version and prog name set in setup.py
+@click.version_option(
+    prog_name="CentML CLI",
+    message=f"""
+     ______              __   __  ___ __ 
+    / ____/___   ____   / /_ /  |/  // / 
+   / /    / _ \\ / __ \\ / __// /|_/ // /  
+  / /___ /  __// / / // /_ / /  / // /___
+  \\____/ \\___//_/ /_/ \\__//_/  /_//_____/
+                                         
+    🚀 Welcome to %(prog)s v%(version)s 🚀
+
+     ✨ AI Deployment Made Simple ✨
+📚 Documentation: https://docs.centml.ai/
+🛠  Need help? Reach out to support@centml.ai
+"""
+)
+
 def cli():
     pass
 
diff --git a/setup.py b/setup.py
index 8c3c36e..cbae9e8 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 
 setup(
     name='centml',
-    version='0.3.0',
+    version='0.3.1',
     packages=find_packages(),
     python_requires=">=3.10",
     long_description=open('README.md').read(),

From 7b2b8f20fff35ba215c78c32c3b6605311dab7e2 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Wed, 18 Dec 2024 10:44:22 -0500
Subject: [PATCH 03/28] match versions to new python client spec

---
 requirements.txt | 2 +-
 setup.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index f5e734b..63a2130 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,4 @@ cryptography==43.0.1
 prometheus-client>=0.20.0
 scipy>=1.6.0
 scikit-learn>=1.5.1
-platform-api-python-client==0.3.1
+platform-api-python-client==0.3.2
diff --git a/setup.py b/setup.py
index cbae9e8..8eea8e2 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 
 setup(
     name='centml',
-    version='0.3.1',
+    version='0.3.2',
     packages=find_packages(),
     python_requires=">=3.10",
     long_description=open('README.md').read(),

From eac91077ac3cece70146f5e3d08c092559bf0065 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Thu, 19 Dec 2024 10:44:30 -0500
Subject: [PATCH 04/28] GONNA REMOVE - Change to local cluster

---
 centml/sdk/auth.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/centml/sdk/auth.py b/centml/sdk/auth.py
index 8646864..193f432 100644
--- a/centml/sdk/auth.py
+++ b/centml/sdk/auth.py
@@ -48,13 +48,13 @@ def load_centml_cred():
 def get_centml_token():
     cred = load_centml_cred()
 
-    if not cred:
-        sys.exit("CentML credentials not found. Please login...")
+    # if not cred:
+    #     sys.exit("CentML credentials not found. Please login...")
 
-    exp_time = int(jwt.decode(cred["id_token"], options={"verify_signature": False})["exp"])
+    # exp_time = int(jwt.decode(cred["id_token"], options={"verify_signature": False})["exp"])
 
-    if time.time() >= exp_time - 100:
-        cred = refresh_centml_token(cred["refresh_token"])
+    # if time.time() >= exp_time - 100:
+    #     cred = refresh_centml_token(cred["refresh_token"])
 
     return cred["id_token"]
 

From ed8ecf1881fec701eab1b67af652172e2d0f5f2e Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Thu, 19 Dec 2024 13:09:05 -0500
Subject: [PATCH 05/28] sync version to 3.1.4, the same as platform and api
 client

---
 requirements.txt | 2 +-
 setup.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 63a2130..9f9980c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,4 @@ cryptography==43.0.1
 prometheus-client>=0.20.0
 scipy>=1.6.0
 scikit-learn>=1.5.1
-platform-api-python-client==0.3.2
+platform-api-python-client==3.1.4
diff --git a/setup.py b/setup.py
index 8eea8e2..51be960 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 
 setup(
     name='centml',
-    version='0.3.2',
+    version='3.1.4',
     packages=find_packages(),
     python_requires=">=3.10",
     long_description=open('README.md').read(),

From fa04007e409a03c1159573f66d7bdcf2de5aac2f Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Thu, 19 Dec 2024 14:48:28 -0500
Subject: [PATCH 06/28] remove empty lines

---
 centml/cli/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/centml/cli/main.py b/centml/cli/main.py
index 42b9378..d8d263b 100644
--- a/centml/cli/main.py
+++ b/centml/cli/main.py
@@ -14,7 +14,7 @@
    / /    / _ \\ / __ \\ / __// /|_/ // /  
   / /___ /  __// / / // /_ / /  / // /___
   \\____/ \\___//_/ /_/ \\__//_/  /_//_____/
-                                         
+
     🚀 Welcome to %(prog)s v%(version)s 🚀
 
      ✨ AI Deployment Made Simple ✨

From 11719172c40206963e88dad8234680262c6e5ff0 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Thu, 19 Dec 2024 15:02:44 -0500
Subject: [PATCH 07/28] change cli get to use name as parameter rather than
 type with ID

---
 centml/cli/cluster.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index f6f88f8..8add3e9 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -96,21 +96,29 @@ def ls(type):
 
 
 @click.command(help="Get deployment details")
-@click.argument("type", type=click.Choice(list(depl_name_to_type_map.keys())))
-@click.argument("id", type=int)
+@click.argument("name", type=str)
 @handle_exception
-def get(type, id):
+def get(name):
     with get_centml_client() as cclient:
-        depl_type = depl_name_to_type_map[type]
+        # Retrieve all deployments and search for the given name
+        deployments = cclient.get(None)
+        deployment = next((d for d in deployments if d.name == name), None)
+
+        if deployment is None:
+            sys.exit(f"Deployment with name '{name}' not found.")
+
+        depl_type = deployment.type
+        depl_id = deployment.id
 
+        # Now retrieve the full deployment details based on the type
         if depl_type == DeploymentType.INFERENCE_V2:
-            deployment = cclient.get_inference(id)
+            deployment = cclient.get_inference(depl_id)
         elif depl_type == DeploymentType.COMPUTE_V2:
-            deployment = cclient.get_compute(id)
+            deployment = cclient.get_compute(depl_id)
         elif depl_type == DeploymentType.CSERVE:
-            deployment = cclient.get_cserve(id)
+            deployment = cclient.get_cserve(depl_id)
         else:
-            sys.exit("Please enter correct deployment type")
+            sys.exit("Unknown deployment type.")
 
         ready_status = _get_ready_status(cclient, deployment)
         _, id_to_hw_map = _get_hw_to_id_map(cclient, deployment.cluster_id)

From 4ed05f6979e3deb35555df3fb20618b6dcf147a7 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Thu, 19 Dec 2024 16:14:09 -0500
Subject: [PATCH 08/28] NOT WORKING - attempt to add create deployment

---
 centml/cli/cluster.py | 114 ++++++++++++++++++++++++++++++++++++++++++
 centml/cli/main.py    |   3 +-
 2 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 8add3e9..24a2048 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -181,6 +181,120 @@ def get(name):
             )
 
 
+@click.command(help="Create a new deployment")
+@handle_exception
+def create():
+    with get_centml_client() as cclient:
+        # Prompt for general fields
+        name = click.prompt("Enter a name for the deployment")
+        dtype_str = click.prompt(
+            "Select a deployment type",
+            type=click.Choice(list(depl_name_to_type_map.keys())),
+            show_choices=True
+        )
+        depl_type = depl_name_to_type_map[dtype_str]
+
+        # Select cluster
+        clusters = cclient.get_clusters().results
+        if not clusters:
+            click.echo("No clusters available. Please ensure you have a cluster setup.")
+            return
+        cluster_names = [c.name for c in clusters]
+        cluster_name = click.prompt(
+            "Select a cluster",
+            type=click.Choice(cluster_names),
+            show_choices=True
+        )
+        cluster_id = next(c.id for c in clusters if c.name == cluster_name)
+
+        # Hardware selection
+        hw_resp = cclient.get_hardware_instances(cluster_id)
+        if not hw_resp:
+            click.echo("No hardware instances available for this cluster.")
+            return
+        hw_names = [h.name for h in hw_resp]
+        hw_name = click.prompt(
+            "Select a hardware instance",
+            type=click.Choice(hw_names),
+            show_choices=True
+        )
+        hw_id = next(h.id for h in hw_resp if h.name == hw_name)
+
+        # Common fields
+        min_scale = click.prompt("Minimum number of replicas", default=1, type=int)
+        max_scale = click.prompt("Maximum number of replicas", default=1, type=int)
+        concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False)
+        concurrency = int(concurrency) if concurrency else None
+
+        # Depending on type:
+        if depl_type == DeploymentType.INFERENCE_V2:
+            image = click.prompt("Enter the image URL")
+            container_port = click.prompt("Enter the container port", default=8080, type=int)
+            healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True)
+            env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False)
+            env_vars = {}
+            if env_vars_str.strip():
+                for kv in env_vars_str.split(","):
+                    k, v = kv.strip().split("=")
+                    env_vars[k] = v
+
+            # Construct the inference request
+            from platform_api_python_client import CreateInferenceDeploymentRequest
+            req = CreateInferenceDeploymentRequest(
+                name=name,
+                cluster_id=cluster_id,
+                hardware_instance_id=hw_id,
+                image_url=image,
+                container_port=container_port,
+                healthcheck=healthcheck,
+                min_scale=min_scale,
+                max_scale=max_scale,
+                concurrency=concurrency,
+                env_vars=env_vars if env_vars else None
+            )
+            created = cclient.create_inference(req)
+            click.echo(f"Inference deployment created with ID: {created.id}")
+
+        elif depl_type == DeploymentType.COMPUTE_V2:
+            # For compute deployments, we might ask for a public SSH key
+            ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False)
+
+            from platform_api_python_client import CreateComputeDeploymentRequest
+            req = CreateComputeDeploymentRequest(
+                name=name,
+                cluster_id=cluster_id,
+                hardware_instance_id=hw_id,
+                ssh_public_key=ssh_key if ssh_key.strip() else None
+            )
+            created = cclient.create_compute(req)
+            click.echo(f"Compute deployment created with ID: {created.id}")
+
+        elif depl_type == DeploymentType.CSERVE:
+            # For cserve deployments, ask for model and parallelism
+            model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b")
+            tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int)
+            pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int)
+            # concurrency asked above
+
+            from platform_api_python_client import CreateCServeDeploymentRequest
+            req = CreateCServeDeploymentRequest(
+                name=name,
+                cluster_id=cluster_id,
+                hardware_instance_id=hw_id,
+                model=model,
+                tensor_parallel_size=tensor_parallel_size,
+                pipeline_parallel_size=pipeline_parallel_size,
+                min_scale=min_scale,
+                max_scale=max_scale,
+                concurrency=concurrency
+            )
+            created = cclient.create_cserve(req)
+            click.echo(f"CServe deployment created with ID: {created.id}")
+
+        else:
+            click.echo("Unknown deployment type.")
+
+
 @click.command(help="Delete a deployment")
 @click.argument("id", type=int)
 @handle_exception
diff --git a/centml/cli/main.py b/centml/cli/main.py
index d8d263b..60e8658 100644
--- a/centml/cli/main.py
+++ b/centml/cli/main.py
@@ -1,7 +1,7 @@
 import click
 
 from centml.cli.login import login, logout
-from centml.cli.cluster import ls, get, delete, pause, resume
+from centml.cli.cluster import ls, get, delete, pause, resume, create
 
 
 @click.group()
@@ -45,6 +45,7 @@ def ccluster():
 
 ccluster.add_command(ls)
 ccluster.add_command(get)
+ccluster.add_command(create)
 ccluster.add_command(delete)
 ccluster.add_command(pause)
 ccluster.add_command(resume)

From 7ca22d6e2991e7cf36b557a0939a5a875799d80a Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Thu, 19 Dec 2024 16:40:45 -0500
Subject: [PATCH 09/28] NOT WORKING - refactor variable names, now hitting
 Internal Server Error

---
 centml/cli/cluster.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 24a2048..1ea669c 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -145,7 +145,7 @@ def get(name):
                 tabulate(
                     [
                         ("Image", deployment.image_url),
-                        ("Container port", deployment.container_port),
+                        ("Container port", deployment.port),
                         ("Healthcheck", deployment.healthcheck or "/"),
                         ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
                         ("Environment variables", deployment.env_vars or "None"),
@@ -199,13 +199,13 @@ def create():
         if not clusters:
             click.echo("No clusters available. Please ensure you have a cluster setup.")
             return
-        cluster_names = [c.name for c in clusters]
+        cluster_names = [c.display_name for c in clusters]
         cluster_name = click.prompt(
             "Select a cluster",
             type=click.Choice(cluster_names),
             show_choices=True
         )
-        cluster_id = next(c.id for c in clusters if c.name == cluster_name)
+        cluster_id = next(c.id for c in clusters if c.display_name == cluster_name)
 
         # Hardware selection
         hw_resp = cclient.get_hardware_instances(cluster_id)
@@ -229,7 +229,7 @@ def create():
         # Depending on type:
         if depl_type == DeploymentType.INFERENCE_V2:
             image = click.prompt("Enter the image URL")
-            container_port = click.prompt("Enter the container port", default=8080, type=int)
+            port = click.prompt("Enter the container port", default=8080, type=int)
             healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True)
             env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False)
             env_vars = {}
@@ -245,7 +245,7 @@ def create():
                 cluster_id=cluster_id,
                 hardware_instance_id=hw_id,
                 image_url=image,
-                container_port=container_port,
+                port=port,
                 healthcheck=healthcheck,
                 min_scale=min_scale,
                 max_scale=max_scale,

From a4ac8839ea61d08cc635103e27fb0a19ce86a068 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Fri, 20 Dec 2024 13:14:27 -0500
Subject: [PATCH 10/28] change exception handling to print detail, add echo
 deployment name.

---
 centml/cli/cluster.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 1ea669c..4accc21 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -21,7 +21,7 @@ def wrapper(*args, **kwargs):
         try:
             return func(*args, **kwargs)
         except ApiException as e:
-            click.echo(f"Error: {e.reason}")
+            click.echo(f"Error: {e.body or e.reason}")
             return None
 
     return wrapper
@@ -253,7 +253,7 @@ def create():
                 env_vars=env_vars if env_vars else None
             )
             created = cclient.create_inference(req)
-            click.echo(f"Inference deployment created with ID: {created.id}")
+            click.echo(f"Inference deployment {name} created with ID: {created.id}")
 
         elif depl_type == DeploymentType.COMPUTE_V2:
             # For compute deployments, we might ask for a public SSH key
@@ -267,7 +267,7 @@ def create():
                 ssh_public_key=ssh_key if ssh_key.strip() else None
             )
             created = cclient.create_compute(req)
-            click.echo(f"Compute deployment created with ID: {created.id}")
+            click.echo(f"Compute deployment {name} created with ID: {created.id}")
 
         elif depl_type == DeploymentType.CSERVE:
             # For cserve deployments, ask for model and parallelism
@@ -289,7 +289,7 @@ def create():
                 concurrency=concurrency
             )
             created = cclient.create_cserve(req)
-            click.echo(f"CServe deployment created with ID: {created.id}")
+            click.echo(f"CServe deployment {name} created with ID: {created.id}")
 
         else:
             click.echo("Unknown deployment type.")

From eed79f2dd03165c52d68b0252cddb9b542082f17 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Fri, 20 Dec 2024 13:18:12 -0500
Subject: [PATCH 11/28] fix container port for inf dep

---
 centml/cli/cluster.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 4accc21..36002ae 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -145,7 +145,7 @@ def get(name):
                 tabulate(
                     [
                         ("Image", deployment.image_url),
-                        ("Container port", deployment.port),
+                        ("Container port", deployment.container_port),
                         ("Healthcheck", deployment.healthcheck or "/"),
                         ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
                         ("Environment variables", deployment.env_vars or "None"),

From db15900efedcf928a1c096454adaf7aad7ac8f30 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Fri, 20 Dec 2024 13:32:02 -0500
Subject: [PATCH 12/28] add get prebuilt img api call

---
 centml/sdk/api.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/centml/sdk/api.py b/centml/sdk/api.py
index f83dfd4..8945f3f 100644
--- a/centml/sdk/api.py
+++ b/centml/sdk/api.py
@@ -61,6 +61,14 @@ def get_clusters(self):
     def get_hardware_instances(self, cluster_id):
         return self._api.get_hardware_instances_hardware_instances_get(cluster_id).results
 
+    def get_prebuilt_images(self, depl_type: DeploymentType = None):
+        """Get Prebuilt Images
+
+        :param depl_type: DeploymentType, optional
+        :return: ListPrebuiltImageResponse
+        """
+        return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type)
+
 
 @contextmanager
 def get_centml_client():

From 6ffbee882c05b042878e3301c87e1dd594d03d87 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Fri, 20 Dec 2024 14:11:12 -0500
Subject: [PATCH 13/28] added prebuilt img list for all, but only inf now
 working

---
 centml/cli/cluster.py | 42 +++++++++++++++++++++++++++++++++++++-----
 centml/sdk/api.py     |  6 +-----
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 36002ae..2af2a0d 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -226,11 +226,38 @@ def create():
         concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False)
         concurrency = int(concurrency) if concurrency else None
 
-        # Depending on type:
         if depl_type == DeploymentType.INFERENCE_V2:
-            image = click.prompt("Enter the image URL")
-            port = click.prompt("Enter the container port", default=8080, type=int)
-            healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True)
+            # Retrieve prebuilt images for inference deployments
+            prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
+            image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else []
+
+            chosen_image = click.prompt(
+                "Select a prebuilt image or provide a custom image URL",
+                type=click.Choice(image_choices),
+                show_choices=True
+            )
+
+            if chosen_image == "Other":
+                image = click.prompt("Enter the image URL")
+                port = click.prompt("Enter the container port", default=8080, type=int)
+                healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True)
+            else:
+                # Find the selected prebuilt image details
+                selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image)
+                image = selected_prebuilt.image_name
+                # Use the prebuilt image port and healthcheck as defaults
+                port = click.prompt(
+                    "Enter the container port",
+                    default=selected_prebuilt.port,
+                    type=int
+                )
+                default_healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/"
+                healthcheck = click.prompt(
+                    "Enter healthcheck endpoint (default '/')",
+                    default=default_healthcheck,
+                    show_default=True
+                )
+
             env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False)
             env_vars = {}
             if env_vars_str.strip():
@@ -260,10 +287,15 @@ def create():
             ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False)
 
             from platform_api_python_client import CreateComputeDeploymentRequest
+            # If compute deployments also use prebuilt images and require image_url,
+            # we could similarly fetch them and prompt just like inference above.
+            # For now, if the schema doesn't require image_url for compute:
             req = CreateComputeDeploymentRequest(
                 name=name,
                 cluster_id=cluster_id,
                 hardware_instance_id=hw_id,
+                # If needed, you can do similar logic for prebuilt images here:
+                # image_url = ...
                 ssh_public_key=ssh_key if ssh_key.strip() else None
             )
             created = cclient.create_compute(req)
@@ -274,9 +306,9 @@ def create():
             model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b")
             tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int)
             pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int)
-            # concurrency asked above
 
             from platform_api_python_client import CreateCServeDeploymentRequest
+            # If cserve deployments also require images, we could do similar logic here.
             req = CreateCServeDeploymentRequest(
                 name=name,
                 cluster_id=cluster_id,
diff --git a/centml/sdk/api.py b/centml/sdk/api.py
index 8945f3f..272ca57 100644
--- a/centml/sdk/api.py
+++ b/centml/sdk/api.py
@@ -2,6 +2,7 @@
 
 import platform_api_python_client
 from platform_api_python_client import (
+    DeploymentType,
     DeploymentStatus,
     CreateInferenceDeploymentRequest,
     CreateComputeDeploymentRequest,
@@ -62,11 +63,6 @@ def get_hardware_instances(self, cluster_id):
         return self._api.get_hardware_instances_hardware_instances_get(cluster_id).results
 
     def get_prebuilt_images(self, depl_type: DeploymentType = None):
-        """Get Prebuilt Images
-
-        :param depl_type: DeploymentType, optional
-        :return: ListPrebuiltImageResponse
-        """
         return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type)
 
 

From f0b8a855153aa0d5f8c6cdd6b065ad21d6b89479 Mon Sep 17 00:00:00 2001
From: Honglin Cao <honglin.cao@centml.ai>
Date: Fri, 20 Dec 2024 14:33:01 -0500
Subject: [PATCH 14/28] add default to choice, add image lists from db

---
 centml/cli/cluster.py | 68 +++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 2af2a0d..33280c6 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -190,7 +190,8 @@ def create():
         dtype_str = click.prompt(
             "Select a deployment type",
             type=click.Choice(list(depl_name_to_type_map.keys())),
-            show_choices=True
+            show_choices=True,
+            default=list(depl_name_to_type_map.keys())[0]
         )
         depl_type = depl_name_to_type_map[dtype_str]
 
@@ -203,7 +204,8 @@ def create():
         cluster_name = click.prompt(
             "Select a cluster",
             type=click.Choice(cluster_names),
-            show_choices=True
+            show_choices=True,
+            default=cluster_names[0]
         )
         cluster_id = next(c.id for c in clusters if c.display_name == cluster_name)
 
@@ -216,47 +218,35 @@ def create():
         hw_name = click.prompt(
             "Select a hardware instance",
             type=click.Choice(hw_names),
-            show_choices=True
+            show_choices=True,
+            default=hw_names[0]
         )
         hw_id = next(h.id for h in hw_resp if h.name == hw_name)
 
-        # Common fields
-        min_scale = click.prompt("Minimum number of replicas", default=1, type=int)
-        max_scale = click.prompt("Maximum number of replicas", default=1, type=int)
-        concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False)
-        concurrency = int(concurrency) if concurrency else None
-
         if depl_type == DeploymentType.INFERENCE_V2:
             # Retrieve prebuilt images for inference deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
             image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else []
+            image_choices.append("Other")
 
             chosen_image = click.prompt(
-                "Select a prebuilt image or provide a custom image URL",
+                "Select a prebuilt image or choose 'Other' to provide a custom image URL",
                 type=click.Choice(image_choices),
-                show_choices=True
+                show_choices=True,
+                default=image_choices[0]
             )
 
             if chosen_image == "Other":
-                image = click.prompt("Enter the image URL")
-                port = click.prompt("Enter the container port", default=8080, type=int)
-                healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True)
+                image = click.prompt("Enter the custom image URL")
+                port = click.prompt("Enter the container port for the image", default=8080, type=int)
+                healthcheck = click.prompt("Enter healthcheck endpoint (default '/') for the image", default="/", show_default=True)
             else:
                 # Find the selected prebuilt image details
                 selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image)
                 image = selected_prebuilt.image_name
                 # Use the prebuilt image port and healthcheck as defaults
-                port = click.prompt(
-                    "Enter the container port",
-                    default=selected_prebuilt.port,
-                    type=int
-                )
-                default_healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/"
-                healthcheck = click.prompt(
-                    "Enter healthcheck endpoint (default '/')",
-                    default=default_healthcheck,
-                    show_default=True
-                )
+                port = selected_prebuilt.port
+                healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/"
 
             env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False)
             env_vars = {}
@@ -265,6 +255,12 @@ def create():
                     k, v = kv.strip().split("=")
                     env_vars[k] = v
 
+            # Common fields
+            min_scale = click.prompt("Minimum number of replicas", default=1, type=int)
+            max_scale = click.prompt("Maximum number of replicas", default=1, type=int)
+            concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False)
+            concurrency = int(concurrency) if concurrency else None
+
             # Construct the inference request
             from platform_api_python_client import CreateInferenceDeploymentRequest
             req = CreateInferenceDeploymentRequest(
@@ -283,19 +279,30 @@ def create():
             click.echo(f"Inference deployment {name} created with ID: {created.id}")
 
         elif depl_type == DeploymentType.COMPUTE_V2:
+            
+            # Retrieve prebuilt images for inference deployments
+            prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
+            image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else []
+            
+            # Right now we don't support custom compute images
+            # TODO: add image tags to the url, right now its required by compute but not inference
+            chosen_image = click.prompt(
+                "Select a prebuilt image",
+                type=click.Choice(image_choices),
+                show_choices=True,
+                default=image_choices[0]
+            )
+                
             # For compute deployments, we might ask for a public SSH key
             ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False)
+            #jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False)
 
             from platform_api_python_client import CreateComputeDeploymentRequest
-            # If compute deployments also use prebuilt images and require image_url,
-            # we could similarly fetch them and prompt just like inference above.
-            # For now, if the schema doesn't require image_url for compute:
             req = CreateComputeDeploymentRequest(
                 name=name,
                 cluster_id=cluster_id,
                 hardware_instance_id=hw_id,
-                # If needed, you can do similar logic for prebuilt images here:
-                # image_url = ...
+                image_url = chosen_image,
                 ssh_public_key=ssh_key if ssh_key.strip() else None
             )
             created = cclient.create_compute(req)
@@ -308,7 +315,6 @@ def create():
             pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int)
 
             from platform_api_python_client import CreateCServeDeploymentRequest
-            # If cserve deployments also require images, we could do similar logic here.
             req = CreateCServeDeploymentRequest(
                 name=name,
                 cluster_id=cluster_id,

From 35cf1d9108eacc97107b7f4d0ba97c1169a2e952 Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Tue, 18 Mar 2025 10:46:32 -0400
Subject: [PATCH 15/28] change platform version

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 9f9980c..49d0f6b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,4 @@ cryptography==43.0.1
 prometheus-client>=0.20.0
 scipy>=1.6.0
 scikit-learn>=1.5.1
-platform-api-python-client==3.1.4
+platform-api-python-client==3.1.15

From 6cbe9a0a1c58049de100fc5f804f1cb706645073 Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Tue, 18 Mar 2025 10:52:58 -0400
Subject: [PATCH 16/28] update version 3.1.15

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 51be960..c2d18d5 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 
 setup(
     name='centml',
-    version='3.1.4',
+    version='3.1.15',
     packages=find_packages(),
     python_requires=">=3.10",
     long_description=open('README.md').read(),

From 3f7b9c5e504528a6441a8d6167ed1b422d15f29a Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Tue, 18 Mar 2025 10:59:54 -0400
Subject: [PATCH 17/28] ruff format

---
 centml/cli/cluster.py | 177 ++++++++++++++++++++++++++++++++----------
 centml/cli/main.py    |   3 +-
 2 files changed, 136 insertions(+), 44 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 33280c6..a51bdf3 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -3,7 +3,13 @@
 from typing import Dict
 import click
 from tabulate import tabulate
-from centml.sdk import DeploymentType, DeploymentStatus, ServiceStatus, ApiException, HardwareInstanceResponse
+from centml.sdk import (
+    DeploymentType,
+    DeploymentStatus,
+    ServiceStatus,
+    ApiException,
+    HardwareInstanceResponse,
+)
 from centml.sdk.api import get_centml_client
 
 
@@ -43,45 +49,82 @@ def _get_hw_to_id_map(cclient, cluster_id):
 def _format_ssh_key(ssh_key):
     if not ssh_key:
         return "No SSH Key Found"
-    return ssh_key[:10] + '...'
+    return ssh_key[:10] + "..."
 
 
 def _get_ready_status(cclient, deployment):
     api_status = deployment.status
     service_status = (
-        cclient.get_status(deployment.id).service_status if deployment.status == DeploymentStatus.ACTIVE else None
+        cclient.get_status(deployment.id).service_status
+        if deployment.status == DeploymentStatus.ACTIVE
+        else None
     )
 
     status_styles = {
         (DeploymentStatus.PAUSED, None): ("paused", "yellow", "black"),
         (DeploymentStatus.DELETED, None): ("deleted", "white", "black"),
         (DeploymentStatus.ACTIVE, ServiceStatus.HEALTHY): ("ready", "green", "black"),
-        (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ("starting", "black", "white"),
-        (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ("starting", "black", "white"),
+        (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): (
+            "starting",
+            "black",
+            "white",
+        ),
+        (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): (
+            "starting",
+            "black",
+            "white",
+        ),
         (DeploymentStatus.ACTIVE, ServiceStatus.ERROR): ("error", "red", "black"),
         (DeploymentStatus.ACTIVE, ServiceStatus.CREATECONTAINERCONFIGERROR): (
             "createContainerConfigError",
             "red",
             "black",
         ),
-        (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ("crashLoopBackOff", "red", "black"),
-        (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ("imagePullBackOff", "red", "black"),
-        (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ("progressDeadlineExceeded", "red", "black"),
+        (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): (
+            "crashLoopBackOff",
+            "red",
+            "black",
+        ),
+        (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): (
+            "imagePullBackOff",
+            "red",
+            "black",
+        ),
+        (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): (
+            "progressDeadlineExceeded",
+            "red",
+            "black",
+        ),
     }
 
-    style = status_styles.get((api_status, service_status), ("unknown", "black", "white"))
+    style = status_styles.get(
+        (api_status, service_status), ("unknown", "black", "white")
+    )
     # Handle foreground and background colors
     return click.style(style[0], fg=style[1], bg=style[2])
 
 
 @click.command(help="List all deployments")
-@click.argument("type", type=click.Choice(list(depl_name_to_type_map.keys())), required=False, default=None)
+@click.argument(
+    "type",
+    type=click.Choice(list(depl_name_to_type_map.keys())),
+    required=False,
+    default=None,
+)
 def ls(type):
     with get_centml_client() as cclient:
-        depl_type = depl_name_to_type_map[type] if type in depl_name_to_type_map else None
+        depl_type = (
+            depl_name_to_type_map[type] if type in depl_name_to_type_map else None
+        )
         deployments = cclient.get(depl_type)
         rows = [
-            [d.id, d.name, depl_type_to_name_map[d.type], d.status.value, d.created_at.strftime("%Y-%m-%d %H:%M:%S")]
+            [
+                d.id,
+                d.name,
+                depl_type_to_name_map[d.type],
+                d.status.value,
+                d.created_at.strftime("%Y-%m-%d %H:%M:%S"),
+            ]
             for d in deployments
         ]
 
@@ -132,7 +175,7 @@ def get(name):
                     ("Endpoint", deployment.endpoint_url),
                     ("Created at", deployment.created_at.strftime("%Y-%m-%d %H:%M:%S")),
                     ("Hardware", f"{hw.name} ({hw.num_gpu}x {hw.gpu_type})"),
-                    ("Cost", f"{hw.cost_per_hr/100} credits/hr"),
+                    ("Cost", f"{hw.cost_per_hr / 100} credits/hr"),
                 ],
                 tablefmt="rounded_outline",
                 disable_numparse=True,
@@ -147,7 +190,10 @@ def get(name):
                         ("Image", deployment.image_url),
                         ("Container port", deployment.container_port),
                         ("Healthcheck", deployment.healthcheck or "/"),
-                        ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
+                        (
+                            "Replicas",
+                            {"min": deployment.min_scale, "max": deployment.max_scale},
+                        ),
                         ("Environment variables", deployment.env_vars or "None"),
                         ("Max concurrency", deployment.concurrency or "None"),
                     ],
@@ -158,7 +204,10 @@ def get(name):
         elif depl_type == DeploymentType.COMPUTE_V2:
             click.echo(
                 tabulate(
-                    [("Username", "centml"), ("SSH key", _format_ssh_key(deployment.ssh_public_key))],
+                    [
+                        ("Username", "centml"),
+                        ("SSH key", _format_ssh_key(deployment.ssh_public_key)),
+                    ],
                     tablefmt="rounded_outline",
                     disable_numparse=True,
                 )
@@ -170,9 +219,15 @@ def get(name):
                         ("Hugging face model", deployment.model),
                         (
                             "Parallelism",
-                            {"tensor": deployment.tensor_parallel_size, "pipeline": deployment.pipeline_parallel_size},
+                            {
+                                "tensor": deployment.tensor_parallel_size,
+                                "pipeline": deployment.pipeline_parallel_size,
+                            },
+                        ),
+                        (
+                            "Replicas",
+                            {"min": deployment.min_scale, "max": deployment.max_scale},
                         ),
-                        ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
                         ("Max concurrency", deployment.concurrency or "None"),
                     ],
                     tablefmt="rounded_outline",
@@ -191,7 +246,7 @@ def create():
             "Select a deployment type",
             type=click.Choice(list(depl_name_to_type_map.keys())),
             show_choices=True,
-            default=list(depl_name_to_type_map.keys())[0]
+            default=list(depl_name_to_type_map.keys())[0],
         )
         depl_type = depl_name_to_type_map[dtype_str]
 
@@ -205,7 +260,7 @@ def create():
             "Select a cluster",
             type=click.Choice(cluster_names),
             show_choices=True,
-            default=cluster_names[0]
+            default=cluster_names[0],
         )
         cluster_id = next(c.id for c in clusters if c.display_name == cluster_name)
 
@@ -219,36 +274,58 @@ def create():
             "Select a hardware instance",
             type=click.Choice(hw_names),
             show_choices=True,
-            default=hw_names[0]
+            default=hw_names[0],
         )
         hw_id = next(h.id for h in hw_resp if h.name == hw_name)
 
         if depl_type == DeploymentType.INFERENCE_V2:
             # Retrieve prebuilt images for inference deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
-            image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else []
+            image_choices = (
+                [img.image_name for img in prebuilt_images.results]
+                if prebuilt_images.results
+                else []
+            )
             image_choices.append("Other")
 
             chosen_image = click.prompt(
                 "Select a prebuilt image or choose 'Other' to provide a custom image URL",
                 type=click.Choice(image_choices),
                 show_choices=True,
-                default=image_choices[0]
+                default=image_choices[0],
             )
 
             if chosen_image == "Other":
                 image = click.prompt("Enter the custom image URL")
-                port = click.prompt("Enter the container port for the image", default=8080, type=int)
-                healthcheck = click.prompt("Enter healthcheck endpoint (default '/') for the image", default="/", show_default=True)
+                port = click.prompt(
+                    "Enter the container port for the image", default=8080, type=int
+                )
+                healthcheck = click.prompt(
+                    "Enter healthcheck endpoint (default '/') for the image",
+                    default="/",
+                    show_default=True,
+                )
             else:
                 # Find the selected prebuilt image details
-                selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image)
+                selected_prebuilt = next(
+                    img
+                    for img in prebuilt_images.results
+                    if img.image_name == chosen_image
+                )
                 image = selected_prebuilt.image_name
                 # Use the prebuilt image port and healthcheck as defaults
                 port = selected_prebuilt.port
-                healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/"
+                healthcheck = (
+                    selected_prebuilt.healthcheck
+                    if selected_prebuilt.healthcheck
+                    else "/"
+                )
 
-            env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False)
+            env_vars_str = click.prompt(
+                "Enter environment variables in KEY=VALUE format (comma separated) or leave blank",
+                default="",
+                show_default=False,
+            )
             env_vars = {}
             if env_vars_str.strip():
                 for kv in env_vars_str.split(","):
@@ -258,11 +335,14 @@ def create():
             # Common fields
             min_scale = click.prompt("Minimum number of replicas", default=1, type=int)
             max_scale = click.prompt("Maximum number of replicas", default=1, type=int)
-            concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False)
+            concurrency = click.prompt(
+                "Max concurrency (or leave blank)", default="", show_default=False
+            )
             concurrency = int(concurrency) if concurrency else None
 
             # Construct the inference request
             from platform_api_python_client import CreateInferenceDeploymentRequest
+
             req = CreateInferenceDeploymentRequest(
                 name=name,
                 cluster_id=cluster_id,
@@ -273,48 +353,61 @@ def create():
                 min_scale=min_scale,
                 max_scale=max_scale,
                 concurrency=concurrency,
-                env_vars=env_vars if env_vars else None
+                env_vars=env_vars if env_vars else None,
             )
             created = cclient.create_inference(req)
             click.echo(f"Inference deployment {name} created with ID: {created.id}")
 
         elif depl_type == DeploymentType.COMPUTE_V2:
-            
             # Retrieve prebuilt images for inference deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
-            image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else []
-            
+            image_choices = (
+                [img.image_name for img in prebuilt_images.results]
+                if prebuilt_images.results
+                else []
+            )
+
             # Right now we don't support custom compute images
             # TODO: add image tags to the url, right now its required by compute but not inference
             chosen_image = click.prompt(
                 "Select a prebuilt image",
                 type=click.Choice(image_choices),
                 show_choices=True,
-                default=image_choices[0]
+                default=image_choices[0],
             )
-                
+
             # For compute deployments, we might ask for a public SSH key
-            ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False)
-            #jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False)
+            ssh_key = click.prompt(
+                "Enter your public SSH key", default="", show_default=False
+            )
+            # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False)
 
             from platform_api_python_client import CreateComputeDeploymentRequest
+
             req = CreateComputeDeploymentRequest(
                 name=name,
                 cluster_id=cluster_id,
                 hardware_instance_id=hw_id,
-                image_url = chosen_image,
-                ssh_public_key=ssh_key if ssh_key.strip() else None
+                image_url=chosen_image,
+                ssh_public_key=ssh_key if ssh_key.strip() else None,
             )
             created = cclient.create_compute(req)
             click.echo(f"Compute deployment {name} created with ID: {created.id}")
 
         elif depl_type == DeploymentType.CSERVE:
             # For cserve deployments, ask for model and parallelism
-            model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b")
-            tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int)
-            pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int)
+            model = click.prompt(
+                "Enter the Hugging Face model", default="facebook/opt-1.3b"
+            )
+            tensor_parallel_size = click.prompt(
+                "Tensor parallel size", default=1, type=int
+            )
+            pipeline_parallel_size = click.prompt(
+                "Pipeline parallel size", default=1, type=int
+            )
 
             from platform_api_python_client import CreateCServeDeploymentRequest
+
             req = CreateCServeDeploymentRequest(
                 name=name,
                 cluster_id=cluster_id,
@@ -324,7 +417,7 @@ def create():
                 pipeline_parallel_size=pipeline_parallel_size,
                 min_scale=min_scale,
                 max_scale=max_scale,
-                concurrency=concurrency
+                concurrency=concurrency,
             )
             created = cclient.create_cserve(req)
             click.echo(f"CServe deployment {name} created with ID: {created.id}")
diff --git a/centml/cli/main.py b/centml/cli/main.py
index 60e8658..bd6679c 100644
--- a/centml/cli/main.py
+++ b/centml/cli/main.py
@@ -20,9 +20,8 @@
      ✨ AI Deployment Made Simple ✨
 📚 Documentation: https://docs.centml.ai/
 🛠  Need help? Reach out to support@centml.ai
-"""
+""",
 )
-
 def cli():
     pass
 

From 1d13b3149efef0798006a20b9f16e08b457e7c08 Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Tue, 18 Mar 2025 11:03:10 -0400
Subject: [PATCH 18/28] black reformat

---
 centml/cli/cluster.py | 152 ++++++++----------------------------------
 1 file changed, 29 insertions(+), 123 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index a51bdf3..f7b45ee 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -3,13 +3,7 @@
 from typing import Dict
 import click
 from tabulate import tabulate
-from centml.sdk import (
-    DeploymentType,
-    DeploymentStatus,
-    ServiceStatus,
-    ApiException,
-    HardwareInstanceResponse,
-)
+from centml.sdk import DeploymentType, DeploymentStatus, ServiceStatus, ApiException, HardwareInstanceResponse
 from centml.sdk.api import get_centml_client
 
 
@@ -55,76 +49,39 @@ def _format_ssh_key(ssh_key):
 def _get_ready_status(cclient, deployment):
     api_status = deployment.status
     service_status = (
-        cclient.get_status(deployment.id).service_status
-        if deployment.status == DeploymentStatus.ACTIVE
-        else None
+        cclient.get_status(deployment.id).service_status if deployment.status == DeploymentStatus.ACTIVE else None
     )
 
     status_styles = {
         (DeploymentStatus.PAUSED, None): ("paused", "yellow", "black"),
         (DeploymentStatus.DELETED, None): ("deleted", "white", "black"),
         (DeploymentStatus.ACTIVE, ServiceStatus.HEALTHY): ("ready", "green", "black"),
-        (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): (
-            "starting",
-            "black",
-            "white",
-        ),
-        (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): (
-            "starting",
-            "black",
-            "white",
-        ),
+        (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ("starting", "black", "white"),
+        (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ("starting", "black", "white"),
         (DeploymentStatus.ACTIVE, ServiceStatus.ERROR): ("error", "red", "black"),
         (DeploymentStatus.ACTIVE, ServiceStatus.CREATECONTAINERCONFIGERROR): (
             "createContainerConfigError",
             "red",
             "black",
         ),
-        (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): (
-            "crashLoopBackOff",
-            "red",
-            "black",
-        ),
-        (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): (
-            "imagePullBackOff",
-            "red",
-            "black",
-        ),
-        (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): (
-            "progressDeadlineExceeded",
-            "red",
-            "black",
-        ),
+        (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ("crashLoopBackOff", "red", "black"),
+        (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ("imagePullBackOff", "red", "black"),
+        (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ("progressDeadlineExceeded", "red", "black"),
     }
 
-    style = status_styles.get(
-        (api_status, service_status), ("unknown", "black", "white")
-    )
+    style = status_styles.get((api_status, service_status), ("unknown", "black", "white"))
     # Handle foreground and background colors
     return click.style(style[0], fg=style[1], bg=style[2])
 
 
 @click.command(help="List all deployments")
-@click.argument(
-    "type",
-    type=click.Choice(list(depl_name_to_type_map.keys())),
-    required=False,
-    default=None,
-)
+@click.argument("type", type=click.Choice(list(depl_name_to_type_map.keys())), required=False, default=None)
 def ls(type):
     with get_centml_client() as cclient:
-        depl_type = (
-            depl_name_to_type_map[type] if type in depl_name_to_type_map else None
-        )
+        depl_type = depl_name_to_type_map[type] if type in depl_name_to_type_map else None
         deployments = cclient.get(depl_type)
         rows = [
-            [
-                d.id,
-                d.name,
-                depl_type_to_name_map[d.type],
-                d.status.value,
-                d.created_at.strftime("%Y-%m-%d %H:%M:%S"),
-            ]
+            [d.id, d.name, depl_type_to_name_map[d.type], d.status.value, d.created_at.strftime("%Y-%m-%d %H:%M:%S")]
             for d in deployments
         ]
 
@@ -190,10 +147,7 @@ def get(name):
                         ("Image", deployment.image_url),
                         ("Container port", deployment.container_port),
                         ("Healthcheck", deployment.healthcheck or "/"),
-                        (
-                            "Replicas",
-                            {"min": deployment.min_scale, "max": deployment.max_scale},
-                        ),
+                        ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
                         ("Environment variables", deployment.env_vars or "None"),
                         ("Max concurrency", deployment.concurrency or "None"),
                     ],
@@ -204,10 +158,7 @@ def get(name):
         elif depl_type == DeploymentType.COMPUTE_V2:
             click.echo(
                 tabulate(
-                    [
-                        ("Username", "centml"),
-                        ("SSH key", _format_ssh_key(deployment.ssh_public_key)),
-                    ],
+                    [("Username", "centml"), ("SSH key", _format_ssh_key(deployment.ssh_public_key))],
                     tablefmt="rounded_outline",
                     disable_numparse=True,
                 )
@@ -219,15 +170,9 @@ def get(name):
                         ("Hugging face model", deployment.model),
                         (
                             "Parallelism",
-                            {
-                                "tensor": deployment.tensor_parallel_size,
-                                "pipeline": deployment.pipeline_parallel_size,
-                            },
-                        ),
-                        (
-                            "Replicas",
-                            {"min": deployment.min_scale, "max": deployment.max_scale},
+                            {"tensor": deployment.tensor_parallel_size, "pipeline": deployment.pipeline_parallel_size},
                         ),
+                        ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
                         ("Max concurrency", deployment.concurrency or "None"),
                     ],
                     tablefmt="rounded_outline",
@@ -257,10 +202,7 @@ def create():
             return
         cluster_names = [c.display_name for c in clusters]
         cluster_name = click.prompt(
-            "Select a cluster",
-            type=click.Choice(cluster_names),
-            show_choices=True,
-            default=cluster_names[0],
+            "Select a cluster", type=click.Choice(cluster_names), show_choices=True, default=cluster_names[0]
         )
         cluster_id = next(c.id for c in clusters if c.display_name == cluster_name)
 
@@ -271,21 +213,14 @@ def create():
             return
         hw_names = [h.name for h in hw_resp]
         hw_name = click.prompt(
-            "Select a hardware instance",
-            type=click.Choice(hw_names),
-            show_choices=True,
-            default=hw_names[0],
+            "Select a hardware instance", type=click.Choice(hw_names), show_choices=True, default=hw_names[0]
         )
         hw_id = next(h.id for h in hw_resp if h.name == hw_name)
 
         if depl_type == DeploymentType.INFERENCE_V2:
             # Retrieve prebuilt images for inference deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
-            image_choices = (
-                [img.image_name for img in prebuilt_images.results]
-                if prebuilt_images.results
-                else []
-            )
+            image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else []
             image_choices.append("Other")
 
             chosen_image = click.prompt(
@@ -297,29 +232,17 @@ def create():
 
             if chosen_image == "Other":
                 image = click.prompt("Enter the custom image URL")
-                port = click.prompt(
-                    "Enter the container port for the image", default=8080, type=int
-                )
+                port = click.prompt("Enter the container port for the image", default=8080, type=int)
                 healthcheck = click.prompt(
-                    "Enter healthcheck endpoint (default '/') for the image",
-                    default="/",
-                    show_default=True,
+                    "Enter healthcheck endpoint (default '/') for the image", default="/", show_default=True
                 )
             else:
                 # Find the selected prebuilt image details
-                selected_prebuilt = next(
-                    img
-                    for img in prebuilt_images.results
-                    if img.image_name == chosen_image
-                )
+                selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image)
                 image = selected_prebuilt.image_name
                 # Use the prebuilt image port and healthcheck as defaults
                 port = selected_prebuilt.port
-                healthcheck = (
-                    selected_prebuilt.healthcheck
-                    if selected_prebuilt.healthcheck
-                    else "/"
-                )
+                healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/"
 
             env_vars_str = click.prompt(
                 "Enter environment variables in KEY=VALUE format (comma separated) or leave blank",
@@ -335,9 +258,7 @@ def create():
             # Common fields
             min_scale = click.prompt("Minimum number of replicas", default=1, type=int)
             max_scale = click.prompt("Maximum number of replicas", default=1, type=int)
-            concurrency = click.prompt(
-                "Max concurrency (or leave blank)", default="", show_default=False
-            )
+            concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False)
             concurrency = int(concurrency) if concurrency else None
 
             # Construct the inference request
@@ -361,25 +282,16 @@ def create():
         elif depl_type == DeploymentType.COMPUTE_V2:
             # Retrieve prebuilt images for inference deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
-            image_choices = (
-                [img.image_name for img in prebuilt_images.results]
-                if prebuilt_images.results
-                else []
-            )
+            image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else []
 
             # Right now we don't support custom compute images
             # TODO: add image tags to the url, right now its required by compute but not inference
             chosen_image = click.prompt(
-                "Select a prebuilt image",
-                type=click.Choice(image_choices),
-                show_choices=True,
-                default=image_choices[0],
+                "Select a prebuilt image", type=click.Choice(image_choices), show_choices=True, default=image_choices[0]
             )
 
             # For compute deployments, we might ask for a public SSH key
-            ssh_key = click.prompt(
-                "Enter your public SSH key", default="", show_default=False
-            )
+            ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False)
             # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False)
 
             from platform_api_python_client import CreateComputeDeploymentRequest
@@ -396,15 +308,9 @@ def create():
 
         elif depl_type == DeploymentType.CSERVE:
             # For cserve deployments, ask for model and parallelism
-            model = click.prompt(
-                "Enter the Hugging Face model", default="facebook/opt-1.3b"
-            )
-            tensor_parallel_size = click.prompt(
-                "Tensor parallel size", default=1, type=int
-            )
-            pipeline_parallel_size = click.prompt(
-                "Pipeline parallel size", default=1, type=int
-            )
+            model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b")
+            tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int)
+            pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int)
 
             from platform_api_python_client import CreateCServeDeploymentRequest
 

From e11732690fbe1aa720bdbb32d713e638a749a110 Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Tue, 18 Mar 2025 11:11:09 -0400
Subject: [PATCH 19/28] reformat

---
 centml/cli/cluster.py |  3 ++-
 centml/cli/main.py    |  2 +-
 centml/sdk/auth.py    | 10 +++++-----
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index f7b45ee..50ad1f4 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -292,7 +292,8 @@ def create():
 
             # For compute deployments, we might ask for a public SSH key
             ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False)
-            # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False)
+            # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?",
+            # default="n", show_default=False)
 
             from platform_api_python_client import CreateComputeDeploymentRequest
 
diff --git a/centml/cli/main.py b/centml/cli/main.py
index bd6679c..be231f8 100644
--- a/centml/cli/main.py
+++ b/centml/cli/main.py
@@ -8,7 +8,7 @@
 # this is the version and prog name set in setup.py
 @click.version_option(
     prog_name="CentML CLI",
-    message=f"""
+    message="""
      ______              __   __  ___ __ 
     / ____/___   ____   / /_ /  |/  // / 
    / /    / _ \\ / __ \\ / __// /|_/ // /  
diff --git a/centml/sdk/auth.py b/centml/sdk/auth.py
index 193f432..8646864 100644
--- a/centml/sdk/auth.py
+++ b/centml/sdk/auth.py
@@ -48,13 +48,13 @@ def load_centml_cred():
 def get_centml_token():
     cred = load_centml_cred()
 
-    # if not cred:
-    #     sys.exit("CentML credentials not found. Please login...")
+    if not cred:
+        sys.exit("CentML credentials not found. Please login...")
 
-    # exp_time = int(jwt.decode(cred["id_token"], options={"verify_signature": False})["exp"])
+    exp_time = int(jwt.decode(cred["id_token"], options={"verify_signature": False})["exp"])
 
-    # if time.time() >= exp_time - 100:
-    #     cred = refresh_centml_token(cred["refresh_token"])
+    if time.time() >= exp_time - 100:
+        cred = refresh_centml_token(cred["refresh_token"])
 
     return cred["id_token"]
 

From 6b568e8530ddd981ddadaee7cab1d9eb8c0d14c1 Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Tue, 18 Mar 2025 19:16:44 -0400
Subject: [PATCH 20/28] change to 3.2.4, updated depl create

---
 centml/cli/cluster.py | 54 +++++++++++++++++++++++++++----------------
 setup.py              |  2 +-
 2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 50ad1f4..886fe40 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -195,55 +195,60 @@ def create():
         )
         depl_type = depl_name_to_type_map[dtype_str]
 
-        # Select cluster
+        # Select cluster using a numbered list
         clusters = cclient.get_clusters().results
         if not clusters:
             click.echo("No clusters available. Please ensure you have a cluster setup.")
             return
-        cluster_names = [c.display_name for c in clusters]
-        cluster_name = click.prompt(
-            "Select a cluster", type=click.Choice(cluster_names), show_choices=True, default=cluster_names[0]
-        )
-        cluster_id = next(c.id for c in clusters if c.display_name == cluster_name)
 
-        # Hardware selection
+        click.echo("Available clusters:")
+        for idx, cluster in enumerate(clusters, start=1):
+            click.echo(f"{idx}. {cluster.display_name}")
+        cluster_choice = click.prompt("Select a cluster by number", type=int, default=1)
+        selected_cluster = clusters[cluster_choice - 1]
+        cluster_id = selected_cluster.id
+
+        # Hardware selection using a numbered list
         hw_resp = cclient.get_hardware_instances(cluster_id)
         if not hw_resp:
             click.echo("No hardware instances available for this cluster.")
             return
-        hw_names = [h.name for h in hw_resp]
-        hw_name = click.prompt(
-            "Select a hardware instance", type=click.Choice(hw_names), show_choices=True, default=hw_names[0]
-        )
-        hw_id = next(h.id for h in hw_resp if h.name == hw_name)
+
+        click.echo("Available hardware instances:")
+        for idx, hw in enumerate(hw_resp, start=1):
+            click.echo(f"{idx}. {hw.name}")
+        hw_choice = click.prompt("Select a hardware instance by number", type=int, default=1)
+        selected_hw = hw_resp[hw_choice - 1]
+        hw_id = selected_hw.id
 
         if depl_type == DeploymentType.INFERENCE_V2:
             # Retrieve prebuilt images for inference deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
-            image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else []
+            # Build list of image labels
+            image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else []
             image_choices.append("Other")
 
-            chosen_image = click.prompt(
-                "Select a prebuilt image or choose 'Other' to provide a custom image URL",
+            chosen_label = click.prompt(
+                "Select a prebuilt image label or choose 'Other' to provide a custom image URL",
                 type=click.Choice(image_choices),
                 show_choices=True,
                 default=image_choices[0],
             )
 
-            if chosen_image == "Other":
+            if chosen_label == "Other":
                 image = click.prompt("Enter the custom image URL")
                 port = click.prompt("Enter the container port for the image", default=8080, type=int)
                 healthcheck = click.prompt(
                     "Enter healthcheck endpoint (default '/') for the image", default="/", show_default=True
                 )
             else:
-                # Find the selected prebuilt image details
-                selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image)
-                image = selected_prebuilt.image_name
-                # Use the prebuilt image port and healthcheck as defaults
+                # Find the prebuilt image with the matching label
+                selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label)
+                image = selected_prebuilt.image_name  # Use the image_name from the selected prebuilt image
                 port = selected_prebuilt.port
                 healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/"
 
+
             env_vars_str = click.prompt(
                 "Enter environment variables in KEY=VALUE format (comma separated) or leave blank",
                 default="",
@@ -255,6 +260,12 @@ def create():
                     k, v = kv.strip().split("=")
                     env_vars[k] = v
 
+            # Prompt for command and command arguments (optional)
+            command_str = click.prompt("Enter command (space-separated) or leave blank", default="", show_default=False)
+            command = command_str.split() if command_str.strip() else []
+            command_args_str = click.prompt("Enter command arguments (space-separated) or leave blank", default="", show_default=False)
+            command_args = command_args_str.split() if command_args_str.strip() else []
+
             # Common fields
             min_scale = click.prompt("Minimum number of replicas", default=1, type=int)
             max_scale = click.prompt("Maximum number of replicas", default=1, type=int)
@@ -275,7 +286,10 @@ def create():
                 max_scale=max_scale,
                 concurrency=concurrency,
                 env_vars=env_vars if env_vars else None,
+                command=command,
+                command_args=command_args,
             )
+            print(req)
             created = cclient.create_inference(req)
             click.echo(f"Inference deployment {name} created with ID: {created.id}")
 
diff --git a/setup.py b/setup.py
index c2d18d5..17ea7bd 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 
 setup(
     name='centml',
-    version='3.1.15',
+    version='3.2.4',
     packages=find_packages(),
     python_requires=">=3.10",
     long_description=open('README.md').read(),

From abdf441f66817aa2695fe4af1dbabde057a907fa Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Thu, 20 Mar 2025 11:21:37 -0400
Subject: [PATCH 21/28] update to 3.2.4, fix create inf

---
 centml/cli/cluster.py | 13 +++++++------
 requirements.txt      |  2 +-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 886fe40..38ea75c 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -260,11 +260,13 @@ def create():
                     k, v = kv.strip().split("=")
                     env_vars[k] = v
 
-            # Prompt for command and command arguments (optional)
-            command_str = click.prompt("Enter command (space-separated) or leave blank", default="", show_default=False)
-            command = command_str.split() if command_str.strip() else []
-            command_args_str = click.prompt("Enter command arguments (space-separated) or leave blank", default="", show_default=False)
-            command_args = command_args_str.split() if command_args_str.strip() else []
+            command_str = click.prompt(
+                "Enter command (space-separated) or leave blank",
+                default="",
+                show_default=False
+            )
+
+            command = command_str.strip() if command_str.strip() else None
 
             # Common fields
             min_scale = click.prompt("Minimum number of replicas", default=1, type=int)
@@ -287,7 +289,6 @@ def create():
                 concurrency=concurrency,
                 env_vars=env_vars if env_vars else None,
                 command=command,
-                command_args=command_args,
             )
             print(req)
             created = cclient.create_inference(req)
diff --git a/requirements.txt b/requirements.txt
index 49d0f6b..43dbfac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,4 @@ cryptography==43.0.1
 prometheus-client>=0.20.0
 scipy>=1.6.0
 scikit-learn>=1.5.1
-platform-api-python-client==3.1.15
+platform-api-python-client==3.2.4

From 1bb2248139bf8cde302074166808c9481fe6333a Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Thu, 20 Mar 2025 12:28:40 -0400
Subject: [PATCH 22/28] update compute / inf create

---
 centml/cli/cluster.py | 60 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 15 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 38ea75c..75f57d4 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -224,9 +224,12 @@ def create():
         if depl_type == DeploymentType.INFERENCE_V2:
             # Retrieve prebuilt images for inference deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
+
             # Build list of image labels
             image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else []
-            image_choices.append("Other")
+
+            # Right now we disable this other option to get a MVP out quickly.
+            #image_choices.append("Other")
 
             chosen_label = click.prompt(
                 "Select a prebuilt image label or choose 'Other' to provide a custom image URL",
@@ -244,7 +247,15 @@ def create():
             else:
                 # Find the prebuilt image with the matching label
                 selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label)
-                image = selected_prebuilt.image_name  # Use the image_name from the selected prebuilt image
+                # Prompt the user to select a tag from the available tags
+                tag = click.prompt(
+                    "Select a tag for the image",
+                    type=click.Choice(selected_prebuilt.tags),
+                    show_choices=True,
+                    default=selected_prebuilt.tags[0],
+                )
+                # Combine the image URL with the chosen tag
+                image = f"{selected_prebuilt.image_name}:{tag}"
                 port = selected_prebuilt.port
                 healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/"
 
@@ -290,25 +301,42 @@ def create():
                 env_vars=env_vars if env_vars else None,
                 command=command,
             )
-            print(req)
+
             created = cclient.create_inference(req)
             click.echo(f"Inference deployment {name} created with ID: {created.id}")
 
         elif depl_type == DeploymentType.COMPUTE_V2:
-            # Retrieve prebuilt images for inference deployments
+            # Retrieve prebuilt images for compute deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
-            image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else []
+            # Build list of image labels
+            image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else []
 
-            # Right now we don't support custom compute images
-            # TODO: add image tags to the url, right now its required by compute but not inference
-            chosen_image = click.prompt(
-                "Select a prebuilt image", type=click.Choice(image_choices), show_choices=True, default=image_choices[0]
+            chosen_label = click.prompt(
+                "Select a prebuilt image label",
+                type=click.Choice(image_choices),
+                show_choices=True,
+                default=image_choices[0],
+            )
+
+            selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label)
+
+            # Find the prebuilt image with the matching label
+            selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label)
+            # Prompt the user to select a tag from the available tags
+            tag = click.prompt(
+                "Select a tag for the image",
+                type=click.Choice(selected_prebuilt.tags),
+                show_choices=True,
+                default=selected_prebuilt.tags[0],
             )
+            # Combine the image URL with the chosen tag
+            image_url = f"{selected_prebuilt.image_name}:{tag}"
 
             # For compute deployments, we might ask for a public SSH key
-            ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False)
-            # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?",
-            # default="n", show_default=False)
+            ssh_key = click.prompt("Enter your public SSH key")
+
+            # Right now we not support this on prod platform, just unify the feature
+            #jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", type=bool,default=False, show_default=False)
 
             from platform_api_python_client import CreateComputeDeploymentRequest
 
@@ -316,9 +344,11 @@ def create():
                 name=name,
                 cluster_id=cluster_id,
                 hardware_instance_id=hw_id,
-                image_url=chosen_image,
-                ssh_public_key=ssh_key if ssh_key.strip() else None,
-            )
+                image_url=image_url,
+                ssh_public_key=ssh_key,  # we require this
+                #enable_jupyter=jupyter,
+                )
+
             created = cclient.create_compute(req)
             click.echo(f"Compute deployment {name} created with ID: {created.id}")
 

From 47a65bb037a2faeffe469d642ab91def1ca01ada Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Thu, 20 Mar 2025 22:01:27 -0400
Subject: [PATCH 23/28] changing cserve side

---
 centml/cli/cluster.py | 180 ++++++++++++++++++++++++++++++++++--------
 centml/sdk/api.py     |   6 +-
 2 files changed, 150 insertions(+), 36 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 75f57d4..074dc2b 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -187,6 +187,7 @@ def create():
     with get_centml_client() as cclient:
         # Prompt for general fields
         name = click.prompt("Enter a name for the deployment")
+
         dtype_str = click.prompt(
             "Select a deployment type",
             type=click.Choice(list(depl_name_to_type_map.keys())),
@@ -195,33 +196,34 @@ def create():
         )
         depl_type = depl_name_to_type_map[dtype_str]
 
-        # Select cluster using a numbered list
-        clusters = cclient.get_clusters().results
-        if not clusters:
-            click.echo("No clusters available. Please ensure you have a cluster setup.")
-            return
-
-        click.echo("Available clusters:")
-        for idx, cluster in enumerate(clusters, start=1):
-            click.echo(f"{idx}. {cluster.display_name}")
-        cluster_choice = click.prompt("Select a cluster by number", type=int, default=1)
-        selected_cluster = clusters[cluster_choice - 1]
-        cluster_id = selected_cluster.id
-
-        # Hardware selection using a numbered list
-        hw_resp = cclient.get_hardware_instances(cluster_id)
-        if not hw_resp:
-            click.echo("No hardware instances available for this cluster.")
-            return
-
-        click.echo("Available hardware instances:")
-        for idx, hw in enumerate(hw_resp, start=1):
-            click.echo(f"{idx}. {hw.name}")
-        hw_choice = click.prompt("Select a hardware instance by number", type=int, default=1)
-        selected_hw = hw_resp[hw_choice - 1]
-        hw_id = selected_hw.id
-
         if depl_type == DeploymentType.INFERENCE_V2:
+
+            # Select cluster using a numbered list
+            clusters = cclient.get_clusters().results
+            if not clusters:
+                click.echo("No clusters available. Please ensure you have a cluster setup.")
+                return
+
+            click.echo("Available clusters:")
+            for idx, cluster in enumerate(clusters, start=1):
+                click.echo(f"{idx}. {cluster.display_name}")
+            cluster_choice = click.prompt("Select a cluster by number", type=int, default=1)
+            selected_cluster = clusters[cluster_choice - 1]
+            cluster_id = selected_cluster.id
+
+            # Hardware selection using a numbered list
+            hw_resp = cclient.get_hardware_instances(cluster_id)
+            if not hw_resp:
+                click.echo("No hardware instances available for this cluster.")
+                return
+
+            click.echo("Available hardware instances:")
+            for idx, hw in enumerate(hw_resp, start=1):
+                click.echo(f"{idx}. {hw.name}")
+            hw_choice = click.prompt("Select a hardware instance by number", type=int, default=1)
+            selected_hw = hw_resp[hw_choice - 1]
+            hw_id = selected_hw.id
+
             # Retrieve prebuilt images for inference deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
 
@@ -306,6 +308,32 @@ def create():
             click.echo(f"Inference deployment {name} created with ID: {created.id}")
 
         elif depl_type == DeploymentType.COMPUTE_V2:
+            # Select cluster using a numbered list
+            clusters = cclient.get_clusters().results
+            if not clusters:
+                click.echo("No clusters available. Please ensure you have a cluster setup.")
+                return
+
+            click.echo("Available clusters:")
+            for idx, cluster in enumerate(clusters, start=1):
+                click.echo(f"{idx}. {cluster.display_name}")
+            cluster_choice = click.prompt("Select a cluster by number", type=int, default=1)
+            selected_cluster = clusters[cluster_choice - 1]
+            cluster_id = selected_cluster.id
+
+            # Hardware selection using a numbered list
+            hw_resp = cclient.get_hardware_instances(cluster_id)
+            if not hw_resp:
+                click.echo("No hardware instances available for this cluster.")
+                return
+
+            click.echo("Available hardware instances:")
+            for idx, hw in enumerate(hw_resp, start=1):
+                click.echo(f"{idx}. {hw.name}")
+            hw_choice = click.prompt("Select a hardware instance by number", type=int, default=1)
+            selected_hw = hw_resp[hw_choice - 1]
+            hw_id = selected_hw.id
+
             # Retrieve prebuilt images for compute deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
             # Build list of image labels
@@ -353,27 +381,111 @@ def create():
             click.echo(f"Compute deployment {name} created with ID: {created.id}")
 
         elif depl_type == DeploymentType.CSERVE:
-            # For cserve deployments, ask for model and parallelism
-            model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b")
-            tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int)
-            pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int)
+            # Keep things simple, only use recipe.
+            # Retrieve the recipe and hardware instances
+            recipe = cclient.get_cserve_recipe()
+            models = [r.model for r in recipe] if recipe else []
+
+            if not models:
+                click.echo("No models found in the recipe.")
+                sys.exit(1)
+
+            # --- Model Selection (Indexed) ---
+            click.echo("Select a model:")
+            for idx, m in enumerate(models, start=1):
+                click.echo(f"{idx}. {m}")
+            model_index = click.prompt("Enter the model number", type=int, default=1)
+            if model_index < 1 or model_index > len(models):
+                click.echo("Invalid model selection.")
+                sys.exit(1)
+            selected_model = models[model_index - 1]
+
+            # --- Performance Option Selection (Indexed) ---
+            perf_options = ["fastest", "cheapest", "best_value"]
+            click.echo("Select performance option:")
+            for idx, option in enumerate(perf_options, start=1):
+                click.echo(f"{idx}. {option}")
+            perf_index = click.prompt("Enter the performance option number", type=int, default=1)
+            if perf_index < 1 or perf_index > len(perf_options):
+                click.echo("Invalid performance selection.")
+                sys.exit(1)
+            selected_perf_option = perf_options[perf_index - 1]
+
+            # Retrieve the recipe response for the selected model
+            selected_response = next((r for r in recipe if r.model == selected_model), None)
+            if not selected_response:
+                click.echo("Selected model not found in recipe.")
+                sys.exit(1)
+
+            # Get the performance-specific recipe (this is a CServeRecipePerf instance)
+            selected_perf = getattr(selected_response, selected_perf_option)
+
+            # Retrieve the hardware instance ID from the selected performance option
+            hardware_instance_id = selected_perf.hardware_instance_id
+
+            # Get hardware instance details using cclient.get_hardware_instances()
+            hw_instances = cclient.get_hardware_instances()
+            selected_hw = next((hw for hw in hw_instances["results"] if hw["id"] == hardware_instance_id), None)
+            if not selected_hw:
+                click.echo(f"Hardware instance with id {hardware_instance_id} not found.")
+                sys.exit(1)
+
+            # Display the hardware instance information to the user
+            click.echo("Selected Hardware Instance:")
+            for key, value in selected_hw.items():
+                click.echo(f"{key}: {value}")
+
+            # Use the cluster_id from the hardware instance (no need to prompt the user)
+            cluster_id = selected_hw["cluster_id"]
+
+            # --- Additional Prompts ---
+            # Prompt for Hugging Face token (if required)
+            hf_token = click.prompt(
+                "Enter your Hugging Face token or leave blank (if your model isn't private)",
+                default="",
+                show_default=False,
+            )
+
+            # Prompt for environment variables
+            env_vars_str = click.prompt(
+                "Enter environment variables in KEY=VALUE format (comma separated) or leave blank",
+                default="",
+                show_default=False,
+            )
+            env_vars = {}
+            if env_vars_str.strip():
+                for kv in env_vars_str.split(","):
+                    try:
+                        k, v = kv.strip().split("=")
+                        env_vars[k] = v
+                    except ValueError:
+                        click.echo(f"Skipping invalid env var: {kv}")
+
+            # Prompt for scaling and concurrency settings
+            min_scale = click.prompt("Minimum number of replicas", default=1, type=int)
+            max_scale = click.prompt("Maximum number of replicas", default=1, type=int)
+            concurrency_input = click.prompt("Max concurrency (or leave blank)", default="", show_default=False)
+            concurrency = int(concurrency_input) if concurrency_input else None
 
+            # --- Create the Deployment Request ---
             from platform_api_python_client import CreateCServeDeploymentRequest
 
             req = CreateCServeDeploymentRequest(
                 name=name,
                 cluster_id=cluster_id,
-                hardware_instance_id=hw_id,
-                model=model,
-                tensor_parallel_size=tensor_parallel_size,
-                pipeline_parallel_size=pipeline_parallel_size,
+                hardware_instance_id=hardware_instance_id,
+                recipe=selected_perf.recipe,  # The underlying CServeV2Recipe instance
+                hf_token=hf_token if hf_token.strip() else None,
                 min_scale=min_scale,
                 max_scale=max_scale,
                 concurrency=concurrency,
+                env_vars=env_vars if env_vars else None,
             )
+
             created = cclient.create_cserve(req)
             click.echo(f"CServe deployment {name} created with ID: {created.id}")
 
+
         else:
             click.echo("Unknown deployment type.")
 
diff --git a/centml/sdk/api.py b/centml/sdk/api.py
index 272ca57..eff4e46 100644
--- a/centml/sdk/api.py
+++ b/centml/sdk/api.py
@@ -59,12 +59,14 @@ def resume(self, id):
     def get_clusters(self):
         return self._api.get_clusters_clusters_get()
 
-    def get_hardware_instances(self, cluster_id):
-        return self._api.get_hardware_instances_hardware_instances_get(cluster_id).results
+    def get_hardware_instances(self, cluster_id = None):
+        return self._api.get_hardware_instances_hardware_instances_get(cluster_id = cluster_id if cluster_id else None).results
 
     def get_prebuilt_images(self, depl_type: DeploymentType = None):
         return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type)
 
+    def get_cserve_recipe(self):
+        return self._api.get_cserve_recipe_deployments_cserve_recipes_get().results
 
 @contextmanager
 def get_centml_client():

From 31430b1ac6510061ac00f7e7b52fe383dbacfcdd Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Thu, 20 Mar 2025 22:40:14 -0400
Subject: [PATCH 24/28] complete cserve create

---
 centml/cli/cluster.py | 57 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 51 insertions(+), 6 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 074dc2b..e25a381 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -425,18 +425,63 @@ def create():
 
             # Get hardware instance details using cclient.get_hardware_instances()
             hw_instances = cclient.get_hardware_instances()
-            selected_hw = next((hw for hw in hw_instances["results"] if hw["id"] == hardware_instance_id), None)
+            selected_hw = next((hw for hw in hw_instances if hw.id == hardware_instance_id), None)
             if not selected_hw:
                 click.echo(f"Hardware instance with id {hardware_instance_id} not found.")
                 sys.exit(1)
 
-            # Display the hardware instance information to the user
+            # Display the hardware instance information to the user.
+
+            credits = selected_hw.cost_per_hr / 100.0            # e.g., 360 -> 3.60 credits per hour
+            vram_gib = selected_hw.accelerator_memory / 1024       # e.g., 81920 MB -> 80 GiB VRAM
+            memory_gib = selected_hw.memory / 1024                 # e.g., 239616 MB -> 234 GiB memory
+            cpu_cores = selected_hw.cpu / 1000                     # e.g., 26000 millicores -> 26 cores
+
             click.echo("Selected Hardware Instance:")
-            for key, value in selected_hw.items():
-                click.echo(f"{key}: {value}")
+            click.echo(f"{credits:.2f} credits per hour,\n{vram_gib:.0f}GiB VRAM,\nMemory {memory_gib:.0f}GiB,\nCPU {cpu_cores:.0f} cores")
 
             # Use the cluster_id from the hardware instance (no need to prompt the user)
-            cluster_id = selected_hw["cluster_id"]
+            cluster_id = selected_hw.cluster_id
+
+            # Convert the recipe to a dict
+            recipe_dict = selected_perf.recipe.dict()
+
+            # Merge additional_properties into the top-level dictionary for required keys.
+            additional = recipe_dict.get("additional_properties", {})
+            recipe_dict.update(additional)
+            # Optionally remove the additional_properties key if it's no longer needed
+            recipe_dict.pop("additional_properties", None)
+
+            recipe_payload = {
+            "model": recipe_dict.get("model"),
+            "is_embedding_model": recipe_dict.get("is_embedding_model"),
+            "dtype": recipe_dict.get("dtype"),
+            "tokenizer": recipe_dict.get("tokenizer"),
+            "block_size": recipe_dict.get("block_size"),
+            "swap_space": recipe_dict.get("swap_space"),
+            "cache_dtype": recipe_dict.get("cache_dtype"),
+            "spec_tokens": recipe_dict.get("spec_tokens"),
+            "gpu_mem_util": recipe_dict.get("gpu_mem_util"),
+            "max_num_seqs": recipe_dict.get("max_num_seqs"),
+            "quantization": recipe_dict.get("quantization"),
+            "max_model_len": recipe_dict.get("max_model_len"),
+            "offloading_num": int(recipe_dict.get("offloading_num")),
+            "use_flashinfer": recipe_dict.get("use_flashinfer"),
+            "eager_execution": recipe_dict.get("eager_execution"),
+            "spec_draft_model": recipe_dict.get("spec_draft_model"),
+            "spec_max_seq_len": recipe_dict.get("spec_max_seq_len"),
+            "use_prefix_caching": recipe_dict.get("use_prefix_caching"),
+            "num_scheduler_steps": recipe_dict.get("num_scheduler_steps"),
+            "spec_max_batch_size": recipe_dict.get("spec_max_batch_size"),
+            "use_chunked_prefill": recipe_dict.get("use_chunked_prefill"),
+            "chunked_prefill_size": recipe_dict.get("chunked_prefill_size"),
+            "tensor_parallel_size": recipe_dict.get("tensor_parallel_size"),
+            "max_seq_len_to_capture": recipe_dict.get("max_seq_len_to_capture"),
+            "pipeline_parallel_size": recipe_dict.get("pipeline_parallel_size"),
+            "spec_prompt_lookup_max": recipe_dict.get("spec_prompt_lookup_max"),
+            "spec_prompt_lookup_min": recipe_dict.get("spec_prompt_lookup_min"),
+            "distributed_executor_backend": recipe_dict.get("distributed_executor_backend"),
+            }
 
             # --- Additional Prompts ---
             # Prompt for Hugging Face token (if required)
@@ -474,7 +519,7 @@ def create():
                 name=name,
                 cluster_id=cluster_id,
                 hardware_instance_id=hardware_instance_id,
-                recipe=selected_perf.recipe,  # The underlying CServeV2Recipe instance
+                recipe=recipe_payload,
                 hf_token=hf_token if hf_token.strip() else None,
                 min_scale=min_scale,
                 max_scale=max_scale,

From 4963ce523d0ebece44e12311cc99375b01dee140 Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Thu, 20 Mar 2025 22:46:24 -0400
Subject: [PATCH 25/28] add TODO; delete/pause/resume take a deployment name
 instead of an ID

---
 centml/cli/cluster.py | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index e25a381..aacb0a2 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -94,7 +94,7 @@ def ls(type):
             )
         )
 
-
+# TODO: Status for Cserve seems to be broken
 @click.command(help="Get deployment details")
 @click.argument("name", type=str)
 @handle_exception
@@ -536,27 +536,42 @@ def create():
 
 
 @click.command(help="Delete a deployment")
-@click.argument("id", type=int)
+@click.argument("name", type=str)
 @handle_exception
-def delete(id):
+def delete(name):
     with get_centml_client() as cclient:
-        cclient.delete(id)
-        click.echo("Deployment has been deleted")
+        # Retrieve all deployments and search for the given name
+        deployments = cclient.get(None)
+        deployment = next((d for d in deployments if d.name == name), None)
+        if deployment is None:
+            sys.exit(f"Deployment with name '{name}' not found.")
+        cclient.delete(deployment.id)
+        click.echo(f"Deployment {name} has been deleted")
 
 
 @click.command(help="Pause a deployment")
-@click.argument("id", type=int)
+@click.argument("name", type=str)
 @handle_exception
-def pause(id):
+def pause(name):
     with get_centml_client() as cclient:
-        cclient.pause(id)
-        click.echo("Deployment has been paused")
+        # Retrieve all deployments and search for the given name
+        deployments = cclient.get(None)
+        deployment = next((d for d in deployments if d.name == name), None)
+        if deployment is None:
+            sys.exit(f"Deployment with name '{name}' not found.")
+        cclient.pause(deployment.id)
+        click.echo(f"Deployment {name} has been paused")
 
 
 @click.command(help="Resume a deployment")
-@click.argument("id", type=int)
+@click.argument("name", type=str)
 @handle_exception
-def resume(id):
+def resume(name):
     with get_centml_client() as cclient:
-        cclient.resume(id)
-        click.echo("Deployment has been resumed")
+        # Retrieve all deployments and search for the given name
+        deployments = cclient.get(None)
+        deployment = next((d for d in deployments if d.name == name), None)
+        if deployment is None:
+            sys.exit(f"Deployment with name '{name}' not found.")
+        cclient.resume(deployment.id)
+        click.echo(f"Deployment {name} has been resumed")

From 1d4306801a7caadb881e71df448eb158924b561e Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Thu, 20 Mar 2025 22:50:53 -0400
Subject: [PATCH 26/28] change selections to be indexed.

---
 centml/cli/cluster.py | 158 ++++++++++++++++++++----------------------
 1 file changed, 77 insertions(+), 81 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index aacb0a2..6a7b777 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -94,6 +94,7 @@ def ls(type):
             )
         )
 
+
 # TODO: Status for Cserve seems to be broken
 @click.command(help="Get deployment details")
 @click.argument("name", type=str)
@@ -188,12 +189,16 @@ def create():
         # Prompt for general fields
         name = click.prompt("Enter a name for the deployment")
 
-        dtype_str = click.prompt(
-            "Select a deployment type",
-            type=click.Choice(list(depl_name_to_type_map.keys())),
-            show_choices=True,
-            default=list(depl_name_to_type_map.keys())[0],
-        )
+        # --- Deployment Type Selection (Indexed) ---
+        deploy_types = list(depl_name_to_type_map.keys())
+        click.echo("Select a deployment type:")
+        for idx, dtype in enumerate(deploy_types, start=1):
+            click.echo(f"{idx}. {dtype}")
+        dtype_index = click.prompt("Enter the deployment type number", type=int, default=1)
+        if dtype_index < 1 or dtype_index > len(deploy_types):
+            click.echo("Invalid selection.")
+            return
+        dtype_str = deploy_types[dtype_index - 1]
         depl_type = depl_name_to_type_map[dtype_str]
 
         if depl_type == DeploymentType.INFERENCE_V2:
@@ -226,19 +231,17 @@ def create():
 
             # Retrieve prebuilt images for inference deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
-
-            # Build list of image labels
             image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else []
-
-            # Right now we disable this other option to get a MVP out quickly.
-            #image_choices.append("Other")
-
-            chosen_label = click.prompt(
-                "Select a prebuilt image label or choose 'Other' to provide a custom image URL",
-                type=click.Choice(image_choices),
-                show_choices=True,
-                default=image_choices[0],
-            )
+            # Enable custom image selection by adding "Other" to the list.
+            image_choices.append("Other")
+            click.echo("Available prebuilt image labels:")
+            for idx, label in enumerate(image_choices, start=1):
+                click.echo(f"{idx}. {label}")
+            choice_index = click.prompt("Select a prebuilt image label by number", type=int, default=1)
+            if choice_index < 1 or choice_index > len(image_choices):
+                click.echo("Invalid selection.")
+                return
+            chosen_label = image_choices[choice_index - 1]
 
             if chosen_label == "Other":
                 image = click.prompt("Enter the custom image URL")
@@ -249,19 +252,20 @@ def create():
             else:
                 # Find the prebuilt image with the matching label
                 selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label)
-                # Prompt the user to select a tag from the available tags
-                tag = click.prompt(
-                    "Select a tag for the image",
-                    type=click.Choice(selected_prebuilt.tags),
-                    show_choices=True,
-                    default=selected_prebuilt.tags[0],
-                )
+                # Prompt the user to select a tag from the available tags (indexed)
+                click.echo("Available tags for the selected image:")
+                for idx, tag in enumerate(selected_prebuilt.tags, start=1):
+                    click.echo(f"{idx}. {tag}")
+                tag_index = click.prompt("Select a tag for the image by number", type=int, default=1)
+                if tag_index < 1 or tag_index > len(selected_prebuilt.tags):
+                    click.echo("Invalid tag selection.")
+                    return
+                tag = selected_prebuilt.tags[tag_index - 1]
                 # Combine the image URL with the chosen tag
                 image = f"{selected_prebuilt.image_name}:{tag}"
                 port = selected_prebuilt.port
                 healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/"
 
-
             env_vars_str = click.prompt(
                 "Enter environment variables in KEY=VALUE format (comma separated) or leave blank",
                 default="",
@@ -336,36 +340,31 @@ def create():
 
             # Retrieve prebuilt images for compute deployments
             prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type)
-            # Build list of image labels
             image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else []
+            click.echo("Available prebuilt image labels:")
+            for idx, label in enumerate(image_choices, start=1):
+                click.echo(f"{idx}. {label}")
+            choice_index = click.prompt("Select a prebuilt image label by number", type=int, default=1)
+            if choice_index < 1 or choice_index > len(image_choices):
+                click.echo("Invalid selection.")
+                return
+            chosen_label = image_choices[choice_index - 1]
 
-            chosen_label = click.prompt(
-                "Select a prebuilt image label",
-                type=click.Choice(image_choices),
-                show_choices=True,
-                default=image_choices[0],
-            )
-
-            selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label)
-
-            # Find the prebuilt image with the matching label
             selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label)
-            # Prompt the user to select a tag from the available tags
-            tag = click.prompt(
-                "Select a tag for the image",
-                type=click.Choice(selected_prebuilt.tags),
-                show_choices=True,
-                default=selected_prebuilt.tags[0],
-            )
-            # Combine the image URL with the chosen tag
+            # Prompt the user to select a tag from the available tags (indexed)
+            click.echo("Available tags for the selected image:")
+            for idx, tag in enumerate(selected_prebuilt.tags, start=1):
+                click.echo(f"{idx}. {tag}")
+            tag_index = click.prompt("Select a tag for the image by number", type=int, default=1)
+            if tag_index < 1 or tag_index > len(selected_prebuilt.tags):
+                click.echo("Invalid tag selection.")
+                return
+            tag = selected_prebuilt.tags[tag_index - 1]
             image_url = f"{selected_prebuilt.image_name}:{tag}"
 
             # For compute deployments, we might ask for a public SSH key
             ssh_key = click.prompt("Enter your public SSH key")
 
-            # Right now we not support this on prod platform, just unify the feature
-            #jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", type=bool,default=False, show_default=False)
-
             from platform_api_python_client import CreateComputeDeploymentRequest
 
             req = CreateComputeDeploymentRequest(
@@ -373,9 +372,8 @@ def create():
                 cluster_id=cluster_id,
                 hardware_instance_id=hw_id,
                 image_url=image_url,
-                ssh_public_key=ssh_key,  # we require this
-                #enable_jupyter=jupyter,
-                )
+                ssh_public_key=ssh_key,
+            )
 
             created = cclient.create_compute(req)
             click.echo(f"Compute deployment {name} created with ID: {created.id}")
@@ -431,7 +429,6 @@ def create():
                 sys.exit(1)
 
             # Display the hardware instance information to the user.
-
             credits = selected_hw.cost_per_hr / 100.0            # e.g., 360 -> 3.60 credits per hour
             vram_gib = selected_hw.accelerator_memory / 1024       # e.g., 81920 MB -> 80 GiB VRAM
             memory_gib = selected_hw.memory / 1024                 # e.g., 239616 MB -> 234 GiB memory
@@ -453,34 +450,34 @@ def create():
             recipe_dict.pop("additional_properties", None)
 
             recipe_payload = {
-            "model": recipe_dict.get("model"),
-            "is_embedding_model": recipe_dict.get("is_embedding_model"),
-            "dtype": recipe_dict.get("dtype"),
-            "tokenizer": recipe_dict.get("tokenizer"),
-            "block_size": recipe_dict.get("block_size"),
-            "swap_space": recipe_dict.get("swap_space"),
-            "cache_dtype": recipe_dict.get("cache_dtype"),
-            "spec_tokens": recipe_dict.get("spec_tokens"),
-            "gpu_mem_util": recipe_dict.get("gpu_mem_util"),
-            "max_num_seqs": recipe_dict.get("max_num_seqs"),
-            "quantization": recipe_dict.get("quantization"),
-            "max_model_len": recipe_dict.get("max_model_len"),
-            "offloading_num": int(recipe_dict.get("offloading_num")),
-            "use_flashinfer": recipe_dict.get("use_flashinfer"),
-            "eager_execution": recipe_dict.get("eager_execution"),
-            "spec_draft_model": recipe_dict.get("spec_draft_model"),
-            "spec_max_seq_len": recipe_dict.get("spec_max_seq_len"),
-            "use_prefix_caching": recipe_dict.get("use_prefix_caching"),
-            "num_scheduler_steps": recipe_dict.get("num_scheduler_steps"),
-            "spec_max_batch_size": recipe_dict.get("spec_max_batch_size"),
-            "use_chunked_prefill": recipe_dict.get("use_chunked_prefill"),
-            "chunked_prefill_size": recipe_dict.get("chunked_prefill_size"),
-            "tensor_parallel_size": recipe_dict.get("tensor_parallel_size"),
-            "max_seq_len_to_capture": recipe_dict.get("max_seq_len_to_capture"),
-            "pipeline_parallel_size": recipe_dict.get("pipeline_parallel_size"),
-            "spec_prompt_lookup_max": recipe_dict.get("spec_prompt_lookup_max"),
-            "spec_prompt_lookup_min": recipe_dict.get("spec_prompt_lookup_min"),
-            "distributed_executor_backend": recipe_dict.get("distributed_executor_backend"),
+                "model": recipe_dict.get("model"),
+                "is_embedding_model": recipe_dict.get("is_embedding_model"),
+                "dtype": recipe_dict.get("dtype"),
+                "tokenizer": recipe_dict.get("tokenizer"),
+                "block_size": recipe_dict.get("block_size"),
+                "swap_space": recipe_dict.get("swap_space"),
+                "cache_dtype": recipe_dict.get("cache_dtype"),
+                "spec_tokens": recipe_dict.get("spec_tokens"),
+                "gpu_mem_util": recipe_dict.get("gpu_mem_util"),
+                "max_num_seqs": recipe_dict.get("max_num_seqs"),
+                "quantization": recipe_dict.get("quantization"),
+                "max_model_len": recipe_dict.get("max_model_len"),
+                "offloading_num": int(recipe_dict.get("offloading_num")),
+                "use_flashinfer": recipe_dict.get("use_flashinfer"),
+                "eager_execution": recipe_dict.get("eager_execution"),
+                "spec_draft_model": recipe_dict.get("spec_draft_model"),
+                "spec_max_seq_len": recipe_dict.get("spec_max_seq_len"),
+                "use_prefix_caching": recipe_dict.get("use_prefix_caching"),
+                "num_scheduler_steps": recipe_dict.get("num_scheduler_steps"),
+                "spec_max_batch_size": recipe_dict.get("spec_max_batch_size"),
+                "use_chunked_prefill": recipe_dict.get("use_chunked_prefill"),
+                "chunked_prefill_size": recipe_dict.get("chunked_prefill_size"),
+                "tensor_parallel_size": recipe_dict.get("tensor_parallel_size"),
+                "max_seq_len_to_capture": recipe_dict.get("max_seq_len_to_capture"),
+                "pipeline_parallel_size": recipe_dict.get("pipeline_parallel_size"),
+                "spec_prompt_lookup_max": recipe_dict.get("spec_prompt_lookup_max"),
+                "spec_prompt_lookup_min": recipe_dict.get("spec_prompt_lookup_min"),
+                "distributed_executor_backend": recipe_dict.get("distributed_executor_backend"),
             }
 
             # --- Additional Prompts ---
@@ -530,7 +527,6 @@ def create():
             created = cclient.create_cserve(req)
             click.echo(f"CServe deployment {name} created with ID: {created.id}")
 
-
         else:
             click.echo("Unknown deployment type.")
 

From 9e25528b99097ce733b8adb12afa00f52eff0229 Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Thu, 20 Mar 2025 22:56:34 -0400
Subject: [PATCH 27/28] format lint

---
 centml/cli/cluster.py | 17 +++++++----------
 centml/sdk/api.py     |  7 +++++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 6a7b777..9a97ed5 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -277,11 +277,7 @@ def create():
                     k, v = kv.strip().split("=")
                     env_vars[k] = v
 
-            command_str = click.prompt(
-                "Enter command (space-separated) or leave blank",
-                default="",
-                show_default=False
-            )
+            command_str = click.prompt("Enter command (space-separated) or leave blank", default="", show_default=False)
 
             command = command_str.strip() if command_str.strip() else None
 
@@ -429,13 +425,14 @@ def create():
                 sys.exit(1)
 
             # Display the hardware instance information to the user.
-            credits = selected_hw.cost_per_hr / 100.0            # e.g., 360 -> 3.60 credits per hour
-            vram_gib = selected_hw.accelerator_memory / 1024       # e.g., 81920 MB -> 80 GiB VRAM
-            memory_gib = selected_hw.memory / 1024                 # e.g., 239616 MB -> 234 GiB memory
-            cpu_cores = selected_hw.cpu / 1000                     # e.g., 26000 millicores -> 26 cores
+            credits = selected_hw.cost_per_hr / 100.0  # e.g., 360 -> 3.60 credits per hour
+            vram_gib = selected_hw.accelerator_memory / 1024  # e.g., 81920 MB -> 80 GiB VRAM
+            memory_gib = selected_hw.memory / 1024  # e.g., 239616 MB -> 234 GiB memory
+            cpu_cores = selected_hw.cpu / 1000  # e.g., 26000 millicores -> 26 cores
 
             click.echo("Selected Hardware Instance:")
-            click.echo(f"{credits:.2f} credits per hour,\n{vram_gib:.0f}GiB VRAM,\nMemory {memory_gib:.0f}GiB,\nCPU {cpu_cores:.0f} cores")
+            click.echo(f"{credits:.2f} credits per hour,\n{vram_gib:.0f}GiB VRAM,")
+            click.echo(f"Memory {memory_gib:.0f}GiB,\nCPU {cpu_cores:.0f} cores")
 
             # Use the cluster_id from the hardware instance (no need to prompt the user)
             cluster_id = selected_hw.cluster_id
diff --git a/centml/sdk/api.py b/centml/sdk/api.py
index eff4e46..4e27b54 100644
--- a/centml/sdk/api.py
+++ b/centml/sdk/api.py
@@ -59,8 +59,10 @@ def resume(self, id):
     def get_clusters(self):
         return self._api.get_clusters_clusters_get()
 
-    def get_hardware_instances(self, cluster_id = None):
-        return self._api.get_hardware_instances_hardware_instances_get(cluster_id = cluster_id if cluster_id else None).results
+    def get_hardware_instances(self, cluster_id=None):
+        return self._api.get_hardware_instances_hardware_instances_get(
+            cluster_id=cluster_id if cluster_id else None
+        ).results
 
     def get_prebuilt_images(self, depl_type: DeploymentType = None):
         return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type)
@@ -68,6 +70,7 @@ def get_prebuilt_images(self, depl_type: DeploymentType = None):
     def get_cserve_recipe(self):
         return self._api.get_cserve_recipe_deployments_cserve_recipes_get().results
 
+
 @contextmanager
 def get_centml_client():
     configuration = platform_api_python_client.Configuration(

From 6d35908cdb33a32a831b191ae2dd116ec18d36f6 Mon Sep 17 00:00:00 2001
From: Honglin Cao <h45cao@uwaterloo.ca>
Date: Thu, 20 Mar 2025 23:07:17 -0400
Subject: [PATCH 28/28] fix typecheck: require depl_type in get_prebuilt_images

---
 centml/sdk/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/centml/sdk/api.py b/centml/sdk/api.py
index 4e27b54..3a637c6 100644
--- a/centml/sdk/api.py
+++ b/centml/sdk/api.py
@@ -64,7 +64,7 @@ def get_hardware_instances(self, cluster_id=None):
             cluster_id=cluster_id if cluster_id else None
         ).results
 
-    def get_prebuilt_images(self, depl_type: DeploymentType = None):
+    def get_prebuilt_images(self, depl_type: DeploymentType):
         return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type)
 
     def get_cserve_recipe(self):