From 35ae3ee8df952b01fc28c37b6098b18a207fc87b Mon Sep 17 00:00:00 2001
From: Anand J <anandj@cs.toronto.edu>
Date: Mon, 24 Mar 2025 17:31:46 -0400
Subject: [PATCH 1/3] Update create cserve example

---
 examples/sdk/create_cserve.py | 74 ++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 31 deletions(-)

diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py
index 2173b8c..0953162 100644
--- a/examples/sdk/create_cserve.py
+++ b/examples/sdk/create_cserve.py
@@ -1,34 +1,46 @@
 import time
 import centml
 from centml.sdk.api import get_centml_client
-from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest
-
-with get_centml_client() as cclient:
-    # Get fastest recipe for the Qwen model
-    fastest = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")[0].fastest
-
-    # Modify the recipe if necessary
-    fastest.recipe.additional_properties["max_num_seqs"] = 512
-
-    # Create CServeV2 deployment
-    request = CreateCServeV2DeploymentRequest(
-        name="qwen-fastest",
-        cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
-        hardware_instance_id=fastest.hardware_instance_id,
-        recipe=fastest.recipe,
-        min_scale=1,
-        max_scale=1,
-        env_vars={},
-    )
-    response = cclient.create_cserve(request)
-    print("Create deployment response: ", response)
-
-    # Get deployment details
-    deployment = cclient.get_cserve(response.id)
-    print("Deployment details: ", deployment)
-
-    # Pause the deployment
-    cclient.pause(deployment.id)
-
-    # Delete the deployment
-    cclient.delete(deployment.id)
+from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe
+
+def get_fastest_cserve_config(model):
+    return cclient.get_cserve_recipe(model=model)[0].fastest
+
+def get_default_cserve_config(model):
+    return CServeV2Recipe(model=model)
+
+def main():
+    with get_centml_client() as cclient:
+        # Get fastest recipe for the Qwen model
+        qwen_config = get_fastest_config(model="Qwen/Qwen2-VL-7B-Instruct")
+
+        # Modify the recipe if necessary
+        qwen_config.recipe.additional_properties["max_num_seqs"] = 512
+
+        # Create CServeV2 deployment
+        request = CreateCServeV2DeploymentRequest(
+            name="qwen-fastest",
+            cluster_id=cclient.get_cluster_id(qwen_config.hardware_instance_id),
+            hardware_instance_id=qwen_config.hardware_instance_id,
+            recipe=qwen_config.recipe,
+            min_scale=1,
+            max_scale=1,
+            env_vars={},
+        )
+        response = cclient.create_cserve(request)
+        print("Create deployment response: ", response)
+
+        # Get deployment details
+        deployment = cclient.get_cserve(response.id)
+        print("Deployment details: ", deployment)
+
+        '''
+        # Pause the deployment
+        cclient.pause(deployment.id)
+
+        # Delete the deployment
+        cclient.delete(deployment.id)
+        '''
+
+if __name__ == "__main__":
+    main()

From c7111db13aa85c6aaeab4369b66186a25c6e8084 Mon Sep 17 00:00:00 2001
From: Anand J <anandj@cs.toronto.edu>
Date: Mon, 24 Mar 2025 17:34:46 -0400
Subject: [PATCH 2/3] Fix helper call and pass cclient in create_cserve example

---
 examples/sdk/create_cserve.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py
index 0953162..2136a70 100644
--- a/examples/sdk/create_cserve.py
+++ b/examples/sdk/create_cserve.py
@@ -3,7 +3,7 @@
 from centml.sdk.api import get_centml_client
 from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe
 
-def get_fastest_cserve_config(model):
+def get_fastest_cserve_config(cclient, model):
     return cclient.get_cserve_recipe(model=model)[0].fastest
 
 def get_default_cserve_config(model):
@@ -12,7 +12,7 @@ def get_default_cserve_config(model):
 def main():
     with get_centml_client() as cclient:
         # Get fastest recipe for the Qwen model
-        qwen_config = get_fastest_config(model="Qwen/Qwen2-VL-7B-Instruct")
+        qwen_config = get_fastest_cserve_config(cclient, model="Qwen/Qwen2-VL-7B-Instruct")
 
         # Modify the recipe if necessary
         qwen_config.recipe.additional_properties["max_num_seqs"] = 512

From 2b103911e0e87a7de6c339de4be2434d473489fb Mon Sep 17 00:00:00 2001
From: Anand J <anandj@cs.toronto.edu>
Date: Mon, 24 Mar 2025 17:58:44 -0400
Subject: [PATCH 3/3] Build deployment requests in cserve config helpers

---
 examples/sdk/create_cserve.py | 58 ++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 21 deletions(-)

diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py
index 2136a70..54e0c9b 100644
--- a/examples/sdk/create_cserve.py
+++ b/examples/sdk/create_cserve.py
@@ -1,44 +1,60 @@
-import time
 import centml
 from centml.sdk.api import get_centml_client
 from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe
 
-def get_fastest_cserve_config(cclient, model):
-    return cclient.get_cserve_recipe(model=model)[0].fastest
 
-def get_default_cserve_config(model):
-    return CServeV2Recipe(model=model)
+def get_fastest_cserve_config(cclient, name, model):
+    fastest = cclient.get_cserve_recipe(model=model)[0].fastest
+
+    return CreateCServeV2DeploymentRequest(
+        name=name,
+        cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
+        hardware_instance_id=fastest.hardware_instance_id,
+        recipe=fastest.recipe,
+        min_scale=1,
+        max_scale=1,
+        env_vars={},
+    )
+
+
+def get_default_cserve_config(cclient, name, model):
+    default_recipe = CServeV2Recipe(model=model)
+
+    hardware_instance = cclient.get_hardware_instances(cluster_id=1001)[0]
+
+    return CreateCServeV2DeploymentRequest(
+        name=name,
+        cluster_id=hardware_instance.cluster_id,
+        hardware_instance_id=hardware_instance.id,
+        recipe=default_recipe,
+        min_scale=1,
+        max_scale=1,
+        env_vars={},
+    )
+
 
 def main():
     with get_centml_client() as cclient:
-        # Get fastest recipe for the Qwen model
-        qwen_config = get_fastest_cserve_config(cclient, model="Qwen/Qwen2-VL-7B-Instruct")
+        ### Get the configurations for the Qwen model
+        qwen_config = get_fastest_cserve_config(cclient, name="qwen-fastest", model="Qwen/Qwen2-VL-7B-Instruct")
+        #qwen_config = get_default_cserve_config(cclient, name="qwen-default", model="Qwen/Qwen2-VL-7B-Instruct")
 
-        # Modify the recipe if necessary
+        ### Modify the recipe if necessary
         qwen_config.recipe.additional_properties["max_num_seqs"] = 512
 
         # Create CServeV2 deployment
-        request = CreateCServeV2DeploymentRequest(
-            name="qwen-fastest",
-            cluster_id=cclient.get_cluster_id(qwen_config.hardware_instance_id),
-            hardware_instance_id=qwen_config.hardware_instance_id,
-            recipe=qwen_config.recipe,
-            min_scale=1,
-            max_scale=1,
-            env_vars={},
-        )
-        response = cclient.create_cserve(request)
+        response = cclient.create_cserve(qwen_config)
         print("Create deployment response: ", response)
 
-        # Get deployment details
+        ### Get deployment details
         deployment = cclient.get_cserve(response.id)
         print("Deployment details: ", deployment)
 
         '''
-        # Pause the deployment
+        ### Pause the deployment
         cclient.pause(deployment.id)
 
-        # Delete the deployment
+        ### Delete the deployment
         cclient.delete(deployment.id)
         '''