From 35ae3ee8df952b01fc28c37b6098b18a207fc87b Mon Sep 17 00:00:00 2001 From: Anand J Date: Mon, 24 Mar 2025 17:31:46 -0400 Subject: [PATCH 1/3] Update create cserve example --- examples/sdk/create_cserve.py | 74 ++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 31 deletions(-) diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py index 2173b8c..0953162 100644 --- a/examples/sdk/create_cserve.py +++ b/examples/sdk/create_cserve.py @@ -1,34 +1,46 @@ import time import centml from centml.sdk.api import get_centml_client -from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest - -with get_centml_client() as cclient: - # Get fastest recipe for the Qwen model - fastest = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")[0].fastest - - # Modify the recipe if necessary - fastest.recipe.additional_properties["max_num_seqs"] = 512 - - # Create CServeV2 deployment - request = CreateCServeV2DeploymentRequest( - name="qwen-fastest", - cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id), - hardware_instance_id=fastest.hardware_instance_id, - recipe=fastest.recipe, - min_scale=1, - max_scale=1, - env_vars={}, - ) - response = cclient.create_cserve(request) - print("Create deployment response: ", response) - - # Get deployment details - deployment = cclient.get_cserve(response.id) - print("Deployment details: ", deployment) - - # Pause the deployment - cclient.pause(deployment.id) - - # Delete the deployment - cclient.delete(deployment.id) +from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe + +def get_fastest_cserve_config(model): + return cclient.get_cserve_recipe(model=model)[0].fastest + +def get_default_cserve_config(model): + return CServeV2Recipe(model=model) + +def main(): + with get_centml_client() as cclient: + # Get fastest recipe for the Qwen model + qwen_config = get_fastest_config(model="Qwen/Qwen2-VL-7B-Instruct") + + # Modify the recipe if necessary + 
qwen_config.recipe.additional_properties["max_num_seqs"] = 512 + + # Create CServeV2 deployment + request = CreateCServeV2DeploymentRequest( + name="qwen-fastest", + cluster_id=cclient.get_cluster_id(qwen_config.hardware_instance_id), + hardware_instance_id=qwen_config.hardware_instance_id, + recipe=qwen_config.recipe, + min_scale=1, + max_scale=1, + env_vars={}, + ) + response = cclient.create_cserve(request) + print("Create deployment response: ", response) + + # Get deployment details + deployment = cclient.get_cserve(response.id) + print("Deployment details: ", deployment) + + ''' + # Pause the deployment + cclient.pause(deployment.id) + + # Delete the deployment + cclient.delete(deployment.id) + ''' + +if __name__ == "__main__": + main() From c7111db13aa85c6aaeab4369b66186a25c6e8084 Mon Sep 17 00:00:00 2001 From: Anand J Date: Mon, 24 Mar 2025 17:34:46 -0400 Subject: [PATCH 2/3] Pass cclient to get_fastest_cserve_config and fix misnamed call --- examples/sdk/create_cserve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py index 0953162..2136a70 100644 --- a/examples/sdk/create_cserve.py +++ b/examples/sdk/create_cserve.py @@ -3,7 +3,7 @@ from centml.sdk.api import get_centml_client from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe -def get_fastest_cserve_config(model): +def get_fastest_cserve_config(cclient, model): return cclient.get_cserve_recipe(model=model)[0].fastest def get_default_cserve_config(model): @@ -12,7 +12,7 @@ def get_default_cserve_config(model): def main(): with get_centml_client() as cclient: # Get fastest recipe for the Qwen model - qwen_config = get_fastest_config(model="Qwen/Qwen2-VL-7B-Instruct") + qwen_config = get_fastest_cserve_config(cclient, model="Qwen/Qwen2-VL-7B-Instruct") # Modify the recipe if necessary qwen_config.recipe.additional_properties["max_num_seqs"] = 512 From 2b103911e0e87a7de6c339de4be2434d473489fb Mon Sep 17 00:00:00 2001 From: Anand J Date: Mon, 24 Mar
2025 17:58:44 -0400 Subject: [PATCH 3/3] Return deployment requests from cserve config helpers; drop unused time import --- examples/sdk/create_cserve.py | 58 ++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py index 2136a70..54e0c9b 100644 --- a/examples/sdk/create_cserve.py +++ b/examples/sdk/create_cserve.py @@ -1,44 +1,60 @@ -import time import centml from centml.sdk.api import get_centml_client from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe -def get_fastest_cserve_config(cclient, model): - return cclient.get_cserve_recipe(model=model)[0].fastest -def get_default_cserve_config(model): - return CServeV2Recipe(model=model) +def get_fastest_cserve_config(cclient, name, model): + fastest = cclient.get_cserve_recipe(model=model)[0].fastest + + return CreateCServeV2DeploymentRequest( + name=name, + cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id), + hardware_instance_id=fastest.hardware_instance_id, + recipe=fastest.recipe, + min_scale=1, + max_scale=1, + env_vars={}, + ) + + +def get_default_cserve_config(cclient, name, model): + default_recipe = CServeV2Recipe(model=model) + + hardware_instance = cclient.get_hardware_instances(cluster_id=1001)[0] + + return CreateCServeV2DeploymentRequest( + name=name, + cluster_id=hardware_instance.cluster_id, + hardware_instance_id=hardware_instance.id, + recipe=default_recipe, + min_scale=1, + max_scale=1, + env_vars={}, + ) + def main(): with get_centml_client() as cclient: - # Get fastest recipe for the Qwen model - qwen_config = get_fastest_cserve_config(cclient, model="Qwen/Qwen2-VL-7B-Instruct") + ### Get the configurations for the Qwen model + qwen_config = get_fastest_cserve_config(cclient, name="qwen-fastest", model="Qwen/Qwen2-VL-7B-Instruct") + #qwen_config = get_default_cserve_config(cclient, name="qwen-default", model="Qwen/Qwen2-VL-7B-Instruct") - # Modify the recipe if necessary + ### Modify the recipe if necessary
qwen_config.recipe.additional_properties["max_num_seqs"] = 512 # Create CServeV2 deployment - request = CreateCServeV2DeploymentRequest( - name="qwen-fastest", - cluster_id=cclient.get_cluster_id(qwen_config.hardware_instance_id), - hardware_instance_id=qwen_config.hardware_instance_id, - recipe=qwen_config.recipe, - min_scale=1, - max_scale=1, - env_vars={}, - ) - response = cclient.create_cserve(request) + response = cclient.create_cserve(qwen_config) print("Create deployment response: ", response) - # Get deployment details + ### Get deployment details deployment = cclient.get_cserve(response.id) print("Deployment details: ", deployment) ''' - # Pause the deployment + ### Pause the deployment cclient.pause(deployment.id) - # Delete the deployment + ### Delete the deployment cclient.delete(deployment.id) '''