Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 47 additions & 19 deletions examples/sdk/create_cserve.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,62 @@
import time
import centml
from centml.sdk.api import get_centml_client
from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest
from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe

with get_centml_client() as cclient:
# Get fastest recipe for the Qwen model
fastest = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")[0].fastest

# Modify the recipe if necessary
fastest.recipe.additional_properties["max_num_seqs"] = 512
def get_fastest_cserve_config(cclient, name, model):
    """Build a CServeV2 deployment request from the fastest recipe for ``model``.

    Args:
        cclient: CentML client used to look up recipes and cluster ids.
        name: Name to give the deployment.
        model: Model identifier passed to ``cclient.get_cserve_recipe``.

    Returns:
        A ``CreateCServeV2DeploymentRequest`` using the hardware instance and
        recipe of the first returned entry's ``fastest`` configuration, with
        ``min_scale`` and ``max_scale`` both set to 1 and no environment
        variables.

    Raises:
        IndexError: If no recipe is returned for ``model``.
    """
    recipes = cclient.get_cserve_recipe(model=model)
    if not recipes:
        # Same exception type a bare ``[0]`` would raise, but with context.
        raise IndexError(f"no CServe recipe available for model {model!r}")
    fastest = recipes[0].fastest

    return CreateCServeV2DeploymentRequest(
        name=name,
        cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
        hardware_instance_id=fastest.hardware_instance_id,
        recipe=fastest.recipe,
        min_scale=1,
        max_scale=1,
        env_vars={},
    )
response = cclient.create_cserve(request)
print("Create deployment response: ", response)

# Get deployment details
deployment = cclient.get_cserve(response.id)
print("Deployment details: ", deployment)

# Pause the deployment
cclient.pause(deployment.id)
def get_default_cserve_config(cclient, name, model, *, cluster_id=1001):
    """Build a CServeV2 deployment request using a default recipe for ``model``.

    The deployment is placed on the first hardware instance that the client
    reports for ``cluster_id``.

    Args:
        cclient: CentML client used to list hardware instances.
        name: Name to give the deployment.
        model: Model identifier used to construct the ``CServeV2Recipe``.
        cluster_id: Cluster whose hardware instances are queried. Defaults to
            1001, the value this example previously hard-coded.

    Returns:
        A ``CreateCServeV2DeploymentRequest`` with ``min_scale`` and
        ``max_scale`` both set to 1 and no environment variables.

    Raises:
        IndexError: If the cluster reports no hardware instances.
    """
    hardware_instances = cclient.get_hardware_instances(cluster_id=cluster_id)
    if not hardware_instances:
        # Same exception type a bare ``[0]`` would raise, but with context.
        raise IndexError(f"no hardware instances available in cluster {cluster_id}")
    hardware_instance = hardware_instances[0]

    default_recipe = CServeV2Recipe(model=model)

    return CreateCServeV2DeploymentRequest(
        name=name,
        cluster_id=hardware_instance.cluster_id,
        hardware_instance_id=hardware_instance.id,
        recipe=default_recipe,
        min_scale=1,
        max_scale=1,
        env_vars={},
    )


def main():
    """Create a CServeV2 deployment for the Qwen model and print its details.

    Builds a request via ``get_fastest_cserve_config``, sets ``max_num_seqs``
    to 512 on its recipe, submits it, then fetches and prints the resulting
    deployment.
    """
    model = "Qwen/Qwen2-VL-7B-Instruct"

    with get_centml_client() as cclient:
        # Build the request from the fastest recipe. To use a default recipe instead:
        # qwen_config = get_default_cserve_config(cclient, name="qwen-default", model=model)
        qwen_config = get_fastest_cserve_config(cclient, name="qwen-fastest", model=model)

        # Recipe settings can be adjusted before the request is submitted.
        qwen_config.recipe.additional_properties["max_num_seqs"] = 512

        # Submit the deployment request.
        created = cclient.create_cserve(qwen_config)
        print("Create deployment response: ", created)

        # Fetch the deployment that was just created.
        details = cclient.get_cserve(created.id)
        print("Deployment details: ", details)

        # Optional cleanup, left disabled in this example:
        # cclient.pause(details.id)
        # cclient.delete(details.id)


if __name__ == "__main__":
    main()
Loading