From a117a88c0198b97135953db2f33bf37e1fa78729 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 17 Dec 2024 16:25:16 -0500 Subject: [PATCH 01/28] add to readme --- README.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/README.md b/README.md index 9612118..6b70606 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,13 @@ Alternatively to build from source, clone this repo then inside the project's ba pip install . ``` +### Un-installation + +To uninstall `centml`, simple do: +```bash +pip uninstall centml +``` + ### CLI Once installed, use the centml CLI tool with the following command: ```bash @@ -85,3 +92,30 @@ To run all the tests, use: ```bash pytest ``` + +### Common Issues + +- **`SSL` certificate on `MacOS`** + + Sometimes, you will see issues when using command like `centml cluster [CMD]`, where the output might look like: + + ```logs + + File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/urllib3/util/retry.py", line 519, in increment + + raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type] + + urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.centml.com', port=443): + + Max retries exceeded with url: /deployments + + (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)'))) + ``` + + **Solution**: + To fix this issue, navigate to your `python` installation directory and run the `Install Certificates.command` file located there. + + For example, if you are using `python3.10`, the file path would be: + ` + /Applications/Python 3.10/Install Certificates.command + ` \ No newline at end of file From 820a0dd844a5fb8594e099019dbe56503e777625 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Wed, 18 Dec 2024 10:34:59 -0500 Subject: [PATCH 02/28] add version message --- centml/cli/main.py | 18 ++++++++++++++++++ setup.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/centml/cli/main.py b/centml/cli/main.py index 64dfa64..42b9378 100644 --- a/centml/cli/main.py +++ b/centml/cli/main.py @@ -5,6 +5,24 @@ @click.group() +# this is the version and prog name set in setup.py +@click.version_option( + prog_name="CentML CLI", + message=f""" + ______ __ __ ___ __ + / ____/___ ____ / /_ / |/ // / + / / / _ \\ / __ \\ / __// /|_/ // / + / /___ / __// / / // /_ / / / // /___ + \\____/ \\___//_/ /_/ \\__//_/ /_//_____/ + + 🚀 Welcome to %(prog)s v%(version)s 🚀 + + ✨ AI Deployment Made Simple ✨ +📚 Documentation: https://docs.centml.ai/ +🛠 Need help? Reach out to support@centml.ai +""" +) + def cli(): pass diff --git a/setup.py b/setup.py index 8c3c36e..cbae9e8 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='centml', - version='0.3.0', + version='0.3.1', packages=find_packages(), python_requires=">=3.10", long_description=open('README.md').read(), From 7b2b8f20fff35ba215c78c32c3b6605311dab7e2 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Wed, 18 Dec 2024 10:44:22 -0500 Subject: [PATCH 03/28] match versions to new python client spec --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index f5e734b..63a2130 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ cryptography==43.0.1 prometheus-client>=0.20.0 scipy>=1.6.0 scikit-learn>=1.5.1 -platform-api-python-client==0.3.1 +platform-api-python-client==0.3.2 diff --git a/setup.py b/setup.py index cbae9e8..8eea8e2 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='centml', - version='0.3.1', + version='0.3.2', packages=find_packages(), python_requires=">=3.10", long_description=open('README.md').read(), From eac91077ac3cece70146f5e3d08c092559bf0065 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 19 Dec 2024 10:44:30 -0500 Subject: [PATCH 04/28] GONNA REMOVE - Change to local cluster --- centml/sdk/auth.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/centml/sdk/auth.py b/centml/sdk/auth.py index 8646864..193f432 100644 --- a/centml/sdk/auth.py +++ b/centml/sdk/auth.py @@ -48,13 +48,13 @@ def load_centml_cred(): def get_centml_token(): cred = load_centml_cred() - if not cred: - sys.exit("CentML credentials not found. Please login...") + # if not cred: + # sys.exit("CentML credentials not found. Please login...") - exp_time = int(jwt.decode(cred["id_token"], options={"verify_signature": False})["exp"]) + # exp_time = int(jwt.decode(cred["id_token"], options={"verify_signature": False})["exp"]) - if time.time() >= exp_time - 100: - cred = refresh_centml_token(cred["refresh_token"]) + # if time.time() >= exp_time - 100: + # cred = refresh_centml_token(cred["refresh_token"]) return cred["id_token"] From ed8ecf1881fec701eab1b67af652172e2d0f5f2e Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 19 Dec 2024 13:09:05 -0500 Subject: [PATCH 05/28] sync version to 3.1.4, the same as platform and api client --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 63a2130..9f9980c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ cryptography==43.0.1 prometheus-client>=0.20.0 scipy>=1.6.0 scikit-learn>=1.5.1 -platform-api-python-client==0.3.2 +platform-api-python-client==3.1.4 diff --git a/setup.py b/setup.py index 8eea8e2..51be960 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='centml', - version='0.3.2', + version='3.1.4', packages=find_packages(), python_requires=">=3.10", long_description=open('README.md').read(), From fa04007e409a03c1159573f66d7bdcf2de5aac2f Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 19 Dec 2024 14:48:28 -0500 Subject: [PATCH 06/28] remove empty lines --- centml/cli/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centml/cli/main.py b/centml/cli/main.py index 42b9378..d8d263b 100644 --- a/centml/cli/main.py +++ b/centml/cli/main.py @@ -14,7 +14,7 @@ / / / _ \\ / __ \\ / __// /|_/ // / / /___ / __// / / // /_ / / / // /___ \\____/ \\___//_/ /_/ \\__//_/ /_//_____/ - + 🚀 Welcome to %(prog)s v%(version)s 🚀 ✨ AI Deployment Made Simple ✨ From 11719172c40206963e88dad8234680262c6e5ff0 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 19 Dec 2024 15:02:44 -0500 Subject: [PATCH 07/28] change cli get to use name as parameter rather than type with ID --- centml/cli/cluster.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index f6f88f8..8add3e9 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -96,21 +96,29 @@ def ls(type): @click.command(help="Get deployment details") -@click.argument("type", type=click.Choice(list(depl_name_to_type_map.keys()))) -@click.argument("id", type=int) +@click.argument("name", type=str) @handle_exception -def get(type, id): +def get(name): with get_centml_client() as cclient: - depl_type = depl_name_to_type_map[type] + # Retrieve all deployments and search for the given name + deployments = cclient.get(None) + deployment = next((d for d in deployments if d.name == name), None) + + if deployment is None: + sys.exit(f"Deployment with name '{name}' not found.") + + depl_type = deployment.type + depl_id = deployment.id + # Now retrieve the full deployment details based on the type if depl_type == DeploymentType.INFERENCE_V2: - deployment = cclient.get_inference(id) + deployment = cclient.get_inference(depl_id) elif depl_type == DeploymentType.COMPUTE_V2: - deployment = cclient.get_compute(id) + deployment = cclient.get_compute(depl_id) elif depl_type == DeploymentType.CSERVE: - deployment = cclient.get_cserve(id) + deployment = cclient.get_cserve(depl_id) else: - sys.exit("Please enter correct deployment type") + sys.exit("Unknown deployment type.") ready_status = _get_ready_status(cclient, deployment) _, id_to_hw_map = _get_hw_to_id_map(cclient, deployment.cluster_id) From 4ed05f6979e3deb35555df3fb20618b6dcf147a7 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 19 Dec 2024 16:14:09 -0500 Subject: [PATCH 08/28] NOT WORKING - attempt to add create deployment --- centml/cli/cluster.py | 114 ++++++++++++++++++++++++++++++++++++++++++ centml/cli/main.py | 3 +- 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 8add3e9..24a2048 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -181,6 +181,120 @@ def get(name): ) +@click.command(help="Create a new deployment") +@handle_exception +def create(): + with get_centml_client() as cclient: + # Prompt for general fields + name = click.prompt("Enter a name for the deployment") + dtype_str = click.prompt( + "Select a deployment type", + type=click.Choice(list(depl_name_to_type_map.keys())), + show_choices=True + ) + depl_type = depl_name_to_type_map[dtype_str] + + # Select cluster + clusters = cclient.get_clusters().results + if not clusters: + click.echo("No clusters available. Please ensure you have a cluster setup.") + return + cluster_names = [c.name for c in clusters] + cluster_name = click.prompt( + "Select a cluster", + type=click.Choice(cluster_names), + show_choices=True + ) + cluster_id = next(c.id for c in clusters if c.name == cluster_name) + + # Hardware selection + hw_resp = cclient.get_hardware_instances(cluster_id) + if not hw_resp: + click.echo("No hardware instances available for this cluster.") + return + hw_names = [h.name for h in hw_resp] + hw_name = click.prompt( + "Select a hardware instance", + type=click.Choice(hw_names), + show_choices=True + ) + hw_id = next(h.id for h in hw_resp if h.name == hw_name) + + # Common fields + min_scale = click.prompt("Minimum number of replicas", default=1, type=int) + max_scale = click.prompt("Maximum number of replicas", default=1, type=int) + concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False) + concurrency = int(concurrency) if concurrency else None + + # Depending on type: + if depl_type == DeploymentType.INFERENCE_V2: + image = click.prompt("Enter the image URL") + container_port = click.prompt("Enter the container port", default=8080, type=int) + healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True) + env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False) + env_vars = {} + if env_vars_str.strip(): + for kv in env_vars_str.split(","): + k, v = kv.strip().split("=") + env_vars[k] = v + + # Construct the inference request + from platform_api_python_client import CreateInferenceDeploymentRequest + req = CreateInferenceDeploymentRequest( + name=name, + cluster_id=cluster_id, + hardware_instance_id=hw_id, + image_url=image, + container_port=container_port, + healthcheck=healthcheck, + min_scale=min_scale, + max_scale=max_scale, + concurrency=concurrency, + env_vars=env_vars if env_vars else None + ) + created = cclient.create_inference(req) + click.echo(f"Inference deployment created with ID: {created.id}") + + elif depl_type == DeploymentType.COMPUTE_V2: + # For compute deployments, we might ask for a public SSH key + ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False) + + from platform_api_python_client import CreateComputeDeploymentRequest + req = CreateComputeDeploymentRequest( + name=name, + cluster_id=cluster_id, + hardware_instance_id=hw_id, + ssh_public_key=ssh_key if ssh_key.strip() else None + ) + created = cclient.create_compute(req) + click.echo(f"Compute deployment created with ID: {created.id}") + + elif depl_type == DeploymentType.CSERVE: + # For cserve deployments, ask for model and parallelism + model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b") + tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int) + pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int) + # concurrency asked above + + from platform_api_python_client import CreateCServeDeploymentRequest + req = CreateCServeDeploymentRequest( + name=name, + cluster_id=cluster_id, + hardware_instance_id=hw_id, + model=model, + tensor_parallel_size=tensor_parallel_size, + pipeline_parallel_size=pipeline_parallel_size, + min_scale=min_scale, + max_scale=max_scale, + concurrency=concurrency + ) + created = cclient.create_cserve(req) + click.echo(f"CServe deployment created with ID: {created.id}") + + else: + click.echo("Unknown deployment type.") + + @click.command(help="Delete a deployment") @click.argument("id", type=int) @handle_exception diff --git a/centml/cli/main.py b/centml/cli/main.py index d8d263b..60e8658 100644 --- a/centml/cli/main.py +++ b/centml/cli/main.py @@ -1,7 +1,7 @@ import click from centml.cli.login import login, logout -from centml.cli.cluster import ls, get, delete, pause, resume +from centml.cli.cluster import ls, get, delete, pause, resume, create @click.group() @@ -45,6 +45,7 @@ def ccluster(): ccluster.add_command(ls) ccluster.add_command(get) +ccluster.add_command(create) ccluster.add_command(delete) ccluster.add_command(pause) ccluster.add_command(resume) From 7ca22d6e2991e7cf36b557a0939a5a875799d80a Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 19 Dec 2024 16:40:45 -0500 Subject: [PATCH 09/28] NOT WORKING - refractor variable names, now hitting Internal Server Error --- centml/cli/cluster.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 24a2048..1ea669c 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -145,7 +145,7 @@ def get(name): tabulate( [ ("Image", deployment.image_url), - ("Container port", deployment.container_port), + ("Container port", deployment.port), ("Healthcheck", deployment.healthcheck or "/"), ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}), ("Environment variables", deployment.env_vars or "None"), @@ -199,13 +199,13 @@ def create(): if not clusters: click.echo("No clusters available. Please ensure you have a cluster setup.") return - cluster_names = [c.name for c in clusters] + cluster_names = [c.display_name for c in clusters] cluster_name = click.prompt( "Select a cluster", type=click.Choice(cluster_names), show_choices=True ) - cluster_id = next(c.id for c in clusters if c.name == cluster_name) + cluster_id = next(c.id for c in clusters if c.display_name == cluster_name) # Hardware selection hw_resp = cclient.get_hardware_instances(cluster_id) @@ -229,7 +229,7 @@ def create(): # Depending on type: if depl_type == DeploymentType.INFERENCE_V2: image = click.prompt("Enter the image URL") - container_port = click.prompt("Enter the container port", default=8080, type=int) + port = click.prompt("Enter the container port", default=8080, type=int) healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True) env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False) env_vars = {} @@ -245,7 +245,7 @@ def create(): cluster_id=cluster_id, hardware_instance_id=hw_id, image_url=image, - container_port=container_port, + port=port, healthcheck=healthcheck, min_scale=min_scale, max_scale=max_scale, From a4ac8839ea61d08cc635103e27fb0a19ce86a068 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Fri, 20 Dec 2024 13:14:27 -0500 Subject: [PATCH 10/28] change exception handling to print detail, add echo deployment name. --- centml/cli/cluster.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 1ea669c..4accc21 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -21,7 +21,7 @@ def wrapper(*args, **kwargs): try: return func(*args, **kwargs) except ApiException as e: - click.echo(f"Error: {e.reason}") + click.echo(f"Error: {e.body or e.reason}") return None return wrapper @@ -253,7 +253,7 @@ def create(): env_vars=env_vars if env_vars else None ) created = cclient.create_inference(req) - click.echo(f"Inference deployment created with ID: {created.id}") + click.echo(f"Inference deployment {name} created with ID: {created.id}") elif depl_type == DeploymentType.COMPUTE_V2: # For compute deployments, we might ask for a public SSH key @@ -267,7 +267,7 @@ def create(): ssh_public_key=ssh_key if ssh_key.strip() else None ) created = cclient.create_compute(req) - click.echo(f"Compute deployment created with ID: {created.id}") + click.echo(f"Compute deployment {name} created with ID: {created.id}") elif depl_type == DeploymentType.CSERVE: # For cserve deployments, ask for model and parallelism @@ -289,7 +289,7 @@ def create(): concurrency=concurrency ) created = cclient.create_cserve(req) - click.echo(f"CServe deployment created with ID: {created.id}") + click.echo(f"CServe deployment {name} created with ID: {created.id}") else: click.echo("Unknown deployment type.") From eed79f2dd03165c52d68b0252cddb9b542082f17 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Fri, 20 Dec 2024 13:18:12 -0500 Subject: [PATCH 11/28] fix container port for inf dep --- centml/cli/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 4accc21..36002ae 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -145,7 +145,7 @@ def get(name): tabulate( [ ("Image", deployment.image_url), - ("Container port", deployment.port), + ("Container port", deployment.container_port), ("Healthcheck", deployment.healthcheck or "/"), ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}), ("Environment variables", deployment.env_vars or "None"), From db15900efedcf928a1c096454adaf7aad7ac8f30 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Fri, 20 Dec 2024 13:32:02 -0500 Subject: [PATCH 12/28] add get prebuilt img api call --- centml/sdk/api.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index f83dfd4..8945f3f 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -61,6 +61,14 @@ def get_clusters(self): def get_hardware_instances(self, cluster_id): return self._api.get_hardware_instances_hardware_instances_get(cluster_id).results + def get_prebuilt_images(self, depl_type: DeploymentType = None): + """Get Prebuilt Images + + :param depl_type: DeploymentType, optional + :return: ListPrebuiltImageResponse + """ + return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type) + @contextmanager def get_centml_client(): From 6ffbee882c05b042878e3301c87e1dd594d03d87 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Fri, 20 Dec 2024 14:11:12 -0500 Subject: [PATCH 13/28] added prebuilt img list for all, but only inf now working --- centml/cli/cluster.py | 42 +++++++++++++++++++++++++++++++++++++----- centml/sdk/api.py | 6 +----- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 36002ae..2af2a0d 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -226,11 +226,38 @@ def create(): concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False) concurrency = int(concurrency) if concurrency else None - # Depending on type: if depl_type == DeploymentType.INFERENCE_V2: - image = click.prompt("Enter the image URL") - port = click.prompt("Enter the container port", default=8080, type=int) - healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True) + # Retrieve prebuilt images for inference deployments + prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) + image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else [] + + chosen_image = click.prompt( + "Select a prebuilt image or provide a custom image URL", + type=click.Choice(image_choices), + show_choices=True + ) + + if chosen_image == "Other": + image = click.prompt("Enter the image URL") + port = click.prompt("Enter the container port", default=8080, type=int) + healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True) + else: + # Find the selected prebuilt image details + selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image) + image = selected_prebuilt.image_name + # Use the prebuilt image port and healthcheck as defaults + port = click.prompt( + "Enter the container port", + default=selected_prebuilt.port, + type=int + ) + default_healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/" + healthcheck = click.prompt( + "Enter healthcheck endpoint (default '/')", + default=default_healthcheck, + show_default=True + ) + env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False) env_vars = {} if env_vars_str.strip(): @@ -260,10 +287,15 @@ def create(): ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False) from platform_api_python_client import CreateComputeDeploymentRequest + # If compute deployments also use prebuilt images and require image_url, + # we could similarly fetch them and prompt just like inference above. + # For now, if the schema doesn't require image_url for compute: req = CreateComputeDeploymentRequest( name=name, cluster_id=cluster_id, hardware_instance_id=hw_id, + # If needed, you can do similar logic for prebuilt images here: + # image_url = ... ssh_public_key=ssh_key if ssh_key.strip() else None ) created = cclient.create_compute(req) @@ -274,9 +306,9 @@ def create(): model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b") tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int) pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int) - # concurrency asked above from platform_api_python_client import CreateCServeDeploymentRequest + # If cserve deployments also require images, we could do similar logic here. req = CreateCServeDeploymentRequest( name=name, cluster_id=cluster_id, diff --git a/centml/sdk/api.py b/centml/sdk/api.py index 8945f3f..272ca57 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -2,6 +2,7 @@ import platform_api_python_client from platform_api_python_client import ( + DeploymentType, DeploymentStatus, CreateInferenceDeploymentRequest, CreateComputeDeploymentRequest, @@ -62,11 +63,6 @@ def get_hardware_instances(self, cluster_id): return self._api.get_hardware_instances_hardware_instances_get(cluster_id).results def get_prebuilt_images(self, depl_type: DeploymentType = None): - """Get Prebuilt Images - - :param depl_type: DeploymentType, optional - :return: ListPrebuiltImageResponse - """ return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type) From f0b8a855153aa0d5f8c6cdd6b065ad21d6b89479 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Fri, 20 Dec 2024 14:33:01 -0500 Subject: [PATCH 14/28] add default to choice, add image lists from db --- centml/cli/cluster.py | 68 +++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 2af2a0d..33280c6 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -190,7 +190,8 @@ def create(): dtype_str = click.prompt( "Select a deployment type", type=click.Choice(list(depl_name_to_type_map.keys())), - show_choices=True + show_choices=True, + default=list(depl_name_to_type_map.keys())[0] ) depl_type = depl_name_to_type_map[dtype_str] @@ -203,7 +204,8 @@ def create(): cluster_name = click.prompt( "Select a cluster", type=click.Choice(cluster_names), - show_choices=True + show_choices=True, + default=cluster_names[0] ) cluster_id = next(c.id for c in clusters if c.display_name == cluster_name) @@ -216,47 +218,35 @@ def create(): hw_name = click.prompt( "Select a hardware instance", type=click.Choice(hw_names), - show_choices=True + show_choices=True, + default=hw_names[0] ) hw_id = next(h.id for h in hw_resp if h.name == hw_name) - # Common fields - min_scale = click.prompt("Minimum number of replicas", default=1, type=int) - max_scale = click.prompt("Maximum number of replicas", default=1, type=int) - concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False) - concurrency = int(concurrency) if concurrency else None - if depl_type == DeploymentType.INFERENCE_V2: # Retrieve prebuilt images for inference deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else [] + image_choices.append("Other") chosen_image = click.prompt( - "Select a prebuilt image or provide a custom image URL", + "Select a prebuilt image or choose 'Other' to provide a custom image URL", type=click.Choice(image_choices), - show_choices=True + show_choices=True, + default=image_choices[0] ) if chosen_image == "Other": - image = click.prompt("Enter the image URL") - port = click.prompt("Enter the container port", default=8080, type=int) - healthcheck = click.prompt("Enter healthcheck endpoint (default '/')", default="/", show_default=True) + image = click.prompt("Enter the custom image URL") + port = click.prompt("Enter the container port for the image", default=8080, type=int) + healthcheck = click.prompt("Enter healthcheck endpoint (default '/') for the image", default="/", show_default=True) else: # Find the selected prebuilt image details selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image) image = selected_prebuilt.image_name # Use the prebuilt image port and healthcheck as defaults - port = click.prompt( - "Enter the container port", - default=selected_prebuilt.port, - type=int - ) - default_healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/" - healthcheck = click.prompt( - "Enter healthcheck endpoint (default '/')", - default=default_healthcheck, - show_default=True - ) + port = selected_prebuilt.port + healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/" env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False) env_vars = {} @@ -265,6 +255,12 @@ def create(): k, v = kv.strip().split("=") env_vars[k] = v + # Common fields + min_scale = click.prompt("Minimum number of replicas", default=1, type=int) + max_scale = click.prompt("Maximum number of replicas", default=1, type=int) + concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False) + concurrency = int(concurrency) if concurrency else None + # Construct the inference request from platform_api_python_client import CreateInferenceDeploymentRequest req = CreateInferenceDeploymentRequest( @@ -283,19 +279,30 @@ def create(): click.echo(f"Inference deployment {name} created with ID: {created.id}") elif depl_type == DeploymentType.COMPUTE_V2: + + # Retrieve prebuilt images for inference deployments + prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) + image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else [] + + # Right now we don't support custom compute images + # TODO: add image tags to the url, right now its required by compute but not inference + chosen_image = click.prompt( + "Select a prebuilt image", + type=click.Choice(image_choices), + show_choices=True, + default=image_choices[0] + ) + # For compute deployments, we might ask for a public SSH key ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False) + #jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False) from platform_api_python_client import CreateComputeDeploymentRequest - # If compute deployments also use prebuilt images and require image_url, - # we could similarly fetch them and prompt just like inference above. - # For now, if the schema doesn't require image_url for compute: req = CreateComputeDeploymentRequest( name=name, cluster_id=cluster_id, hardware_instance_id=hw_id, - # If needed, you can do similar logic for prebuilt images here: - # image_url = ... + image_url = chosen_image, ssh_public_key=ssh_key if ssh_key.strip() else None ) created = cclient.create_compute(req) @@ -308,7 +315,6 @@ def create(): pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int) from platform_api_python_client import CreateCServeDeploymentRequest - # If cserve deployments also require images, we could do similar logic here. req = CreateCServeDeploymentRequest( name=name, cluster_id=cluster_id, From 35cf1d9108eacc97107b7f4d0ba97c1169a2e952 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 18 Mar 2025 10:46:32 -0400 Subject: [PATCH 15/28] change platform version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9f9980c..49d0f6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ cryptography==43.0.1 prometheus-client>=0.20.0 scipy>=1.6.0 scikit-learn>=1.5.1 -platform-api-python-client==3.1.4 +platform-api-python-client==3.1.15 From 6cbe9a0a1c58049de100fc5f804f1cb706645073 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 18 Mar 2025 10:52:58 -0400 Subject: [PATCH 16/28] update version 3.1.15 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 51be960..c2d18d5 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='centml', - version='3.1.4', + version='3.1.15', packages=find_packages(), python_requires=">=3.10", long_description=open('README.md').read(), From 3f7b9c5e504528a6441a8d6167ed1b422d15f29a Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 18 Mar 2025 10:59:54 -0400 Subject: [PATCH 17/28] ruff format --- centml/cli/cluster.py | 177 ++++++++++++++++++++++++++++++++---------- centml/cli/main.py | 3 +- 2 files changed, 136 insertions(+), 44 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 33280c6..a51bdf3 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -3,7 +3,13 @@ from typing import Dict import click from tabulate import tabulate -from centml.sdk import DeploymentType, DeploymentStatus, ServiceStatus, ApiException, HardwareInstanceResponse +from centml.sdk import ( + DeploymentType, + DeploymentStatus, + ServiceStatus, + ApiException, + HardwareInstanceResponse, +) from centml.sdk.api import get_centml_client @@ -43,45 +49,82 @@ def _get_hw_to_id_map(cclient, cluster_id): def _format_ssh_key(ssh_key): if not ssh_key: return "No SSH Key Found" - return ssh_key[:10] + '...' + return ssh_key[:10] + "..." def _get_ready_status(cclient, deployment): api_status = deployment.status service_status = ( - cclient.get_status(deployment.id).service_status if deployment.status == DeploymentStatus.ACTIVE else None + cclient.get_status(deployment.id).service_status + if deployment.status == DeploymentStatus.ACTIVE + else None ) status_styles = { (DeploymentStatus.PAUSED, None): ("paused", "yellow", "black"), (DeploymentStatus.DELETED, None): ("deleted", "white", "black"), (DeploymentStatus.ACTIVE, ServiceStatus.HEALTHY): ("ready", "green", "black"), - (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ("starting", "black", "white"), - (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ("starting", "black", "white"), + (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ( + "starting", + "black", + "white", + ), + (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ( + "starting", + "black", + "white", + ), (DeploymentStatus.ACTIVE, ServiceStatus.ERROR): ("error", "red", "black"), (DeploymentStatus.ACTIVE, ServiceStatus.CREATECONTAINERCONFIGERROR): ( "createContainerConfigError", "red", "black", ), - (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ("crashLoopBackOff", "red", "black"), - (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ("imagePullBackOff", "red", "black"), - (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ("progressDeadlineExceeded", "red", "black"), + (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ( + "crashLoopBackOff", + "red", + "black", + ), + (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ( + "imagePullBackOff", + "red", + "black", + ), + (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ( + "progressDeadlineExceeded", + "red", + "black", + ), } - style = status_styles.get((api_status, service_status), ("unknown", "black", "white")) + style = status_styles.get( + (api_status, service_status), ("unknown", "black", "white") + ) # Handle foreground and background colors return click.style(style[0], fg=style[1], bg=style[2]) @click.command(help="List all deployments") -@click.argument("type", type=click.Choice(list(depl_name_to_type_map.keys())), required=False, default=None) +@click.argument( + "type", + type=click.Choice(list(depl_name_to_type_map.keys())), + required=False, + default=None, +) def ls(type): with get_centml_client() as cclient: - depl_type = depl_name_to_type_map[type] if type in depl_name_to_type_map else None + depl_type = ( + depl_name_to_type_map[type] if type in depl_name_to_type_map else None + ) deployments = cclient.get(depl_type) rows = [ - [d.id, d.name, depl_type_to_name_map[d.type], d.status.value, d.created_at.strftime("%Y-%m-%d %H:%M:%S")] + [ + d.id, + d.name, + depl_type_to_name_map[d.type], + d.status.value, + d.created_at.strftime("%Y-%m-%d %H:%M:%S"), + ] for d in deployments ] @@ -132,7 +175,7 @@ def get(name): ("Endpoint", deployment.endpoint_url), ("Created at", deployment.created_at.strftime("%Y-%m-%d %H:%M:%S")), ("Hardware", f"{hw.name} ({hw.num_gpu}x {hw.gpu_type})"), - ("Cost", f"{hw.cost_per_hr/100} credits/hr"), + ("Cost", f"{hw.cost_per_hr / 100} credits/hr"), ], tablefmt="rounded_outline", disable_numparse=True, @@ -147,7 +190,10 @@ def get(name): ("Image", deployment.image_url), ("Container port", deployment.container_port), ("Healthcheck", deployment.healthcheck or "/"), - ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}), + ( + "Replicas", + {"min": deployment.min_scale, "max": deployment.max_scale}, + ), ("Environment variables", deployment.env_vars or "None"), ("Max concurrency", deployment.concurrency or "None"), ], @@ -158,7 +204,10 @@ def get(name): elif depl_type == DeploymentType.COMPUTE_V2: click.echo( tabulate( - [("Username", "centml"), ("SSH key", _format_ssh_key(deployment.ssh_public_key))], + [ + ("Username", "centml"), + ("SSH key", _format_ssh_key(deployment.ssh_public_key)), + ], tablefmt="rounded_outline", disable_numparse=True, ) @@ -170,9 +219,15 @@ def get(name): ("Hugging face model", deployment.model), ( "Parallelism", - {"tensor": deployment.tensor_parallel_size, "pipeline": deployment.pipeline_parallel_size}, + { + "tensor": deployment.tensor_parallel_size, + "pipeline": deployment.pipeline_parallel_size, + }, + ), + ( + "Replicas", + {"min": deployment.min_scale, "max": deployment.max_scale}, ), - ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}), ("Max concurrency", deployment.concurrency or "None"), ], tablefmt="rounded_outline", @@ -191,7 +246,7 @@ def create(): "Select a deployment type", type=click.Choice(list(depl_name_to_type_map.keys())), show_choices=True, - default=list(depl_name_to_type_map.keys())[0] + default=list(depl_name_to_type_map.keys())[0], ) depl_type = depl_name_to_type_map[dtype_str] @@ -205,7 +260,7 @@ def create(): "Select a cluster", type=click.Choice(cluster_names), show_choices=True, - default=cluster_names[0] + default=cluster_names[0], ) cluster_id = next(c.id for c in clusters if c.display_name == cluster_name) @@ -219,36 +274,58 @@ def create(): "Select a hardware instance", type=click.Choice(hw_names), show_choices=True, - default=hw_names[0] + default=hw_names[0], ) hw_id = next(h.id for h in hw_resp if h.name == hw_name) if depl_type == DeploymentType.INFERENCE_V2: # Retrieve prebuilt images for inference deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) - image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else [] + image_choices = ( + [img.image_name for img in prebuilt_images.results] + if prebuilt_images.results + else [] + ) image_choices.append("Other") chosen_image = click.prompt( "Select a prebuilt image or choose 'Other' to provide a custom image URL", type=click.Choice(image_choices), show_choices=True, - default=image_choices[0] + default=image_choices[0], ) if chosen_image == "Other": image = click.prompt("Enter the custom image URL") - port = click.prompt("Enter the container port for the image", default=8080, type=int) - healthcheck = click.prompt("Enter healthcheck endpoint (default '/') for the image", default="/", show_default=True) + port = click.prompt( + "Enter the container port for the image", default=8080, type=int + ) + healthcheck = click.prompt( + "Enter healthcheck endpoint (default '/') for the image", + default="/", + show_default=True, + ) else: # Find the selected prebuilt image details - selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image) + selected_prebuilt = next( + img + for img in prebuilt_images.results + if img.image_name == chosen_image + ) image = selected_prebuilt.image_name # Use the prebuilt image port and healthcheck as defaults port = selected_prebuilt.port - healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/" + healthcheck = ( + selected_prebuilt.healthcheck + if selected_prebuilt.healthcheck + else "/" + ) - env_vars_str = click.prompt("Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", show_default=False) + env_vars_str = click.prompt( + "Enter environment variables in KEY=VALUE format (comma separated) or leave blank", + default="", + show_default=False, + ) env_vars = {} if env_vars_str.strip(): for kv in env_vars_str.split(","): @@ -258,11 +335,14 @@ def create(): # Common fields min_scale = click.prompt("Minimum number of replicas", default=1, type=int) max_scale = click.prompt("Maximum number of replicas", default=1, type=int) - concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False) + concurrency = click.prompt( + "Max concurrency (or leave blank)", default="", show_default=False + ) concurrency = int(concurrency) if concurrency else None # Construct the inference request from platform_api_python_client import CreateInferenceDeploymentRequest + req = CreateInferenceDeploymentRequest( name=name, cluster_id=cluster_id, @@ -273,48 +353,61 @@ def create(): min_scale=min_scale, max_scale=max_scale, concurrency=concurrency, - env_vars=env_vars if env_vars else None + env_vars=env_vars if env_vars else None, ) created = cclient.create_inference(req) click.echo(f"Inference deployment {name} created with ID: {created.id}") elif depl_type == DeploymentType.COMPUTE_V2: - # Retrieve prebuilt images for inference deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) - image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else [] - + image_choices = ( + [img.image_name for img in prebuilt_images.results] + if prebuilt_images.results + else [] + ) + # Right now we don't support custom compute images # TODO: add image tags to the url, right now its required by compute but not inference chosen_image = click.prompt( "Select a prebuilt image", type=click.Choice(image_choices), show_choices=True, - default=image_choices[0] + default=image_choices[0], ) - + # For compute deployments, we might ask for a public SSH key - ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False) - #jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False) + ssh_key = click.prompt( + "Enter your public SSH key", default="", show_default=False + ) + # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False) from platform_api_python_client import CreateComputeDeploymentRequest + req = CreateComputeDeploymentRequest( name=name, cluster_id=cluster_id, hardware_instance_id=hw_id, - image_url = chosen_image, - ssh_public_key=ssh_key if ssh_key.strip() else None + image_url=chosen_image, + ssh_public_key=ssh_key if ssh_key.strip() else None, ) created = cclient.create_compute(req) click.echo(f"Compute deployment {name} created with ID: {created.id}") elif depl_type == DeploymentType.CSERVE: # For cserve deployments, ask for model and parallelism - model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b") - tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int) - pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int) + model = click.prompt( + "Enter the Hugging Face model", default="facebook/opt-1.3b" + ) + tensor_parallel_size = click.prompt( + "Tensor parallel size", default=1, type=int + ) + pipeline_parallel_size = click.prompt( + "Pipeline parallel size", default=1, type=int + ) from platform_api_python_client import CreateCServeDeploymentRequest + req = CreateCServeDeploymentRequest( name=name, cluster_id=cluster_id, @@ -324,7 +417,7 @@ def create(): pipeline_parallel_size=pipeline_parallel_size, min_scale=min_scale, max_scale=max_scale, - concurrency=concurrency + concurrency=concurrency, ) created = cclient.create_cserve(req) click.echo(f"CServe deployment {name} created with ID: {created.id}") diff --git a/centml/cli/main.py b/centml/cli/main.py index 60e8658..bd6679c 100644 --- a/centml/cli/main.py +++ b/centml/cli/main.py @@ -20,9 +20,8 @@ ✨ AI Deployment Made Simple ✨ 📚 Documentation: https://docs.centml.ai/ 🛠 Need help? Reach out to support@centml.ai -""" +""", ) - def cli(): pass From 1d13b3149efef0798006a20b9f16e08b457e7c08 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 18 Mar 2025 11:03:10 -0400 Subject: [PATCH 18/28] black reformatt --- centml/cli/cluster.py | 152 ++++++++---------------------------------- 1 file changed, 29 insertions(+), 123 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index a51bdf3..f7b45ee 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -3,13 +3,7 @@ from typing import Dict import click from tabulate import tabulate -from centml.sdk import ( - DeploymentType, - DeploymentStatus, - ServiceStatus, - ApiException, - HardwareInstanceResponse, -) +from centml.sdk import DeploymentType, DeploymentStatus, ServiceStatus, ApiException, HardwareInstanceResponse from centml.sdk.api import get_centml_client @@ -55,76 +49,39 @@ def _format_ssh_key(ssh_key): def _get_ready_status(cclient, deployment): api_status = deployment.status service_status = ( - cclient.get_status(deployment.id).service_status - if deployment.status == DeploymentStatus.ACTIVE - else None + cclient.get_status(deployment.id).service_status if deployment.status == DeploymentStatus.ACTIVE else None ) status_styles = { (DeploymentStatus.PAUSED, None): ("paused", "yellow", "black"), (DeploymentStatus.DELETED, None): ("deleted", "white", "black"), (DeploymentStatus.ACTIVE, ServiceStatus.HEALTHY): ("ready", "green", "black"), - (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ( - "starting", - "black", - "white", - ), - (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ( - "starting", - "black", - "white", - ), + (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ("starting", "black", "white"), + (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ("starting", "black", "white"), (DeploymentStatus.ACTIVE, ServiceStatus.ERROR): ("error", "red", "black"), (DeploymentStatus.ACTIVE, ServiceStatus.CREATECONTAINERCONFIGERROR): ( "createContainerConfigError", "red", "black", ), - (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ( - "crashLoopBackOff", - "red", - "black", - ), - (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ( - "imagePullBackOff", - "red", - "black", - ), - (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ( - "progressDeadlineExceeded", - "red", - "black", - ), + (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ("crashLoopBackOff", "red", "black"), + (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ("imagePullBackOff", "red", "black"), + (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ("progressDeadlineExceeded", "red", "black"), } - style = status_styles.get( - (api_status, service_status), ("unknown", "black", "white") - ) + style = status_styles.get((api_status, service_status), ("unknown", "black", "white")) # Handle foreground and background colors return click.style(style[0], fg=style[1], bg=style[2]) @click.command(help="List all deployments") -@click.argument( - "type", - type=click.Choice(list(depl_name_to_type_map.keys())), - required=False, - default=None, -) +@click.argument("type", type=click.Choice(list(depl_name_to_type_map.keys())), required=False, default=None) def ls(type): with get_centml_client() as cclient: - depl_type = ( - depl_name_to_type_map[type] if type in depl_name_to_type_map else None - ) + depl_type = depl_name_to_type_map[type] if type in depl_name_to_type_map else None deployments = cclient.get(depl_type) rows = [ - [ - d.id, - d.name, - depl_type_to_name_map[d.type], - d.status.value, - d.created_at.strftime("%Y-%m-%d %H:%M:%S"), - ] + [d.id, d.name, depl_type_to_name_map[d.type], d.status.value, d.created_at.strftime("%Y-%m-%d %H:%M:%S")] for d in deployments ] @@ -190,10 +147,7 @@ def get(name): ("Image", deployment.image_url), ("Container port", deployment.container_port), ("Healthcheck", deployment.healthcheck or "/"), - ( - "Replicas", - {"min": deployment.min_scale, "max": deployment.max_scale}, - ), + ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}), ("Environment variables", deployment.env_vars or "None"), ("Max concurrency", deployment.concurrency or "None"), ], @@ -204,10 +158,7 @@ def get(name): elif depl_type == DeploymentType.COMPUTE_V2: click.echo( tabulate( - [ - ("Username", "centml"), - ("SSH key", _format_ssh_key(deployment.ssh_public_key)), - ], + [("Username", "centml"), ("SSH key", _format_ssh_key(deployment.ssh_public_key))], tablefmt="rounded_outline", disable_numparse=True, ) @@ -219,15 +170,9 @@ def get(name): ("Hugging face model", deployment.model), ( "Parallelism", - { - "tensor": deployment.tensor_parallel_size, - "pipeline": deployment.pipeline_parallel_size, - }, - ), - ( - "Replicas", - {"min": deployment.min_scale, "max": deployment.max_scale}, + {"tensor": deployment.tensor_parallel_size, "pipeline": deployment.pipeline_parallel_size}, ), + ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}), ("Max concurrency", deployment.concurrency or "None"), ], tablefmt="rounded_outline", @@ -257,10 +202,7 @@ def create(): return cluster_names = [c.display_name for c in clusters] cluster_name = click.prompt( - "Select a cluster", - type=click.Choice(cluster_names), - show_choices=True, - default=cluster_names[0], + "Select a cluster", type=click.Choice(cluster_names), show_choices=True, default=cluster_names[0] ) cluster_id = next(c.id for c in clusters if c.display_name == cluster_name) @@ -271,21 +213,14 @@ def create(): return hw_names = [h.name for h in hw_resp] hw_name = click.prompt( - "Select a hardware instance", - type=click.Choice(hw_names), - show_choices=True, - default=hw_names[0], + "Select a hardware instance", type=click.Choice(hw_names), show_choices=True, default=hw_names[0] ) hw_id = next(h.id for h in hw_resp if h.name == hw_name) if depl_type == DeploymentType.INFERENCE_V2: # Retrieve prebuilt images for inference deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) - image_choices = ( - [img.image_name for img in prebuilt_images.results] - if prebuilt_images.results - else [] - ) + image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else [] image_choices.append("Other") chosen_image = click.prompt( @@ -297,29 +232,17 @@ def create(): if chosen_image == "Other": image = click.prompt("Enter the custom image URL") - port = click.prompt( - "Enter the container port for the image", default=8080, type=int - ) + port = click.prompt("Enter the container port for the image", default=8080, type=int) healthcheck = click.prompt( - "Enter healthcheck endpoint (default '/') for the image", - default="/", - show_default=True, + "Enter healthcheck endpoint (default '/') for the image", default="/", show_default=True ) else: # Find the selected prebuilt image details - selected_prebuilt = next( - img - for img in prebuilt_images.results - if img.image_name == chosen_image - ) + selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image) image = selected_prebuilt.image_name # Use the prebuilt image port and healthcheck as defaults port = selected_prebuilt.port - healthcheck = ( - selected_prebuilt.healthcheck - if selected_prebuilt.healthcheck - else "/" - ) + healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/" env_vars_str = click.prompt( "Enter environment variables in KEY=VALUE format (comma separated) or leave blank", @@ -335,9 +258,7 @@ def create(): # Common fields min_scale = click.prompt("Minimum number of replicas", default=1, type=int) max_scale = click.prompt("Maximum number of replicas", default=1, type=int) - concurrency = click.prompt( - "Max concurrency (or leave blank)", default="", show_default=False - ) + concurrency = click.prompt("Max concurrency (or leave blank)", default="", show_default=False) concurrency = int(concurrency) if concurrency else None # Construct the inference request @@ -361,25 +282,16 @@ def create(): elif depl_type == DeploymentType.COMPUTE_V2: # Retrieve prebuilt images for inference deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) - image_choices = ( - [img.image_name for img in prebuilt_images.results] - if prebuilt_images.results - else [] - ) + image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else [] # Right now we don't support custom compute images # TODO: add image tags to the url, right now its required by compute but not inference chosen_image = click.prompt( - "Select a prebuilt image", - type=click.Choice(image_choices), - show_choices=True, - default=image_choices[0], + "Select a prebuilt image", type=click.Choice(image_choices), show_choices=True, default=image_choices[0] ) # For compute deployments, we might ask for a public SSH key - ssh_key = click.prompt( - "Enter your public SSH key", default="", show_default=False - ) + ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False) # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False) from platform_api_python_client import CreateComputeDeploymentRequest @@ -396,15 +308,9 @@ def create(): elif depl_type == DeploymentType.CSERVE: # For cserve deployments, ask for model and parallelism - model = click.prompt( - "Enter the Hugging Face model", default="facebook/opt-1.3b" - ) - tensor_parallel_size = click.prompt( - "Tensor parallel size", default=1, type=int - ) - pipeline_parallel_size = click.prompt( - "Pipeline parallel size", default=1, type=int - ) + model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b") + tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int) + pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int) from platform_api_python_client import CreateCServeDeploymentRequest From e11732690fbe1aa720bdbb32d713e638a749a110 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 18 Mar 2025 11:11:09 -0400 Subject: [PATCH 19/28] reformat --- centml/cli/cluster.py | 3 ++- centml/cli/main.py | 2 +- centml/sdk/auth.py | 10 +++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index f7b45ee..50ad1f4 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -292,7 +292,8 @@ def create(): # For compute deployments, we might ask for a public SSH key ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False) - # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", default="n", show_default=False) + # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", + # default="n", show_default=False) from platform_api_python_client import CreateComputeDeploymentRequest diff --git a/centml/cli/main.py b/centml/cli/main.py index bd6679c..be231f8 100644 --- a/centml/cli/main.py +++ b/centml/cli/main.py @@ -8,7 +8,7 @@ # this is the version and prog name set in setup.py @click.version_option( prog_name="CentML CLI", - message=f""" + message=""" ______ __ __ ___ __ / ____/___ ____ / /_ / |/ // / / / / _ \\ / __ \\ / __// /|_/ // / diff --git a/centml/sdk/auth.py b/centml/sdk/auth.py index 193f432..8646864 100644 --- a/centml/sdk/auth.py +++ b/centml/sdk/auth.py @@ -48,13 +48,13 @@ def load_centml_cred(): def get_centml_token(): cred = load_centml_cred() - # if not cred: - # sys.exit("CentML credentials not found. Please login...") + if not cred: + sys.exit("CentML credentials not found. Please login...") - # exp_time = int(jwt.decode(cred["id_token"], options={"verify_signature": False})["exp"]) + exp_time = int(jwt.decode(cred["id_token"], options={"verify_signature": False})["exp"]) - # if time.time() >= exp_time - 100: - # cred = refresh_centml_token(cred["refresh_token"]) + if time.time() >= exp_time - 100: + cred = refresh_centml_token(cred["refresh_token"]) return cred["id_token"] From 6b568e8530ddd981ddadaee7cab1d9eb8c0d14c1 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 18 Mar 2025 19:16:44 -0400 Subject: [PATCH 20/28] change to 3.2.4, updated depl create --- centml/cli/cluster.py | 54 +++++++++++++++++++++++++++---------------- setup.py | 2 +- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 50ad1f4..886fe40 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -195,55 +195,60 @@ def create(): ) depl_type = depl_name_to_type_map[dtype_str] - # Select cluster + # Select cluster using a numbered list clusters = cclient.get_clusters().results if not clusters: click.echo("No clusters available. Please ensure you have a cluster setup.") return - cluster_names = [c.display_name for c in clusters] - cluster_name = click.prompt( - "Select a cluster", type=click.Choice(cluster_names), show_choices=True, default=cluster_names[0] - ) - cluster_id = next(c.id for c in clusters if c.display_name == cluster_name) - # Hardware selection + click.echo("Available clusters:") + for idx, cluster in enumerate(clusters, start=1): + click.echo(f"{idx}. {cluster.display_name}") + cluster_choice = click.prompt("Select a cluster by number", type=int, default=1) + selected_cluster = clusters[cluster_choice - 1] + cluster_id = selected_cluster.id + + # Hardware selection using a numbered list hw_resp = cclient.get_hardware_instances(cluster_id) if not hw_resp: click.echo("No hardware instances available for this cluster.") return - hw_names = [h.name for h in hw_resp] - hw_name = click.prompt( - "Select a hardware instance", type=click.Choice(hw_names), show_choices=True, default=hw_names[0] - ) - hw_id = next(h.id for h in hw_resp if h.name == hw_name) + + click.echo("Available hardware instances:") + for idx, hw in enumerate(hw_resp, start=1): + click.echo(f"{idx}. {hw.name}") + hw_choice = click.prompt("Select a hardware instance by number", type=int, default=1) + selected_hw = hw_resp[hw_choice - 1] + hw_id = selected_hw.id if depl_type == DeploymentType.INFERENCE_V2: # Retrieve prebuilt images for inference deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) - image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else [] + # Build list of image labels + image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else [] image_choices.append("Other") - chosen_image = click.prompt( - "Select a prebuilt image or choose 'Other' to provide a custom image URL", + chosen_label = click.prompt( + "Select a prebuilt image label or choose 'Other' to provide a custom image URL", type=click.Choice(image_choices), show_choices=True, default=image_choices[0], ) - if chosen_image == "Other": + if chosen_label == "Other": image = click.prompt("Enter the custom image URL") port = click.prompt("Enter the container port for the image", default=8080, type=int) healthcheck = click.prompt( "Enter healthcheck endpoint (default '/') for the image", default="/", show_default=True ) else: - # Find the selected prebuilt image details - selected_prebuilt = next(img for img in prebuilt_images.results if img.image_name == chosen_image) - image = selected_prebuilt.image_name - # Use the prebuilt image port and healthcheck as defaults + # Find the prebuilt image with the matching label + selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label) + image = selected_prebuilt.image_name # Use the image_name from the selected prebuilt image port = selected_prebuilt.port healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/" + env_vars_str = click.prompt( "Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", @@ -255,6 +260,12 @@ def create(): k, v = kv.strip().split("=") env_vars[k] = v + # Prompt for command and command arguments (optional) + command_str = click.prompt("Enter command (space-separated) or leave blank", default="", show_default=False) + command = command_str.split() if command_str.strip() else [] + command_args_str = click.prompt("Enter command arguments (space-separated) or leave blank", default="", show_default=False) + command_args = command_args_str.split() if command_args_str.strip() else [] + # Common fields min_scale = click.prompt("Minimum number of replicas", default=1, type=int) max_scale = click.prompt("Maximum number of replicas", default=1, type=int) @@ -275,7 +286,10 @@ def create(): max_scale=max_scale, concurrency=concurrency, env_vars=env_vars if env_vars else None, + command=command, + command_args=command_args, ) + print(req) created = cclient.create_inference(req) click.echo(f"Inference deployment {name} created with ID: {created.id}") diff --git a/setup.py b/setup.py index c2d18d5..17ea7bd 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='centml', - version='3.1.15', + version='3.2.4', packages=find_packages(), python_requires=">=3.10", long_description=open('README.md').read(), From abdf441f66817aa2695fe4af1dbabde057a907fa Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 20 Mar 2025 11:21:37 -0400 Subject: [PATCH 21/28] update to 3.2,4, fix create inf --- centml/cli/cluster.py | 13 +++++++------ requirements.txt | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 886fe40..38ea75c 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -260,11 +260,13 @@ def create(): k, v = kv.strip().split("=") env_vars[k] = v - # Prompt for command and command arguments (optional) - command_str = click.prompt("Enter command (space-separated) or leave blank", default="", show_default=False) - command = command_str.split() if command_str.strip() else [] - command_args_str = click.prompt("Enter command arguments (space-separated) or leave blank", default="", show_default=False) - command_args = command_args_str.split() if command_args_str.strip() else [] + command_str = click.prompt( + "Enter command (space-separated) or leave blank", + default="", + show_default=False + ) + + command = command_str.strip() if command_str.strip() else None # Common fields min_scale = click.prompt("Minimum number of replicas", default=1, type=int) @@ -287,7 +289,6 @@ def create(): concurrency=concurrency, env_vars=env_vars if env_vars else None, command=command, - command_args=command_args, ) print(req) created = cclient.create_inference(req) diff --git a/requirements.txt b/requirements.txt index 49d0f6b..43dbfac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ cryptography==43.0.1 prometheus-client>=0.20.0 scipy>=1.6.0 scikit-learn>=1.5.1 -platform-api-python-client==3.1.15 +platform-api-python-client==3.2.4 From 1bb2248139bf8cde302074166808c9481fe6333a Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 20 Mar 2025 12:28:40 -0400 Subject: [PATCH 22/28] update compute / inf create --- centml/cli/cluster.py | 60 ++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 38ea75c..75f57d4 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -224,9 +224,12 @@ def create(): if depl_type == DeploymentType.INFERENCE_V2: # Retrieve prebuilt images for inference deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) + # Build list of image labels image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else [] - image_choices.append("Other") + + # Right now we disable this other option to get a MVP out quickly. + #image_choices.append("Other") chosen_label = click.prompt( "Select a prebuilt image label or choose 'Other' to provide a custom image URL", @@ -244,7 +247,15 @@ def create(): else: # Find the prebuilt image with the matching label selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label) - image = selected_prebuilt.image_name # Use the image_name from the selected prebuilt image + # Prompt the user to select a tag from the available tags + tag = click.prompt( + "Select a tag for the image", + type=click.Choice(selected_prebuilt.tags), + show_choices=True, + default=selected_prebuilt.tags[0], + ) + # Combine the image URL with the chosen tag + image = f"{selected_prebuilt.image_name}:{tag}" port = selected_prebuilt.port healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/" @@ -290,25 +301,42 @@ def create(): env_vars=env_vars if env_vars else None, command=command, ) - print(req) + created = cclient.create_inference(req) click.echo(f"Inference deployment {name} created with ID: {created.id}") elif depl_type == DeploymentType.COMPUTE_V2: - # Retrieve prebuilt images for inference deployments + # Retrieve prebuilt images for compute deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) - image_choices = [img.image_name for img in prebuilt_images.results] if prebuilt_images.results else [] + # Build list of image labels + image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else [] - # Right now we don't support custom compute images - # TODO: add image tags to the url, right now its required by compute but not inference - chosen_image = click.prompt( - "Select a prebuilt image", type=click.Choice(image_choices), show_choices=True, default=image_choices[0] + chosen_label = click.prompt( + "Select a prebuilt image label", + type=click.Choice(image_choices), + show_choices=True, + default=image_choices[0], + ) + + selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label) + + # Find the prebuilt image with the matching label + selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label) + # Prompt the user to select a tag from the available tags + tag = click.prompt( + "Select a tag for the image", + type=click.Choice(selected_prebuilt.tags), + show_choices=True, + default=selected_prebuilt.tags[0], ) + # Combine the image URL with the chosen tag + image_url = f"{selected_prebuilt.image_name}:{tag}" # For compute deployments, we might ask for a public SSH key - ssh_key = click.prompt("Enter your public SSH key", default="", show_default=False) - # jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", - # default="n", show_default=False) + ssh_key = click.prompt("Enter your public SSH key") + + # Right now we not support this on prod platform, just unify the feature + #jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", type=bool,default=False, show_default=False) from platform_api_python_client import CreateComputeDeploymentRequest @@ -316,9 +344,11 @@ def create(): name=name, cluster_id=cluster_id, hardware_instance_id=hw_id, - image_url=chosen_image, - ssh_public_key=ssh_key if ssh_key.strip() else None, - ) + image_url=image_url, + ssh_public_key=ssh_key, # we require this + #enable_jupyter=jupyter, + ) + created = cclient.create_compute(req) click.echo(f"Compute deployment {name} created with ID: {created.id}") From 47a65bb037a2faeffe469d642ab91def1ca01ada Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 20 Mar 2025 22:01:27 -0400 Subject: [PATCH 23/28] changing cserve side --- centml/cli/cluster.py | 180 ++++++++++++++++++++++++++++++++++-------- centml/sdk/api.py | 6 +- 2 files changed, 150 insertions(+), 36 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 75f57d4..074dc2b 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -187,6 +187,7 @@ def create(): with get_centml_client() as cclient: # Prompt for general fields name = click.prompt("Enter a name for the deployment") + dtype_str = click.prompt( "Select a deployment type", type=click.Choice(list(depl_name_to_type_map.keys())), @@ -195,33 +196,34 @@ def create(): ) depl_type = depl_name_to_type_map[dtype_str] - # Select cluster using a numbered list - clusters = cclient.get_clusters().results - if not clusters: - click.echo("No clusters available. Please ensure you have a cluster setup.") - return - - click.echo("Available clusters:") - for idx, cluster in enumerate(clusters, start=1): - click.echo(f"{idx}. {cluster.display_name}") - cluster_choice = click.prompt("Select a cluster by number", type=int, default=1) - selected_cluster = clusters[cluster_choice - 1] - cluster_id = selected_cluster.id - - # Hardware selection using a numbered list - hw_resp = cclient.get_hardware_instances(cluster_id) - if not hw_resp: - click.echo("No hardware instances available for this cluster.") - return - - click.echo("Available hardware instances:") - for idx, hw in enumerate(hw_resp, start=1): - click.echo(f"{idx}. {hw.name}") - hw_choice = click.prompt("Select a hardware instance by number", type=int, default=1) - selected_hw = hw_resp[hw_choice - 1] - hw_id = selected_hw.id - if depl_type == DeploymentType.INFERENCE_V2: + + # Select cluster using a numbered list + clusters = cclient.get_clusters().results + if not clusters: + click.echo("No clusters available. Please ensure you have a cluster setup.") + return + + click.echo("Available clusters:") + for idx, cluster in enumerate(clusters, start=1): + click.echo(f"{idx}. {cluster.display_name}") + cluster_choice = click.prompt("Select a cluster by number", type=int, default=1) + selected_cluster = clusters[cluster_choice - 1] + cluster_id = selected_cluster.id + + # Hardware selection using a numbered list + hw_resp = cclient.get_hardware_instances(cluster_id) + if not hw_resp: + click.echo("No hardware instances available for this cluster.") + return + + click.echo("Available hardware instances:") + for idx, hw in enumerate(hw_resp, start=1): + click.echo(f"{idx}. {hw.name}") + hw_choice = click.prompt("Select a hardware instance by number", type=int, default=1) + selected_hw = hw_resp[hw_choice - 1] + hw_id = selected_hw.id + # Retrieve prebuilt images for inference deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) @@ -306,6 +308,32 @@ def create(): click.echo(f"Inference deployment {name} created with ID: {created.id}") elif depl_type == DeploymentType.COMPUTE_V2: + # Select cluster using a numbered list + clusters = cclient.get_clusters().results + if not clusters: + click.echo("No clusters available. Please ensure you have a cluster setup.") + return + + click.echo("Available clusters:") + for idx, cluster in enumerate(clusters, start=1): + click.echo(f"{idx}. {cluster.display_name}") + cluster_choice = click.prompt("Select a cluster by number", type=int, default=1) + selected_cluster = clusters[cluster_choice - 1] + cluster_id = selected_cluster.id + + # Hardware selection using a numbered list + hw_resp = cclient.get_hardware_instances(cluster_id) + if not hw_resp: + click.echo("No hardware instances available for this cluster.") + return + + click.echo("Available hardware instances:") + for idx, hw in enumerate(hw_resp, start=1): + click.echo(f"{idx}. {hw.name}") + hw_choice = click.prompt("Select a hardware instance by number", type=int, default=1) + selected_hw = hw_resp[hw_choice - 1] + hw_id = selected_hw.id + # Retrieve prebuilt images for compute deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) # Build list of image labels @@ -353,27 +381,111 @@ def create(): click.echo(f"Compute deployment {name} created with ID: {created.id}") elif depl_type == DeploymentType.CSERVE: - # For cserve deployments, ask for model and parallelism - model = click.prompt("Enter the Hugging Face model", default="facebook/opt-1.3b") - tensor_parallel_size = click.prompt("Tensor parallel size", default=1, type=int) - pipeline_parallel_size = click.prompt("Pipeline parallel size", default=1, type=int) + # Keep things simple, only use recipe. + # Retrieve the recipe and hardware instances + recipe = cclient.get_cserve_recipe() + models = [r.model for r in recipe] if recipe else [] + + if not models: + click.echo("No models found in the recipe.") + sys.exit(1) + + # --- Model Selection (Indexed) --- + click.echo("Select a model:") + for idx, m in enumerate(models, start=1): + click.echo(f"{idx}. {m}") + model_index = click.prompt("Enter the model number", type=int, default=1) + if model_index < 1 or model_index > len(models): + click.echo("Invalid model selection.") + sys.exit(1) + selected_model = models[model_index - 1] + + # --- Performance Option Selection (Indexed) --- + perf_options = ["fastest", "cheapest", "best_value"] + click.echo("Select performance option:") + for idx, option in enumerate(perf_options, start=1): + click.echo(f"{idx}. {option}") + perf_index = click.prompt("Enter the performance option number", type=int, default=1) + if perf_index < 1 or perf_index > len(perf_options): + click.echo("Invalid performance selection.") + sys.exit(1) + selected_perf_option = perf_options[perf_index - 1] + + # Retrieve the recipe response for the selected model + selected_response = next((r for r in recipe if r.model == selected_model), None) + if not selected_response: + click.echo("Selected model not found in recipe.") + sys.exit(1) + + # Get the performance-specific recipe (this is a CServeRecipePerf instance) + selected_perf = getattr(selected_response, selected_perf_option) + + # Retrieve the hardware instance ID from the selected performance option + hardware_instance_id = selected_perf.hardware_instance_id + + # Get hardware instance details using cclient.get_hardware_instances() + hw_instances = cclient.get_hardware_instances() + selected_hw = next((hw for hw in hw_instances["results"] if hw["id"] == hardware_instance_id), None) + if not selected_hw: + click.echo(f"Hardware instance with id {hardware_instance_id} not found.") + sys.exit(1) + + # Display the hardware instance information to the user + click.echo("Selected Hardware Instance:") + for key, value in selected_hw.items(): + click.echo(f"{key}: {value}") + + # Use the cluster_id from the hardware instance (no need to prompt the user) + cluster_id = selected_hw["cluster_id"] + + # --- Additional Prompts --- + # Prompt for Hugging Face token (if required) + hf_token = click.prompt( + "Enter your Hugging Face token or leave blank (if your model isn't private)", + default="", + show_default=False, + ) + + # Prompt for environment variables + env_vars_str = click.prompt( + "Enter environment variables in KEY=VALUE format (comma separated) or leave blank", + default="", + show_default=False, + ) + env_vars = {} + if env_vars_str.strip(): + for kv in env_vars_str.split(","): + try: + k, v = kv.strip().split("=") + env_vars[k] = v + except ValueError: + click.echo(f"Skipping invalid env var: {kv}") + + # Prompt for scaling and concurrency settings + min_scale = click.prompt("Minimum number of replicas", default=1, type=int) + max_scale = click.prompt("Maximum number of replicas", default=1, type=int) + concurrency_input = click.prompt("Max concurrency (or leave blank)", default="", show_default=False) + concurrency = int(concurrency_input) if concurrency_input else None + # --- Create the Deployment Request --- from platform_api_python_client import CreateCServeDeploymentRequest req = CreateCServeDeploymentRequest( name=name, cluster_id=cluster_id, - hardware_instance_id=hw_id, - model=model, - tensor_parallel_size=tensor_parallel_size, - pipeline_parallel_size=pipeline_parallel_size, + hardware_instance_id=hardware_instance_id, + recipe=selected_perf.recipe, # The underlying CServeV2Recipe instance + hf_token=hf_token if hf_token.strip() else None, min_scale=min_scale, max_scale=max_scale, concurrency=concurrency, + env_vars=env_vars if env_vars else None, ) + created = cclient.create_cserve(req) click.echo(f"CServe deployment {name} created with ID: {created.id}") + else: click.echo("Unknown deployment type.") diff --git a/centml/sdk/api.py b/centml/sdk/api.py index 272ca57..eff4e46 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -59,12 +59,14 @@ def resume(self, id): def get_clusters(self): return self._api.get_clusters_clusters_get() - def get_hardware_instances(self, cluster_id): - return self._api.get_hardware_instances_hardware_instances_get(cluster_id).results + def get_hardware_instances(self, cluster_id = None): + return self._api.get_hardware_instances_hardware_instances_get(cluster_id = cluster_id if cluster_id else None).results def get_prebuilt_images(self, depl_type: DeploymentType = None): return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type) + def get_cserve_recipe(self): + return self._api.get_cserve_recipe_deployments_cserve_recipes_get().results @contextmanager def get_centml_client(): From 31430b1ac6510061ac00f7e7b52fe383dbacfcdd Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 20 Mar 2025 22:40:14 -0400 Subject: [PATCH 24/28] complete cserve create --- centml/cli/cluster.py | 57 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 074dc2b..e25a381 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -425,18 +425,63 @@ def create(): # Get hardware instance details using cclient.get_hardware_instances() hw_instances = cclient.get_hardware_instances() - selected_hw = next((hw for hw in hw_instances["results"] if hw["id"] == hardware_instance_id), None) + selected_hw = next((hw for hw in hw_instances if hw.id == hardware_instance_id), None) if not selected_hw: click.echo(f"Hardware instance with id {hardware_instance_id} not found.") sys.exit(1) - # Display the hardware instance information to the user + # Display the hardware instance information to the user. + + credits = selected_hw.cost_per_hr / 100.0 # e.g., 360 -> 3.60 credits per hour + vram_gib = selected_hw.accelerator_memory / 1024 # e.g., 81920 MB -> 80 GiB VRAM + memory_gib = selected_hw.memory / 1024 # e.g., 239616 MB -> 234 GiB memory + cpu_cores = selected_hw.cpu / 1000 # e.g., 26000 millicores -> 26 cores + click.echo("Selected Hardware Instance:") - for key, value in selected_hw.items(): - click.echo(f"{key}: {value}") + click.echo(f"{credits:.2f} credits per hour,\n{vram_gib:.0f}GiB VRAM,\nMemory {memory_gib:.0f}GiB,\nCPU {cpu_cores:.0f} cores") # Use the cluster_id from the hardware instance (no need to prompt the user) - cluster_id = selected_hw["cluster_id"] + cluster_id = selected_hw.cluster_id + + # Convert the recipe to a dict + recipe_dict = selected_perf.recipe.dict() + + # Merge additional_properties into the top-level dictionary for required keys. + additional = recipe_dict.get("additional_properties", {}) + recipe_dict.update(additional) + # Optionally remove the additional_properties key if it's no longer needed + recipe_dict.pop("additional_properties", None) + + recipe_payload = { + "model": recipe_dict.get("model"), + "is_embedding_model": recipe_dict.get("is_embedding_model"), + "dtype": recipe_dict.get("dtype"), + "tokenizer": recipe_dict.get("tokenizer"), + "block_size": recipe_dict.get("block_size"), + "swap_space": recipe_dict.get("swap_space"), + "cache_dtype": recipe_dict.get("cache_dtype"), + "spec_tokens": recipe_dict.get("spec_tokens"), + "gpu_mem_util": recipe_dict.get("gpu_mem_util"), + "max_num_seqs": recipe_dict.get("max_num_seqs"), + "quantization": recipe_dict.get("quantization"), + "max_model_len": recipe_dict.get("max_model_len"), + "offloading_num": int(recipe_dict.get("offloading_num")), + "use_flashinfer": recipe_dict.get("use_flashinfer"), + "eager_execution": recipe_dict.get("eager_execution"), + "spec_draft_model": recipe_dict.get("spec_draft_model"), + "spec_max_seq_len": recipe_dict.get("spec_max_seq_len"), + "use_prefix_caching": recipe_dict.get("use_prefix_caching"), + "num_scheduler_steps": recipe_dict.get("num_scheduler_steps"), + "spec_max_batch_size": recipe_dict.get("spec_max_batch_size"), + "use_chunked_prefill": recipe_dict.get("use_chunked_prefill"), + "chunked_prefill_size": recipe_dict.get("chunked_prefill_size"), + "tensor_parallel_size": recipe_dict.get("tensor_parallel_size"), + "max_seq_len_to_capture": recipe_dict.get("max_seq_len_to_capture"), + "pipeline_parallel_size": recipe_dict.get("pipeline_parallel_size"), + "spec_prompt_lookup_max": recipe_dict.get("spec_prompt_lookup_max"), + "spec_prompt_lookup_min": recipe_dict.get("spec_prompt_lookup_min"), + "distributed_executor_backend": recipe_dict.get("distributed_executor_backend"), + } # --- Additional Prompts --- # Prompt for Hugging Face token (if required) @@ -474,7 +519,7 @@ def create(): name=name, cluster_id=cluster_id, hardware_instance_id=hardware_instance_id, - recipe=selected_perf.recipe, # The underlying CServeV2Recipe instance + recipe=recipe_payload, hf_token=hf_token if hf_token.strip() else None, min_scale=min_scale, max_scale=max_scale, From 4963ce523d0ebece44e12311cc99375b01dee140 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 20 Mar 2025 22:46:24 -0400 Subject: [PATCH 25/28] added TODO, changed all stuff to use name instead of ID. --- centml/cli/cluster.py | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index e25a381..aacb0a2 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -94,7 +94,7 @@ def ls(type): ) ) - +# TODO: Status for Cserve seems to be broken @click.command(help="Get deployment details") @click.argument("name", type=str) @handle_exception @@ -536,27 +536,42 @@ def create(): @click.command(help="Delete a deployment") -@click.argument("id", type=int) +@click.argument("name", type=str) @handle_exception -def delete(id): +def delete(name): with get_centml_client() as cclient: - cclient.delete(id) - click.echo("Deployment has been deleted") + # Retrieve all deployments and search for the given name + deployments = cclient.get(None) + deployment = next((d for d in deployments if d.name == name), None) + if deployment is None: + sys.exit(f"Deployment with name '{name}' not found.") + cclient.delete(deployment.id) + click.echo(f"Deployment {name} has been deleted") @click.command(help="Pause a deployment") -@click.argument("id", type=int) +@click.argument("name", type=str) @handle_exception -def pause(id): +def pause(name): with get_centml_client() as cclient: - cclient.pause(id) - click.echo("Deployment has been paused") + # Retrieve all deployments and search for the given name + deployments = cclient.get(None) + deployment = next((d for d in deployments if d.name == name), None) + if deployment is None: + sys.exit(f"Deployment with name '{name}' not found.") + cclient.pause(deployment.id) + click.echo(f"Deployment {name} has been paused") @click.command(help="Resume a deployment") -@click.argument("id", type=int) +@click.argument("name", type=str) @handle_exception -def resume(id): +def resume(name): with get_centml_client() as cclient: - cclient.resume(id) - click.echo("Deployment has been resumed") + # Retrieve all deployments and search for the given name + deployments = cclient.get(None) + deployment = next((d for d in deployments if d.name == name), None) + if deployment is None: + sys.exit(f"Deployment with name '{name}' not found.") + cclient.resume(deployment.id) + click.echo(f"Deployment {name} has been resumed") From 1d4306801a7caadb881e71df448eb158924b561e Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 20 Mar 2025 22:50:53 -0400 Subject: [PATCH 26/28] change selections to be indexed. --- centml/cli/cluster.py | 158 ++++++++++++++++++++---------------------- 1 file changed, 77 insertions(+), 81 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index aacb0a2..6a7b777 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -94,6 +94,7 @@ def ls(type): ) ) + # TODO: Status for Cserve seems to be broken @click.command(help="Get deployment details") @click.argument("name", type=str) @@ -188,12 +189,16 @@ def create(): # Prompt for general fields name = click.prompt("Enter a name for the deployment") - dtype_str = click.prompt( - "Select a deployment type", - type=click.Choice(list(depl_name_to_type_map.keys())), - show_choices=True, - default=list(depl_name_to_type_map.keys())[0], - ) + # --- Deployment Type Selection (Indexed) --- + deploy_types = list(depl_name_to_type_map.keys()) + click.echo("Select a deployment type:") + for idx, dtype in enumerate(deploy_types, start=1): + click.echo(f"{idx}. {dtype}") + dtype_index = click.prompt("Enter the deployment type number", type=int, default=1) + if dtype_index < 1 or dtype_index > len(deploy_types): + click.echo("Invalid selection.") + return + dtype_str = deploy_types[dtype_index - 1] depl_type = depl_name_to_type_map[dtype_str] if depl_type == DeploymentType.INFERENCE_V2: @@ -226,19 +231,17 @@ def create(): # Retrieve prebuilt images for inference deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) - - # Build list of image labels image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else [] - - # Right now we disable this other option to get a MVP out quickly. - #image_choices.append("Other") - - chosen_label = click.prompt( - "Select a prebuilt image label or choose 'Other' to provide a custom image URL", - type=click.Choice(image_choices), - show_choices=True, - default=image_choices[0], - ) + # Enable custom image selection by adding "Other" to the list. + image_choices.append("Other") + click.echo("Available prebuilt image labels:") + for idx, label in enumerate(image_choices, start=1): + click.echo(f"{idx}. {label}") + choice_index = click.prompt("Select a prebuilt image label by number", type=int, default=1) + if choice_index < 1 or choice_index > len(image_choices): + click.echo("Invalid selection.") + return + chosen_label = image_choices[choice_index - 1] if chosen_label == "Other": image = click.prompt("Enter the custom image URL") @@ -249,19 +252,20 @@ def create(): else: # Find the prebuilt image with the matching label selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label) - # Prompt the user to select a tag from the available tags - tag = click.prompt( - "Select a tag for the image", - type=click.Choice(selected_prebuilt.tags), - show_choices=True, - default=selected_prebuilt.tags[0], - ) + # Prompt the user to select a tag from the available tags (indexed) + click.echo("Available tags for the selected image:") + for idx, tag in enumerate(selected_prebuilt.tags, start=1): + click.echo(f"{idx}. {tag}") + tag_index = click.prompt("Select a tag for the image by number", type=int, default=1) + if tag_index < 1 or tag_index > len(selected_prebuilt.tags): + click.echo("Invalid tag selection.") + return + tag = selected_prebuilt.tags[tag_index - 1] # Combine the image URL with the chosen tag image = f"{selected_prebuilt.image_name}:{tag}" port = selected_prebuilt.port healthcheck = selected_prebuilt.healthcheck if selected_prebuilt.healthcheck else "/" - env_vars_str = click.prompt( "Enter environment variables in KEY=VALUE format (comma separated) or leave blank", default="", @@ -336,36 +340,31 @@ def create(): # Retrieve prebuilt images for compute deployments prebuilt_images = cclient.get_prebuilt_images(depl_type=depl_type) - # Build list of image labels image_choices = [img.label for img in prebuilt_images.results] if prebuilt_images.results else [] + click.echo("Available prebuilt image labels:") + for idx, label in enumerate(image_choices, start=1): + click.echo(f"{idx}. {label}") + choice_index = click.prompt("Select a prebuilt image label by number", type=int, default=1) + if choice_index < 1 or choice_index > len(image_choices): + click.echo("Invalid selection.") + return + chosen_label = image_choices[choice_index - 1] - chosen_label = click.prompt( - "Select a prebuilt image label", - type=click.Choice(image_choices), - show_choices=True, - default=image_choices[0], - ) - - selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label) - - # Find the prebuilt image with the matching label selected_prebuilt = next(img for img in prebuilt_images.results if img.label == chosen_label) - # Prompt the user to select a tag from the available tags - tag = click.prompt( - "Select a tag for the image", - type=click.Choice(selected_prebuilt.tags), - show_choices=True, - default=selected_prebuilt.tags[0], - ) - # Combine the image URL with the chosen tag + # Prompt the user to select a tag from the available tags (indexed) + click.echo("Available tags for the selected image:") + for idx, tag in enumerate(selected_prebuilt.tags, start=1): + click.echo(f"{idx}. {tag}") + tag_index = click.prompt("Select a tag for the image by number", type=int, default=1) + if tag_index < 1 or tag_index > len(selected_prebuilt.tags): + click.echo("Invalid tag selection.") + return + tag = selected_prebuilt.tags[tag_index - 1] image_url = f"{selected_prebuilt.image_name}:{tag}" # For compute deployments, we might ask for a public SSH key ssh_key = click.prompt("Enter your public SSH key") - # Right now we not support this on prod platform, just unify the feature - #jupyter = click.prompt("Enable Jupyter Notebook on this compute deployment?", type=bool,default=False, show_default=False) - from platform_api_python_client import CreateComputeDeploymentRequest req = CreateComputeDeploymentRequest( @@ -373,9 +372,8 @@ def create(): cluster_id=cluster_id, hardware_instance_id=hw_id, image_url=image_url, - ssh_public_key=ssh_key, # we require this - #enable_jupyter=jupyter, - ) + ssh_public_key=ssh_key, + ) created = cclient.create_compute(req) click.echo(f"Compute deployment {name} created with ID: {created.id}") @@ -431,7 +429,6 @@ def create(): sys.exit(1) # Display the hardware instance information to the user. - credits = selected_hw.cost_per_hr / 100.0 # e.g., 360 -> 3.60 credits per hour vram_gib = selected_hw.accelerator_memory / 1024 # e.g., 81920 MB -> 80 GiB VRAM memory_gib = selected_hw.memory / 1024 # e.g., 239616 MB -> 234 GiB memory @@ -453,34 +450,34 @@ def create(): recipe_dict.pop("additional_properties", None) recipe_payload = { - "model": recipe_dict.get("model"), - "is_embedding_model": recipe_dict.get("is_embedding_model"), - "dtype": recipe_dict.get("dtype"), - "tokenizer": recipe_dict.get("tokenizer"), - "block_size": recipe_dict.get("block_size"), - "swap_space": recipe_dict.get("swap_space"), - "cache_dtype": recipe_dict.get("cache_dtype"), - "spec_tokens": recipe_dict.get("spec_tokens"), - "gpu_mem_util": recipe_dict.get("gpu_mem_util"), - "max_num_seqs": recipe_dict.get("max_num_seqs"), - "quantization": recipe_dict.get("quantization"), - "max_model_len": recipe_dict.get("max_model_len"), - "offloading_num": int(recipe_dict.get("offloading_num")), - "use_flashinfer": recipe_dict.get("use_flashinfer"), - "eager_execution": recipe_dict.get("eager_execution"), - "spec_draft_model": recipe_dict.get("spec_draft_model"), - "spec_max_seq_len": recipe_dict.get("spec_max_seq_len"), - "use_prefix_caching": recipe_dict.get("use_prefix_caching"), - "num_scheduler_steps": recipe_dict.get("num_scheduler_steps"), - "spec_max_batch_size": recipe_dict.get("spec_max_batch_size"), - "use_chunked_prefill": recipe_dict.get("use_chunked_prefill"), - "chunked_prefill_size": recipe_dict.get("chunked_prefill_size"), - "tensor_parallel_size": recipe_dict.get("tensor_parallel_size"), - "max_seq_len_to_capture": recipe_dict.get("max_seq_len_to_capture"), - "pipeline_parallel_size": recipe_dict.get("pipeline_parallel_size"), - "spec_prompt_lookup_max": recipe_dict.get("spec_prompt_lookup_max"), - "spec_prompt_lookup_min": recipe_dict.get("spec_prompt_lookup_min"), - "distributed_executor_backend": recipe_dict.get("distributed_executor_backend"), + "model": recipe_dict.get("model"), + "is_embedding_model": recipe_dict.get("is_embedding_model"), + "dtype": recipe_dict.get("dtype"), + "tokenizer": recipe_dict.get("tokenizer"), + "block_size": recipe_dict.get("block_size"), + "swap_space": recipe_dict.get("swap_space"), + "cache_dtype": recipe_dict.get("cache_dtype"), + "spec_tokens": recipe_dict.get("spec_tokens"), + "gpu_mem_util": recipe_dict.get("gpu_mem_util"), + "max_num_seqs": recipe_dict.get("max_num_seqs"), + "quantization": recipe_dict.get("quantization"), + "max_model_len": recipe_dict.get("max_model_len"), + "offloading_num": int(recipe_dict.get("offloading_num")), + "use_flashinfer": recipe_dict.get("use_flashinfer"), + "eager_execution": recipe_dict.get("eager_execution"), + "spec_draft_model": recipe_dict.get("spec_draft_model"), + "spec_max_seq_len": recipe_dict.get("spec_max_seq_len"), + "use_prefix_caching": recipe_dict.get("use_prefix_caching"), + "num_scheduler_steps": recipe_dict.get("num_scheduler_steps"), + "spec_max_batch_size": recipe_dict.get("spec_max_batch_size"), + "use_chunked_prefill": recipe_dict.get("use_chunked_prefill"), + "chunked_prefill_size": recipe_dict.get("chunked_prefill_size"), + "tensor_parallel_size": recipe_dict.get("tensor_parallel_size"), + "max_seq_len_to_capture": recipe_dict.get("max_seq_len_to_capture"), + "pipeline_parallel_size": recipe_dict.get("pipeline_parallel_size"), + "spec_prompt_lookup_max": recipe_dict.get("spec_prompt_lookup_max"), + "spec_prompt_lookup_min": recipe_dict.get("spec_prompt_lookup_min"), + "distributed_executor_backend": recipe_dict.get("distributed_executor_backend"), } # --- Additional Prompts --- @@ -530,7 +527,6 @@ def create(): created = cclient.create_cserve(req) click.echo(f"CServe deployment {name} created with ID: {created.id}") - else: click.echo("Unknown deployment type.") From 9e25528b99097ce733b8adb12afa00f52eff0229 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 20 Mar 2025 22:56:34 -0400 Subject: [PATCH 27/28] format lint --- centml/cli/cluster.py | 17 +++++++---------- centml/sdk/api.py | 7 +++++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 6a7b777..9a97ed5 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -277,11 +277,7 @@ def create(): k, v = kv.strip().split("=") env_vars[k] = v - command_str = click.prompt( - "Enter command (space-separated) or leave blank", - default="", - show_default=False - ) + command_str = click.prompt("Enter command (space-separated) or leave blank", default="", show_default=False) command = command_str.strip() if command_str.strip() else None @@ -429,13 +425,14 @@ def create(): sys.exit(1) # Display the hardware instance information to the user. - credits = selected_hw.cost_per_hr / 100.0 # e.g., 360 -> 3.60 credits per hour - vram_gib = selected_hw.accelerator_memory / 1024 # e.g., 81920 MB -> 80 GiB VRAM - memory_gib = selected_hw.memory / 1024 # e.g., 239616 MB -> 234 GiB memory - cpu_cores = selected_hw.cpu / 1000 # e.g., 26000 millicores -> 26 cores + credits = selected_hw.cost_per_hr / 100.0 # e.g., 360 -> 3.60 credits per hour + vram_gib = selected_hw.accelerator_memory / 1024 # e.g., 81920 MB -> 80 GiB VRAM + memory_gib = selected_hw.memory / 1024 # e.g., 239616 MB -> 234 GiB memory + cpu_cores = selected_hw.cpu / 1000 # e.g., 26000 millicores -> 26 cores click.echo("Selected Hardware Instance:") - click.echo(f"{credits:.2f} credits per hour,\n{vram_gib:.0f}GiB VRAM,\nMemory {memory_gib:.0f}GiB,\nCPU {cpu_cores:.0f} cores") + click.echo(f"{credits:.2f} credits per hour,\n{vram_gib:.0f}GiB VRAM,") + click.echo(f"Memory {memory_gib:.0f}GiB,\nCPU {cpu_cores:.0f} cores") # Use the cluster_id from the hardware instance (no need to prompt the user) cluster_id = selected_hw.cluster_id diff --git a/centml/sdk/api.py b/centml/sdk/api.py index eff4e46..4e27b54 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -59,8 +59,10 @@ def resume(self, id): def get_clusters(self): return self._api.get_clusters_clusters_get() - def get_hardware_instances(self, cluster_id = None): - return self._api.get_hardware_instances_hardware_instances_get(cluster_id = cluster_id if cluster_id else None).results + def get_hardware_instances(self, cluster_id=None): + return self._api.get_hardware_instances_hardware_instances_get( + cluster_id=cluster_id if cluster_id else None + ).results def get_prebuilt_images(self, depl_type: DeploymentType = None): return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type) @@ -68,6 +70,7 @@ def get_prebuilt_images(self, depl_type: DeploymentType = None): def get_cserve_recipe(self): return self._api.get_cserve_recipe_deployments_cserve_recipes_get().results + @contextmanager def get_centml_client(): configuration = platform_api_python_client.Configuration( From 6d35908cdb33a32a831b191ae2dd116ec18d36f6 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 20 Mar 2025 23:07:17 -0400 Subject: [PATCH 28/28] fix typecheck --- centml/sdk/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index 4e27b54..3a637c6 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -64,7 +64,7 @@ def get_hardware_instances(self, cluster_id=None): cluster_id=cluster_id if cluster_id else None ).results - def get_prebuilt_images(self, depl_type: DeploymentType = None): + def get_prebuilt_images(self, depl_type: DeploymentType): return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type) def get_cserve_recipe(self):