From c29f8ed6d85f7197b6dc564a9c0a30df5c909b72 Mon Sep 17 00:00:00 2001
From: John Garbutt
Date: Fri, 3 Oct 2025 12:29:59 +0100
Subject: [PATCH 1/2] Try to expose the GPU count

---
 charts/azimuth-chat/azimuth-ui.schema.yaml | 1 +
 charts/azimuth-llm/values.schema.json      | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml
index 74bd573c..a1f14887 100644
--- a/charts/azimuth-chat/azimuth-ui.schema.yaml
+++ b/charts/azimuth-chat/azimuth-ui.schema.yaml
@@ -24,6 +24,7 @@ sortOrder:
   - /azimuth-llm/ui/appSettings/model_instruction
   - /azimuth-llm/ui/appSettings/page_title
   - /azimuth-llm/api/image/version
+  - /azimuth-llm/api/gpus
   - /azimuth-llm/ui/appSettings/llm_params/temperature
   - /azimuth-llm/ui/appSettings/llm_params/max_tokens
   - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty
diff --git a/charts/azimuth-llm/values.schema.json b/charts/azimuth-llm/values.schema.json
index 59e0e1b8..c13caea6 100644
--- a/charts/azimuth-llm/values.schema.json
+++ b/charts/azimuth-llm/values.schema.json
@@ -40,6 +40,14 @@
               "default": "v0.10.2"
             }
           }
+        },
+        "gpus": {
+          "type": "integer",
+          "title": "GPU Count",
+          "description": "The number of GPUs to allocate to the model.",
+          "default": 1,
+          "minimum": 1,
+          "maximum": 8
         }
       }
     },

From 96bd4ae847b5771aac70a32c7fb3d9e2dc819dac Mon Sep 17 00:00:00 2001
From: John Garbutt
Date: Fri, 3 Oct 2025 14:59:20 +0100
Subject: [PATCH 2/2] Try to set --tensor-parallel-size based on GPU count

Pass --tensor-parallel-size to the API container only when more than
one GPU is requested.

Helm decodes numbers from values files as float64, and Go templates
refuse to compare a float with an integer literal, so the value is cast
with `int` before the `gt` check (an unset value becomes 0 and the flag
is simply omitted). Container args must be strings, so the rendered
count is quoted rather than emitted as a bare YAML number.

---
 charts/azimuth-llm/templates/api/deployment.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml
index 0e6206d5..00bc9af5 100644
--- a/charts/azimuth-llm/templates/api/deployment.yml
+++ b/charts/azimuth-llm/templates/api/deployment.yml
@@ -47,6 +47,10 @@ spec:
           {{- if .Values.api.extraArgs -}}
           {{- .Values.api.extraArgs | toYaml | nindent 10 }}
           {{- end -}}
+          {{- if gt (int .Values.api.gpus) 1 }}
+          - --tensor-parallel-size
+          - {{ int .Values.api.gpus | quote }}
+          {{- end }}
           {{- if .Values.huggingface.secretName -}}
           envFrom:
           - secretRef: