Add an `api.gpus` value to the azimuth-llm and azimuth-chat charts.

- Both values.schema.json files gain an integer `gpus` property
  (default 1, minimum 0, maximum 8).
- The Azimuth UI schema lists /azimuth-llm/api/gpus in its sortOrder,
  directly after the API image version.
- The API deployment template appends `--tensor-parallel-size <gpus>` to the
  container args only when gpus is greater than 1.

NOTE(review): leading whitespace inside the hunks below was reconstructed by
convention (the `nindent 10` context line fixes the args-list column) and
should be confirmed against the target files; `git apply --ignore-whitespace`
tolerates differences in context lines.

diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml
index 74bd573c..a1f14887 100644
--- a/charts/azimuth-chat/azimuth-ui.schema.yaml
+++ b/charts/azimuth-chat/azimuth-ui.schema.yaml
@@ -24,6 +24,7 @@ sortOrder:
   - /azimuth-llm/ui/appSettings/model_instruction
   - /azimuth-llm/ui/appSettings/page_title
   - /azimuth-llm/api/image/version
+  - /azimuth-llm/api/gpus
   - /azimuth-llm/ui/appSettings/llm_params/temperature
   - /azimuth-llm/ui/appSettings/llm_params/max_tokens
   - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty
diff --git a/charts/azimuth-chat/values.schema.json b/charts/azimuth-chat/values.schema.json
index 0fb9ee0d..4b64ad8f 100644
--- a/charts/azimuth-chat/values.schema.json
+++ b/charts/azimuth-chat/values.schema.json
@@ -43,6 +43,14 @@
                                     "default": "v0.10.2"
                                 }
                             }
+                        },
+                        "gpus": {
+                            "type": "integer",
+                            "title": "GPU Count",
+                            "description": "The number of GPUs (within a single node) to allocate to the model.",
+                            "default": 1,
+                            "minimum": 0,
+                            "maximum": 8
                         }
                     }
                 },
diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml
index 0e6206d5..074dbd69 100644
--- a/charts/azimuth-llm/templates/api/deployment.yml
+++ b/charts/azimuth-llm/templates/api/deployment.yml
@@ -47,6 +47,10 @@ spec:
           {{- if .Values.api.extraArgs -}}
           {{- .Values.api.extraArgs | toYaml | nindent 10 }}
           {{- end -}}
+          {{- if gt (.Values.api.gpus | int) 1 }}
+          - --tensor-parallel-size
+          - {{ .Values.api.gpus | quote }}
+          {{- end }}
           {{- if .Values.huggingface.secretName -}}
           envFrom:
             - secretRef:
diff --git a/charts/azimuth-llm/values.schema.json b/charts/azimuth-llm/values.schema.json
index 59e0e1b8..9a7bae50 100644
--- a/charts/azimuth-llm/values.schema.json
+++ b/charts/azimuth-llm/values.schema.json
@@ -40,6 +40,14 @@
                             "default": "v0.10.2"
                         }
                     }
+                },
+                "gpus": {
+                    "type": "integer",
+                    "title": "GPU Count",
+                    "description": "The number of GPUs (within a single node) to allocate to the model.",
+                    "default": 1,
+                    "minimum": 0,
+                    "maximum": 8
                 }
             }
         },