diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a977dec --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Ignore Python virtual environment directories +venv/ + +# Ignore cloned repositories for specific projects +multi-cluster-ai-with-kaito/kubefleet/ +multi-cluster-ai-with-kaito/istio/ +multi-cluster-ai-with-kaito/semantic-router/ + +# Ignore downloaded files for specific projects +multi-cluster-ai-with-kaito/configure-helm-values.sh +multi-cluster-ai-with-kaito/gpu-provisioner-values-template.yaml +multi-cluster-ai-with-kaito/gpu-provisioner-values.yaml diff --git a/README.md b/README.md index e1981d7..814e6b3 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ # KubeFleet Cookbook -Examples and guides on using KubeFleet to manage multicluster scenarios. + +A collection of various demos, tutorials, and labs for using the KubeFleet project. + diff --git a/multi-cluster-ai-with-kaito/SETUP.md b/multi-cluster-ai-with-kaito/SETUP.md new file mode 100644 index 0000000..e68bef7 --- /dev/null +++ b/multi-cluster-ai-with-kaito/SETUP.md @@ -0,0 +1,127 @@ +# How to run the scripts in this tutorial + +The scripts in this tutorial will help you: + +* Create a fleet of 3 AKS (Azure Kubernetes Service) clusters for running LLM inference workloads and routing LLM queries. +* Put the 3 clusters under the management of KubeFleet, a CNCF sandbox project for multi-cluster management, with an +additional KubeFleet hub cluster (also an AKS cluster) as the management portal. +* Set up KAITO, a CNCF sandbox project for easy LLM usage, on the clusters for facilitating LLM workloads with ease. +* Connect the 3 clusters with an Istio service mesh. +* Use Kubernetes Gateway API with Inference Extension for serving LLM queries. + +> Note that even though the scripts are set to use AKS clusters and related resources for simplicity, the tutorial itself is not necessarily Azure-specific. 
It can run on any Kubernetes environment, as long as inter-cluster connectivity can be established. + +## Before you begin + +* This tutorial assumes that you are familiar with basic Azure/AKS usage and Kubernetes usage. +* If you don't have an Azure account, [create a free account](https://azure.microsoft.com/pricing/purchase-options/azure-account) before you begin. +* Make sure that you have the following tools installed in your environment: + * The Azure CLI (`az`). + * The Kubernetes CLI (`kubectl`). + * Helm + * Docker + * The Istio CLI (`istioctl`) + * Go runtime (>=1.24) + * `git` + * `base64` + * `make` + * `curl` +* The setup in the tutorial requires usage of GPU-enabled nodes (with NVIDIA A100 GPUs or similar specs). + +## Run the scripts + +Switch to the directory that contains this file and follow the steps below to run the scripts: + +```sh +chmod +x setup.sh +./setup.sh +``` + +It may take a while for the setup to complete. + +The script includes some configurable parameters; in most cases though, you should be able to just use +the default values. See the list of parameters in the file `setup.sh` and, if needed, set +environment variables accordingly to override the default values. + +## Verify the setup + +After the setup script completes, follow the steps below to verify the setup: + +* Switch to one of the clusters that is running the inference workload: + + ```sh + MEMBER_1="${MEMBER_1:-model-serving-cluster-1}" + MEMBER_2="${MEMBER_2:-model-serving-cluster-2}" + MEMBER_3="${MEMBER_3:-query-routing-cluster}" + MEMBER_1_CTX=$MEMBER_1-admin + MEMBER_2_CTX=$MEMBER_2-admin + MEMBER_3_CTX=$MEMBER_3-admin + + kubectl config use-context $MEMBER_1_CTX + kubectl get workspace + ``` + + You should see that the KAITO workspace with the DeepSeek model is up and running. Note that it may take + a while for a GPU node to get ready and have the model downloaded/set up. 
+ +* Similarly, switch to the other cluster that is running the inference workload and make sure that the Phi model +is up and running: + + ```sh + kubectl config use-context $MEMBER_2_CTX + kubectl get workspace + ``` + +* Now, switch to the query routing cluster and send some queries to the inference gateway: + + ```sh + kubectl config use-context $MEMBER_3_CTX + + # Run the port-forward in another shell window; it keeps running in the foreground. + kubectl port-forward svc/inference-gateway-istio 10000:80 + + curl -X POST http://localhost:10000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "auto", + "messages": [{"role": "user", "content": "Prove the Pythagorean theorem step by step"}], + "max_tokens": 100 + }' + ``` + + You should see from the response that the query is being served by the DeepSeek model. + + ```sh + curl -X POST -i localhost:10000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "auto", + "messages": [{"role": "user", "content": "What is the color of the sky?"}], + "max_tokens": 100 + }' + ``` + + You should see from the response that the query is being served by the Phi model. + + > Note: the tutorial features a semantic router that classifies queries based on their categories and sends queries to an LLM that is best equipped to process the category. The process is partly non-deterministic due to the nature of LLMs. If you believe that a query belongs to a specific category but is not served by the expected LLM, tweak the query text a bit and give it another try. + +## Additional steps + +You can set up the LiteLLM proxy to interact with the models using a web UI. Follow the steps in the [LiteLLM setup README](./litellm/README.md) to complete the setup. + +## Clean things up + +To clean things up, delete the Azure resource group that contains all the resources: + +```sh +export RG="${RG:-kubefleet-kaito-demo-2025}" +az group delete -n $RG +``` + +## Questions or comments? 
+
+If you have any questions or comments, please use our [Q&A Discussions](https://github.com/kubefleet-dev/kubefleet/discussions/categories/q-a).
+
+If you find a bug or the solution doesn't work, please open an [Issue](https://github.com/kubefleet-dev/kubefleet/issues/new) so we can take a look. We welcome submissions too, so if you find a fix please open a PR!
+
+Also, consider coming to a [Community Meeting](https://bit.ly/kubefleet-cm-meeting) too!
diff --git a/multi-cluster-ai-with-kaito/azresources.sh b/multi-cluster-ai-with-kaito/azresources.sh
new file mode 100644
index 0000000..733a36f
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/azresources.sh
@@ -0,0 +1,113 @@
+# Helpers that provision the Azure resources used by this tutorial.
+# Settings such as RG, LOCATION and VNET are read from shell variables that
+# are not defined in this file; SETUP.md points at setup.sh for the list.
+
+# Creates the virtual network $VNET along with its first subnet, $SUBNET_1.
+function create_azure_vnet() {
+    echo "Creating an Azure virtual network..."
+    az network vnet create \
+        --name "$VNET" \
+        -g "$RG" \
+        --location "$LOCATION" \
+        --address-prefix "$VNET_ADDR_PREFIX" \
+        --subnet-name "$SUBNET_1" \
+        --subnet-prefixes "$SUBNET_1_ADDR_PREFIX"
+}
+
+# Creates a single subnet in $VNET.
+#   $1 - name of the subnet
+#   $2 - address prefix of the subnet
+function create_azure_vnet_subnet() {
+    az network vnet subnet create \
+        -g "$RG" \
+        --vnet-name "$VNET" \
+        -n "$1" \
+        --address-prefixes "$2"
+}
+
+# Creates the two remaining subnets, $SUBNET_2 and $SUBNET_3.
+function create_azure_vnet_subnets() {
+    echo "Creating additional subnets in the virtual network..."
+    create_azure_vnet_subnet "$SUBNET_2" "$SUBNET_2_ADDR_PREFIX"
+    create_azure_vnet_subnet "$SUBNET_3" "$SUBNET_3_ADDR_PREFIX"
+}
+
+# Creates a single-node AKS cluster attached to an existing subnet.
+#   $1 - cluster name
+#   $2 - resource ID of the subnet to attach the cluster to
+#   $3 - service CIDR
+#   $4 - DNS service IP
+function create_aks_cluster() {
+    echo "Creating AKS cluster $1..."
+    az aks create \
+        --name "$1" \
+        --resource-group "$RG" \
+        --location "$LOCATION" \
+        --vnet-subnet-id "$2" \
+        --network-plugin azure \
+        --enable-oidc-issuer \
+        --enable-workload-identity \
+        --enable-managed-identity \
+        --generate-ssh-keys \
+        --node-vm-size "$VM_SIZE" \
+        --node-count 1 \
+        --service-cidr "$3" \
+        --dns-service-ip "$4"
+}
+
+# Creates the single-node hub cluster $FLEET_HUB. Unlike create_aks_cluster,
+# no subnet ID, service CIDR or DNS service IP is passed.
+function create_kubefleet_hub_cluster() {
+    echo "Creating KubeFleet hub cluster $FLEET_HUB..."
+    az aks create \
+        --name "$FLEET_HUB" \
+        --resource-group "$RG" \
+        --location "$LOCATION" \
+        --network-plugin azure \
+        --enable-oidc-issuer \
+        --enable-workload-identity \
+        --enable-managed-identity \
+        --generate-ssh-keys \
+        --node-vm-size "$VM_SIZE" \
+        --node-count 1
+}
+
+# Creates the three member clusters (one per subnet) and the hub cluster, then
+# retrieves the admin credentials of all four. Sets the globals SUBNET_{1,2,3}_ID.
+function create_aks_clusters() {
+    SUBNET_1_ID=$(az network vnet subnet show --resource-group "$RG" --vnet-name "$VNET" --name "$SUBNET_1" --query "id" --output tsv)
+    SUBNET_2_ID=$(az network vnet subnet show --resource-group "$RG" --vnet-name "$VNET" --name "$SUBNET_2" --query "id" --output tsv)
+    SUBNET_3_ID=$(az network vnet subnet show --resource-group "$RG" --vnet-name "$VNET" --name "$SUBNET_3" --query "id" --output tsv)
+
+    echo "Creating AKS clusters..."
+    create_aks_cluster "$MEMBER_1" "$SUBNET_1_ID" 172.16.0.0/16 172.16.0.10
+    create_aks_cluster "$MEMBER_2" "$SUBNET_2_ID" 172.17.0.0/16 172.17.0.10
+    create_aks_cluster "$MEMBER_3" "$SUBNET_3_ID" 172.18.0.0/16 172.18.0.10
+    create_kubefleet_hub_cluster
+
+    echo "Retrieving admin credentials for AKS clusters..."
+    az aks get-credentials -n "$MEMBER_1" -g "$RG" --admin
+    az aks get-credentials -n "$MEMBER_2" -g "$RG" --admin
+    az aks get-credentials -n "$MEMBER_3" -g "$RG" --admin
+    az aks get-credentials -n "$FLEET_HUB" -g "$RG" --admin
+}
+
+# Creates the container registry $ACR, attaches it to the three member clusters
+# and the hub cluster, then logs in to it.
+function create_acr() {
+    echo "Creating Azure Container Registry $ACR..."
+    az acr create \
+        --resource-group "$RG" \
+        --name "$ACR" \
+        --sku Standard \
+        --admin-enabled true
+
+    echo "Connecting the ACR to the AKS clusters..."
+    az aks update -n "$MEMBER_1" -g "$RG" --attach-acr "$ACR"
+    az aks update -n "$MEMBER_2" -g "$RG" --attach-acr "$ACR"
+    az aks update -n "$MEMBER_3" -g "$RG" --attach-acr "$ACR"
+    az aks update -n "$FLEET_HUB" -g "$RG" --attach-acr "$ACR"
+
+    echo "Logging into the ACR..."
+    az acr login --name "$ACR"
+}
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/charts/semantic-router.tgz b/multi-cluster-ai-with-kaito/charts/semantic-router.tgz
new file mode 100644
index 0000000..90e0a33
Binary files /dev/null and b/multi-cluster-ai-with-kaito/charts/semantic-router.tgz differ
diff --git a/multi-cluster-ai-with-kaito/istio.sh b/multi-cluster-ai-with-kaito/istio.sh
new file mode 100644
index 0000000..e2c0e3f
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/istio.sh
@@ -0,0 +1,54 @@
+# Helpers that join the member clusters into one Istio multi-cluster mesh.
+
+# Clones the Istio repository, enters it with pushd and checks out $ISTIO_TAG.
+# The working directory stays inside the clone on return; set_up_istio pops it.
+# Returns 1 if the clone directory cannot be entered.
+function prep_istio_setup() {
+    echo "Cloning the Istio source code repository..."
+    git clone https://github.com/istio/istio.git
+    # Stop here rather than run git and istioctl from the wrong directory.
+    pushd istio || return 1
+
+    git fetch --all
+    git checkout "$ISTIO_TAG"
+}
+
+# Installs Istio on one cluster and registers the two other clusters with it.
+# The install step runs istioctl from source, relative to the current directory.
+#   $1 - name of the cluster being set up
+#   $2 - kubeconfig context of that cluster
+#   $3, $4, $5 - context, name and API server address of the first peer
+#   $6, $7, $8 - context, name and API server address of the second peer
+function connect_to_multi_cluster_service_mesh() {
+    echo "Connecting AKS cluster $1 to the multi-cluster Istio service mesh..."
+    kubectl config use-context "$2"
+    go run ./istioctl/cmd/istioctl install \
+        --context "$2" \
+        --set tag="$ISTIO_TAG" \
+        --set hub=gcr.io/istio-release \
+        --set values.global.meshID=simplemesh \
+        --set values.global.multiCluster.clusterName="$1" \
+        --set values.global.network=simplenet \
+        --set values.pilot.env.ENABLE_GATEWAY_API_INFERENCE_EXTENSION=true
+
+    istioctl create-remote-secret --context="$3" --name="$4" --server "$5" | kubectl apply --context="$2" -f -
+    istioctl create-remote-secret --context="$6" --name="$7" --server "$8" | kubectl apply --context="$2" -f -
+}
+
+# Checks out Istio, looks up the API server address of each member cluster and
+# connects every member cluster to the other two. Sets MEMBER_{1,2,3}_ADDR.
+function set_up_istio() {
+    echo "Performing some preparatory steps before setting Istio up..."
+    prep_istio_setup || return 1
+
+    echo "Setting up the Istio multi-cluster service mesh on the KubeFleet member clusters..."
+    MEMBER_1_ADDR="https://$(az aks show --resource-group "$RG" --name "$MEMBER_1" --query "fqdn" -o tsv):443"
+    MEMBER_2_ADDR="https://$(az aks show --resource-group "$RG" --name "$MEMBER_2" --query "fqdn" -o tsv):443"
+    MEMBER_3_ADDR="https://$(az aks show --resource-group "$RG" --name "$MEMBER_3" --query "fqdn" -o tsv):443"
+
+    connect_to_multi_cluster_service_mesh "$MEMBER_1" "$MEMBER_1_CTX" "$MEMBER_2_CTX" "$MEMBER_2" "$MEMBER_2_ADDR" "$MEMBER_3_CTX" "$MEMBER_3" "$MEMBER_3_ADDR"
+    connect_to_multi_cluster_service_mesh "$MEMBER_2" "$MEMBER_2_CTX" "$MEMBER_1_CTX" "$MEMBER_1" "$MEMBER_1_ADDR" "$MEMBER_3_CTX" "$MEMBER_3" "$MEMBER_3_ADDR"
+    connect_to_multi_cluster_service_mesh "$MEMBER_3" "$MEMBER_3_CTX" "$MEMBER_1_CTX" "$MEMBER_1" "$MEMBER_1_ADDR" "$MEMBER_2_CTX" "$MEMBER_2" "$MEMBER_2_ADDR"
+
+    popd
+}
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/kaito.sh b/multi-cluster-ai-with-kaito/kaito.sh
new file mode 100644
index 0000000..46326a1
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/kaito.sh
@@ -0,0 +1,88 @@
+# Helpers that install KAITO and its GPU provisioner on member clusters.
+
+# Registers the KAITO Helm repository and downloads the GPU provisioner's
+# configure-helm-values.sh script into the current directory.
+# Sets the global GPU_PROVISIONER_VERSION, read by install_kaito_gpu_provisioner.
+function prep_kaito_setup() {
+    echo "Adding the KAITO Helm charts..."
+    helm repo add kaito https://kaito-project.github.io/kaito/charts/kaito
+    helm repo update
+
+    echo "Retrieving the KAITO GPU Provisioner setup script..."
+    GPU_PROVISIONER_VERSION=0.3.7
+    curl -sO https://raw.githubusercontent.com/Azure/gpu-provisioner/main/hack/deploy/configure-helm-values.sh
+}
+
+# Installs the KAITO workspace chart with the inference extension feature gate on.
+#   $1 - cluster name
+#   $2 - kubeconfig context of the cluster
+function install_kaito_core() {
+    echo "Installing KAITO core components in member cluster $1..."
+    kubectl config use-context "$2"
+    helm upgrade --install kaito-workspace kaito/workspace \
+        --namespace kaito-workspace \
+        --create-namespace \
+        --set clusterName="$1" \
+        --set featureGates.gatewayAPIInferenceExtension=true \
+        --wait
+}
+
+# Installs the GPU provisioner on one cluster: creates a managed identity,
+# grants it Contributor on the cluster, installs the Helm chart and adds a
+# federated credential for the gpu-provisioner service account.
+# Reads RG, SUBSCRIPTION and GPU_PROVISIONER_VERSION.
+#   $1 - cluster name
+#   $2 - kubeconfig context of the cluster
+function install_kaito_gpu_provisioner() {
+    echo "Installing KAITO GPU provisioner in member cluster $1..."
+    kubectl config use-context "$2"
+
+    echo "Creating managed identity..."
+    local IDENTITY_NAME="kaitogpuprovisioner-$1"
+    az identity create --name "$IDENTITY_NAME" -g "$RG"
+    # Declared apart from the assignment so `local` does not mask a failed lookup.
+    local IDENTITY_PRINCIPAL_ID
+    IDENTITY_PRINCIPAL_ID=$(az identity show --name "$IDENTITY_NAME" -g "$RG" --query 'principalId' -o tsv)
+    az role assignment create \
+        --assignee "$IDENTITY_PRINCIPAL_ID" \
+        --scope "/subscriptions/$SUBSCRIPTION/resourceGroups/$RG/providers/Microsoft.ContainerService/managedClusters/$1" \
+        --role "Contributor"
+
+    echo "Configuring Helm values..."
+    chmod +x ./configure-helm-values.sh && ./configure-helm-values.sh "$1" "$RG" "$IDENTITY_NAME"
+
+    echo "Installing Helm chart..."
+    helm upgrade --install gpu-provisioner \
+        --values gpu-provisioner-values.yaml \
+        --set settings.azure.clusterName="$1" \
+        --wait \
+        "https://github.com/Azure/gpu-provisioner/raw/gh-pages/charts/gpu-provisioner-${GPU_PROVISIONER_VERSION}.tgz" \
+        --namespace gpu-provisioner \
+        --create-namespace
+
+    echo "Enabling federated authentication..."
+    local AKS_OIDC_ISSUER
+    AKS_OIDC_ISSUER=$(az aks show -n "$1" -g "$RG" --query "oidcIssuerProfile.issuerUrl" -o tsv)
+    az identity federated-credential create \
+        --name "kaito-federated-credential-$1" \
+        --identity-name "$IDENTITY_NAME" \
+        -g "$RG" \
+        --issuer "$AKS_OIDC_ISSUER" \
+        --subject "system:serviceaccount:gpu-provisioner:gpu-provisioner" \
+        --audience api://AzureADTokenExchange
+}
+
+# Prepares the KAITO prerequisites, then installs KAITO core and the GPU
+# provisioner on $MEMBER_1 and $MEMBER_2.
+function set_up_kaito() {
+    echo "Performing some preparatory steps before setting KAITO up..."
+    prep_kaito_setup
+
+    echo "Installing KAITO in member cluster $MEMBER_1..."
+    install_kaito_core "$MEMBER_1" "$MEMBER_1_CTX"
+    install_kaito_gpu_provisioner "$MEMBER_1" "$MEMBER_1_CTX"
+
+    echo "Installing KAITO in member cluster $MEMBER_2..."
+    install_kaito_core "$MEMBER_2" "$MEMBER_2_CTX"
+    install_kaito_gpu_provisioner "$MEMBER_2" "$MEMBER_2_CTX"
+}
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/kubefleet_placement.sh b/multi-cluster-ai-with-kaito/kubefleet_placement.sh
new file mode 100644
index 0000000..4d8c6e8
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/kubefleet_placement.sh
@@ -0,0 +1,458 @@
+# Installs CRDs on the hub cluster (context $FLEET_HUB_CTX): the KAITO
+# workspace CRD, the Gateway API and Inference Extension CRDs, and the Istio
+# DestinationRule CRD.
+function install_crds_on_hub_cluster() {
+    echo "Installing required CRDs for resource placement..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+
+    echo "Adding the KAITO workspace CRD..."
+    kubectl apply -f https://raw.githubusercontent.com/kaito-project/kaito/refs/tags/v0.7.1/charts/kaito/workspace/crds/kaito.sh_workspaces.yaml
+
+    echo "Adding Kubernetes Gateway API CRDs..."
+    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.3.0/standard-install.yaml
+
+    echo "Adding Kubernetes Gateway API Inference Extension CRDs..."
+    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml
+    # Delete the v1alpha1 Gateway Inference Extension CRD to avoid conflicts.
+    kubectl delete customresourcedefinition.apiextensions.k8s.io/inferencepools.inference.networking.x-k8s.io --ignore-not-found
+
+    echo "Adding the Istio DestinationRule CRD..."
+    kubectl apply -f https://gist.githubusercontent.com/michaelawyu/b93fec3b8eadc032a14bd52193080380/raw/9336c4c7bb0c5a73864ace6a73b64bc5ef9b9bff/istio-dr-crd.yaml
+}
+
+# Installs the Gateway API and Inference Extension CRDs on one member cluster.
+#   $1 - cluster name (only used in the log message)
+#   $2 - kubeconfig context of the cluster
+function install_crds_on_member_cluster() {
+    echo "Installing required CRDs for resource placement on member cluster $1..."
+    kubectl config use-context "$2"
+
+    echo "Adding Kubernetes Gateway API CRDs..."
+    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.3.0/standard-install.yaml
+
+    echo "Adding Kubernetes Gateway API Inference Extension CRDs..."
+    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml
+    # Delete the v1alpha1 Gateway Inference Extension CRD to avoid conflicts.
+    kubectl delete customresourcedefinition.apiextensions.k8s.io/inferencepools.inference.networking.x-k8s.io --ignore-not-found
+}
+
+# Labels $MEMBER_1 as env=prod and $MEMBER_2 as env=staging on the hub cluster.
+function label_member_clusters() {
+    echo "Labeling member clusters for resource placement..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+    kubectl label membercluster "$MEMBER_1" env=prod
+    kubectl label membercluster "$MEMBER_2" env=staging
+}
+
+function place_kaito_workspaces() {
+    echo "Placing Kaito workspaces on member cluster $1..."
+    kubectl config use-context $FLEET_HUB_CTX
+
+    echo "Adding the workspace to the KubeFleet hub cluster..."
+    cat <