From 1c9e975a90fd812f4460592e1101355ad719983b Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Fri, 31 Oct 2025 00:43:23 +0800 Subject: [PATCH 1/7] Added new demo Signed-off-by: michaelawyu --- .gitignore | 12 + README.md | 7 +- multi-cluster-ai-with-kaito/azresources.sh | 93 ++++ multi-cluster-ai-with-kaito/istio.sh | 41 ++ multi-cluster-ai-with-kaito/kaito.sh | 68 +++ .../kubefleet_placement.sh | 451 ++++++++++++++++++ .../kubefleet_setup.sh | 118 +++++ .../semantic_router.sh | 11 + multi-cluster-ai-with-kaito/setup.sh | 89 ++++ 9 files changed, 888 insertions(+), 2 deletions(-) create mode 100644 .gitignore create mode 100644 multi-cluster-ai-with-kaito/azresources.sh create mode 100644 multi-cluster-ai-with-kaito/istio.sh create mode 100644 multi-cluster-ai-with-kaito/kaito.sh create mode 100644 multi-cluster-ai-with-kaito/kubefleet_placement.sh create mode 100644 multi-cluster-ai-with-kaito/kubefleet_setup.sh create mode 100644 multi-cluster-ai-with-kaito/semantic_router.sh create mode 100755 multi-cluster-ai-with-kaito/setup.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a977dec --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Ignore Python virtual environment directories +venv/ + +# Ignore cloned repositories for specific projects +multi-cluster-ai-with-kaito/kubefleet/ +multi-cluster-ai-with-kaito/istio/ +multi-cluster-ai-with-kaito/semantic-router/ + +# Ignore downloaded files for specific projects +multi-cluster-ai-with-kaito/configure-helm-values.sh +multi-cluster-ai-with-kaito/gpu-provisioner-values-template.yaml +multi-cluster-ai-with-kaito/gpu-provisioner-values.yaml diff --git a/README.md b/README.md index 92c8d4d..d382b91 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,5 @@ -# kubefleet-multicluster-ai-with-kaito -How to use KubeFleet to manage multicluster AI in conjunction with KAITO +# KubeFleet Cookbook + +A collection of various demos, tutorials, and labs for using the KubeFleet project. 
+ +**WIP** diff --git a/multi-cluster-ai-with-kaito/azresources.sh b/multi-cluster-ai-with-kaito/azresources.sh new file mode 100644 index 0000000..733a36f --- /dev/null +++ b/multi-cluster-ai-with-kaito/azresources.sh @@ -0,0 +1,93 @@ +function create_azure_vnet() { + echo "Creating an Azure virtual network..." + az network vnet create \ + --name $VNET \ + -g $RG \ + --location $LOCATION \ + --address-prefix $VNET_ADDR_PREFIX \ + --subnet-name $SUBNET_1 \ + --subnet-prefixes $SUBNET_1_ADDR_PREFIX +} + +function create_azure_vnet_subnet() { + az network vnet subnet create \ + -g $RG \ + --vnet-name $VNET \ + -n $1 \ + --address-prefixes $2 +} + +function create_azure_vnet_subnets() { + echo "Creating additional subnets in the virtual network..." + create_azure_vnet_subnet $SUBNET_2 $SUBNET_2_ADDR_PREFIX + create_azure_vnet_subnet $SUBNET_3 $SUBNET_3_ADDR_PREFIX +} + +function create_aks_cluster() { + echo "Creating AKS cluster $1..." + az aks create \ + --name $1 \ + --resource-group $RG \ + --location $LOCATION \ + --vnet-subnet-id $2 \ + --network-plugin azure \ + --enable-oidc-issuer \ + --enable-workload-identity \ + --enable-managed-identity \ + --generate-ssh-keys \ + --node-vm-size $VM_SIZE \ + --node-count 1 \ + --service-cidr $3 \ + --dns-service-ip $4 +} + +function create_kubefleet_hub_cluster() { + echo "Creating KubeFleet hub cluster $FLEET_HUB..." 
+ az aks create \ + --name $FLEET_HUB \ + --resource-group $RG \ + --location $LOCATION \ + --network-plugin azure \ + --enable-oidc-issuer \ + --enable-workload-identity \ + --enable-managed-identity \ + --generate-ssh-keys \ + --node-vm-size $VM_SIZE \ + --node-count 1 +} + +function create_aks_clusters() { + SUBNET_1_ID=$(az network vnet subnet show --resource-group $RG --vnet-name $VNET --name $SUBNET_1 --query "id" --output tsv) + SUBNET_2_ID=$(az network vnet subnet show --resource-group $RG --vnet-name $VNET --name $SUBNET_2 --query "id" --output tsv) + SUBNET_3_ID=$(az network vnet subnet show --resource-group $RG --vnet-name $VNET --name $SUBNET_3 --query "id" --output tsv) + + echo "Creating AKS clusters..." + create_aks_cluster $MEMBER_1 $SUBNET_1_ID 172.16.0.0/16 172.16.0.10 + create_aks_cluster $MEMBER_2 $SUBNET_2_ID 172.17.0.0/16 172.17.0.10 + create_aks_cluster $MEMBER_3 $SUBNET_3_ID 172.18.0.0/16 172.18.0.10 + create_kubefleet_hub_cluster + + echo "Retrieving admin credentials for AKS clusters..." + az aks get-credentials -n $MEMBER_1 -g $RG --admin + az aks get-credentials -n $MEMBER_2 -g $RG --admin + az aks get-credentials -n $MEMBER_3 -g $RG --admin + az aks get-credentials -n $FLEET_HUB -g $RG --admin +} + +function create_acr() { + echo "Creating Azure Container Registry $ACR..." + az acr create \ + --resource-group $RG \ + --name $ACR \ + --sku Standard \ + --admin-enabled true + + echo "Connecting the ACR to the AKS clusters..." + az aks update -n $MEMBER_1 -g $RG --attach-acr $ACR + az aks update -n $MEMBER_2 -g $RG --attach-acr $ACR + az aks update -n $MEMBER_3 -g $RG --attach-acr $ACR + az aks update -n $FLEET_HUB -g $RG --attach-acr $ACR + + echo "Logging into the ACR..." 
+ az acr login --name $ACR +} \ No newline at end of file diff --git a/multi-cluster-ai-with-kaito/istio.sh b/multi-cluster-ai-with-kaito/istio.sh new file mode 100644 index 0000000..ced43a3 --- /dev/null +++ b/multi-cluster-ai-with-kaito/istio.sh @@ -0,0 +1,41 @@ +function prep_istio_setup() { + echo "Cloning the Istio source code repository..." + git clone https://github.com/istio/istio.git + pushd istio + + ISTIO_TAG=$(curl https://storage.googleapis.com/istio-build/dev/1.28-dev) + git fetch --all + git checkout 1.28.0-beta.1 +} + +function connect_to_multi_cluster_service_mesh() { + echo "Connecting AKS cluster $1 to the multi-cluster Istio service mesh..." + kubectl config use-context $2 + go run ./istioctl/cmd/istioctl install \ + --context $2\ + --set tag=$ISTIO_TAG \ + --set hub=gcr.io/istio-testing \ + --set values.global.meshID=simplemesh \ + --set values.global.multiCluster.clusterName=$1 \ + --set values.global.network=simplenet \ + --set values.pilot.env.ENABLE_GATEWAY_API_INFERENCE_EXTENSION=true + + istioctl create-remote-secret --context=$3 --name=$4 --server $5 | kubectl apply --context=$2 -f - + istioctl create-remote-secret --context=$6 --name=$7 --server $8 | kubectl apply --context=$2 -f - +} + +function set_up_istio() { + echo "Performing some preparatory steps before setting Istio up..." + prep_istio_setup + + echo "Setting up the Istio multi-cluster service mesh on the KubeFleet member clusters..." 
+ MEMBER_1_ADDR=https://$(az aks show --resource-group $RG --name $MEMBER_1 --query "fqdn" -o tsv):443 + MEMBER_2_ADDR=https://$(az aks show --resource-group $RG --name $MEMBER_2 --query "fqdn" -o tsv):443 + MEMBER_3_ADDR=https://$(az aks show --resource-group $RG --name $MEMBER_3 --query "fqdn" -o tsv):443 + + connect_to_multi_cluster_service_mesh $MEMBER_1 $MEMBER_1_CTX $MEMBER_2_CTX $MEMBER_2 $MEMBER_2_ADDR $MEMBER_3_CTX $MEMBER_3 $MEMBER_3_ADDR + connect_to_multi_cluster_service_mesh $MEMBER_2 $MEMBER_2_CTX $MEMBER_1_CTX $MEMBER_1 $MEMBER_1_ADDR $MEMBER_3_CTX $MEMBER_3 $MEMBER_3_ADDR + connect_to_multi_cluster_service_mesh $MEMBER_3 $MEMBER_3_CTX $MEMBER_1_CTX $MEMBER_1 $MEMBER_1_ADDR $MEMBER_2_CTX $MEMBER_2 $MEMBER_2_ADDR + + popd +} \ No newline at end of file diff --git a/multi-cluster-ai-with-kaito/kaito.sh b/multi-cluster-ai-with-kaito/kaito.sh new file mode 100644 index 0000000..d024ef1 --- /dev/null +++ b/multi-cluster-ai-with-kaito/kaito.sh @@ -0,0 +1,68 @@ +function prep_kaito_setup() { + echo "Adding the KAITO Helm charts..." + helm repo add kaito https://kaito-project.github.io/kaito/charts/kaito + helm repo update + + echo "Retrieving the KAITO GPU Provisioner setup script..." + GPU_PROVISIONER_VERSION=0.3.6 + curl -sO https://raw.githubusercontent.com/Azure/gpu-provisioner/main/hack/deploy/configure-helm-values.sh +} + +function install_kaito_core() { + echo "Installing KAITO core components in member cluster $1..." + kubectl config use-context $2 + helm upgrade --install kaito-workspace kaito/workspace \ + --namespace kaito-workspace \ + --create-namespace \ + --set clusterName="$1" \ + --wait +} + +function install_kaito_gpu_provisioner() { + echo "Installing KAITO GPU provisioner in member cluster $1..." + kubectl config use-context $2 + + echo "Creating managed identity..." 
+ local IDENTITY_NAME="kaitogpuprovisioner-$1" + az identity create --name $IDENTITY_NAME -g $RG + local IDENTITY_PRINCIPAL_ID=$(az identity show --name $IDENTITY_NAME -g $RG --query 'principalId' -o tsv) + az role assignment create \ + --assignee $IDENTITY_PRINCIPAL_ID \ + --scope /subscriptions/$SUBSCRIPTION/resourceGroups/$RG/providers/Microsoft.ContainerService/managedClusters/$1 \ + --role "Contributor" + + echo "Configuring Helm values..." + chmod +x ./configure-helm-values.sh && ./configure-helm-values.sh $1 $RG $IDENTITY_NAME + + echo "Installing Helm chart..." + helm upgrade --install gpu-provisioner \ + --values gpu-provisioner-values.yaml \ + --set settings.azure.clusterName=$1 \ + --wait \ + https://github.com/Azure/gpu-provisioner/raw/gh-pages/charts/gpu-provisioner-$GPU_PROVISIONER_VERSION.tgz \ + --namespace gpu-provisioner \ + --create-namespace + + echo "Enabling federated authentication..." + local AKS_OIDC_ISSUER=$(az aks show -n $1 -g $RG --query "oidcIssuerProfile.issuerUrl" -o tsv) + az identity federated-credential create \ + --name kaito-federated-credential-$1 \ + --identity-name $IDENTITY_NAME \ + -g $RG \ + --issuer $AKS_OIDC_ISSUER \ + --subject system:serviceaccount:"gpu-provisioner:gpu-provisioner" \ + --audience api://AzureADTokenExchange +} + +function set_up_kaito() { + echo "Performing some preparatory steps before setting KAITO up..." + prep_kaito_setup + + echo "Installing KAITO in member cluster $MEMBER_1..." + install_kaito_core $MEMBER_1 $MEMBER_1_CTX + install_kaito_gpu_provisioner $MEMBER_1 $MEMBER_1_CTX + + echo "Installing KAITO in member cluster $MEMBER_2..." 
+ install_kaito_core $MEMBER_2 $MEMBER_2_CTX + install_kaito_gpu_provisioner $MEMBER_2 $MEMBER_2_CTX +} \ No newline at end of file diff --git a/multi-cluster-ai-with-kaito/kubefleet_placement.sh b/multi-cluster-ai-with-kaito/kubefleet_placement.sh new file mode 100644 index 0000000..4d8c6e8 --- /dev/null +++ b/multi-cluster-ai-with-kaito/kubefleet_placement.sh @@ -0,0 +1,451 @@ +function install_crds_on_hub_cluster() { + echo "Installing required CRDs for resource placement..." + kubectl config use-context $FLEET_HUB_CTX + + echo "Adding the KAITO workspace CRD..." + kubectl apply -f https://raw.githubusercontent.com/kaito-project/kaito/refs/tags/v0.7.1/charts/kaito/workspace/crds/kaito.sh_workspaces.yaml + + echo "Adding Kubernetes Gateway API CRDs..." + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.3.0/standard-install.yaml + + echo "Adding Kubernetes Gateway API Inference Extension CRDs..." + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml + # Delete the v1alpha1 Gateway Inference Extension CRD to avoid conflicts. + kubectl delete customresourcedefinition.apiextensions.k8s.io/inferencepools.inference.networking.x-k8s.io --ignore-not-found + + echo "Adding the Istio DestinationRule CRD..." + kubectl apply -f https://gist.githubusercontent.com/michaelawyu/b93fec3b8eadc032a14bd52193080380/raw/9336c4c7bb0c5a73864ace6a73b64bc5ef9b9bff/istio-dr-crd.yaml +} + +function install_crds_on_member_cluster() { + echo "Installing required CRDs for resource placement on member cluster $1..." + kubectl config use-context $2 + + echo "Adding Kubernetes Gateway API CRDs..." + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.3.0/standard-install.yaml + + echo "Adding Kubernetes Gateway API Inference Extension CRDs..." 
+ kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml + # Delete the v1alpha1 Gateway Inference Extension CRD to avoid conflicts. + kubectl delete customresourcedefinition.apiextensions.k8s.io/inferencepools.inference.networking.x-k8s.io --ignore-not-found +} + +function label_member_clusters() { + echo "Labeling member clusters for resource placement..." + kubectl config use-context $FLEET_HUB_CTX + kubectl label membercluster $MEMBER_1 env=prod + kubectl label membercluster $MEMBER_2 env=staging +} + +function place_kaito_workspaces() { + echo "Placing Kaito workspaces on member cluster $1..." + kubectl config use-context $FLEET_HUB_CTX + + echo "Adding the workspace to the KubeFleet hub cluster..." + cat < Date: Tue, 4 Nov 2025 13:26:09 +1100 Subject: [PATCH 2/7] Revise README title and description Updated the title and description of the README file. Signed-off-by: michaelawyu --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index d382b91..7171531 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # KubeFleet Cookbook +<<<<<<< HEAD A collection of various demos, tutorials, and labs for using the KubeFleet project. **WIP** +======= +Examples and guides on using KubeFleet to manage multicluster scenarios. 
+>>>>>>> b26101c (Revise README title and description) From 5ef7666ea9c756cb30e35f4b1ebd1b9f1df8dc1a Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 6 Nov 2025 14:11:39 +0800 Subject: [PATCH 3/7] Minor changes Signed-off-by: michaelawyu --- multi-cluster-ai-with-kaito/SETUP.md | 0 .../charts/semantic-router.tgz | Bin 0 -> 8712 bytes multi-cluster-ai-with-kaito/istio.sh | 7 +-- multi-cluster-ai-with-kaito/kaito.sh | 3 +- multi-cluster-ai-with-kaito/litellm/READMD.md | 2 + .../semantic_router.sh | 54 ++++++++++++++++-- multi-cluster-ai-with-kaito/setup.sh | 8 ++- 7 files changed, 64 insertions(+), 10 deletions(-) create mode 100644 multi-cluster-ai-with-kaito/SETUP.md create mode 100644 multi-cluster-ai-with-kaito/charts/semantic-router.tgz create mode 100644 multi-cluster-ai-with-kaito/litellm/READMD.md diff --git a/multi-cluster-ai-with-kaito/SETUP.md b/multi-cluster-ai-with-kaito/SETUP.md new file mode 100644 index 0000000..e69de29 diff --git a/multi-cluster-ai-with-kaito/charts/semantic-router.tgz b/multi-cluster-ai-with-kaito/charts/semantic-router.tgz new file mode 100644 index 0000000000000000000000000000000000000000..90e0a338c5403e3ad76fdff44ebe9ffa0ea90557 GIT binary patch literal 8712 zcmV+jBKO@NiwG0|00000|0w_~VMtOiV@ORlOnEsqVl!4SWK%V1T2nbTPgYhoO;>Dc zVQyr3R8em|NM&qo0PKBjd)v0MU_bL$%#nLe6Cab3{FYR^xo2I+O}sZwbYr*OKHaPg zk&uKMBv=5nt=KugeIEb>3BE|SVmoPi!w-o?0)xR|Fc<*yB5;fnNkX5qOk&>MpCK-T zIg05w_xAL9z25Hjw*J4@>(&47?LO&$)8Fa$wt8FJo10I*>Gij_`di;X@BUC&{A5BR z{!Q=BZPh#Xg*+%FKVvQkO9pVh>7g_&eo`c{kY0pE$Vn>oyFGY`X$+y_5|}U!$2tGN zTg^YT2x=m$OTp2ojE|IZQ4u()3VuzddacYE8r_4&WEv;B4ce~M=fp5qD1 zs01_edS2F8#!>asYjHXUmqJFA!9H&eW$@qK#(^<#^!j{g_F20m0XQ2m>W|R&Y zCE_E{qFrPLkGl0EU7C2tv%1DGHxum^xB zWfwzE&I!d+d>}$ZkrDc)goe(~iA!f;UMQeOVC~aPNX!q19>A2Rq51-lGA;+O)9dZ@ z^=~6B)pylyO59RUdrx~#p^zdW$Yk!LREgdgOHM*zD&RzMT9l7$3F7xh;;I| zVVVt~v$@lY%^TBlqfY-tt;SBz)L4TfM3MzqsrBi$SZ&o?=r2WRw_bcx^Wl)QF%}A) z37KZxEKQW0;{*$#KPpJ|k&$6~%)&D)-I316Us0AI`W#a 
zE%i>47_&?kFZ=4{1QD8Xd@|!$%ovRZu;qCiqlnxuR^Q(pt2G#^bzDd_lYeG3i=F9| z+D~CCzCtpNrg&dXiPxI7(ff>JC4$vF3PUV}QnLo|7Nh7FP9%P#R2=|8-eZN;3pIyO zk}#m=r)izxMvNJ3s6`LFt$ z{`sb^Bumcd1-trHCh)$RZDy@fI1~!@J_={Jfrnj_^w&irG)CbWPNEYnqRk9fpx>l9 z6LieG4loX#n9%b~n7J=VOej*z$BB#UQ`2X3d5}m>ux9w45xG**a%$}6FY!4ERa16} zoRA4Ky*puyibkiTdRyyEz6hyGL(bwoz!7rMH_#62{IkkVO+s?FE?~gG}I~;=4n?R%ug{H~DIB5kqN8lIal1 z*+<}0mW&OY;CI22PEIECjo{|g3yV+Hc{k34Ud&xW9fyo#QI3mmUJnc#6t4kT6l7c25g@wy0}$$R>k4_g*Z6Fd>e9Yuh5xP%)=R@Y&yT= z5Iyt=?XJ+NF}Zz@KHDhWhGK=1HN!Cxl0QV3`a$o$y0pp|&4|*A@5*(5D#5_yOrMe7 zp+k$g>oZKF8Aepj9%L3RX8sjf?lufy`=bfALZbGUoxjeSl+Flcf~B*ES!X|wgh-|&!I+nOjz8%@-Nb}i zU$<8TNBQwfWPTsr0(V`%fC_rM)~5GYH*|&iZ(&=KnN#LYI74z|ZxTjl7l=~QynhmEl-PVmP_hBLbdHrJNL&K6z%puQgG zCy`c)hAas)&T$gDM@&0qt^KmV0PI|@TmwMsnU~=ni3r8f3uf+m@d-!aS#z4Ge!sANr%VGx+&P%`c%EW*e64|~*m z!YDQv3s07R>)R-A4fZAzk`OufQ2vLm_qlH=zdjH3)bBKl>)Gz>Hzxwmrf|0GX8fEQSToMG441b%w^3dVEbS)x}CIl}@&$l@3!kr;SaS3Z!5 zy_6KtQF3Rf)*c9cHWnoL)o`E|GWhVp1HhO1F!uGE=Le(Vo3|&=zI}9-jd3U`OtA#t zFK+GO>Iz5_(k#Nzsc~pflvPF?z*N^aG+O2{B(bP5_|h{s0ao#Apr@onb&Q!50vdB$K(3h+-KRBpeQN zn-6r32vzHG;3${yE6X5630x>OK-V_?1U7M5?YsNn32`2Rf3d-s_`a3F(7C$mTwMW2$rQsQl0^6tEO$_c^Z`kxiaVh97(RTsx(d`kfRhM5 zeCT{=;hD0JgR83sE1;C<8Kl^uos|kRk6X<5;hX29*Lz0?HEO0Ty2H$&&_QV$D1bb{ z5{rPat}6BHCNKuVMzc0agjCd25Q4XuVkB_z8pWzNorQBX#u7zHA`LsIwCVT$o7X3M zhp!Lbwh6nGMIYVyZ!JYzMPFekqR}e^Ve#5X!-69^KNCTuWsKs6aqzVFw5O(XCa|&F za#jbm39+5|*iI|K-sj?7xnu~>c_SnB~loLAdU;G5FELk!wGme6vqoFMw2f4YU9*xE*sE=K10qF|T9~p<3aSS9e*1Uuz z8*r{xgVjy%Kld)?>+`YC;{E?)*JWE2@ z-O_)O_&A$P$R%|APO{7xky=1B{HF_gBc)o-OJ!vdATBTDJO}XKs@lGZT`-E-ryQIPN`bS zJOAuJXVh7pNm(4T#9%6J#?+KEsLn8r12OAr><3GAiM)e>L1A$@dlT>~Zzga!DewAT z;qj-wHFe*@?M!Ler~KGlBM>4(y^=?1un_3QFB*FXVO)SRgnGf3MrEOu5e8Se|kOjK^77vO1>bRTYcR zy=33(vsnK#ju0_Q@9YeEga2o5bLUA-|9i6e_5SD6JQe@%zIR5FXpmpu*e%+G%F9_+E^=KE{(Bz z#r%FmA|u;#0g=}%9Dah_NWZu&aV_hY)hmjXH8X)n+PiL9J!7M<<)1bQ$h$rYOP3$V z0HO)0;v(c8YnYn3bI{0GoZM91aV2sWe`~E^-GxR=X;KB*;$&2UjM3!?TQ8%VAeA0T z&frKo64oQ>CIF@PQVGC1FOAaNd+A0BSG=8EDrtz0bJNmc)8(4OC0RN;TU=hU^gmZ0 
z5L0*QK+WtLa3c-A-!x zoN_8x zEe+X9W4=l5*``Ws@IlS#t+l(>v#mY`HLMN5rXQROOAUk~PFp(;iHfEPu_Dp|p?&2> zQT379O^D~rLS6b8DBmrODI9b2XFBVy^W+;2z7zjxMAKe1lJ-2 zmWqL8cvWPAOUxUD!a|rWu7eE$QFriLC>+XyOC*DXz8P~-v#@r+I~Le=HqWi36)+J?@X~Zzm2-`$m&}|xkLS~5Tn2Pope9qHf^4ge*0aW=C{j#7BCwkn7E@d|fenE-DxH&AEm1 zC~P%xHp;3gVFE5Rn?NZETCAi4*F3s1Crj(+@~z-{)`m05!>N1TN8v6tA{ABh%T47nb59nVX8Q-e8-_d9^KL za6aPN;(YZ6N_+Jh;c|QR(b$@mUtNJ@|68zRWujG0-)rcT_xK*#a17x8a%XRUhG+5l zUuj%**8{+t&i|hDHtXkq+nZm{|31m%p8ugV72V+3?G3hU4Qtm>#?CFUwt&s$y%K6o)WbyS>V9EK$P3I&96w0s za>kOuR?zSJf|9s&rD_JkYwJ{TFI6e+bhY2BDrnj#VgSE&x?_@b$4JaN8_@B?PU*#k z__X5mdF`%~Sb~pTO-Y;O9wtH}N|jdNM$dx%SBF6mbe!@jNr9Ns;QI;lwSMbIgu~3# zoS+boX4&|+kdk*^qp9aPEma$31h2FVQ+BSyl+~IIqpI0rm73KZU46(Y5wfZ+tbxU% zb#DiXfqsL(fgeG~DbRs;{{!RfzS5Ypg zO!tNJ%<$lm64j!Ksp*by3ONnOs=SF{K^aX{ z?VKB*C7Qx@-2kX!i_xjJF?2$VHlUU9M=NKfO6Gy^o`#JBT1VGtE#I7jDQA*}j1J&rf4C9?uFK0U+E>MI+x+d#j8{R}R$l>W zTV)l*YiGQzV&c{F(W}EB-tN8qbu`>NdD)Z{9Xo~NRy-ph8ahL}5DTqo&7e(WuWVVii}$@jgW) zb`cHLi`T1SyIkvPY)jp!6=ktXEQ`BtM=R~wNEc<1BKF0-w&O(GaLp>p5F8)Z+TG57 z7N7qW$1x9V|JUnn^_%v8z5dtpzfbYhghx5(rFn3xiC}4jyV~L4Lmcw`Rc&)W#j|++ zr|024rGdF&{`WWgJB|1+TVL}(eUj&P%0sIN7`AB(ce5BSG=g@o}g_S*=FEck3L9ZQ^x6*h~(g3R|wDsH{dBTXnB5;!duHXfi-Q{&r|sfk1?Fj1zfT7Gsy^K5sjvUR4AYoQ z6UOnqwQk zO}pwJ`%|1J-4f6kg=bl+Gnoo+JrEbEw;qhqnYSLuSpCg7nR@I0_SWEM#0kp;96mn~ zUXVva@&XcJ)HTI9``ru9MaUxD{SpLHeg3~a*n56-5X8|vwy}KvZ$8=H>ec4|*6#M# z`Tt3twMGVSo#0#Bqj;X4?oP&UV-qY|vVNoFR*AotrTqq$z<-vpHau@_4Tc0Ajll1w-fl4qg9 z6CO=;IJ5{;d$RFZx9^zYDFBu%tgyH%gVSg+rhal0Kf;9eIF0A^)WxR*&pSOm9V0Q* zF<%N;&7u%ALam*io_gLJZW3!EE1V;PL>+SewGolzre^EOmUYVUKu+JA=GWHXr-Upb zO=drJBTR7~=cJ7ePW;X~;suNr>k~G<;aBVO)xvw7(;kxmgLfACw7{sZLt&m__VPAkQP2Eb+>K&xk z-Tu^M;I7n?Xj&FGws)r{Gj~B6C&Tsw(#N$qR~Ws{;xXom3$2N|Lakq5WdkHVAWH5& zQlVs<%$@p9i(t<=MH`1As?zlBa9?jaRSli#yH?sBR;a>z!^5KN)85lwt!QoIuWB6V zKkc5lh?H~ znrq%&y0r>)>O}TA)d>d17?=s17~v$)ZJs)fo)*~)@}R^3@kjT@pFpUA`p(E16!Ia0(`JEOv({x@D(HmRmAlgFO{t;606N<* zh_O-D(G)tH>W7huwSrHp1-Cn1J$b+J0N0dra>_5uES_=}Wjb=uvK-(&6QI9W%>n+j 
zS7g*GbAT5k_ES3LD8g!!c5q3ARQhoq<*u%G=OynnLV8N;@b8XJb*uyQm(4XpwkNPe zgY~MR@Rybap8`tLXA9?GF2ar8Z!QnKp;8WPIBv=EOL<*Jhh1+VUWwm*1480FY!I-F zq^|KQV!l^fyF-li+eTQ|@zpi+)Pt<&FP%W^*8K1`@zza-22Re+aUh$(<^S9KSI68R zyz>A)d{=Dc%mQfaJC*P_mK;euots^qB^~6XBjrg0otu}Un9_R5ArpP5Anh_VMgm8` z5|Buo<%0i`$hq0!%~Kq}iAL8fsMRGWznl;tFPH_WmLY7VkB_{Cmc;CB!p`?nWrQ~6plljAWoQYR`|s${gQXyHLS1e z9FXhlY8fV4f+|{w4Yk#k8>-79qsEX0uHoNXJ%79Rccld)ks+FwvT4}0-(!~K%(MZ* zK>V+N>}});nRN#yD3ls`SYKb3CQ@UyZ$_mIW?~Rj$n5s~CmNX}BxjIv#wKFJq(YTM z(F`Ss&IJ)0K=BmO62?Z6tk;a30-I|cnjS9CS!x@mD+y-wphx}hH-rA}(_k~`4|aOJ z-ebk&ER`fC?{TEHijXDemht`HU)}SB28#%+g`vNDJBMSR!$~<~WrZXi4 zhMX|2m{Wn&NIdpDAJ*4(@S64YJe{Y`0}AJ&Op>Y7GOL1CjagqGcpiWiCMNg38BPb& zFDI45OH3)-fEl~c`9$U{1IZ#Z&r>=8=q_5SYI&d&9eyz#Ji(Mgi~+&#>e<5^-5&8mEhD*LhCx#h25|Lfnj5X0*=}luY6I zJV8Rvo9wb3z);M?8KZ1ESNpv}Wmw2GD=$#TQpa*M5#x3EkuQdvx#TgZ!v*7KS}_!& z$}$Hr%N2xDkqz;e(@m@304LE)dpDN6vlJ?f2&qAoXrIZ1?=UD`~#8t$zOrg2bU>D zB!OwJ=jIkSb~~#t1SN^F>d!fn8E;V}w{(WSSZz?}aD>AdN{EObitM|8C9*3Xk1fT3 zO@LjJNH^zmoS)z+iKG7zt&8X@y-6*3UG4l;b@$JpgzZCV%=@Ai2xrlA}b#MCgDt6^@Ttf+gbf{L;O# z;-6r1$Xq+t)8--Pn3KsIE@mX0jO9X})Mv=DO z;CZgp;=pvx-ea9(K4t;~%Ve7AWI6(4FyTF?7C?T!dj2VnSnn_@zbbmz;|!SLT%v2( zoB#2TK3v0;;}m=XolD=6(zw%g3X`Fdjwy@kbi4ccI{3asgg)EpS#&zbc2HeR^!0$g zIw0>`nZ#xaN73(@kk%}#&=XU0JVvlk8b!<^GMQT|q|-8L^eKqE0!gYDfB_t!a8|^- zh8YsJ5fdF+Dg`&f67!fOCC@}S)P`lkxn?S_cIttx?Y)jYbjXj+oha09;F~<&vlb>! zz=b^UwBCH{&E|B5xp@oEbU0efEkWi^JvaLF0Ce1^W@F3r>oy#o+|Yoz$6#s(hZv9! zKB!M;b$c5+)SX^DHLdl{JHAxYg-ANrmxCBZO1vnFP4j8;8SX;LiK>TV^%F`X5AEH? 
z31``uVliV(AEY_=V?!3*+r?OKAg4 z93u>KrIu%l3Dnej>}@Hyt^VSw4Qm>spdY=;Z`izTRj0f_Rt1lgW>Rn4naFxyucfLF zhY68o95pEIdkb?EZ9d`#*EAJ%BV|RNC5c^KYE64P3dQ^eSs!JU0IY?YYFFFFDXQ!Z z5247*kr%F6w5jq#&SETQI1?a3j?!Fcz;Pac&+vtl{N7$r4m=-Tt8EBlQb}m8XOFpl zRA1CeaZV_nV!$Fqxk-z!)ZfTw37yw2K6Rusp$p_ja>f|KTHofEn+m-j83!UnhK25F zSbSuO*nrenM1N#Q)&pXCuKk(bD!~KKT#|Q5otq86=rfl4e5lOl^h{xLZ mLzCXbffvD-;JWek`TBf)zCL$){(k@f0RR8k+;J8FvH$=R4F Date: Thu, 6 Nov 2025 14:54:00 +0800 Subject: [PATCH 4/7] Added LiteLLM setup part Signed-off-by: michaelawyu --- multi-cluster-ai-with-kaito/litellm/READMD.md | 2 - multi-cluster-ai-with-kaito/litellm/README.md | 79 +++++++++++++++++++ .../litellm/secret.yaml | 11 +++ .../litellm/values.yaml | 28 +++++++ 4 files changed, 118 insertions(+), 2 deletions(-) delete mode 100644 multi-cluster-ai-with-kaito/litellm/READMD.md create mode 100644 multi-cluster-ai-with-kaito/litellm/README.md create mode 100644 multi-cluster-ai-with-kaito/litellm/secret.yaml create mode 100644 multi-cluster-ai-with-kaito/litellm/values.yaml diff --git a/multi-cluster-ai-with-kaito/litellm/READMD.md b/multi-cluster-ai-with-kaito/litellm/READMD.md deleted file mode 100644 index cc02a9e..0000000 --- a/multi-cluster-ai-with-kaito/litellm/READMD.md +++ /dev/null @@ -1,2 +0,0 @@ -# Instructions for setting up the LiteLLM proxy - diff --git a/multi-cluster-ai-with-kaito/litellm/README.md b/multi-cluster-ai-with-kaito/litellm/README.md new file mode 100644 index 0000000..fda2e96 --- /dev/null +++ b/multi-cluster-ai-with-kaito/litellm/README.md @@ -0,0 +1,79 @@ +# Instructions for setting up the LiteLLM proxy + +This document provides additional instructions for setting up the LiteLLM proxy in your environment. + +## Before you begin + +* Make sure that you have completed other parts of the tutorial. +* Set up a PostgreSQL database server, which LiteLLM requires for storing information. 
+ * Any PostgreSQL installation should work, as long as the Kubernetes clusters you have created in this + tutorial can access the PostgreSQL instance. You may use an + [Azure DB for PostgreSQL instance](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/quickstart-create-server), + or deploy a PostgreSQL operator inside the query routing cluster. + * After the PostgreSQL database server is set up, create a database `litellm` in the server. + + ```sql + CREATE DATABASE litellm; + ``` + + * Write down the address of the server, the password of the default `postgres` user, and a username/password combo that LiteLLM + will use to access the server. + +## Setting up LiteLLM + +* Edit the `secret.yaml` file in the directory and replace `POSTGRES-PASSWORD`, `YOUR-USERNAME`, and `YOUR-PASSWORD` with +the password of the default `postgres` user and the username/password for the account that LiteLLM will use, respectively (base64-encode each value, as the manifest stores them under the `data` field). +* Edit the `values.yaml` file in the directory, replace `YOUR-POSTGRES-ENDPOINT` with the address of your PostgreSQL database server. + * You may notice that there are various placeholders in the file; it is OK to leave them as they are. +* Switch to the current directory, and run the commands below to deploy the LiteLLM proxy: + + ```sh + helm install litellm --values ./values.yaml oci://ghcr.io/berriai/litellm-helm:0.1.742 --namespace litellm --create-namespace + kubectl apply -f ./secret.yaml + ``` + + It may take a few moments before the LiteLLM proxy starts up. + +* LiteLLM will create a secret in the `litellm` namespace, `litellm-masterkey`, that contains the password of the `admin` user, which +you can use to access the LiteLLM UI. To retrieve the password, run the command below: + + ```sh + kubectl get secret -n litellm litellm-masterkey -o jsonpath='{.data.masterkey}' | base64 -d + ``` + + Write down the output.
Depending on the shell program you use, you may see a percentage sign `%` at the end of the output, + which represents a missing newline character; ignore it: for example, if the output is `123456%`, the password + should be `123456`. + +* Port forward the LiteLLM service: + + ```sh + export LITELLM_FORWARDING_PORT=10000 + kubectl port-forward svc/litellm -n litellm $LITELLM_FORWARDING_PORT:4000 + ``` + +* Open a browser window, and go to `localhost:10000/ui`. You should see that the LiteLLM UI loads up. If prompted for username/password, +use the username `admin` and the master password you just wrote down. + +* On the left panel, click `Models + Endpoints`. Then switch to the `Add Model` tab. + +* Add a new model using the setup below: + + * For the `Provider` part, pick `OpenAI-Compatible Endpoints`. + * For the `LiteLLM Model Name(s)` part, type `openai/auto`. + * For the `Mode` part, pick `Chat - /chat/completions`. + * For the `API Base` part, type `http://inference-gateway-istio.default.svc.cluster.local/v1` if you haven't updated the name of + the inference gateway when you set up the environment; replace `inference-gateway` with the value of your own if the name + has been modified. + * No need to change other parts. + +* Click the `Test Connect` button; you should see a connection successful message. +* Click the `Add Model` button to add the model. + +* On the left panel, check `Test Key`. + +* Make sure that in the `Configurations` panel, the model `openai/auto` has been selected and the endpoint type is `/v1/chat/completions`. + +* You can now use the chat panel to interact with the models. + * Note that conversational continuity may cause your messages to keep landing on the same model; remember to clear the chat history + using the `Clear Chat` button as necessary.
diff --git a/multi-cluster-ai-with-kaito/litellm/secret.yaml b/multi-cluster-ai-with-kaito/litellm/secret.yaml new file mode 100644 index 0000000..f3007f8 --- /dev/null +++ b/multi-cluster-ai-with-kaito/litellm/secret.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Secret +metadata: + namespace: litellm + name: postgres +data: + # Password for the "postgres" user + postgres-password: POSTGRES-PASSWORD + username: YOUR-USERNAME + password: YOUR-PASSWORD +type: Opaque \ No newline at end of file diff --git a/multi-cluster-ai-with-kaito/litellm/values.yaml b/multi-cluster-ai-with-kaito/litellm/values.yaml new file mode 100644 index 0000000..213e737 --- /dev/null +++ b/multi-cluster-ai-with-kaito/litellm/values.yaml @@ -0,0 +1,28 @@ +db: + deployStandalone: false + # Use an existing postgres server/cluster + useExisting: true + + # How to connect to the existing postgres server/cluster + endpoint: YOUR-POSTGRES-ENDPOINT + database: litellm + url: postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME) + secret: + name: postgres + usernameKey: username + passwordKey: password + +# The elements within proxy_config are rendered as config.yaml for the proxy +# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml +# Reference: https://docs.litellm.ai/docs/proxy/configs +proxy_config: + model_list: + # At least one model must exist for the proxy to start; this model might not actually exist. 
+ - model_name: phi-4 # used in litellm proxy + litellm_params: + model: openai/$KAITO_MODEL_NAME # openai prefix is required + api_key: fake-key + api_base: http://$WORKSPACE_SVC/v1 + general_settings: + master_key: os.environ/PROXY_MASTER_KEY + store_model_in_db: true From 03d169f8c6d9677a14b29b546a0999fabeb6ca85 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 6 Nov 2025 15:27:29 +0800 Subject: [PATCH 5/7] Added some additional notes Signed-off-by: michaelawyu --- multi-cluster-ai-with-kaito/SETUP.md | 122 +++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/multi-cluster-ai-with-kaito/SETUP.md b/multi-cluster-ai-with-kaito/SETUP.md index e69de29..cbab13f 100644 --- a/multi-cluster-ai-with-kaito/SETUP.md +++ b/multi-cluster-ai-with-kaito/SETUP.md @@ -0,0 +1,122 @@ +# How to run the scripts in this tutorial + +The scripts in this tutorial will help you: + +* Create a fleet of 3 AKS (Azure Kubernetes Service) clusters for running LLM inference workloads and routing LLM queries. +* Put the 3 clusters under the management of KubeFleet, a CNCF sandbox project for multi-cluster management, with an +additional KubeFleet hub cluster (also an AKS cluster) as the management portal. +* Set up KAITO, a CNCF sandbox project for easy LLM usage, on the clusters for facilitating LLM workloads with ease. +* Connect the 3 clusters with an Istio service mesh. +* Use Kubernetes Gateway API with Inference Extension for serving LLM queries. + +> Note that even though the scripts are set to use AKS clusters and related resources for simplicity reasons; the tutorial itself is not necessarily Azure specific. It can run on any Kubernetes environment, as long as inter-cluster connectivity can be established. + +## Before you begin + +* This tutorial assumes that you are familiar with basic Azure/AKS usage and Kubernetes usage. 
+* If you don't have an Azure account, [create a free account](https://azure.microsoft.com/pricing/purchase-options/azure-account) before you begin. +* Make sure that you have the following tools installed in your environment: + * The Azure CLI (`az`). + * The Kubernetes CLI (`kubectl`). + * Helm + * Docker + * The Istio CLI (`istioctl`) + * Go runtime (>=1.24) + * `git` + * `base64` + * `make` + * `curl` +* The setup in the tutorial requires usage of GPU-enabled nodes (with NVIDIA A100 GPUs or similar specs). + +## Run the scripts + +Switch to the current directory and follow the steps below to run the scripts: + +```sh +chmod +x setup.sh +./setup.sh +``` + +It may take a while for the setup to complete. + +The script includes some configurable parameters; in most cases though, you should be able to just use +the default values. See the list of parameters in the file `setup.sh`, and, if needed, set up +environment variables accordingly to override the default values. + +## Verify the setup + +After the setup script completes, follow the steps below to verify the setup: + +* Switch to one of the clusters that is running the inference workload: + + ```sh + MEMBER_1="${MEMBER_1:-model-serving-cluster-1}" + MEMBER_2="${MEMBER_2:-model-serving-cluster-2}" + MEMBER_3="${MEMBER_3:-query-routing-cluster}" + MEMBER_1_CTX=$MEMBER_1-admin + MEMBER_2_CTX=$MEMBER_2-admin + MEMBER_3_CTX=$MEMBER_3-admin + + kubectl config use-context $MEMBER_1_CTX + kubectl get workspace + ``` + + You should see that the KAITO workspace with the DeepSeek model is up and running. Note that it may take + a while for a GPU node to get ready and have the model downloaded/set up.
+ +* Similarly, switch to the other cluster that is running the inference workload and make sure that the Phi model +is up and running: + + ```sh + kubectl config use-context $MEMBER_2_CTX + kubectl get workspace + ``` + +* Now, switch to the query routing cluster and send some queries to the inference gateway: + + ```sh + kubectl config use-context $MEMBER_3_CTX + + # Open another shell window. + kubectl port-forward svc/inference-gateway-istio 10000:80 + + curl -X POST http://localhost:10000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "auto", + "messages": [{"role": "user", "content": "Prove the Pythagorean theorem step by step"}], + "max_tokens": 100 + }' + ``` + + You should see from the response that the query is being served by the DeepSeek model. + + ```sh + curl -X POST -i localhost:10000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "auto", + "messages": [{"role": "user", "content": "What is the color of the sky?"}], + "max_tokens": 100 + }' + ``` + + You should see from the response that the query is being served by the Phi model. + + > Note: the tutorial featuers a semantic router that classifies queries based on their categories and sends queries to a LLM that is best equipped to process the category. The process is partly non-deterministic due to the nature of LLM. If you believe that a query belongs to a specific category but is not served by the expected LLM; tweak the query text a bit and give it another try. + +## Additional steps + +You can set up the LiteLLM proxy to interact with the models using a UI. Follow the steps in the [LiteLLM setup README](./litellm/README.md) to complete the setup. 
+ +## Clean things up + +To clean things up, delete the Azure resource group that contains all the resources: + +```sh +export RG="${RG:-kubefleet-kaito-demo-2025}" +az group delete -n $RG +``` + + + From 31d02d90695edcdb1450bdd90032a7aff56eb404 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Fri, 7 Nov 2025 07:31:59 +0800 Subject: [PATCH 6/7] Re-sign Simon's commit Signed-off-by: michaelawyu --- multi-cluster-ai-with-kaito/SETUP.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/multi-cluster-ai-with-kaito/SETUP.md b/multi-cluster-ai-with-kaito/SETUP.md index cbab13f..12b1cae 100644 --- a/multi-cluster-ai-with-kaito/SETUP.md +++ b/multi-cluster-ai-with-kaito/SETUP.md @@ -103,11 +103,11 @@ is up and running: You should see from the response that the query is being served by the Phi model. - > Note: the tutorial featuers a semantic router that classifies queries based on their categories and sends queries to a LLM that is best equipped to process the category. The process is partly non-deterministic due to the nature of LLM. If you believe that a query belongs to a specific category but is not served by the expected LLM; tweak the query text a bit and give it another try. + > Note: the tutorial features a semantic router that classifies queries based on their categories and sends queries to an LLM that is best equipped to process the category. The process is partly non-deterministic due to the nature of LLMs. If you believe that a query belongs to a specific category but is not served by the expected LLM, tweak the query text a bit and give it another try. ## Additional steps -You can set up the LiteLLM proxy to interact with the models using a UI. Follow the steps in the [LiteLLM setup README](./litellm/README.md) to complete the setup. +You can set up the LiteLLM proxy to interact with the models using a web UI. Follow the steps in the [LiteLLM setup README](./litellm/README.md) to complete the setup.
## Clean things up @@ -118,5 +118,12 @@ export RG="${RG:-kubefleet-kaito-demo-2025}" az group delete -n $RG ``` +## Questions or comments? + +If you have any questions or comments, please use our [Q&A Discussions](https://github.com/kubefleet-dev/kubefleet/discussions/categories/q-a). + +If you find a bug or the solution doesn't work, please open an [Issue](https://github.com/kubefleet-dev/kubefleet/issues/new) so we can take a look. We welcome submissions too, so if you find a fix please open a PR! + +Also, consider coming to a [Community Meeting](https://bit.ly/kubefleet-cm-meeting) too! From 2373d41f7a3bd74942a765a8c31a04fb4d1f7d0a Mon Sep 17 00:00:00 2001 From: Simon Waight Date: Fri, 7 Nov 2025 03:25:29 +0000 Subject: [PATCH 7/7] Remove whitespace so I can sign-off. Signed-off-by: Simon Waight Signed-off-by: michaelawyu --- multi-cluster-ai-with-kaito/SETUP.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/multi-cluster-ai-with-kaito/SETUP.md b/multi-cluster-ai-with-kaito/SETUP.md index 12b1cae..e68bef7 100644 --- a/multi-cluster-ai-with-kaito/SETUP.md +++ b/multi-cluster-ai-with-kaito/SETUP.md @@ -125,5 +125,3 @@ If you have any questions or comments, please use our [Q&A Discussions](https:/ If you find a bug or the solution doesn't work, please open an [Issue](https://github.com/kubefleet-dev/kubefleet/issues/new) so we can take a look. We welcome submissions too, so if you find a fix please open a PR! Also, consider coming to a [Community Meeting](https://bit.ly/kubefleet-cm-meeting) too! - -