From 1c9e975a90fd812f4460592e1101355ad719983b Mon Sep 17 00:00:00 2001
From: michaelawyu <chenyu1@microsoft.com>
Date: Fri, 31 Oct 2025 00:43:23 +0800
Subject: [PATCH 1/7] Added new demo

Signed-off-by: michaelawyu <chenyu1@microsoft.com>
---
 .gitignore                                    |  12 +
 README.md                                     |   7 +-
 multi-cluster-ai-with-kaito/azresources.sh    |  93 ++++
 multi-cluster-ai-with-kaito/istio.sh          |  41 ++
 multi-cluster-ai-with-kaito/kaito.sh          |  68 +++
 .../kubefleet_placement.sh                    | 451 ++++++++++++++++++
 .../kubefleet_setup.sh                        | 118 +++++
 .../semantic_router.sh                        |  11 +
 multi-cluster-ai-with-kaito/setup.sh          |  89 ++++
 9 files changed, 888 insertions(+), 2 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 multi-cluster-ai-with-kaito/azresources.sh
 create mode 100644 multi-cluster-ai-with-kaito/istio.sh
 create mode 100644 multi-cluster-ai-with-kaito/kaito.sh
 create mode 100644 multi-cluster-ai-with-kaito/kubefleet_placement.sh
 create mode 100644 multi-cluster-ai-with-kaito/kubefleet_setup.sh
 create mode 100644 multi-cluster-ai-with-kaito/semantic_router.sh
 create mode 100755 multi-cluster-ai-with-kaito/setup.sh

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a977dec
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+# Ignore Python virtual environment directories
+venv/
+
+# Ignore cloned repositories for specific projects
+multi-cluster-ai-with-kaito/kubefleet/
+multi-cluster-ai-with-kaito/istio/
+multi-cluster-ai-with-kaito/semantic-router/
+
+# Ignore downloaded files for specific projects
+multi-cluster-ai-with-kaito/configure-helm-values.sh
+multi-cluster-ai-with-kaito/gpu-provisioner-values-template.yaml
+multi-cluster-ai-with-kaito/gpu-provisioner-values.yaml
diff --git a/README.md b/README.md
index 92c8d4d..d382b91 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,5 @@
-# kubefleet-multicluster-ai-with-kaito
-How to use KubeFleet to manage multicluster AI in conjunction with KAITO
+# KubeFleet Cookbook
+
+A collection of various demos, tutorials, and labs for using the KubeFleet project.
+
+**WIP**
diff --git a/multi-cluster-ai-with-kaito/azresources.sh b/multi-cluster-ai-with-kaito/azresources.sh
new file mode 100644
index 0000000..733a36f
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/azresources.sh
@@ -0,0 +1,93 @@
+function create_azure_vnet() {  # Creates virtual network $VNET in $RG/$LOCATION together with its first subnet, $SUBNET_1.
+    echo "Creating an Azure virtual network..."  # The two *_ADDR_PREFIX values stay unquoted so a space-separated list still splits.
+    az network vnet create \
+        --name "$VNET" \
+        -g "$RG" \
+        --location "$LOCATION" \
+        --address-prefix $VNET_ADDR_PREFIX \
+        --subnet-name "$SUBNET_1" \
+        --subnet-prefixes $SUBNET_1_ADDR_PREFIX
+}
+
+function create_azure_vnet_subnet() {  # $1: subnet name; $2: subnet address prefix. Adds the subnet to $VNET in $RG.
+    az network vnet subnet create \
+        -g "$RG" \
+        --vnet-name "$VNET" \
+        -n "$1" \
+        --address-prefixes "$2"
+}
+
+function create_azure_vnet_subnets() {  # Adds $SUBNET_2 and $SUBNET_3 to the virtual network.
+    echo "Creating additional subnets in the virtual network..."
+    create_azure_vnet_subnet "$SUBNET_2" "$SUBNET_2_ADDR_PREFIX"
+    create_azure_vnet_subnet "$SUBNET_3" "$SUBNET_3_ADDR_PREFIX"
+}
+
+function create_aks_cluster() {  # $1: cluster name; $2: subnet resource ID; $3: service CIDR; $4: DNS service IP.
+    echo "Creating AKS cluster $1..."
+    az aks create \
+        --name "$1" \
+        --resource-group "$RG" \
+        --location "$LOCATION" \
+        --vnet-subnet-id "$2" \
+        --network-plugin azure \
+        --enable-oidc-issuer \
+        --enable-workload-identity \
+        --enable-managed-identity \
+        --generate-ssh-keys \
+        --node-vm-size "$VM_SIZE" \
+        --node-count 1 \
+        --service-cidr "$3" \
+        --dns-service-ip "$4"
+}
+
+function create_kubefleet_hub_cluster() {  # Creates the hub cluster $FLEET_HUB; no --vnet-subnet-id is passed for it.
+    echo "Creating KubeFleet hub cluster $FLEET_HUB..."
+    az aks create \
+        --name "$FLEET_HUB" \
+        --resource-group "$RG" \
+        --location "$LOCATION" \
+        --network-plugin azure \
+        --enable-oidc-issuer \
+        --enable-workload-identity \
+        --enable-managed-identity \
+        --generate-ssh-keys \
+        --node-vm-size "$VM_SIZE" \
+        --node-count 1
+}
+
+function create_aks_clusters() {  # Creates the three member clusters (one per subnet) and the hub, then fetches admin credentials for all four.
+    SUBNET_1_ID=$(az network vnet subnet show --resource-group "$RG" --vnet-name "$VNET" --name "$SUBNET_1" --query "id" --output tsv)
+    SUBNET_2_ID=$(az network vnet subnet show --resource-group "$RG" --vnet-name "$VNET" --name "$SUBNET_2" --query "id" --output tsv)
+    SUBNET_3_ID=$(az network vnet subnet show --resource-group "$RG" --vnet-name "$VNET" --name "$SUBNET_3" --query "id" --output tsv)
+
+    echo "Creating AKS clusters..."  # Each member gets its own service CIDR and DNS service IP.
+    create_aks_cluster "$MEMBER_1" "$SUBNET_1_ID" 172.16.0.0/16 172.16.0.10
+    create_aks_cluster "$MEMBER_2" "$SUBNET_2_ID" 172.17.0.0/16 172.17.0.10
+    create_aks_cluster "$MEMBER_3" "$SUBNET_3_ID" 172.18.0.0/16 172.18.0.10
+    create_kubefleet_hub_cluster
+
+    echo "Retrieving admin credentials for AKS clusters..."
+    az aks get-credentials -n "$MEMBER_1" -g "$RG" --admin
+    az aks get-credentials -n "$MEMBER_2" -g "$RG" --admin
+    az aks get-credentials -n "$MEMBER_3" -g "$RG" --admin
+    az aks get-credentials -n "$FLEET_HUB" -g "$RG" --admin
+}
+
+function create_acr() {  # Creates registry $ACR, attaches it to all four clusters, and logs the local client in.
+    echo "Creating Azure Container Registry $ACR..."
+    az acr create \
+        --resource-group "$RG" \
+        --name "$ACR" \
+        --sku Standard \
+        --admin-enabled true
+
+    echo "Connecting the ACR to the AKS clusters..."
+    az aks update -n "$MEMBER_1" -g "$RG" --attach-acr "$ACR"
+    az aks update -n "$MEMBER_2" -g "$RG" --attach-acr "$ACR"
+    az aks update -n "$MEMBER_3" -g "$RG" --attach-acr "$ACR"
+    az aks update -n "$FLEET_HUB" -g "$RG" --attach-acr "$ACR"
+
+    echo "Logging into the ACR..."
+    az acr login --name "$ACR"
+}
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/istio.sh b/multi-cluster-ai-with-kaito/istio.sh
new file mode 100644
index 0000000..ced43a3
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/istio.sh
@@ -0,0 +1,41 @@
+function prep_istio_setup() {  # Clones Istio, enters the checkout (left on the directory stack), and sets the global ISTIO_TAG.
+    echo "Cloning the Istio source code repository..."
+    git clone https://github.com/istio/istio.git
+    pushd istio
+
+    ISTIO_TAG=$(curl -fsSL https://storage.googleapis.com/istio-build/dev/1.28-dev)  # -f: fail on an HTTP error instead of capturing the error page as the tag.
+    git fetch --all
+    git checkout 1.28.0-beta.1
+}
+
+function connect_to_multi_cluster_service_mesh() {  # $1/$2: name/context of the cluster to install into; ($3,$4,$5) and ($6,$7,$8): context, name, API server URL of each peer.
+    echo "Connecting AKS cluster $1 to the multi-cluster Istio service mesh..."  # Must run from the Istio checkout: istioctl is built from source via `go run`.
+    kubectl config use-context "$2"
+    go run ./istioctl/cmd/istioctl install \
+        --context "$2" \
+        --set tag="$ISTIO_TAG" \
+        --set hub=gcr.io/istio-testing \
+        --set values.global.meshID=simplemesh \
+        --set values.global.multiCluster.clusterName="$1" \
+        --set values.global.network=simplenet \
+        --set values.pilot.env.ENABLE_GATEWAY_API_INFERENCE_EXTENSION=true
+
+    go run ./istioctl/cmd/istioctl create-remote-secret --context="$3" --name="$4" --server "$5" | kubectl apply --context="$2" -f -
+    go run ./istioctl/cmd/istioctl create-remote-secret --context="$6" --name="$7" --server "$8" | kubectl apply --context="$2" -f -
+}
+
+function set_up_istio() {  # Joins the three member clusters into one Istio mesh; each cluster receives remote secrets for the other two.
+    echo "Performing some preparatory steps before setting Istio up..."
+    prep_istio_setup
+
+    echo "Setting up the Istio multi-cluster service mesh on the KubeFleet member clusters..."
+    MEMBER_1_ADDR="https://$(az aks show --resource-group "$RG" --name "$MEMBER_1" --query "fqdn" -o tsv):443"
+    MEMBER_2_ADDR="https://$(az aks show --resource-group "$RG" --name "$MEMBER_2" --query "fqdn" -o tsv):443"
+    MEMBER_3_ADDR="https://$(az aks show --resource-group "$RG" --name "$MEMBER_3" --query "fqdn" -o tsv):443"
+
+    connect_to_multi_cluster_service_mesh "$MEMBER_1" "$MEMBER_1_CTX" "$MEMBER_2_CTX" "$MEMBER_2" "$MEMBER_2_ADDR" "$MEMBER_3_CTX" "$MEMBER_3" "$MEMBER_3_ADDR"
+    connect_to_multi_cluster_service_mesh "$MEMBER_2" "$MEMBER_2_CTX" "$MEMBER_1_CTX" "$MEMBER_1" "$MEMBER_1_ADDR" "$MEMBER_3_CTX" "$MEMBER_3" "$MEMBER_3_ADDR"
+    connect_to_multi_cluster_service_mesh "$MEMBER_3" "$MEMBER_3_CTX" "$MEMBER_1_CTX" "$MEMBER_1" "$MEMBER_1_ADDR" "$MEMBER_2_CTX" "$MEMBER_2" "$MEMBER_2_ADDR"
+
+    popd  # Leaves the Istio checkout entered by prep_istio_setup.
+}
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/kaito.sh b/multi-cluster-ai-with-kaito/kaito.sh
new file mode 100644
index 0000000..d024ef1
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/kaito.sh
@@ -0,0 +1,68 @@
+function prep_kaito_setup() {  # Registers the KAITO Helm repo, sets GPU_PROVISIONER_VERSION, and downloads the GPU provisioner helper script.
+    echo "Adding the KAITO Helm charts..."
+    helm repo add kaito https://kaito-project.github.io/kaito/charts/kaito
+    helm repo update
+
+    echo "Retrieving the KAITO GPU Provisioner setup script..."  # The downloaded file is executed later, so an HTTP error must not be saved as the script.
+    GPU_PROVISIONER_VERSION=0.3.6
+    curl -fsSLO https://raw.githubusercontent.com/Azure/gpu-provisioner/main/hack/deploy/configure-helm-values.sh
+}
+
+function install_kaito_core() {  # $1: member cluster name; $2: kubeconfig context. Installs the kaito/workspace chart and waits for it.
+    echo "Installing KAITO core components in member cluster $1..."
+    kubectl config use-context "$2"
+    helm upgrade --install kaito-workspace kaito/workspace \
+        --namespace kaito-workspace \
+        --create-namespace \
+        --set clusterName="$1" \
+        --wait
+}
+
+function install_kaito_gpu_provisioner() {  # $1: member cluster name; $2: kubeconfig context. Needs GPU_PROVISIONER_VERSION and ./configure-helm-values.sh.
+    echo "Installing KAITO GPU provisioner in member cluster $1..."
+    kubectl config use-context "$2"
+
+    echo "Creating managed identity..."  # The identity is granted Contributor on the managed cluster resource.
+    local IDENTITY_NAME="kaitogpuprovisioner-$1"
+    az identity create --name "$IDENTITY_NAME" -g "$RG"
+    local IDENTITY_PRINCIPAL_ID; IDENTITY_PRINCIPAL_ID=$(az identity show --name "$IDENTITY_NAME" -g "$RG" --query 'principalId' -o tsv)  # Declared separately so a failing az call is not masked by `local`.
+    az role assignment create \
+        --assignee "$IDENTITY_PRINCIPAL_ID" \
+        --scope "/subscriptions/$SUBSCRIPTION/resourceGroups/$RG/providers/Microsoft.ContainerService/managedClusters/$1" \
+        --role "Contributor"
+
+    echo "Configuring Helm values..."  # The install below reads gpu-provisioner-values.yaml; presumably this script generates it - confirm.
+    chmod +x ./configure-helm-values.sh && ./configure-helm-values.sh "$1" "$RG" "$IDENTITY_NAME"
+
+    echo "Installing Helm chart..."
+    helm upgrade --install gpu-provisioner \
+        --values gpu-provisioner-values.yaml \
+        --set settings.azure.clusterName="$1" \
+        --wait \
+        "https://github.com/Azure/gpu-provisioner/raw/gh-pages/charts/gpu-provisioner-$GPU_PROVISIONER_VERSION.tgz" \
+        --namespace gpu-provisioner \
+        --create-namespace
+
+    echo "Enabling federated authentication..."  # Federates the gpu-provisioner service account with the identity via the cluster's OIDC issuer.
+    local AKS_OIDC_ISSUER; AKS_OIDC_ISSUER=$(az aks show -n "$1" -g "$RG" --query "oidcIssuerProfile.issuerUrl" -o tsv)
+    az identity federated-credential create \
+        --name "kaito-federated-credential-$1" \
+        --identity-name "$IDENTITY_NAME" \
+        -g "$RG" \
+        --issuer "$AKS_OIDC_ISSUER" \
+        --subject system:serviceaccount:"gpu-provisioner:gpu-provisioner" \
+        --audience api://AzureADTokenExchange
+}
+
+function set_up_kaito() {  # Installs KAITO core and the GPU provisioner on $MEMBER_1 and $MEMBER_2 only.
+    echo "Performing some preparatory steps before setting KAITO up..."
+    prep_kaito_setup
+
+    echo "Installing KAITO in member cluster $MEMBER_1..."
+    install_kaito_core "$MEMBER_1" "$MEMBER_1_CTX"
+    install_kaito_gpu_provisioner "$MEMBER_1" "$MEMBER_1_CTX"
+
+    echo "Installing KAITO in member cluster $MEMBER_2..."
+    install_kaito_core "$MEMBER_2" "$MEMBER_2_CTX"
+    install_kaito_gpu_provisioner "$MEMBER_2" "$MEMBER_2_CTX"
+}
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/kubefleet_placement.sh b/multi-cluster-ai-with-kaito/kubefleet_placement.sh
new file mode 100644
index 0000000..4d8c6e8
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/kubefleet_placement.sh
@@ -0,0 +1,451 @@
+function install_crds_on_hub_cluster() {  # Installs on the hub the CRDs for every resource kind that is created there for placement.
+    echo "Installing required CRDs for resource placement..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+
+    echo "Adding the KAITO workspace CRD..."
+    kubectl apply -f https://raw.githubusercontent.com/kaito-project/kaito/refs/tags/v0.7.1/charts/kaito/workspace/crds/kaito.sh_workspaces.yaml
+
+    echo "Adding Kubernetes Gateway API CRDs..."
+    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.3.0/standard-install.yaml
+
+    echo "Adding Kubernetes Gateway API Inference Extension CRDs..."  # NOTE(review): "latest" is unpinned, unlike the other manifests here - confirm this is intended.
+    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml
+    # Delete the v1alpha1 Gateway Inference Extension CRD to avoid conflicts.
+    kubectl delete customresourcedefinition.apiextensions.k8s.io/inferencepools.inference.networking.x-k8s.io --ignore-not-found
+
+    echo "Adding the Istio DestinationRule CRD..."
+    kubectl apply -f https://gist.githubusercontent.com/michaelawyu/b93fec3b8eadc032a14bd52193080380/raw/9336c4c7bb0c5a73864ace6a73b64bc5ef9b9bff/istio-dr-crd.yaml
+}
+
+function install_crds_on_member_cluster() {  # $1: member cluster name (used in the log line); $2: kubeconfig context to install into.
+    echo "Installing required CRDs for resource placement on member cluster $1..."
+    kubectl config use-context "$2"
+
+    echo "Adding Kubernetes Gateway API CRDs..."
+    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.3.0/standard-install.yaml
+
+    echo "Adding Kubernetes Gateway API Inference Extension CRDs..."  # NOTE(review): "latest" is unpinned - confirm this is intended.
+    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml
+    # Delete the v1alpha1 Gateway Inference Extension CRD to avoid conflicts.
+    kubectl delete customresourcedefinition.apiextensions.k8s.io/inferencepools.inference.networking.x-k8s.io --ignore-not-found
+}
+
+function label_member_clusters() {  # Sets the env labels that the workspace placements select on.
+    echo "Labeling member clusters for resource placement..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+    kubectl label --overwrite membercluster "$MEMBER_1" env=prod  # --overwrite keeps a re-run from failing on an existing label.
+    kubectl label --overwrite membercluster "$MEMBER_2" env=staging
+}
+
+function place_kaito_workspaces() {  # $1: cluster name (log only); $2: workspace name; $3: preset name; $4: placement name suffix; $5: env label to match.
+    echo "Placing Kaito workspaces on member cluster $1..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+
+    echo "Adding the workspace to the KubeFleet hub cluster..."
+    kubectl apply -f - <<EOF
+apiVersion: kaito.sh/v1beta1
+kind: Workspace
+metadata:
+  name: $2
+  namespace: default
+inference:
+  preset:
+    accessMode: public
+    name: $3
+    presetOptions: {}
+resource:
+  count: 1
+  instanceType: $GPU_VM_SIZE
+  labelSelector:
+    matchLabels:
+      apps: $2
+EOF
+
+    echo "Adding the ResourcePlacement API object to the KubeFleet hub cluster..."  # Picks one cluster whose env label equals $5.
+    kubectl apply -f - <<EOF
+apiVersion: placement.kubernetes-fleet.io/v1beta1
+kind: ResourcePlacement
+metadata:
+  name: kaito-workspace-$4
+  namespace: default
+spec:
+  resourceSelectors:
+    - group: kaito.sh
+      kind: Workspace
+      name: $2
+      version: v1beta1
+  policy:
+    placementType: PickN
+    numberOfClusters: 1
+    affinity:
+      clusterAffinity:
+        requiredDuringSchedulingIgnoredDuringExecution:
+          clusterSelectorTerms:
+            - labelSelector:
+                matchLabels:
+                  env: $5
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 100%
+      unavailablePeriodSeconds: 1
+    applyStrategy:
+      whenToTakeOver: IfNoDiff
+      whenToApply: IfNotDrifted
+      allowCoOwnership: true
+    reportBackStrategy:
+      type: Mirror
+      destination: OriginalResource
+EOF
+}
+
+function place_inf_pool_epp_via_kubefleet() {  # $1: inferencepool Helm release name; $2: KAITO workspace name to select pods by; $3: target member cluster.
+    echo "Placing inference pools + EPPs on member cluster $3..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+
+    echo "Installing related resources on the KubeFleet hub cluster..."  # Installs the chart on the hub, then forces the pool's first target port to 5000.
+    helm upgrade --install "$1" \
+        --set inferencePool.modelServers.matchLabels."kaito\.sh\/workspace"="$2" \
+        --set inferencePool.targetPortNumber=5000 \
+        --set provider.name=istio \
+        --version v1.0.0 \
+        oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool
+    kubectl patch infpool "$1" --type='json' -p='[{"op": "replace", "path": "/spec/targetPorts/0/number", "value":5000}]'
+
+    kubectl apply -f - <<EOF
+apiVersion: networking.istio.io/v1
+kind: DestinationRule
+metadata:
+  name: $1-epp
+  namespace: default
+spec:
+  host: $1-epp
+  trafficPolicy:
+    tls:
+      insecureSkipVerify: true
+      mode: SIMPLE
+EOF
+
+    echo "Adding the ClusterResourcePlacement API object to the KubeFleet hub cluster..."  # Covers the cluster-scoped RBAC objects.
+    kubectl apply -f - <<EOF
+apiVersion: placement.kubernetes-fleet.io/v1beta1
+kind: ClusterResourcePlacement
+metadata:
+  name: infpool-epp-$1
+spec:
+  resourceSelectors:
+    - group: rbac.authorization.k8s.io
+      kind: ClusterRole
+      name: $1-epp
+      version: v1
+    - group: rbac.authorization.k8s.io
+      kind: ClusterRoleBinding
+      name: $1-epp
+      version: v1
+  policy:
+    placementType: PickFixed
+    clusterNames:
+    - $3
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 100%
+      unavailablePeriodSeconds: 1
+    applyStrategy:
+      whenToTakeOver: IfNoDiff
+      whenToApply: IfNotDrifted
+EOF
+
+    echo "Adding the ResourcePlacement API object to the KubeFleet hub cluster..."  # Covers the objects in the default namespace.
+    kubectl apply -f - <<EOF
+apiVersion: placement.kubernetes-fleet.io/v1beta1
+kind: ResourcePlacement
+metadata:
+  name: infpool-epp-$1
+  namespace: default
+spec:
+  resourceSelectors:
+    - group: ""
+      kind: ConfigMap
+      name: $1-epp
+      version: v1
+    - group: apps
+      kind: Deployment
+      name: $1-epp
+      version: v1
+    - group: ""
+      kind: Service
+      name: $1-epp
+      version: v1
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: $1
+      version: v1
+    - group: rbac.authorization.k8s.io
+      kind: Role
+      name: $1-epp
+      version: v1
+    - group: rbac.authorization.k8s.io
+      kind: RoleBinding
+      name: $1-epp
+      version: v1
+    - group: ""
+      kind: ServiceAccount
+      name: $1-epp
+      version: v1
+    - group: networking.istio.io
+      kind: DestinationRule
+      name: $1-epp
+      version: v1
+  policy:
+    placementType: PickFixed
+    clusterNames:
+    - $3
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 100%
+      unavailablePeriodSeconds: 1
+    applyStrategy:
+      whenToTakeOver: IfNoDiff
+      whenToApply: IfNotDrifted
+EOF
+}
+
+function place_inf_pool_epp_for_routing_via_kubefleet() {  # Places the Service, InferencePool and DestinationRule of both pools on $MEMBER_3.
+    echo "Placing inference pools + EPPs on member cluster $MEMBER_3..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+
+    echo "Adding the ResourcePlacement API object to the KubeFleet hub cluster..."
+    kubectl apply -f - <<EOF
+apiVersion: placement.kubernetes-fleet.io/v1beta1
+kind: ResourcePlacement
+metadata:
+  name: infpool-epp-routing
+  namespace: default
+spec:
+  resourceSelectors:
+    - group: ""
+      kind: Service
+      name: $DEEPSEEK_INF_POOL_INSTALLATION-epp
+      version: v1
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: $DEEPSEEK_INF_POOL_INSTALLATION
+      version: v1
+    - group: networking.istio.io
+      kind: DestinationRule
+      name: $DEEPSEEK_INF_POOL_INSTALLATION-epp
+      version: v1
+    - group: ""
+      kind: Service
+      name: $PHI4_INF_POOL_INSTALLATION-epp
+      version: v1
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: $PHI4_INF_POOL_INSTALLATION
+      version: v1
+    - group: networking.istio.io
+      kind: DestinationRule
+      name: $PHI4_INF_POOL_INSTALLATION-epp
+      version: v1
+  policy:
+    placementType: PickFixed
+    clusterNames:
+    - $MEMBER_3
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 100%
+      unavailablePeriodSeconds: 1
+    applyStrategy:
+      whenToTakeOver: IfNoDiff
+      whenToApply: IfNotDrifted
+EOF
+}
+
+function place_single_cluster_gateway_via_kubefleet() {  # $1: target member cluster; $2: InferencePool name, also used as the suffix of the object names.
+    echo "Placing gateways on member cluster $1..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+
+    echo "Adding the Gateway API object to the KubeFleet hub cluster..."  # No namespace is set on the Gateway or HTTPRoute; kubectl uses the context's namespace.
+    kubectl apply -f - <<EOF
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: $INFERENCE_GATEWAY-$2
+spec:
+  gatewayClassName: istio
+  listeners:
+  - name: http
+    port: 80
+    protocol: HTTP
+EOF
+
+    echo "Adding the HTTPRoute API object to the KubeFleet hub cluster..."  # Routes every path to the InferencePool named $2.
+    kubectl apply -f - <<EOF
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: single-model-routes-$2
+spec:
+  parentRefs:
+  - name: $INFERENCE_GATEWAY-$2
+  rules:
+  - matches:
+    - path:
+        type: PathPrefix
+        value: /
+    backendRefs:
+    - name: $2
+      group: inference.networking.k8s.io
+      kind: InferencePool
+EOF
+
+    echo "Adding the ResourcePlacement API object to the KubeFleet hub cluster..."
+    kubectl apply -f - <<EOF
+apiVersion: placement.kubernetes-fleet.io/v1beta1
+kind: ResourcePlacement
+metadata:
+  name: gateway-$2
+  namespace: default
+spec:
+  resourceSelectors:
+    - group: gateway.networking.k8s.io
+      kind: Gateway
+      name: $INFERENCE_GATEWAY-$2
+      version: v1
+    - group: gateway.networking.k8s.io
+      kind: HTTPRoute
+      name: single-model-routes-$2
+      version: v1
+  policy:
+    placementType: PickFixed
+    clusterNames:
+    - $1
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 100%
+      unavailablePeriodSeconds: 1
+    applyStrategy:
+      whenToTakeOver: IfNoDiff
+      whenToApply: IfNotDrifted
+EOF
+}
+
+function place_multi_cluster_gateway_via_kubefleet() {  # Creates the routing Gateway and HTTPRoute on the hub and places them on $MEMBER_3.
+    echo "Placing multi-cluster gateways on member cluster $MEMBER_3..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+
+    echo "Adding the Gateway API object to the KubeFleet hub cluster..."
+    kubectl apply -f - <<EOF
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: $INFERENCE_GATEWAY
+spec:
+  gatewayClassName: istio
+  listeners:
+  - name: http
+    port: 80
+    protocol: HTTP
+EOF
+
+    echo "Adding the HTTPRoute API object to the KubeFleet hub cluster..."  # Backends use the configured pool names, not literals, so overrides keep working.
+    kubectl apply -f - <<EOF
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: multi-model-routes
+spec:
+  parentRefs:
+  - name: $INFERENCE_GATEWAY
+  rules:
+  - matches:
+    - headers:
+      - type: Exact
+        name: x-selected-model
+        value: deepseek-r1-distill-qwen-14b
+      path:
+        type: PathPrefix
+        value: /
+    backendRefs:
+    - name: $DEEPSEEK_INF_POOL_INSTALLATION
+      group: inference.networking.k8s.io
+      kind: InferencePool
+  - matches:
+    - headers:
+      - type: Exact
+        name: x-selected-model
+        value: phi-4
+      path:
+        type: PathPrefix
+        value: /
+    backendRefs:
+    - name: $PHI4_INF_POOL_INSTALLATION
+      group: inference.networking.k8s.io
+      kind: InferencePool
+EOF
+
+    echo "Adding the ResourcePlacement API object to the KubeFleet hub cluster..."
+    kubectl apply -f - <<EOF
+apiVersion: placement.kubernetes-fleet.io/v1beta1
+kind: ResourcePlacement
+metadata:
+  name: llm-routing-gateway
+  namespace: default
+spec:
+  resourceSelectors:
+    - group: gateway.networking.k8s.io
+      kind: Gateway
+      name: $INFERENCE_GATEWAY
+      version: v1
+    - group: gateway.networking.k8s.io
+      kind: HTTPRoute
+      name: multi-model-routes
+      version: v1
+    - group: networking.istio.io
+      kind: DestinationRule
+      name: $DEEPSEEK_INF_POOL_INSTALLATION-epp
+      version: v1
+    - group: networking.istio.io
+      kind: DestinationRule
+      name: $PHI4_INF_POOL_INSTALLATION-epp
+      version: v1
+  policy:
+    placementType: PickFixed
+    clusterNames:
+    - $MEMBER_3
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 100%
+      unavailablePeriodSeconds: 1
+    applyStrategy:
+      whenToTakeOver: IfNoDiff
+      whenToApply: IfNotDrifted
+EOF
+}
+
+function place_resources_via_kubefleet() {  # Entry point: installs CRDs, labels clusters, then creates every placement in order.
+    echo "Placing resources via KubeFleet..."
+
+    install_crds_on_hub_cluster
+    install_crds_on_member_cluster "$MEMBER_1" "$MEMBER_1_CTX"
+    install_crds_on_member_cluster "$MEMBER_2" "$MEMBER_2_CTX"
+    install_crds_on_member_cluster "$MEMBER_3" "$MEMBER_3_CTX"
+    label_member_clusters
+
+    place_kaito_workspaces "$MEMBER_1" "$DEEPSEEK_WORKSPACE" "$DEEPSEEK_MODEL" "$DEEPSEEK_INF_POOL_INSTALLATION" "prod"
+    place_kaito_workspaces "$MEMBER_2" "$PHI4_WORKSPACE" "$PHI4_MODEL" "$PHI4_INF_POOL_INSTALLATION" "staging"
+
+    place_inf_pool_epp_via_kubefleet "$DEEPSEEK_INF_POOL_INSTALLATION" "$DEEPSEEK_WORKSPACE" "$MEMBER_1"
+    place_inf_pool_epp_via_kubefleet "$PHI4_INF_POOL_INSTALLATION" "$PHI4_WORKSPACE" "$MEMBER_2"
+
+    place_single_cluster_gateway_via_kubefleet "$MEMBER_1" "$DEEPSEEK_INF_POOL_INSTALLATION"
+    place_single_cluster_gateway_via_kubefleet "$MEMBER_2" "$PHI4_INF_POOL_INSTALLATION"
+
+    place_inf_pool_epp_for_routing_via_kubefleet
+    place_multi_cluster_gateway_via_kubefleet
+}
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/kubefleet_setup.sh b/multi-cluster-ai-with-kaito/kubefleet_setup.sh
new file mode 100644
index 0000000..ba5cf60
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/kubefleet_setup.sh
@@ -0,0 +1,118 @@
+function build_kubefleet_images() {  # Clones KubeFleet and builds its three images; the shell is left inside ./kubefleet (no popd here).
+    export OUTPUT_TYPE='type=registry'
+    export REGISTRY="${ACR}.azurecr.io"
+    export TAG='demo'
+    export TARGET_ARCH='amd64'
+    export AUTO_DETECT_ARCH='FALSE'
+
+    echo "Cloning the KubeFleet source code repository..."
+    git clone https://github.com/kubefleet-dev/kubefleet.git
+    pushd kubefleet
+    git checkout kubefleet-kaito-demo-2025
+
+    echo "Building the KubeFleet images and pushing them to ACR..."  # The exports above are what the make targets receive from the environment.
+    local build_target; for build_target in docker-build-hub-agent docker-build-member-agent docker-build-refresh-token; do
+        make "$build_target"
+    done
+}
+
+function install_kubefleet_hub_agent() {  # Installs the hub-agent chart from ./charts/hub-agent/, so it must run from the KubeFleet checkout.
+    echo "Installing KubeFleet hub agent in the KubeFleet hub cluster..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+    helm upgrade --install hub-agent ./charts/hub-agent/ \
+        --set image.pullPolicy=Always \
+        --set image.repository="$REGISTRY/$HUB_AGENT_IMAGE" \
+        --set image.tag="$TAG" \
+        --set namespace=fleet-system \
+        --set logVerbosity=5 \
+        --set enableWebhook=false \
+        --set webhookClientConnectionType=service \
+        --set forceDeleteWaitTime="1m0s" \
+        --set clusterUnhealthyThreshold="3m0s" \
+        --set logFileMaxSize=100000 \
+        --set MaxConcurrentClusterPlacement=200 \
+        --set resourceSnapshotCreationMinimumInterval="$RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL" \
+        --set resourceChangesCollectionDuration="$RESOURCE_CHANGES_COLLECTION_DURATION"
+}
+
+function set_up_kubefleet_member_cluster_access() {  # $1: member cluster name; $2: its kubeconfig context. Copies a hub service-account token into the member.
+    echo "Creating the service account for KubeFleet member cluster $1..."  # Rendered client-side and applied so that a re-run does not fail on AlreadyExists.
+    kubectl config use-context "$FLEET_HUB_CTX"
+    kubectl create serviceaccount "fleet-member-agent-$1" -n fleet-system --dry-run=client -o yaml | kubectl apply -f -
+    kubectl apply -f - <<EOF
+apiVersion: v1
+kind: Secret
+metadata:
+  name: fleet-member-agent-$1-sa
+  namespace: fleet-system
+  annotations:
+    kubernetes.io/service-account.name: fleet-member-agent-$1
+type: kubernetes.io/service-account-token
+EOF
+
+    echo "Adding the service account token to the KubeFleet member cluster $1..."  # NOTE(review): the token is read right after the Secret is applied - confirm it is populated by then.
+    local TOKEN; TOKEN=$(kubectl get secret "fleet-member-agent-$1-sa" -n fleet-system -o jsonpath='{.data.token}' | base64 -d)  # Declared separately so a failure is not masked by `local`.
+    kubectl config use-context "$2"
+    kubectl delete secret hub-kubeconfig-secret --ignore-not-found
+    kubectl create secret generic hub-kubeconfig-secret --from-literal=token="$TOKEN"
+}
+
+function install_kubefleet_member_agent() {  # $1: member cluster name; $2: its kubeconfig context. Reads FLEET_HUB_ADDR; chart path is relative to the KubeFleet checkout.
+    echo "Installing KubeFleet member agent in the KubeFleet member cluster $1..."
+    kubectl config use-context "$2"
+
+    helm upgrade --install member-agent ./charts/member-agent/ \
+        --set config.hubURL="$FLEET_HUB_ADDR" \
+        --set image.repository="$REGISTRY/$MEMBER_AGENT_IMAGE" \
+        --set image.tag="$TAG" \
+        --set refreshtoken.repository="$REGISTRY/$REFRESH_TOKEN_IMAGE" \
+        --set refreshtoken.tag="$TAG" \
+        --set image.pullPolicy=Always \
+        --set refreshtoken.pullPolicy=Always \
+        --set config.memberClusterName="$1" \
+        --set logVerbosity=5 \
+        --set namespace=fleet-system \
+        --set enableV1Alpha1APIs=false \
+        --set enableV1Beta1APIs=true \
+        --set propertyProvider="$PROPERTY_PROVIDER"
+}
+
+function create_member_cluster_object() {  # $1: member cluster name. Its identity is the fleet-member-agent-$1 service account in fleet-system.
+    echo "Creating KubeFleet MemberCluster API object for cluster $1 in the hub cluster..."
+    kubectl config use-context "$FLEET_HUB_CTX"
+
+    kubectl apply -f - <<EOF
+apiVersion: cluster.kubernetes-fleet.io/v1beta1
+kind: MemberCluster
+metadata:
+  name: $1
+spec:
+  identity:
+    name: fleet-member-agent-$1
+    kind: ServiceAccount
+    namespace: fleet-system
+    apiGroup: ""
+EOF
+}
+
+function set_up_kubefleet() {  # Installs the hub agent, then for each member: access secret, member agent, MemberCluster object.
+    echo "Setting up the KubeFleet hub cluster..."
+    install_kubefleet_hub_agent
+
+    echo "Setting up the KubeFleet member clusters..."
+    FLEET_HUB_ADDR="https://$(az aks show --resource-group "$RG" --name "$FLEET_HUB" --query "fqdn" -o tsv):443"
+
+    set_up_kubefleet_member_cluster_access "$MEMBER_1" "$MEMBER_1_CTX"
+    install_kubefleet_member_agent "$MEMBER_1" "$MEMBER_1_CTX"
+    create_member_cluster_object "$MEMBER_1"
+
+    set_up_kubefleet_member_cluster_access "$MEMBER_2" "$MEMBER_2_CTX"
+    install_kubefleet_member_agent "$MEMBER_2" "$MEMBER_2_CTX"
+    create_member_cluster_object "$MEMBER_2"
+
+    set_up_kubefleet_member_cluster_access "$MEMBER_3" "$MEMBER_3_CTX"
+    install_kubefleet_member_agent "$MEMBER_3" "$MEMBER_3_CTX"
+    create_member_cluster_object "$MEMBER_3"
+
+    popd  # Pops a directory pushed earlier; presumably the pushd in build_kubefleet_images - confirm call order.
+}
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/semantic_router.sh b/multi-cluster-ai-with-kaito/semantic_router.sh
new file mode 100644
index 0000000..75c0471
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/semantic_router.sh
@@ -0,0 +1,11 @@
+function set_up_semantic_router() {  # Clones the semantic-router fork and installs its Helm chart on $MEMBER_3.
+    echo "Setting up semantic router in member cluster $MEMBER_3..."
+    git clone https://github.com/rambohe-ch/semantic-router.git
+    pushd semantic-router
+    git checkout add-helm-chart
+
+    kubectl config use-context "$MEMBER_3_CTX"  # NOTE(review): no --create-namespace below; confirm the namespace exists or the chart creates it.
+    helm upgrade --install semantic-router --namespace vllm-semantic-router-system ./deploy/helm/semantic-router
+
+    popd
+}
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/setup.sh b/multi-cluster-ai-with-kaito/setup.sh
new file mode 100755
index 0000000..0210a75
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/setup.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+set -o errexit
+set -o nounset
+set -o pipefail
+
+# Required variables.
+if [ -z "$SUBSCRIPTION" ]; then echo "Variable SUBSCRIPTION is not set"; fi
+
+# Default configuration for the setup.
+RG="${RG:-kubefleet-kaito-demo-2025}"
+LOCATION="${LOCATION:-eastus}"
+VNET="${VNET:-shared-vnet}"
+VNET_ADDR_PREFIX="${VNET_ADDR_PREFIX:-10.0.0.0/8}"  # no inner '...': inside double quotes they would become part of the value
+SUBNET_1="${SUBNET_1:-aks-subnet-1}"
+SUBNET_1_ADDR_PREFIX="${SUBNET_1_ADDR_PREFIX:-10.1.0.0/16}"
+SUBNET_2="${SUBNET_2:-aks-subnet-2}"
+SUBNET_2_ADDR_PREFIX="${SUBNET_2_ADDR_PREFIX:-10.2.0.0/16}"
+SUBNET_3="${SUBNET_3:-aks-subnet-routing}"
+SUBNET_3_ADDR_PREFIX="${SUBNET_3_ADDR_PREFIX:-10.3.0.0/16}"
+FLEET_HUB="${FLEET_HUB:-hub-cluster}"
+MEMBER_1="${MEMBER_1:-model-serving-cluster-1}"
+MEMBER_2="${MEMBER_2:-model-serving-cluster-2}"
+MEMBER_3="${MEMBER_3:-query-routing-cluster}"
+ACR="${ACR:-kubefleetkaitodemo2025$(echo $RANDOM | md5sum | head -c 6)}"
+VM_SIZE="${VM_SIZE:-Standard_D4s_v3}"
+GPU_VM_SIZE="${GPU_VM_SIZE:-Standard_NC24ads_A100_v4}"
+DEEPSEEK_WORKSPACE="${DEEPSEEK_WORKSPACE:-workspace-deepseek-r1-distill-qwen-14b}"
+PHI4_WORKSPACE="${PHI4_WORKSPACE:-workspace-phi-4}"
+DEEPSEEK_MODEL="${DEEPSEEK_MODEL:-deepseek-r1-distill-qwen-14b}"
+PHI4_MODEL="${PHI4_MODEL:-phi-4}"
+DEEPSEEK_INF_POOL_INSTALLATION="${DEEPSEEK_INF_POOL_INSTALLATION:-deepseek}"
+PHI4_INF_POOL_INSTALLATION="${PHI4_INF_POOL_INSTALLATION:-phi4}"
+MEMBER_1_CTX=$MEMBER_1-admin
+MEMBER_2_CTX=$MEMBER_2-admin
+MEMBER_3_CTX=$MEMBER_3-admin
+FLEET_HUB_CTX=$FLEET_HUB-admin
+INFERENCE_GATEWAY="inference-gateway"
+
+# The configuration below are for the KubeFleet setup; in most cases they do not need to be changed.
+HUB_AGENT_IMAGE="hub-agent"
+MEMBER_AGENT_IMAGE="member-agent"
+REFRESH_TOKEN_IMAGE="refresh-token"
+PROPERTY_PROVIDER="azure"
+RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL="0m"
+RESOURCE_CHANGES_COLLECTION_DURATION="0m"
+REGISTRY="$ACR.azurecr.io"
+TAG="demo"
+
+# Source the utility functions.
+source ./azresources.sh
+source ./kubefleet_setup.sh
+source ./istio.sh
+source ./kaito.sh
+source ./kubefleet_placement.sh
+source ./semantic_router.sh
+
+# Log in to Azure CLI and set the subscription to use.
+az login
+az account set --subscription $SUBSCRIPTION
+
+# Set up the Azure resource group.
+echo "Creating resource group $RG in location $LOCATION..."
+az group create --name $RG --location $LOCATION
+
+# Set up the Azure networking resources.
+create_azure_vnet
+create_azure_vnet_subnets
+
+# Set up the AKS clusters.
+create_aks_clusters
+
+# Set up the ACR.
+create_acr
+
+# Set up KubeFleet.
+build_kubefleet_images
+set_up_kubefleet
+
+# Set up Istio.
+set_up_istio
+
+# Set up Kaito.
+set_up_kaito
+
+# Place resources via KubeFleet.
+place_resources_via_kubefleet
+
+# Set up semantic router.
+set_up_semantic_router

From 1113216204b4e6575acf5aabd3c7b1b45d59392a Mon Sep 17 00:00:00 2001
From: Simon Waight <simon.waight@gmail.com>
Date: Tue, 4 Nov 2025 13:26:09 +1100
Subject: [PATCH 2/7] Revise README title and description

Updated the title and description of the README file.

Signed-off-by: michaelawyu <chenyu1@microsoft.com>
---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index d382b91..7171531 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
 # KubeFleet Cookbook
+<<<<<<< HEAD
 
 A collection of various demos, tutorials, and labs for using the KubeFleet project.
 
 **WIP**
+=======
+Examples and guides on using KubeFleet to manage multicluster scenarios.
+>>>>>>> b26101c (Revise README title and description)

From 5ef7666ea9c756cb30e35f4b1ebd1b9f1df8dc1a Mon Sep 17 00:00:00 2001
From: michaelawyu <chenyu1@microsoft.com>
Date: Thu, 6 Nov 2025 14:11:39 +0800
Subject: [PATCH 3/7] Minor changes

Signed-off-by: michaelawyu <chenyu1@microsoft.com>
---
 multi-cluster-ai-with-kaito/SETUP.md          |   0
 .../charts/semantic-router.tgz                | Bin 0 -> 8712 bytes
 multi-cluster-ai-with-kaito/istio.sh          |   7 +--
 multi-cluster-ai-with-kaito/kaito.sh          |   3 +-
 multi-cluster-ai-with-kaito/litellm/READMD.md |   2 +
 .../semantic_router.sh                        |  54 ++++++++++++++++--
 multi-cluster-ai-with-kaito/setup.sh          |   8 ++-
 7 files changed, 64 insertions(+), 10 deletions(-)
 create mode 100644 multi-cluster-ai-with-kaito/SETUP.md
 create mode 100644 multi-cluster-ai-with-kaito/charts/semantic-router.tgz
 create mode 100644 multi-cluster-ai-with-kaito/litellm/READMD.md

diff --git a/multi-cluster-ai-with-kaito/SETUP.md b/multi-cluster-ai-with-kaito/SETUP.md
new file mode 100644
index 0000000..e69de29
diff --git a/multi-cluster-ai-with-kaito/charts/semantic-router.tgz b/multi-cluster-ai-with-kaito/charts/semantic-router.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..90e0a338c5403e3ad76fdff44ebe9ffa0ea90557
GIT binary patch
literal 8712
zcmV+jBKO@NiwG0|00000|0w_~VMtOiV@ORlOnEsqVl!4SWK%V1T2nbTPgYhoO;>Dc
zVQyr3R8em|NM&qo0PKBjd)v0MU_bL$%#nLe6Cab3{FYR^xo2I+O}sZwbYr*OKHaPg
zk&uKMBv=5nt=KugeIEb>3BE|SVmoPi!w-o?0)xR|Fc<*yB5;fnNkX5qOk&>MpCK-T
zIg05w_xAL9z25Hjw*J4@>(&47?LO&$)8Fa$wt8FJo10I*>Gij_`di;X@BUC&{A5BR
z{!Q=BZPh#Xg*+%FKVvQkO9pVh>7g_&eo`c{kY0pE$Vn>oyFGY`X$+y_5|}U!$2tGN
zTg^Y<!(k#ZrDTc|2}k5oW*qx$;vcaHQ;`?*KvzDPOh`yn!B?-2;4qnBj*}33GZy0k
z%%n`kpxd1iIm^aD$l~rfrLmuK_B#$`x5m2O8J=G-9*KeHL;af%l*XP9q-etj^@$`?
z&j&=j7?DIGl3=du;}{V-0O#%hQf&u_cmN4f=$$K^hXxnVysEb<SjI!F0j}B^IZyEb
zP?}N_B1L=e+>T2x=m$OTp2ojE|IZQ4u()3VuzddacYE8r_4&WEv;B4ce~M=fp5qD1
zs01_edS2F<aScvpL;w+h;McvQSN??Y7)gmEm=KBsPeBPO;uy{mCumHu0LfsCAw@#q
z2uLCs%vr`k;yBfkCGfm8ctP11Q4m<FsVKau{yoSinMhHnW}2EPHJp4g7ZS$`#%o)$
z1XEV_078zD!~>8#!>asYjHXUmqJFA!9H&eW$@qK#(^<#^!j{g_F20m0XQ2m>W|R&Y
zCE<Jkd-MX$g$E$fbO0SAMLV8pXP8lXj6;s47{G7uG@@_LG3O-01L$<TNk(borKWF6
z**sRVvcQ~UEgkk*mdFA0Rk33nW}L`*SxDq>_E{qFrPLkGl0EU7C2tv%1DGHxum^xB
zWfwzE&I!d+d>}$ZkrDc)goe(~iA!f;UMQeOVC~aPNX!q19>A2Rq51-lGA;+O)9dZ@
z^=~6B)pylyO59RUdrx~#p^zdW$Yk!LREgdgOHM*zD&RzMT9l7$3F7<y_b!OMj_SZ$
zY(z3p5)-K?ScqtVH8@O&gplPTABwLY07;14w_i;29RHDFA#-FDZn5T_&Q5>xh;;I|
zVVVt~v$@lY%^TBlqfY-tt;SBz)L4TfM3MzqsrBi$SZ&o?=r2WRw_bcx^Wl)QF%}A)
z37KZxEKQW0;{*$#KPpJ|k&$6~%)&D)-I316Us0AI`W#a<KgJ<TA~AqnHGnDRghj=>
zE%i>47_&?kFZ=4{1QD8Xd@|!$%ovRZu;qCiqlnxuR^Q(pt2G#^bzDd_lYeG3i=F9|
z+D~CCzCtpNrg&dXiPxI7(ff>JC4$vF3PUV}QnLo|7Nh7FP9%P#R2=|8-eZN;3pIyO
zk}#<UP7L7cgTg6S&Lo3~T_lvD$cyzWg@%2#Xwt30D<UMY3B(OBFySnQm)Uemk}3R%
zLY(uQke1_oJJXTGQ^x0`e99BdM+zWM#+XZgj0D!dX<p9=?154peXE7~M!oFb9+tNT
z`xRa`oNeU64+n2g3~g$K02Cbk#dJdhJB*YHnCS8IB}a)+>m=r)izxMvNJ3s6`LFt$
z{`sb^Bumcd1-trHCh)$RZDy@fI1~!@J_={Jfrnj_^w&irG)CbWPNEYnqRk9fpx>l9
z6LieG4loX#n9%b~n7J=VOej*z$BB#UQ`2X3d5}m>ux9w45xG**a%$}6FY!4ERa16}
zoRA4Ky*puyibkiTdRyyEz6hyGL(bw<!ix;?s8Ki7AN9Az5VY=okVGjXiEOs7R{i3u
zfnvF|6JeYRjL&?W7qC(iMG?nBQM14KB<Kabpl?6r%TlkmnZLNeWIEGpb_I4iBir``
z*Q^|^tgCr-zhHB4`o#NwM1&-i`hQ&D#P4s9J9eIOj08(aGW`+7gw7pO6zFg~!JOl$
zq#gX$La=wXJRvzp6jNF&AwSG>oz!7rMH_#62{IkkVO<C!h6N-{g2+-uqO$Z-M61+a
z6-iC-{X*O{s9s7U1IUUfM~FCLq+>+s?FE?~gG}I~;=4n?R%ug{H~DIB5kqN8lIal1
z*+<}0mW&OY;CI22PEIECjo{|g3yV+Hc{k34Ud&xW9fyo#QI3mmUJnc#6t4k<DyRk5
zj`KDX_^oA(QVp(wS~fM>T6l7c25g@wy0}$$R>k4_g*Z6Fd>e9Yuh5xP%)=R@Y&yT=
z5Iyt=?XJ+NF}Zz@KHDhWhGK=1HN!Cxl0QV3`a$o$y0pp|&4|*A@5*(5D#5_yOrMe7
zp+k$g>oZKF8Aepj9%L3RX8sjf?lufy`=b<G7L)LHtI=nhMXLyg7(K`U-D7WV!_mcJ
zG@FY*{qU?J5jr7@Q+>fALZbGUoxjeSl+Flcf~B*ES!X|wgh-|&!I+nOjz8%@-Nb}i
zU$<8TNBQwfWPTsr0(V`%fC_rM)~5GYH*|&iZ(&=KnN#LYI74z|ZxTjl7l=<qu?s~U
z#aLoKfR3Dz<cuU!^GzYO^^uUwDZ;1<aGFY=2~h)CbY+Am6UJp5xb`yBRNf1$%Al3R
zKhU4c@hiTV4@#Aq&OE(W#fa;pG;^%v>~QynhmEl-PVmP_hBLbdHrJNL&K6z%puQgG
zCy`c)hAas)&T$gDM@&0qt^KmV0PI|@TmwMsnU~=ni3r8f3uf+m@d-!aS#z4Ge!s<n
z(M+LyLgE%hNuW5T_^rN4au<Opr^63fcxG;OdO@$(-vIUBj{eX5*fc-%zxs`Nv9kgF
z4cJuwt3P`iu+@84wo2aP60#~-6x-}=z)ny7t$+01>ANr%VGx+&P%`c%EW*e64|~*m
z!YDQv3s07R>)R-A4fZAzk`OufQ2vLm_qlH=zdjH3)bBKl>)Gz>Hzx<jfxMJ=ZsP{~
zpWS}1e*eGQ>wmrf|0GX8fEQSToMG441b%w^3dVEbS)x}CIl}@&$l@3!kr;SaS3Z!5
zy_6KtQF3Rf)*c9cHWnoL)o`E|GWhVp1HhO1F!uGE=Le(Vo3|&=zI}9-jd3U`OtA#t
zFK+GO>Iz5_(k#Nzsc~pfl<GhLA3lK3;CI226v^4M&Q%~%90sZ(@mud*kmPuM=s=E;
zSPI-;zZ=+(Ygiu+Yd9<L;=V8^62}^8A(6~PkRSYG{VxCUp`&pMXG~iZ4Z7WLbIb<c
z7KGKqgQ>vPF?z*N^aG+O2{B(bP5_|h{s0ao#Apr@onb&Q!50vdB$K(3h+-KRBpeQN
zn-6r32vzHG;3${yE6X5630x>OK-V_?1U7M5?YsNn32`2Rf3d<ye^-w5@xj}l5BFOJ
zTLb+_gl>-s_`a3F(7C$mTwMW2$rQsQl0^6tEO$_c^Z`kxiaVh97(RTsx(d`kfRhM5
zeCT{=;hD0JgR83sE1;C<8Kl^uos|kRk6X<5;hX29*Lz0?HEO0Ty2H$&&_QV$D1bb{
z5{rPat}6BHCNKuVMzc0agjCd25Q4XuVkB_z8pWzNorQBX#u7zHA`LsIwCVT$o7X3M
zhp!Lbwh6nGMIYVyZ!JYzMPFekqR}e^Ve#5X!-69^KNCTuWsKs6aqzVFw5O(XCa|&F
za#jbm39+5|*iI<K{0ZY1h(}J3RI`I`tE4kLsF~UGHUrq7;qXigkjOEL(z9aVVD{xP
zndfZ<@H4>|K-sj?7xnu~>c_SnB~loLAdU;G5FELk!<YEu1*=aa{FwyH?Ii{HK2QNZ
zMbJfQ+BJ0Yyqy3}urMlFSlLUf&G+EG=bktZyDO|tdV=OQdSS*X`2PonZ;nr_1feWM
zbjF0#g}UecZa71-8#1+f);mr3zviUuC3Lh+Vj}&My0LRtUl!Yv5qx)LE}%OD=%7rp
z&IWX1ECiZj(HX#RR~^nMR$nuLx%wI!(CXDMGbDis2oVi4nu&jPKD_(zooAKU`T~}p
zEd=?If|5GB0qkk(D)->wGme6vqoFMw2f4YU9*xE*sE=K10qF|T9~p<3aSS9e*1Uuz
z8*r{xgVjy%Kld)?>+`YC;{E?<hG~kq2xNLkSAZM#|6AL;TlM|_lYZ~({{K@vS6AJ2
z@8B{;Nu*aCC1li&Os_uL7tCAl=KFvMPe`(G_4CV8`rJ7%u4Gy%urk)QY=>)*JWE2@
z-O_)O_&A$P$R%|APO<aIg_Xu~U)%8mDq2uNaSnfEh>{7xky=1B{<lE6l&2PLXC<iH
z;9G1Ke!opm)E+AT`&&)q!L!?ws>HF_gBc)o-OJ!vdATBTDJO}XK<Dqm|GVhaV3|g5
zKXC1TR)*8Kl4-b#2XikcKtgRjA{ft0Rs@R1nBHP^b?I_w>s@lGZT`-E-ryQIPN`bS
zJOAuJXVh7pNm(4T#9%6J#?+KEsLn8r12OAr><3GAiM)e>L1A$@dlT>~Zzga!DewAT
z;qj-wHFe*@?M!L<jVjmhN5&+sk)jDDMhTkY$RE!QAj?(Oqoj?L_U7W&#H{z$Z$-si
z(B4j)Z>er~KGlBM>4(y^=?1un_3QFB*FXV<A<Gh(Z@+KTZ(VQCRC~QhtW}Ez#w`kJ
zQv1<uRM5wZU?z?fa)pY8-pT)H92>O)SRgnGf3MrEOu5e8Se|kOjK^77vO1>bRTYcR
zy=33(vsnK#ju0_Q@9YeEga2o5bLUA-|9i6e_5SD6JQe@%zIR5FXpmp<AEDH<hZcjv
z?RkY=s^OrjFXsw7WDj!5Z?5^ZG+R`a77N@pBqWJ&B4Jy>u*e%+G%F9_+E^=KE{(Bz
z#r%FmA|u;#0g=}%9Dah_NWZu&aV_hY)hmjXH8X)n+PiL9J!7M<<)1bQ$h$rYOP3$V
z0HO)0;v(c8YnYn3bI{0GoZM91aV2sWe`~E^-GxR=X;KB*;$&2UjM3!?TQ8%VAeA0T
z&frKo64oQ>CIF@PQVGC1FOAaNd+A0BSG=8EDrtz0bJNmc)8(4OC0RN;TU=hU^gmZ0
z5L0*QK<FZ)Xgo@_L&)Ox85PgRLnLQS{+J+26$6bV8{s&Wt;95NY|#^Tpb8e$GwxlZ
zY3bj!l!%TuSd)}l?yO>+WtLa3c-A-!x<qEU)veTM5M}AWxDZy8A2rl-31X`KjtCud
zjLxic&cZ=;p%k?o&!7f(pHmu-aMgECNp5q8S%Y5jvn$c_%cUmBH=H$^IbLz9Q2wK>
zoN<v;h3t){{bGIP)Hn0D{pxC*<q(IC$6VQ?Z_n_21HRS46V#Tz)<ehFu5A12N>_8x
zEe+X9W4=l5*``Ws@IlS#t+l(>v#mY`HLMN5rXQROOAUk~PFp(;iHfEPu_Dp|p?&2>
z<M}AE-g0Gs3fH=z>QT379O^D~rLS6b8DBmrODI9b2XFBVy^W+;2z7zjxMAKe1lJ-2
zmWqL8cvWPAOUxUD!a|rWu7eE$QFriLC>+XyOC*DXz8P~-v#@r+I~Le=HqWi36<Gae
z^?PA1t-k9&>)+J?@X~Zzm2-`$m&}|xkLS~5<X}`Ki)(vb)T&917UJF@ryAzhO=;yy
zsX|xGXR>Tn2P<aE&a}5L>ope9qHf^4ge*0aW=C{j#7BCwkn7E@d|fenE-DxH&AEm1
zC~P%xHp;3gVFE5Rn?NZETCAi4*F3s1Crj(+<r>@~z-{)`m05!>N1<zCy%Nrjadri0
z-8_4#j<yhf^CGr{XHC0DQ)}Bjl7(^i>TN8v6tA{ABh%T47nb59nVX8Q-e8-_d9^KL
za6aPN;(YZ6N_+Jh;c|QR(b$@mUtNJ@|68zRWujG0-)rcT_xK*#a17x8a%XRUhG+5l
zUuj%**8{+t&i|hDHtXkq+nZm{|31m%p8ugV72V<xu#7l%r&GYsd;BL-9OkjzC<#&Z
z2*{0}W*W3NNO@2nC6`_~INX3m^R}(TX4f=er&r#9Ls#|CN?Uhm?WvW;w*MB|1(CC&
z|MiG;r2ucls;i(R*{>+3?G3hU4Qtm>#?CFUwt&s$y%K6o)WbyS>V9EK$P3I&96w0s
za>kOuR?zSJf|9s&rD_JkYwJ{TFI6e+bhY2BDrnj#VgSE&x?_@b$4JaN8_@B?PU*#k
z__X5mdF`%~Sb~pTO-Y;O9wtH}N|jdNM$dx%SBF6mbe!@jNr9Ns;QI;lwSMbIgu~3#
zoS+boX4&|+kdk*^qp9aPEma$31h2FVQ+BSyl+~IIqpI0rm73KZU46(Y5wfZ+tbxU%
zb#DiXfqsL(fgeG~DbRs;{{!R<Czah;vs+G;S~tKYk{?o1hAvZ;I8K={%GV}!nyT#x
zV%J%)#KI{Ih;odgIp9koq}Tv)Moeg&DoPVlBc{72a$)aLY6q;etOb#2Q$=kY^EVf6
z)7Z4@_)ng@3~Hv@%O6KCfBIpxzqkMLz=5N8aRcbeIPK0RW;^AaUpWyK-A>fzS5Ypg
zO!tNJ%<$lm64j!Ksp*by3ONnOs=SF{<T7i6wB|*)Q0SDl53g$jpG%#Nv#jV>K^aX{
z?VKB*C7Qx@-2kX!i_xjJF?2$VHlUU9M=NKfO6<UI9pBf|3_sHzt&UuJ7?G%b4%{)S
z_#RZSw>Gy^o`#JBT1VGtE#I7jDQA*}j1J&rf4C9?uFK0U+E>MI+x+d#j8{R}R$l>W
zTV)l*YiGQzV&c{F(W}EB-tN8qbu`>NdD)Z{9X<Cb8Fy|uoz33x=ILBqFxyAT*Eb2B
z*ZjL{L*RtjY$yL6gi0&fYVB9TrM6+vh^K0wTEx;~{FctEmc_I!xT#aUieO9W8v9fu
zqAmnfDOUwi51_gbMzvrSNHgH-LNN8BH{5O12Cy1hwrvEP!Dm155Y|w<!x+{eecT|{
zfPC~Q*5Ir-jH@drl>o~NRy-ph8ahL}5DTqo&7e(WuWVV<!kkN`+j7}f>ii}$@jgW)
zb`cHLi`T1SyIkvPY)jp!6=ktXEQ`BtM=R~wNEc<1BKF0-w&O(GaLp>p5F8)Z+TG57
z7N7qW$1x9V|JUnn^_%v8z5dtpzfbYhghx5(rFn3xiC}4jyV~L4Lmcw`Rc&)W#j|++
zr|024rGdF&{`WWgJB|1+TVL}(eUj&P%0sIN7`AB(ce5BSG=<N2Wze#6yHs(qCz#T1
zIt6dZ>g@o}g_S*=FEck3L9ZQ^x6*h~(g3R|wDsH{dBTXnB5<OxHdw+2^{f;5O-Umc
z#iBM^tGD}ieD2Bc-|$&1|Lq3!?rXpe@_%!yzuoZv?R~xf{UlGTWY4|w{sQ;D4X1`?
zn4;pi+P~ot#yx74h)=OxexB_f+B8pCZ+o`A1grX?^$keX53N_gsamyqOa9T9)Ow(N
z-(MjR$AfXJji~XtRS&REpk>;!duHXfi-Q{&r|sfk1?Fj1zfT7Gsy^K5sjvUR4AYoQ
z6UOnqwQ<w=&(2nT|F^x<fAV$x{}j&}43U(WC)()43_M)Sa026uP;ESsqVNn&u?TD)
z&^)b@05QXq!j!VHv6UjpbOSi1NRo35DUvhiJxU^P4H7&xv8*1Y98bt4j?7l;Z;u0b
zlh8S^MAuX(L5ew0lHkA#o*$2nCF5A<YttbXe%?QZh;ZQrQzE<iUxU6EjNkLF{@1>k
zO}pwJ`%|1J-4f6kg=bl+Gnoo+JrEbEw;qhqnYSLuSpCg7nR@I0_SWEM#0kp;96mn~
zUXVva@&XcJ)HTI9``ru9MaUxD{SpLHeg3~a*n56-5X8|vwy}KvZ$8=H>ec4|*6#M#
z`Tt3twMGVSo#0#Bqj;X4?oP&UV-qY|vVNoFR*AotrTqq$z<-vpHau@_4Tc<Z{6|Iv
zkyv=1?-YQ3(Et9Qo^SBm3VQ1A;m=AlIVVa*#JorVM-m-iBTFL8>0Ajll1w-fl4qg9
z6CO=;IJ5{;d$RFZx9^zYDFBu%tgyH%gVSg+rhal0Kf;9eIF0A^)WxR*&pSOm9V0Q*
zF<%N;&7u%ALam*io_gLJZW3!EE1V;PL>+SewGolzre^EOmUYVUKu+JA=GWHXr-Upb
zO=drJBTR7~=cJ7ePW;X~;suNr><S#Jc?i6d8HNd?lwD|?)TTiv8Wm=cY+?1fa6u9_
z_DY?j&6ws<u?R0LwetMvs{JB+^{{~EdDrDHxQ6Ft?tJ~rz_|9V{qk}Dc7I)a*Kk^o
zPI3y@?i47amR!T>k~G<;aBVO)xvw7(;kxmgLfACw7{sZLt&m__VPAkQP2Eb+>K&xk
z-Tu^M;I7n?Xj&FGws)r{Gj~B6C&Tsw(#N$qR~Ws{;xXom3$2N|Lakq5WdkHVAWH5&
zQlVs<%$@p9i(t<=MH`1As?zlBa9?jaRSli#yH?sBR;a>z!^5KN)85lwt!QoIuWB6V
zKkc5lh?H~<r;=xQL4DiYQC`b7`F;QUUP0g@CHR95y0s^<C-i~IxNES}`FQg$3csxv
zB=1p6#W2lOt^MIoZk2wcf=TP$Y)8LWRd!oi$tyW+6va0SiVe%t{f8u4%T<)a3C`CV
z7c9Gc(ClsA8E<mP;|rV?nY3G=l(#OLdGkoHOb7lut)$v+LxH@nG^_7i)QI&A{WVFA
zH*aAA|A16o;C<mx7G;vfNG*j%A1dtOt7LAo=~qye<N{Zs1=|&Lj$&%NUs=d2z2Bp>
znrq%&y0r>)>O}TA)d>d17?=s17~v$)ZJs)fo)*~)@}R^<OECK8!KHSa6b7#!kRYMe
z0dp;*Rdc(g+O}3|l%_LUXWY+(WHFdivyW{+$4d5nfhD+0%(KnPhXrEj+4c+Kl&iDv
zv(4TMa*G~Rhw(>3@kjT@pFpUA`p(E16!Ia0(`JEOv({x@D(HmRmAlgFO{t;606N<*
zh_O-D(G)tH>W7huwSrHp1-Cn1J$b+J0N0dra>_5uES_=}Wjb=uvK-(&6QI9W%>n+j
zS7g*GbAT5k_ES3LD8g!!c5q3ARQhoq<*u%G=OynnLV8N;@b8XJb*uyQm(4XpwkNPe
zgY~MR@Rybap8`tLXA9?GF2ar8Z!QnKp;8WPIBv=EOL<*Jhh1+VUWwm*1480FY!I-F
zq^|KQV!l^fyF-li+eTQ|@zpi+)Pt<&FP%W^*8K1`@zza-22Re+aUh$(<^S9KSI68R
zyz>A)d{=Dc%mQfaJC*P_mK;euots^qB^~6XBjrg0otu}Un9_R5ArpP5Anh_VMgm8`
z5|Buo<%0i`$hq0!%~Kq}iAL8fsMRGWznl;tFPH_W<zCs0kQ^sdITM4*f=-0wWSp6m
z<2rwP4L=DC`$%A=>mLY7VkB_{Cmc;CB!p`?nWrQ~6plljAWoQYR`|s${gQXyHLS1e
z9FXhlY8fV4f+|{w4Yk#k8>-79qsEX0uHoNXJ%79Rccld)ks+FwvT4}0-(!~K%(MZ*
zK>V+N>}});nRN#yD3ls`SYKb3CQ@UyZ$_mIW?~Rj$n5s~CmNX}BxjIv#wKFJq(YTM
z(F`Ss&IJ)0K=BmO62?Z6tk;a30-I|cnjS9CS!x@mD+y-wphx}hH-rA}(_k~`4|aOJ
z-ebk&ER`fC?{TEHijXDem<vr5L^X2>ht`HU)}SB28#%+g`vNDJBMSR!$~<~WrZXi4
zhMX|2m{Wn&NIdpDAJ*4(@S64YJe{Y`0}AJ&Op>Y7GOL1CjagqGcpiWiCMNg38BPb&
zFDI45OH3)-fEl~c`9$U{1IZ#Z&r>=8=q_5SYI&d&9eyz#Ji(Mgi~+&#><rsp*@bHj
zHUs#<PQiV53NB)hr#p2Hd2$nn6b;}Q5egTaNR!6)3!&jlcA?e)g|}9D@MNN9_&F}6
zSISw+1fo^GIl(Bd3brq&(<dY~Y1v4u55Sf5G~J;TjhR`=n*6jCz)O=qAO5SDE%(Pt
zFKbJ|mqX8AW;lU}MDPpaG=g6chr>e<5^-5&8mEhD*LhCx#h25|Lfnj5X0*=}luY6I
zJV8Rvo9wb3z);M?8KZ1ESNpv}Wmw2GD=$#TQpa*M5#x3EkuQdvx#TgZ!v*7KS}_!&
z$}$Hr%N2xDkqz;e(@m@3<JNxstIjq{6NE3*zi!YNYie1)P|NUTs56d9tLQgWO0B(@
z*}=_Wo()k}Drs8V{D+f4`_T|{wU~frbogiasji`wEZ+}$@Q7Cvv_Dp}qEzDCDbbh_
zUkFnSIXgF3eZ#q&p(*1S>04LE)dpDN6vlJ?f2&qAoXrIZ1?=UD`~#8t$zOrg2bU>D
zB!OwJ=jIkSb~~#t1SN^F>d!fn8E;V}w{(WSSZz?}aD>AdN{EObitM|8C9*3Xk1fT3
zO@LjJNH^zmoS)z+iKG7zt&8X@y-6*3UG4l<p}5-kEvNYQUxDJ9K<|xIG5dP8na*3%
zSNEzgN1=qVBKWLH?41A(>;b@$JpgzZCV%=@Ai2xrlA}b#MCgDt6^@Ttf+gbf{L;O#
z;-6r1$Xq+t)8--Pn3KsIE@mX0<!kr_61h!qevYCu&gKVt8xR%=UpW>jO9X})Mv=DO
z;CZgp;=pvx-ea9(K4t;~%Ve7AWI6(4FyTF?7C?T!dj2VnSnn_@zbbmz;|!SLT%v2(
zoB#2TK3v0;;}m=XolD=6(zw%g3X`Fdjwy@kbi4ccI{3asgg)EpS#&zbc2HeR^!0$g
zIw0>`nZ#xaN73(@kk%}#&=XU0JVvlk8b!<^GMQT|q|-8L^eKqE0!gYDfB_t!a8|^-
zh8YsJ5fdF+Dg`&f67!fOCC@}S)P`lkxn?S_cIttx?Y)jYbjXj+oha09;F~<&vlb>!
zz=b^UwBCH{&E|B5xp@oEbU0efEkWi^JvaLF0Ce1^W@F3r>oy#o+|Yoz$6#s(hZv9!
zKB!M;b$c5+)SX^DHLdl{JHAxYg-ANrmxCBZO1vnFP4j8;8SX;LiK>TV^%F`X5AEH?
z31``uVliV(AEY_=V?!3*+r?O<O#{Yea{teiMLI9-f812Pn36j2^eZ-PLFMi;q2KG(
z?*?l&4~xiPvP0j{Hj$Mcw<cDu-~Q!RLHB;K*OHv~7O8nHv+VJ3MkLl&sb(KswY-%T
zBS*#NEW!FN+3N?ezFx#-P}<9(U{qf%m3D$^b*+z4aWn31DiB5XCk3K<r*ADM>KAg4
z93u>KrIu%l3Dnej>}@Hyt^VSw4Qm>spdY=;Z`izTRj0f_Rt1lgW>Rn4naFxyucfLF
zhY68o95pEIdkb?EZ9d`#*EAJ%BV|RNC5c^KYE64P3dQ^eSs!JU0IY?YYFFFFDXQ!Z
z5247*kr%F6w5jq#&SETQI1?a3j?!Fcz;Pac&+vtl{N7$r4m=-Tt8EBlQb}m8XOFpl
zRA1CeaZV_nV!$Fqxk-z!)ZfTw37yw2K6Rusp$p_ja>f|KTHofEn+m-j83!UnhK25F
zSbSuO*nrenM1N#<Y1P^*65>Q)&pXCuKk(bD!~KKT#|Q5otq86=rfl4e5lOl^h{xLZ
mLzCXbffvD-;JWek`TBf)zCL$){(k@f0RR8k+;J8FvH$=R4F<LV

literal 0
HcmV?d00001

diff --git a/multi-cluster-ai-with-kaito/istio.sh b/multi-cluster-ai-with-kaito/istio.sh
index ced43a3..e2c0e3f 100644
--- a/multi-cluster-ai-with-kaito/istio.sh
+++ b/multi-cluster-ai-with-kaito/istio.sh
@@ -3,18 +3,17 @@ function prep_istio_setup() {
     git clone https://github.com/istio/istio.git
     pushd istio
 
-    ISTIO_TAG=$(curl https://storage.googleapis.com/istio-build/dev/1.28-dev)
     git fetch --all
-    git checkout 1.28.0-beta.1
+    git checkout $ISTIO_TAG
 }
 
 function connect_to_multi_cluster_service_mesh() {
     echo "Connecting AKS cluster $1 to the multi-cluster Istio service mesh..."
     kubectl config use-context $2
     go run ./istioctl/cmd/istioctl install \
-        --context $2\
+        --context $2 \
         --set tag=$ISTIO_TAG \
-        --set hub=gcr.io/istio-testing \
+        --set hub=gcr.io/istio-release \
         --set values.global.meshID=simplemesh \
         --set values.global.multiCluster.clusterName=$1 \
         --set values.global.network=simplenet \
diff --git a/multi-cluster-ai-with-kaito/kaito.sh b/multi-cluster-ai-with-kaito/kaito.sh
index d024ef1..46326a1 100644
--- a/multi-cluster-ai-with-kaito/kaito.sh
+++ b/multi-cluster-ai-with-kaito/kaito.sh
@@ -4,7 +4,7 @@ function prep_kaito_setup() {
     helm repo update
 
     echo "Retrieving the KAITO GPU Provisioner setup script..."
-    GPU_PROVISIONER_VERSION=0.3.6
+    GPU_PROVISIONER_VERSION=0.3.7
     curl -sO https://raw.githubusercontent.com/Azure/gpu-provisioner/main/hack/deploy/configure-helm-values.sh
 }
 
@@ -15,6 +15,7 @@ function install_kaito_core() {
         --namespace kaito-workspace \
         --create-namespace \
         --set clusterName="$1" \
+        --set featureGates.gatewayAPIInferenceExtension=true \
         --wait
 }
 
diff --git a/multi-cluster-ai-with-kaito/litellm/READMD.md b/multi-cluster-ai-with-kaito/litellm/READMD.md
new file mode 100644
index 0000000..cc02a9e
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/litellm/READMD.md
@@ -0,0 +1,2 @@
+# Instructions for setting up the LiteLLM proxy
+
diff --git a/multi-cluster-ai-with-kaito/semantic_router.sh b/multi-cluster-ai-with-kaito/semantic_router.sh
index 75c0471..ab85916 100644
--- a/multi-cluster-ai-with-kaito/semantic_router.sh
+++ b/multi-cluster-ai-with-kaito/semantic_router.sh
@@ -1,11 +1,57 @@
 function set_up_semantic_router() {
     echo "Setting up semantic router in member cluster $MEMBER_3..."
-    git clone https://github.com/rambohe-ch/semantic-router.git
-    pushd semantic-router
-    git checkout add-helm-chart
 
     kubectl config use-context $MEMBER_3_CTX
-    helm upgrade --install semantic-router --namespace vllm-semantic-router-system ./deploy/helm/semantic-router
+    helm upgrade \
+        --install semantic-router \
+        --namespace vllm-semantic-router-system \
+        --create-namespace \
+        --set namespace.create=false \
+        charts/semantic-router.tgz
+    
+    cat <<EOF | kubectl apply -f -
+apiVersion: networking.istio.io/v1alpha3
+kind: EnvoyFilter
+metadata:
+  name: semantic-router
+  namespace: default
+spec:
+  configPatches:
+  - applyTo: HTTP_FILTER
+    match:
+      context: GATEWAY
+      listener:
+        filterChain:
+          filter:
+            name: envoy.filters.network.http_connection_manager
+            subFilter:
+              name: envoy.filters.http.router
+    patch:
+      operation: INSERT_BEFORE
+      value:
+        name: envoy.filters.http.ext_proc
+        typed_config:
+          '@type': type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
+          allow_mode_override: true
+          failure_mode_allow: true
+          grpc_service:
+            envoy_grpc:
+              cluster_name: outbound|50051||semantic-router.vllm-semantic-router-system.svc.cluster.local
+            timeout: 30s
+          max_message_timeout: 600s
+          message_timeout: 300s
+          mutation_rules:
+            allow_all_routing: false
+            allow_envoy: false
+            disallow_system: true
+          processing_mode:
+            request_body_mode: BUFFERED
+            request_header_mode: SEND
+            request_trailer_mode: SKIP
+            response_body_mode: BUFFERED
+            response_header_mode: SEND
+            response_trailer_mode: SKIP
+EOF
 
     popd
 }
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/setup.sh b/multi-cluster-ai-with-kaito/setup.sh
index 0210a75..933007f 100755
--- a/multi-cluster-ai-with-kaito/setup.sh
+++ b/multi-cluster-ai-with-kaito/setup.sh
@@ -4,7 +4,10 @@ set -o nounset
 set -o pipefail
 
 # Required variables.
-if [ -z "$SUBSCRIPTION" ]; then echo "Variable SUBSCRIPTION is not set"; fi
+if [ -z "${SUBSCRIPTION:-}" ]; then  # ":-" so that nounset does not abort before the message below is printed
+    echo "Variable SUBSCRIPTION is not set" >&2
+    exit 1
+fi
 
 # Default configuration for the setup.
 RG="${RG:-kubefleet-kaito-demo-2025}"
@@ -46,6 +49,9 @@ RESOURCE_CHANGES_COLLECTION_DURATION="0m"
 REGISTRY="$ACR.azurecr.io"
 TAG="demo"
 
+# The configuration below are for the Istio setup; in most cases they do not need to be changed.
+ISTIO_TAG=1.28.0-beta.1
+
 # Source the utility functions.
 source ./azresources.sh
 source ./kubefleet_setup.sh

From eb1e97160087357007d5672b1b9835b3b9990866 Mon Sep 17 00:00:00 2001
From: michaelawyu <chenyu1@microsoft.com>
Date: Thu, 6 Nov 2025 14:54:00 +0800
Subject: [PATCH 4/7] Added LiteLLM setup part

Signed-off-by: michaelawyu <chenyu1@microsoft.com>
---
 multi-cluster-ai-with-kaito/litellm/READMD.md |  2 -
 multi-cluster-ai-with-kaito/litellm/README.md | 79 +++++++++++++++++++
 .../litellm/secret.yaml                       | 11 +++
 .../litellm/values.yaml                       | 28 +++++++
 4 files changed, 118 insertions(+), 2 deletions(-)
 delete mode 100644 multi-cluster-ai-with-kaito/litellm/READMD.md
 create mode 100644 multi-cluster-ai-with-kaito/litellm/README.md
 create mode 100644 multi-cluster-ai-with-kaito/litellm/secret.yaml
 create mode 100644 multi-cluster-ai-with-kaito/litellm/values.yaml

diff --git a/multi-cluster-ai-with-kaito/litellm/READMD.md b/multi-cluster-ai-with-kaito/litellm/READMD.md
deleted file mode 100644
index cc02a9e..0000000
--- a/multi-cluster-ai-with-kaito/litellm/READMD.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# Instructions for setting up the LiteLLM proxy
-
diff --git a/multi-cluster-ai-with-kaito/litellm/README.md b/multi-cluster-ai-with-kaito/litellm/README.md
new file mode 100644
index 0000000..fda2e96
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/litellm/README.md
@@ -0,0 +1,79 @@
+# Instructions for setting up the LiteLLM proxy
+
+This document provides additional instructions for setting up the LiteLLM proxy in your environment.
+
+## Before you begin
+
+* Make sure that you have completed other parts of the tutorial.
+* Set up a PostgreSQL database server, which LiteLLM requires for storing information.
+    * Any PostgreSQL installation should work, as long as the Kubernetes clusters you have created in this
+    tutorial can access the PostgreSQL instance. You may use an
+    [Azure DB for PostgreSQL instance](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/quickstart-create-server),
+    or deploy a PostgreSQL operator inside the query routing cluster.
+    * After the PostgreSQL database server is set up, create a database `litellm` in the server.
+
+        ```sql
+        CREATE DATABASE litellm;
+        ```
+    
+    * Write down the address of the server, the password of the default `postgres` user, and a username/password combo that LiteLLM
+    will use to access the server.
+
+## Setting up LiteLLM
+
+* Edit the `secret.yaml` file in the directory, replace `POSTGRES-PASSWORD`, `YOUR-USERNAME`, and `YOUR-PASSWORD` with
+the password of the default `postgres` user, and the username/password for the account that LiteLLM will use respectively.
+* Edit the `values.yaml` file in the directory, replace `YOUR-POSTGRES-ENDPOINT` with the address of your PostgreSQL database server.
+    * You may find out that there are various placeholders in the file; it is OK to leave them as they are.
+* Switch to the current directory, and run the command below to deploy the LiteLLM proxy:
+
+    ```sh
+    helm install litellm --values ./values.yaml oci://ghcr.io/berriai/litellm-helm:0.1.742 --namespace litellm --create-namespace
+    kubectl apply -f ./secret.yaml
+    ```
+
+    It may take a few moments before the LiteLLM proxy starts up.
+
+* LiteLLM will create a secret in the `litellm` namespace, `litellm-masterkey`, that contains the password of the `admin` user, which
+you can use to access the LiteLLM UI. To retrieve the password, run the command below:
+
+    ```sh
+    kubectl get secret -n litellm litellm-masterkey -o jsonpath='{.data.masterkey}' | base64 -d
+    ```
+
+    Write down the output. Depending on the shell program you use, you may see a percentage sign `%` at the end of the output,
+    which represents a missing new line character; ignore it: for example, if the output is `123456%`, the password
+    should be `123456`.
+
+* Port forward the LiteLLM service:
+
+    ```sh
+    export LITELLM_FORWARDING_PORT=10000
+    kubectl port-forward svc/litellm -n litellm $LITELLM_FORWARDING_PORT:4000
+    ```
+
+* Open a browser window, and go to `localhost:10000/ui`. You should see that the LiteLLM UI loads up. If prompted for username/password,
+use the username `admin` and the master password you just wrote down.
+
+* On the left panel, click `Models + Endpoints`. Then switch to the `Add Model` tab.
+
+* Add a new model using the setup below:
+
+    * For the `Provider` part, pick `OpenAI-Compatible Endpoints`.
+    * For the `LiteLLM Model Name(s)` part, type `openai/auto`.
+    * For the `Mode` part, pick `Chat - /chat/completions`.
+    * For the `API Base` part, type `http://inference-gateway-istio.default.svc.cluster.local/v1` if you haven't updated the name of
+    the inference gateway when you set up the environment; replace `inference-gateway` with the value of your own if the name
+    has been modified.
+    * No need to change other parts.
+
+* Click the `Test Connect` button; you should see a connection successful message.
+* Click the `Add Model` button to add the model.
+
+* On the left panel, check `Test Key`.
+
+* Make sure that in the `Configurations` panel, the model `openai/auto` has been selected and the endpoint type is `/v1/chat/completions`.
+
+* You can now use the chat panel to interact with the models. 
+    * Note that conversational continuity may cause your messages to keep landing on the same model; remember to clear the chat history
+    using the `Clear Chat` button as necessary.
diff --git a/multi-cluster-ai-with-kaito/litellm/secret.yaml b/multi-cluster-ai-with-kaito/litellm/secret.yaml
new file mode 100644
index 0000000..f3007f8
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/litellm/secret.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  namespace: litellm
+  name: postgres
+stringData:
+  # stringData takes plain text (no base64 encoding needed). Password for the "postgres" user:
+  postgres-password: POSTGRES-PASSWORD
+  username: YOUR-USERNAME
+  password: YOUR-PASSWORD
+type: Opaque
\ No newline at end of file
diff --git a/multi-cluster-ai-with-kaito/litellm/values.yaml b/multi-cluster-ai-with-kaito/litellm/values.yaml
new file mode 100644
index 0000000..213e737
--- /dev/null
+++ b/multi-cluster-ai-with-kaito/litellm/values.yaml
@@ -0,0 +1,28 @@
+db:
+  deployStandalone: false
+  # Use an existing postgres server/cluster
+  useExisting: true
+
+  # How to connect to the existing postgres server/cluster
+  endpoint: YOUR-POSTGRES-ENDPOINT
+  database: litellm
+  url: postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)
+  secret:
+    name: postgres
+    usernameKey: username
+    passwordKey: password
+
+# The elements within proxy_config are rendered as config.yaml for the proxy
+#  Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
+#  Reference: https://docs.litellm.ai/docs/proxy/configs
+proxy_config:
+  model_list:
+    # At least one model must exist for the proxy to start; this model might not actually exist.
+    - model_name: phi-4 # used in litellm proxy
+      litellm_params:
+        model: openai/$KAITO_MODEL_NAME  # openai prefix is required
+        api_key: fake-key
+        api_base: http://$WORKSPACE_SVC/v1
+  general_settings:
+    master_key: os.environ/PROXY_MASTER_KEY
+    store_model_in_db: true

From 03d169f8c6d9677a14b29b546a0999fabeb6ca85 Mon Sep 17 00:00:00 2001
From: michaelawyu <chenyu1@microsoft.com>
Date: Thu, 6 Nov 2025 15:27:29 +0800
Subject: [PATCH 5/7] Added some additional notes

Signed-off-by: michaelawyu <chenyu1@microsoft.com>
---
 multi-cluster-ai-with-kaito/SETUP.md | 122 +++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/multi-cluster-ai-with-kaito/SETUP.md b/multi-cluster-ai-with-kaito/SETUP.md
index e69de29..cbab13f 100644
--- a/multi-cluster-ai-with-kaito/SETUP.md
+++ b/multi-cluster-ai-with-kaito/SETUP.md
@@ -0,0 +1,122 @@
+# How to run the scripts in this tutorial
+
+The scripts in this tutorial will help you:
+
+* Create a fleet of 3 AKS (Azure Kubernetes Service) clusters for running LLM inference workloads and routing LLM queries.
+* Put the 3 clusters under the management of KubeFleet, a CNCF sandbox project for multi-cluster management, with an
+additional KubeFleet hub cluster (also an AKS cluster) as the management portal.
+* Set up KAITO, a CNCF sandbox project for easy LLM usage, on the clusters for facilitating LLM workloads with ease.
+* Connect the 3 clusters with an Istio service mesh.
+* Use Kubernetes Gateway API with Inference Extension for serving LLM queries.
+
+> Note that even though the scripts use AKS clusters and related resources for simplicity, the tutorial itself is not necessarily Azure-specific. It can run on any Kubernetes environment, as long as inter-cluster connectivity can be established.
+
+## Before you begin
+
+* This tutorial assumes that you are familiar with basic Azure/AKS usage and Kubernetes usage.
+* If you don't have an Azure account, [create a free account](https://azure.microsoft.com/pricing/purchase-options/azure-account) before you begin.
+* Make sure that you have the following tools installed in your environment:
+    * The Azure CLI (`az`).
+    * The Kubernetes CLI (`kubectl`).
+    * Helm
+    * Docker
+    * The Istio CLI (`istioctl`).
+    * Go runtime (>=1.24)
+    * `git`
+    * `base64`
+    * `make`
+    * `curl`
+* The setup in the tutorial requires usage of GPU-enabled nodes (with NVIDIA A100 GPUs or similar specs).
+
+## Run the scripts
+
+Switch to the current directory and follow the steps below to run the scripts:
+
+```sh
+chmod +x setup.sh
+./setup.sh
+```
+
+It may take a while for the setup to complete.
+
+The script includes some configurable parameters; in most cases though, you should be able to just use
+the default values. See the list of parameters in the file `setup.sh`, and, if needed, set up
+environment variables accordingly to override the default values.
+
+## Verify the setup
+
+After the setup script completes, follow the steps below to verify the setup:
+
+* Switch to one of the clusters that is running the inference workload:
+
+    ```sh
+    MEMBER_1="${MEMBER_1:-model-serving-cluster-1}"
+    MEMBER_2="${MEMBER_2:-model-serving-cluster-2}"
+    MEMBER_3="${MEMBER_3:-query-routing-cluster}"
+    MEMBER_1_CTX=$MEMBER_1-admin
+    MEMBER_2_CTX=$MEMBER_2-admin
+    MEMBER_3_CTX=$MEMBER_3-admin
+
+    kubectl config use-context $MEMBER_1_CTX
+    kubectl get workspace
+    ```
+
+    You should see that the KAITO workspace with the DeepSeek model is up and running. Note that it may take 
+    a while for a GPU node to get ready and have the model downloaded/set up.
+
+* Similarly, switch to the other cluster that is running the inference workload and make sure that the Phi model
+is up and running:
+
+    ```sh
+    kubectl config use-context $MEMBER_2_CTX
+    kubectl get workspace
+    ```
+
+* Now, switch to the query routing cluster and send some queries to the inference gateway:
+
+    ```sh
+    kubectl config use-context $MEMBER_3_CTX
+
+    # Run the port-forward below in a separate shell window; it keeps running in the foreground.
+    kubectl port-forward svc/inference-gateway-istio 10000:80
+
+    curl -X POST http://localhost:10000/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "auto",
+        "messages": [{"role": "user", "content": "Prove the Pythagorean theorem step by step"}],
+        "max_tokens": 100    
+    }'
+    ```
+
+    You should see from the response that the query is being served by the DeepSeek model.
+
+    ```sh
+    curl -X POST -i localhost:10000/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "auto",
+        "messages": [{"role": "user", "content": "What is the color of the sky?"}],
+        "max_tokens": 100
+    }'
+    ```
+
+    You should see from the response that the query is being served by the Phi model.
+
+    > Note: the tutorial featuers a semantic router that classifies queries based on their categories and sends queries to a LLM that is best equipped to process the category. The process is partly non-deterministic due to the nature of LLM. If you believe that a query belongs to a specific category but is not served by the expected LLM; tweak the query text a bit and give it another try.
+
+## Additional steps
+
+You can set up the LiteLLM proxy to interact with the models using a UI. Follow the steps in the [LiteLLM setup README](./litellm/README.md) to complete the setup.
+
+## Clean things up
+
+To clean things up, delete the Azure resource group that contains all the resources:
+
+```sh
+export RG="${RG:-kubefleet-kaito-demo-2025}"
+az group delete -n $RG
+```
+
+
+

From 31d02d90695edcdb1450bdd90032a7aff56eb404 Mon Sep 17 00:00:00 2001
From: michaelawyu <chenyu1@microsoft.com>
Date: Fri, 7 Nov 2025 07:31:59 +0800
Subject: [PATCH 6/7] Re-sign Simon's commit

Signed-off-by: michaelawyu <chenyu1@microsoft.com>
---
 multi-cluster-ai-with-kaito/SETUP.md | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/multi-cluster-ai-with-kaito/SETUP.md b/multi-cluster-ai-with-kaito/SETUP.md
index cbab13f..12b1cae 100644
--- a/multi-cluster-ai-with-kaito/SETUP.md
+++ b/multi-cluster-ai-with-kaito/SETUP.md
@@ -103,11 +103,11 @@ is up and running:
 
     You should see from the response that the query is being served by the Phi model.
 
-    > Note: the tutorial featuers a semantic router that classifies queries based on their categories and sends queries to a LLM that is best equipped to process the category. The process is partly non-deterministic due to the nature of LLM. If you believe that a query belongs to a specific category but is not served by the expected LLM; tweak the query text a bit and give it another try.
+    > Note: the tutorial features a semantic router that classifies queries based on their categories and sends each query to the LLM that is best equipped to process its category. The process is partly non-deterministic due to the nature of LLMs. If you believe that a query belongs to a specific category but is not served by the expected LLM, tweak the query text a bit and give it another try.
 
 ## Additional steps
 
-You can set up the LiteLLM proxy to interact with the models using a UI. Follow the steps in the [LiteLLM setup README](./litellm/README.md) to complete the setup.
+You can set up the LiteLLM proxy to interact with the models using a web UI. Follow the steps in the [LiteLLM setup README](./litellm/README.md) to complete the setup.
 
 ## Clean things up
 
@@ -118,5 +118,12 @@ export RG="${RG:-kubefleet-kaito-demo-2025}"
 az group delete -n $RG
 ```
 
+## Questions or comments?
+
+If you have any questions or comments, please use our [Q&A Discussions](https://github.com/kubefleet-dev/kubefleet/discussions/categories/q-a).
+
+If you find a bug or the solution doesn't work, please open an [Issue](https://github.com/kubefleet-dev/kubefleet/issues/new) so we can take a look. We welcome submissions too, so if you find a fix please open a PR!
+
+Also, consider coming to a [Community Meeting](https://bit.ly/kubefleet-cm-meeting) too!
 
 

From 2373d41f7a3bd74942a765a8c31a04fb4d1f7d0a Mon Sep 17 00:00:00 2001
From: Simon Waight <simon.waight@gmail.com>
Date: Fri, 7 Nov 2025 03:25:29 +0000
Subject: [PATCH 7/7] Remove whitespace so I can sign-off.

Signed-off-by: Simon Waight <simon.waight@gmail.com>
Signed-off-by: michaelawyu <chenyu1@microsoft.com>
---
 multi-cluster-ai-with-kaito/SETUP.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/multi-cluster-ai-with-kaito/SETUP.md b/multi-cluster-ai-with-kaito/SETUP.md
index 12b1cae..e68bef7 100644
--- a/multi-cluster-ai-with-kaito/SETUP.md
+++ b/multi-cluster-ai-with-kaito/SETUP.md
@@ -125,5 +125,3 @@ If you have any questions or comments please using our [Q&A Discussions](https:/
 If you find a bug or the solution doesn't work, please open an [Issue](https://github.com/kubefleet-dev/kubefleet/issues/new) so we can take a look. We welcome submissions too, so if you find a fix please open a PR!
 
 Also, consider coming to a [Community Meeting](https://bit.ly/kubefleet-cm-meeting) too!
-
-