kaito-project · techworldhello · Apr 19, 2026 · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
@@ -18,9 +18,9 @@ env:
   CLUSTER_NAME: "kaito-gw-e2e-aks-${{ github.run_id }}"
   ACR_NAME: "kaitogwe2eaks${{ github.run_id }}acr"
   GPU_MOCKER_IMAGE: "gpu-node-mocker:latest-${{ github.run_id }}"
-  LOCATION: eastus
+  LOCATION: swedencentral
   NODE_COUNT: '2'
-  NODE_VM_SIZE: Standard_D8s_v3
+  NODE_VM_SIZE: Standard_D4s_v3
 
 permissions:
   contents: read

@@ -46,3 +46,25 @@ spec:
     app: model-not-found
   ports:
   - port: 80
+---
+# Catch-all HTTPRoute: Routes any request whose model name did not match
+# any model-specific HTTPRoute to a dedicated error service that returns
+# an OpenAI-compatible 404 JSON response instead of Envoy's bare 404.
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: model-not-found-route
+spec:
+  parentRefs:  # attach this route to the inference-gateway Gateway
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
+  rules:
+  - matches:
+    - path:
+        type: PathPrefix
+        value: /  # prefix "/" matches every request path (the catch-all)
+    backendRefs:  # forward matched requests to the model-not-found Service
+    - name: model-not-found
+      kind: Service
+      port: 80
@@ -10,8 +10,7 @@
 #   5. GWIE CRDs (InferencePool, InferenceModel)
 #   6. BBR (Body-Based Router) v1.3.1
 #   7. Inference Gateway
-#   8. InferencePools, InferenceModels, HTTPRoute
-#   9. InferenceSets (KAITO workloads on fake nodes)
+#   8. HTTPRoute catch-all, error service, debug filter
 #
 # Environment variables:
 #   KAITO_VERSION             — KAITO Helm chart version    (default: v0.9.1)
@@ -39,7 +38,7 @@ fi
 
 # ── 1. KAITO workspace operator ──────────────────────────────────────────
 echo ""
-echo "=== 1/9: Installing KAITO workspace operator ${KAITO_VERSION} ==="
+echo "=== 1/8: Installing KAITO workspace operator ${KAITO_VERSION} ==="
 helm repo add kaito https://kaito-project.github.io/kaito/charts 2>/dev/null || true
 helm repo update kaito
 helm install kaito kaito/workspace \
@@ -57,7 +56,7 @@ kubectl -n kaito-system wait --for=condition=ready pod -l app.kubernetes.io/name
 
 # ── 2. GPU node mocker (gpu-node-mocker) ──────────────────────────
 echo ""
-echo "=== 2/9: Deploying gpu-node-mocker (GPU node mocker) ==="
+echo "=== 2/8: Deploying gpu-node-mocker (GPU node mocker) ==="
 helm install gpu-node-mocker ./charts/gpu-node-mocker \
   --namespace kaito-system \
   --create-namespace \
@@ -69,12 +68,12 @@ kubectl -n kaito-system rollout status deployment/gpu-node-mocker --timeout=120s
 
 # ── 3. Gateway API CRDs ─────────────────────────────────────────────────
 echo ""
-echo "=== 3/9: Installing Gateway API CRDs ${GATEWAY_API_VERSION} ==="
+echo "=== 3/8: Installing Gateway API CRDs ${GATEWAY_API_VERSION} ==="
 kubectl apply -f "https://github.com/kubernetes-sigs/gateway-api/releases/download/${GATEWAY_API_VERSION}/standard-install.yaml"
 
 # ── 4. Istio ─────────────────────────────────────────────────────────────
 echo ""
-echo "=== 4/9: Installing Istio ${ISTIO_VERSION} ==="
+echo "=== 4/8: Installing Istio ${ISTIO_VERSION} ==="
 if ! command -v istioctl &>/dev/null; then
   echo "Installing istioctl..."
   curl -L https://istio.io/downloadIstio | ISTIO_VERSION="${ISTIO_VERSION}" sh -
@@ -94,12 +93,12 @@ kubectl -n istio-system rollout status deployment/istiod --timeout=180s
 
 # ── 5. GWIE CRDs (InferencePool, InferenceModel) ────────────────────────
 echo ""
-echo "=== 5/9: Installing GWIE CRDs ==="
+echo "=== 5/8: Installing GWIE CRDs ==="
 kubectl apply -f "https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml"
 
 # ── 6. BBR (Body-Based Router) ──────────────────────────────────────────
 echo ""
-echo "=== 6/9: Installing BBR ${BBR_VERSION} ==="
+echo "=== 6/8: Installing BBR ${BBR_VERSION} ==="
 helm upgrade --install body-based-router oci://registry.k8s.io/gateway-api-inference-extension/charts/body-based-routing \
   --version "${BBR_VERSION}" \
   --set provider.name=istio \
@@ -112,7 +111,7 @@ kubectl rollout status deployment/body-based-router --timeout=120s 2>/dev/null |
 
 # ── 7. Inference Gateway ────────────────────────────────────────────────
 echo ""
-echo "=== 7/9: Deploying inference Gateway ==="
+echo "=== 7/8: Deploying inference Gateway ==="
 kubectl apply -f "${MANIFESTS_DIR}/gateway.yaml"
 
 echo "⏳ Waiting for Gateway pod..."
@@ -128,22 +127,16 @@ kubectl wait --for=condition=ready pod \
   --timeout=180s 2>/dev/null || \
   echo "⚠️  Gateway pod not ready yet — continuing."
 
-# ── 8. HTTPRoute, error service, DestinationRules ───────────────────────
-# Note: InferencePools + EPP are auto-created by KAITO when InferenceSets are applied.
+# ── 8. HTTPRoute catch-all, error service, debug filter ─────────────────
+# Note: InferenceSets, model-specific HTTPRoutes, and DestinationRules are
+# created by individual E2E test cases via the test/e2e/utils helpers.
 echo ""
-echo "=== 8/9: Deploying routing, error service ==="
+echo "=== 8/8: Deploying routing catch-all, error service ==="
 kubectl apply -f "${MANIFESTS_DIR}/model-not-found.yaml"
-kubectl apply -f "${MANIFESTS_DIR}/httproute.yaml"
-kubectl apply -f "${MANIFESTS_DIR}/destination-rules.yaml"
 kubectl apply -f "${MANIFESTS_DIR}/inference-debug-filter.yaml"
 
 echo "⏳ Waiting for model-not-found service..."
 kubectl rollout status deployment/model-not-found --timeout=60s 2>/dev/null || true
 
-# ── 9. InferenceSets (KAITO workloads) ──────────────────────────────────
-echo ""
-echo "=== 9/9: Deploying InferenceSets ==="
-kubectl apply -f "${MANIFESTS_DIR}/inference-sets.yaml"
-
 echo ""
 echo "✅ All components installed."
@@ -13,9 +13,9 @@
 # Environment variables (override defaults as needed):
 #   RESOURCE_GROUP   (default: kaito-e2e-local)
 #   CLUSTER_NAME     (default: kaito-e2e-local)
-#   LOCATION         (default: eastus)
+#   LOCATION         (default: swedencentral)
 #   NODE_COUNT       (default: 2)
-#   NODE_VM_SIZE     (default: Standard_D8s_v3)
+#   NODE_VM_SIZE     (default: Standard_D4s_v3)
 #   SKIP_TEARDOWN    (default: false) — set to "true" to keep cluster after tests
 # ---------------------------------------------------------------------------
 set -euo pipefail
@@ -25,9 +25,9 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
 
 export RESOURCE_GROUP="${RESOURCE_GROUP:-kaito-e2e-local}"
 export CLUSTER_NAME="${CLUSTER_NAME:-kaito-e2e-local}"
-export LOCATION="${LOCATION:-eastus}"
+export LOCATION="${LOCATION:-swedencentral}"
 export NODE_COUNT="${NODE_COUNT:-2}"
-export NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D8s_v3}"
+export NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D4s_v3}"
 SKIP_TEARDOWN="${SKIP_TEARDOWN:-false}"
 
 STEP="${1:-all}"

@@ -6,7 +6,7 @@
 #   RESOURCE_GROUP   — Azure resource group name  (default: kaito-rg)
 #   CLUSTER_NAME     — AKS cluster name           (default: kaito-aks)
 #   ACR_NAME         — ACR registry name           (default: <cluster_name>acr, sanitized)
-#   LOCATION         — Azure region               (default: eastus)
+#   LOCATION         — Azure region               (default: swedencentral)
 #   NODE_COUNT       — Number of worker nodes      (default: 2)
 #   NODE_VM_SIZE     — VM SKU for the node pool    (default: Standard_D4s_v3)
 #
@@ -19,9 +19,9 @@ RESOURCE_GROUP="${RESOURCE_GROUP:-kaito-rg}"
 CLUSTER_NAME="${CLUSTER_NAME:-kaito-aks}"
 # ACR names must be alphanumeric, 5-50 chars. Strip dashes from cluster name.
 ACR_NAME="${ACR_NAME:-$(echo "${CLUSTER_NAME}acr" | tr -d '-' | head -c 50)}"
-LOCATION="${LOCATION:-eastus}"
+LOCATION="${LOCATION:-swedencentral}"
 NODE_COUNT="${NODE_COUNT:-2}"
-NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D8s_v3}"
+NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D4s_v3}"
 
 echo "=== Creating resource group ${RESOURCE_GROUP} in ${LOCATION} ==="
 az group create \

@@ -87,75 +87,6 @@ for crd in \
 done
 echo ""
 
-# ── 1. Inference pods Running ────────────────────────────────────────────
-echo "=== Inference pods ==="
-for name in falcon-7b-instruct ministral-3-3b-instruct; do
-  label="inferenceset.kaito.sh/created-by=${name}"
-  if kubectl wait --for=condition=ready pod -l "${label}" --timeout="${TIMEOUT}" >/dev/null 2>&1; then
-    pass "Pods for ${name} are Running"
-  else
-    fail "Pods for ${name} are NOT Running"
-  fi
-  kubectl get pods -l "${label}" 2>/dev/null || true
-done
-echo ""
-
-# ── 2. InferencePools exist with Running EPP ─────────────────────────────
-echo "=== InferencePools ==="
-for pool in falcon-7b-instruct-inferencepool ministral-3-3b-instruct-inferencepool; do
-  if kubectl get inferencepool "${pool}" >/dev/null 2>&1; then
-    # Check that the EPP pod for this pool is Running (try multiple label patterns)
-    EPP_READY=$(kubectl get pods --no-headers 2>/dev/null | grep "${pool}-epp" | grep -c "Running" || true)
-    if [[ "${EPP_READY:-0}" -gt 0 ]]; then
-      pass "InferencePool ${pool} exists, EPP Running"
-    else
-      fail "InferencePool ${pool} exists but EPP pod is not Running"
-    fi
-  else
-    fail "InferencePool ${pool} is MISSING"
-  fi
-done
-echo ""
-
-# ── 3. InferenceSets replicas ready ──────────────────────────────────────
-echo "=== InferenceSets ==="
-for ws in falcon-7b-instruct ministral-3-3b-instruct; do
-  if kubectl get inferenceset "${ws}" >/dev/null 2>&1; then
-    READY=$(kubectl get inferenceset "${ws}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0")
-    DESIRED=$(kubectl get inferenceset "${ws}" -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "?")
-    if [[ "${READY}" == "${DESIRED}" && "${READY}" != "0" ]]; then
-      pass "InferenceSet ${ws} ready=${READY}/${DESIRED}"
-    else
-      fail "InferenceSet ${ws} ready=${READY}/${DESIRED}"
-    fi
-  else
-    fail "InferenceSet ${ws} is MISSING"
-  fi
-done
-echo ""
-
-# ── 4. HTTPRoute Accepted=True ───────────────────────────────────────────
-echo "=== HTTPRoute ==="
-ROUTE_ACCEPTED=$(kubectl get httproute llm-route \
-  -o jsonpath='{.status.parents[0].conditions[?(@.type=="Accepted")].status}' 2>/dev/null || echo "")
-if [[ "${ROUTE_ACCEPTED}" == "True" ]]; then
-  pass "HTTPRoute llm-route Accepted=True"
-else
-  fail "HTTPRoute llm-route Accepted=${ROUTE_ACCEPTED:-<not found>}"
-fi
-echo ""
-
-# ── 5. DestinationRules exist ────────────────────────────────────────────
-echo "=== DestinationRules ==="
-for dr in falcon-7b-instruct-inferencepool-epp ministral-3-3b-instruct-inferencepool-epp; do
-  if kubectl get destinationrule "${dr}" >/dev/null 2>&1; then
-    pass "DestinationRule ${dr} exists"
-  else
-    fail "DestinationRule ${dr} is MISSING"
-  fi
-done
-echo ""
-
 # ── Summary ──────────────────────────────────────────────────────────────
 if [[ "$FAILED" -eq 0 ]]; then
   echo "=== All validation checks passed ✅ ==="