Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ env:
CLUSTER_NAME: "kaito-gw-e2e-aks-${{ github.run_id }}"
ACR_NAME: "kaitogwe2eaks${{ github.run_id }}acr"
GPU_MOCKER_IMAGE: "gpu-node-mocker:latest-${{ github.run_id }}"
LOCATION: eastus
LOCATION: swedencentral
NODE_COUNT: '2'
NODE_VM_SIZE: Standard_D8s_v3
NODE_VM_SIZE: Standard_D4s_v3

permissions:
contents: read
Expand Down
24 changes: 0 additions & 24 deletions hack/e2e/manifests/destination-rules.yaml

This file was deleted.

46 changes: 0 additions & 46 deletions hack/e2e/manifests/httproute.yaml

This file was deleted.

45 changes: 0 additions & 45 deletions hack/e2e/manifests/inference-sets.yaml

This file was deleted.

22 changes: 22 additions & 0 deletions hack/e2e/manifests/model-not-found.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,25 @@ spec:
app: model-not-found
ports:
- port: 80
---
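# Catch-all HTTPRoute: sends any request that no model-specific HTTPRoute
# matched to the model-not-found Service, which returns an OpenAI-compatible
# 404 JSON response instead of Envoy's bare 404. The PathPrefix "/" match
# below is the broadest possible, so more specific routes are expected to
# take precedence over it.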
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
name: model-not-found-route
spec:
parentRefs:
- group: gateway.networking.k8s.io
kind: Gateway
name: inference-gateway
rules:
- matches:
- path:
type: PathPrefix
value: /
backendRefs:
- name: model-not-found
kind: Service
port: 80
31 changes: 12 additions & 19 deletions hack/e2e/scripts/install-components.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
# 5. GWIE CRDs (InferencePool, InferenceModel)
# 6. BBR (Body-Based Router) v1.3.1
# 7. Inference Gateway
# 8. InferencePools, InferenceModels, HTTPRoute
# 9. InferenceSets (KAITO workloads on fake nodes)
# 8. HTTPRoute catch-all, error service, debug filter
#
# Environment variables:
# KAITO_VERSION — KAITO Helm chart version (default: v0.9.1)
Expand Down Expand Up @@ -39,7 +38,7 @@ fi

# ── 1. KAITO workspace operator ──────────────────────────────────────────
echo ""
echo "=== 1/9: Installing KAITO workspace operator ${KAITO_VERSION} ==="
echo "=== 1/8: Installing KAITO workspace operator ${KAITO_VERSION} ==="
helm repo add kaito https://kaito-project.github.io/kaito/charts 2>/dev/null || true
helm repo update kaito
helm install kaito kaito/workspace \
Expand All @@ -57,7 +56,7 @@ kubectl -n kaito-system wait --for=condition=ready pod -l app.kubernetes.io/name

# ── 2. GPU node mocker (gpu-node-mocker) ──────────────────────────
echo ""
echo "=== 2/9: Deploying gpu-node-mocker (GPU node mocker) ==="
echo "=== 2/8: Deploying gpu-node-mocker (GPU node mocker) ==="
helm install gpu-node-mocker ./charts/gpu-node-mocker \
--namespace kaito-system \
--create-namespace \
Expand All @@ -69,12 +68,12 @@ kubectl -n kaito-system rollout status deployment/gpu-node-mocker --timeout=120s

# ── 3. Gateway API CRDs ─────────────────────────────────────────────────
echo ""
echo "=== 3/9: Installing Gateway API CRDs ${GATEWAY_API_VERSION} ==="
echo "=== 3/8: Installing Gateway API CRDs ${GATEWAY_API_VERSION} ==="
kubectl apply -f "https://github.com/kubernetes-sigs/gateway-api/releases/download/${GATEWAY_API_VERSION}/standard-install.yaml"

# ── 4. Istio ─────────────────────────────────────────────────────────────
echo ""
echo "=== 4/9: Installing Istio ${ISTIO_VERSION} ==="
echo "=== 4/8: Installing Istio ${ISTIO_VERSION} ==="
if ! command -v istioctl &>/dev/null; then
echo "Installing istioctl..."
curl -L https://istio.io/downloadIstio | ISTIO_VERSION="${ISTIO_VERSION}" sh -
Expand All @@ -94,12 +93,12 @@ kubectl -n istio-system rollout status deployment/istiod --timeout=180s

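# ── 5. GWIE CRDs (InferencePool, InferenceModel) ────────────────────────
# GWIE = Gateway API Inference Extension. NOTE: this step fetches the
# "latest" release manifest, so the CRD version is not pinned the way the
# KAITO, Gateway API, Istio and BBR versions are.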
echo ""
echo "=== 5/9: Installing GWIE CRDs ==="
echo "=== 5/8: Installing GWIE CRDs ==="
kubectl apply -f "https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml"

# ── 6. BBR (Body-Based Router) ──────────────────────────────────────────
echo ""
echo "=== 6/9: Installing BBR ${BBR_VERSION} ==="
echo "=== 6/8: Installing BBR ${BBR_VERSION} ==="
helm upgrade --install body-based-router oci://registry.k8s.io/gateway-api-inference-extension/charts/body-based-routing \
--version "${BBR_VERSION}" \
--set provider.name=istio \
Expand All @@ -112,7 +111,7 @@ kubectl rollout status deployment/body-based-router --timeout=120s 2>/dev/null |

# ── 7. Inference Gateway ────────────────────────────────────────────────
echo ""
echo "=== 7/9: Deploying inference Gateway ==="
echo "=== 7/8: Deploying inference Gateway ==="
kubectl apply -f "${MANIFESTS_DIR}/gateway.yaml"

echo "⏳ Waiting for Gateway pod..."
Expand All @@ -128,22 +127,16 @@ kubectl wait --for=condition=ready pod \
--timeout=180s 2>/dev/null || \
echo "⚠️ Gateway pod not ready yet — continuing."

# ── 8. HTTPRoute, error service, DestinationRules ───────────────────────
# Note: InferencePools + EPP are auto-created by KAITO when InferenceSets are applied.
# ── 8. HTTPRoute catch-all, error service, debug filter ─────────────────
# Note: InferenceSets, model-specific HTTPRoutes, and DestinationRules are
# created by individual E2E test cases via the test/e2e/utils helpers.
echo ""
echo "=== 8/9: Deploying routing, error service ==="
echo "=== 8/8: Deploying routing catch-all, error service ==="
kubectl apply -f "${MANIFESTS_DIR}/model-not-found.yaml"
kubectl apply -f "${MANIFESTS_DIR}/httproute.yaml"
kubectl apply -f "${MANIFESTS_DIR}/destination-rules.yaml"
kubectl apply -f "${MANIFESTS_DIR}/inference-debug-filter.yaml"

echo "⏳ Waiting for model-not-found service..."
kubectl rollout status deployment/model-not-found --timeout=60s 2>/dev/null || true

# ── 9. InferenceSets (KAITO workloads) ──────────────────────────────────
echo ""
echo "=== 9/9: Deploying InferenceSets ==="
kubectl apply -f "${MANIFESTS_DIR}/inference-sets.yaml"

echo ""
echo "✅ All components installed."
8 changes: 4 additions & 4 deletions hack/e2e/scripts/run-e2e-local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
# Environment variables (override defaults as needed):
# RESOURCE_GROUP (default: kaito-e2e-local)
# CLUSTER_NAME (default: kaito-e2e-local)
# LOCATION (default: eastus)
# LOCATION (default: swedencentral)
# NODE_COUNT (default: 2)
# NODE_VM_SIZE (default: Standard_D8s_v3)
# NODE_VM_SIZE (default: Standard_D4s_v3)
# SKIP_TEARDOWN (default: false) — set to "true" to keep cluster after tests
# ---------------------------------------------------------------------------
set -euo pipefail
Expand All @@ -25,9 +25,9 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

export RESOURCE_GROUP="${RESOURCE_GROUP:-kaito-e2e-local}"
export CLUSTER_NAME="${CLUSTER_NAME:-kaito-e2e-local}"
export LOCATION="${LOCATION:-eastus}"
export LOCATION="${LOCATION:-swedencentral}"
export NODE_COUNT="${NODE_COUNT:-2}"
export NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D8s_v3}"
export NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D4s_v3}"
SKIP_TEARDOWN="${SKIP_TEARDOWN:-false}"

STEP="${1:-all}"
Expand Down
6 changes: 3 additions & 3 deletions hack/e2e/scripts/setup-cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# RESOURCE_GROUP — Azure resource group name (default: kaito-rg)
# CLUSTER_NAME — AKS cluster name (default: kaito-aks)
# ACR_NAME — ACR registry name (default: <cluster_name>acr, sanitized)
# LOCATION — Azure region (default: eastus)
# LOCATION — Azure region (default: swedencentral)
# NODE_COUNT — Number of worker nodes (default: 2)
# NODE_VM_SIZE — VM SKU for the node pool (default: Standard_D4s_v3)
#
Expand All @@ -19,9 +19,9 @@ RESOURCE_GROUP="${RESOURCE_GROUP:-kaito-rg}"
CLUSTER_NAME="${CLUSTER_NAME:-kaito-aks}"
# ACR names must be alphanumeric, 5-50 chars. Strip dashes from cluster name.
ACR_NAME="${ACR_NAME:-$(echo "${CLUSTER_NAME}acr" | tr -d '-' | head -c 50)}"
LOCATION="${LOCATION:-eastus}"
LOCATION="${LOCATION:-swedencentral}"
NODE_COUNT="${NODE_COUNT:-2}"
NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D8s_v3}"
NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D4s_v3}"

echo "=== Creating resource group ${RESOURCE_GROUP} in ${LOCATION} ==="
az group create \
Expand Down
69 changes: 0 additions & 69 deletions hack/e2e/scripts/validate-components.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,75 +87,6 @@ for crd in \
done
echo ""

# ── 1. Inference pods Running ────────────────────────────────────────────
echo "=== Inference pods ==="
for name in falcon-7b-instruct ministral-3-3b-instruct; do
label="inferenceset.kaito.sh/created-by=${name}"
if kubectl wait --for=condition=ready pod -l "${label}" --timeout="${TIMEOUT}" >/dev/null 2>&1; then
pass "Pods for ${name} are Running"
else
fail "Pods for ${name} are NOT Running"
fi
kubectl get pods -l "${label}" 2>/dev/null || true
done
echo ""

# ── 2. InferencePools exist with Running EPP ─────────────────────────────
echo "=== InferencePools ==="
for pool in falcon-7b-instruct-inferencepool ministral-3-3b-instruct-inferencepool; do
if kubectl get inferencepool "${pool}" >/dev/null 2>&1; then
# Check that the EPP pod for this pool is Running (matched by pod name "<pool>-epp", not by label)
EPP_READY=$(kubectl get pods --no-headers 2>/dev/null | grep "${pool}-epp" | grep -c "Running" || true)
if [[ "${EPP_READY:-0}" -gt 0 ]]; then
pass "InferencePool ${pool} exists, EPP Running"
else
fail "InferencePool ${pool} exists but EPP pod is not Running"
fi
else
fail "InferencePool ${pool} is MISSING"
fi
done
echo ""

# ── 3. InferenceSets replicas ready ──────────────────────────────────────
echo "=== InferenceSets ==="
for ws in falcon-7b-instruct ministral-3-3b-instruct; do
if kubectl get inferenceset "${ws}" >/dev/null 2>&1; then
READY=$(kubectl get inferenceset "${ws}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0")
DESIRED=$(kubectl get inferenceset "${ws}" -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "?")
if [[ "${READY}" == "${DESIRED}" && "${READY}" != "0" ]]; then
pass "InferenceSet ${ws} ready=${READY}/${DESIRED}"
else
fail "InferenceSet ${ws} ready=${READY}/${DESIRED}"
fi
else
fail "InferenceSet ${ws} is MISSING"
fi
done
echo ""

# ── 4. HTTPRoute Accepted=True ───────────────────────────────────────────
echo "=== HTTPRoute ==="
ROUTE_ACCEPTED=$(kubectl get httproute llm-route \
-o jsonpath='{.status.parents[0].conditions[?(@.type=="Accepted")].status}' 2>/dev/null || echo "")
if [[ "${ROUTE_ACCEPTED}" == "True" ]]; then
pass "HTTPRoute llm-route Accepted=True"
else
fail "HTTPRoute llm-route Accepted=${ROUTE_ACCEPTED:-<not found>}"
fi
echo ""

# ── 5. DestinationRules exist ────────────────────────────────────────────
echo "=== DestinationRules ==="
for dr in falcon-7b-instruct-inferencepool-epp ministral-3-3b-instruct-inferencepool-epp; do
if kubectl get destinationrule "${dr}" >/dev/null 2>&1; then
pass "DestinationRule ${dr} exists"
else
fail "DestinationRule ${dr} is MISSING"
fi
done
echo ""

# ── Summary ──────────────────────────────────────────────────────────────
if [[ "$FAILED" -eq 0 ]]; then
echo "=== All validation checks passed ✅ ==="
Expand Down
Loading
Loading