Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Integration tests use `//go:build integration` and skip gracefully when prerequi

**Design**: Deployment-centric (unique namespaces via petnames), local-first (k3d), XDG-compliant, two-stage templating (CLI flags → Go templates → Helmfile → K8s).

**Routing**: Traefik + Kubernetes Gateway API. GatewayClass `traefik`, Gateway `traefik-gateway` in `traefik` ns. Routes: `/` → frontend, `/rpc` → eRPC, `/services/<name>/*` → x402 ForwardAuth → upstream, `/.well-known/agent-registration.json` → ERC-8004 httpd, `/ethereum-<id>/execution|beacon`.
**Routing**: Traefik + Kubernetes Gateway API. GatewayClass `traefik`, Gateway `traefik-gateway` in `traefik` ns. Local-only routes (restricted to `hostnames: ["obol.stack"]`): `/` → frontend, `/rpc` → eRPC. Public routes (accessible via tunnel, no hostname restriction): `/services/<name>/*` → x402 ForwardAuth → upstream, `/.well-known/agent-registration.json` → ERC-8004 httpd, `/skill.md` → service catalog. Tunnel hostname gets a storefront landing page at `/`. NEVER remove hostname restrictions from frontend or eRPC HTTPRoutes — exposing the frontend/RPC to the public internet is a critical security flaw.

**Config**: `Config{ConfigDir, DataDir, BinDir}`. Precedence: `OBOL_CONFIG_DIR` > `XDG_CONFIG_HOME/obol` > `~/.config/obol`. `OBOL_DEVELOPMENT=true` → `.workspace/` dirs. All K8s tools auto-set `KUBECONFIG=$OBOL_CONFIG_DIR/kubeconfig.yaml`.

Expand Down Expand Up @@ -156,6 +156,22 @@ Skills = SKILL.md + optional scripts/references, embedded in `obol` binary (`int
4. **ExternalName services** — don't work with Traefik Gateway API, use ClusterIP + Endpoints
5. **eRPC `eth_call` cache** — default TTL is 10s for unfinalized reads, so `buy.py balance` can lag behind an already-settled paid request for a few seconds

### Security: Tunnel Exposure

The Cloudflare tunnel exposes the cluster to the public internet. Only x402-gated endpoints, discovery metadata, and the static storefront landing page should be reachable via the tunnel hostname. Internal services (frontend, eRPC, LiteLLM, monitoring) MUST have `hostnames: ["obol.stack"]` on their HTTPRoutes to restrict them to local access.

**NEVER**:
- Remove `hostnames` restrictions from frontend or eRPC HTTPRoutes
- Create HTTPRoutes without `hostnames` for internal services
- Expose the frontend UI, Prometheus/monitoring, or LiteLLM admin to the tunnel
- Run `obol stack down` or `obol stack purge` unless explicitly asked

**Public routes** (no hostname restriction, intentional):
- `/services/*` — x402 payment-gated, safe by design
- `/.well-known/agent-registration.json` — ERC-8004 discovery
- `/skill.md` — machine-readable service catalog
- `/` on tunnel hostname — static storefront landing page (busybox httpd)

## Key Packages

| Package | Key Files | Role |
Expand Down
7 changes: 7 additions & 0 deletions cmd/obol/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,13 @@ GLOBAL OPTIONS:{{template "visibleFlagTemplate" .}}{{end}}
return tunnel.Restart(cfg, getUI(cmd))
},
},
{
Name: "stop",
Usage: "Stop the tunnel (scale cloudflared to 0 replicas)",
Action: func(ctx context.Context, cmd *cli.Command) error {
return tunnel.Stop(cfg, getUI(cmd))
},
},
{
Name: "logs",
Usage: "View cloudflared logs",
Expand Down
8 changes: 4 additions & 4 deletions cmd/obol/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func setupOllama(cfg *config.Config, u *ui.UI, models []string) error {
if len(ollamaModels) == 0 {
u.Warn("No models pulled in Ollama")
u.Print("")
u.Print(" Hint: Pull a model with: ollama pull qwen3.5:9b")
u.Print(" Hint: Pull a model with: ollama pull qwen3.5:4b")
u.Print(" Hint: Or run: obol model pull")
return fmt.Errorf("Ollama is running but has no models")
}
Expand Down Expand Up @@ -145,9 +145,9 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m
// Sensible defaults
switch provider {
case "anthropic":
models = []string{"claude-sonnet-4-5-20250929"}
models = []string{"claude-sonnet-4-6"}
case "openai":
models = []string{"gpt-4o"}
models = []string{"gpt-4.1"}
}
}

Expand Down Expand Up @@ -378,7 +378,7 @@ func promptModelPull() (string, error) {
desc string
}
suggestions := []suggestion{
{"qwen3.5:9b", "5.6 GB", "Strong general-purpose (recommended)"},
{"qwen3.5:4b", "2.7 GB", "Fast general-purpose (recommended)"},
{"qwen2.5-coder:7b", "4.7 GB", "Code generation"},
{"deepseek-r1:8b", "4.9 GB", "Reasoning"},
{"gemma3:4b", "3.3 GB", "Lightweight, multilingual"},
Expand Down
221 changes: 220 additions & 1 deletion cmd/obol/sell.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"net"
"os"
"os/signal"
"runtime"
"strconv"
"strings"
"syscall"

Expand All @@ -16,6 +19,7 @@ import (
"github.com/ObolNetwork/obol-stack/internal/inference"
"github.com/ObolNetwork/obol-stack/internal/kubectl"
"github.com/ObolNetwork/obol-stack/internal/schemas"
"github.com/ObolNetwork/obol-stack/internal/stack"
"github.com/ObolNetwork/obol-stack/internal/tee"
"github.com/ObolNetwork/obol-stack/internal/tunnel"
x402verifier "github.com/ObolNetwork/obol-stack/internal/x402"
Expand Down Expand Up @@ -210,6 +214,68 @@ Examples:
return err
}

// If a cluster is available, route through the cluster's x402 flow
// (tunnel → Traefik → x402-verifier → host gateway → Ollama).
// The gateway's built-in x402 is disabled to avoid double-gating.
kubeconfigPath := fmt.Sprintf("%s/kubeconfig.yaml", cfg.ConfigDir)
clusterAvailable := false
if _, statErr := os.Stat(kubeconfigPath); statErr == nil {
clusterAvailable = true
}

if clusterAvailable {
d.NoPaymentGate = true

// Resolve the gateway port from the listen address.
listenAddr := d.ListenAddr
port := "8402"
if idx := strings.LastIndex(listenAddr, ":"); idx >= 0 {
port = listenAddr[idx+1:]
}

// Bind to loopback only — the cluster reaches us via the
// K8s Service+Endpoints bridge; there is no reason to expose
// the unpaid gateway on all interfaces.
d.ListenAddr = "127.0.0.1:" + port

// Create a K8s Service + Endpoints pointing to the host.
svcNs := "llm" // co-locate with LiteLLM for simplicity
if err := createHostService(cfg, name, svcNs, port); err != nil {
fmt.Printf("Warning: could not create cluster service: %v\n", err)
fmt.Println("Falling back to standalone mode with built-in x402 payment gate.")
d.NoPaymentGate = false
} else {
// Create a ServiceOffer CR pointing at the host service.
soSpec := buildInferenceServiceOfferSpec(d, priceTable, svcNs, port)
soManifest := map[string]interface{}{
"apiVersion": "obol.org/v1alpha1",
"kind": "ServiceOffer",
"metadata": map[string]interface{}{
"name": name,
"namespace": svcNs,
},
"spec": soSpec,
}
if err := kubectlApply(cfg, soManifest); err != nil {
fmt.Printf("Warning: could not create ServiceOffer: %v\n", err)
d.NoPaymentGate = false
} else {
fmt.Printf("ServiceOffer %s/%s created (type: inference, routed via cluster)\n", svcNs, name)

// Ensure tunnel is active.
u := getUI(cmd)
u.Blank()
u.Info("Ensuring tunnel is active for public access...")
if tunnelURL, tErr := tunnel.EnsureTunnelForSell(cfg, u); tErr != nil {
u.Warnf("Tunnel not started: %v", tErr)
u.Dim(" Start manually with: obol tunnel restart")
} else {
u.Successf("Tunnel active: %s", tunnelURL)
}
}
}
}

return runInferenceGateway(d, chain)
},
}
Expand Down Expand Up @@ -397,6 +463,17 @@ Examples:
}
fmt.Printf("The agent will reconcile: health-check → payment gate → route\n")
fmt.Printf("Check status: obol sell status %s -n %s\n", name, ns)

// Ensure tunnel is active for public access.
u := getUI(cmd)
u.Blank()
u.Info("Ensuring tunnel is active for public access...")
if tunnelURL, err := tunnel.EnsureTunnelForSell(cfg, u); err != nil {
u.Warnf("Tunnel not started: %v", err)
u.Dim(" Start manually with: obol tunnel restart")
} else {
u.Successf("Tunnel active: %s", tunnelURL)
}
return nil
},
}
Expand Down Expand Up @@ -621,7 +698,24 @@ func sellDeleteCommand(cfg *config.Config) *cli.Command {
}
}

return kubectlRun(cfg, "delete", "serviceoffers.obol.org", name, "-n", ns)
if err := kubectlRun(cfg, "delete", "serviceoffers.obol.org", name, "-n", ns); err != nil {
return err
}

// Auto-stop quick tunnel when no ServiceOffers remain.
remaining, listErr := kubectlOutput(cfg, "get", "serviceoffers.obol.org", "-A",
"-o", "jsonpath={.items}")
if listErr == nil && (remaining == "[]" || strings.TrimSpace(remaining) == "") {
st, _ := tunnel.LoadTunnelState(cfg)
if st == nil || st.Mode != "dns" {
u := getUI(cmd)
u.Blank()
u.Info("No ServiceOffers remaining. Stopping quick tunnel.")
_ = tunnel.Stop(cfg, u)
_ = tunnel.DeleteStorefront(cfg)
}
}
return nil
},
}
}
Expand Down Expand Up @@ -791,6 +885,7 @@ func runInferenceGateway(d *inference.Deployment, chain x402.ChainConfig) error
VMHostPort: d.VMHostPort,
TEEType: d.TEEType,
ModelHash: d.ModelHash,
NoPaymentGate: d.NoPaymentGate,
})
if err != nil {
return fmt.Errorf("failed to create gateway: %w", err)
Expand Down Expand Up @@ -1024,6 +1119,130 @@ func formatInferencePriceSummary(d *inference.Deployment) string {
return fmt.Sprintf("%s USDC/request", d.PricePerRequest)
}

// createHostService creates a selector-less Service and a matching, manually
// managed Endpoints object so that in-cluster traffic can be routed to a
// gateway process running on the host.
//
// The Service carries only a port definition (no selector and no explicit
// clusterIP), and the Endpoints object shares its name and namespace, which is
// how Kubernetes associates hand-written Endpoints with a Service.
//
// Kubernetes Endpoints require an IP address, not a hostname, so the host
// address is obtained from resolveHostIP (intended to mirror
// ollamaHostIPForBackend in internal/stack).
//
// Parameters:
//   - cfg:  CLI configuration, forwarded to resolveHostIP and kubectlApply.
//   - name: name used for both the Service and the Endpoints object.
//   - ns:   namespace both objects are created in.
//   - port: decimal TCP port the host gateway listens on; used as the Service
//     port, its targetPort, and the Endpoints port.
//
// It returns an error when port is not an integer in 1-65535, when the host IP
// cannot be resolved, or when applying either manifest fails.
func createHostService(cfg *config.Config, name, ns, port string) error {
	// Validate the port before touching the cluster: a discarded Atoi error
	// would otherwise turn a malformed value into port 0 in both manifests.
	portNum, err := strconv.Atoi(port)
	if err != nil {
		return fmt.Errorf("invalid gateway port %q: %w", port, err)
	}
	if portNum < 1 || portNum > 65535 {
		return fmt.Errorf("invalid gateway port %q: must be between 1 and 65535", port)
	}

	hostIP, err := resolveHostIP(cfg)
	if err != nil {
		return fmt.Errorf("cannot resolve host IP for cluster routing: %w", err)
	}

	svc := map[string]interface{}{
		"apiVersion": "v1",
		"kind":       "Service",
		"metadata": map[string]interface{}{
			"name":      name,
			"namespace": ns,
		},
		"spec": map[string]interface{}{
			"ports": []map[string]interface{}{
				{"port": portNum, "targetPort": portNum, "protocol": "TCP"},
			},
		},
	}
	ep := map[string]interface{}{
		"apiVersion": "v1",
		"kind":       "Endpoints",
		"metadata": map[string]interface{}{
			"name":      name,
			"namespace": ns,
		},
		"subsets": []map[string]interface{}{
			{
				"addresses": []map[string]interface{}{
					{"ip": hostIP},
				},
				"ports": []map[string]interface{}{
					{"port": portNum, "protocol": "TCP"},
				},
			},
		},
	}

	// Apply the Service first so the Endpoints object has something to back.
	if err := kubectlApply(cfg, svc); err != nil {
		return fmt.Errorf("failed to create service: %w", err)
	}
	if err := kubectlApply(cfg, ep); err != nil {
		return fmt.Errorf("failed to create endpoints: %w", err)
	}
	return nil
}

// resolveHostIP picks the address that cluster workloads should use to reach
// the machine this CLI runs on.
//
// Candidates are tried in this order and the first hit wins:
//  1. A detected k3s backend yields 127.0.0.1.
//  2. The first address from looking up host.docker.internal, then
//     host.k3d.internal.
//  3. On darwin, the fixed address 192.168.65.254.
//  4. The first IPv4 address assigned to the docker0 interface.
//
// An error is returned when none of the candidates produce an address.
//
// NOTE(review): the k3s branch returns a loopback address, and the caller
// writes this value into an Endpoints object. Kubernetes is documented to
// reject loopback addresses there — confirm this path works on k3s.
func resolveHostIP(cfg *config.Config) (string, error) {
	// Bare-metal k3s shares the host network namespace with this process.
	if stack.DetectExistingBackend(cfg) == stack.BackendK3s {
		return "127.0.0.1", nil
	}

	// k3d / Docker: ask the resolver for the conventional host aliases.
	aliases := [...]string{"host.docker.internal", "host.k3d.internal"}
	for i := range aliases {
		resolved, lookupErr := net.LookupHost(aliases[i])
		if lookupErr != nil || len(resolved) == 0 {
			continue
		}
		return resolved[0], nil
	}

	// macOS Docker Desktop fallback: well-known VM gateway.
	if runtime.GOOS == "darwin" {
		return "192.168.65.254", nil
	}

	// Linux fallback: first IPv4 address on the docker0 bridge.
	bridge, ifErr := net.InterfaceByName("docker0")
	if ifErr == nil {
		if bridgeAddrs, addrErr := bridge.Addrs(); addrErr == nil {
			for i := range bridgeAddrs {
				cidr, isIPNet := bridgeAddrs[i].(*net.IPNet)
				if !isIPNet || cidr.IP.To4() == nil {
					continue
				}
				return cidr.IP.String(), nil
			}
		}
	}

	return "", fmt.Errorf("cannot determine host IP; ensure Docker is running or using k3s backend")
}

// buildInferenceServiceOfferSpec assembles the "spec" section of a
// ServiceOffer for an inference gateway that runs on the host and is reached
// through a cluster Service.
//
// Parameters:
//   - d:    deployment supplying the offer name, chain, wallet address,
//     per-request price and upstream URL.
//   - pt:   price table; when PerMTok is set it takes precedence over the
//     deployment's per-request price.
//   - ns:   namespace of the upstream Service.
//   - port: decimal port of the upstream Service.
//
// The returned map always contains "type", "upstream", "payment" and "path".
// A "model" entry is added only when d.UpstreamURL is non-empty.
func buildInferenceServiceOfferSpec(d *inference.Deployment, pt schemas.PriceTable, ns, port string) map[string]interface{} {
	// The conversion error is discarded, so a non-numeric port becomes 0.
	upstreamPort, _ := strconv.Atoi(port)

	// Exactly one pricing key is emitted: per-million-token if configured,
	// otherwise the flat per-request price.
	pricing := map[string]interface{}{}
	switch {
	case pt.PerMTok != "":
		pricing["perMTok"] = pt.PerMTok
	default:
		pricing["perRequest"] = d.PricePerRequest
	}

	upstream := map[string]interface{}{
		"service":    d.Name,
		"namespace":  ns,
		"port":       upstreamPort,
		"healthPath": "/health",
	}
	payment := map[string]interface{}{
		"scheme":  "exact",
		"network": d.Chain,
		"payTo":   d.WalletAddress,
		"price":   pricing,
	}
	offer := map[string]interface{}{
		"type":     "inference",
		"upstream": upstream,
		"payment":  payment,
		"path":     fmt.Sprintf("/services/%s", d.Name),
	}

	if d.UpstreamURL == "" {
		return offer
	}
	offer["model"] = map[string]interface{}{
		"name":    "ollama",
		"runtime": "ollama",
	}
	return offer
}

// removePricingRoute removes the x402-verifier pricing route for the given offer.
func removePricingRoute(cfg *config.Config, name string) {
urlPath := fmt.Sprintf("/services/%s", name)
Expand Down
Loading
Loading