diff --git a/integration/pgdog.toml b/integration/pgdog.toml index 041d6bb2e..4dd44353b 100644 --- a/integration/pgdog.toml +++ b/integration/pgdog.toml @@ -52,26 +52,26 @@ read_only = true [[databases]] name = "pgdog_sharded" -host = "localhost" +host = "127.0.0.1" database_name = "shard_0" shard = 0 [[databases]] name = "pgdog_sharded" -host = "localhost" +host = "127.0.0.1" database_name = "shard_1" shard = 1 [[databases]] name = "pgdog_sharded" -host = "localhost" +host = "127.0.0.1" database_name = "shard_0" shard = 0 role = "replica" [[databases]] name = "pgdog_sharded" -host = "localhost" +host = "127.0.0.1" database_name = "shard_1" shard = 1 role = "replica" diff --git a/integration/production_readiness/.gitignore b/integration/production_readiness/.gitignore new file mode 100644 index 000000000..beb83c302 --- /dev/null +++ b/integration/production_readiness/.gitignore @@ -0,0 +1,3 @@ +results/ +*.log +*.pid diff --git a/integration/production_readiness/README.md b/integration/production_readiness/README.md new file mode 100644 index 000000000..1cbec616e --- /dev/null +++ b/integration/production_readiness/README.md @@ -0,0 +1,216 @@ +# PgDog Production Readiness Test Suite + +Validates PgDog for multi-tenant production use with 2000+ tenant databases, wildcard pool templates, and passthrough authentication. 
+ +## Prerequisites + +- **Docker** and **Docker Compose** v2+ +- **PostgreSQL client tools**: `psql`, `pgbench` +- **Python 3** (for config generation) +- **Rust toolchain** (for building PgDog and Rust integration tests) +- **curl** (for metrics validation) +- (Optional) **kubectl** for K8s tests + +## Quick Start + +```bash +# Full suite — builds PgDog, starts Postgres, creates 2000 databases, runs all tests +bash integration/production_readiness/run.sh + +# Customized run +bash integration/production_readiness/run.sh \ + --tenant-count 500 \ + --duration 120 \ + --pool-size 15 \ + --max-wildcard-pools 1000 +``` + +## Running Individual Tests + +Start infrastructure first, then run tests independently: + +```bash +# 1. Start Postgres +cd integration/production_readiness +docker compose up -d + +# 2. Create tenant databases +bash setup/generate_tenants.sh --count 2000 + +# 3. Generate PgDog config +python3 setup/generate_config.py --pool-size 10 --output-dir config + +# 4. Start PgDog (from the config directory) +cd config && /path/to/pgdog & + +# 5. Run individual tests +bash load/scale_connect.sh --count 2000 +bash load/multi_tenant_bench.sh --tenant-count 100 --clients 50 --duration 60 +bash load/passthrough_auth.sh --user-count 100 +bash load/pool_pressure.sh --pool-size 5 --connections 50 +bash load/sustained_load.sh --duration 10 --clients 50 + +# New realistic tests +bash load/pool_lifecycle.sh --idle-timeout 10 --cycles 3 --batch-size 50 +bash load/connection_storm.sh --storm-size 200 --parallel 100 +bash load/idle_in_transaction.sh --target-db tenant_1 +bash load/graceful_shutdown.sh --pgdog-bin /path/to/pgdog --config-dir config + +# Fault injection tests (require toxiproxy running) +bash setup/configure_toxiproxy.sh +bash load/backend_failure.sh --target-db tenant_1 +bash load/network_latency.sh --latency 5 --jitter 2 + +# 6. 
Validate observability
+bash validate/check_metrics.sh
+bash validate/check_pools.sh
+bash validate/check_memory.sh
+```
+
+## Running Rust Integration Tests
+
+These tests require PgDog running with wildcard config and tenant databases:
+
+```bash
+cd integration/rust
+cargo nextest run --test-threads=1 --run-ignored all pool_cap_saturation
+cargo nextest run --test-threads=1 --run-ignored all concurrent_pool_creation
+```
+
+## Running on Kubernetes
+
+```bash
+cd integration/production_readiness/k8s
+bash run-k8s.sh 2000 # Creates namespace, deploys Postgres + PgDog, sets up 2000 tenants
+
+# Then run load tests against localhost:6432 (port-forwarded)
+bash ../load/multi_tenant_bench.sh --tenant-count 100 --clients 50
+
+# Cleanup
+kubectl delete namespace pgdog-test
+```
+
+## Test Descriptions
+
+| Test | What it validates | Pass criteria |
+|------|-------------------|---------------|
+| **Scale connect** | Connect to 2000+ databases | 100% success, <5s total |
+| **Multi-tenant load** | Throughput under concurrent tenant load | p99 <100ms, errors <0.5% |
+| **Passthrough auth** | Auth delegation to Postgres | Correct creds succeed, wrong creds fail |
+| **Pool pressure** | Behavior when pools are exhausted | Clear timeouts, recovery after pressure |
+| **Sustained load** | Memory stability over time | RSS growth <20% |
+| **Pool lifecycle** | Wildcard pool create→idle→evict→recreate | Pools evict, recreate works, stable memory |
+| **Connection storm** | Thundering herd on cold pools | ≥90% success, recovery after burst |
+| **Idle-in-transaction** | Pool blocking by held transactions | Queries queue/timeout, pool recovers |
+| **Backend failure** | Reconnect after reset/partition/slow | Recovery after each fault type |
+| **Network latency** | Behavior under injected latency | Timeouts work, throughput degrades gracefully |
+| **Graceful shutdown** | SIGTERM drain under active load | Process exits within grace period, no panic |
+| **Metrics check** | OpenMetrics
endpoint accuracy | Valid Prometheus format, pool counts match | +| **Admin pools** | Admin interface consistency | Pool states match expected | +| **Pool cap** (Rust) | `max_wildcard_pools` enforcement | Clean rejection at limit, no corruption | +| **Concurrent creates** (Rust) | Race safety on pool instantiation | Exactly 1 pool per database | + +## Configuration Reference + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `TENANT_COUNT` | 2000 | Number of tenant databases to create | +| `DURATION` | 60 | Load test duration (seconds) | +| `POOL_SIZE` | 10 | PgDog default_pool_size | +| `MAX_WILDCARD_POOLS` | 0 | Max wildcard pools (0 = unlimited) | +| `PGDOG_PORT` | 6432 | PgDog listen port | +| `PG_HOST` | 127.0.0.1 | Postgres host | +| `PG_PORT` | 15432 | Postgres port | + +### CLI Arguments (run.sh) + +``` +--tenant-count N Number of tenant databases +--duration N Load test duration in seconds +--pool-size N Default pool size +--max-wildcard-pools N Max wildcard pools (0 = unlimited) +--skip-build Skip cargo build (use existing binary) +--skip-infra Skip Docker Compose startup and tenant creation +--skip-k8s Skip K8s tests (default) +--with-k8s Include K8s tests +``` + +## Interpreting Results + +Results are written to `integration/production_readiness/results/`: + +- `summary.txt` — Pass/fail per test +- `scale_connect.log` — Connection timing details +- `multi_tenant_bench.log` — TPS, latency percentiles +- `passthrough_auth.log` — Auth success/failure counts +- `pool_pressure.log` — Queue behavior, recovery timing +- `sustained_load.log` — Time-series telemetry +- `memory_trace.csv` — RSS measurements over time +- `metrics_snapshot.txt` — Raw OpenMetrics output +- `show_pools.txt` / `show_clients.txt` — Admin DB snapshots + +### What "good" looks like + +- **TPS**: Depends on hardware. Baseline with direct Postgres, then compare via PgDog. Overhead should be <15%. 
+- **p99 latency**: Under 100ms for transaction-mode pooling with moderate load. +- **Memory**: RSS should stabilize within the first minute. Growth >20% over 10 minutes suggests a leak. +- **Pool cap**: Rejection should be immediate (not timeout), with a clear error message. +- **Concurrent creates**: Zero duplicate pools. All connections succeed. + +## Relation to ADR + +This test suite addresses the "Edge cases to test before production use" from `2026-03-22-pgdog-as-db-bouncer.md`: + +- ✅ **Wildcard pool cap reached** → `pool_cap_saturation` test +- ✅ **Concurrent first connections** → `concurrent_pool_creation` test +- ✅ **Cold tenant reconnect after eviction** → `pool_lifecycle` churn test +- ✅ **Backend failure / network partition** → `backend_failure` test (toxiproxy) +- ✅ **Network latency impact** → `network_latency` test (toxiproxy) +- ✅ **Graceful shutdown under load** → `graceful_shutdown` test +- ✅ **Idle-in-transaction pool starvation** → `idle_in_transaction` test +- ✅ **Thundering herd on cold pools** → `connection_storm` test +- ⬜ Password rotation for materialized wildcard pools (future: extend passthrough_auth test) +- ⬜ Reload after changing wildcard template (future: config reload test) + +## Directory Structure + +``` +integration/production_readiness/ +├── run.sh # Master orchestrator +├── docker-compose.yml # Postgres test environment +├── README.md # This file +├── .gitignore +├── setup/ +│ ├── init.sql # Tenant database schema template +│ ├── generate_tenants.sh # Create N tenant databases +│ ├── generate_config.py # Generate PgDog wildcard config +│ └── configure_toxiproxy.sh # Create toxiproxy proxy for fault injection +├── config/ # Generated PgDog config (gitignored) +├── load/ +│ ├── tenant_workload.sql # pgbench workload per tenant +│ ├── scale_connect.sh # 2000+ database connection test +│ ├── multi_tenant_bench.sh # pgbench multi-tenant load +│ ├── passthrough_auth.sh # Auth delegation test +│ ├── pool_pressure.sh # Pool 
exhaustion test +│ ├── sustained_load.sh # Long-running soak test +│ ├── pool_lifecycle.sh # Wildcard pool churn (create/evict/recreate) +│ ├── connection_storm.sh # Thundering herd on cold pools +│ ├── idle_in_transaction.sh # Transaction hold pool starvation +│ ├── backend_failure.sh # Backend reset/partition/slow (toxiproxy) +│ ├── network_latency.sh # Injected latency impact (toxiproxy) +│ └── graceful_shutdown.sh # SIGTERM drain under load +├── validate/ +│ ├── check_metrics.sh # OpenMetrics validation +│ ├── check_pools.sh # Admin DB pool checks +│ └── check_memory.sh # RSS memory monitoring +├── k8s/ +│ ├── namespace.yaml +│ ├── postgres-statefulset.yaml +│ ├── pgdog-deployment.yaml +│ ├── pgdog-configmap.yaml +│ ├── tenant-setup-job.yaml +│ └── run-k8s.sh +└── results/ # Test output (gitignored) +``` diff --git a/integration/production_readiness/config/pgdog.toml b/integration/production_readiness/config/pgdog.toml new file mode 100644 index 000000000..f369f5adf --- /dev/null +++ b/integration/production_readiness/config/pgdog.toml @@ -0,0 +1,30 @@ +[general] +host = "0.0.0.0" +port = 6432 +workers = 4 +default_pool_size = 7 +min_pool_size = 0 +pooler_mode = "transaction" +idle_timeout = 10000 +checkout_timeout = 5000 +healthcheck_interval = 30000 +healthcheck_timeout = 5000 +connect_timeout = 5000 +query_timeout = 30000 +load_balancing_strategy = "random" +prepared_statements = "extended" +passthrough_auth = "enabled_plain" +openmetrics_port = 9090 +openmetrics_namespace = "pgdog" +max_wildcard_pools = 0 +wildcard_pool_idle_timeout = 15 + +[admin] +name = "admin" +user = "admin" +password = "admin" + +[[databases]] +name = "*" +host = "127.0.0.1" +port = 15432 diff --git a/integration/production_readiness/config/users.toml b/integration/production_readiness/config/users.toml new file mode 100644 index 000000000..af0aa36aa --- /dev/null +++ b/integration/production_readiness/config/users.toml @@ -0,0 +1,3 @@ +[[users]] +name = "*" +database = "*" diff --git 
a/integration/production_readiness/docker-compose.yml b/integration/production_readiness/docker-compose.yml new file mode 100644 index 000000000..c1ac28d02 --- /dev/null +++ b/integration/production_readiness/docker-compose.yml @@ -0,0 +1,124 @@ +services: + postgres_primary: + image: postgres:16 + ports: + - "${POSTGRES_PRIMARY_PORT:-15432}:5432" + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + volumes: + - ./setup/init.sql:/docker-entrypoint-initdb.d/01-init.sql + # Many tenant databases need adequate shared memory + shm_size: 2g + deploy: + resources: + limits: + cpus: "2" + memory: 8g + command: + - postgres + - -c + - max_connections=500 + - -c + - shared_buffers=2GB + - -c + - effective_cache_size=6GB + - -c + - work_mem=8MB + - -c + - maintenance_work_mem=256MB + - -c + - wal_buffers=16MB + - -c + - checkpoint_completion_target=0.9 + - -c + - random_page_cost=1.1 + - -c + - effective_io_concurrency=200 + - -c + - max_worker_processes=2 + - -c + - max_parallel_workers_per_gather=1 + - -c + - max_parallel_workers=2 + - -c + - log_min_duration_statement=1000 + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + timeout: 3s + retries: 10 + networks: + - pgdog_test + + # Standalone instance for read/write split testing (not actual replication) + postgres_replica: + image: postgres:16 + ports: + - "${POSTGRES_REPLICA_PORT:-15433}:5432" + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}" + volumes: + - ./setup/init.sql:/docker-entrypoint-initdb.d/01-init.sql + shm_size: 2g + deploy: + resources: + limits: + cpus: "2" + memory: 8g + command: + - postgres + - -c + - max_connections=500 + - -c + - shared_buffers=2GB + - -c + - effective_cache_size=6GB + - -c + - work_mem=8MB + - -c + - maintenance_work_mem=256MB + - -c + - wal_buffers=16MB + - -c + - checkpoint_completion_target=0.9 + - -c + - random_page_cost=1.1 + - -c + - effective_io_concurrency=200 
+ - -c + - max_worker_processes=2 + - -c + - max_parallel_workers_per_gather=1 + - -c + - max_parallel_workers=2 + - -c + - default_transaction_read_only=on + - -c + - log_min_duration_statement=1000 + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + timeout: 3s + retries: 10 + networks: + - pgdog_test + + # Toxiproxy for network fault injection (latency, partition, reset) + toxiproxy: + image: ghcr.io/shopify/toxiproxy:2.12.0 + ports: + - "${TOXI_LISTEN_PORT:-15440}:5432" + - "${TOXI_API_PORT:-8474}:8474" + networks: + - pgdog_test + healthcheck: + test: ["CMD", "/toxiproxy-cli", "list"] + interval: 5s + timeout: 3s + retries: 5 + +networks: + pgdog_test: + driver: bridge diff --git a/integration/production_readiness/k8s/namespace.yaml b/integration/production_readiness/k8s/namespace.yaml new file mode 100644 index 000000000..de53d070f --- /dev/null +++ b/integration/production_readiness/k8s/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: pgdog-test + labels: + app.kubernetes.io/part-of: pgdog-test diff --git a/integration/production_readiness/k8s/pgdog-configmap.yaml b/integration/production_readiness/k8s/pgdog-configmap.yaml new file mode 100644 index 000000000..f66a9f3f3 --- /dev/null +++ b/integration/production_readiness/k8s/pgdog-configmap.yaml @@ -0,0 +1,39 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: pgdog-config + namespace: pgdog-test + labels: + app: pgdog +data: + pgdog.toml: | + [general] + host = "0.0.0.0" + port = 6432 + workers = 4 + min_pool_size = 1 + default_pool_size = 5 + pooler_mode = "transaction" + healthcheck_interval = 30000 + idle_healthcheck_interval = 30000 + shutdown_timeout = 60000 + connect_timeout = 5000 + query_timeout = 60000 + openmetrics_port = 9090 + passthrough_auth = "enabled_plain" + + [admin] + name = "admin" + user = "admin" + password = "admin" + + [[databases]] + name = "*" + host = "postgres" + port = 5432 + role = "primary" + + users.toml: | + 
[[users]] + name = "*" + database = "*" diff --git a/integration/production_readiness/k8s/pgdog-deployment.yaml b/integration/production_readiness/k8s/pgdog-deployment.yaml new file mode 100644 index 000000000..54ffe4b69 --- /dev/null +++ b/integration/production_readiness/k8s/pgdog-deployment.yaml @@ -0,0 +1,73 @@ +apiVersion: v1 +kind: Service +metadata: + name: pgdog + namespace: pgdog-test + labels: + app: pgdog +spec: + type: ClusterIP + ports: + - port: 6432 + targetPort: 6432 + name: proxy + - port: 9090 + targetPort: 9090 + name: metrics + selector: + app: pgdog +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pgdog + namespace: pgdog-test + labels: + app: pgdog +spec: + replicas: 1 + selector: + matchLabels: + app: pgdog + template: + metadata: + labels: + app: pgdog + spec: + containers: + - name: pgdog + image: docker.io/library/pgdog:local + imagePullPolicy: Never + ports: + - containerPort: 6432 + name: proxy + - containerPort: 9090 + name: metrics + env: + - name: RUST_LOG + value: info + resources: + requests: + memory: 256Mi + cpu: 250m + limits: + memory: 1Gi + cpu: "1" + readinessProbe: + tcpSocket: + port: 6432 + initialDelaySeconds: 5 + periodSeconds: 5 + volumeMounts: + - name: config + mountPath: /pgdog/pgdog.toml + subPath: pgdog.toml + readOnly: true + - name: config + mountPath: /pgdog/users.toml + subPath: users.toml + readOnly: true + volumes: + - name: config + configMap: + name: pgdog-config diff --git a/integration/production_readiness/k8s/postgres-statefulset.yaml b/integration/production_readiness/k8s/postgres-statefulset.yaml new file mode 100644 index 000000000..d28c18e4b --- /dev/null +++ b/integration/production_readiness/k8s/postgres-statefulset.yaml @@ -0,0 +1,81 @@ +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: pgdog-test + labels: + app: postgres +spec: + type: ClusterIP + ports: + - port: 5432 + targetPort: 5432 + name: postgres + selector: + app: postgres +--- +apiVersion: apps/v1 
+kind: StatefulSet +metadata: + name: postgres + namespace: pgdog-test + labels: + app: postgres +spec: + serviceName: postgres + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: postgres:16 + args: + - -c + - max_connections=2500 + - -c + - shared_buffers=512MB + ports: + - containerPort: 5432 + name: postgres + env: + - name: POSTGRES_USER + value: postgres + - name: POSTGRES_PASSWORD + value: postgres + - name: POSTGRES_HOST_AUTH_METHOD + value: scram-sha-256 + - name: POSTGRES_INITDB_ARGS + value: "--auth-host=scram-sha-256 --auth-local=scram-sha-256" + resources: + requests: + memory: 512Mi + cpu: 500m + limits: + memory: 2Gi + cpu: "2" + readinessProbe: + exec: + command: + - pg_isready + - -U + - postgres + initialDelaySeconds: 5 + periodSeconds: 5 + volumeMounts: + - name: pgdata + mountPath: /var/lib/postgresql/data + volumeClaimTemplates: + - metadata: + name: pgdata + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi diff --git a/integration/production_readiness/k8s/run-k8s.sh b/integration/production_readiness/k8s/run-k8s.sh new file mode 100755 index 000000000..57a7423e1 --- /dev/null +++ b/integration/production_readiness/k8s/run-k8s.sh @@ -0,0 +1,70 @@ +#!/bin/bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +NAMESPACE="pgdog-test" +TENANT_COUNT="${1:-2000}" + +echo "=== PgDog K8s Production Readiness Test ===" +echo " Namespace: ${NAMESPACE}" +echo " Tenants: ${TENANT_COUNT}" +echo "" + +kubectl apply -f namespace.yaml + +kubectl apply -f postgres-statefulset.yaml +echo "Waiting for Postgres to be ready..." 
+kubectl -n "$NAMESPACE" rollout status statefulset/postgres --timeout=120s
+
+# Patch tenant count if non-default (the job manifest's TENANT_COUNT defaults to "100")
+if [ "$TENANT_COUNT" != "100" ]; then
+  kubectl -n "$NAMESPACE" delete job tenant-setup --ignore-not-found
+  cat tenant-setup-job.yaml \
+    | sed "s/value: \"100\"/value: \"${TENANT_COUNT}\"/" \
+    | kubectl apply -f -
+else
+  kubectl -n "$NAMESPACE" delete job tenant-setup --ignore-not-found
+  kubectl apply -f tenant-setup-job.yaml
+fi
+
+echo "Creating ${TENANT_COUNT} tenant databases (this may take a few minutes)..."
+kubectl -n "$NAMESPACE" wait --for=condition=complete job/tenant-setup --timeout=600s
+
+kubectl apply -f pgdog-configmap.yaml
+kubectl apply -f pgdog-deployment.yaml
+echo "Waiting for PgDog to be ready..."
+kubectl -n "$NAMESPACE" rollout status deployment/pgdog --timeout=60s
+
+# Port forward for local access (PgDog + Postgres)
+echo ""
+echo "Port forwarding PgDog and Postgres to localhost..."
+kubectl -n "$NAMESPACE" port-forward svc/pgdog 6432:6432 9090:9090 &
+PF_PGDOG=$!
+kubectl -n "$NAMESPACE" port-forward svc/postgres 15432:5432 &
+PF_PG=$!
+ +# Give port-forward a moment to bind +sleep 2 + +echo "" +echo "=== PgDog is ready ===" +echo " Proxy: localhost:6432" +echo " Metrics: localhost:9090" +echo " Postgres: localhost:15432 (direct)" +echo " Tenants: ${TENANT_COUNT}" +echo "" +echo "Test: psql -h localhost -p 6432 -U pgdog -d tenant_1" +echo "Metrics: curl -s http://localhost:9090/metrics" +echo "Cleanup: kubectl delete namespace ${NAMESPACE}" +echo "" +echo "Press Ctrl+C to stop port forwarding" + +cleanup_pf() { + kill $PF_PGDOG 2>/dev/null || true + kill $PF_PG 2>/dev/null || true + exit 0 +} +trap cleanup_pf INT TERM +wait $PF_PGDOG diff --git a/integration/production_readiness/k8s/tenant-setup-job.yaml b/integration/production_readiness/k8s/tenant-setup-job.yaml new file mode 100644 index 000000000..27204a072 --- /dev/null +++ b/integration/production_readiness/k8s/tenant-setup-job.yaml @@ -0,0 +1,146 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: tenant-setup-script + namespace: pgdog-test + labels: + app: tenant-setup +data: + create-tenants.sh: | + #!/bin/bash + set -euo pipefail + + TENANT_COUNT="${TENANT_COUNT:-2000}" + + echo "Creating pgdog role..." + psql -h postgres -U postgres -d postgres --no-psqlrc -q <<'EOSQL' + DO $$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'pgdog') THEN + CREATE ROLE pgdog WITH LOGIN PASSWORD 'pgdog' SUPERUSER; + END IF; + END $$; + EOSQL + + echo "Creating tenant_template with schema and seed data..." 
+ psql -h postgres -U postgres -d postgres --no-psqlrc -q \ + -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = 'tenant_template' AND pid <> pg_backend_pid();" 2>/dev/null || true + psql -h postgres -U postgres -d postgres --no-psqlrc -q \ + -c "DROP DATABASE IF EXISTS tenant_template;" + psql -h postgres -U postgres -d postgres --no-psqlrc -q \ + -c "CREATE DATABASE tenant_template OWNER pgdog;" + + psql -h postgres -U postgres -d tenant_template --no-psqlrc -q <<'EOSQL' + CREATE TABLE IF NOT EXISTS users ( + id BIGSERIAL PRIMARY KEY, + name TEXT NOT NULL, + email TEXT NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW() + ); + CREATE TABLE IF NOT EXISTS orders ( + id BIGSERIAL PRIMARY KEY, + user_id BIGINT REFERENCES users(id), + total NUMERIC(10,2) NOT NULL, + status TEXT DEFAULT 'pending', + created_at TIMESTAMPTZ DEFAULT NOW() + ); + CREATE INDEX idx_orders_user_id ON orders(user_id); + CREATE INDEX idx_orders_status ON orders(status); + INSERT INTO users (name, email) + SELECT 'user_' || i, 'user_' || i || '@example.com' + FROM generate_series(1, 100) AS i; + INSERT INTO orders (user_id, total, status) + SELECT + (random() * 99 + 1)::int, + (random() * 1000)::numeric(10,2), + (ARRAY['pending', 'completed', 'cancelled'])[floor(random() * 3 + 1)::int] + FROM generate_series(1, 1000) AS i; + EOSQL + + echo "Creating ${TENANT_COUNT} tenant databases from template..." + BATCH_SIZE=50 + created=0 + for (( i=1; i<=TENANT_COUNT; i++ )); do + DB="tenant_${i}" + if ! 
psql -h postgres -U postgres -d postgres --no-psqlrc -tAq \ + -c "SELECT 1 FROM pg_database WHERE datname = '${DB}'" | grep -q 1; then + psql -h postgres -U postgres -d postgres --no-psqlrc -q \ + -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = 'tenant_template' AND pid <> pg_backend_pid();" 2>/dev/null || true + psql -h postgres -U postgres -d postgres --no-psqlrc -q \ + -c "CREATE DATABASE ${DB} TEMPLATE tenant_template OWNER pgdog;" + ((created++)) || true + fi + if (( i % BATCH_SIZE == 0 )); then + echo " [${i}/${TENANT_COUNT}] created=${created}" + fi + done + + ACTUAL=$(psql -h postgres -U postgres -d postgres --no-psqlrc -tAq \ + -c "SELECT count(*) FROM pg_database WHERE datname LIKE 'tenant_%' AND datname != 'tenant_template';") + echo "Done. ${ACTUAL} tenant databases exist (${created} newly created)." + + echo "Creating ${TENANT_COUNT} tenant users for passthrough auth tests..." + USER_SQL="" + GRANT_SQL="" + for (( i=1; i<=TENANT_COUNT; i++ )); do + USER_SQL+="DO \$\$ BEGIN IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'tenant_user_${i}') THEN CREATE ROLE tenant_user_${i} WITH LOGIN PASSWORD 'pass_${i}'; END IF; END \$\$;" + GRANT_SQL+="GRANT CONNECT ON DATABASE tenant_1 TO tenant_user_${i};" + done + psql -h postgres -U postgres -d postgres --no-psqlrc -q -c "$USER_SQL" + psql -h postgres -U postgres -d postgres --no-psqlrc -q -c "$GRANT_SQL" + + # Grant table access inside tenant_1 for test users + TABLE_GRANT="" + for (( i=1; i<=TENANT_COUNT; i++ )); do + TABLE_GRANT+="GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO tenant_user_${i}; GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO tenant_user_${i};" + done + psql -h postgres -U postgres -d tenant_1 --no-psqlrc -q -c "$TABLE_GRANT" + echo "Done. ${TENANT_COUNT} tenant users created." 
+--- +apiVersion: batch/v1 +kind: Job +metadata: + name: tenant-setup + namespace: pgdog-test + labels: + app: tenant-setup +spec: + backoffLimit: 3 + ttlSecondsAfterFinished: 3600 + template: + metadata: + labels: + app: tenant-setup + spec: + restartPolicy: OnFailure + initContainers: + - name: wait-for-postgres + image: postgres:16 + command: + - sh + - -c + - | + until pg_isready -h postgres -U postgres; do + echo "Waiting for postgres..." + sleep 2 + done + containers: + - name: create-tenants + image: postgres:16 + env: + - name: PGPASSWORD + value: postgres + - name: TENANT_COUNT + value: "100" + command: + - bash + - /scripts/create-tenants.sh + volumeMounts: + - name: scripts + mountPath: /scripts + readOnly: true + volumes: + - name: scripts + configMap: + name: tenant-setup-script + defaultMode: 0755 diff --git a/integration/production_readiness/load/backend_failure.sh b/integration/production_readiness/load/backend_failure.sh new file mode 100755 index 000000000..2aaf390f4 --- /dev/null +++ b/integration/production_readiness/load/backend_failure.sh @@ -0,0 +1,341 @@ +#!/usr/bin/env bash +# Test PgDog behavior when backend connections fail unexpectedly. +# +# Uses toxiproxy to simulate: connection reset, timeout, and brief network partition. +# Validates: automatic reconnection, error propagation to clients, +# pool recovery, no zombie connections. 
+set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +TOXI_API="${TOXI_API:-http://127.0.0.1:8474}" +PROXY_NAME="${PROXY_NAME:-pg_primary}" +TARGET_DB="tenant_1" +PARTITION_SEC=5 + +while [[ $# -gt 0 ]]; do + case "$1" in + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --toxi-api) TOXI_API="$2"; shift 2 ;; + --proxy-name) PROXY_NAME="$2"; shift 2 ;; + --target-db) TARGET_DB="$2"; shift 2 ;; + --partition-sec) PARTITION_SEC="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +for cmd in psql curl; do + command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; } +done + +mkdir -p "$OUTPUT_DIR" +LOGFILE="$OUTPUT_DIR/backend_failure.log" +exec > >(tee -a "$LOGFILE") 2>&1 + +admin_query() { + PGPASSWORD=admin psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \ + -U admin -d admin -t -A -c "$1" 2>/dev/null +} + +remove_all_toxics() { + # List and remove all toxics for the proxy + local toxics + toxics=$(curl -sf "${TOXI_API}/proxies/${PROXY_NAME}/toxics" 2>/dev/null) || return 0 + for name in $(echo "$toxics" | grep -o '"name":"[^"]*"' | sed 's/"name":"//;s/"//'); do + curl -sf -X DELETE "${TOXI_API}/proxies/${PROXY_NAME}/toxics/${name}" >/dev/null 2>&1 || true + done +} + +FAILURES=0 + +echo "=== Backend Failure Test ===" +echo "PgDog: ${PGDOG_HOST}:${PGDOG_PORT}" +echo "Toxiproxy API: ${TOXI_API}" +echo "Proxy: ${PROXY_NAME}" +echo "Target: ${TARGET_DB}" +echo "" + +# Verify toxiproxy is reachable +if ! 
curl -sf "${TOXI_API}/version" >/dev/null 2>&1; then + fail "Toxiproxy API not reachable at ${TOXI_API}" + exit 1 +fi + +# Ensure clean state +remove_all_toxics + +trap 'remove_all_toxics; wait 2>/dev/null || true' EXIT SIGINT SIGTERM + +# ── Phase 1: Baseline ────────────────────────────────────────────────────── + +echo "--- Phase 1: Baseline connectivity ---" + +baseline_ok=0 +for i in $(seq 1 5); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((baseline_ok++)) || true + fi +done + +if [ "$baseline_ok" -eq 5 ]; then + pass "Baseline: 5/5 queries OK" +else + fail "Baseline: only ${baseline_ok}/5 queries OK — cannot proceed" + exit 1 +fi +echo "" + +# ── Phase 2: Connection reset (reset_peer toxic) ─────────────────────────── + +echo "--- Phase 2: Connection reset (reset_peer) ---" + +# Inject reset_peer: abruptly closes connections after N bytes +curl -sf -X POST "${TOXI_API}/proxies/${PROXY_NAME}/toxics" \ + -H "Content-Type: application/json" \ + -d '{"name":"reset_peer","type":"reset_peer","stream":"downstream","toxicity":0.5,"attributes":{"timeout":500}}' \ + >/dev/null 2>&1 + +echo " Injected: reset_peer (50% toxicity, 500ms timeout)" + +# Send queries — some should fail, but PgDog should recover +reset_ok=0 +reset_fail=0 +for i in $(seq 1 20); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((reset_ok++)) || true + else + ((reset_fail++)) || true + fi +done + +echo " Results: ok=${reset_ok}, fail=${reset_fail}" + +# Remove toxic +curl -sf -X DELETE "${TOXI_API}/proxies/${PROXY_NAME}/toxics/reset_peer" >/dev/null 2>&1 || true + +# Some failures expected, but not all +if [ "$reset_ok" -gt 0 ]; then + pass "Phase 2: ${reset_ok}/20 queries survived connection resets" +else + fail "Phase 2: All queries failed during connection reset" + ((FAILURES++)) || true +fi + +# Recovery 
check +sleep 2 +recovery_ok=0 +for i in $(seq 1 5); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((recovery_ok++)) || true + fi +done + +if [ "$recovery_ok" -eq 5 ]; then + pass "Phase 2 recovery: 5/5 queries OK" +else + fail "Phase 2 recovery: only ${recovery_ok}/5 queries OK" + ((FAILURES++)) || true +fi +echo "" + +# ── Phase 3: Network partition (bandwidth=0) ─────────────────────────────── + +echo "--- Phase 3: Network partition (${PARTITION_SEC}s blackout) ---" + +# Disable the proxy entirely +curl -sf -X POST "${TOXI_API}/proxies/${PROXY_NAME}" \ + -H "Content-Type: application/json" \ + -d '{"enabled":false}' \ + >/dev/null 2>&1 + +echo " Proxy disabled — simulating network partition" + +# Queries during partition should fail +partition_ok=0 +partition_fail=0 +for i in $(seq 1 5); do + if timeout 3 bash -c \ + "PGPASSWORD=pgdog psql -h $PGDOG_HOST -p $PGDOG_PORT -U pgdog -d $TARGET_DB -c 'SELECT 1' -t -q -A" \ + >/dev/null 2>&1; then + ((partition_ok++)) || true + else + ((partition_fail++)) || true + fi +done + +echo " During partition: ok=${partition_ok}, fail=${partition_fail}" + +# Wait for partition duration +echo " Waiting ${PARTITION_SEC}s..." 
+sleep "$PARTITION_SEC" + +# Re-enable proxy +curl -sf -X POST "${TOXI_API}/proxies/${PROXY_NAME}" \ + -H "Content-Type: application/json" \ + -d '{"enabled":true}' \ + >/dev/null 2>&1 + +echo " Proxy re-enabled — partition ended" + +# Recovery — allow PgDog time to reconnect +sleep 3 + +post_partition_ok=0 +for attempt in $(seq 1 10); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((post_partition_ok++)) || true + fi + sleep 0.5 +done + +if [ "$post_partition_ok" -ge 8 ]; then + pass "Phase 3 recovery: ${post_partition_ok}/10 queries OK after partition" +else + fail "Phase 3 recovery: only ${post_partition_ok}/10 queries OK — slow recovery" + ((FAILURES++)) || true +fi +echo "" + +# ── Phase 4: Slow backend (latency toxic) ─────────────────────────────────── + +echo "--- Phase 4: Slow backend (200ms latency — near timeout threshold) ---" + +curl -sf -X POST "${TOXI_API}/proxies/${PROXY_NAME}/toxics" \ + -H "Content-Type: application/json" \ + -d '{"name":"slow_upstream","type":"latency","stream":"upstream","attributes":{"latency":200,"jitter":100}}' \ + >/dev/null 2>&1 + +echo " Injected: 200ms ± 100ms latency" + +slow_ok=0 +slow_fail=0 +slow_total_ms=0 + +for i in $(seq 1 10); do + t_start=$(date +%s%N) + if timeout 5 bash -c \ + "PGPASSWORD=pgdog psql -h $PGDOG_HOST -p $PGDOG_PORT -U pgdog -d $TARGET_DB -c 'SELECT 1' -t -q -A" \ + >/dev/null 2>&1; then + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + ((slow_ok++)) || true + slow_total_ms=$((slow_total_ms + ms)) + else + ((slow_fail++)) || true + fi +done + +slow_avg=0 +if [ "$slow_ok" -gt 0 ]; then + slow_avg=$((slow_total_ms / slow_ok)) +fi + +echo " Results: ok=${slow_ok}, fail=${slow_fail}, avg=${slow_avg}ms" + +curl -sf -X DELETE "${TOXI_API}/proxies/${PROXY_NAME}/toxics/slow_upstream" >/dev/null 2>&1 || true + +if [ "$slow_ok" -ge 7 ]; then + pass "Phase 4: ${slow_ok}/10 queries succeeded under high 
latency" +else + fail "Phase 4: only ${slow_ok}/10 queries succeeded — timeouts too aggressive?" + ((FAILURES++)) || true +fi +echo "" + +# ── Phase 5: Multi-tenant impact (partition one backend, query another) ───── + +echo "--- Phase 5: Cross-tenant isolation during failure ---" + +# Disable proxy (simulating backend failure) +curl -sf -X POST "${TOXI_API}/proxies/${PROXY_NAME}" \ + -H "Content-Type: application/json" \ + -d '{"enabled":false}' \ + >/dev/null 2>&1 + +echo " Backend partitioned — testing if new tenant connections timeout cleanly" + +# Try a different tenant — the pool creation should fail with clear error, not hang +timeout_start=$(date +%s) +timeout 10 bash -c \ + "PGPASSWORD=pgdog psql -h $PGDOG_HOST -p $PGDOG_PORT -U pgdog -d tenant_999 -c 'SELECT 1' -t -q -A" \ + >/dev/null 2>&1 && cross_ok=true || cross_ok=false +timeout_end=$(date +%s) +timeout_elapsed=$((timeout_end - timeout_start)) + +# Re-enable +curl -sf -X POST "${TOXI_API}/proxies/${PROXY_NAME}" \ + -H "Content-Type: application/json" \ + -d '{"enabled":true}' \ + >/dev/null 2>&1 + +if [ "$cross_ok" = "false" ]; then + if [ "$timeout_elapsed" -le 8 ]; then + pass "Phase 5: Connection failed fast (${timeout_elapsed}s) — connect_timeout working" + else + warn "Phase 5: Connection took ${timeout_elapsed}s to fail — connect_timeout may be too high" + fi +else + warn "Phase 5: Connection unexpectedly succeeded during partition (cached pool?)" +fi +echo "" + +# ── Final recovery ────────────────────────────────────────────────────────── + +remove_all_toxics +sleep 2 + +final_ok=0 +for i in $(seq 1 5); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((final_ok++)) || true + fi +done + +if [ "$final_ok" -eq 5 ]; then + pass "Final recovery: 5/5 OK" +else + fail "Final recovery: ${final_ok}/5 OK" + ((FAILURES++)) || true +fi +echo "" + +# ── Summary 
────────────────────────────────────────────────────────────────── + +echo "========================================" +echo " Backend Failure Test Summary" +echo "========================================" +printf " %-30s %s\n" "Connection reset:" "${reset_ok}/20 survived, recovered ${recovery_ok}/5" +printf " %-30s %s\n" "Network partition:" "Recovered ${post_partition_ok}/10 after ${PARTITION_SEC}s" +printf " %-30s %s\n" "Slow backend (200ms):" "${slow_ok}/10 OK (avg ${slow_avg}ms)" +printf " %-30s %s\n" "Cross-tenant isolation:" "$([ "$cross_ok" = "false" ] && echo "Failed fast (${timeout_elapsed}s)" || echo "Succeeded (cached)")" +printf " %-30s %s\n" "Final health:" "${final_ok}/5 OK" +echo "========================================" + +if [ "$FAILURES" -gt 0 ]; then + fail "Backend failure test: ${FAILURES} check(s) failed" + exit 1 +else + pass "Backend failure test passed" + exit 0 +fi diff --git a/integration/production_readiness/load/connection_storm.sh b/integration/production_readiness/load/connection_storm.sh new file mode 100755 index 000000000..69ba16df4 --- /dev/null +++ b/integration/production_readiness/load/connection_storm.sh @@ -0,0 +1,275 @@ +#!/usr/bin/env bash +# Test PgDog behavior under a thundering herd: many clients simultaneously +# request connections to cold (no warm pool) databases. +# +# Simulates: PgDog restart, pod rescheduling, or max_wildcard_pools eviction +# followed by a burst of traffic. Validates connect_timeout, connect_attempts, +# and backend connection limit behavior. 
+set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +STORM_SIZE=200 +PARALLEL=100 +TARGET_RANGE_START=500 +TARGET_RANGE_END=700 +TIMEOUT_SEC=10 + +while [[ $# -gt 0 ]]; do + case "$1" in + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --storm-size) STORM_SIZE="$2"; shift 2 ;; + --parallel) PARALLEL="$2"; shift 2 ;; + --range-start) TARGET_RANGE_START="$2"; shift 2 ;; + --range-end) TARGET_RANGE_END="$2"; shift 2 ;; + --timeout) TIMEOUT_SEC="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +for cmd in psql curl; do + command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; } +done + +mkdir -p "$OUTPUT_DIR" +LOGFILE="$OUTPUT_DIR/connection_storm.log" +exec > >(tee -a "$LOGFILE") 2>&1 + +admin_query() { + PGPASSWORD=admin psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \ + -U admin -d admin -t -A -c "$1" 2>/dev/null +} + +echo "=== Connection Storm (Thundering Herd) Test ===" +echo "PgDog: ${PGDOG_HOST}:${PGDOG_PORT}" +echo "Storm size: ${STORM_SIZE} concurrent connections" +echo "Parallelism: ${PARALLEL}" +echo "Target DBs: tenant_${TARGET_RANGE_START}..tenant_${TARGET_RANGE_END}" +echo "Timeout: ${TIMEOUT_SEC}s per connection" +echo "" + +FAILURES=0 + +# ── Phase 1: Verify target DBs are cold (no existing pool) ───────────────── + +echo "--- Phase 1: Ensuring target pools are cold ---" + +# Wait for any existing pools in this range to expire +pools_before=$(admin_query "SHOW POOLS" 2>/dev/null | grep -c "tenant_" || echo "0") +echo " Existing tenant pools: ${pools_before}" +echo "" 
+ +# ── Phase 2: Single massive burst ───────────────────────────────────────── + +echo "--- Phase 2: Firing ${STORM_SIZE} connections simultaneously ---" + +RESULTS_DIR=$(mktemp -d) +storm_start=$(date +%s%N) + +storm_connect() { + local idx=$1 + # Pick a random DB from the target range + local db_num=$(( TARGET_RANGE_START + (idx % (TARGET_RANGE_END - TARGET_RANGE_START + 1)) )) + local t_start t_end ms + + t_start=$(date +%s%N) + if timeout "$TIMEOUT_SEC" bash -c \ + "PGPASSWORD=pgdog psql -h $PGDOG_HOST -p $PGDOG_PORT -U pgdog -d tenant_${db_num} -c 'SELECT 1' -t -q -A" \ + >/dev/null 2>&1; then + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + echo "ok $ms $db_num" > "${RESULTS_DIR}/${idx}" + else + echo "fail 0 $db_num" > "${RESULTS_DIR}/${idx}" + fi +} +export -f storm_connect +export PGDOG_HOST PGDOG_PORT RESULTS_DIR TIMEOUT_SEC TARGET_RANGE_START TARGET_RANGE_END + +seq 1 "$STORM_SIZE" | xargs -P "$PARALLEL" -I{} bash -c 'storm_connect {}' + +storm_end=$(date +%s%N) +storm_ms=$(( (storm_end - storm_start) / 1000000 )) + +# Parse results +storm_ok=0 +storm_fail=0 +total_latency=0 +latencies=() + +for f in "$RESULTS_DIR"/*; do + [ -f "$f" ] || continue + read -r status ms db_num < "$f" + if [ "$status" = "ok" ]; then + ((storm_ok++)) || true + total_latency=$((total_latency + ms)) + latencies+=("$ms") + else + ((storm_fail++)) || true + fi +done +rm -rf "$RESULTS_DIR" + +storm_avg=0 +if [ "$storm_ok" -gt 0 ]; then + storm_avg=$((total_latency / storm_ok)) +fi + +# Compute p95 and p99 +storm_p95="N/A" +storm_p99="N/A" +if [ "${#latencies[@]}" -gt 0 ]; then + sorted_lat=$(printf '%s\n' "${latencies[@]}" | sort -n) + n=${#latencies[@]} + idx_p95=$(( n * 95 / 100 )) + idx_p99=$(( n * 99 / 100 )) + [ "$idx_p95" -ge "$n" ] && idx_p95=$((n - 1)) + [ "$idx_p99" -ge "$n" ] && idx_p99=$((n - 1)) + + storm_p95=$(echo "$sorted_lat" | sed -n "$((idx_p95 + 1))p") + storm_p99=$(echo "$sorted_lat" | sed -n "$((idx_p99 + 1))p") +fi + +success_rate=0 +if [ 
"$STORM_SIZE" -gt 0 ]; then + success_rate=$(( storm_ok * 100 / STORM_SIZE )) +fi + +echo " Completed in ${storm_ms}ms" +echo " Success: ${storm_ok}/${STORM_SIZE} (${success_rate}%)" +echo " Failed: ${storm_fail}" +echo " Avg latency: ${storm_avg}ms" +echo " p95 latency: ${storm_p95}ms" +echo " p99 latency: ${storm_p99}ms" + +# At least 90% should succeed even during a storm +if [ "$success_rate" -ge 90 ]; then + pass "Storm: ${success_rate}% success rate" +else + fail "Storm: only ${success_rate}% success rate (need >= 90%)" + ((FAILURES++)) || true +fi +echo "" + +# ── Phase 3: Verify PgDog is healthy after storm ─────────────────────────── + +echo "--- Phase 3: Post-storm health check ---" +sleep 2 + +# Simple connectivity +post_ok=0 +for i in $(seq 1 10); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "tenant_1" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((post_ok++)) || true + fi +done + +if [ "$post_ok" -eq 10 ]; then + pass "PgDog healthy after storm (10/10 queries OK)" +else + fail "PgDog degraded after storm: only ${post_ok}/10 queries OK" + ((FAILURES++)) || true +fi + +# Metrics endpoint +if curl -sf "http://${PGDOG_HOST}:9090/metrics" >/dev/null 2>&1; then + pass "Metrics endpoint responsive after storm" +else + warn "Metrics endpoint not responsive after storm" +fi + +# Admin stats +pools_after=$(admin_query "SHOW POOLS" 2>/dev/null | grep -c "tenant_" || echo "0") +echo " Pools after storm: ${pools_after}" +echo "" + +# ── Phase 4: Repeated bursts ─────────────────────────────────────────────── + +echo "--- Phase 4: 3 rapid-fire bursts (${PARALLEL} connections each) ---" + +burst_total_ok=0 +burst_total_fail=0 + +for burst in 1 2 3; do + BURST_DIR=$(mktemp -d) + + burst_one() { + local idx=$1 + local db_num=$(( TARGET_RANGE_START + (idx % (TARGET_RANGE_END - TARGET_RANGE_START + 1)) )) + if timeout "$TIMEOUT_SEC" bash -c \ + "PGPASSWORD=pgdog psql -h $PGDOG_HOST -p $PGDOG_PORT -U pgdog -d tenant_${db_num} -c 
'SELECT 1' -t -q -A" \ + >/dev/null 2>&1; then + echo "ok" > "${BURST_DIR}/${idx}" + else + echo "fail" > "${BURST_DIR}/${idx}" + fi + } + export -f burst_one + export BURST_DIR + + seq 1 "$PARALLEL" | xargs -P "$PARALLEL" -I{} bash -c 'burst_one {}' + + b_ok=$(find "$BURST_DIR" -type f -exec cat {} + 2>/dev/null | awk '$1 == "ok" { count++ } END { print count + 0 }') + b_fail=$(find "$BURST_DIR" -type f -exec cat {} + 2>/dev/null | awk '$1 == "fail" { count++ } END { print count + 0 }') + rm -rf "$BURST_DIR" + + burst_total_ok=$((burst_total_ok + b_ok)) + burst_total_fail=$((burst_total_fail + b_fail)) + echo " Burst ${burst}: ok=${b_ok}, fail=${b_fail}" + + # No sleep between bursts — that's the point +done + +burst_total=$((burst_total_ok + burst_total_fail)) +burst_rate=0 +if [ "$burst_total" -gt 0 ]; then + burst_rate=$(( burst_total_ok * 100 / burst_total )) +fi + +if [ "$burst_rate" -ge 85 ]; then + pass "Rapid bursts: ${burst_rate}% success rate" +else + fail "Rapid bursts: only ${burst_rate}% success rate (need >= 85%)" + ((FAILURES++)) || true +fi +echo "" + +# ── Summary ────────────────────────────────────────────────────────────────── + +echo "========================================" +echo " Connection Storm Test Summary" +echo "========================================" +printf " %-28s %s\n" "Storm size:" "$STORM_SIZE" +printf " %-28s %s\n" "Target DBs:" "tenant_${TARGET_RANGE_START}..${TARGET_RANGE_END}" +printf " %-28s %s\n" "Storm success rate:" "${success_rate}%" +printf " %-28s %s\n" "Storm avg latency:" "${storm_avg}ms" +printf " %-28s %s\n" "Storm p95 latency:" "${storm_p95}ms" +printf " %-28s %s\n" "Storm p99 latency:" "${storm_p99}ms" +printf " %-28s %s\n" "Storm wall time:" "${storm_ms}ms" +printf " %-28s %s\n" "Rapid burst rate:" "${burst_rate}%" +echo "========================================" + +if [ "$FAILURES" -gt 0 ]; then + fail "Connection storm test: ${FAILURES} check(s) failed" + exit 1 +else + pass "Connection storm test passed" + 
exit 0 +fi diff --git a/integration/production_readiness/load/graceful_shutdown.sh b/integration/production_readiness/load/graceful_shutdown.sh new file mode 100755 index 000000000..ede6857f1 --- /dev/null +++ b/integration/production_readiness/load/graceful_shutdown.sh @@ -0,0 +1,260 @@ +#!/usr/bin/env bash +# Test PgDog graceful shutdown behavior under active load. +# +# Simulates a K8s rolling restart: SIGTERM sent while clients are running queries. +# Validates: in-flight transactions complete, no partial writes, +# new connections are rejected, process exits within terminationGracePeriodSeconds. +set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +PGDOG_BIN="${PGDOG_BIN:-$SCRIPT_DIR/../../target/release/pgdog}" +CONFIG_DIR="${CONFIG_DIR:-$SCRIPT_DIR/../config}" +TARGET_DB="tenant_1" +LOAD_CLIENTS=10 +GRACE_PERIOD=10 +PRE_LOAD_SEC=5 + +while [[ $# -gt 0 ]]; do + case "$1" in + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --pgdog-bin) PGDOG_BIN="$2"; shift 2 ;; + --config-dir) CONFIG_DIR="$2"; shift 2 ;; + --target-db) TARGET_DB="$2"; shift 2 ;; + --clients) LOAD_CLIENTS="$2"; shift 2 ;; + --grace-period) GRACE_PERIOD="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! 
[ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +for cmd in psql pgbench; do + command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; } +done + +mkdir -p "$OUTPUT_DIR" +LOGFILE="$OUTPUT_DIR/graceful_shutdown.log" +exec > >(tee -a "$LOGFILE") 2>&1 + +CHILDREN=() +cleanup() { + for pid in "${CHILDREN[@]+"${CHILDREN[@]}"}"; do + kill "$pid" 2>/dev/null || true + done + wait 2>/dev/null || true +} +trap cleanup EXIT SIGINT SIGTERM + +FAILURES=0 + +echo "=== Graceful Shutdown Test ===" +echo "PgDog binary: ${PGDOG_BIN}" +echo "Config dir: ${CONFIG_DIR}" +echo "Port: ${PGDOG_PORT}, Target: ${TARGET_DB}" +echo "Load clients: ${LOAD_CLIENTS}, Grace period: ${GRACE_PERIOD}s" +echo "" + +# ── Phase 1: Start a separate PgDog instance ──────────────────────────────── + +echo "--- Phase 1: Starting test PgDog instance ---" + +# Use a different port to avoid conflicts with the main test instance +TEST_PORT=16432 +PGDOG_LOG="$OUTPUT_DIR/graceful_shutdown_pgdog.log" + +if [ ! -f "$PGDOG_BIN" ]; then + fail "PgDog binary not found: ${PGDOG_BIN}" + exit 1 +fi + +# Generate config with the test port +TEST_CONFIG_DIR=$(mktemp -d) +cp "${CONFIG_DIR}/pgdog.toml" "${TEST_CONFIG_DIR}/pgdog.toml" +cp "${CONFIG_DIR}/users.toml" "${TEST_CONFIG_DIR}/users.toml" + +# Patch port in the copied config +sed -i.bak "s/port = 6432/port = ${TEST_PORT}/" "${TEST_CONFIG_DIR}/pgdog.toml" 2>/dev/null || \ + sed -i '' "s/port = 6432/port = ${TEST_PORT}/" "${TEST_CONFIG_DIR}/pgdog.toml" +# Patch metrics port +sed -i.bak "s/openmetrics_port = 9090/openmetrics_port = 19090/" "${TEST_CONFIG_DIR}/pgdog.toml" 2>/dev/null || \ + sed -i '' "s/openmetrics_port = 9090/openmetrics_port = 19090/" "${TEST_CONFIG_DIR}/pgdog.toml" + +cd "$TEST_CONFIG_DIR" +"$PGDOG_BIN" > "$PGDOG_LOG" 2>&1 & +TEST_PGDOG_PID=$! 
+CHILDREN+=("$TEST_PGDOG_PID")
+
+# Wait for readiness
+for i in $(seq 1 15); do
+  if PGPASSWORD=pgdog psql -h 127.0.0.1 -p "$TEST_PORT" -U pgdog -d "$TARGET_DB" \
+       -c "SELECT 1" -t -q -A >/dev/null 2>&1; then
+    echo " Test PgDog ready (PID: ${TEST_PGDOG_PID}, port: ${TEST_PORT})"
+    break
+  fi
+  sleep 1
+  if [ "$i" -eq 15 ]; then
+    fail "Test PgDog did not start in 15s"
+    exit 1
+  fi
+done
+echo ""
+
+# ── Phase 2: Start load ─────────────────────────────────────────────────────
+
+echo "--- Phase 2: Starting background load ---"
+
+WORKLOAD_SQL="${SCRIPT_DIR}/tenant_workload.sql"
+BENCH_LOG="$OUTPUT_DIR/graceful_shutdown_bench.log"
+
+PGPASSWORD=pgdog pgbench -h 127.0.0.1 -p "$TEST_PORT" -U pgdog "$TARGET_DB" \
+  -c "$LOAD_CLIENTS" -T 60 \
+  --protocol=extended -f "$WORKLOAD_SQL" \
+  --no-vacuum \
+  > "$BENCH_LOG" 2>&1 &
+BENCH_PID=$!
+CHILDREN+=("$BENCH_PID")
+
+echo " pgbench running (PID: ${BENCH_PID})"
+echo " Waiting ${PRE_LOAD_SEC}s for load to stabilize..."
+sleep "$PRE_LOAD_SEC"
+echo ""
+
+# ── Phase 3: Send SIGTERM ────────────────────────────────────────────────────
+
+echo "--- Phase 3: Sending SIGTERM to PgDog (PID: ${TEST_PGDOG_PID}) ---"
+
+sigterm_time=$(date +%s)
+kill -TERM "$TEST_PGDOG_PID" 2>/dev/null || true
+
+echo " SIGTERM sent at $(date '+%H:%M:%S')"
+echo ""
+
+# ── Phase 4: Monitor shutdown ───────────────────────────────────────────────
+
+echo "--- Phase 4: Monitoring shutdown behavior ---"
+
+# Check if process exits within grace period
+exited=false
+for i in $(seq 1 "$GRACE_PERIOD"); do
+  if ! kill -0 "$TEST_PGDOG_PID" 2>/dev/null; then
+    exit_time=$(date +%s)
+    drain_sec=$((exit_time - sigterm_time))
+    exited=true
+    pass "PgDog exited after ${drain_sec}s (within ${GRACE_PERIOD}s grace period)"
+    break
+  fi
+  sleep 1
+  echo " [${i}/${GRACE_PERIOD}s] still running..."
+done + +if [ "$exited" = "false" ]; then + exit_check_time=$(date +%s) + drain_sec=$((exit_check_time - sigterm_time)) + warn "PgDog still running after ${drain_sec}s — sending SIGKILL" + kill -9 "$TEST_PGDOG_PID" 2>/dev/null || true + fail "PgDog did not exit within grace period (${GRACE_PERIOD}s)" + ((FAILURES++)) || true +fi +echo "" + +# Wait for pgbench to finish (it should notice the connection dropped) +wait "$BENCH_PID" 2>/dev/null || true + +# ── Phase 5: Analyze pgbench results ──────────────────────────────────────── + +echo "--- Phase 5: Analyzing pgbench results ---" + +if [ -f "$BENCH_LOG" ]; then + committed=$(awk '/number of transactions actually processed:/{gsub(/.*: /,""); gsub(/[^0-9].*/,""); print}' "$BENCH_LOG") + committed="${committed:-0}" + aborted=$(awk '/number of failed transactions:/{gsub(/.*: /,""); gsub(/[^0-9].*/,""); print}' "$BENCH_LOG") + aborted="${aborted:-0}" + + echo " Transactions committed: ${committed}" + echo " Transactions aborted: ${aborted}" + + if [ "$committed" -gt 0 ]; then + pass "PgDog processed ${committed} transactions before shutdown" + else + warn "No transactions completed" + fi +fi +echo "" + +# ── Phase 6: Verify new connections were rejected ──────────────────────────── + +echo "--- Phase 6: Verifying connections rejected after SIGTERM ---" + +if [ "$exited" = "true" ]; then + if PGPASSWORD=pgdog psql -h 127.0.0.1 -p "$TEST_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + fail "Connection succeeded after PgDog exited — unexpected (port reuse?)" + ((FAILURES++)) || true + else + pass "Connection correctly refused after shutdown" + fi +else + warn "Cannot verify — PgDog was force-killed" +fi +echo "" + +# ── Phase 7: Check PgDog logs for errors ──────────────────────────────────── + +echo "--- Phase 7: Checking PgDog shutdown logs ---" + +if [ -f "$PGDOG_LOG" ]; then + panic_count=$(grep -ci "panic" "$PGDOG_LOG" 2>/dev/null || true) + panic_count=${panic_count:-0} + 
error_count=$(grep -ci "error" "$PGDOG_LOG" 2>/dev/null || true) + error_count=${error_count:-0} + + if [ "$panic_count" -gt 0 ]; then + fail "PgDog log contains ${panic_count} panic(s)" + ((FAILURES++)) || true + else + pass "No panics in PgDog log" + fi + + echo " Log errors: ${error_count}" + echo " Last 5 log lines:" + tail -5 "$PGDOG_LOG" | sed 's/^/ /' +fi +echo "" + +# Cleanup temp config +rm -rf "$TEST_CONFIG_DIR" + +# ── Summary ────────────────────────────────────────────────────────────────── + +echo "========================================" +echo " Graceful Shutdown Test Summary" +echo "========================================" +printf " %-28s %s\n" "Grace period:" "${GRACE_PERIOD}s" +printf " %-28s %s\n" "Exited cleanly:" "$([ "$exited" = "true" ] && echo "YES" || echo "NO")" +printf " %-28s %s\n" "Drain time:" "${drain_sec:-N/A}s" +printf " %-28s %s\n" "Transactions committed:" "${committed:-0}" +printf " %-28s %s\n" "Transactions aborted:" "${aborted:-0}" +echo "========================================" + +if [ "$FAILURES" -gt 0 ]; then + fail "Graceful shutdown test: ${FAILURES} failure(s)" + exit 1 +else + pass "Graceful shutdown test passed" + exit 0 +fi diff --git a/integration/production_readiness/load/idle_in_transaction.sh b/integration/production_readiness/load/idle_in_transaction.sh new file mode 100755 index 000000000..2481fc4ff --- /dev/null +++ b/integration/production_readiness/load/idle_in_transaction.sh @@ -0,0 +1,261 @@ +#!/usr/bin/env bash +# Test idle-in-transaction behavior in transaction pooling mode. +# +# In transaction pooling, a client holding BEGIN without COMMIT blocks +# that backend connection for all other clients in the pool. With pool_size=1-5, +# this can starve an entire tenant's traffic. +# +# Validates: client_idle_in_transaction_timeout, pool fairness, starvation detection. 
+set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +TARGET_DB="tenant_1" +IDLE_HOLD_SEC=15 +CONCURRENT_QUERIES=10 +QUERY_TIMEOUT=5 + +while [[ $# -gt 0 ]]; do + case "$1" in + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --target-db) TARGET_DB="$2"; shift 2 ;; + --hold-time) IDLE_HOLD_SEC="$2"; shift 2 ;; + --concurrent) CONCURRENT_QUERIES="$2"; shift 2 ;; + --query-timeout) QUERY_TIMEOUT="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +for cmd in psql; do + command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; } +done + +mkdir -p "$OUTPUT_DIR" +LOGFILE="$OUTPUT_DIR/idle_in_transaction.log" +exec > >(tee -a "$LOGFILE") 2>&1 + +CHILDREN=() +cleanup() { + for pid in "${CHILDREN[@]+"${CHILDREN[@]}"}"; do + kill "$pid" 2>/dev/null || true + done + wait 2>/dev/null || true +} +trap cleanup EXIT SIGINT SIGTERM + +FAILURES=0 + +echo "=== Idle-in-Transaction Test ===" +echo "PgDog: ${PGDOG_HOST}:${PGDOG_PORT}" +echo "Target: ${TARGET_DB}" +echo "Hold time: ${IDLE_HOLD_SEC}s" +echo "Concurrent queries during hold: ${CONCURRENT_QUERIES}" +echo "" + +# ── Phase 1: Baseline — normal query latency ──────────────────────────────── + +echo "--- Phase 1: Baseline query latency ---" + +baseline_total=0 +baseline_count=10 + +for i in $(seq 1 "$baseline_count"); do + t_start=$(date +%s%N) + PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1 || true + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + 
baseline_total=$((baseline_total + ms)) +done + +baseline_avg=$((baseline_total / baseline_count)) +pass "Baseline avg: ${baseline_avg}ms" +echo "" + +# ── Phase 2: Hold a transaction open ──────────────────────────────────────── + +echo "--- Phase 2: Opening idle transaction (holding for ${IDLE_HOLD_SEC}s) ---" + +# Start a connection that BEGINs and sits idle +PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "BEGIN; SELECT pg_sleep(${IDLE_HOLD_SEC}); COMMIT;" -q >/dev/null 2>&1 & +HOLDER_PID=$! +CHILDREN+=("$HOLDER_PID") + +# Give it a moment to establish the transaction +sleep 1 + +echo " Transaction holder running (PID: ${HOLDER_PID})" +echo "" + +# ── Phase 3: Concurrent queries while transaction is held ──────────────────── + +echo "--- Phase 3: Querying while pool slot is held ---" + +RESULTS_DIR=$(mktemp -d) +phase3_start=$(date +%s%N) + +query_during_hold() { + local idx=$1 + local t_start t_end ms + + t_start=$(date +%s%N) + if timeout "$QUERY_TIMEOUT" bash -c \ + "PGPASSWORD=pgdog psql -h $PGDOG_HOST -p $PGDOG_PORT -U pgdog -d $TARGET_DB -c 'SELECT 1' -t -q -A" \ + >/dev/null 2>&1; then + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + echo "ok $ms" > "${RESULTS_DIR}/${idx}" + else + echo "fail 0" > "${RESULTS_DIR}/${idx}" + fi +} +export -f query_during_hold +export PGDOG_HOST PGDOG_PORT TARGET_DB RESULTS_DIR QUERY_TIMEOUT + +seq 1 "$CONCURRENT_QUERIES" | xargs -P "$CONCURRENT_QUERIES" -I{} bash -c 'query_during_hold {}' + +phase3_end=$(date +%s%N) +phase3_ms=$(( (phase3_end - phase3_start) / 1000000 )) + +q_ok=0 +q_fail=0 +q_total_ms=0 +q_max_ms=0 + +for f in "$RESULTS_DIR"/*; do + [ -f "$f" ] || continue + read -r status ms < "$f" + if [ "$status" = "ok" ]; then + ((q_ok++)) || true + q_total_ms=$((q_total_ms + ms)) + if [ "$ms" -gt "$q_max_ms" ]; then + q_max_ms="$ms" + fi + else + ((q_fail++)) || true + fi +done +rm -rf "$RESULTS_DIR" + +q_avg=0 +if [ "$q_ok" -gt 0 ]; then + q_avg=$((q_total_ms / 
q_ok)) +fi + +echo " Results: ok=${q_ok}, fail=${q_fail}, avg=${q_avg}ms, max=${q_max_ms}ms, total=${phase3_ms}ms" + +# With pool_size > 1, some queries should succeed via other pool slots +# With pool_size = 1, all queries will queue/fail +if [ "$q_ok" -gt 0 ]; then + pass "Phase 3: ${q_ok}/${CONCURRENT_QUERIES} queries succeeded during transaction hold" + latency_slowdown=$((q_avg - baseline_avg)) + echo " Latency increase: ${latency_slowdown}ms above baseline" +else + warn "Phase 3: All queries failed/timed out — pool may be fully blocked (pool_size=1?)" +fi +echo "" + +# ── Phase 4: Multiple holders (saturate all pool slots) ────────────────────── + +echo "--- Phase 4: Saturating pool with idle transactions ---" + +# Saturate with 5 holders (matches typical pool_size=5) +HOLDER_PIDS=() +for i in $(seq 1 5); do + PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "BEGIN; SELECT pg_sleep(${IDLE_HOLD_SEC}); COMMIT;" -q >/dev/null 2>&1 & + pid=$! + HOLDER_PIDS+=("$pid") + CHILDREN+=("$pid") +done + +sleep 1 +echo " 5 idle transactions holding pool slots" + +# Try queries — they should queue and hit checkout_timeout +saturated_ok=0 +saturated_fail=0 + +for i in $(seq 1 5); do + if timeout "$QUERY_TIMEOUT" bash -c \ + "PGPASSWORD=pgdog psql -h $PGDOG_HOST -p $PGDOG_PORT -U pgdog -d $TARGET_DB -c 'SELECT 1' -t -q -A" \ + >/dev/null 2>&1; then + ((saturated_ok++)) || true + else + ((saturated_fail++)) || true + fi +done + +echo " Queries under full saturation: ok=${saturated_ok}, fail/timeout=${saturated_fail}" + +if [ "$saturated_fail" -gt 0 ]; then + pass "Phase 4: Queries correctly blocked when all pool slots held (${saturated_fail}/5 timed out)" +else + warn "Phase 4: All queries succeeded — pool may be larger than expected or checkout_timeout not enforced" +fi +echo "" + +# ── Phase 5: Recovery after holders release ────────────────────────────────── + +echo "--- Phase 5: Waiting for holders to release ---" + +for pid in 
"${HOLDER_PIDS[@]}"; do + kill "$pid" 2>/dev/null || true +done +# Also kill the original holder +kill "$HOLDER_PID" 2>/dev/null || true +sleep 2 + +recovery_ok=0 +for i in $(seq 1 10); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((recovery_ok++)) || true + fi +done + +if [ "$recovery_ok" -eq 10 ]; then + pass "Phase 5: Full recovery (10/10 queries OK)" +else + fail "Phase 5: Incomplete recovery (${recovery_ok}/10 queries OK)" + ((FAILURES++)) || true +fi +echo "" + +# ── Summary ────────────────────────────────────────────────────────────────── + +echo "========================================" +echo " Idle-in-Transaction Test Summary" +echo "========================================" +printf " %-30s %s\n" "Baseline avg latency:" "${baseline_avg}ms" +printf " %-30s %s\n" "Queries during hold:" "${q_ok}/${CONCURRENT_QUERIES} OK" +printf " %-30s %s\n" "Avg latency during hold:" "${q_avg}ms" +printf " %-30s %s\n" "Max latency during hold:" "${q_max_ms}ms" +printf " %-30s %s\n" "Saturated pool queries:" "${saturated_ok}/5 OK, ${saturated_fail}/5 blocked" +printf " %-30s %s\n" "Recovery:" "${recovery_ok}/10 OK" +echo "========================================" + +if [ "$FAILURES" -gt 0 ]; then + fail "Idle-in-transaction test: ${FAILURES} failure(s)" + exit 1 +else + pass "Idle-in-transaction test passed" + exit 0 +fi diff --git a/integration/production_readiness/load/multi_tenant_bench.sh b/integration/production_readiness/load/multi_tenant_bench.sh new file mode 100755 index 000000000..025876569 --- /dev/null +++ b/integration/production_readiness/load/multi_tenant_bench.sh @@ -0,0 +1,217 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +TENANT_COUNT=100 +CLIENTS=10 +DURATION=60 +PROTOCOL="extended" +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 + +while [[ $# -gt 
0 ]]; do
+  case "$1" in
+    --tenant-count) TENANT_COUNT="$2"; shift 2 ;;
+    --clients) CLIENTS="$2"; shift 2 ;;
+    --duration) DURATION="$2"; shift 2 ;;
+    --protocol) PROTOCOL="$2"; shift 2 ;;
+    --pgdog-host) PGDOG_HOST="$2"; shift 2 ;;
+    --pgdog-port) PGDOG_PORT="$2"; shift 2 ;;
+    *) echo "Unknown option: $1"; exit 1 ;;
+  esac
+done
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+if ! [ -t 1 ]; then
+  RED='' GREEN='' YELLOW='' NC=''
+fi
+
+pass() { echo -e "${GREEN}[PASS]${NC} $1"; }
+fail() { echo -e "${RED}[FAIL]${NC} $1"; }
+warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
+
+for cmd in psql pgbench curl shuf; do
+  command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; }
+done
+
+WORKLOAD_SQL="${SCRIPT_DIR}/tenant_workload.sql"
+if [ ! -f "$WORKLOAD_SQL" ]; then
+  fail "Workload file not found: ${WORKLOAD_SQL}"
+  exit 1
+fi
+
+BENCH_DIR="${OUTPUT_DIR}/bench_run_$(date +%Y%m%d_%H%M%S)"
+mkdir -p "$BENCH_DIR"
+LOGFILE="${BENCH_DIR}/multi_tenant_bench.log"
+exec > >(tee -a "$LOGFILE") 2>&1
+
+CHILDREN=()
+cleanup() {
+  for pid in "${CHILDREN[@]+"${CHILDREN[@]}"}"; do
+    kill "$pid" 2>/dev/null || true
+  done
+  wait 2>/dev/null || true
+}
+trap cleanup EXIT SIGINT SIGTERM
+
+# Distribute clients across tenants: at least 1 per tenant
+clients_per_tenant=$((CLIENTS / TENANT_COUNT))
+if [ "$clients_per_tenant" -lt 1 ]; then
+  clients_per_tenant=1
+fi
+remainder=$((CLIENTS - clients_per_tenant * TENANT_COUNT))
+
+echo "=== Multi-Tenant Benchmark ==="
+echo "Tenants: ${TENANT_COUNT}, Total clients: ${CLIENTS}, Duration: ${DURATION}s"
+echo "Protocol: ${PROTOCOL}, Clients/tenant: ${clients_per_tenant} (+${remainder} extra on first tenants)"
+echo "Results: ${BENCH_DIR}"
+echo ""
+
+# Pick random tenant indices from the available pool
+TENANT_IDS=$(shuf -i 1-"$TENANT_COUNT" -n "$TENANT_COUNT" | sort -n)
+
+# ── Background metrics collector ─────────────────────────────────────────────
+
+METRICS_DIR="${BENCH_DIR}/metrics"
+mkdir -p "$METRICS_DIR" + +collect_metrics() { + local seq=0 + while true; do + curl -sf "http://${PGDOG_HOST}:9090/metrics" > "${METRICS_DIR}/snapshot_$(printf '%04d' $seq).txt" 2>/dev/null || true + ((seq++)) || true + sleep 10 + done +} +collect_metrics & +METRICS_PID=$! +CHILDREN+=("$METRICS_PID") + +# ── Launch pgbench workers ─────────────────────────────────────────────────── + +echo "--- Launching pgbench across ${TENANT_COUNT} tenants ---" +PIDS=() +idx=0 + +for tid in $TENANT_IDS; do + c=$clients_per_tenant + if [ "$remainder" -gt 0 ]; then + c=$((c + 1)) + ((remainder--)) || true + fi + + PGPASSWORD=pgdog pgbench -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog "tenant_$tid" \ + -c "$c" -T "$DURATION" \ + --protocol="$PROTOCOL" -f "$WORKLOAD_SQL" \ + --no-vacuum -P 5 \ + > "${BENCH_DIR}/bench_tenant_${tid}.log" 2>&1 & + pid=$! + PIDS+=("$pid") + CHILDREN+=("$pid") + + ((idx++)) || true + if (( idx % 20 == 0 )); then + echo " [${idx}/${TENANT_COUNT}] launched..." + fi +done +echo " [${idx}/${TENANT_COUNT}] all launched, waiting ${DURATION}s..." 
+echo "" + +for pid in "${PIDS[@]}"; do + wait "$pid" 2>/dev/null || true +done + +kill "$METRICS_PID" 2>/dev/null || true +wait "$METRICS_PID" 2>/dev/null || true + +# ── Aggregate results ──────────────────────────────────────────────────────── + +echo "--- Aggregating results ---" + +total_tps=0 +total_committed=0 +total_aborted=0 +error_tenants=0 +tenant_results=0 +all_latencies=() + +for logfile in "$BENCH_DIR"/bench_tenant_*.log; do + [ -f "$logfile" ] || continue + ((tenant_results++)) || true + + tps=$(awk '/tps = /{gsub(/.*tps = /,""); gsub(/[^0-9.].*/,""); print}' "$logfile" | tail -1) + tps="${tps:-0}" + committed=$(awk '/number of transactions actually processed:/{gsub(/.*: /,""); gsub(/[^0-9].*/,""); print}' "$logfile") + committed="${committed:-0}" + aborted=$(awk '/number of failed transactions:/{gsub(/.*: /,""); gsub(/[^0-9].*/,""); print}' "$logfile") + aborted="${aborted:-0}" + lat_avg=$(awk '/latency average =/{gsub(/.*= /,""); gsub(/[^0-9.].*/,""); print}' "$logfile") + lat_avg="${lat_avg:-0}" + + total_tps=$(echo "$total_tps + $tps" | bc 2>/dev/null || echo "$total_tps") + total_committed=$((total_committed + committed)) + total_aborted=$((total_aborted + aborted)) + + if [ "$aborted" -gt 0 ]; then + ((error_tenants++)) || true + fi + + if [ "$lat_avg" != "0" ]; then + all_latencies+=("$lat_avg") + fi +done + +# Compute latency percentiles from per-tenant averages +latency_p50="N/A" +latency_p95="N/A" +latency_p99="N/A" + +if [ "${#all_latencies[@]}" -gt 0 ]; then + sorted_lat=$(printf '%s\n' "${all_latencies[@]}" | sort -n) + n=${#all_latencies[@]} + idx_p50=$(( n * 50 / 100 )) + idx_p95=$(( n * 95 / 100 )) + idx_p99=$(( n * 99 / 100 )) + # Clamp to valid range + [ "$idx_p50" -ge "$n" ] && idx_p50=$((n - 1)) + [ "$idx_p95" -ge "$n" ] && idx_p95=$((n - 1)) + [ "$idx_p99" -ge "$n" ] && idx_p99=$((n - 1)) + + latency_p50=$(echo "$sorted_lat" | sed -n "$((idx_p50 + 1))p") + latency_p95=$(echo "$sorted_lat" | sed -n "$((idx_p95 + 1))p") + 
latency_p99=$(echo "$sorted_lat" | sed -n "$((idx_p99 + 1))p") +fi + +metrics_snapshots=$(find "$METRICS_DIR" -name "snapshot_*.txt" 2>/dev/null | wc -l | tr -d ' ') + +echo "" +echo "========================================" +echo " Multi-Tenant Benchmark Summary" +echo "========================================" +printf " %-26s %s\n" "Tenants benchmarked:" "$tenant_results" +printf " %-26s %s\n" "Total TPS:" "$total_tps" +printf " %-26s %s\n" "Transactions committed:" "$total_committed" +printf " %-26s %s\n" "Transactions aborted:" "$total_aborted" +printf " %-26s %s\n" "Tenants with errors:" "$error_tenants" +printf " %-26s %s ms\n" "Latency p50:" "$latency_p50" +printf " %-26s %s ms\n" "Latency p95:" "$latency_p95" +printf " %-26s %s ms\n" "Latency p99:" "$latency_p99" +printf " %-26s %s\n" "Metrics snapshots:" "$metrics_snapshots" +printf " %-26s %s\n" "Results directory:" "$BENCH_DIR" +echo "========================================" + +if [ "$total_aborted" -gt 0 ]; then + warn "${total_aborted} transactions aborted across ${error_tenants} tenant(s)" +fi + +if [ "$tenant_results" -eq "$TENANT_COUNT" ]; then + pass "All ${TENANT_COUNT} tenants completed benchmark" +else + fail "Only ${tenant_results}/${TENANT_COUNT} tenants produced results" + exit 1 +fi diff --git a/integration/production_readiness/load/network_latency.sh b/integration/production_readiness/load/network_latency.sh new file mode 100755 index 000000000..857388daf --- /dev/null +++ b/integration/production_readiness/load/network_latency.sh @@ -0,0 +1,300 @@ +#!/usr/bin/env bash +# Test PgDog behavior under realistic network latency. +# +# Uses toxiproxy between PgDog and Postgres to inject latency +# simulating Cloud SQL Auth Proxy + VPC hop (~2-10ms RTT). +# Validates: checkout_timeout, connect_timeout, tail latency under contention. 
+set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +TOXI_HOST="127.0.0.1" +TOXI_API="${TOXI_API:-http://127.0.0.1:8474}" +PROXY_NAME="${PROXY_NAME:-pg_primary}" +LATENCY_MS=5 +JITTER_MS=2 +CLIENTS=20 +DURATION=30 +TARGET_DB="tenant_1" +COLD_RANGE_START=1001 +COLD_RANGE_END=1050 + +while [[ $# -gt 0 ]]; do + case "$1" in + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --toxi-api) TOXI_API="$2"; shift 2 ;; + --proxy-name) PROXY_NAME="$2"; shift 2 ;; + --latency) LATENCY_MS="$2"; shift 2 ;; + --jitter) JITTER_MS="$2"; shift 2 ;; + --clients) CLIENTS="$2"; shift 2 ;; + --duration) DURATION="$2"; shift 2 ;; + --target-db) TARGET_DB="$2"; shift 2 ;; + --cold-range-start) COLD_RANGE_START="$2"; shift 2 ;; + --cold-range-end) COLD_RANGE_END="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! 
[ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +for cmd in psql pgbench curl; do + command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; } +done + +mkdir -p "$OUTPUT_DIR" +LOGFILE="$OUTPUT_DIR/network_latency.log" +exec > >(tee -a "$LOGFILE") 2>&1 + +WORKLOAD_SQL="${SCRIPT_DIR}/tenant_workload.sql" + +CHILDREN=() +cleanup() { + # Remove toxics + curl -sf -X DELETE "${TOXI_API}/proxies/${PROXY_NAME}/toxics/latency_downstream" >/dev/null 2>&1 || true + curl -sf -X DELETE "${TOXI_API}/proxies/${PROXY_NAME}/toxics/latency_upstream" >/dev/null 2>&1 || true + for pid in "${CHILDREN[@]+"${CHILDREN[@]}"}"; do + kill "$pid" 2>/dev/null || true + done + wait 2>/dev/null || true +} +trap cleanup EXIT SIGINT SIGTERM + +echo "=== Network Latency Test ===" +echo "PgDog: ${PGDOG_HOST}:${PGDOG_PORT}" +echo "Toxiproxy API: ${TOXI_API}" +echo "Proxy: ${PROXY_NAME}" +echo "Injected latency: ${LATENCY_MS}ms ± ${JITTER_MS}ms" +echo "" + +# Verify toxiproxy is reachable +if ! 
curl -sf "${TOXI_API}/version" >/dev/null 2>&1; then + fail "Toxiproxy API not reachable at ${TOXI_API}" + exit 1 +fi + +# ── Phase 1: Baseline (no latency) ───────────────────────────────────────── + +echo "--- Phase 1: Baseline latency (no toxics) ---" + +baseline_total=0 +baseline_count=10 +baseline_failures=0 + +for i in $(seq 1 "$baseline_count"); do + t_start=$(date +%s%N) + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + baseline_total=$((baseline_total + ms)) + else + ((baseline_failures++)) || true + fi +done + +if [ "$baseline_failures" -gt 0 ]; then + fail "Baseline: ${baseline_failures}/${baseline_count} queries failed without latency injection" + exit 1 +fi + +baseline_avg=$((baseline_total / baseline_count)) +pass "Baseline avg latency: ${baseline_avg}ms" +echo "" + +# ── Phase 2: Inject latency ───────────────────────────────────────────────── + +echo "--- Phase 2: Injecting ${LATENCY_MS}ms ± ${JITTER_MS}ms latency ---" + +# Downstream (Postgres → PgDog) +curl -sf -X POST "${TOXI_API}/proxies/${PROXY_NAME}/toxics" \ + -H "Content-Type: application/json" \ + -d "{\"name\":\"latency_downstream\",\"type\":\"latency\",\"stream\":\"downstream\",\"attributes\":{\"latency\":${LATENCY_MS},\"jitter\":${JITTER_MS}}}" \ + >/dev/null 2>&1 + +# Upstream (PgDog → Postgres) +curl -sf -X POST "${TOXI_API}/proxies/${PROXY_NAME}/toxics" \ + -H "Content-Type: application/json" \ + -d "{\"name\":\"latency_upstream\",\"type\":\"latency\",\"stream\":\"upstream\",\"attributes\":{\"latency\":${LATENCY_MS},\"jitter\":${JITTER_MS}}}" \ + >/dev/null 2>&1 + +pass "Latency toxics injected (${LATENCY_MS}ms ± ${JITTER_MS}ms each direction)" +echo "" + +# ── Phase 3: Single-query latency under injected delay ─────────────────── + +echo "--- Phase 3: Single-query latency with injected delay ---" + +injected_total=0 +injected_count=20 
+injected_failures=0 + +for i in $(seq 1 "$injected_count"); do + t_start=$(date +%s%N) + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + injected_total=$((injected_total + ms)) + else + ((injected_failures++)) || true + fi +done + +if [ "$((injected_count - injected_failures))" -gt 0 ]; then + injected_avg=$((injected_total / (injected_count - injected_failures))) +else + injected_avg=0 +fi + +latency_delta=$((injected_avg - baseline_avg)) +echo " Baseline avg: ${baseline_avg}ms" +echo " Injected avg: ${injected_avg}ms" +echo " Delta: ${latency_delta}ms" + +if [ "$injected_failures" -gt 0 ]; then + warn "Phase 3: ${injected_failures}/${injected_count} queries failed with latency injection" +fi + +# Latency should have increased by roughly 2x LATENCY_MS (both directions) +expected_min=$(( LATENCY_MS )) # conservative: at least 1x latency added +if [ "$latency_delta" -ge "$expected_min" ]; then + pass "Latency increased by ${latency_delta}ms (expected >= ${expected_min}ms)" +else + warn "Latency delta ${latency_delta}ms is lower than expected ${expected_min}ms" +fi +echo "" + +# ── Phase 4: Load test under latency ──────────────────────────────────────── + +echo "--- Phase 4: pgbench under ${LATENCY_MS}ms latency (${CLIENTS} clients, ${DURATION}s) ---" + +if [ -f "$WORKLOAD_SQL" ]; then + BENCH_LOG="$OUTPUT_DIR/network_latency_bench.log" + + PGPASSWORD=pgdog pgbench -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog "$TARGET_DB" \ + -c "$CLIENTS" -T "$DURATION" \ + --protocol=extended -f "$WORKLOAD_SQL" \ + --no-vacuum -P 5 \ + > "$BENCH_LOG" 2>&1 || true + + tps=$(awk '/tps = /{gsub(/.*tps = /,""); gsub(/[^0-9.].*/,""); print}' "$BENCH_LOG" | tail -1) + tps="${tps:-0}" + lat_avg=$(awk '/latency average =/{gsub(/.*= /,""); gsub(/[^0-9.].*/,""); print}' "$BENCH_LOG") + lat_avg="${lat_avg:-N/A}" + + echo " TPS: ${tps}" + echo " 
Avg latency: ${lat_avg}ms" + + if [ "$tps" != "0" ]; then + pass "pgbench completed under latency (TPS: ${tps})" + else + fail "pgbench produced 0 TPS under latency" + fi +else + warn "Workload SQL not found — skipping pgbench phase" +fi +echo "" + +# ── Phase 5: Multi-tenant connect under latency ───────────────────────────── + +echo "--- Phase 5: Connecting to cold tenant DBs under latency ---" + +RESULTS_DIR=$(mktemp -d) +cold_count=$((COLD_RANGE_END - COLD_RANGE_START + 1)) +cold_start=$(date +%s) + +connect_cold() { + local idx=$1 + local t_start t_end ms + t_start=$(date +%s%N) + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "tenant_$idx" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + echo "ok $ms" > "${RESULTS_DIR}/${idx}" + else + echo "fail 0" > "${RESULTS_DIR}/${idx}" + fi +} +export -f connect_cold +export PGDOG_HOST PGDOG_PORT RESULTS_DIR + +# Connect to a configurable range that should exist but ideally remain cold. 
+seq "$COLD_RANGE_START" "$COLD_RANGE_END" | xargs -P 10 -I{} bash -c 'connect_cold {}' + +cold_end=$(date +%s) +cold_elapsed=$((cold_end - cold_start)) + +cold_ok=0 +cold_fail=0 +cold_total_ms=0 +for f in "$RESULTS_DIR"/*; do + [ -f "$f" ] || continue + read -r status ms < "$f" + if [ "$status" = "ok" ]; then + ((cold_ok++)) || true + cold_total_ms=$((cold_total_ms + ms)) + else + ((cold_fail++)) || true + fi +done +rm -rf "$RESULTS_DIR" + +cold_avg=0 +if [ "$cold_ok" -gt 0 ]; then + cold_avg=$((cold_total_ms / cold_ok)) +fi + +echo " OK: ${cold_ok}/${cold_count}, Failed: ${cold_fail}" +echo " Avg cold-connect latency: ${cold_avg}ms (${cold_elapsed}s total)" + +if [ "$cold_fail" -eq 0 ]; then + pass "All cold connections succeeded under latency" +else + fail "${cold_fail}/${cold_count} cold connections failed under latency" +fi +echo "" + +# ── Cleanup toxics ────────────────────────────────────────────────────────── + +curl -sf -X DELETE "${TOXI_API}/proxies/${PROXY_NAME}/toxics/latency_downstream" >/dev/null 2>&1 || true +curl -sf -X DELETE "${TOXI_API}/proxies/${PROXY_NAME}/toxics/latency_upstream" >/dev/null 2>&1 || true + +# ── Summary ────────────────────────────────────────────────────────────────── + +FAILURES=0 +[ "$injected_failures" -gt 2 ] && ((FAILURES++)) || true +[ "$cold_fail" -gt 0 ] && ((FAILURES++)) || true + +echo "========================================" +echo " Network Latency Test Summary" +echo "========================================" +printf " %-28s %s\n" "Baseline avg latency:" "${baseline_avg}ms" +printf " %-28s %s\n" "Injected latency:" "${LATENCY_MS}ms ± ${JITTER_MS}ms" +printf " %-28s %s\n" "Measured avg latency:" "${injected_avg}ms" +printf " %-28s %s\n" "Latency delta:" "${latency_delta}ms" +printf " %-28s %s\n" "Cold-connect avg:" "${cold_avg}ms" +printf " %-28s %s\n" "Cold-connect failures:" "${cold_fail}/${cold_count}" +echo "========================================" + +if [ "$FAILURES" -gt 0 ]; then + fail "Network latency 
test: ${FAILURES} check(s) failed" + exit 1 +else + pass "Network latency test passed" + exit 0 +fi diff --git a/integration/production_readiness/load/passthrough_auth.sh b/integration/production_readiness/load/passthrough_auth.sh new file mode 100755 index 000000000..bd397b3df --- /dev/null +++ b/integration/production_readiness/load/passthrough_auth.sh @@ -0,0 +1,230 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +USER_COUNT=100 +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +PG_HOST="127.0.0.1" +PG_PORT=15432 +PARALLEL=50 + +while [[ $# -gt 0 ]]; do + case "$1" in + --user-count) USER_COUNT="$2"; shift 2 ;; + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --pg-host) PG_HOST="$2"; shift 2 ;; + --pg-port) PG_PORT="$2"; shift 2 ;; + --parallel) PARALLEL="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! 
[ -t 1 ]; then
    RED='' GREEN='' YELLOW='' NC=''
fi

pass() { echo -e "${GREEN}[PASS]${NC} $1"; }
fail() { echo -e "${RED}[FAIL]${NC} $1"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }

for cmd in psql; do
    command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; }
done

mkdir -p "$OUTPUT_DIR"
LOGFILE="$OUTPUT_DIR/passthrough_auth.log"
# Mirror all output (stdout + stderr) into the log file.
exec > >(tee -a "$LOGFILE") 2>&1

CHILDREN=()
cleanup() {
    for pid in "${CHILDREN[@]+"${CHILDREN[@]}"}"; do
        kill "$pid" 2>/dev/null || true
    done
    wait 2>/dev/null || true
}
trap cleanup SIGINT SIGTERM

FAILURES=0

echo "=== Passthrough Authentication Test ==="
echo "PgDog: ${PGDOG_HOST}:${PGDOG_PORT}"
echo "Postgres: ${PG_HOST}:${PG_PORT}"
echo "Test users: ${USER_COUNT}"
echo ""

# ── Phase 1: Correct credentials ────────────────────────────────────────────

echo "--- Phase 1: Correct credentials (${USER_COUNT} users) ---"
phase1_ok=0
phase1_fail=0

for i in $(seq 1 "$USER_COUNT"); do
    # Each tenant_user_<i> authenticates with its own password; verify the
    # session really runs as that user, not as a shared pool identity.
    result=$(PGPASSWORD="pass_${i}" psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \
        -U "tenant_user_${i}" -d "tenant_1" -t -A -c "SELECT current_user" 2>/dev/null) || result=""

    if [ "$result" = "tenant_user_${i}" ]; then
        ((phase1_ok++)) || true
    else
        ((phase1_fail++)) || true
        if [ "$phase1_fail" -le 5 ]; then
            warn "  tenant_user_${i}: expected 'tenant_user_${i}', got '${result}'"
        fi
    fi

    if (( i % 25 == 0 )); then
        echo "  [${i}/${USER_COUNT}] ok=${phase1_ok} fail=${phase1_fail}"
    fi
done

if [ "$phase1_fail" -eq 0 ]; then
    pass "Phase 1: All ${USER_COUNT} users authenticated correctly"
else
    fail "Phase 1: ${phase1_fail}/${USER_COUNT} users failed authentication"
    ((FAILURES++)) || true
fi
echo ""

# ── Phase 2: Wrong credentials ──────────────────────────────────────────────

echo "--- Phase 2: Wrong credentials (expecting failures) ---"
phase2_rejected=0
phase2_unexpected_success=0
# Sample at most 20 users; a full sweep adds time without adding signal.
SAMPLE_SIZE=$((USER_COUNT < 20 ? USER_COUNT : 20))

for i in $(seq 1 "$SAMPLE_SIZE"); do
    if PGPASSWORD="wrong_password" psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \
        -U "tenant_user_${i}" -d "tenant_1" -c "SELECT 1" -t -q -A >/dev/null 2>&1; then
        ((phase2_unexpected_success++)) || true
        warn "  tenant_user_${i}: unexpectedly succeeded with wrong password"
    else
        ((phase2_rejected++)) || true
    fi
done

if [ "$phase2_unexpected_success" -eq 0 ]; then
    pass "Phase 2: All ${SAMPLE_SIZE} wrong-password attempts correctly rejected"
else
    fail "Phase 2: ${phase2_unexpected_success} logins succeeded with wrong password"
    ((FAILURES++)) || true
fi
echo ""

# ── Phase 3: Credential rotation ────────────────────────────────────────────
# NOTE: PgDog caches the passthrough password from the first successful auth
# for each pool. Credential rotation requires the pool to be destroyed and
# recreated (via wildcard_pool_idle_timeout), or a PgDog reload. This phase
# tests whether rotation works after the pool expires.

echo "--- Phase 3: Credential rotation ---"
ROTATION_USER="tenant_user_1"
OLD_PASS="pass_1"
NEW_PASS="rotated_pass_1"
rotation_pass=true

if ! PGPASSWORD="$OLD_PASS" psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \
    -U "$ROTATION_USER" -d "tenant_1" -c "SELECT 1" -t -q -A >/dev/null 2>&1; then
    fail "Phase 3: Cannot connect with original password (precondition failed)"
    rotation_pass=false
fi

if [ "$rotation_pass" = true ]; then
    # Rotate directly on Postgres, bypassing PgDog.
    PGPASSWORD=postgres psql -h "$PG_HOST" -p "$PG_PORT" -U postgres -d postgres \
        -c "ALTER USER ${ROTATION_USER} WITH PASSWORD '${NEW_PASS}'" -q 2>/dev/null || {
        fail "Phase 3: Could not ALTER USER on Postgres directly"
        rotation_pass=false
    }
fi

if [ "$rotation_pass" = true ]; then
    new_pass_ok=false
    for attempt in $(seq 1 5); do
        if PGPASSWORD="$NEW_PASS" psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \
            -U "$ROTATION_USER" -d "tenant_1" -c "SELECT 1" -t -q -A >/dev/null 2>&1; then
            new_pass_ok=true
            break
        fi
        sleep 1
    done

    if [ "$new_pass_ok" = true ]; then
        pass "Phase 3: New password accepted after rotation"
    else
        warn "Phase 3: New password not accepted (expected — PgDog caches passthrough password per pool)"
        echo "  To rotate credentials, wait for wildcard_pool_idle_timeout or reload PgDog"
    fi

    # Restore the original password so later phases/tests still authenticate.
    PGPASSWORD=postgres psql -h "$PG_HOST" -p "$PG_PORT" -U postgres -d postgres \
        -c "ALTER USER ${ROTATION_USER} WITH PASSWORD '${OLD_PASS}'" -q 2>/dev/null || true
fi

# FIX: hard rotation failures (broken precondition or failed ALTER USER) were
# printed as [FAIL] but never counted, so the script could exit 0 while the
# summary showed "Credential rotation: FAIL". The cached-password case above
# remains a warning by design.
if [ "$rotation_pass" = false ]; then
    ((FAILURES++)) || true
fi
echo ""

# ── Phase 4: Concurrent authentication ──────────────────────────────────────

echo "--- Phase 4: Concurrent authentication (${USER_COUNT} users, ${PARALLEL} parallel) ---"

RESULTS_DIR=$(mktemp -d)

# One authentication attempt; writes "ok"/"fail" to a per-user file so the
# parallel xargs workers need no shared state.
auth_one() {
    local idx=$1
    if PGPASSWORD="pass_${idx}" psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \
        -U "tenant_user_${idx}" -d "tenant_1" -c "SELECT 1" -t -q -A >/dev/null 2>&1; then
        echo "ok" > "${RESULTS_DIR}/${idx}"
    else
        echo "fail" > "${RESULTS_DIR}/${idx}"
    fi
}
export -f auth_one
export PGDOG_HOST PGDOG_PORT RESULTS_DIR

phase4_start=$(date +%s)
seq 1 "$USER_COUNT" | xargs -P "$PARALLEL" -I{} bash -c 'auth_one {}'
phase4_end=$(date +%s)
+phase4_elapsed=$((phase4_end - phase4_start)) + +phase4_ok=0 +phase4_fail=0 +for f in "$RESULTS_DIR"/*; do + [ -f "$f" ] || continue + status=$(cat "$f") + if [ "$status" = "ok" ]; then + ((phase4_ok++)) || true + else + ((phase4_fail++)) || true + fi +done +rm -rf "$RESULTS_DIR" + +if [ "$phase4_fail" -eq 0 ]; then + pass "Phase 4: All ${USER_COUNT} concurrent logins succeeded (${phase4_elapsed}s)" +else + fail "Phase 4: ${phase4_fail}/${USER_COUNT} concurrent logins failed" + ((FAILURES++)) || true +fi +echo "" + +# ── Summary ────────────────────────────────────────────────────────────────── + +echo "========================================" +echo " Passthrough Auth Test Summary" +echo "========================================" +printf " %-30s %s\n" "Correct credentials:" "${phase1_ok}/${USER_COUNT} passed" +printf " %-30s %s\n" "Wrong credentials:" "${phase2_rejected}/${SAMPLE_SIZE} rejected" +printf " %-30s %s\n" "Credential rotation:" "$([ "$rotation_pass" = true ] && echo "PASS" || echo "FAIL")" +printf " %-30s %s\n" "Concurrent auth:" "${phase4_ok}/${USER_COUNT} passed (${phase4_elapsed}s)" +echo "========================================" + +if [ "$FAILURES" -gt 0 ]; then + fail "Passthrough auth test: ${FAILURES} phase(s) failed" + exit 1 +else + pass "Passthrough auth test passed" + exit 0 +fi diff --git a/integration/production_readiness/load/pool_lifecycle.sh b/integration/production_readiness/load/pool_lifecycle.sh new file mode 100755 index 000000000..b1d88ddd4 --- /dev/null +++ b/integration/production_readiness/load/pool_lifecycle.sh @@ -0,0 +1,220 @@ +#!/usr/bin/env bash +# Test wildcard pool lifecycle: create → use → idle → evict → recreate. +# +# Validates that pools are properly evicted after wildcard_pool_idle_timeout, +# memory remains stable through churn cycles, and recreated pools work correctly. +# Critical for deployments with more DBs than max_wildcard_pools. 
+set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +IDLE_TIMEOUT="${IDLE_TIMEOUT:-10}" +CYCLE_COUNT=3 +BATCH_SIZE=50 + +while [[ $# -gt 0 ]]; do + case "$1" in + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --idle-timeout) IDLE_TIMEOUT="$2"; shift 2 ;; + --cycles) CYCLE_COUNT="$2"; shift 2 ;; + --batch-size) BATCH_SIZE="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +for cmd in psql curl; do + command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; } +done + +mkdir -p "$OUTPUT_DIR" +LOGFILE="$OUTPUT_DIR/pool_lifecycle.log" +exec > >(tee -a "$LOGFILE") 2>&1 + +admin_query() { + PGPASSWORD=admin psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \ + -U admin -d admin -t -A -c "$1" 2>/dev/null +} + +get_pool_count() { + local pools + pools=$(admin_query "SHOW POOLS" 2>/dev/null) || true + printf '%s\n' "$pools" | awk 'NF { count++ } END { print count + 0 }' +} + +get_rss() { + local pid + pid=$(pgrep -x pgdog 2>/dev/null | head -1 || true) + if [ -n "$pid" ]; then + ps -o rss= -p "$pid" 2>/dev/null | tr -d ' ' || echo "0" + else + echo "0" + fi +} + +FAILURES=0 + +echo "=== Pool Lifecycle Churn Test ===" +echo "PgDog: ${PGDOG_HOST}:${PGDOG_PORT}" +echo "Idle timeout: ${IDLE_TIMEOUT}s" +echo "Cycles: ${CYCLE_COUNT}, Batch: ${BATCH_SIZE} DBs per cycle" +echo "" + +# Configure PgDog's eviction timeout via admin SET so eviction actually fires +# during this test, regardless of the global config value. +echo "Setting wildcard_pool_idle_timeout=${IDLE_TIMEOUT} via admin..." 
+admin_query "SET wildcard_pool_idle_timeout TO '${IDLE_TIMEOUT}'" || { + warn "Could not SET wildcard_pool_idle_timeout — eviction may not fire" +} + +# Record initial state +initial_pools=$(get_pool_count) +initial_rss=$(get_rss) +echo "Initial state: pools=${initial_pools}, RSS=${initial_rss}KB" +echo "" + +TELEMETRY="${OUTPUT_DIR}/pool_lifecycle_telemetry.csv" +echo "cycle,phase,pool_count,rss_kb" > "$TELEMETRY" + +for cycle in $(seq 1 "$CYCLE_COUNT"); do + echo "--- Cycle ${cycle}/${CYCLE_COUNT} ---" + + # Each cycle uses a different range of tenant DBs to force new pool creation + start_db=$(( (cycle - 1) * BATCH_SIZE + 1 )) + end_db=$(( cycle * BATCH_SIZE )) + + # ── Create: touch BATCH_SIZE new databases ─────────────────────────────── + echo " Creating pools for tenant_${start_db}..tenant_${end_db}" + create_ok=0 + create_fail=0 + + for i in $(seq "$start_db" "$end_db"); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "tenant_$i" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((create_ok++)) || true + else + ((create_fail++)) || true + fi + done + + pools_after_create=$(get_pool_count) + rss_after_create=$(get_rss) + echo "$cycle,create,$pools_after_create,$rss_after_create" >> "$TELEMETRY" + echo " After create: ok=${create_ok}, fail=${create_fail}, pools=${pools_after_create}, RSS=${rss_after_create}KB" + + if [ "$create_fail" -gt 0 ]; then + warn " ${create_fail} connections failed during pool creation" + fi + + # ── Use: run queries to verify pools work ───────────────────────────── + echo " Verifying pools with queries..." 
+ use_ok=0 + use_fail=0 + + for i in $(seq "$start_db" "$end_db"); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "tenant_$i" \ + -c "SELECT current_database()" -t -q -A >/dev/null 2>&1; then + ((use_ok++)) || true + else + ((use_fail++)) || true + fi + done + + echo " Verify: ok=${use_ok}, fail=${use_fail}" + + # ── Idle: wait for pools to be evicted ───────────────────────────────── + wait_time=$((IDLE_TIMEOUT + 5)) + echo " Waiting ${wait_time}s for idle pools to be evicted..." + sleep "$wait_time" + + pools_after_idle=$(get_pool_count) + rss_after_idle=$(get_rss) + echo "$cycle,idle,$pools_after_idle,$rss_after_idle" >> "$TELEMETRY" + echo " After idle: pools=${pools_after_idle}, RSS=${rss_after_idle}KB" + + # Pools should have decreased + if [ "$pools_after_idle" -lt "$pools_after_create" ]; then + pass " Pools evicted: ${pools_after_create} → ${pools_after_idle}" + else + warn " Pools did NOT decrease: ${pools_after_create} → ${pools_after_idle}" + fi + + # ── Recreate: reconnect to same databases ───────────────────────────── + echo " Recreating pools for the same range..." 
+ recreate_ok=0 + recreate_fail=0 + + for i in $(seq "$start_db" "$end_db"); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "tenant_$i" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((recreate_ok++)) || true + else + ((recreate_fail++)) || true + fi + done + + pools_after_recreate=$(get_pool_count) + rss_after_recreate=$(get_rss) + echo "$cycle,recreate,$pools_after_recreate,$rss_after_recreate" >> "$TELEMETRY" + echo " After recreate: ok=${recreate_ok}, fail=${recreate_fail}, pools=${pools_after_recreate}" + + if [ "$recreate_fail" -gt 0 ]; then + fail " ${recreate_fail} connections failed after pool recreation" + ((FAILURES++)) || true + else + pass " All ${BATCH_SIZE} pools recreated successfully" + fi + echo "" +done + +# ── Memory stability check ────────────────────────────────────────────────── + +final_rss=$(get_rss) +if [ "$initial_rss" -gt 0 ] && [ "$final_rss" -gt 0 ]; then + growth_pct=$(( (final_rss - initial_rss) * 100 / initial_rss )) + echo "Memory: initial=${initial_rss}KB, final=${final_rss}KB, growth=${growth_pct}%" + if [ "$growth_pct" -gt 30 ]; then + fail "Memory grew ${growth_pct}% over ${CYCLE_COUNT} churn cycles — possible leak" + ((FAILURES++)) || true + else + pass "Memory growth ${growth_pct}% after ${CYCLE_COUNT} churn cycles" + fi +else + warn "Could not measure memory growth" +fi +echo "" + +# ── Summary ────────────────────────────────────────────────────────────────── + +echo "========================================" +echo " Pool Lifecycle Churn Summary" +echo "========================================" +printf " %-26s %s\n" "Cycles:" "$CYCLE_COUNT" +printf " %-26s %s\n" "Batch size:" "$BATCH_SIZE DBs" +printf " %-26s %s\n" "Idle timeout:" "${IDLE_TIMEOUT}s" +printf " %-26s %s\n" "Initial pools:" "$initial_pools" +printf " %-26s %s\n" "Telemetry:" "$TELEMETRY" +echo "========================================" + +if [ "$FAILURES" -gt 0 ]; then + fail "Pool lifecycle test: ${FAILURES} check(s) failed" 
+ exit 1 +else + pass "Pool lifecycle churn test passed" + exit 0 +fi diff --git a/integration/production_readiness/load/pool_pressure.sh b/integration/production_readiness/load/pool_pressure.sh new file mode 100755 index 000000000..1b6eece68 --- /dev/null +++ b/integration/production_readiness/load/pool_pressure.sh @@ -0,0 +1,212 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +POOL_SIZE=5 +CONNECTIONS=50 +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +TARGET_DB="tenant_1" +SLEEP_DURATION=10 +PROBE_COUNT=20 + +while [[ $# -gt 0 ]]; do + case "$1" in + --pool-size) POOL_SIZE="$2"; shift 2 ;; + --connections) CONNECTIONS="$2"; shift 2 ;; + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --target-db) TARGET_DB="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +for cmd in psql curl; do + command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; } +done + +mkdir -p "$OUTPUT_DIR" +LOGFILE="$OUTPUT_DIR/pool_pressure.log" +exec > >(tee -a "$LOGFILE") 2>&1 + +SATURATOR_PIDS=() +cleanup() { + echo "" + echo "Cleaning up saturating connections..." 
+ for pid in "${SATURATOR_PIDS[@]+"${SATURATOR_PIDS[@]}"}"; do + kill "$pid" 2>/dev/null || true + done + wait 2>/dev/null || true +} +trap cleanup SIGINT SIGTERM EXIT + +echo "=== Pool Pressure Test ===" +echo "Target: ${PGDOG_HOST}:${PGDOG_PORT} / ${TARGET_DB}" +echo "Expected pool_size: ${POOL_SIZE}, Saturating connections: ${CONNECTIONS}" +echo "" +warn "This test assumes PgDog is configured with pool_size=${POOL_SIZE} for ${TARGET_DB}" +echo "" + +# ── Phase 1: Saturate the pool ─────────────────────────────────────────────── + +echo "--- Phase 1: Saturating pool with ${CONNECTIONS} long-running connections ---" +saturate_start=$(date +%s) + +for i in $(seq 1 "$CONNECTIONS"); do + PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT pg_sleep(${SLEEP_DURATION})" -q >/dev/null 2>&1 & + SATURATOR_PIDS+=("$!") + + if (( i % 10 == 0 )); then + echo " [${i}/${CONNECTIONS}] connections launched" + fi +done + +# Brief pause so connections are established and pools are occupied +sleep 2 +echo " All ${CONNECTIONS} saturating connections launched" +echo "" + +# ── Phase 2: Probe while pool is saturated ─────────────────────────────────── + +echo "--- Phase 2: Probing during saturation (${PROBE_COUNT} queries) ---" + +probe_successes=0 +probe_timeouts=0 +probe_total_ms=0 + +for i in $(seq 1 "$PROBE_COUNT"); do + t_start=$(date +%s%N) + + # 3-second timeout: if pool is full these should queue or timeout + if timeout 3 bash -c "PGPASSWORD=pgdog psql -h $PGDOG_HOST -p $PGDOG_PORT -U pgdog -d $TARGET_DB -c 'SELECT 1' -t -q -A" >/dev/null 2>&1; then + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + ((probe_successes++)) || true + probe_total_ms=$((probe_total_ms + ms)) + echo " [${i}/${PROBE_COUNT}] OK (${ms}ms)" + else + ((probe_timeouts++)) || true + echo " [${i}/${PROBE_COUNT}] TIMEOUT/ERROR" + fi +done + +if [ "$probe_successes" -gt 0 ]; then + probe_avg=$((probe_total_ms / probe_successes)) +else + probe_avg=0 +fi + 
+echo "" +if [ "$probe_timeouts" -gt 0 ]; then + warn "During saturation: ${probe_successes} succeeded, ${probe_timeouts} timed out (avg ${probe_avg}ms)" +else + pass "All ${PROBE_COUNT} probes succeeded during saturation (avg ${probe_avg}ms)" +fi +echo "" + +# ── Phase 3: Recovery ──────────────────────────────────────────────────────── + +echo "--- Phase 3: Recovery after releasing saturating connections ---" + +for pid in "${SATURATOR_PIDS[@]+"${SATURATOR_PIDS[@]}"}"; do + kill "$pid" 2>/dev/null || true +done +SATURATOR_PIDS=() + +# Wait for connections to fully terminate +sleep 2 + +recovery_start=$(date +%s%N) +recovery_successes=0 +recovery_failures=0 + +for i in $(seq 1 "$PROBE_COUNT"); do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "$TARGET_DB" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((recovery_successes++)) || true + else + ((recovery_failures++)) || true + fi +done + +recovery_end=$(date +%s%N) +recovery_ms=$(( (recovery_end - recovery_start) / 1000000 )) + +if [ "$recovery_failures" -eq 0 ]; then + pass "Recovery: all ${PROBE_COUNT} queries succeeded (${recovery_ms}ms total)" +else + fail "Recovery: ${recovery_failures}/${PROBE_COUNT} queries still failing" +fi +echo "" + +# ── Phase 4: Admin stats verification ──────────────────────────────────────── + +echo "--- Phase 4: Admin stats verification ---" + +STATS=$(PGPASSWORD=admin psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U admin -d admin \ + -t -A -c "SHOW STATS" 2>/dev/null) || true + +if [ -n "$STATS" ]; then + echo "$STATS" > "$OUTPUT_DIR/pool_pressure_stats.txt" + pass "Admin stats collected" + # Show lines matching the target DB + target_stats=$(echo "$STATS" | grep "$TARGET_DB" || true) + if [ -n "$target_stats" ]; then + echo " Stats for ${TARGET_DB}:" + echo " $target_stats" + fi +else + warn "Could not query admin stats" +fi + +POOLS=$(PGPASSWORD=admin psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U admin -d admin \ + -t -A -c "SHOW POOLS" 2>/dev/null) || true + 
+if [ -n "$POOLS" ]; then + echo "$POOLS" > "$OUTPUT_DIR/pool_pressure_pools.txt" + target_pools=$(echo "$POOLS" | grep "$TARGET_DB" || true) + if [ -n "$target_pools" ]; then + echo " Pools for ${TARGET_DB}:" + echo " $target_pools" + fi +fi +echo "" + +# ── Summary ────────────────────────────────────────────────────────────────── + +echo "========================================" +echo " Pool Pressure Test Summary" +echo "========================================" +printf " %-30s %s\n" "Target database:" "$TARGET_DB" +printf " %-30s %s\n" "Configured pool_size:" "$POOL_SIZE" +printf " %-30s %s\n" "Saturating connections:" "$CONNECTIONS" +printf " %-30s %s\n" "Probes during saturation:" "$PROBE_COUNT" +printf " %-30s %s\n" " Succeeded:" "$probe_successes" +printf " %-30s %s\n" " Timed out:" "$probe_timeouts" +printf " %-30s %s\n" " Avg latency (succeeded):" "${probe_avg}ms" +printf " %-30s %s\n" "Recovery probes succeeded:" "${recovery_successes}/${PROBE_COUNT}" +printf " %-30s %s\n" "Recovery total time:" "${recovery_ms}ms" +echo "========================================" + +if [ "$recovery_failures" -gt 0 ]; then + fail "Pool did not fully recover" + exit 1 +else + pass "Pool pressure test passed" + exit 0 +fi diff --git a/integration/production_readiness/load/scale_connect.sh b/integration/production_readiness/load/scale_connect.sh new file mode 100755 index 000000000..815dc3bbe --- /dev/null +++ b/integration/production_readiness/load/scale_connect.sh @@ -0,0 +1,231 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +OUTPUT_DIR="${SCRIPT_DIR}/../results" + +COUNT=2000 +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +PARALLEL=50 +MIN_SUCCESS_RATE=85 + +while [[ $# -gt 0 ]]; do + case "$1" in + --count) COUNT="$2"; shift 2 ;; + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --parallel) PARALLEL="$2"; shift 2 ;; + --min-success-rate) MIN_SUCCESS_RATE="$2"; 
shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +for cmd in psql curl; do + command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; } +done + +mkdir -p "$OUTPUT_DIR" +LOGFILE="$OUTPUT_DIR/scale_connect.log" +exec > >(tee -a "$LOGFILE") 2>&1 + +cleanup() { + wait 2>/dev/null || true +} +trap cleanup EXIT SIGINT SIGTERM + +echo "=== Scale Connection Test ===" +echo "Target: ${PGDOG_HOST}:${PGDOG_PORT}" +echo "Databases: ${COUNT}, Parallelism: ${PARALLEL}" +echo "Required success rate: ${MIN_SUCCESS_RATE}%" +echo "" + +# ── Phase 1: Sequential warm-up ───────────────────────────────────────────── + +echo "--- Phase 1: Sequential warm-up (first 10 databases) ---" +WARMUP_COUNT=$((COUNT < 10 ? 
COUNT : 10)) +warmup_total_ms=0 +warmup_failures=0 + +for i in $(seq 1 "$WARMUP_COUNT"); do + t_start=$(date +%s%N) + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "tenant_$i" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + warmup_total_ms=$((warmup_total_ms + ms)) + echo " [${i}/${WARMUP_COUNT}] tenant_${i}: ${ms}ms" + else + ((warmup_failures++)) || true + warn " [${i}/${WARMUP_COUNT}] tenant_${i}: FAILED" + fi +done + +if [ "$warmup_failures" -eq 0 ]; then + warmup_avg=$((warmup_total_ms / WARMUP_COUNT)) + pass "Warm-up complete: avg ${warmup_avg}ms per connection" +else + fail "Warm-up had ${warmup_failures} failures out of ${WARMUP_COUNT}" +fi +echo "" + +# ── Phase 2: Parallel batch connect ───────────────────────────────────────── + +echo "--- Phase 2: Parallel batch connect (${COUNT} databases, ${PARALLEL} at a time) ---" + +RESULTS_DIR=$(mktemp -d) +phase2_start=$(date +%s) + +connect_one() { + local idx=$1 + local t_start t_end ms + t_start=$(date +%s%N) + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "tenant_$idx" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + t_end=$(date +%s%N) + ms=$(( (t_end - t_start) / 1000000 )) + echo "ok $ms" > "${RESULTS_DIR}/${idx}" + else + echo "fail 0" > "${RESULTS_DIR}/${idx}" + fi +} +export -f connect_one +export PGDOG_HOST PGDOG_PORT RESULTS_DIR + +seq 1 "$COUNT" | xargs -P "$PARALLEL" -I{} bash -c 'connect_one {}' + +phase2_end=$(date +%s) +phase2_elapsed=$((phase2_end - phase2_start)) + +successes=0 +failures=0 +total_latency_ms=0 +failed_dbs=() +for f in "$RESULTS_DIR"/*; do + read -r status ms < "$f" + if [ "$status" = "ok" ]; then + ((successes++)) || true + total_latency_ms=$((total_latency_ms + ms)) + else + ((failures++)) || true + failed_dbs+=("$(basename "$f")") + fi +done + +if [ "$successes" -gt 0 ]; then + avg_latency=$((total_latency_ms / successes)) +else + avg_latency=0 +fi + 
+success_rate=0 +if [ "$COUNT" -gt 0 ]; then + success_rate=$(( successes * 100 / COUNT )) +fi + +retry_recovered=0 +if [ "$failures" -gt 0 ]; then + echo "" + echo "--- Phase 2b: Retrying failed connections sequentially ---" + for db_idx in "${failed_dbs[@]}"; do + if PGPASSWORD=pgdog psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog -d "tenant_${db_idx}" \ + -c "SELECT 1" -t -q -A >/dev/null 2>&1; then + ((retry_recovered++)) || true + fi + done + echo " Recovered on retry: ${retry_recovered}/${failures}" +fi + +final_successes=$((successes + retry_recovered)) +final_failures=$((COUNT - final_successes)) +final_success_rate=0 +if [ "$COUNT" -gt 0 ]; then + final_success_rate=$(( final_successes * 100 / COUNT )) +fi + +rm -rf "$RESULTS_DIR" + +batch_processed=$((successes + failures)) +if (( batch_processed % 200 == 0 || batch_processed == COUNT )); then + : # progress already implicit from xargs completing +fi + +if [ "$final_failures" -eq 0 ]; then + pass "All ${COUNT} connections succeeded in ${phase2_elapsed}s (avg ${avg_latency}ms)" +else + warn "${final_failures}/${COUNT} connections still failed (${final_successes} succeeded, ${phase2_elapsed}s, ${final_success_rate}% success)" +fi +echo "" + +# ── Phase 3: Verify pools via admin DB ─────────────────────────────────────── + +echo "--- Phase 3: Admin pool verification ---" +pool_count=0 + +POOLS=$(PGPASSWORD=admin psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U admin -d admin \ + -t -A -c "SHOW POOLS" 2>/dev/null) || true + +if [ -n "$POOLS" ]; then + pool_count=$(echo "$POOLS" | grep -c "|" || true) + echo "$POOLS" > "$OUTPUT_DIR/scale_connect_pools.txt" + if [ "$pool_count" -ge "$COUNT" ]; then + pass "Pool count ${pool_count} >= expected ${COUNT}" + else + warn "Pool count ${pool_count} < expected ${COUNT} (pools may be created lazily)" + fi +else + warn "Could not query admin database — skipping pool verification" +fi +echo "" + +# ── Phase 4: Metrics check ────────────────────────────────────────────────── + 
+echo "--- Phase 4: Metrics endpoint check ---" +metrics_pool_count="N/A" + +METRICS=$(curl -sf "http://${PGDOG_HOST}:9090/metrics" 2>/dev/null) || true +if [ -n "$METRICS" ]; then + echo "$METRICS" > "$OUTPUT_DIR/scale_connect_metrics.txt" + metrics_pool_count=$(echo "$METRICS" | grep -c "pool" || echo "0") + pass "Metrics endpoint reachable (${metrics_pool_count} pool-related lines)" +else + warn "Metrics endpoint unreachable at http://${PGDOG_HOST}:9090/metrics" +fi +echo "" + +# ── Summary ────────────────────────────────────────────────────────────────── + +echo "========================================" +echo " Scale Connection Summary" +echo "========================================" +printf " %-22s %s\n" "Total databases:" "$COUNT" +printf " %-22s %s\n" "Successes:" "$successes" +printf " %-22s %s\n" "Failures:" "$failures" +printf " %-22s %s\n" "Recovered on retry:" "$retry_recovered" +printf " %-22s %s\n" "Final failures:" "$final_failures" +printf " %-22s %s\n" "Success rate:" "${final_success_rate}%" +printf " %-22s %s\n" "Avg latency:" "${avg_latency}ms" +printf " %-22s %s\n" "Total time:" "${phase2_elapsed}s" +printf " %-22s %s\n" "Pool count (admin):" "$pool_count" +printf " %-22s %s\n" "Metrics pool lines:" "$metrics_pool_count" +echo "========================================" + +if [ "$final_success_rate" -lt "$MIN_SUCCESS_RATE" ]; then + fail "Scale test completed below threshold: ${final_success_rate}% < ${MIN_SUCCESS_RATE}%" + exit 1 +else + pass "Scale test passed (${final_success_rate}% success, threshold ${MIN_SUCCESS_RATE}%)" + exit 0 +fi diff --git a/integration/production_readiness/load/sustained_load.sh b/integration/production_readiness/load/sustained_load.sh new file mode 100755 index 000000000..2d0df360d --- /dev/null +++ b/integration/production_readiness/load/sustained_load.sh @@ -0,0 +1,298 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 
+OUTPUT_DIR="${SCRIPT_DIR}/../results" + +DURATION_MIN=10 +CLIENTS=50 +TENANTS=100 +PGDOG_HOST="127.0.0.1" +PGDOG_PORT=6432 +SAMPLE_INTERVAL=30 +GROWTH_THRESHOLD=20 + +while [[ $# -gt 0 ]]; do + case "$1" in + --duration) DURATION_MIN="$2"; shift 2 ;; + --clients) CLIENTS="$2"; shift 2 ;; + --tenants) TENANTS="$2"; shift 2 ;; + --pgdog-host) PGDOG_HOST="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --pgdog-pid) PGDOG_PID_ARG="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +DURATION_SEC=$((DURATION_MIN * 60)) + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +for cmd in psql pgbench curl shuf; do + command -v "$cmd" >/dev/null 2>&1 || { fail "Required command not found: $cmd"; exit 1; } +done + +WORKLOAD_SQL="${SCRIPT_DIR}/tenant_workload.sql" +if [ ! -f "$WORKLOAD_SQL" ]; then + fail "Workload file not found: ${WORKLOAD_SQL}" + exit 1 +fi + +SOAK_DIR="${OUTPUT_DIR}/soak_$(date +%Y%m%d_%H%M%S)" +mkdir -p "$SOAK_DIR/metrics" "$SOAK_DIR/bench" +LOGFILE="${SOAK_DIR}/sustained_load.log" +exec > >(tee -a "$LOGFILE") 2>&1 + +CHILDREN=() +cleanup() { + echo "" + echo "Shutting down..." 
+ for pid in "${CHILDREN[@]+"${CHILDREN[@]}"}"; do + kill "$pid" 2>/dev/null || true + done + wait 2>/dev/null || true +} +trap cleanup SIGINT SIGTERM + +echo "=== Sustained Load (Soak) Test ===" +echo "Duration: ${DURATION_MIN} min (${DURATION_SEC}s)" +echo "Clients: ${CLIENTS}, Tenants: ${TENANTS}" +echo "Target: ${PGDOG_HOST}:${PGDOG_PORT}" +echo "Results: ${SOAK_DIR}" +echo "" + +# ── Find PgDog PID for memory tracking ─────────────────────────────────────── + +PGDOG_PID="${PGDOG_PID_ARG:-}" +if [ -z "$PGDOG_PID" ]; then + PGDOG_PID=$(pgrep -x pgdog 2>/dev/null | head -1 || true) +fi +if [ -n "$PGDOG_PID" ]; then + echo "Tracking PgDog PID: ${PGDOG_PID}" +else + warn "Cannot find PgDog process — memory tracking disabled" +fi + +# ── Launch pgbench workers ─────────────────────────────────────────────────── + +TENANT_IDS=$(shuf -i 1-"$TENANTS" -n "$TENANTS" | sort -n) + +clients_per_tenant=$((CLIENTS / TENANTS)) +if [ "$clients_per_tenant" -lt 1 ]; then + clients_per_tenant=1 +fi +remainder=$((CLIENTS - clients_per_tenant * TENANTS)) + +echo "--- Launching pgbench across ${TENANTS} tenants ---" +BENCH_PIDS=() + +for tid in $TENANT_IDS; do + c=$clients_per_tenant + if [ "$remainder" -gt 0 ]; then + c=$((c + 1)) + ((remainder--)) || true + fi + + PGPASSWORD=pgdog pgbench -h "$PGDOG_HOST" -p "$PGDOG_PORT" -U pgdog "tenant_$tid" \ + -c "$c" -T "$DURATION_SEC" \ + --protocol=extended -f "$WORKLOAD_SQL" \ + --no-vacuum -P 30 \ + > "${SOAK_DIR}/bench/tenant_${tid}.log" 2>&1 & + pid=$! 
+ BENCH_PIDS+=("$pid") + CHILDREN+=("$pid") +done +echo " ${TENANTS} pgbench processes launched" +echo "" + +# ── Periodic telemetry collector ───────────────────────────────────────────── + +TELEMETRY_CSV="${SOAK_DIR}/telemetry.csv" +echo "elapsed_s,rss_kb,pool_count,error_lines" > "$TELEMETRY_CSV" + +collect_telemetry() { + local elapsed=0 + while [ "$elapsed" -le "$DURATION_SEC" ]; do + # RSS + rss="N/A" + if [ -n "$PGDOG_PID" ] && kill -0 "$PGDOG_PID" 2>/dev/null; then + rss=$(ps -o rss= -p "$PGDOG_PID" 2>/dev/null | tr -d ' ' || echo "N/A") + fi + + # Metrics snapshot + metrics_file="${SOAK_DIR}/metrics/t${elapsed}.txt" + curl -sf "http://${PGDOG_HOST}:9090/metrics" > "$metrics_file" 2>/dev/null || true + error_lines=$(grep -ci "error" "$metrics_file" 2>/dev/null || echo "0") + + # Pool count via admin + pool_count=$(PGPASSWORD=admin psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \ + -U admin -d admin -t -A -c "SHOW POOLS" 2>/dev/null | grep -c "|" || echo "0") + + rss_val="${rss}" + [ "$rss_val" = "N/A" ] && rss_val=0 + echo "${elapsed},${rss_val},${pool_count},${error_lines}" >> "$TELEMETRY_CSV" + + rss_mb="N/A" + if [ "$rss" != "N/A" ] && [ "$rss" -gt 0 ] 2>/dev/null; then + rss_mb="$((rss / 1024))MB" + fi + printf " [%5ds/%ds] RSS=%-8s pools=%-5s errors=%s\n" \ + "$elapsed" "$DURATION_SEC" "$rss_mb" "$pool_count" "$error_lines" + + sleep "$SAMPLE_INTERVAL" + elapsed=$((elapsed + SAMPLE_INTERVAL)) + done +} +collect_telemetry & +TELEMETRY_PID=$! 
+CHILDREN+=("$TELEMETRY_PID") + +# ── Wait for pgbench to complete ───────────────────────────────────────────── + +for pid in "${BENCH_PIDS[@]}"; do + wait "$pid" 2>/dev/null || true +done + +kill "$TELEMETRY_PID" 2>/dev/null || true +wait "$TELEMETRY_PID" 2>/dev/null || true + +echo "" +echo "--- Load phase complete, analyzing results ---" +echo "" + +# ── Analyze telemetry ──────────────────────────────────────────────────────── + +VERDICT_FAILURES=0 + +# Memory analysis +rss_values=$(tail -n +2 "$TELEMETRY_CSV" | cut -d',' -f2 | grep -v '^0$' || true) +if [ -n "$rss_values" ]; then + first_rss=$(echo "$rss_values" | head -1) + last_rss=$(echo "$rss_values" | tail -1) + max_rss=$(echo "$rss_values" | sort -n | tail -1) + + if [ "$first_rss" -gt 0 ] 2>/dev/null; then + growth_pct=$(( (last_rss - first_rss) * 100 / first_rss )) + else + growth_pct=0 + fi + + echo "Memory:" + printf " Initial: %d KB (%d MB)\n" "$first_rss" "$((first_rss / 1024))" + printf " Final: %d KB (%d MB)\n" "$last_rss" "$((last_rss / 1024))" + printf " Peak: %d KB (%d MB)\n" "$max_rss" "$((max_rss / 1024))" + printf " Growth: %d%%\n" "$growth_pct" + + if [ "$growth_pct" -gt "$GROWTH_THRESHOLD" ]; then + fail "Memory grew ${growth_pct}% (threshold: ${GROWTH_THRESHOLD}%) — possible leak" + ((VERDICT_FAILURES++)) || true + else + pass "Memory growth ${growth_pct}% within ${GROWTH_THRESHOLD}% threshold" + fi +else + warn "No memory readings available" +fi +echo "" + +# Error rate analysis +error_values=$(tail -n +2 "$TELEMETRY_CSV" | cut -d',' -f4) +if [ -n "$error_values" ]; then + first_errors=$(echo "$error_values" | head -1) + last_errors=$(echo "$error_values" | tail -1) + echo "Error metric lines: first=${first_errors}, last=${last_errors}" + + if [ "$last_errors" -gt "$((first_errors + 10))" ] 2>/dev/null; then + warn "Error metric lines increased from ${first_errors} to ${last_errors}" + else + pass "Error rate stable" + fi +else + warn "No error data collected" +fi +echo "" + +# Pool 
count stability
+pool_values=$(tail -n +2 "$TELEMETRY_CSV" | cut -d',' -f3 | grep -v '^0$' || true)
+if [ -n "$pool_values" ]; then
+  first_pools=$(echo "$pool_values" | head -1)
+  last_pools=$(echo "$pool_values" | tail -1)
+  echo "Pool count: first=${first_pools}, last=${last_pools}"
+
+  diff_pools=$(( last_pools - first_pools ))
+  abs_diff=${diff_pools#-}
+  if [ "$abs_diff" -gt 10 ] 2>/dev/null; then
+    warn "Pool count drifted by ${diff_pools} (first=${first_pools}, last=${last_pools})"
+  else
+    pass "Pool count stable (drift: ${diff_pools})"
+  fi
+else
+  warn "No pool count data collected"
+fi
+echo ""
+
+# pgbench aggregation
+total_tps=0
+total_committed=0
+total_aborted=0
+bench_files=0
+
+for logfile in "$SOAK_DIR"/bench/tenant_*.log; do
+  [ -f "$logfile" ] || continue
+  ((bench_files++)) || true
+
+  tps=$(awk '/tps = /{gsub(/.*tps = /,""); gsub(/[^0-9.].*/,""); print}' "$logfile" | tail -1)
+  tps="${tps:-0}"
+  committed=$(awk '/number of transactions actually processed:/{gsub(/.*: /,""); gsub(/[^0-9].*/,""); print}' "$logfile")
+  committed="${committed:-0}"
+  aborted=$(awk '/number of failed transactions:/{gsub(/.*: /,""); gsub(/[^0-9].*/,""); print}' "$logfile")
+  aborted="${aborted:-0}"
+
+  total_tps=$(awk -v a="$total_tps" -v b="$tps" 'BEGIN{printf "%.2f", a + b}')
+  total_committed=$((total_committed + committed))
+  total_aborted=$((total_aborted + aborted))
+done
+
+# ── Time-series summary ────────────────────────────────────────────────────
+
+echo "--- Telemetry time-series (${TELEMETRY_CSV}) ---"
+echo " elapsed_s | rss_kb | pools | errors"
+echo " ----------|----------|-------|-------"
+tail -n +2 "$TELEMETRY_CSV" | while IFS=',' read -r t rss pools errs; do
+  printf " %9ss | %7s | %5s | %s\n" "$t" "$rss" "$pools" "$errs"
+done
+echo ""
+
+# ── Final verdict ──────────────────────────────────────────────────────────
+
+echo "========================================"
+echo " Sustained Load Summary"
+echo 
"========================================" +printf " %-26s %s\n" "Duration:" "${DURATION_MIN} min" +printf " %-26s %s\n" "Tenants:" "$bench_files" +printf " %-26s %s\n" "Total TPS:" "$total_tps" +printf " %-26s %s\n" "Transactions committed:" "$total_committed" +printf " %-26s %s\n" "Transactions aborted:" "$total_aborted" +printf " %-26s %s\n" "Telemetry samples:" "$(tail -n +2 "$TELEMETRY_CSV" | wc -l | tr -d ' ')" +printf " %-26s %s\n" "Results directory:" "$SOAK_DIR" +echo "========================================" + +if [ "$VERDICT_FAILURES" -gt 0 ]; then + fail "Soak test: ${VERDICT_FAILURES} check(s) failed" + exit 1 +elif [ "$total_aborted" -gt 0 ]; then + warn "Soak test passed with ${total_aborted} aborted transactions" + exit 0 +else + pass "Soak test passed — no anomalies detected" + exit 0 +fi diff --git a/integration/production_readiness/load/tenant_workload.sql b/integration/production_readiness/load/tenant_workload.sql new file mode 100644 index 000000000..44711de7e --- /dev/null +++ b/integration/production_readiness/load/tenant_workload.sql @@ -0,0 +1,9 @@ +\set user_id random(1, 100) +\set order_total random(1, 10000) / 100.0 + +BEGIN; +SELECT * FROM users WHERE id = :user_id; +INSERT INTO orders (user_id, total, status) VALUES (:user_id, :order_total, 'pending'); +SELECT COUNT(*) FROM orders WHERE user_id = :user_id; +UPDATE orders SET status = 'completed' WHERE user_id = :user_id AND status = 'pending' AND id = (SELECT MAX(id) FROM orders WHERE user_id = :user_id); +COMMIT; diff --git a/integration/production_readiness/run.sh b/integration/production_readiness/run.sh new file mode 100755 index 000000000..c13f789d1 --- /dev/null +++ b/integration/production_readiness/run.sh @@ -0,0 +1,398 @@ +#!/bin/bash +set -euo pipefail + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) + +TENANT_COUNT="${TENANT_COUNT:-2000}" +DURATION="${DURATION:-60}" +POOL_SIZE="${POOL_SIZE:-1}" +MAX_WILDCARD_POOLS="${MAX_WILDCARD_POOLS:-0}" 
+WILDCARD_IDLE_TIMEOUT="${WILDCARD_IDLE_TIMEOUT:-15}" +PGDOG_PORT="${PGDOG_PORT:-6432}" +PG_HOST="${PG_HOST:-127.0.0.1}" +PG_PORT="${PG_PORT:-15432}" +SKIP_BUILD="${SKIP_BUILD:-false}" +SKIP_INFRA="${SKIP_INFRA:-false}" +SKIP_K8S="${SKIP_K8S:-true}" +RESULTS_DIR="${RESULTS_DIR:-${SCRIPT_DIR}/results}" +CLIENTS="${CLIENTS:-50}" +LOG_LEVEL="${LOG_LEVEL:-off}" +LOAD_TENANTS="${LOAD_TENANTS:-100}" +AUTH_USERS="${AUTH_USERS:-100}" +LIFECYCLE_CYCLES="${LIFECYCLE_CYCLES:-3}" +LIFECYCLE_BATCH="${LIFECYCLE_BATCH:-50}" +STORM_SIZE="${STORM_SIZE:-200}" +STORM_PARALLEL="${STORM_PARALLEL:-100}" +INJECT_LATENCY="${INJECT_LATENCY:-5}" +INJECT_JITTER="${INJECT_JITTER:-2}" +SCALE_MIN_SUCCESS_RATE="${SCALE_MIN_SUCCESS_RATE:-85}" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +BOLD='\033[1m' +NC='\033[0m' + +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' CYAN='' BOLD='' NC='' +fi + +while [[ $# -gt 0 ]]; do + case "$1" in + --tenant-count) TENANT_COUNT="$2"; shift 2 ;; + --duration) DURATION="$2"; shift 2 ;; + --pool-size) POOL_SIZE="$2"; shift 2 ;; + --max-wildcard-pools) MAX_WILDCARD_POOLS="$2"; shift 2 ;; + --wildcard-idle-timeout) WILDCARD_IDLE_TIMEOUT="$2"; shift 2 ;; + --clients) CLIENTS="$2"; shift 2 ;; + --pg-host) PG_HOST="$2"; shift 2 ;; + --pg-port) PG_PORT="$2"; shift 2 ;; + --pgdog-port) PGDOG_PORT="$2"; shift 2 ;; + --log-level) LOG_LEVEL="$2"; shift 2 ;; + --results-dir) RESULTS_DIR="$2"; shift 2 ;; + --load-tenants) LOAD_TENANTS="$2"; shift 2 ;; + --auth-users) AUTH_USERS="$2"; shift 2 ;; + --lifecycle-cycles) LIFECYCLE_CYCLES="$2"; shift 2 ;; + --lifecycle-batch) LIFECYCLE_BATCH="$2"; shift 2 ;; + --storm-size) STORM_SIZE="$2"; shift 2 ;; + --storm-parallel) STORM_PARALLEL="$2"; shift 2 ;; + --inject-latency) INJECT_LATENCY="$2"; shift 2 ;; + --inject-jitter) INJECT_JITTER="$2"; shift 2 ;; + --scale-min-success-rate) SCALE_MIN_SUCCESS_RATE="$2"; shift 2 ;; + --skip-build) SKIP_BUILD=true; shift ;; + --skip-infra) SKIP_INFRA=true; 
shift ;; + --skip-k8s) SKIP_K8S=true; shift ;; + --with-k8s) SKIP_K8S=false; shift ;; + --help) + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --tenant-count N Number of tenant databases (default: 2000)" + echo " --duration N Load test duration in seconds (default: 60)" + echo " --pool-size N Default pool size (default: 1)" + echo " --max-wildcard-pools N Max wildcard pools, 0=unlimited (default: 0)" + echo " --wildcard-idle-timeout N Idle seconds before pool eviction (default: 15)" + echo " --clients N Number of clients (default: 50)" + echo " --pg-host HOST Postgres host (default: 127.0.0.1)" + echo " --pg-port PORT Postgres port (default: 15432)" + echo " --pgdog-port PORT PgDog listen port (default: 6432)" + echo " --log-level LEVEL PgDog log level: off|error|warn|info|debug|trace (default: off)" + echo " --results-dir DIR Directory for test results (default: ./results)" + echo " --load-tenants N Tenants for load/sustained tests (default: 100)" + echo " --auth-users N Users for passthrough auth test (default: 100)" + echo " --lifecycle-cycles N Pool lifecycle churn cycles (default: 3)" + echo " --lifecycle-batch N Pool lifecycle batch size (default: 50)" + echo " --storm-size N Connection storm total connections (default: 200)" + echo " --storm-parallel N Connection storm parallelism (default: 100)" + echo " --inject-latency N Toxiproxy injected latency in ms (default: 5)" + echo " --inject-jitter N Toxiproxy injected jitter in ms (default: 2)" + echo " --scale-min-success-rate N Minimum acceptable scale-connect success %% (default: 85)" + echo " --skip-build Skip cargo build" + echo " --skip-infra Skip Docker Compose and tenant setup" + echo " --skip-k8s Skip K8s tests (default)" + echo " --with-k8s Include K8s tests" + exit 0 + ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +mkdir -p "$RESULTS_DIR" +SUMMARY_FILE="$RESULTS_DIR/summary.txt" +: > "$SUMMARY_FILE" + +TOTAL_TESTS=0 +PASSED_TESTS=0 +FAILED_TESTS=0 +PGDOG_PID="" + 
+record_result() { + local name="$1" status="$2" + TOTAL_TESTS=$((TOTAL_TESTS + 1)) + if [ "$status" -eq 0 ]; then + PASSED_TESTS=$((PASSED_TESTS + 1)) + echo -e "${GREEN}[PASS]${NC} $name" + echo "PASS: $name" >> "$SUMMARY_FILE" + else + FAILED_TESTS=$((FAILED_TESTS + 1)) + echo -e "${RED}[FAIL]${NC} $name" + echo "FAIL: $name" >> "$SUMMARY_FILE" + fi +} + +cleanup() { + echo "" + echo -e "${CYAN}Cleaning up...${NC}" + if [ -n "$PGDOG_PID" ] && kill -0 "$PGDOG_PID" 2>/dev/null; then + kill "$PGDOG_PID" 2>/dev/null || true + wait "$PGDOG_PID" 2>/dev/null || true + fi + if [ "$SKIP_INFRA" = "false" ]; then + docker compose -f "$SCRIPT_DIR/docker-compose.yml" down -v 2>/dev/null || true + fi +} +trap cleanup EXIT INT TERM + +echo -e "${BOLD}╔══════════════════════════════════════════════╗${NC}" +echo -e "${BOLD}║ PgDog Production Readiness Test Suite ║${NC}" +echo -e "${BOLD}╚══════════════════════════════════════════════╝${NC}" +echo "" +echo "Configuration:" +echo " Tenant count: $TENANT_COUNT" +echo " Load duration: ${DURATION}s" +echo " Pool size: $POOL_SIZE" +echo " Max wildcard pools: $MAX_WILDCARD_POOLS" +echo " Wildcard idle timeout: ${WILDCARD_IDLE_TIMEOUT}s" +echo " PgDog port: $PGDOG_PORT" +echo " Postgres: $PG_HOST:$PG_PORT" +echo " Log level: $LOG_LEVEL" +echo " Results dir: $RESULTS_DIR" +echo " Load tenants: $LOAD_TENANTS" +echo " Auth users: $AUTH_USERS" +echo " Lifecycle cycles: $LIFECYCLE_CYCLES (batch: $LIFECYCLE_BATCH)" +echo " Storm size: $STORM_SIZE (parallel: $STORM_PARALLEL)" +echo " Inject latency: ${INJECT_LATENCY}ms (jitter: ${INJECT_JITTER}ms)" +echo " Scale min success: ${SCALE_MIN_SUCCESS_RATE}%" +echo "" + +# ── Phase 1: Build ────────────────────────────────────────────────── +if [ "$SKIP_BUILD" = "false" ]; then + echo -e "${CYAN}═══ Phase 1: Building PgDog ═══${NC}" + cd "$SCRIPT_DIR/../.." 
+ cargo build --release 2>&1 | tail -5 + PGDOG_BIN="$SCRIPT_DIR/../../target/release/pgdog" + echo "Binary: $PGDOG_BIN" + echo "" +else + PGDOG_BIN="${PGDOG_BIN:-$SCRIPT_DIR/../../target/release/pgdog}" + echo -e "${YELLOW}Skipping build (using $PGDOG_BIN)${NC}" + echo "" +fi + +if [ ! -f "$PGDOG_BIN" ]; then + echo -e "${RED}PgDog binary not found at $PGDOG_BIN${NC}" + echo "Run without --skip-build or set PGDOG_BIN" + exit 1 +fi + +# ── Phase 2: Infrastructure ───────────────────────────────────────── +if [ "$SKIP_INFRA" = "false" ]; then + echo -e "${CYAN}═══ Phase 2: Starting Infrastructure ═══${NC}" + cd "$SCRIPT_DIR" + docker compose -f docker-compose.yml up -d + echo "Waiting for Postgres to be ready..." + for i in $(seq 1 30); do + if PGPASSWORD=postgres psql -h "$PG_HOST" -p "$PG_PORT" -U postgres -c "SELECT 1" -t -q 2>/dev/null | grep -q 1; then + echo "Postgres ready after ${i}s" + break + fi + sleep 1 + if [ "$i" -eq 30 ]; then + echo -e "${RED}Postgres did not become ready in 30s${NC}" + exit 1 + fi + done + + echo "Creating $TENANT_COUNT tenant databases..." + QUIET=true bash "$SCRIPT_DIR/setup/generate_tenants.sh" \ + --count "$TENANT_COUNT" --host "$PG_HOST" --port "$PG_PORT" + + echo "Configuring toxiproxy..." 
+ bash "$SCRIPT_DIR/setup/configure_toxiproxy.sh" || { + echo -e "${YELLOW}Toxiproxy setup failed (fault injection tests will be skipped)${NC}" + } + echo "" +else + echo -e "${YELLOW}Skipping infrastructure setup${NC}" + echo "" +fi + +# ── Phase 3: Generate PgDog Config ────────────────────────────────── +echo -e "${CYAN}═══ Phase 3: Generating PgDog Config ═══${NC}" +python3 "$SCRIPT_DIR/setup/generate_config.py" \ + --pool-size "$POOL_SIZE" \ + --max-wildcard-pools "$MAX_WILDCARD_POOLS" \ + --wildcard-idle-timeout "$WILDCARD_IDLE_TIMEOUT" \ + --host "$PG_HOST" \ + --port "$PG_PORT" \ + --output-dir "$SCRIPT_DIR/config" +echo "" + +# ── Phase 4: Start PgDog ──────────────────────────────────────────── +echo -e "${CYAN}═══ Phase 4: Starting PgDog ═══${NC}" +cd "$SCRIPT_DIR/config" +RUST_LOG="$LOG_LEVEL" "$PGDOG_BIN" & +PGDOG_PID=$! +echo "PgDog PID: $PGDOG_PID" + +for i in $(seq 1 15); do + if PGPASSWORD=pgdog psql -h 127.0.0.1 -p "$PGDOG_PORT" -U pgdog -d "tenant_1" -c "SELECT 1" -t -q 2>/dev/null | grep -q 1; then + echo "PgDog ready after ${i}s" + break + fi + sleep 1 + if [ "$i" -eq 15 ]; then + echo -e "${RED}PgDog did not start in 15s${NC}" + exit 1 + fi +done +echo "" + +# ── Phase 5: Run Tests ────────────────────────────────────────────── +echo -e "${CYAN}═══ Phase 5: Running Tests ═══${NC}" +echo "" + +# Helper: run a test script, capture exit code without aborting the runner. 
+run_test() { + local name="$1"; shift + local logfile="$1"; shift + set +e + "$@" 2>&1 | tee "$logfile" + local rc=${PIPESTATUS[0]} + set -e + record_result "$name" "$rc" +} + +# 5a: Scale connection test +echo -e "${BOLD}── Scale Connect (${TENANT_COUNT} databases) ──${NC}" +run_test "Scale connect ($TENANT_COUNT DBs)" "$RESULTS_DIR/scale_connect.log" \ + bash "$SCRIPT_DIR/load/scale_connect.sh" \ + --count "$TENANT_COUNT" --pgdog-port "$PGDOG_PORT" \ + --min-success-rate "$SCALE_MIN_SUCCESS_RATE" +echo "" + +# Let idle connections drain before next test (idle_timeout=10s) +echo "Waiting 15s for idle backend connections to drain..." +sleep 15 + +# 5b: Multi-tenant load test +echo -e "${BOLD}── Multi-Tenant Load Test ──${NC}" +run_test "Multi-tenant load ($LOAD_TENANTS tenants, ${CLIENTS} clients, ${DURATION}s)" "$RESULTS_DIR/multi_tenant_bench.log" \ + bash "$SCRIPT_DIR/load/multi_tenant_bench.sh" \ + --tenant-count "$LOAD_TENANTS" --clients "$CLIENTS" --duration "$DURATION" \ + --pgdog-port "$PGDOG_PORT" +echo "" + +# Let connections drain after heavy load before auth test +echo "Waiting 20s for backend connections to drain after load test..." 
+sleep 20 + +# 5c: Passthrough auth +echo -e "${BOLD}── Passthrough Auth ──${NC}" +run_test "Passthrough auth ($AUTH_USERS users)" "$RESULTS_DIR/passthrough_auth.log" \ + bash "$SCRIPT_DIR/load/passthrough_auth.sh" \ + --user-count "$AUTH_USERS" --pgdog-port "$PGDOG_PORT" --pg-port "$PG_PORT" +echo "" + +# 5d: Pool pressure +echo -e "${BOLD}── Pool Pressure ──${NC}" +run_test "Pool pressure (pool=$POOL_SIZE, conns=$CLIENTS)" "$RESULTS_DIR/pool_pressure.log" \ + bash "$SCRIPT_DIR/load/pool_pressure.sh" \ + --pool-size "$POOL_SIZE" --connections "$CLIENTS" --pgdog-port "$PGDOG_PORT" +echo "" + +# 5e: Observability +echo -e "${BOLD}── Observability Validation ──${NC}" +run_test "OpenMetrics validation" "$RESULTS_DIR/check_metrics.log" \ + bash "$SCRIPT_DIR/validate/check_metrics.sh" + +run_test "Admin pool validation" "$RESULTS_DIR/check_pools.log" \ + bash "$SCRIPT_DIR/validate/check_pools.sh" +echo "" + +# 5f: Sustained load (shorter for automated run) +echo -e "${BOLD}── Sustained Load (soak) ──${NC}" +DURATION_MINS=$(( (DURATION + 59) / 60 )) +run_test "Sustained load (${DURATION_MINS}m, memory check)" "$RESULTS_DIR/sustained_load.log" \ + bash "$SCRIPT_DIR/load/sustained_load.sh" \ + --duration "$DURATION_MINS" --clients "$CLIENTS" --tenants "$LOAD_TENANTS" \ + --pgdog-port "$PGDOG_PORT" --pgdog-pid "$PGDOG_PID" +echo "" + +# Let pools drain before lifecycle test +echo "Waiting 15s for pools to drain..." +sleep 15 + +# 5g: Pool lifecycle churn +echo -e "${BOLD}── Pool Lifecycle Churn ──${NC}" +run_test "Pool lifecycle churn ($LIFECYCLE_CYCLES cycles)" "$RESULTS_DIR/pool_lifecycle.log" \ + bash "$SCRIPT_DIR/load/pool_lifecycle.sh" \ + --pgdog-port "$PGDOG_PORT" --idle-timeout "$WILDCARD_IDLE_TIMEOUT" --cycles "$LIFECYCLE_CYCLES" --batch-size "$LIFECYCLE_BATCH" +echo "" + +# 5h: Connection storm (thundering herd) +# Target range must stay within existing tenants +STORM_RANGE_END=$((TENANT_COUNT < 700 ? 
TENANT_COUNT : 700)) +STORM_RANGE_START=$(( STORM_RANGE_END > 200 ? STORM_RANGE_END - 200 : 1 )) +echo -e "${BOLD}── Connection Storm ──${NC}" +run_test "Connection storm ($STORM_SIZE concurrent to cold pools)" "$RESULTS_DIR/connection_storm.log" \ + bash "$SCRIPT_DIR/load/connection_storm.sh" \ + --pgdog-port "$PGDOG_PORT" --storm-size "$STORM_SIZE" --parallel "$STORM_PARALLEL" \ + --range-start "$STORM_RANGE_START" --range-end "$STORM_RANGE_END" +echo "" + +# 5i: Idle-in-transaction blocking +echo -e "${BOLD}── Idle-in-Transaction ──${NC}" +run_test "Idle-in-transaction (pool starvation)" "$RESULTS_DIR/idle_in_transaction.log" \ + bash "$SCRIPT_DIR/load/idle_in_transaction.sh" \ + --pgdog-port "$PGDOG_PORT" --target-db "tenant_1" +echo "" + +# 5j: Backend failure (requires toxiproxy) +if curl -sf "http://127.0.0.1:8474/version" >/dev/null 2>&1; then + echo -e "${BOLD}── Backend Failure (toxiproxy) ──${NC}" + run_test "Backend failure (reset, partition, slow)" "$RESULTS_DIR/backend_failure.log" \ + bash "$SCRIPT_DIR/load/backend_failure.sh" \ + --pgdog-port "$PGDOG_PORT" --target-db "tenant_1" + echo "" + + echo -e "${BOLD}── Network Latency (toxiproxy) ──${NC}" + COLD_RANGE_START=$(( LOAD_TENANTS + 1 )) + if [ "$COLD_RANGE_START" -gt "$TENANT_COUNT" ]; then + COLD_RANGE_START=1 + fi + COLD_RANGE_END=$(( COLD_RANGE_START + 49 )) + if [ "$COLD_RANGE_END" -gt "$TENANT_COUNT" ]; then + COLD_RANGE_END="$TENANT_COUNT" + fi + run_test "Network latency (${INJECT_LATENCY}ms injected)" "$RESULTS_DIR/network_latency.log" \ + bash "$SCRIPT_DIR/load/network_latency.sh" \ + --pgdog-port "$PGDOG_PORT" --latency "$INJECT_LATENCY" --jitter "$INJECT_JITTER" \ + --cold-range-start "$COLD_RANGE_START" --cold-range-end "$COLD_RANGE_END" + echo "" +else + echo -e "${YELLOW}Skipping toxiproxy tests (toxiproxy not available)${NC}" + echo "" +fi + +# 5k: Graceful shutdown (runs last — kills PgDog) +echo -e "${BOLD}── Graceful Shutdown ──${NC}" +run_test "Graceful shutdown under load" 
"$RESULTS_DIR/graceful_shutdown.log" \ + bash "$SCRIPT_DIR/load/graceful_shutdown.sh" \ + --pgdog-port "$PGDOG_PORT" --pgdog-bin "$PGDOG_BIN" --config-dir "$SCRIPT_DIR/config" +echo "" + +# ── Phase 6: Summary ──────────────────────────────────────────────── +echo -e "${BOLD}╔══════════════════════════════════════════════╗${NC}" +echo -e "${BOLD}║ Test Results Summary ║${NC}" +echo -e "${BOLD}╚══════════════════════════════════════════════╝${NC}" +echo "" +echo " Total: $TOTAL_TESTS" +echo -e " Passed: ${GREEN}$PASSED_TESTS${NC}" +echo -e " Failed: ${RED}$FAILED_TESTS${NC}" +echo "" +echo " Results: $RESULTS_DIR/" +echo " Summary: $SUMMARY_FILE" +echo "" + +cat "$SUMMARY_FILE" +echo "" + +if [ "$FAILED_TESTS" -gt 0 ]; then + echo -e "${RED}${BOLD}VERDICT: NOT PRODUCTION READY (${FAILED_TESTS} failure(s))${NC}" + exit 1 +else + echo -e "${GREEN}${BOLD}VERDICT: ALL TESTS PASSED${NC}" + exit 0 +fi diff --git a/integration/production_readiness/setup/configure_toxiproxy.sh b/integration/production_readiness/setup/configure_toxiproxy.sh new file mode 100755 index 000000000..1fa7dea1a --- /dev/null +++ b/integration/production_readiness/setup/configure_toxiproxy.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Configure toxiproxy to proxy traffic to Postgres primary. +# Run after docker-compose up and before PgDog starts. 
+set -euo pipefail + +TOXI_API="${TOXI_API:-http://127.0.0.1:8474}" +PG_HOST="${PG_HOST:-postgres_primary}" +PG_PORT="${PG_PORT:-5432}" +LISTEN_PORT="${LISTEN_PORT:-5432}" + +# Wait for toxiproxy API +for i in $(seq 1 15); do + if curl -sf "${TOXI_API}/version" >/dev/null 2>&1; then + echo "Toxiproxy API ready" + break + fi + sleep 1 + if [ "$i" -eq 15 ]; then + echo "ERROR: Toxiproxy API not reachable at ${TOXI_API}" + exit 1 + fi +done + +# Create proxy: PgDog connects to toxiproxy on ${LISTEN_PORT}, which forwards to ${PG_HOST}:${PG_PORT} +RESPONSE=$(curl -sf -X POST "${TOXI_API}/proxies" \ + -H "Content-Type: application/json" \ + -d "{ + \"name\": \"pg_primary\", + \"listen\": \"0.0.0.0:${LISTEN_PORT}\", + \"upstream\": \"${PG_HOST}:${PG_PORT}\", + \"enabled\": true + }" 2>&1) || { + echo "WARNING: Could not create proxy (may already exist): ${RESPONSE}" + # If it already exists, that's fine + curl -sf "${TOXI_API}/proxies/pg_primary" >/dev/null 2>&1 || { + echo "ERROR: Proxy does not exist and could not be created" + exit 1 + } +} + +echo "Proxy created: 0.0.0.0:${LISTEN_PORT} -> ${PG_HOST}:${PG_PORT}" diff --git a/integration/production_readiness/setup/generate_config.py b/integration/production_readiness/setup/generate_config.py new file mode 100755 index 000000000..4aaad17f6 --- /dev/null +++ b/integration/production_readiness/setup/generate_config.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +"""Generate PgDog wildcard configuration for production readiness testing.""" + +import argparse +import os +import textwrap + + +def generate_pgdog_toml(args: argparse.Namespace) -> str: + return textwrap.dedent(f"""\ + [general] + host = "0.0.0.0" + port = 6432 + workers = 4 + default_pool_size = {args.pool_size} + min_pool_size = 0 + pooler_mode = "transaction" + idle_timeout = 10000 + checkout_timeout = 5000 + healthcheck_interval = 30000 + healthcheck_timeout = 5000 + connect_timeout = 5000 + query_timeout = 30000 + load_balancing_strategy = "random" + prepared_statements =
"extended" + passthrough_auth = "enabled_plain" + openmetrics_port = 9090 + openmetrics_namespace = "pgdog" + max_wildcard_pools = {args.max_wildcard_pools} + wildcard_pool_idle_timeout = {args.wildcard_idle_timeout} + + [admin] + name = "admin" + user = "admin" + password = "admin" + + [[databases]] + name = "*" + host = "{args.host}" + port = {args.port} + """) + + +def generate_users_toml() -> str: + return textwrap.dedent("""\ + [[users]] + name = "*" + database = "*" + """) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Generate PgDog config for production readiness testing" + ) + parser.add_argument("--tenant-count", type=int, default=2000) + parser.add_argument("--pool-size", type=int, default=10) + parser.add_argument("--max-wildcard-pools", type=int, default=0) + parser.add_argument("--wildcard-idle-timeout", type=int, default=300) + parser.add_argument("--host", default="127.0.0.1") + parser.add_argument("--port", type=int, default=15432) + parser.add_argument("--output-dir", default=os.path.join(os.path.dirname(__file__), "..", "config")) + args = parser.parse_args() + + output_dir = os.path.abspath(args.output_dir) + os.makedirs(output_dir, exist_ok=True) + + pgdog_path = os.path.join(output_dir, "pgdog.toml") + with open(pgdog_path, "w") as f: + f.write(generate_pgdog_toml(args)) + + users_path = os.path.join(output_dir, "users.toml") + with open(users_path, "w") as f: + f.write(generate_users_toml()) + + print(f"Generated: {pgdog_path}") + print(f"Generated: {users_path}") + print(f"Configured for {args.tenant_count} tenants, pool_size={args.pool_size}, " + f"max_wildcard_pools={args.max_wildcard_pools}") + + +if __name__ == "__main__": + main() diff --git a/integration/production_readiness/setup/generate_tenants.sh b/integration/production_readiness/setup/generate_tenants.sh new file mode 100755 index 000000000..1f279930e --- /dev/null +++ b/integration/production_readiness/setup/generate_tenants.sh @@ -0,0 +1,96 @@ 
+#!/usr/bin/env bash +set -euo pipefail + +COUNT=2000 +HOST="127.0.0.1" +PORT=15432 + +while [[ $# -gt 0 ]]; do + case "$1" in + --count) COUNT="$2"; shift 2 ;; + --host) HOST="$2"; shift 2 ;; + --port) PORT="$2"; shift 2 ;; + --quiet) QUIET=true; shift ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +PGCONN="postgresql://postgres:postgres@${HOST}:${PORT}/postgres" + +QUIET="${QUIET:-false}" + +created=0 +skipped=0 +start_time=$(date +%s) + +log() { [ "$QUIET" != "true" ] && echo "$@" || true; } + +log "Creating ${COUNT} tenant databases on ${HOST}:${PORT}..." +log "Using tenant_template as base (schema + seed data pre-applied)" + +# Verify template exists before bulk creation +if ! psql "$PGCONN" -tAc "SELECT 1 FROM pg_database WHERE datname = 'tenant_template'" | grep -q 1; then + echo "ERROR: tenant_template database does not exist. Run init.sql first." + exit 1 +fi + +for i in $(seq 1 "$COUNT"); do + dbname="tenant_$i" + exists=$(psql "$PGCONN" -tAc "SELECT 1 FROM pg_database WHERE datname = '${dbname}'" 2>/dev/null || true) + + if [[ "$exists" == "1" ]]; then + ((skipped++)) || true + else + # Disconnect any sessions from template before cloning + psql "$PGCONN" -qAtc "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = 'tenant_template' AND pid <> pg_backend_pid();" >/dev/null 2>&1 || true + psql "$PGCONN" -qc "CREATE DATABASE ${dbname} TEMPLATE tenant_template OWNER pgdog;" 2>/dev/null + ((created++)) || true + fi + + if (( i % 100 == 0 )); then + elapsed=$(( $(date +%s) - start_time )) + log " [${i}/${COUNT}] created=${created} skipped=${skipped} elapsed=${elapsed}s" + fi +done + +log "" +log "Creating test users for passthrough auth testing..." 
+ +user_sql="" +for i in $(seq 1 100); do + user_sql+="DO \$\$ BEGIN + IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'tenant_user_${i}') THEN + CREATE ROLE tenant_user_${i} WITH LOGIN PASSWORD 'pass_${i}'; + END IF; +END \$\$; +" +done +psql "$PGCONN" -qc "$user_sql" + +# Grant connect on all tenant databases to pgdog and test users +log "Granting access to tenant databases..." +grant_sql="" +for i in $(seq 1 "$COUNT"); do + grant_sql+="GRANT CONNECT ON DATABASE tenant_${i} TO pgdog;" +done +for i in $(seq 1 100); do + grant_sql+="GRANT CONNECT ON DATABASE tenant_1 TO tenant_user_${i};" +done +psql "$PGCONN" -qc "$grant_sql" + +# Grant table-level access inside each tenant DB for test users +# (pgdog is superuser so needs no explicit grants) +for i in $(seq 1 100); do + psql "postgresql://postgres:postgres@${HOST}:${PORT}/tenant_1" \ + -qc "GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO tenant_user_${i}; + GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO tenant_user_${i};" 2>/dev/null || true +done + +elapsed=$(( $(date +%s) - start_time )) +log "" +log "=== Summary ===" +log " Total requested: ${COUNT}" +log " Created: ${created}" +log " Skipped: ${skipped}" +log " Test users: 100 (tenant_user_1..tenant_user_100)" +log " Elapsed: ${elapsed}s" diff --git a/integration/production_readiness/setup/init.sql b/integration/production_readiness/setup/init.sql new file mode 100644 index 000000000..de3c6a0e8 --- /dev/null +++ b/integration/production_readiness/setup/init.sql @@ -0,0 +1,38 @@ +CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + +CREATE USER pgdog WITH PASSWORD 'pgdog' SUPERUSER; + +CREATE DATABASE tenant_template OWNER pgdog; + +\c tenant_template + +CREATE TABLE IF NOT EXISTS users ( + id BIGSERIAL PRIMARY KEY, + name TEXT NOT NULL, + email TEXT NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE TABLE IF NOT EXISTS orders ( + id BIGSERIAL PRIMARY KEY, + user_id BIGINT REFERENCES users(id), + total NUMERIC(10,2) NOT 
NULL, + status TEXT DEFAULT 'pending', + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX idx_orders_user_id ON orders(user_id); +CREATE INDEX idx_orders_status ON orders(status); + +INSERT INTO users (name, email) +SELECT + 'user_' || i, + 'user_' || i || '@example.com' +FROM generate_series(1, 100) AS i; + +INSERT INTO orders (user_id, total, status) +SELECT + (random() * 99 + 1)::int, + (random() * 1000)::numeric(10,2), + (ARRAY['pending', 'completed', 'cancelled'])[floor(random() * 3 + 1)::int] +FROM generate_series(1, 1000) AS i; diff --git a/integration/production_readiness/validate/check_memory.sh b/integration/production_readiness/validate/check_memory.sh new file mode 100755 index 000000000..160f59d65 --- /dev/null +++ b/integration/production_readiness/validate/check_memory.sh @@ -0,0 +1,100 @@ +#!/bin/bash +set -euo pipefail + +PGDOG_PID="${1:-}" +DURATION="${DURATION:-300}" +INTERVAL="${INTERVAL:-10}" +OUTPUT_DIR="${OUTPUT_DIR:-$(dirname "$0")/../results}" +GROWTH_THRESHOLD="${GROWTH_THRESHOLD:-20}" + +mkdir -p "$OUTPUT_DIR" +MEMLOG="$OUTPUT_DIR/memory_trace.csv" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +if [ -z "$PGDOG_PID" ]; then + PGDOG_PID=$(pgrep -f "pgdog" | head -1 || true) + if [ -z "$PGDOG_PID" ]; then + fail "Cannot find PgDog process. Pass PID as first argument." + exit 1 + fi +fi + +if ! kill -0 "$PGDOG_PID" 2>/dev/null; then + fail "PID $PGDOG_PID is not running" + exit 1 +fi + +echo "=== Memory Monitor ===" +echo "PID: $PGDOG_PID" +echo "Duration: ${DURATION}s, Interval: ${INTERVAL}s" +echo "Growth threshold: ${GROWTH_THRESHOLD}%" +echo "" + +echo "elapsed_s,rss_kb" > "$MEMLOG" + +INITIAL_RSS="" +ELAPSED=0 + +while [ "$ELAPSED" -le "$DURATION" ]; do + if ! 
kill -0 "$PGDOG_PID" 2>/dev/null; then + warn "Process $PGDOG_PID exited at ${ELAPSED}s" + break + fi + + RSS=$(ps -o rss= -p "$PGDOG_PID" 2>/dev/null | tr -d ' ') + if [ -z "$RSS" ]; then + warn "Could not read RSS at ${ELAPSED}s" + sleep "$INTERVAL" + ELAPSED=$((ELAPSED + INTERVAL)) + continue + fi + + echo "${ELAPSED},${RSS}" >> "$MEMLOG" + + if [ -z "$INITIAL_RSS" ]; then + INITIAL_RSS="$RSS" + echo "Initial RSS: ${RSS} KB ($(( RSS / 1024 )) MB)" + fi + + RSS_MB=$(( RSS / 1024 )) + printf " [%4ds] RSS: %d KB (%d MB)\n" "$ELAPSED" "$RSS" "$RSS_MB" + + sleep "$INTERVAL" + ELAPSED=$((ELAPSED + INTERVAL)) +done + +if [ -z "$INITIAL_RSS" ]; then + fail "No memory readings collected" + exit 1 +fi + +FINAL_RSS=$(tail -1 "$MEMLOG" | cut -d',' -f2) +GROWTH_PCT=$(( (FINAL_RSS - INITIAL_RSS) * 100 / INITIAL_RSS )) + +echo "" +echo "=== Memory Summary ===" +echo "Initial: ${INITIAL_RSS} KB ($(( INITIAL_RSS / 1024 )) MB)" +echo "Final: ${FINAL_RSS} KB ($(( FINAL_RSS / 1024 )) MB)" +echo "Growth: ${GROWTH_PCT}%" +echo "Log: $MEMLOG" +echo "" + +if [ "$GROWTH_PCT" -gt "$GROWTH_THRESHOLD" ]; then + fail "Memory grew ${GROWTH_PCT}% (threshold: ${GROWTH_THRESHOLD}%) — possible leak" + exit 1 +else + pass "Memory growth ${GROWTH_PCT}% is within ${GROWTH_THRESHOLD}% threshold" + exit 0 +fi diff --git a/integration/production_readiness/validate/check_metrics.sh b/integration/production_readiness/validate/check_metrics.sh new file mode 100755 index 000000000..ed9f0e1cf --- /dev/null +++ b/integration/production_readiness/validate/check_metrics.sh @@ -0,0 +1,76 @@ +#!/bin/bash +set -euo pipefail + +PGDOG_METRICS="${PGDOG_METRICS_URL:-http://127.0.0.1:9090/metrics}" +OUTPUT_DIR="${OUTPUT_DIR:-$(dirname "$0")/../results}" +mkdir -p "$OUTPUT_DIR" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +if ! 
[ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; FAILURES=$((FAILURES + 1)); } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +FAILURES=0 + +echo "=== OpenMetrics Validation ===" +echo "Endpoint: $PGDOG_METRICS" +echo "" + +METRICS=$(curl -sf "$PGDOG_METRICS" 2>/dev/null) || { + fail "Cannot reach metrics endpoint at $PGDOG_METRICS" + exit 1 +} + +echo "$METRICS" > "$OUTPUT_DIR/metrics_snapshot.txt" +pass "Metrics endpoint reachable ($(wc -l <<< "$METRICS" | tr -d ' ') lines)" + +POOL_METRICS=$(grep -c "pool" <<< "$METRICS" || true) +if [ "$POOL_METRICS" -gt 0 ]; then + pass "Pool metrics present ($POOL_METRICS lines)" +else + fail "No pool metrics found" +fi + +CLIENT_METRICS=$(grep -c "client" <<< "$METRICS" || true) +if [ "$CLIENT_METRICS" -gt 0 ]; then + pass "Client metrics present ($CLIENT_METRICS lines)" +else + warn "No client metrics found (may be expected if no active clients)" +fi + +ERROR_LINES=$(grep -i "error" <<< "$METRICS" || true) +if [ -n "$ERROR_LINES" ]; then + TOTAL_ERRORS=$(grep -v "^#" <<< "$ERROR_LINES" | awk '{sum += $NF} END {print sum+0}') + if [ "$TOTAL_ERRORS" -gt 0 ]; then + warn "Error metrics detected: total=$TOTAL_ERRORS" + else + pass "Error metrics present but zero" + fi +else + pass "No error metrics (clean state)" +fi + +# Validate Prometheus format +if grep -qE "^# (HELP|TYPE) " <<< "$METRICS"; then + pass "Valid Prometheus/OpenMetrics format (HELP/TYPE headers present)" +else + fail "Metrics do not appear to be in valid Prometheus format" +fi + +echo "" +echo "=== Results ===" +if [ "$FAILURES" -eq 0 ]; then + pass "All metrics checks passed" +else + fail "$FAILURES check(s) failed" +fi + +exit "$FAILURES" diff --git a/integration/production_readiness/validate/check_pools.sh b/integration/production_readiness/validate/check_pools.sh new file mode 100755 index 000000000..38987a92c --- /dev/null +++ 
b/integration/production_readiness/validate/check_pools.sh @@ -0,0 +1,87 @@ +#!/bin/bash +set -euo pipefail + +PGDOG_HOST="${PGDOG_HOST:-127.0.0.1}" +PGDOG_PORT="${PGDOG_PORT:-6432}" +ADMIN_USER="${ADMIN_USER:-admin}" +ADMIN_DB="${ADMIN_DB:-admin}" +OUTPUT_DIR="${OUTPUT_DIR:-$(dirname "$0")/../results}" +mkdir -p "$OUTPUT_DIR" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +if ! [ -t 1 ]; then + RED='' GREEN='' YELLOW='' NC='' +fi + +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; FAILURES=$((FAILURES + 1)); } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +FAILURES=0 + +admin_query() { + PGPASSWORD="${ADMIN_PASSWORD:-admin}" psql -h "$PGDOG_HOST" -p "$PGDOG_PORT" \ + -U "$ADMIN_USER" -d "$ADMIN_DB" -t -A -c "$1" 2>/dev/null +} + +echo "=== Admin Pool Validation ===" +echo "PgDog: $PGDOG_HOST:$PGDOG_PORT" +echo "" + +echo "--- SHOW POOLS ---" +POOLS=$(admin_query "SHOW POOLS" 2>/dev/null) || { + fail "Cannot query admin database" + exit 1 +} +echo "$POOLS" > "$OUTPUT_DIR/show_pools.txt" + +POOL_COUNT=$(echo "$POOLS" | grep -c "|" || true) +echo "Total pool entries: $POOL_COUNT" + +HEALTHY=$(echo "$POOLS" | grep -c "|t|" || echo "0") +UNHEALTHY=$(echo "$POOLS" | grep -c "|f|" || echo "0") + +if [ "$POOL_COUNT" -gt 0 ]; then + pass "Pools exist ($POOL_COUNT entries)" +else + fail "No pools found" +fi + +if [ "$UNHEALTHY" -gt 0 ]; then + warn "$UNHEALTHY unhealthy pool(s) detected" +else + pass "All pools healthy" +fi + +echo "" +echo "--- SHOW CLIENTS ---" +CLIENTS=$(admin_query "SHOW CLIENTS" 2>/dev/null) || { + warn "SHOW CLIENTS failed" + CLIENTS="" +} +echo "$CLIENTS" > "$OUTPUT_DIR/show_clients.txt" + +CLIENT_COUNT=$(echo "$CLIENTS" | grep -c "|" || true) +echo "Active clients: $CLIENT_COUNT" + +echo "" +echo "--- SHOW STATS ---" +STATS=$(admin_query "SHOW STATS" 2>/dev/null) || { + warn "SHOW STATS failed" + STATS="" +} +echo "$STATS" > "$OUTPUT_DIR/show_stats.txt" + +echo "" +echo "=== 
Results ===" +if [ "$FAILURES" -eq 0 ]; then + pass "All pool checks passed" +else + fail "$FAILURES check(s) failed" +fi + +exit "$FAILURES" diff --git a/integration/rust/tests/integration/concurrent_pool_creation.rs b/integration/rust/tests/integration/concurrent_pool_creation.rs new file mode 100644 index 000000000..28b6e9576 --- /dev/null +++ b/integration/rust/tests/integration/concurrent_pool_creation.rs @@ -0,0 +1,154 @@ +use rust::setup::{admin_sqlx, admin_tokio}; +use serial_test::serial; +use sqlx::{Executor, Row}; +use std::collections::HashSet; +use std::time::Duration; +use tokio::time::sleep; +use tokio_postgres::NoTls; + +async fn try_connect(db: &str) -> Result { + let (client, connection) = tokio_postgres::connect( + &format!( + "host=127.0.0.1 user=pgdog dbname={} password=pgdog port=6432", + db + ), + NoTls, + ) + .await?; + + tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + Ok(client) +} + +async fn pool_databases() -> HashSet { + let admin = admin_sqlx().await; + let pools = admin.fetch_all("SHOW POOLS").await.unwrap(); + pools + .iter() + .map(|p| p.get::("database")) + .collect() +} + +#[tokio::test] +#[serial] +#[ignore = "requires wildcard config and tenant_concurrent_1 database in Postgres"] +async fn concurrent_same_db() { + let admin = admin_tokio().await; + admin + .simple_query("SET max_wildcard_pools TO 0") + .await + .unwrap(); + sleep(Duration::from_millis(200)).await; + + let db = "tenant_concurrent_1"; + let handles: Vec<_> = (0..20) + .map(|_| { + let db = db.to_string(); + tokio::spawn(async move { + let client = try_connect(&db).await?; + client.simple_query("SELECT 1").await?; + Ok::<_, tokio_postgres::Error>(()) + }) + }) + .collect(); + + let results = futures_util::future::join_all(handles).await; + let successes = results.iter().filter(|r| matches!(r, Ok(Ok(())))).count(); + assert!( + successes >= 18, + "at least 18/20 concurrent connections to same db 
should succeed, got {successes}" + ); + + // Allow pool registration to settle before verifying. + sleep(Duration::from_millis(200)).await; + let pools = pool_databases().await; + assert!(pools.contains(db), "SHOW POOLS should list a pool for {db}"); + + admin.simple_query("RELOAD").await.unwrap(); + sleep(Duration::from_millis(200)).await; +} + +#[tokio::test] +#[serial] +#[ignore = "requires wildcard config and 20 tenant databases in Postgres"] +async fn concurrent_different_dbs() { + let admin = admin_tokio().await; + admin + .simple_query("SET max_wildcard_pools TO 0") + .await + .unwrap(); + sleep(Duration::from_millis(200)).await; + + let handles: Vec<_> = (0..20) + .map(|i| { + tokio::spawn(async move { + let db = format!("tenant_diff_{i}"); + let client = try_connect(&db).await?; + client.simple_query("SELECT 1").await?; + Ok::<_, tokio_postgres::Error>(()) + }) + }) + .collect(); + + let results = futures_util::future::join_all(handles).await; + let successes = results.iter().filter(|r| matches!(r, Ok(Ok(())))).count(); + assert_eq!( + successes, 20, + "all 20 different-db connections should succeed" + ); + + sleep(Duration::from_millis(200)).await; + let pools = pool_databases().await; + for i in 0..20 { + let db = format!("tenant_diff_{i}"); + assert!(pools.contains(&db), "pool for {db} should exist"); + } + + admin.simple_query("RELOAD").await.unwrap(); + sleep(Duration::from_millis(200)).await; +} + +#[tokio::test] +#[serial] +#[ignore = "requires wildcard config and 5 tenant databases in Postgres"] +async fn concurrent_mixed() { + let admin = admin_tokio().await; + admin + .simple_query("SET max_wildcard_pools TO 0") + .await + .unwrap(); + sleep(Duration::from_millis(200)).await; + + // 5 different databases, 10 concurrent connections each. 
+ let handles: Vec<_> = (0..50) + .map(|i| { + let db_idx = i % 5; + tokio::spawn(async move { + let db = format!("tenant_mixed_{db_idx}"); + let client = try_connect(&db).await?; + client.simple_query("SELECT 1").await?; + Ok::<_, tokio_postgres::Error>(()) + }) + }) + .collect(); + + let results = futures_util::future::join_all(handles).await; + let successes = results.iter().filter(|r| matches!(r, Ok(Ok(())))).count(); + assert_eq!(successes, 50, "all 50 mixed connections should succeed"); + + sleep(Duration::from_millis(200)).await; + let pools = pool_databases().await; + let mixed_pools: HashSet<_> = pools + .iter() + .filter(|db| db.starts_with("tenant_mixed_")) + .collect(); + assert_eq!(mixed_pools.len(), 5, "should have exactly 5 distinct pools"); + + admin.simple_query("RELOAD").await.unwrap(); + sleep(Duration::from_millis(200)).await; +} diff --git a/integration/rust/tests/integration/cross_shard_disabled.rs b/integration/rust/tests/integration/cross_shard_disabled.rs index 73df13552..bc3d1d89d 100644 --- a/integration/rust/tests/integration/cross_shard_disabled.rs +++ b/integration/rust/tests/integration/cross_shard_disabled.rs @@ -14,6 +14,26 @@ async fn test_cross_shard_disabled() { let conns = connections_sqlx().await; + // Ensure the sharded table exists on both shards (may have been + // dropped by cleanup_split_table in a previous test run). 
+ let sharded = conns.get(1).unwrap(); + for shard in [0, 1] { + let ddl = format!( + "/* pgdog_shard: {shard} */ CREATE TABLE IF NOT EXISTS sharded \ + (id BIGINT PRIMARY KEY, value TEXT)" + ); + sharded.execute(ddl.as_str()).await.unwrap(); + } + conns + .get(0) + .unwrap() + .execute( + "CREATE TABLE IF NOT EXISTS sharded \ + (id BIGINT PRIMARY KEY, value TEXT)", + ) + .await + .unwrap(); + for conn in &conns { sqlx::query("SELECT * FROM sharded") .fetch_optional(conn) diff --git a/integration/rust/tests/integration/mod.rs b/integration/rust/tests/integration/mod.rs index a25d94950..0d04a0999 100644 --- a/integration/rust/tests/integration/mod.rs +++ b/integration/rust/tests/integration/mod.rs @@ -4,6 +4,7 @@ pub mod auto_id; pub mod avg; pub mod ban; pub mod client_ids; +pub mod concurrent_pool_creation; pub mod connection_recovery; pub mod cross_shard_disabled; pub mod distinct; @@ -17,6 +18,7 @@ pub mod multi_set; pub mod notify; pub mod offset; pub mod per_stmt_routing; +pub mod pool_cap_saturation; pub mod prepared; pub mod reload; pub mod rewrite; diff --git a/integration/rust/tests/integration/pool_cap_saturation.rs b/integration/rust/tests/integration/pool_cap_saturation.rs new file mode 100644 index 000000000..e13407c8e --- /dev/null +++ b/integration/rust/tests/integration/pool_cap_saturation.rs @@ -0,0 +1,159 @@ +use rust::setup::{admin_sqlx, admin_tokio}; +use serial_test::serial; +use sqlx::{Executor, Row}; +use std::collections::HashSet; +use std::time::Duration; +use tokio::time::sleep; +use tokio_postgres::NoTls; + +async fn connect_tenant(db: &str) -> Result { + let (client, connection) = tokio_postgres::connect( + &format!( + "host=127.0.0.1 user=pgdog dbname={} password=pgdog port=6432", + db + ), + NoTls, + ) + .await?; + + tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + Ok(client) +} + +async fn count_wildcard_pools(prefix: &str) -> usize { + let admin = admin_sqlx().await; 
+ let pools = admin.fetch_all("SHOW POOLS").await.unwrap(); + pools + .iter() + .map(|p| p.get::("database")) + .filter(|db| db.starts_with(prefix)) + .collect::>() + .len() +} + +#[tokio::test] +#[serial] +#[ignore = "requires wildcard config and tenant databases in Postgres"] +async fn pool_cap_basic() { + let admin = admin_tokio().await; + admin + .simple_query("SET max_wildcard_pools TO 5") + .await + .unwrap(); + sleep(Duration::from_millis(200)).await; + + let mut clients = Vec::new(); + for i in 1..=5 { + let db = format!("tenant_cap_{i}"); + let client = connect_tenant(&db) + .await + .unwrap_or_else(|e| panic!("tenant_cap_{i} should connect: {e}")); + client.simple_query("SELECT 1").await.unwrap(); + clients.push(client); + } + + // 6th connection should be rejected — pool cap reached. + let result = connect_tenant("tenant_cap_6").await; + match result { + Ok(client) => { + let q = client.simple_query("SELECT 1").await; + assert!(q.is_err(), "query on 6th tenant should fail when cap is 5"); + } + Err(_) => { /* connection-level rejection is also acceptable */ } + } + + // Allow pool registration to settle before counting. + sleep(Duration::from_millis(200)).await; + assert_eq!(count_wildcard_pools("tenant_cap_").await, 5); + + drop(clients); + admin.simple_query("RELOAD").await.unwrap(); + sleep(Duration::from_millis(200)).await; +} + +#[tokio::test] +#[serial] +#[ignore = "requires wildcard config and tenant databases in Postgres"] +async fn pool_cap_no_corruption() { + let admin = admin_tokio().await; + admin + .simple_query("SET max_wildcard_pools TO 3") + .await + .unwrap(); + sleep(Duration::from_millis(200)).await; + + let mut clients = Vec::new(); + for i in 1..=3 { + let db = format!("tenant_nocorr_{i}"); + let client = connect_tenant(&db).await.unwrap(); + client.simple_query("SELECT 1").await.unwrap(); + clients.push(client); + } + + // 4th should be rejected. 
+ let overflow = connect_tenant("tenant_nocorr_4").await; + match overflow { + Ok(c) => assert!(c.simple_query("SELECT 1").await.is_err()), + Err(_) => {} + } + + // Original 3 pools must still function after the rejection. + for client in &clients { + client + .simple_query("SELECT 1") + .await + .expect("existing wildcard pool should still work after cap rejection"); + } + + drop(clients); + admin.simple_query("RELOAD").await.unwrap(); + sleep(Duration::from_millis(200)).await; +} + +#[tokio::test] +#[serial] +#[ignore = "requires wildcard config and tenant databases in Postgres"] +async fn pool_cap_after_eviction() { + let admin = admin_tokio().await; + admin + .simple_query("SET max_wildcard_pools TO 3") + .await + .unwrap(); + admin + .simple_query("SET wildcard_pool_idle_timeout TO 2") + .await + .unwrap(); + sleep(Duration::from_millis(200)).await; + + // Connect, verify, then explicitly drop so pools become idle. + { + let mut clients = Vec::new(); + for i in 1..=3 { + let db = format!("tenant_evict_{i}"); + let client = connect_tenant(&db).await.unwrap(); + client.simple_query("SELECT 1").await.unwrap(); + clients.push(client); + } + drop(clients); + } + + // Wait for connections to close + eviction loop (timeout=2s + margin). + sleep(Duration::from_secs(5)).await; + + // Old pools should be evicted; new ones should succeed. 
+ for i in 4..=6 { + let db = format!("tenant_evict_{i}"); + let client = connect_tenant(&db) + .await + .unwrap_or_else(|e| panic!("tenant_evict_{i} should connect after eviction: {e}")); + client.simple_query("SELECT 1").await.unwrap(); + } + + admin.simple_query("RELOAD").await.unwrap(); + sleep(Duration::from_millis(200)).await; +} diff --git a/integration/rust/tests/integration/rewrite.rs b/integration/rust/tests/integration/rewrite.rs index 0d44fb649..71ed4eb93 100644 --- a/integration/rust/tests/integration/rewrite.rs +++ b/integration/rust/tests/integration/rewrite.rs @@ -295,7 +295,9 @@ async fn test_error_disconnects_and_update_works() -> Result<(), Box PathBuf { #[tokio::test] #[serial] async fn tls_acceptor_swaps_after_sighup() { + // Ensure PgDog has loaded the current on-disk config (a previous test's + // ConfigGuard may not have given the SIGHUP enough time to take effect). + let admin = admin_tokio().await; + admin + .simple_query("RELOAD") + .await + .expect("admin reload before sighup test"); + sleep(Duration::from_millis(500)).await; + let mut guard = ConfigGuard::new().expect("config guard"); let initial_cert = fetch_server_cert_der().await.expect("initial cert"); @@ -183,7 +192,7 @@ impl Drop for ConfigGuard { fn drop(&mut self) { let _ = fs::write(&self.path, &self.original); let _ = unsafe { libc::kill(self.pid, libc::SIGHUP) }; - std::thread::sleep(Duration::from_millis(500)); + std::thread::sleep(Duration::from_millis(1000)); } } diff --git a/integration/setup.sh b/integration/setup.sh old mode 100644 new mode 100755 diff --git a/integration/wildcard/pgdog.toml b/integration/wildcard/pgdog.toml new file mode 100644 index 000000000..b2c9e55c2 --- /dev/null +++ b/integration/wildcard/pgdog.toml @@ -0,0 +1,33 @@ +# Wildcard integration test config. +# Only "pgdog" is explicitly configured. All other database names +# are handled by the wildcard "*" template pointing at the same +# Postgres instance. 
+# +# Uses passthrough auth: pgdog forwards the client's credentials +# to Postgres for verification. This lets wildcard users connect +# with their own Postgres username and password. + +[general] +host = "0.0.0.0" +port = 6432 +query_timeout = 5_000 +checkout_timeout = 5_000 +connect_timeout = 5_000 +idle_timeout = 30_000 +min_pool_size = 0 +default_pool_size = 5 +pooler_mode = "transaction" +passthrough_auth = "enabled_plain" + +# Explicit database — always available. +[[databases]] +name = "pgdog" +host = "127.0.0.1" +port = 5432 + +# Wildcard template — any database name not explicitly listed +# will spawn a pool using these connection settings. +[[databases]] +name = "*" +host = "127.0.0.1" +port = 5432 diff --git a/integration/wildcard/run.sh b/integration/wildcard/run.sh new file mode 100644 index 000000000..fceeacf00 --- /dev/null +++ b/integration/wildcard/run.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -euo pipefail +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +source ${SCRIPT_DIR}/../common.sh + +run_pgdog ${SCRIPT_DIR} +wait_for_pgdog + +pushd ${SCRIPT_DIR} + +python3 test_wildcard.py + +popd + +stop_pgdog diff --git a/integration/wildcard/test_wildcard.py b/integration/wildcard/test_wildcard.py new file mode 100644 index 000000000..08a40e9bd --- /dev/null +++ b/integration/wildcard/test_wildcard.py @@ -0,0 +1,244 @@ +""" +Integration test for wildcard database routing with passthrough auth. + +Tests that pgdog dynamically creates pools when a client connects with +a (user, database) pair not explicitly listed in the config — using +the wildcard "*" template — and forwards the client's actual +credentials to Postgres for verification. 
+ +Setup (run before this test): + CREATE USER wildcard_tester WITH PASSWORD 'Xk9mP2vLq7w'; + CREATE DATABASE wildcard_test_db OWNER wildcard_tester; + -- plus a table: + CREATE TABLE items (id serial PRIMARY KEY, name text NOT NULL); + INSERT INTO items (name) VALUES ('alpha'), ('beta'), ('gamma'); +""" + +import psycopg +import sys + +PGDOG_HOST = "127.0.0.1" +PGDOG_PORT = 6432 + +# Existing user configured explicitly in users.toml. +EXPLICIT_USER = "pgdog" +EXPLICIT_PASS = "pgdog" + +# New user unknown to pgdog — only exists in Postgres. +WILDCARD_USER = "wildcard_tester" +WILDCARD_PASS = "Xk9mP2vLq7w" +WILDCARD_DB = "wildcard_test_db" + + +def connect(dbname, user, password): + return psycopg.connect( + host=PGDOG_HOST, + port=PGDOG_PORT, + dbname=dbname, + user=user, + password=password, + autocommit=True, + ) + + +# ------------------------------------------------------------------ # +# 1. Baseline: explicit pool still works +# ------------------------------------------------------------------ # + +def test_explicit_pool(): + """The explicit (pgdog, pgdog) pool works as before.""" + conn = connect("pgdog", EXPLICIT_USER, EXPLICIT_PASS) + cur = conn.cursor() + cur.execute("SELECT current_user, current_database()") + row = cur.fetchone() + assert row[0] == "pgdog", f"expected user pgdog, got {row[0]}" + assert row[1] == "pgdog", f"expected db pgdog, got {row[1]}" + conn.close() + print(" PASS explicit pool (pgdog/pgdog)") + + +# ------------------------------------------------------------------ # +# 2. Wildcard: known user (pgdog) → unknown database +# ------------------------------------------------------------------ # + +def test_known_user_wildcard_db(): + """User 'pgdog' connects to 'wildcard_test_db' — a database + pgdog doesn't know about. 
Passthrough auth forwards pgdog's + credentials to Postgres.""" + conn = connect(WILDCARD_DB, EXPLICIT_USER, EXPLICIT_PASS) + cur = conn.cursor() + cur.execute("SELECT current_database()") + db = cur.fetchone()[0] + assert db == WILDCARD_DB, f"expected {WILDCARD_DB}, got {db}" + conn.close() + print(f" PASS known user → wildcard db ({WILDCARD_DB})") + + +# ------------------------------------------------------------------ # +# 3. Wildcard: unknown user + unknown database (the main scenario) +# ------------------------------------------------------------------ # + +def test_unknown_user_wildcard_db(): + """User 'wildcard_tester' (unknown to pgdog) connects to + 'wildcard_test_db' (also unknown). Both user and database are + resolved via the wildcard template, and passthrough auth + forwards the real credentials to Postgres.""" + conn = connect(WILDCARD_DB, WILDCARD_USER, WILDCARD_PASS) + cur = conn.cursor() + cur.execute("SELECT current_user, current_database()") + row = cur.fetchone() + assert row[0] == WILDCARD_USER, f"expected user {WILDCARD_USER}, got {row[0]}" + assert row[1] == WILDCARD_DB, f"expected db {WILDCARD_DB}, got {row[1]}" + conn.close() + print(f" PASS unknown user ({WILDCARD_USER}) → wildcard db ({WILDCARD_DB})") + + +def test_unknown_user_read_existing_data(): + """wildcard_tester reads the pre-seeded 'items' table through + the wildcard pool.""" + conn = connect(WILDCARD_DB, WILDCARD_USER, WILDCARD_PASS) + cur = conn.cursor() + cur.execute("SELECT name FROM items ORDER BY id") + rows = [r[0] for r in cur.fetchall()] + assert rows == ["alpha", "beta", "gamma"], f"unexpected: {rows}" + conn.close() + print(" PASS unknown user → read existing data") + + +def test_unknown_user_write_and_read(): + """wildcard_tester creates a table, writes, reads, and drops it + — full lifecycle through the wildcard pool.""" + conn = connect(WILDCARD_DB, WILDCARD_USER, WILDCARD_PASS) + cur = conn.cursor() + cur.execute("DROP TABLE IF EXISTS wc_lifecycle") + 
cur.execute("CREATE TABLE wc_lifecycle (id int, val text)") + cur.execute("INSERT INTO wc_lifecycle VALUES (1, 'x'), (2, 'y')") + cur.execute("SELECT val FROM wc_lifecycle ORDER BY id") + rows = [r[0] for r in cur.fetchall()] + assert rows == ["x", "y"], f"unexpected: {rows}" + cur.execute("DROP TABLE wc_lifecycle") + conn.close() + print(" PASS unknown user → full DDL+DML lifecycle") + + +# ------------------------------------------------------------------ # +# 4. Wrong password — pgdog should relay the Postgres auth error +# ------------------------------------------------------------------ # + +def test_wrong_password_rejected(): + """wildcard_tester with a wrong password is rejected. + Passthrough auth should forward the bad password to Postgres + and relay the auth failure back.""" + try: + conn = connect(WILDCARD_DB, WILDCARD_USER, "WRONG_PASSWORD") + cur = conn.cursor() + cur.execute("SELECT 1") + conn.close() + raise AssertionError("expected auth failure, but connection succeeded") + except psycopg.OperationalError as e: + err = str(e).lower() + ok = ("password" in err or "authentication" in err + or "auth" in err or "fatal" in err) + assert ok, f"unexpected error: {e}" + print(" PASS wrong password → rejected") + + +# ------------------------------------------------------------------ # +# 5. Unknown user + unknown db — nonexistent database +# ------------------------------------------------------------------ # + +def test_nonexistent_database(): + """wildcard_tester tries to connect to a database that doesn't + exist in Postgres. 
The error should come from Postgres.""" + try: + conn = connect("nope_db_xyz", WILDCARD_USER, WILDCARD_PASS) + cur = conn.cursor() + cur.execute("SELECT 1") + conn.close() + raise AssertionError("expected error for nonexistent db") + except psycopg.OperationalError as e: + err = str(e).lower() + ok = "does not exist" in err or "fatal" in err or "down" in err + assert ok, f"unexpected error: {e}" + print(" PASS nonexistent db → correct error") + + +# ------------------------------------------------------------------ # +# 6. Multiple wildcard users concurrently +# ------------------------------------------------------------------ # + +def test_concurrent_wildcard_users(): + """Both pgdog and wildcard_tester connect to wildcard_test_db + at the same time — each gets their own pool.""" + conn1 = connect(WILDCARD_DB, EXPLICIT_USER, EXPLICIT_PASS) + conn2 = connect(WILDCARD_DB, WILDCARD_USER, WILDCARD_PASS) + + cur1 = conn1.cursor() + cur2 = conn2.cursor() + + cur1.execute("SELECT current_user") + cur2.execute("SELECT current_user") + + assert cur1.fetchone()[0] == EXPLICIT_USER + assert cur2.fetchone()[0] == WILDCARD_USER + + conn1.close() + conn2.close() + print(" PASS concurrent wildcard users (pgdog + wildcard_tester)") + + +# ------------------------------------------------------------------ # +# 7. 
Unknown user connects to multiple databases +# ------------------------------------------------------------------ # + +def test_wildcard_user_multiple_dbs(): + """wildcard_tester connects to wildcard_test_db and also to pgdog + (the pgdog database grants connect to all users by default).""" + for dbname in [WILDCARD_DB, "pgdog"]: + conn = connect(dbname, WILDCARD_USER, WILDCARD_PASS) + cur = conn.cursor() + cur.execute("SELECT current_database()") + db = cur.fetchone()[0] + assert db == dbname, f"expected {dbname}, got {db}" + conn.close() + print(" PASS wildcard user → multiple databases") + + +# ------------------------------------------------------------------ # + +def main(): + print("=== Wildcard Passthrough Auth Integration Tests ===") + print(f" user: {WILDCARD_USER}, db: {WILDCARD_DB}") + print() + failures = 0 + total = 0 + + tests = [ + ("explicit pool", test_explicit_pool), + ("known user → wc db", test_known_user_wildcard_db), + ("unknown user → wc db", test_unknown_user_wildcard_db), + ("read existing data", test_unknown_user_read_existing_data), + ("write+read lifecycle", test_unknown_user_write_and_read), + ("wrong password", test_wrong_password_rejected), + ("nonexistent db", test_nonexistent_database), + ("concurrent users", test_concurrent_wildcard_users), + ("user → multiple dbs", test_wildcard_user_multiple_dbs), + ] + + for name, test_fn in tests: + total += 1 + try: + test_fn() + except Exception as e: + print(f" FAIL {name}: {e}") + failures += 1 + + passed = total - failures + print(f"\n=== Results: {passed}/{total} passed, {failures} failed ===") + if failures > 0: + sys.exit(1) + print("All wildcard passthrough auth tests passed!") + + +if __name__ == "__main__": + main() diff --git a/integration/wildcard/users.toml b/integration/wildcard/users.toml new file mode 100644 index 000000000..00633dbb0 --- /dev/null +++ b/integration/wildcard/users.toml @@ -0,0 +1,13 @@ +# Explicit user for the explicit database. 
+[[users]] +name = "pgdog" +database = "pgdog" +password = "pgdog" + +# Wildcard user — any (user, database) pair not matched above. +# No server_user/server_password: pgdog passes through the +# client-provided credentials to Postgres directly. +[[users]] +name = "*" +database = "*" +min_pool_size = 0 diff --git a/pgdog-config/src/core.rs b/pgdog-config/src/core.rs index 64199dbb5..076400800 100644 --- a/pgdog-config/src/core.rs +++ b/pgdog-config/src/core.rs @@ -214,6 +214,34 @@ impl Config { databases } + /// Get wildcard database entries (name = "*"), organized by shard. + /// Returns None if no wildcard databases are configured. + pub fn wildcard_databases(&self) -> Option>> { + let wildcard_dbs: Vec<&Database> = + self.databases.iter().filter(|d| d.is_wildcard()).collect(); + if wildcard_dbs.is_empty() { + return None; + } + + let mut shards: Vec> = Vec::new(); + for (number, database) in self.databases.iter().enumerate() { + if database.is_wildcard() { + while shards.len() <= database.shard { + shards.push(vec![]); + } + shards + .get_mut(database.shard) + .unwrap() + .push(EnumeratedDatabase { + number, + database: database.clone(), + }); + } + } + + Some(shards) + } + /// Organize sharded tables by database name. pub fn sharded_tables(&self) -> HashMap> { let mut tables = HashMap::new(); @@ -321,7 +349,17 @@ impl Config { pub fn check(&mut self) { // Check databases. 
let mut duplicate_dbs = HashSet::new(); + let mut wildcard_db_count = 0usize; for database in self.databases.clone() { + if database.is_wildcard() { + wildcard_db_count += 1; + if database.shard > 0 { + warn!( + r#"wildcard database "*" with shard={} is not supported, use shard=0 only"#, + database.shard + ); + } + } let id = ( database.name.clone(), database.role, @@ -338,6 +376,13 @@ impl Config { } } + if wildcard_db_count > 2 { + warn!( + r#"multiple wildcard "*" database entries detected ({} entries), only one primary and one replica are expected"#, + wildcard_db_count + ); + } + struct Check { pooler_mode: Option, role: Role, diff --git a/pgdog-config/src/database.rs b/pgdog-config/src/database.rs index a8683d528..82cccd876 100644 --- a/pgdog-config/src/database.rs +++ b/pgdog-config/src/database.rs @@ -137,6 +137,28 @@ impl Database { fn port() -> u16 { 5432 } + + /// Whether this database entry is a wildcard template (`name = "*"`). + pub fn is_wildcard(&self) -> bool { + self.name == "*" + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_database_is_wildcard() { + let mut db = Database::default(); + assert!(!db.is_wildcard()); + + db.name = "mydb".to_string(); + assert!(!db.is_wildcard()); + + db.name = "*".to_string(); + assert!(db.is_wildcard()); + } } #[derive( diff --git a/pgdog-config/src/general.rs b/pgdog-config/src/general.rs index cf7b5ed45..1cc1a588d 100644 --- a/pgdog-config/src/general.rs +++ b/pgdog-config/src/general.rs @@ -238,6 +238,17 @@ pub struct General { /// Cutover save config to disk. #[serde(default)] pub cutover_save_config: bool, + /// Maximum number of dynamically-created wildcard pools (0 = unlimited). + /// Once this limit is reached further wildcard connections are rejected with + /// a "no such database" error until an existing wildcard pool is evicted + /// (e.g. via a SIGHUP config reload). 
+ #[serde(default)] + pub max_wildcard_pools: usize, + /// Seconds a dynamically-created wildcard pool must have zero connections + /// before it is automatically removed. 0 disables automatic eviction; + /// pools are only cleaned up on SIGHUP or restart. + #[serde(default)] + pub wildcard_pool_idle_timeout: u64, } impl Default for General { @@ -320,6 +331,8 @@ impl Default for General { cutover_timeout: Self::cutover_timeout(), cutover_timeout_action: Self::cutover_timeout_action(), cutover_save_config: bool::default(), + max_wildcard_pools: 0, + wildcard_pool_idle_timeout: 0, } } } diff --git a/pgdog-config/src/users.rs b/pgdog-config/src/users.rs index ab36f5af0..da6948f43 100644 --- a/pgdog-config/src/users.rs +++ b/pgdog-config/src/users.rs @@ -146,6 +146,16 @@ impl User { ..Default::default() } } + + /// Whether this user entry has a wildcard name (`name = "*"`). + pub fn is_wildcard_name(&self) -> bool { + self.name == "*" + } + + /// Whether this user entry has a wildcard database (`database = "*"`). + pub fn is_wildcard_database(&self) -> bool { + self.database == "*" + } } /// Admin database settings. 
@@ -253,4 +263,28 @@ mod tests { .unwrap(); assert_eq!(bob_source.password(), "pass4"); } + + #[test] + fn test_user_wildcard_name() { + let mut user = User::default(); + assert!(!user.is_wildcard_name()); + + user.name = "alice".to_string(); + assert!(!user.is_wildcard_name()); + + user.name = "*".to_string(); + assert!(user.is_wildcard_name()); + } + + #[test] + fn test_user_wildcard_database() { + let mut user = User::default(); + assert!(!user.is_wildcard_database()); + + user.database = "mydb".to_string(); + assert!(!user.is_wildcard_database()); + + user.database = "*".to_string(); + assert!(user.is_wildcard_database()); + } } diff --git a/pgdog-plugin/src/bindings.rs b/pgdog-plugin/src/bindings.rs index 351aeb3b5..fdabc97b7 100644 --- a/pgdog-plugin/src/bindings.rs +++ b/pgdog-plugin/src/bindings.rs @@ -1,214 +1,339 @@ /* automatically generated by rust-bindgen 0.71.1 */ -pub const _STDINT_H: u32 = 1; -pub const _FEATURES_H: u32 = 1; -pub const _DEFAULT_SOURCE: u32 = 1; -pub const __GLIBC_USE_ISOC2Y: u32 = 0; -pub const __GLIBC_USE_ISOC23: u32 = 0; -pub const __USE_ISOC11: u32 = 1; -pub const __USE_ISOC99: u32 = 1; -pub const __USE_ISOC95: u32 = 1; -pub const __USE_POSIX_IMPLICITLY: u32 = 1; -pub const _POSIX_SOURCE: u32 = 1; -pub const _POSIX_C_SOURCE: u32 = 202405; -pub const __USE_POSIX: u32 = 1; -pub const __USE_POSIX2: u32 = 1; -pub const __USE_POSIX199309: u32 = 1; -pub const __USE_POSIX199506: u32 = 1; -pub const __USE_XOPEN2K: u32 = 1; -pub const __USE_XOPEN2K8: u32 = 1; -pub const _ATFILE_SOURCE: u32 = 1; -pub const __USE_XOPEN2K24: u32 = 1; pub const __WORDSIZE: u32 = 64; -pub const __WORDSIZE_TIME64_COMPAT32: u32 = 1; -pub const __SYSCALL_WORDSIZE: u32 = 64; -pub const __TIMESIZE: u32 = 64; -pub const __USE_TIME_BITS64: u32 = 1; -pub const __USE_MISC: u32 = 1; -pub const __USE_ATFILE: u32 = 1; -pub const __USE_FORTIFY_LEVEL: u32 = 0; -pub const __GLIBC_USE_DEPRECATED_GETS: u32 = 0; -pub const __GLIBC_USE_DEPRECATED_SCANF: u32 = 0; -pub const 
__GLIBC_USE_C23_STRTOL: u32 = 0; -pub const _STDC_PREDEF_H: u32 = 1; -pub const __STDC_IEC_559__: u32 = 1; -pub const __STDC_IEC_60559_BFP__: u32 = 201404; -pub const __STDC_IEC_559_COMPLEX__: u32 = 1; -pub const __STDC_IEC_60559_COMPLEX__: u32 = 201404; -pub const __STDC_ISO_10646__: u32 = 201706; -pub const __GNU_LIBRARY__: u32 = 6; -pub const __GLIBC__: u32 = 2; -pub const __GLIBC_MINOR__: u32 = 43; -pub const _SYS_CDEFS_H: u32 = 1; -pub const __glibc_c99_flexarr_available: u32 = 1; -pub const __LDOUBLE_REDIRECTS_TO_FLOAT128_ABI: u32 = 0; -pub const __HAVE_GENERIC_SELECTION: u32 = 1; -pub const __GLIBC_USE_LIB_EXT2: u32 = 0; -pub const __GLIBC_USE_IEC_60559_BFP_EXT: u32 = 0; -pub const __GLIBC_USE_IEC_60559_BFP_EXT_C23: u32 = 0; -pub const __GLIBC_USE_IEC_60559_EXT: u32 = 0; -pub const __GLIBC_USE_IEC_60559_FUNCS_EXT: u32 = 0; -pub const __GLIBC_USE_IEC_60559_FUNCS_EXT_C23: u32 = 0; -pub const __GLIBC_USE_IEC_60559_TYPES_EXT: u32 = 0; -pub const _BITS_TYPES_H: u32 = 1; -pub const _BITS_TYPESIZES_H: u32 = 1; -pub const __OFF_T_MATCHES_OFF64_T: u32 = 1; -pub const __INO_T_MATCHES_INO64_T: u32 = 1; -pub const __RLIM_T_MATCHES_RLIM64_T: u32 = 1; -pub const __STATFS_MATCHES_STATFS64: u32 = 1; -pub const __KERNEL_OLD_TIMEVAL_MATCHES_TIMEVAL64: u32 = 1; -pub const __FD_SETSIZE: u32 = 1024; -pub const _BITS_TIME64_H: u32 = 1; -pub const _BITS_WCHAR_H: u32 = 1; -pub const _BITS_STDINT_INTN_H: u32 = 1; -pub const _BITS_STDINT_UINTN_H: u32 = 1; -pub const _BITS_STDINT_LEAST_H: u32 = 1; -pub const INT8_MIN: i32 = -128; -pub const INT16_MIN: i32 = -32768; -pub const INT32_MIN: i32 = -2147483648; +pub const __has_safe_buffers: u32 = 1; +pub const __DARWIN_ONLY_64_BIT_INO_T: u32 = 1; +pub const __DARWIN_ONLY_UNIX_CONFORMANCE: u32 = 1; +pub const __DARWIN_ONLY_VERS_1050: u32 = 1; +pub const __DARWIN_UNIX03: u32 = 1; +pub const __DARWIN_64_BIT_INO_T: u32 = 1; +pub const __DARWIN_VERS_1050: u32 = 1; +pub const __DARWIN_NON_CANCELABLE: u32 = 0; +pub const __DARWIN_SUF_EXTSN: &[u8; 
14] = b"$DARWIN_EXTSN\0"; +pub const __DARWIN_C_ANSI: u32 = 4096; +pub const __DARWIN_C_FULL: u32 = 900000; +pub const __DARWIN_C_LEVEL: u32 = 900000; +pub const __STDC_WANT_LIB_EXT1__: u32 = 1; +pub const __DARWIN_NO_LONG_LONG: u32 = 0; +pub const _DARWIN_FEATURE_64_BIT_INODE: u32 = 1; +pub const _DARWIN_FEATURE_ONLY_64_BIT_INODE: u32 = 1; +pub const _DARWIN_FEATURE_ONLY_VERS_1050: u32 = 1; +pub const _DARWIN_FEATURE_ONLY_UNIX_CONFORMANCE: u32 = 1; +pub const _DARWIN_FEATURE_UNIX_CONFORMANCE: u32 = 3; +pub const __has_ptrcheck: u32 = 0; +pub const __has_bounds_safety_attributes: u32 = 0; +pub const USE_CLANG_TYPES: u32 = 0; +pub const __PTHREAD_SIZE__: u32 = 8176; +pub const __PTHREAD_ATTR_SIZE__: u32 = 56; +pub const __PTHREAD_MUTEXATTR_SIZE__: u32 = 8; +pub const __PTHREAD_MUTEX_SIZE__: u32 = 56; +pub const __PTHREAD_CONDATTR_SIZE__: u32 = 8; +pub const __PTHREAD_COND_SIZE__: u32 = 40; +pub const __PTHREAD_ONCE_SIZE__: u32 = 8; +pub const __PTHREAD_RWLOCK_SIZE__: u32 = 192; +pub const __PTHREAD_RWLOCKATTR_SIZE__: u32 = 16; pub const INT8_MAX: u32 = 127; pub const INT16_MAX: u32 = 32767; pub const INT32_MAX: u32 = 2147483647; +pub const INT64_MAX: u64 = 9223372036854775807; +pub const INT8_MIN: i32 = -128; +pub const INT16_MIN: i32 = -32768; +pub const INT32_MIN: i32 = -2147483648; +pub const INT64_MIN: i64 = -9223372036854775808; pub const UINT8_MAX: u32 = 255; pub const UINT16_MAX: u32 = 65535; pub const UINT32_MAX: u32 = 4294967295; +pub const UINT64_MAX: i32 = -1; pub const INT_LEAST8_MIN: i32 = -128; pub const INT_LEAST16_MIN: i32 = -32768; pub const INT_LEAST32_MIN: i32 = -2147483648; +pub const INT_LEAST64_MIN: i64 = -9223372036854775808; pub const INT_LEAST8_MAX: u32 = 127; pub const INT_LEAST16_MAX: u32 = 32767; pub const INT_LEAST32_MAX: u32 = 2147483647; +pub const INT_LEAST64_MAX: u64 = 9223372036854775807; pub const UINT_LEAST8_MAX: u32 = 255; pub const UINT_LEAST16_MAX: u32 = 65535; pub const UINT_LEAST32_MAX: u32 = 4294967295; +pub const 
UINT_LEAST64_MAX: i32 = -1; pub const INT_FAST8_MIN: i32 = -128; -pub const INT_FAST16_MIN: i64 = -9223372036854775808; -pub const INT_FAST32_MIN: i64 = -9223372036854775808; +pub const INT_FAST16_MIN: i32 = -32768; +pub const INT_FAST32_MIN: i32 = -2147483648; +pub const INT_FAST64_MIN: i64 = -9223372036854775808; pub const INT_FAST8_MAX: u32 = 127; -pub const INT_FAST16_MAX: u64 = 9223372036854775807; -pub const INT_FAST32_MAX: u64 = 9223372036854775807; +pub const INT_FAST16_MAX: u32 = 32767; +pub const INT_FAST32_MAX: u32 = 2147483647; +pub const INT_FAST64_MAX: u64 = 9223372036854775807; pub const UINT_FAST8_MAX: u32 = 255; -pub const UINT_FAST16_MAX: i32 = -1; -pub const UINT_FAST32_MAX: i32 = -1; -pub const INTPTR_MIN: i64 = -9223372036854775808; +pub const UINT_FAST16_MAX: u32 = 65535; +pub const UINT_FAST32_MAX: u32 = 4294967295; +pub const UINT_FAST64_MAX: i32 = -1; pub const INTPTR_MAX: u64 = 9223372036854775807; +pub const INTPTR_MIN: i64 = -9223372036854775808; pub const UINTPTR_MAX: i32 = -1; -pub const PTRDIFF_MIN: i64 = -9223372036854775808; -pub const PTRDIFF_MAX: u64 = 9223372036854775807; +pub const SIZE_MAX: i32 = -1; +pub const RSIZE_MAX: i32 = -1; +pub const WINT_MIN: i32 = -2147483648; +pub const WINT_MAX: u32 = 2147483647; pub const SIG_ATOMIC_MIN: i32 = -2147483648; pub const SIG_ATOMIC_MAX: u32 = 2147483647; -pub const SIZE_MAX: i32 = -1; -pub const WINT_MIN: u32 = 0; -pub const WINT_MAX: u32 = 4294967295; pub type wchar_t = ::std::os::raw::c_int; -#[repr(C)] -#[repr(align(16))] -#[derive(Debug, Copy, Clone)] -pub struct max_align_t { - pub __clang_max_align_nonce1: ::std::os::raw::c_longlong, - pub __bindgen_padding_0: u64, - pub __clang_max_align_nonce2: u128, -} -#[allow(clippy::unnecessary_operation, clippy::identity_op)] -const _: () = { - ["Size of max_align_t"][::std::mem::size_of::() - 32usize]; - ["Alignment of max_align_t"][::std::mem::align_of::() - 16usize]; - ["Offset of field: max_align_t::__clang_max_align_nonce1"] - 
[::std::mem::offset_of!(max_align_t, __clang_max_align_nonce1) - 0usize]; - ["Offset of field: max_align_t::__clang_max_align_nonce2"] - [::std::mem::offset_of!(max_align_t, __clang_max_align_nonce2) - 16usize]; -}; -pub type __u_char = ::std::os::raw::c_uchar; -pub type __u_short = ::std::os::raw::c_ushort; -pub type __u_int = ::std::os::raw::c_uint; -pub type __u_long = ::std::os::raw::c_ulong; +pub type max_align_t = f64; +pub type int_least8_t = i8; +pub type int_least16_t = i16; +pub type int_least32_t = i32; +pub type int_least64_t = i64; +pub type uint_least8_t = u8; +pub type uint_least16_t = u16; +pub type uint_least32_t = u32; +pub type uint_least64_t = u64; +pub type int_fast8_t = i8; +pub type int_fast16_t = i16; +pub type int_fast32_t = i32; +pub type int_fast64_t = i64; +pub type uint_fast8_t = u8; +pub type uint_fast16_t = u16; +pub type uint_fast32_t = u32; +pub type uint_fast64_t = u64; pub type __int8_t = ::std::os::raw::c_schar; pub type __uint8_t = ::std::os::raw::c_uchar; pub type __int16_t = ::std::os::raw::c_short; pub type __uint16_t = ::std::os::raw::c_ushort; pub type __int32_t = ::std::os::raw::c_int; pub type __uint32_t = ::std::os::raw::c_uint; -pub type __int64_t = ::std::os::raw::c_long; -pub type __uint64_t = ::std::os::raw::c_ulong; -pub type __int_least8_t = __int8_t; -pub type __uint_least8_t = __uint8_t; -pub type __int_least16_t = __int16_t; -pub type __uint_least16_t = __uint16_t; -pub type __int_least32_t = __int32_t; -pub type __uint_least32_t = __uint32_t; -pub type __int_least64_t = __int64_t; -pub type __uint_least64_t = __uint64_t; -pub type __quad_t = ::std::os::raw::c_long; -pub type __u_quad_t = ::std::os::raw::c_ulong; -pub type __intmax_t = ::std::os::raw::c_long; -pub type __uintmax_t = ::std::os::raw::c_ulong; -pub type __dev_t = ::std::os::raw::c_ulong; -pub type __uid_t = ::std::os::raw::c_uint; -pub type __gid_t = ::std::os::raw::c_uint; -pub type __ino_t = ::std::os::raw::c_ulong; -pub type __ino64_t = 
::std::os::raw::c_ulong; -pub type __mode_t = ::std::os::raw::c_uint; -pub type __nlink_t = ::std::os::raw::c_ulong; -pub type __off_t = ::std::os::raw::c_long; -pub type __off64_t = ::std::os::raw::c_long; -pub type __pid_t = ::std::os::raw::c_int; +pub type __int64_t = ::std::os::raw::c_longlong; +pub type __uint64_t = ::std::os::raw::c_ulonglong; +pub type __darwin_intptr_t = ::std::os::raw::c_long; +pub type __darwin_natural_t = ::std::os::raw::c_uint; +pub type __darwin_ct_rune_t = ::std::os::raw::c_int; +#[repr(C)] +#[derive(Copy, Clone)] +pub union __mbstate_t { + pub __mbstate8: [::std::os::raw::c_char; 128usize], + pub _mbstateL: ::std::os::raw::c_longlong, +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of __mbstate_t"][::std::mem::size_of::<__mbstate_t>() - 128usize]; + ["Alignment of __mbstate_t"][::std::mem::align_of::<__mbstate_t>() - 8usize]; + ["Offset of field: __mbstate_t::__mbstate8"] + [::std::mem::offset_of!(__mbstate_t, __mbstate8) - 0usize]; + ["Offset of field: __mbstate_t::_mbstateL"] + [::std::mem::offset_of!(__mbstate_t, _mbstateL) - 0usize]; +}; +pub type __darwin_mbstate_t = __mbstate_t; +pub type __darwin_ptrdiff_t = ::std::os::raw::c_long; +pub type __darwin_size_t = ::std::os::raw::c_ulong; +pub type __darwin_va_list = __builtin_va_list; +pub type __darwin_wchar_t = ::std::os::raw::c_int; +pub type __darwin_rune_t = __darwin_wchar_t; +pub type __darwin_wint_t = ::std::os::raw::c_int; +pub type __darwin_clock_t = ::std::os::raw::c_ulong; +pub type __darwin_socklen_t = __uint32_t; +pub type __darwin_ssize_t = ::std::os::raw::c_long; +pub type __darwin_time_t = ::std::os::raw::c_long; +pub type __darwin_blkcnt_t = __int64_t; +pub type __darwin_blksize_t = __int32_t; +pub type __darwin_dev_t = __int32_t; +pub type __darwin_fsblkcnt_t = ::std::os::raw::c_uint; +pub type __darwin_fsfilcnt_t = ::std::os::raw::c_uint; +pub type __darwin_gid_t = __uint32_t; +pub type __darwin_id_t = __uint32_t; +pub 
type __darwin_ino64_t = __uint64_t; +pub type __darwin_ino_t = __darwin_ino64_t; +pub type __darwin_mach_port_name_t = __darwin_natural_t; +pub type __darwin_mach_port_t = __darwin_mach_port_name_t; +pub type __darwin_mode_t = __uint16_t; +pub type __darwin_off_t = __int64_t; +pub type __darwin_pid_t = __int32_t; +pub type __darwin_sigset_t = __uint32_t; +pub type __darwin_suseconds_t = __int32_t; +pub type __darwin_uid_t = __uint32_t; +pub type __darwin_useconds_t = __uint32_t; +pub type __darwin_uuid_t = [::std::os::raw::c_uchar; 16usize]; +pub type __darwin_uuid_string_t = [::std::os::raw::c_char; 37usize]; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct __darwin_pthread_handler_rec { + pub __routine: ::std::option::Option, + pub __arg: *mut ::std::os::raw::c_void, + pub __next: *mut __darwin_pthread_handler_rec, +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of __darwin_pthread_handler_rec"] + [::std::mem::size_of::<__darwin_pthread_handler_rec>() - 24usize]; + ["Alignment of __darwin_pthread_handler_rec"] + [::std::mem::align_of::<__darwin_pthread_handler_rec>() - 8usize]; + ["Offset of field: __darwin_pthread_handler_rec::__routine"] + [::std::mem::offset_of!(__darwin_pthread_handler_rec, __routine) - 0usize]; + ["Offset of field: __darwin_pthread_handler_rec::__arg"] + [::std::mem::offset_of!(__darwin_pthread_handler_rec, __arg) - 8usize]; + ["Offset of field: __darwin_pthread_handler_rec::__next"] + [::std::mem::offset_of!(__darwin_pthread_handler_rec, __next) - 16usize]; +}; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _opaque_pthread_attr_t { + pub __sig: ::std::os::raw::c_long, + pub __opaque: [::std::os::raw::c_char; 56usize], +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of _opaque_pthread_attr_t"][::std::mem::size_of::<_opaque_pthread_attr_t>() - 64usize]; + ["Alignment of _opaque_pthread_attr_t"] + 
[::std::mem::align_of::<_opaque_pthread_attr_t>() - 8usize]; + ["Offset of field: _opaque_pthread_attr_t::__sig"] + [::std::mem::offset_of!(_opaque_pthread_attr_t, __sig) - 0usize]; + ["Offset of field: _opaque_pthread_attr_t::__opaque"] + [::std::mem::offset_of!(_opaque_pthread_attr_t, __opaque) - 8usize]; +}; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _opaque_pthread_cond_t { + pub __sig: ::std::os::raw::c_long, + pub __opaque: [::std::os::raw::c_char; 40usize], +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of _opaque_pthread_cond_t"][::std::mem::size_of::<_opaque_pthread_cond_t>() - 48usize]; + ["Alignment of _opaque_pthread_cond_t"] + [::std::mem::align_of::<_opaque_pthread_cond_t>() - 8usize]; + ["Offset of field: _opaque_pthread_cond_t::__sig"] + [::std::mem::offset_of!(_opaque_pthread_cond_t, __sig) - 0usize]; + ["Offset of field: _opaque_pthread_cond_t::__opaque"] + [::std::mem::offset_of!(_opaque_pthread_cond_t, __opaque) - 8usize]; +}; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _opaque_pthread_condattr_t { + pub __sig: ::std::os::raw::c_long, + pub __opaque: [::std::os::raw::c_char; 8usize], +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of _opaque_pthread_condattr_t"] + [::std::mem::size_of::<_opaque_pthread_condattr_t>() - 16usize]; + ["Alignment of _opaque_pthread_condattr_t"] + [::std::mem::align_of::<_opaque_pthread_condattr_t>() - 8usize]; + ["Offset of field: _opaque_pthread_condattr_t::__sig"] + [::std::mem::offset_of!(_opaque_pthread_condattr_t, __sig) - 0usize]; + ["Offset of field: _opaque_pthread_condattr_t::__opaque"] + [::std::mem::offset_of!(_opaque_pthread_condattr_t, __opaque) - 8usize]; +}; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _opaque_pthread_mutex_t { + pub __sig: ::std::os::raw::c_long, + pub __opaque: [::std::os::raw::c_char; 56usize], +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const 
_: () = { + ["Size of _opaque_pthread_mutex_t"][::std::mem::size_of::<_opaque_pthread_mutex_t>() - 64usize]; + ["Alignment of _opaque_pthread_mutex_t"] + [::std::mem::align_of::<_opaque_pthread_mutex_t>() - 8usize]; + ["Offset of field: _opaque_pthread_mutex_t::__sig"] + [::std::mem::offset_of!(_opaque_pthread_mutex_t, __sig) - 0usize]; + ["Offset of field: _opaque_pthread_mutex_t::__opaque"] + [::std::mem::offset_of!(_opaque_pthread_mutex_t, __opaque) - 8usize]; +}; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _opaque_pthread_mutexattr_t { + pub __sig: ::std::os::raw::c_long, + pub __opaque: [::std::os::raw::c_char; 8usize], +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of _opaque_pthread_mutexattr_t"] + [::std::mem::size_of::<_opaque_pthread_mutexattr_t>() - 16usize]; + ["Alignment of _opaque_pthread_mutexattr_t"] + [::std::mem::align_of::<_opaque_pthread_mutexattr_t>() - 8usize]; + ["Offset of field: _opaque_pthread_mutexattr_t::__sig"] + [::std::mem::offset_of!(_opaque_pthread_mutexattr_t, __sig) - 0usize]; + ["Offset of field: _opaque_pthread_mutexattr_t::__opaque"] + [::std::mem::offset_of!(_opaque_pthread_mutexattr_t, __opaque) - 8usize]; +}; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _opaque_pthread_once_t { + pub __sig: ::std::os::raw::c_long, + pub __opaque: [::std::os::raw::c_char; 8usize], +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of _opaque_pthread_once_t"][::std::mem::size_of::<_opaque_pthread_once_t>() - 16usize]; + ["Alignment of _opaque_pthread_once_t"] + [::std::mem::align_of::<_opaque_pthread_once_t>() - 8usize]; + ["Offset of field: _opaque_pthread_once_t::__sig"] + [::std::mem::offset_of!(_opaque_pthread_once_t, __sig) - 0usize]; + ["Offset of field: _opaque_pthread_once_t::__opaque"] + [::std::mem::offset_of!(_opaque_pthread_once_t, __opaque) - 8usize]; +}; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct 
_opaque_pthread_rwlock_t { + pub __sig: ::std::os::raw::c_long, + pub __opaque: [::std::os::raw::c_char; 192usize], +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of _opaque_pthread_rwlock_t"] + [::std::mem::size_of::<_opaque_pthread_rwlock_t>() - 200usize]; + ["Alignment of _opaque_pthread_rwlock_t"] + [::std::mem::align_of::<_opaque_pthread_rwlock_t>() - 8usize]; + ["Offset of field: _opaque_pthread_rwlock_t::__sig"] + [::std::mem::offset_of!(_opaque_pthread_rwlock_t, __sig) - 0usize]; + ["Offset of field: _opaque_pthread_rwlock_t::__opaque"] + [::std::mem::offset_of!(_opaque_pthread_rwlock_t, __opaque) - 8usize]; +}; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _opaque_pthread_rwlockattr_t { + pub __sig: ::std::os::raw::c_long, + pub __opaque: [::std::os::raw::c_char; 16usize], +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of _opaque_pthread_rwlockattr_t"] + [::std::mem::size_of::<_opaque_pthread_rwlockattr_t>() - 24usize]; + ["Alignment of _opaque_pthread_rwlockattr_t"] + [::std::mem::align_of::<_opaque_pthread_rwlockattr_t>() - 8usize]; + ["Offset of field: _opaque_pthread_rwlockattr_t::__sig"] + [::std::mem::offset_of!(_opaque_pthread_rwlockattr_t, __sig) - 0usize]; + ["Offset of field: _opaque_pthread_rwlockattr_t::__opaque"] + [::std::mem::offset_of!(_opaque_pthread_rwlockattr_t, __opaque) - 8usize]; +}; #[repr(C)] #[derive(Debug, Copy, Clone)] -pub struct __fsid_t { - pub __val: [::std::os::raw::c_int; 2usize], +pub struct _opaque_pthread_t { + pub __sig: ::std::os::raw::c_long, + pub __cleanup_stack: *mut __darwin_pthread_handler_rec, + pub __opaque: [::std::os::raw::c_char; 8176usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { - ["Size of __fsid_t"][::std::mem::size_of::<__fsid_t>() - 8usize]; - ["Alignment of __fsid_t"][::std::mem::align_of::<__fsid_t>() - 4usize]; - ["Offset of field: 
__fsid_t::__val"][::std::mem::offset_of!(__fsid_t, __val) - 0usize]; + ["Size of _opaque_pthread_t"][::std::mem::size_of::<_opaque_pthread_t>() - 8192usize]; + ["Alignment of _opaque_pthread_t"][::std::mem::align_of::<_opaque_pthread_t>() - 8usize]; + ["Offset of field: _opaque_pthread_t::__sig"] + [::std::mem::offset_of!(_opaque_pthread_t, __sig) - 0usize]; + ["Offset of field: _opaque_pthread_t::__cleanup_stack"] + [::std::mem::offset_of!(_opaque_pthread_t, __cleanup_stack) - 8usize]; + ["Offset of field: _opaque_pthread_t::__opaque"] + [::std::mem::offset_of!(_opaque_pthread_t, __opaque) - 16usize]; }; -pub type __clock_t = ::std::os::raw::c_long; -pub type __rlim_t = ::std::os::raw::c_ulong; -pub type __rlim64_t = ::std::os::raw::c_ulong; -pub type __id_t = ::std::os::raw::c_uint; -pub type __time_t = ::std::os::raw::c_long; -pub type __useconds_t = ::std::os::raw::c_uint; -pub type __suseconds_t = ::std::os::raw::c_long; -pub type __suseconds64_t = ::std::os::raw::c_long; -pub type __daddr_t = ::std::os::raw::c_int; -pub type __key_t = ::std::os::raw::c_int; -pub type __clockid_t = ::std::os::raw::c_int; -pub type __timer_t = *mut ::std::os::raw::c_void; -pub type __blksize_t = ::std::os::raw::c_long; -pub type __blkcnt_t = ::std::os::raw::c_long; -pub type __blkcnt64_t = ::std::os::raw::c_long; -pub type __fsblkcnt_t = ::std::os::raw::c_ulong; -pub type __fsblkcnt64_t = ::std::os::raw::c_ulong; -pub type __fsfilcnt_t = ::std::os::raw::c_ulong; -pub type __fsfilcnt64_t = ::std::os::raw::c_ulong; -pub type __fsword_t = ::std::os::raw::c_long; -pub type __ssize_t = ::std::os::raw::c_long; -pub type __syscall_slong_t = ::std::os::raw::c_long; -pub type __syscall_ulong_t = ::std::os::raw::c_ulong; -pub type __loff_t = __off64_t; -pub type __caddr_t = *mut ::std::os::raw::c_char; -pub type __intptr_t = ::std::os::raw::c_long; -pub type __socklen_t = ::std::os::raw::c_uint; -pub type __sig_atomic_t = ::std::os::raw::c_int; -pub type int_least8_t = __int_least8_t; 
-pub type int_least16_t = __int_least16_t; -pub type int_least32_t = __int_least32_t; -pub type int_least64_t = __int_least64_t; -pub type uint_least8_t = __uint_least8_t; -pub type uint_least16_t = __uint_least16_t; -pub type uint_least32_t = __uint_least32_t; -pub type uint_least64_t = __uint_least64_t; -pub type int_fast8_t = ::std::os::raw::c_schar; -pub type int_fast16_t = ::std::os::raw::c_long; -pub type int_fast32_t = ::std::os::raw::c_long; -pub type int_fast64_t = ::std::os::raw::c_long; -pub type uint_fast8_t = ::std::os::raw::c_uchar; -pub type uint_fast16_t = ::std::os::raw::c_ulong; -pub type uint_fast32_t = ::std::os::raw::c_ulong; -pub type uint_fast64_t = ::std::os::raw::c_ulong; -pub type intmax_t = __intmax_t; -pub type uintmax_t = __uintmax_t; +pub type __darwin_pthread_attr_t = _opaque_pthread_attr_t; +pub type __darwin_pthread_cond_t = _opaque_pthread_cond_t; +pub type __darwin_pthread_condattr_t = _opaque_pthread_condattr_t; +pub type __darwin_pthread_key_t = ::std::os::raw::c_ulong; +pub type __darwin_pthread_mutex_t = _opaque_pthread_mutex_t; +pub type __darwin_pthread_mutexattr_t = _opaque_pthread_mutexattr_t; +pub type __darwin_pthread_once_t = _opaque_pthread_once_t; +pub type __darwin_pthread_rwlock_t = _opaque_pthread_rwlock_t; +pub type __darwin_pthread_rwlockattr_t = _opaque_pthread_rwlockattr_t; +pub type __darwin_pthread_t = *mut _opaque_pthread_t; +pub type intmax_t = ::std::os::raw::c_long; +pub type uintmax_t = ::std::os::raw::c_ulong; #[doc = " Wrapper around Rust's [`&str`], without allocating memory, unlike [`std::ffi::CString`].\n The caller must use it as a Rust string. 
This is not a C-string."] #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -325,3 +450,4 @@ const _: () = { ["Offset of field: PdRoute::shard"][::std::mem::offset_of!(PdRoute, shard) - 0usize]; ["Offset of field: PdRoute::read_write"][::std::mem::offset_of!(PdRoute, read_write) - 8usize]; }; +pub type __builtin_va_list = *mut ::std::os::raw::c_char; diff --git a/pgdog/src/admin/set.rs b/pgdog/src/admin/set.rs index af180ec26..f59828329 100644 --- a/pgdog/src/admin/set.rs +++ b/pgdog/src/admin/set.rs @@ -180,6 +180,14 @@ impl Command for Set { config.config.general.connect_timeout = self.value.parse()?; } + "max_wildcard_pools" => { + config.config.general.max_wildcard_pools = self.value.parse()?; + } + + "wildcard_pool_idle_timeout" => { + config.config.general.wildcard_pool_idle_timeout = self.value.parse()?; + } + _ => return Err(Error::Syntax), } diff --git a/pgdog/src/backend/databases.rs b/pgdog/src/backend/databases.rs index 7607ea975..8d8f9fb44 100644 --- a/pgdog/src/backend/databases.rs +++ b/pgdog/src/backend/databases.rs @@ -1,8 +1,9 @@ //! Databases behind pgDog. -use std::collections::{hash_map::Entry, HashMap}; +use std::collections::{hash_map::Entry, HashMap, HashSet}; use std::ops::Deref; use std::sync::Arc; +use std::time::Duration; use arc_swap::ArcSwap; use futures::future::try_join_all; @@ -34,6 +35,20 @@ use super::{ static DATABASES: Lazy> = Lazy::new(|| ArcSwap::from_pointee(Databases::default())); static LOCK: Lazy> = Lazy::new(|| Mutex::new(())); +/// Spawns the wildcard-pool background eviction loop exactly once. +static WILDCARD_EVICTION: Lazy<()> = Lazy::new(|| { + tokio::spawn(async { + loop { + let timeout_secs = config().config.general.wildcard_pool_idle_timeout; + if timeout_secs == 0 { + tokio::time::sleep(Duration::from_secs(60)).await; + continue; + } + tokio::time::sleep(Duration::from_secs(timeout_secs)).await; + evict_idle_wildcard_pools(); + } + }); +}); /// Sync databases during modification. 
pub fn lock() -> MutexGuard<'static, RawMutex, ()> { @@ -75,8 +90,16 @@ pub fn reconnect() -> Result<(), Error> { Ok(()) } -/// Re-create databases from existing config, -/// preserving connections. +/// Re-create databases from existing config, preserving connections. +/// +/// **SIGHUP / config-reload behaviour for wildcard pools:** +/// Wildcard pools created on demand by [`add_wildcard_pool`] are *not* included +/// in the freshly built [`Databases`] produced by [`from_config`]. Because +/// [`replace_databases`] only moves connections whose key exists in the new +/// config, those connections are dropped and the pools are evicted. On the +/// next client login [`add_wildcard_pool`] will recreate the pool from the +/// (potentially updated) wildcard template, and the +/// [`General::max_wildcard_pools`] counter resets to zero. pub fn reload_from_existing() -> Result<(), Error> { let _lock = lock(); @@ -98,9 +121,80 @@ pub fn init() -> Result<(), Error> { // Start two-pc manager. let _monitor = Manager::get(); + // Start the wildcard pool eviction background task. + let _ = &*WILDCARD_EVICTION; + Ok(()) } +/// Remove dynamically-created wildcard pools that currently have zero connections. +/// +/// This is called periodically by the background eviction task started in +/// [`init`], and is also exposed as `pub(crate)` so unit tests can invoke it +/// directly without running a Tokio runtime loop. 
+pub(crate) fn evict_idle_wildcard_pools() { + let _lock = lock(); + let dbs = databases(); + + let to_evict: Vec = dbs + .dynamic_pools + .iter() + .filter(|user| { + dbs.databases + .get(*user) + .map_or(false, |c| c.total_connections() == 0) + }) + .cloned() + .collect(); + + if to_evict.is_empty() { + return; + } + + let mut new_dbs = (*dbs).clone(); + for user in &to_evict { + if let Some(cluster) = new_dbs.databases.remove(user) { + cluster.shutdown(); + new_dbs.dynamic_pools.remove(user); + new_dbs.wildcard_pool_count = new_dbs.wildcard_pool_count.saturating_sub(1); + } + } + DATABASES.store(Arc::new(new_dbs)); + info!("evicted {} idle wildcard pool(s)", to_evict.len()); +} + +/// Remove a single dynamically-created wildcard pool so it can be +/// recreated with updated credentials (e.g. after a password rotation). +/// +/// Returns `true` if the pool existed and was removed. +pub(crate) fn remove_wildcard_pool(user: &str, database: &str) -> bool { + let _lock = lock(); + let dbs = databases(); + let key = User { + user: user.to_string(), + database: database.to_string(), + }; + + if !dbs.dynamic_pools.contains(&key) { + return false; + } + + let mut new_dbs = (*dbs).clone(); + if let Some(cluster) = new_dbs.databases.remove(&key) { + cluster.shutdown(); + new_dbs.dynamic_pools.remove(&key); + new_dbs.wildcard_pool_count = new_dbs.wildcard_pool_count.saturating_sub(1); + DATABASES.store(Arc::new(new_dbs)); + debug!( + "removed wildcard pool for credential rotation: user=\"{}\" database=\"{}\"", + user, database + ); + true + } else { + false + } +} + /// Shutdown all databases. pub fn shutdown() { databases().shutdown(); @@ -173,6 +267,162 @@ pub(crate) fn add(mut user: crate::config::User) { } } +/// Attempt to create a pool from wildcard templates for the given user/database. +/// Returns the Cluster if a wildcard match was found and the pool was created. 
+/// +/// When `passthrough_password` is provided (from passthrough auth), it overrides +/// the wildcard template's password so the pool can authenticate to PostgreSQL +/// and the login check can verify the client's credential. +pub(crate) fn add_wildcard_pool( + user: &str, + database: &str, + passthrough_password: Option<&str>, +) -> Result, Error> { + let _lock = lock(); + + // Double-check: another thread may have created it. + let dbs = databases(); + if dbs.exists((user, database)) { + return Ok(Some(dbs.cluster((user, database))?)); + } + + let wildcard_match = match dbs.find_wildcard_match(user, database) { + Some(m) => m, + None => return Ok(None), + }; + + let config_snapshot = match dbs.config_snapshot() { + Some(c) => c.clone(), + None => return Ok(None), + }; + + // Enforce the operator-configured pool limit before allocating a new pool. + let max = config_snapshot.config.general.max_wildcard_pools; + if max > 0 && dbs.wildcard_pool_count >= max { + warn!( + "max_wildcard_pools limit ({}) reached, rejecting wildcard pool \ + for user=\"{}\" database=\"{}\"", + max, user, database + ); + return Ok(None); + } + + // Build a synthetic user config from the wildcard template. + let template_user_key = User { + user: if wildcard_match.wildcard_user { + "*".to_string() + } else { + user.to_string() + }, + database: if wildcard_match.wildcard_database { + "*".to_string() + } else { + database.to_string() + }, + }; + + // Find the user template from wildcard_users or from the existing pool configs. + let user_config = if wildcard_match.wildcard_user { + // Look for a wildcard user template that matches. + let db_pattern = if wildcard_match.wildcard_database { + "*" + } else { + database + }; + dbs.wildcard_users() + .iter() + .find(|u| { + u.is_wildcard_name() && (u.database == db_pattern || u.is_wildcard_database()) + }) + .cloned() + } else { + // Use an existing user config's settings from a template pool. 
+ let template_cluster = dbs.databases.get(&template_user_key); + template_cluster.map(|_| { + // Use the snapshot so user lookups are consistent with the database + // config captured at the same instant (avoids a race if a SIGHUP + // reload changes the global config mid-call). + config_snapshot + .users + .users + .iter() + .find(|u| u.name == user && (u.database == "*" || u.is_wildcard_database())) + .cloned() + .unwrap_or_else(|| crate::config::User::new(user, "", database)) + }) + }; + + let mut user_config = match user_config { + Some(u) => u, + None => return Ok(None), + }; + + // Override the wildcard name/database with the actual values. + if user_config.is_wildcard_name() { + user_config.name = user.to_string(); + } + user_config.database = database.to_string(); + + // For passthrough auth, set the client's password so the backend pool can + // authenticate to PostgreSQL and the proxy-level credential check succeeds. + if let Some(pw) = passthrough_password { + user_config.password = Some(pw.to_string()); + } + + // Build a synthetic Config so we can substitute the real database name + // into the wildcard template before handing it to new_pool. + let mut synthetic_config = config_snapshot.config.clone(); + if wildcard_match.wildcard_database { + if let Some(templates) = dbs.wildcard_db_templates() { + let mut new_dbs: Vec = synthetic_config + .databases + .iter() + .filter(|d| !d.is_wildcard()) + .cloned() + .collect(); + + for shard_templates in templates { + for template in shard_templates { + let mut db = template.database.clone(); + db.name = database.to_string(); + // Substitute wildcard database_name with the actual name. 
+ if db.database_name.is_none() || db.database_name.as_deref() == Some("*") { + db.database_name = Some(database.to_string()); + } + new_dbs.push(db); + } + } + + synthetic_config.databases = new_dbs; + } + } + + let pool = new_pool(&user_config, &synthetic_config); + if let Some((pool_user, cluster)) = pool { + debug!( + "created wildcard pool for user=\"{}\" database=\"{}\"", + user, database + ); + + let databases = (*databases()).clone(); + let (added, mut databases) = databases.add(pool_user.clone(), cluster.clone()); + if added { + databases.wildcard_pool_count += 1; + databases.dynamic_pools.insert(pool_user); + databases.launch(); + DATABASES.store(Arc::new(databases)); + } + + Ok(Some(cluster)) + } else { + warn!( + "wildcard match found but pool creation failed for user=\"{}\" database=\"{}\"", + user, database + ); + Ok(None) + } +} + /// Swap database configs between source and destination. /// Both databases keep their names, but their configs (host, port, etc.) are exchanged. /// User database references are also swapped. @@ -278,6 +528,13 @@ impl ToUser for (&str, Option<&str>) { } } +/// Describes which parts of a wildcard match were used. +#[derive(Debug, Clone, PartialEq)] +struct WildcardMatch { + wildcard_user: bool, + wildcard_database: bool, +} + /// Databases. #[derive(Default, Clone)] pub struct Databases { @@ -285,6 +542,20 @@ pub struct Databases { manual_queries: HashMap, mirrors: HashMap>, mirror_configs: HashMap<(String, String), crate::config::MirrorConfig>, + /// Wildcard database templates (databases with name = "*"), organized by shard. + wildcard_db_templates: Option>>, + /// Wildcard user templates (users with name = "*"). + wildcard_users: Vec, + /// Full config snapshot (both databases and users) captured at construction + /// time, needed to create pools lazily from wildcard templates without + /// racing against a concurrent config reload that might change `config()`. 
+ config_snapshot: Option>, + /// Number of pools created dynamically via wildcard matching. + /// Reset to zero on every config reload so the limit applies per-epoch. + wildcard_pool_count: usize, + /// Keys of pools that were created dynamically via wildcard matching. + /// Used by the background eviction task to identify eligible candidates. + dynamic_pools: HashSet, } impl Databases { @@ -315,6 +586,96 @@ impl Databases { } } + /// Check if any wildcard templates are configured. + pub fn has_wildcard(&self) -> bool { + self.wildcard_db_templates.is_some() || !self.wildcard_users.is_empty() + } + + /// Check if a cluster exists or could be created via wildcard matching. + pub fn exists_or_wildcard(&self, user: impl ToUser) -> bool { + let user = user.to_user(); + if self.exists((&*user.user, &*user.database)) { + return true; + } + self.has_wildcard() + && self + .find_wildcard_match(&user.user, &user.database) + .is_some() + } + + /// Find a wildcard match for a user/database pair. + /// Returns a tuple of (user_template, is_wildcard_user, is_wildcard_db). + fn find_wildcard_match(&self, user: &str, database: &str) -> Option { + // Priority 1: exact user name, wildcard database + // A user config with name="alice" and database="*" means alice can + // connect to any database backed by the wildcard db templates. 
+ if self.wildcard_db_templates.is_some() { + let has_exact_user_wildcard_db = self + .wildcard_users + .iter() + .any(|u| u.name == user && u.is_wildcard_database()); + if has_exact_user_wildcard_db { + return Some(WildcardMatch { + wildcard_user: false, + wildcard_database: true, + }); + } + } + + // Priority 2: wildcard user, exact database + let has_wildcard_user_exact_db = self + .wildcard_users + .iter() + .any(|u| u.is_wildcard_name() && u.database == database); + if has_wildcard_user_exact_db { + return Some(WildcardMatch { + wildcard_user: true, + wildcard_database: false, + }); + } + + // Priority 3: both wildcard + let has_full_wildcard = self + .wildcard_users + .iter() + .any(|u| u.is_wildcard_name() && u.is_wildcard_database()); + if has_full_wildcard && self.wildcard_db_templates.is_some() { + return Some(WildcardMatch { + wildcard_user: true, + wildcard_database: true, + }); + } + + None + } + + /// Get wildcard database templates. + pub fn wildcard_db_templates(&self) -> Option<&Vec>> { + self.wildcard_db_templates.as_ref() + } + + /// Get wildcard user templates. + pub fn wildcard_users(&self) -> &[crate::config::User] { + &self.wildcard_users + } + + /// Get the full config snapshot used for creating wildcard pools. + pub fn config_snapshot(&self) -> Option<&crate::config::ConfigAndUsers> { + self.config_snapshot.as_deref() + } + + /// Number of pools currently created via wildcard matching. + #[cfg(test)] + pub(crate) fn wildcard_pool_count(&self) -> usize { + self.wildcard_pool_count + } + + /// Keys of dynamically-created wildcard pools. + #[cfg(test)] + pub(crate) fn dynamic_pools(&self) -> &HashSet { + &self.dynamic_pools + } + /// Get a cluster for the user/database pair if it's configured. 
pub fn cluster(&self, user: impl ToUser) -> Result { let user = user.to_user(); @@ -580,6 +941,12 @@ pub fn from_config(config: &ConfigAndUsers) -> Databases { }; for user in users { + // Wildcard templates are stored separately and pools are created + // lazily via add_wildcard_pool(). Creating a static pool here + // would attempt to connect to Postgres with literal "*". + if user.is_wildcard_name() || user.is_wildcard_database() { + continue; + } if let Some((user, cluster)) = new_pool(&user, &config.config) { databases.insert(user, cluster); } @@ -679,11 +1046,31 @@ pub fn from_config(config: &ConfigAndUsers) -> Databases { } } + let wildcard_db_templates = config.config.wildcard_databases(); + let wildcard_users: Vec = config + .users + .users + .iter() + .filter(|u| u.is_wildcard_name() || u.is_wildcard_database()) + .cloned() + .collect(); + + let config_snapshot = if wildcard_db_templates.is_some() || !wildcard_users.is_empty() { + Some(Arc::new(config.clone())) + } else { + None + }; + Databases { databases, manual_queries: config.config.manual_queries(), mirrors, mirror_configs, + wildcard_db_templates, + wildcard_users, + config_snapshot, + wildcard_pool_count: 0, + dynamic_pools: HashSet::new(), } } @@ -1738,4 +2125,329 @@ password = "testpass" assert_eq!(new_users.users[0].name, "testuser"); assert_eq!(new_users.users[0].database, "destination_db"); } + + #[test] + fn test_wildcard_db_templates_populated() { + let mut config = Config::default(); + config.databases = vec![ + Database { + name: "explicit_db".to_string(), + host: "host1".to_string(), + role: Role::Primary, + ..Default::default() + }, + Database { + name: "*".to_string(), + host: "wildcard-host".to_string(), + role: Role::Primary, + ..Default::default() + }, + ]; + + let config_and_users = ConfigAndUsers { + config, + users: crate::config::Users { + users: vec![crate::config::User::new("alice", "pass", "explicit_db")], + ..Default::default() + }, + ..Default::default() + }; + + let 
databases = from_config(&config_and_users); + + assert!(databases.has_wildcard()); + assert!(databases.wildcard_db_templates().is_some()); + let templates = databases.wildcard_db_templates().unwrap(); + assert_eq!(templates.len(), 1); // shard 0 only + assert_eq!(templates[0].len(), 1); + assert_eq!(templates[0][0].host, "wildcard-host"); + } + + #[test] + fn test_no_wildcard_when_absent() { + let mut config = Config::default(); + config.databases = vec![Database { + name: "mydb".to_string(), + host: "host1".to_string(), + role: Role::Primary, + ..Default::default() + }]; + + let config_and_users = ConfigAndUsers { + config, + users: crate::config::Users { + users: vec![crate::config::User::new("alice", "pass", "mydb")], + ..Default::default() + }, + ..Default::default() + }; + + let databases = from_config(&config_and_users); + + assert!(!databases.has_wildcard()); + assert!(databases.wildcard_db_templates().is_none()); + assert!(databases.wildcard_users().is_empty()); + assert!(databases.config_snapshot().is_none()); + } + + #[test] + fn test_wildcard_users_populated() { + let mut config = Config::default(); + config.databases = vec![Database { + name: "*".to_string(), + host: "wildcard-host".to_string(), + role: Role::Primary, + ..Default::default() + }]; + + let config_and_users = ConfigAndUsers { + config, + users: crate::config::Users { + users: vec![crate::config::User { + name: "*".to_string(), + database: "*".to_string(), + password: Some("secret".to_string()), + ..Default::default() + }], + ..Default::default() + }, + ..Default::default() + }; + + let databases = from_config(&config_and_users); + + assert!(databases.has_wildcard()); + assert_eq!(databases.wildcard_users().len(), 1); + assert!(databases.wildcard_users()[0].is_wildcard_name()); + assert!(databases.wildcard_users()[0].is_wildcard_database()); + assert!(databases.config_snapshot().is_some()); + } + + #[test] + fn test_find_wildcard_match_priority() { + let mut config = Config::default(); + 
config.databases = vec![ + Database { + name: "explicit_db".to_string(), + host: "host1".to_string(), + role: Role::Primary, + ..Default::default() + }, + Database { + name: "*".to_string(), + host: "wildcard-host".to_string(), + role: Role::Primary, + ..Default::default() + }, + ]; + + let config_and_users = ConfigAndUsers { + config, + users: crate::config::Users { + users: vec![ + crate::config::User::new("alice", "pass", "explicit_db"), + crate::config::User { + name: "alice".to_string(), + database: "*".to_string(), + password: Some("pass".to_string()), + ..Default::default() + }, + crate::config::User { + name: "*".to_string(), + database: "*".to_string(), + password: Some("wild".to_string()), + ..Default::default() + }, + ], + ..Default::default() + }, + ..Default::default() + }; + + let databases = from_config(&config_and_users); + + // Exact match exists — no wildcard needed. + assert!(databases.cluster(("alice", "explicit_db")).is_ok()); + + // Wildcard database for known user (alice/*) — priority 1. + let m = databases.find_wildcard_match("alice", "unknown_db"); + assert_eq!( + m, + Some(WildcardMatch { + wildcard_user: false, + wildcard_database: true, + }) + ); + + // Wildcard user for unknown user — priority 3 (full wildcard). + let m = databases.find_wildcard_match("unknown_user", "unknown_db"); + assert_eq!( + m, + Some(WildcardMatch { + wildcard_user: true, + wildcard_database: true, + }) + ); + } + + #[test] + fn test_wildcard_templates_no_static_pools() { + // Wildcard-only config must NOT create static pools (which would + // attempt to connect to Postgres with literal "*" as db/user name). 
+ let mut config = Config::default(); + config.databases = vec![Database { + name: "*".to_string(), + host: "wildcard-host".to_string(), + role: Role::Primary, + ..Default::default() + }]; + + let config_and_users = ConfigAndUsers { + config, + users: crate::config::Users { + users: vec![crate::config::User { + name: "*".to_string(), + database: "*".to_string(), + password: Some("secret".to_string()), + ..Default::default() + }], + ..Default::default() + }, + ..Default::default() + }; + + let databases = from_config(&config_and_users); + + // Templates should be stored for lazy pool creation. + assert!(databases.has_wildcard()); + assert!(databases.wildcard_db_templates().is_some()); + assert_eq!(databases.wildcard_users().len(), 1); + + // No static pools — literal "*" must never be launched. + assert!( + databases.all().is_empty(), + "wildcard-only config must not create static pools; got {} pool(s)", + databases.all().len() + ); + } + + #[test] + fn test_mixed_wildcard_and_concrete_pools() { + // Concrete user/db should create a static pool; wildcard should not. + let mut config = Config::default(); + config.databases = vec![ + Database { + name: "real_db".to_string(), + host: "host1".to_string(), + role: Role::Primary, + ..Default::default() + }, + Database { + name: "*".to_string(), + host: "wildcard-host".to_string(), + role: Role::Primary, + ..Default::default() + }, + ]; + + let config_and_users = ConfigAndUsers { + config, + users: crate::config::Users { + users: vec![ + crate::config::User::new("alice", "pass", "real_db"), + crate::config::User { + name: "*".to_string(), + database: "*".to_string(), + password: Some("secret".to_string()), + ..Default::default() + }, + ], + ..Default::default() + }, + ..Default::default() + }; + + let databases = from_config(&config_and_users); + + assert!(databases.has_wildcard()); + // Only the concrete pool should exist. 
+ assert_eq!( + databases.all().len(), + 1, + "expected 1 concrete pool, got {}", + databases.all().len() + ); + assert!(databases.exists(("alice", "real_db"))); + // The wildcard entry must NOT produce a static pool. + assert!(!databases.exists(("*", "*"))); + } + + #[tokio::test] + async fn test_remove_wildcard_pool() { + // Verify that remove_wildcard_pool removes a dynamically-created pool + // so it can be recreated with updated credentials. + let mut config = Config::default(); + config.databases = vec![Database { + name: "*".to_string(), + host: "wildcard-host".to_string(), + role: Role::Primary, + ..Default::default() + }]; + + let config_and_users = ConfigAndUsers { + config, + users: crate::config::Users { + users: vec![crate::config::User { + name: "*".to_string(), + database: "*".to_string(), + password: Some("secret".to_string()), + ..Default::default() + }], + ..Default::default() + }, + ..Default::default() + }; + + let dbs = from_config(&config_and_users); + DATABASES.store(Arc::new(dbs)); + + // Create a wildcard pool with initial password. + let result = add_wildcard_pool("bob", "tenant_1", Some("old_pass")); + assert!(result.is_ok()); + let cluster = result.unwrap(); + assert!(cluster.is_some(), "wildcard pool should have been created"); + let cluster = cluster.unwrap(); + assert_eq!(cluster.password(), "old_pass"); + + assert!(super::databases().exists(("bob", "tenant_1"))); + assert_eq!(super::databases().wildcard_pool_count(), 1); + assert!(super::databases().dynamic_pools().contains(&User { + user: "bob".to_string(), + database: "tenant_1".to_string(), + })); + + // Remove the pool. + let removed = remove_wildcard_pool("bob", "tenant_1"); + assert!(removed, "pool should have been removed"); + + // Pool should be gone. 
+ assert!(!super::databases().exists(("bob", "tenant_1"))); + assert_eq!(super::databases().wildcard_pool_count(), 0); + assert!(!super::databases().dynamic_pools().contains(&User { + user: "bob".to_string(), + database: "tenant_1".to_string(), + })); + + // Recreate with new password — simulates credential rotation. + let result = add_wildcard_pool("bob", "tenant_1", Some("new_pass")); + assert!(result.is_ok()); + let cluster = result.unwrap(); + assert!(cluster.is_some(), "pool should be recreated"); + let cluster = cluster.unwrap(); + assert_eq!( + cluster.password(), + "new_pass", + "recreated pool must use the new password" + ); + } } diff --git a/pgdog/src/backend/pool/cluster.rs b/pgdog/src/backend/pool/cluster.rs index f86632e75..786dd02b9 100644 --- a/pgdog/src/backend/pool/cluster.rs +++ b/pgdog/src/backend/pool/cluster.rs @@ -346,6 +346,16 @@ impl Cluster { &self.shards } + /// Total number of connections (idle + checked-out) across all shards. + /// Used by the wildcard-pool eviction task to decide whether a pool is idle. + pub fn total_connections(&self) -> usize { + self.shards + .iter() + .flat_map(|shard| shard.pools()) + .map(|pool| pool.state().total) + .sum() + } + /// Get the password the user should use to connect to the database. pub fn password(&self) -> &str { &self.password diff --git a/pgdog/src/backend/pool/connection/mod.rs b/pgdog/src/backend/pool/connection/mod.rs index 3933d7f4b..854dbd88d 100644 --- a/pgdog/src/backend/pool/connection/mod.rs +++ b/pgdog/src/backend/pool/connection/mod.rs @@ -333,15 +333,41 @@ impl Connection { } let databases = databases(); - let cluster = databases.cluster(user)?; + let cluster = match databases.cluster(user) { + Ok(c) => c, + Err(_) => { + // Drop the Arc before mutating global state. + drop(databases); + // Attempt wildcard pool creation. 
+ match databases::add_wildcard_pool( + &self.user, + &self.database, + self.passthrough_password.as_deref(), + ) { + Ok(Some(c)) => c, + Ok(None) => { + return Err(Error::NoDatabase(databases::User { + user: self.user.clone(), + database: self.database.clone(), + })); + } + Err(e) => return Err(e), + } + } + }; self.cluster = Some(cluster.clone()); let source_db = cluster.name(); + + // Re-read databases after potential wildcard pool creation. + let databases = databases::databases(); self.mirrors = databases - .mirrors(user)? + .mirrors(user) + .ok() + .flatten() .unwrap_or(&[]) .iter() - .map(|dest_cluster| { + .map(|dest_cluster: &Cluster| { let mirror_config = databases.mirror_config(source_db, dest_cluster.name()); Mirror::spawn(source_db, dest_cluster, mirror_config) }) diff --git a/pgdog/src/backend/pool/lb/test.rs b/pgdog/src/backend/pool/lb/test.rs index 31d29a48e..5ff5788fd 100644 --- a/pgdog/src/backend/pool/lb/test.rs +++ b/pgdog/src/backend/pool/lb/test.rs @@ -33,7 +33,7 @@ fn create_test_pool_config(host: &str, port: u16) -> PoolConfig { fn setup_test_replicas() -> LoadBalancer { let pool_config1 = create_test_pool_config("127.0.0.1", 5432); - let pool_config2 = create_test_pool_config("localhost", 5432); + let pool_config2 = create_test_pool_config("127.0.0.1", 5432); let replicas = LoadBalancer::new( &None, @@ -171,7 +171,7 @@ async fn test_primary_pool_banning() { let primary_pool = Pool::new(&primary_config); primary_pool.launch(); - let replica_configs = [create_test_pool_config("localhost", 5432)]; + let replica_configs = [create_test_pool_config("127.0.0.1", 5432)]; let replicas = LoadBalancer::new( &Some(primary_pool), @@ -325,7 +325,7 @@ async fn test_read_write_split_exclude_primary() { primary_pool.launch(); let replica_configs = [ - create_test_pool_config("localhost", 5432), + create_test_pool_config("127.0.0.1", 5432), create_test_pool_config("127.0.0.1", 5432), ]; @@ -363,7 +363,7 @@ async fn test_read_write_split_include_primary() { 
let primary_pool = Pool::new(&primary_config); primary_pool.launch(); - let replica_configs = [create_test_pool_config("localhost", 5432)]; + let replica_configs = [create_test_pool_config("127.0.0.1", 5432)]; let replicas = LoadBalancer::new( &Some(primary_pool), @@ -397,7 +397,7 @@ async fn test_read_write_split_include_primary() { async fn test_read_write_split_exclude_primary_no_primary() { // Test exclude primary setting when no primary exists let replica_configs = [ - create_test_pool_config("localhost", 5432), + create_test_pool_config("127.0.0.1", 5432), create_test_pool_config("127.0.0.1", 5432), ]; @@ -427,7 +427,7 @@ async fn test_read_write_split_exclude_primary_no_primary() { async fn test_read_write_split_include_primary_no_primary() { // Test include primary setting when no primary exists let replica_configs = [ - create_test_pool_config("localhost", 5432), + create_test_pool_config("127.0.0.1", 5432), create_test_pool_config("127.0.0.1", 5432), ]; @@ -460,7 +460,7 @@ async fn test_read_write_split_with_banned_primary() { let primary_pool = Pool::new(&primary_config); primary_pool.launch(); - let replica_configs = [create_test_pool_config("localhost", 5432)]; + let replica_configs = [create_test_pool_config("127.0.0.1", 5432)]; let replicas = LoadBalancer::new( &Some(primary_pool), @@ -500,7 +500,7 @@ async fn test_read_write_split_with_banned_replicas() { let primary_pool = Pool::new(&primary_config); primary_pool.launch(); - let replica_configs = [create_test_pool_config("localhost", 5432)]; + let replica_configs = [create_test_pool_config("127.0.0.1", 5432)]; let replicas = LoadBalancer::new( &Some(primary_pool), @@ -541,7 +541,7 @@ async fn test_read_write_split_exclude_primary_with_round_robin() { primary_pool.launch(); let replica_configs = [ - create_test_pool_config("localhost", 5432), + create_test_pool_config("127.0.0.1", 5432), create_test_pool_config("127.0.0.1", 5432), ]; @@ -588,7 +588,7 @@ async fn 
test_read_write_split_exclude_primary_with_round_robin() { #[tokio::test] async fn test_monitor_shuts_down_on_notify() { let pool_config1 = create_test_pool_config("127.0.0.1", 5432); - let pool_config2 = create_test_pool_config("localhost", 5432); + let pool_config2 = create_test_pool_config("127.0.0.1", 5432); let replicas = LoadBalancer::new( &None, @@ -708,7 +708,7 @@ async fn test_monitor_does_not_ban_with_zero_ban_timeout() { let pool_config2 = PoolConfig { address: Address { - host: "localhost".into(), + host: "127.0.0.1".into(), port: 5432, user: "pgdog".into(), password: "pgdog".into(), @@ -780,7 +780,7 @@ async fn test_include_primary_if_replica_banned_no_bans() { let primary_pool = Pool::new(&primary_config); primary_pool.launch(); - let replica_configs = [create_test_pool_config("localhost", 5432)]; + let replica_configs = [create_test_pool_config("127.0.0.1", 5432)]; let replicas = LoadBalancer::new( &Some(primary_pool), @@ -816,7 +816,7 @@ async fn test_include_primary_if_replica_banned_with_ban() { let primary_pool = Pool::new(&primary_config); primary_pool.launch(); - let replica_configs = [create_test_pool_config("localhost", 5432)]; + let replica_configs = [create_test_pool_config("127.0.0.1", 5432)]; let replicas = LoadBalancer::new( &Some(primary_pool), @@ -865,7 +865,7 @@ async fn test_has_replicas_with_primary_and_replicas() { let primary_pool = Pool::new(&primary_config); primary_pool.launch(); - let replica_configs = [create_test_pool_config("localhost", 5432)]; + let replica_configs = [create_test_pool_config("127.0.0.1", 5432)]; let lb = LoadBalancer::new( &Some(primary_pool), @@ -945,7 +945,7 @@ async fn test_set_role() { #[tokio::test] async fn test_can_move_conns_to_same_config() { let pool_config1 = create_test_pool_config("127.0.0.1", 5432); - let pool_config2 = create_test_pool_config("localhost", 5432); + let pool_config2 = create_test_pool_config("127.0.0.1", 5432); let lb1 = LoadBalancer::new( &None, @@ -967,7 +967,7 @@ async fn 
test_can_move_conns_to_same_config() { #[tokio::test] async fn test_can_move_conns_to_different_count() { let pool_config1 = create_test_pool_config("127.0.0.1", 5432); - let pool_config2 = create_test_pool_config("localhost", 5432); + let pool_config2 = create_test_pool_config("127.0.0.1", 5432); let lb1 = LoadBalancer::new( &None, @@ -989,7 +989,7 @@ async fn test_can_move_conns_to_different_count() { #[tokio::test] async fn test_can_move_conns_to_different_addresses() { let pool_config1 = create_test_pool_config("127.0.0.1", 5432); - let pool_config2 = create_test_pool_config("localhost", 5432); + let pool_config2 = create_test_pool_config("127.0.0.1", 5432); let pool_config3 = create_test_pool_config("127.0.0.1", 5433); let lb1 = LoadBalancer::new( @@ -1050,7 +1050,7 @@ async fn test_monitor_unbans_all_when_second_target_becomes_unhealthy_after_firs #[tokio::test] async fn test_least_active_connections_prefers_pool_with_fewer_checked_out() { let pool_config1 = create_test_pool_config("127.0.0.1", 5432); - let pool_config2 = create_test_pool_config("localhost", 5432); + let pool_config2 = create_test_pool_config("127.0.0.1", 5432); let replicas = LoadBalancer::new( &None, @@ -1086,7 +1086,7 @@ async fn test_least_active_connections_prefers_pool_with_fewer_checked_out() { fn setup_test_replicas_no_launch() -> LoadBalancer { let pool_config1 = create_test_pool_config("127.0.0.1", 5432); - let pool_config2 = create_test_pool_config("localhost", 5432); + let pool_config2 = create_test_pool_config("127.0.0.1", 5432); LoadBalancer::new( &None, @@ -1337,7 +1337,7 @@ fn test_ban_check_does_not_ban_with_zero_ban_timeout() { let pool_config2 = PoolConfig { address: Address { - host: "localhost".into(), + host: "127.0.0.1".into(), port: 5432, user: "pgdog".into(), password: "pgdog".into(), diff --git a/pgdog/src/config/mod.rs b/pgdog/src/config/mod.rs index c20aa4d37..1a6e141ad 100644 --- a/pgdog/src/config/mod.rs +++ b/pgdog/src/config/mod.rs @@ -16,7 +16,7 @@ pub mod 
sharding; pub mod users; pub use core::{Config, ConfigAndUsers}; -pub use database::{Database, Role}; +pub use database::{Database, EnumeratedDatabase, Role}; pub use error::Error; pub use general::General; pub use memory::*; @@ -314,3 +314,102 @@ pub fn load_test_sharded() { set(config).unwrap(); init().unwrap(); } + +/// Load a wildcard test configuration. +/// +/// Sets up a wildcard database template (`name = "*"`) pointing at a real +/// PostgreSQL server and a wildcard user (`name = "*", database = "*"`). +/// An explicit pool for user=pgdog / database=pgdog is also created so +/// that tests can compare explicit vs. wildcard resolution. +#[cfg(test)] +pub fn load_test_wildcard() { + use crate::backend::databases::init; + + let mut config = ConfigAndUsers::default(); + config.config.general.min_pool_size = 0; + + config.config.databases = vec![ + // Explicit database — should always take priority. + Database { + name: "pgdog".into(), + host: "127.0.0.1".into(), + port: 5432, + role: Role::Primary, + database_name: Some("pgdog".into()), + ..Default::default() + }, + // Wildcard template — any other database name resolves here. + Database { + name: "*".into(), + host: "127.0.0.1".into(), + port: 5432, + role: Role::Primary, + ..Default::default() + }, + ]; + + config.users.users = vec![ + // Explicit user for the explicit database. + User { + name: "pgdog".into(), + database: "pgdog".into(), + password: Some("pgdog".into()), + ..Default::default() + }, + // Wildcard user — any user / any database. + User { + name: "*".into(), + database: "*".into(), + password: Some("pgdog".into()), + ..Default::default() + }, + ]; + + set(config).unwrap(); + init().unwrap(); +} +/// Like [`load_test_wildcard`] but also sets `max_wildcard_pools` so tests +/// can exercise the pool-count limit without modifying the global default. 
+pub fn load_test_wildcard_with_limit(max_wildcard_pools: usize) { + use crate::backend::databases::init; + + let mut config = ConfigAndUsers::default(); + config.config.general.min_pool_size = 0; + config.config.general.max_wildcard_pools = max_wildcard_pools; + + config.config.databases = vec![ + Database { + name: "pgdog".into(), + host: "127.0.0.1".into(), + port: 5432, + role: Role::Primary, + database_name: Some("pgdog".into()), + ..Default::default() + }, + Database { + name: "*".into(), + host: "127.0.0.1".into(), + port: 5432, + role: Role::Primary, + ..Default::default() + }, + ]; + + config.users.users = vec![ + User { + name: "pgdog".into(), + database: "pgdog".into(), + password: Some("pgdog".into()), + ..Default::default() + }, + User { + name: "*".into(), + database: "*".into(), + password: Some("pgdog".into()), + ..Default::default() + }, + ]; + + set(config).unwrap(); + init().unwrap(); +} diff --git a/pgdog/src/frontend/client/mod.rs b/pgdog/src/frontend/client/mod.rs index b868a6c1d..b762e9036 100644 --- a/pgdog/src/frontend/client/mod.rs +++ b/pgdog/src/frontend/client/mod.rs @@ -156,7 +156,11 @@ impl Client { let comms = ClientComms::new(&id); // Auto database. - let exists = databases::databases().exists((user, database)); + let dbs = databases::databases(); + let exists = dbs.exists((user, database)); + let wildcard_available = !exists && dbs.exists_or_wildcard((user, database)); + drop(dbs); + let passthrough_password = if config.config.general.passthrough_auth() && !admin { let password = if auth_type.trust() { // Use empty password. @@ -172,7 +176,7 @@ impl Client { Password::from_bytes(password.to_bytes()?)? 
}; - if !exists { + if !exists && !wildcard_available { let user = user_from_params(¶ms, &password).ok(); if let Some(user) = user { databases::add(user); @@ -193,27 +197,40 @@ impl Client { }; let password = if admin { - admin_password + admin_password.to_owned() } else { - conn.cluster()?.password() + conn.cluster()?.password().to_owned() }; let mut auth_ok = false; - if let Some(ref passthrough_password) = passthrough_password { - if passthrough_password != password && auth_type != &AuthType::Trust { - stream.fatal(ErrorResponse::auth(user, database)).await?; - return Ok(None); - } else { - auth_ok = true; + if let Some(ref client_password) = passthrough_password { + if client_password.as_str() != password && auth_type != &AuthType::Trust { + // Password changed — remove the old pool and recreate with the + // new credentials so both the proxy cache and backend Addresses + // pick up the rotated password. + databases::remove_wildcard_pool(user, database); + conn = match Connection::new(user, database, admin, &passthrough_password) { + Ok(c) => c, + Err(_) => { + stream.fatal(ErrorResponse::auth(user, database)).await?; + return Ok(None); + } + }; + let new_password = conn.cluster()?.password(); + if client_password.as_str() != new_password && auth_type != &AuthType::Trust { + stream.fatal(ErrorResponse::auth(user, database)).await?; + return Ok(None); + } } + auth_ok = true; } let auth_type = &config.config.general.auth_type; if !auth_ok { auth_ok = match auth_type { AuthType::Md5 => { - let md5 = md5::Client::new(user, password); + let md5 = md5::Client::new(user, &password); stream.send_flush(&md5.challenge()).await?; let password = Password::from_bytes(stream.read().await?.to_bytes()?)?; if let Password::PasswordMessage { response } = password { @@ -226,7 +243,7 @@ impl Client { AuthType::Scram => { stream.send_flush(&Authentication::scram()).await?; - let scram = Server::new(password); + let scram = Server::new(&password); let res = scram.handle(&mut 
stream).await; matches!(res, Ok(true)) } @@ -237,7 +254,7 @@ impl Client { .await?; let response = stream.read().await?; let response = Password::from_bytes(response.to_bytes()?)?; - response.password() == Some(password) + response.password() == Some(&password) } AuthType::Trust => true, diff --git a/pgdog/src/frontend/client/query_engine/test/mod.rs b/pgdog/src/frontend/client/query_engine/test/mod.rs index de93b592b..292e06a57 100644 --- a/pgdog/src/frontend/client/query_engine/test/mod.rs +++ b/pgdog/src/frontend/client/query_engine/test/mod.rs @@ -18,6 +18,7 @@ mod set; mod set_schema_sharding; mod sharded; mod spliced; +mod wildcard; pub(super) fn test_client() -> Client { load_test(); diff --git a/pgdog/src/frontend/client/query_engine/test/omni.rs b/pgdog/src/frontend/client/query_engine/test/omni.rs index 1dfb4c1c6..eae4b31e9 100644 --- a/pgdog/src/frontend/client/query_engine/test/omni.rs +++ b/pgdog/src/frontend/client/query_engine/test/omni.rs @@ -9,13 +9,7 @@ use super::prelude::*; async fn test_omni_update_returns_single_shard_count() { let mut client = TestClient::new_sharded(Parameters::default()).await; - // Setup: create table and insert data on both shards - client - .send_simple(Query::new( - "CREATE TABLE IF NOT EXISTS sharded_omni (id BIGINT PRIMARY KEY, value TEXT)", - )) - .await; - client.read_until('Z').await.unwrap(); + // Setup: table is provisioned by integration/setup.sh client .send_simple(Query::new("DELETE FROM sharded_omni")) @@ -47,7 +41,7 @@ async fn test_omni_update_returns_single_shard_count() { // Cleanup client - .send_simple(Query::new("DROP TABLE IF EXISTS sharded_omni")) + .send_simple(Query::new("DELETE FROM sharded_omni")) .await; client.read_until('Z').await.unwrap(); } @@ -56,13 +50,7 @@ async fn test_omni_update_returns_single_shard_count() { async fn test_omni_delete_returns_single_shard_count() { let mut client = TestClient::new_sharded(Parameters::default()).await; - // Setup - client - .send_simple(Query::new( - 
"CREATE TABLE IF NOT EXISTS sharded_omni (id BIGINT PRIMARY KEY, value TEXT)", - )) - .await; - client.read_until('Z').await.unwrap(); + // Setup: table is provisioned by integration/setup.sh client .send_simple(Query::new("DELETE FROM sharded_omni")) @@ -92,7 +80,7 @@ async fn test_omni_delete_returns_single_shard_count() { // Cleanup client - .send_simple(Query::new("DROP TABLE IF EXISTS sharded_omni")) + .send_simple(Query::new("DELETE FROM sharded_omni")) .await; client.read_until('Z').await.unwrap(); } @@ -101,13 +89,7 @@ async fn test_omni_delete_returns_single_shard_count() { async fn test_omni_insert_returns_single_shard_count() { let mut client = TestClient::new_sharded(Parameters::default()).await; - // Setup - client - .send_simple(Query::new( - "CREATE TABLE IF NOT EXISTS sharded_omni (id BIGINT PRIMARY KEY, value TEXT)", - )) - .await; - client.read_until('Z').await.unwrap(); + // Setup: table is provisioned by integration/setup.sh client .send_simple(Query::new("DELETE FROM sharded_omni")) @@ -132,7 +114,7 @@ async fn test_omni_insert_returns_single_shard_count() { // Cleanup client - .send_simple(Query::new("DROP TABLE IF EXISTS sharded_omni")) + .send_simple(Query::new("DELETE FROM sharded_omni")) .await; client.read_until('Z').await.unwrap(); } @@ -141,13 +123,7 @@ async fn test_omni_insert_returns_single_shard_count() { async fn test_omni_update_returning_only_from_one_shard() { let mut client = TestClient::new_sharded(Parameters::default()).await; - // Setup - client - .send_simple(Query::new( - "CREATE TABLE IF NOT EXISTS sharded_omni (id BIGINT PRIMARY KEY, value TEXT)", - )) - .await; - client.read_until('Z').await.unwrap(); + // Setup: table is provisioned by integration/setup.sh client .send_simple(Query::new("DELETE FROM sharded_omni")) @@ -194,7 +170,7 @@ async fn test_omni_update_returning_only_from_one_shard() { // Cleanup client - .send_simple(Query::new("DROP TABLE IF EXISTS sharded_omni")) + .send_simple(Query::new("DELETE FROM 
sharded_omni")) .await; client.read_until('Z').await.unwrap(); } @@ -203,13 +179,7 @@ async fn test_omni_update_returning_only_from_one_shard() { async fn test_omni_delete_returning_only_from_one_shard() { let mut client = TestClient::new_sharded(Parameters::default()).await; - // Setup - client - .send_simple(Query::new( - "CREATE TABLE IF NOT EXISTS sharded_omni (id BIGINT PRIMARY KEY, value TEXT)", - )) - .await; - client.read_until('Z').await.unwrap(); + // Setup: table is provisioned by integration/setup.sh client .send_simple(Query::new("DELETE FROM sharded_omni")) @@ -246,7 +216,7 @@ async fn test_omni_delete_returning_only_from_one_shard() { // Cleanup client - .send_simple(Query::new("DROP TABLE IF EXISTS sharded_omni")) + .send_simple(Query::new("DELETE FROM sharded_omni")) .await; client.read_until('Z').await.unwrap(); } @@ -255,13 +225,7 @@ async fn test_omni_delete_returning_only_from_one_shard() { async fn test_omni_insert_returning_only_from_one_shard() { let mut client = TestClient::new_sharded(Parameters::default()).await; - // Setup - client - .send_simple(Query::new( - "CREATE TABLE IF NOT EXISTS sharded_omni (id BIGINT PRIMARY KEY, value TEXT)", - )) - .await; - client.read_until('Z').await.unwrap(); + // Setup: table is provisioned by integration/setup.sh client .send_simple(Query::new("DELETE FROM sharded_omni")) @@ -293,7 +257,7 @@ async fn test_omni_insert_returning_only_from_one_shard() { // Cleanup client - .send_simple(Query::new("DROP TABLE IF EXISTS sharded_omni")) + .send_simple(Query::new("DELETE FROM sharded_omni")) .await; client.read_until('Z').await.unwrap(); } diff --git a/pgdog/src/frontend/client/query_engine/test/wildcard.rs b/pgdog/src/frontend/client/query_engine/test/wildcard.rs new file mode 100644 index 000000000..b988d4ba7 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/test/wildcard.rs @@ -0,0 +1,275 @@ +use crate::backend::databases::{add_wildcard_pool, databases, evict_idle_wildcard_pools}; +use 
crate::config::load_test_wildcard_with_limit; +use crate::frontend::client::test::test_client::TestClient; +use crate::net::{Parameters, Query}; + +/// Wildcard database: connecting to an unmapped database name triggers +/// dynamic pool creation from the "*" template. The pool should forward +/// queries to the real Postgres database whose name matches the +/// client-requested name. +#[tokio::test] +async fn test_wildcard_database_simple_query() { + let mut params = Parameters::default(); + params.insert("user", "pgdog"); + params.insert("database", "pgdog"); + + let mut client = TestClient::new_wildcard(params).await; + + // The explicit pool for (pgdog, pgdog) already exists, so this goes + // through the explicit path. Verify basic connectivity. + client.send_simple(Query::new("SELECT 1 AS result")).await; + let messages = client.read_until('Z').await.unwrap(); + assert!( + messages.len() >= 3, + "expected DataRow + CommandComplete + ReadyForQuery" + ); +} + +/// When a wildcard template is configured, `exists_or_wildcard` should +/// return true for database names that don't have an explicit pool but +/// match the wildcard pattern. +#[tokio::test] +async fn test_wildcard_exists_or_wildcard() { + use crate::backend::databases::databases; + use crate::config::load_test_wildcard; + + load_test_wildcard(); + + let dbs = databases(); + + // Explicit pool exists: + assert!(dbs.exists(("pgdog", "pgdog"))); + assert!(dbs.exists_or_wildcard(("pgdog", "pgdog"))); + + // No explicit pool, but wildcard matches: + assert!(!dbs.exists(("pgdog", "some_other_db"))); + assert!(dbs.exists_or_wildcard(("pgdog", "some_other_db"))); + + // Fully unknown user + database — wildcard user+db template covers it: + assert!(!dbs.exists(("unknown_user", "unknown_db"))); + assert!(dbs.exists_or_wildcard(("unknown_user", "unknown_db"))); +} + +/// Dynamic pool creation via `add_wildcard_pool` for a database that has +/// no explicit pool but matches the wildcard template. 
+#[tokio::test] +async fn test_wildcard_add_pool_dynamic() { + use crate::backend::databases::{add_wildcard_pool, databases}; + use crate::config::load_test_wildcard; + + load_test_wildcard(); + + let target_db = "pgdog"; // must exist in Postgres + + // Before: no explicit pool for ("pgdog", target_db) via wildcard user. + // The explicit pool is under user "pgdog" / database "pgdog", so let's + // test a wildcard user scenario. + let dbs = databases(); + assert!(!dbs.exists(("wildcard_user", target_db))); + drop(dbs); + + // Create pool dynamically. + let result = add_wildcard_pool("wildcard_user", target_db, None); + assert!(result.is_ok(), "add_wildcard_pool should succeed"); + let cluster = result.unwrap(); + assert!(cluster.is_some(), "wildcard match should produce a cluster"); + + // After: pool exists. + let dbs = databases(); + assert!(dbs.exists(("wildcard_user", target_db))); +} + +/// Requesting a database that doesn't exist in Postgres should still +/// create a wildcard pool — the error only surfaces when a connection +/// attempt is actually made. +#[tokio::test] +async fn test_wildcard_nonexistent_pg_database() { + use crate::backend::databases::{add_wildcard_pool, databases}; + use crate::config::load_test_wildcard; + + load_test_wildcard(); + + let fake_db = "totally_fake_db_12345"; + + let dbs = databases(); + assert!(!dbs.exists(("pgdog", fake_db))); + assert!(dbs.exists_or_wildcard(("pgdog", fake_db))); + drop(dbs); + + // Pool creation succeeds (it only creates the config, doesn't connect yet). + let result = add_wildcard_pool("pgdog", fake_db, None); + assert!(result.is_ok()); + assert!(result.unwrap().is_some()); + + let dbs = databases(); + assert!(dbs.exists(("pgdog", fake_db))); +} + +/// When `max_wildcard_pools` is set, pools beyond the limit are rejected +/// (returning `Ok(None)`) without panicking or erroring. 
+#[tokio::test] +async fn test_max_wildcard_pools_limit_enforced() { + load_test_wildcard_with_limit(2); + + // First two pools succeed. + let r1 = add_wildcard_pool("user_a", "db_one", None); + assert!(r1.is_ok()); + assert!( + r1.unwrap().is_some(), + "first pool within limit should be created" + ); + + let r2 = add_wildcard_pool("user_b", "db_two", None); + assert!(r2.is_ok()); + assert!( + r2.unwrap().is_some(), + "second pool within limit should be created" + ); + + // Third pool must be rejected. + let r3 = add_wildcard_pool("user_c", "db_three", None); + assert!(r3.is_ok(), "should not error, just reject gracefully"); + assert!( + r3.unwrap().is_none(), + "pool creation beyond max_wildcard_pools must return None" + ); + + let dbs = databases(); + assert!( + !dbs.exists(("user_c", "db_three")), + "rejected pool must not be registered" + ); +} + +/// `max_wildcard_pools = 0` means unlimited: any number of pools may be +/// created without triggering the limit. +#[tokio::test] +async fn test_max_wildcard_pools_zero_means_unlimited() { + load_test_wildcard_with_limit(0); + + for i in 0..5usize { + let db = format!("unlimited_db_{i}"); + let user = format!("unlimited_user_{i}"); + let result = add_wildcard_pool(&user, &db, None); + assert!(result.is_ok()); + assert!( + result.unwrap().is_some(), + "pool {i} should be created when limit is 0" + ); + } +} + +/// After a config reload (simulated by calling `load_test_wildcard_with_limit` +/// again) the wildcard pool counter is reset to zero, so pools that were +/// previously rejected can now be created. +#[tokio::test] +async fn test_max_wildcard_pools_counter_resets_on_reload() { + load_test_wildcard_with_limit(1); + + // Fill the single slot. + let r1 = add_wildcard_pool("reload_user", "first_db", None); + assert!(r1.unwrap().is_some(), "slot 1 should be filled"); + + // Next pool is rejected. 
+ let r2 = add_wildcard_pool("reload_user", "second_db", None); + assert!(r2.unwrap().is_none(), "should be rejected at limit"); + + // Simulate SIGHUP / config reload — resets the counter and the pool map. + load_test_wildcard_with_limit(1); + + // The previously rejected database can now be created again. + let r3 = add_wildcard_pool("reload_user", "second_db", None); + assert!( + r3.unwrap().is_some(), + "should succeed after reload resets the counter" + ); +} + +/// Eviction removes an idle wildcard pool and clears it from the dynamic-pool +/// registry and the pool count. +#[tokio::test] +async fn test_evict_idle_wildcard_pools_removes_idle_pool() { + load_test_wildcard_with_limit(0); + + let result = add_wildcard_pool("evict_user", "evict_db", None); + assert!(result.unwrap().is_some(), "pool should be created"); + + let dbs = databases(); + assert!( + dbs.exists(("evict_user", "evict_db")), + "pool must exist before eviction" + ); + assert!( + dbs.dynamic_pools() + .iter() + .any(|u| u.user == "evict_user" && u.database == "evict_db"), + "pool must be tracked in dynamic_pools" + ); + assert!(dbs.wildcard_pool_count() >= 1, "counter must be positive"); + drop(dbs); + + // All freshly-created test pools have zero connections — eviction proceeds. + evict_idle_wildcard_pools(); + + let dbs = databases(); + assert!( + !dbs.exists(("evict_user", "evict_db")), + "evicted pool must no longer be registered" + ); + assert!( + !dbs.dynamic_pools() + .iter() + .any(|u| u.user == "evict_user" && u.database == "evict_db"), + "evicted pool must be removed from dynamic_pools" + ); +} + +/// Evicting a pool decrements `wildcard_pool_count` so that a new pool can be +/// created even when the limit was full before eviction. +#[tokio::test] +async fn test_evict_idle_wildcard_pools_decrements_count() { + load_test_wildcard_with_limit(1); + + // Fill the single slot. 
+ let r = add_wildcard_pool("count_user", "count_db", None); + assert!(r.unwrap().is_some(), "slot should be filled"); + assert_eq!(databases().wildcard_pool_count(), 1, "counter should be 1"); + + // Slot is full — a new pool is rejected. + let rejected = add_wildcard_pool("count_user", "other_db", None); + assert!( + rejected.unwrap().is_none(), + "must be rejected when at limit" + ); + + evict_idle_wildcard_pools(); + + assert_eq!( + databases().wildcard_pool_count(), + 0, + "counter must drop to 0 after eviction" + ); + + // Now a new pool can be created again without reloading config. + let r2 = add_wildcard_pool("count_user", "new_db_after_eviction", None); + assert!( + r2.unwrap().is_some(), + "pool creation must succeed once eviction freed a slot" + ); +} + +/// When there are no dynamic pools, calling `evict_idle_wildcard_pools` is a +/// safe no-op that doesn't disturb statically-configured pools. +#[tokio::test] +async fn test_evict_idle_wildcard_pools_noop_on_empty() { + load_test_wildcard_with_limit(0); + + let before = databases().all().len(); + evict_idle_wildcard_pools(); + let after = databases().all().len(); + + assert_eq!( + before, after, + "static pools must be unaffected by eviction when no dynamic pools exist" + ); +} diff --git a/pgdog/src/frontend/client/test/test_client.rs b/pgdog/src/frontend/client/test/test_client.rs index fd912e914..688a62def 100644 --- a/pgdog/src/frontend/client/test/test_client.rs +++ b/pgdog/src/frontend/client/test/test_client.rs @@ -1,6 +1,8 @@ use std::{fmt::Debug, ops::Deref}; use bytes::{BufMut, Bytes, BytesMut}; +use once_cell::sync::Lazy; +use parking_lot::{Mutex, MutexGuard}; use pgdog_config::RewriteMode; use rand::{rng, Rng}; use tokio::{ @@ -10,7 +12,7 @@ use tokio::{ use crate::{ backend::databases::{reload_from_existing, shutdown}, - config::{config, load_test_replicas, load_test_sharded, set}, + config::{config, load_test_replicas, load_test_sharded, load_test_wildcard, set}, frontend::{ 
client::query_engine::QueryEngine, router::{parser::Shard, sharding::ContextBuilder}, @@ -48,11 +50,14 @@ macro_rules! expect_message { /// Test client. #[derive(Debug)] pub struct TestClient { + _test_guard: MutexGuard<'static, ()>, pub(crate) client: Client, pub(crate) engine: QueryEngine, pub(crate) conn: TcpStream, } +static TEST_CLIENT_LOCK: Lazy> = Lazy::new(|| Mutex::new(())); + impl TestClient { /// Create new test client after the login phase /// is complete. @@ -60,6 +65,8 @@ impl TestClient { /// Config needs to be loaded. /// async fn new(params: Parameters) -> Self { + let test_guard = TEST_CLIENT_LOCK.lock(); + let addr = "127.0.0.1:0".to_string(); let conn_addr = addr.clone(); let stream = TcpListener::bind(&conn_addr).await.unwrap(); @@ -77,6 +84,7 @@ impl TestClient { let client = connect_handle.await.unwrap(); Self { + _test_guard: test_guard, conn, engine: QueryEngine::from_client(&client).expect("create query engine from client"), client, @@ -96,6 +104,13 @@ impl TestClient { Self::new(params).await } + /// New client with wildcard database configuration. + #[allow(dead_code)] + pub(crate) async fn new_wildcard(params: Parameters) -> Self { + load_test_wildcard(); + Self::new(params).await + } + /// New client with cross-shard-queries disabled. pub(crate) async fn new_cross_shard_disabled(params: Parameters) -> Self { load_test_sharded();