diff --git a/.dockerignore b/.dockerignore index 0c3c378ce..e8bd47ffe 100644 --- a/.dockerignore +++ b/.dockerignore @@ -18,3 +18,5 @@ node_modules/ # Test test/ coverage/ +.claude/ +logs/ diff --git a/CLAUDE.md b/CLAUDE.md index 804122f20..1625505dc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,12 +1,50 @@ # CLAUDE.md +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + AR.IO Node — Arweave gateway for accessing and indexing blockchain data, with caching, ANS-104 bundle unbundling, and multi-source data retrieval. +## Tech stack + +- Node.js v20 (see `.nvmrc`), TypeScript strict mode, ESM (`"type": "module"`) +- Test framework: **Node.js native `node:test`** (not Jest/Mocha/Vitest) +- Transpiler: SWC (via ts-node) +- Databases: SQLite (primary) + ClickHouse (analytics/GQL) +- Caching: Redis, LMDB, LRU in-memory +- HTTP: Express +- Observability: OpenTelemetry + Prometheus + Winston + +## Commands + +```bash +# Development +yarn start # Start service (requires .env file) +yarn watch # Start with nodemon (auto-restart on changes) +yarn build # Clean + compile TypeScript (prod) + +# Testing +yarn test # Run all unit tests +yarn test:file src/path/to/file.test.ts # Run a single test file +yarn test:e2e # Run end-to-end tests (in test/ directory) +yarn test:coverage # Run tests with coverage report + +# Linting & quality +yarn lint:check # ESLint check +yarn lint:fix # ESLint auto-fix +yarn duplicate:check # Detect code duplication (jscpd) +yarn deps:check # Detect circular dependencies (madge) + +# Database +yarn db:migrate # Run SQLite migrations +yarn db:dump-test-schemas # Regenerate test SQL schemas after migrations + +# Service management (systemd-based) +yarn service:start / stop / restart / status / logs +``` + ## Discovery points -- Commands — `package.json` scripts (dev, build, service, test, lint, - migrations, duplicate/deps checks) - Documentation index — `docs/INDEX.md` - Env vars — `docs/envs.md` (keep 
this and `docker-compose.yaml` in sync when adding or removing env vars) @@ -20,6 +58,8 @@ caching, ANS-104 bundle unbundling, and multi-source data retrieval. - `src/system.ts` is the central DI wiring — all services, workers, data sources, resolvers, and lifecycle cleanup handlers are constructed here. +- `src/config.ts` parses all environment variables and exports typed + constants — this is where new env vars are added. - `src/data/` uses composite sources with fallback chains (cache → S3 → AR.IO peers → trusted gateways → Arweave nodes). Retrieval order is configurable via `ON_DEMAND_RETRIEVAL_ORDER` and @@ -30,6 +70,11 @@ caching, ANS-104 bundle unbundling, and multi-source data retrieval. - Filters (`ANS104_UNBUNDLE_FILTER`, `ANS104_INDEX_FILTER`, `WEBHOOK_INDEX_FILTER`) share a composable JSON filter system — see `docs/filters.md`. +- Background workers (`src/workers/`) handle block importing, data importing, + bundle unbundling, verification, and webhooks. Controlled by `START_WRITERS`. +- IPFS serving (`src/ipfs/`) is opt-in via `IPFS_ENABLED`. Uses a Kubo sidecar + for content retrieval with its own cache, rate limiter, and blocklist. Routes + mount before ArNS in `app.ts`. See `docs/ipfs-integration.md`. - Responses include trust headers indicating verification status. ## Gotchas @@ -52,6 +97,18 @@ Always use `createTestLogger()` from `test/test-logger.ts` in test files — never `winston.createLogger({ silent: true })`. Test output is written to `logs/test.log` (overwritten each run), not the console. +### Test imports + +Tests use `node:test` and `node:assert`: + +```typescript +import { describe, it, before, after, mock } from 'node:test'; +import { strict as assert } from 'node:assert'; +``` + +Common test stubs are in `test/stubs.ts`, SQLite helpers in +`test/sqlite-helpers.ts`. 
+ ### Adding a database method Five coordinated edits are required: diff --git a/docker-compose.yaml b/docker-compose.yaml index 0e59621ed..07510b21d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -53,6 +53,7 @@ services: - ${HEADERS_DATA_PATH:-./data/headers}:/app/data/headers - ${SQLITE_DATA_PATH:-./data/sqlite}:/app/data/sqlite - ${DUCKDB_DATA_PATH:-./data/duckdb}:/app/data/duckdb + - ${IPFS_CACHE_DATA_PATH:-./data/ipfs-cache}:/app/data/ipfs-cache - ${TEMP_DATA_PATH:-./data/tmp}:/app/data/tmp - ${LMDB_DATA_PATH:-./data/lmdb}:/app/data/lmdb - ${PARQUET_DATA_PATH:-./data/parquet}:/app/data/parquet @@ -124,6 +125,18 @@ services: - RATE_LIMITER_IP_REFILL_PER_SEC=${RATE_LIMITER_IP_REFILL_PER_SEC:-} - RATE_LIMITER_IPS_AND_CIDRS_ALLOWLIST=${RATE_LIMITER_IPS_AND_CIDRS_ALLOWLIST:-} - RATE_LIMITER_ARNS_ALLOWLIST=${RATE_LIMITER_ARNS_ALLOWLIST:-} + - IPFS_ENABLED=${IPFS_ENABLED:-false} + - IPFS_KUBO_URL=${IPFS_KUBO_URL:-http://kubo:8080} + - IPFS_KUBO_REQUEST_TIMEOUT_MS=${IPFS_KUBO_REQUEST_TIMEOUT_MS:-} + - IPFS_STREAM_STALL_TIMEOUT_MS=${IPFS_STREAM_STALL_TIMEOUT_MS:-} + - IPFS_CACHE_PATH=${IPFS_CACHE_PATH:-} + - IPFS_CACHE_MAX_SIZE_BYTES=${IPFS_CACHE_MAX_SIZE_BYTES:-} + - IPFS_CACHE_CLEANUP_THRESHOLD_SECONDS=${IPFS_CACHE_CLEANUP_THRESHOLD_SECONDS:-} + - IPFS_RATE_LIMITER_IP_TOKENS_PER_BUCKET=${IPFS_RATE_LIMITER_IP_TOKENS_PER_BUCKET:-} + - IPFS_RATE_LIMITER_IP_REFILL_PER_SEC=${IPFS_RATE_LIMITER_IP_REFILL_PER_SEC:-} + - IPFS_RATE_LIMITER_RESOURCE_TOKENS_PER_BUCKET=${IPFS_RATE_LIMITER_RESOURCE_TOKENS_PER_BUCKET:-} + - IPFS_RATE_LIMITER_RESOURCE_REFILL_PER_SEC=${IPFS_RATE_LIMITER_RESOURCE_REFILL_PER_SEC:-} + - IPFS_MAX_RESPONSE_SIZE_BYTES=${IPFS_MAX_RESPONSE_SIZE_BYTES:-} - NODE_MAX_OLD_SPACE_SIZE=${NODE_MAX_OLD_SPACE_SIZE:-} - ENABLE_FS_HEADER_CACHE_CLEANUP=${ENABLE_FS_HEADER_CACHE_CLEANUP:-} - ON_DEMAND_RETRIEVAL_ORDER=${ON_DEMAND_RETRIEVAL_ORDER:-} @@ -578,6 +591,22 @@ services: networks: - ar-io-network + kubo: + image: ipfs/kubo:${KUBO_IMAGE_TAG:-v0.32.1} + 
profiles: + - ipfs + restart: unless-stopped + ports: + - '${IPFS_SWARM_PORT:-4001}:4001/tcp' + - '${IPFS_SWARM_PORT:-4001}:4001/udp' + environment: + - IPFS_PROFILE=${IPFS_PROFILE:-server} + volumes: + - ${IPFS_DATA_PATH:-./data/ipfs}:/data/ipfs + networks: + - ar-io-network + command: ['daemon', '--enable-gc'] + autoheal: image: willfarrell/autoheal@sha256:fd2c5500ab9210be9fa0d365162301eb0d16923f1d9a36de887f5d1751c6eb8c network_mode: none diff --git a/docs/INDEX.md b/docs/INDEX.md index b171f48d9..4bd5c90d7 100644 --- a/docs/INDEX.md +++ b/docs/INDEX.md @@ -23,6 +23,12 @@ Fast, offline lookups for data item to root transaction mappings. | [CDB64 Tools Reference](cdb64-tools.md) | CLI tools for creating indexes | | [CDB64 Format Specification](cdb64-format.md) | Technical file format details | +### IPFS Integration + +| Document | Description | +|----------|-------------| +| [IPFS Integration](ipfs-integration.md) | Architecture, deployment, and configuration for IPFS CID serving | + ### Rate Limiting & Payments | Document | Description | diff --git a/docs/envs.md b/docs/envs.md index 3fef85cf0..d112078eb 100644 --- a/docs/envs.md +++ b/docs/envs.md @@ -357,3 +357,25 @@ ingestion may be partial) for SQLite. | CLICKHOUSE_SQLITE_MIN_HEIGHT_ENABLED | Boolean | false | When true, restrict the SQLite fallback to heights above (ClickHouse max height - buffer) | | CLICKHOUSE_SQLITE_MIN_HEIGHT_BUFFER | Number | 10 | Heights reserved for SQLite near the ClickHouse tip, to guard against partially ingested recent blocks | | CLICKHOUSE_MAX_HEIGHT_CACHE_TTL_SECONDS | Number | 60 | TTL for the cached ClickHouse max-height lookup used by the boundary optimization | + +## IPFS + +When enabled, the gateway can serve IPFS content via `/ipfs/{CID}` path routes +and `{CID}.{root_host}` subdomain routes (same level as ArNS, works with +standard `*.{host}` wildcard TLS certs). Requires a Kubo IPFS node (available +as a Docker Compose sidecar via the `ipfs` profile). 
+ +| ENV_NAME | TYPE | DEFAULT_VALUE | DESCRIPTION | +| ----------------------------------------- | ------- | ------------------- | ------------------------------------------------------------------- | +| IPFS_ENABLED | Boolean | false | Enable IPFS content serving | +| IPFS_KUBO_URL | String | http://kubo:8080 | Kubo HTTP gateway URL | +| IPFS_KUBO_REQUEST_TIMEOUT_MS | Number | 30000 | Connection timeout for Kubo requests (ms) | +| IPFS_STREAM_STALL_TIMEOUT_MS | Number | 30000 | Stall timeout — max time with no data before aborting stream (ms) | +| IPFS_CACHE_PATH | String | data/ipfs-cache | Directory for cached IPFS content | +| IPFS_CACHE_MAX_SIZE_BYTES | Number | 10737418240 (10 GB) | Maximum cache size before LRU eviction | +| IPFS_CACHE_CLEANUP_THRESHOLD_SECONDS | Number | 3600 | Age in seconds before cached files become eviction candidates | +| IPFS_RATE_LIMITER_IP_TOKENS_PER_BUCKET | Number | 100000 | IPFS rate limiter: max tokens per IP bucket | +| IPFS_RATE_LIMITER_IP_REFILL_PER_SEC | Number | 20 | IPFS rate limiter: token refill rate per second (IP bucket) | +| IPFS_RATE_LIMITER_RESOURCE_TOKENS_PER_BUCKET | Number | 1000000 | IPFS rate limiter: max tokens per resource bucket | +| IPFS_RATE_LIMITER_RESOURCE_REFILL_PER_SEC | Number | 100 | IPFS rate limiter: token refill rate per second (resource bucket) | +| IPFS_MAX_RESPONSE_SIZE_BYTES | Number | 1073741824 (1 GB) | Maximum IPFS content size the gateway will serve | diff --git a/docs/ipfs-integration.md b/docs/ipfs-integration.md new file mode 100644 index 000000000..8a60ef8f7 --- /dev/null +++ b/docs/ipfs-integration.md @@ -0,0 +1,541 @@ +# IPFS Integration + +This document describes the AR.IO Gateway's IPFS CID serving capability, +covering architecture, request flow, configuration, and deployment. 
+ +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [URL Patterns](#url-patterns) +- [Components](#components) +- [Data Flow](#data-flow) +- [Caching Strategy](#caching-strategy) +- [Security and Moderation](#security-and-moderation) +- [Docker Deployment](#docker-deployment) +- [Configuration Reference](#configuration-reference) +- [Phase 2: ArNS to IPFS Resolution](#phase-2-arns-to-ipfs-resolution) +- [Differences from Arweave Data Serving](#differences-from-arweave-data-serving) +- [Observability](#observability) + +## Overview + +### What This Feature Does + +The IPFS integration allows an AR.IO Gateway to serve IPFS content alongside +Arweave data. A local Kubo IPFS node runs as a Docker sidecar, and the gateway +proxies, caches, and moderates requests to it. This gives gateway operators a +single endpoint for both permanent Arweave data and IPFS-addressed content. + +### Why It Exists + +The Arweave Name System (ArNS) maps human-readable names to content addresses. +Today those addresses are Arweave transaction IDs. Adding IPFS CID support +means ArNS names can also point to IPFS content, bridging the two largest +decentralized storage networks under one naming layer without requiring changes +to the ArNS smart contract. Users get a single URL (e.g., +`my-app.arweave.dev/`) regardless of whether the underlying data lives on +Arweave or IPFS. + +### Phased Approach + +**Phase 1 -- Direct CID Access (current):** Users access IPFS content by +placing a CID in the URL path or subdomain. The gateway validates, rate-limits, +and caches the request, then proxies it to the local Kubo node. No ArNS +resolution is involved. + +**Phase 2 -- ArNS to CID Resolution (future):** ANT (Arweave Name Token) +records will be able to store an IPFS CID in their `transactionId` field. 
When +the gateway resolves an ArNS name and detects that the resolved ID is a CID +rather than an Arweave transaction ID, it routes the request to the IPFS service +instead of the Arweave data pipeline. This requires no contract changes -- CID +detection happens at the gateway level. + +## Architecture + +### Request Flow Diagram + +``` + +-----------+ + | Client | + +-----+-----+ + | + path: /ipfs/{CID} -or- subdomain: {CID}.ipfs.gateway.io + | + v + +----------+-----------+ + | Express Router / | + | IPFS Middleware | + +----------+-----------+ + | + 1. CID validation + | + v + +----------+-----------+ + | IPFS Blocklist | + | (451 if blocked) | + +----------+-----------+ + | + v + +----------+-----------+ + | IPFS Rate Limiter | + | (429 if exceeded) | + +----------+-----------+ + | + v + +----------+-----------+ + | IPFS Cache | + | (LRU filesystem) | + +----+----------+------+ + | | + cache hit cache miss + | | + v v + +--------+ +--+------------------+ + | Serve | | Kubo Data Source | + | from | | (HTTP fetch with | + | cache | | timeouts) | + +--------+ +--+------------------+ + | + tee stream to + cache + response + | + v + +-----+------+ + | Response | + | (headers, | + | stream) | + +------------+ +``` + +### Component Relationships + +``` +src/system.ts + | + +-- ipfs-service ----+-- kubo-data-source --> Kubo HTTP Gateway (sidecar) + | +-- ipfs-cache --> data/ipfs-cache/ + | +-- ipfs-blocklist --> data/ipfs-blocklist.txt + | +-- ipfs-rate-limiter --> token bucket (memory or Redis) + | + +-- middleware/ipfs (subdomain interception) + +-- routes/ipfs (path-based handlers) +``` + +## URL Patterns + +### Path-Based Access + +| Pattern | Example | Description | +|---------|---------|-------------| +| `/ipfs/{CID}` | `/ipfs/QmYwAPJzv5CZ...` | Fetch a single file or directory root | +| `/ipfs/{CID}/{path}` | `/ipfs/bafybeig.../images/logo.png` | Fetch a file within a UnixFS directory | + +### Subdomain-Based Access + +| Pattern | Example | Description | 
+|---------|---------|-------------|
+| `{CID}.{root_host}` | `bafybeig...arweave.dev` | CIDv1 (base32) in subdomain |
+| `{CID}.{root_host}/{path}` | `bafybeig...arweave.dev/index.html` | Subdomain with path |
+
+Subdomain-based access uses the `ARNS_ROOT_HOST` configuration. CID-format
+detection on the hostname label distinguishes IPFS requests from ArNS name
+resolution, preventing collisions. For example, `my-app.arweave.dev` resolves
+as an ArNS name (it is not a valid CID), while `bafybeig...arweave.dev`
+resolves as an IPFS CID.
+
+### CIDv0 to CIDv1 Redirect
+
+CIDv0 identifiers (base58, starting with `Qm`) cannot be used in subdomains
+because they are case-sensitive and DNS is case-insensitive. When a CIDv0 is
+detected in a subdomain request, the gateway issues a 301 redirect to the
+equivalent CIDv1 (base32) subdomain URL.
+
+For path-based requests, both CIDv0 and CIDv1 are accepted directly without
+redirection.
+
+## Components
+
+### `src/lib/ipfs-cid.ts` -- CID Parsing and Conversion
+
+Utility module for working with IPFS Content Identifiers:
+
+- **CID validation**: Determines whether a string is a valid CIDv0 or CIDv1.
+- **CID conversion**: Converts CIDv0 (base58btc) to CIDv1 (base32) for
+  subdomain compatibility.
+- **CID normalization**: Produces a canonical form used as cache keys and
+  blocklist entries.
+
+### `src/ipfs/kubo-data-source.ts` -- Kubo HTTP Client
+
+Fetches content from the local Kubo IPFS gateway over HTTP:
+
+- **Connection timeout** (`IPFS_KUBO_REQUEST_TIMEOUT_MS`): Maximum time to
+  receive response headers from Kubo. Covers DNS, TCP, and TLS handshake plus
+  time-to-first-byte.
+- **Stall timeout** (`IPFS_STREAM_STALL_TIMEOUT_MS`): Maximum idle time during
+  body streaming. The timer resets on each received chunk, so large but
+  actively-streaming transfers complete without issue. If no data arrives for
+  this duration, the stream is aborted.
+- Returns a readable stream along with response metadata (content type, content
+  length).
+
+### `src/ipfs/ipfs-cache.ts` -- Bounded LRU Filesystem Cache
+
+A filesystem-backed cache separate from the Arweave contiguous data cache:
+
+- **Cache directory**: Configurable via `IPFS_CACHE_PATH` (default:
+  `data/ipfs-cache`).
+- **Size limit**: Bounded by `IPFS_CACHE_MAX_SIZE_BYTES` (default: 10 GB). When
+  the limit is exceeded, the least recently used entries are evicted.
+- **Cache key**: SHA-256 hash of the normalized CID concatenated with the
+  request path. This ensures consistent keys regardless of CID encoding.
+- **Metadata**: Each cached entry has a companion `.meta` file storing content
+  type, content length, and the original CID. Metadata is read on cache hits to
+  set response headers without re-parsing the content.
+- **Cleanup threshold**: `IPFS_CACHE_CLEANUP_THRESHOLD_SECONDS` controls how
+  often (in seconds) eviction scans run.
+
+### `src/ipfs/ipfs-blocklist.ts` -- CID Blocklist
+
+A file-based blocklist for content moderation:
+
+- **Format**: Plain text file, one CID per line. Lines starting with `#` are
+  treated as comments. Both CIDv0 and CIDv1 forms are normalized before
+  matching.
+- **Hot-reload**: The blocklist file is watched for changes using filesystem
+  notifications. Additions and removals take effect without restarting the
+  gateway.
+- **Response**: Blocked CIDs return HTTP 451 (Unavailable For Legal Reasons).
+
+### `src/ipfs/ipfs-rate-limiter.ts` -- Rate Limiter
+
+A dedicated token bucket rate limiter for IPFS traffic, separate from the
+Arweave rate limiter:
+
+- **Per-IP bucket**: Controls how much a single client can fetch
+  (`IPFS_RATE_LIMITER_IP_TOKENS_PER_BUCKET`,
+  `IPFS_RATE_LIMITER_IP_REFILL_PER_SEC`).
+- **Per-resource bucket**: Controls how much a single CID can be served
+  globally (`IPFS_RATE_LIMITER_RESOURCE_TOKENS_PER_BUCKET`,
+  `IPFS_RATE_LIMITER_RESOURCE_REFILL_PER_SEC`).
+- Tokens represent bytes (1 token = 1 byte).
Bucket sizes and refill rates are + configurable independently from the Arweave rate limiter. +- When limits are exceeded, the gateway returns HTTP 429 (Too Many Requests). + +### `src/ipfs/ipfs-service.ts` -- Service Orchestrator + +Coordinates all IPFS components behind a single interface: + +- Accepts a CID and optional path, runs it through the blocklist, rate limiter, + and cache, and falls back to the Kubo data source on cache miss. +- Handles stream teeing: on a cache miss, the Kubo response stream is split so + one branch writes to cache while the other streams to the client. +- Enforces `IPFS_MAX_RESPONSE_SIZE_BYTES` to prevent serving excessively large + files. +- Registered in `src/system.ts` alongside other data services, guarded by the + `IPFS_ENABLED` flag. + +### `src/middleware/ipfs.ts` -- Subdomain Middleware + +Express middleware that intercepts requests based on the `Host` header: + +- Parses the hostname to detect the `{CID}.{root_host}` pattern. +- Extracts the CID and any path from the URL. +- Performs CIDv0 to CIDv1 redirect when necessary. +- Forwards the extracted CID and path to the IPFS route handler. +- Must run before the ArNS subdomain middleware to prevent the CID from being + misinterpreted as an ArNS name. + +### `src/routes/ipfs.ts` -- Route Handlers + +Express route handlers for path-based IPFS access: + +- `GET /ipfs/:cid` and `GET /ipfs/:cid/*` routes. +- Validates the CID parameter, delegates to the IPFS service, and streams the + response with appropriate headers. +- Sets `Cache-Control`, `ETag`, and `X-Ipfs-Path` headers on successful + responses. + +## Data Flow + +The complete lifecycle of an IPFS request: + +1. **Request arrives.** Either path-based (`/ipfs/{CID}/path`) or + subdomain-based (`{CID}.ipfs.gateway.io/path`). Subdomain requests are + detected by the IPFS middleware and rewritten internally to match the + path-based route. + +2. **CID validation.** The CID string is parsed. 
If it is not a valid CIDv0 or + CIDv1, the request returns 400 (Bad Request). If it is a CIDv0 in a + subdomain context, a 301 redirect is issued to the CIDv1 equivalent. + +3. **Blocklist check.** The normalized CID is checked against the in-memory + blocklist. If matched, the request returns 451 (Unavailable For Legal + Reasons) immediately. + +4. **Rate limit check.** Both the per-IP and per-resource buckets are checked. + If either bucket is exhausted, the request returns 429 (Too Many Requests) + with a `Retry-After` header. + +5. **Cache lookup.** The cache key (SHA-256 of normalized CID + path) is looked + up in the filesystem cache. On a hit, the cached file and its `.meta` + companion are read and streamed to the client. + +6. **Kubo fetch.** On a cache miss, the service makes an HTTP GET to the local + Kubo gateway (`IPFS_KUBO_URL/ipfs/{CID}/{path}`). The response stream is + tee'd: one branch writes to the cache directory, the other streams directly + to the client. Both the connection timeout and the stall timeout apply during + this phase. + +7. **Response.** The following headers are set on successful responses: + + | Header | Value | Purpose | + |--------|-------|---------| + | `Cache-Control` | `public, max-age=31536000, immutable` | CID content never changes | + | `ETag` | `"{CID}"` | Content-addressed deduplication | + | `X-Ipfs-Path` | `/ipfs/{CID}/{path}` | IPFS ecosystem interop | + | `Content-Type` | Detected by Kubo or from `.meta` | Standard MIME typing | + +## Caching Strategy + +IPFS content is **content-addressed**: a given CID always maps to the same +bytes. This makes cached entries permanently valid -- there is no stale data and +no revalidation needed. 
+
+### Cache Properties
+
+| Property | Details |
+|----------|---------|
+| **Location** | `IPFS_CACHE_PATH` (default `data/ipfs-cache`), separate from Arweave data |
+| **Key** | SHA-256 hash of normalized CID concatenated with request path |
+| **Eviction** | LRU (least recently used) when total size exceeds `IPFS_CACHE_MAX_SIZE_BYTES` |
+| **Max size** | `IPFS_CACHE_MAX_SIZE_BYTES` (default 10 GB) |
+| **Metadata** | Companion `.meta` JSON files store content type, size, and original CID |
+| **Cleanup** | Eviction scans run every `IPFS_CACHE_CLEANUP_THRESHOLD_SECONDS` seconds (default 3600) |
+| **Permanence** | No TTL-based expiration; entries are valid forever unless evicted for space |
+
+### Why a Separate Cache
+
+The Arweave contiguous data cache is archival in nature -- operators configure
+it to retain as much data as possible, potentially without eviction. IPFS
+content has different retention characteristics: it requires pinning to persist
+on the IPFS network, and gateway operators may not want unbounded IPFS storage.
+A separate bounded LRU cache gives operators independent control over Arweave
+and IPFS storage budgets.
+
+## Security and Moderation
+
+### Content Moderation
+
+IPFS content moderation uses the same admin API as Arweave data moderation.
+Block a CID using the existing endpoint:
+
+```bash
+curl -X PUT http://localhost:4000/ar-io/admin/block-data \
+  -H "Authorization: Bearer <ADMIN_API_KEY>" \
+  -H "Content-Type: application/json" \
+  -d '{"id": "bafkreigbk3hjz6oyiywqf7eknthwc2osvt5xi6b6igwljn2qrxkthqgrp4", "source": "manual", "notes": "Reason for block"}'
+```
+
+- Pass the CIDv1 base32 string as the `id` field (same field used for Arweave TX IDs).
+- Blocked requests return HTTP 451 (Unavailable for Legal Reasons).
+- One unified moderation system for all content (Arweave and IPFS).
+
+### Rate Limiting
+
+IPFS rate limiting uses a separate token pool from the Arweave rate limiter.
+This prevents IPFS traffic from consuming Arweave rate limit capacity and gives +operators independent tuning for each protocol. + +- **Per-IP limits** prevent a single client from monopolizing bandwidth. +- **Per-resource limits** prevent a single popular CID from consuming all + available throughput. +- Token counts represent bytes of data served. + +### Size Limits + +`IPFS_MAX_RESPONSE_SIZE_BYTES` (default 1 GB) caps the maximum size of a single +IPFS response. Requests for content exceeding this limit are rejected. This +protects the gateway from serving unexpectedly large files that could exhaust +memory or disk. + +### Subdomain Isolation + +Subdomain-based access (`{CID}.ipfs.gateway.io`) provides browser origin +isolation. Each CID gets its own origin, preventing cross-content scripting +attacks. This follows the same security model used by IPFS gateways and the +existing ArNS subdomain sandboxing in the AR.IO Gateway. + +## Docker Deployment + +IPFS runs as an opt-in Docker Compose profile. Enabling it starts a Kubo sidecar +alongside the core gateway services. 
+ +### Docker Compose Profile + +The `ipfs` profile adds a Kubo container: + +```yaml +services: + kubo: + image: ipfs/kubo:latest + profiles: + - ipfs + ports: + - "4001:4001" # Swarm (libp2p) - public, for peer connections + expose: + - "5001" # API - internal only, not exposed to host + - "8080" # Gateway - internal only, used by ar-io-node + volumes: + - ipfs-data:/data/ipfs + environment: + - IPFS_PROFILE=server + command: ["daemon", "--enable-gc"] +``` + +### Port Reference + +| Port | Protocol | Exposure | Purpose | +|------|----------|----------|---------| +| 4001 | TCP/UDP | Public | libp2p swarm -- peer discovery and content exchange | +| 5001 | HTTP | Internal | Kubo API -- used for pinning and node management | +| 8080 | HTTP | Internal | Kubo HTTP Gateway -- used by `kubo-data-source` to fetch content | + +### Volume + +The `ipfs-data` volume persists the Kubo datastore (block storage, peer +identity, configuration). This volume is independent of the gateway's `data/` +directory. + +### Enabling IPFS + +Start the gateway with the IPFS profile: + +```bash +docker compose --profile ipfs up -d +``` + +Or set `IPFS_ENABLED=true` in your `.env` and include `ipfs` in your active +profiles. + +### Garbage Collection + +The Kubo container starts with `--enable-gc`, which periodically removes +unpinned blocks from the local IPFS datastore. This prevents unbounded growth +of the Kubo volume. Content actively being served is protected from GC. + +## Configuration Reference + +All environment variables are opt-in. The feature is disabled by default. + +| Variable | Type | Default | Description | +|----------|------|---------|-------------| +| `IPFS_ENABLED` | Boolean | `false` | Master switch for IPFS support. When false, IPFS routes are not registered and the Kubo sidecar is not required. | +| `IPFS_KUBO_URL` | String | `http://kubo:8080` | Base URL of the local Kubo HTTP Gateway. In Docker, this is the container name. 
For local development, use `http://localhost:8080`. |
+| `IPFS_KUBO_REQUEST_TIMEOUT_MS` | Number | `30000` | Connection timeout in milliseconds for Kubo requests (time to receive response headers). |
+| `IPFS_STREAM_STALL_TIMEOUT_MS` | Number | `30000` | Stall timeout in milliseconds for streaming responses from Kubo. Stream is aborted if no data is received for this duration. Actively-streaming transfers are not affected. |
+| `IPFS_CACHE_PATH` | String | `data/ipfs-cache` | Directory for the IPFS filesystem cache. Relative paths are resolved from the gateway's working directory. |
+| `IPFS_CACHE_MAX_SIZE_BYTES` | Number | `10737418240` (10 GB) | Maximum total size of the IPFS cache directory. LRU eviction begins when this limit is exceeded. |
+| `IPFS_CACHE_CLEANUP_THRESHOLD_SECONDS` | Number | `3600` | Interval in seconds between cache eviction scans. |
+| `IPFS_BLOCKLIST_PATH` | String | `data/ipfs-blocklist.txt` | Path to the CID blocklist file. The file is watched for changes and reloaded automatically. |
+| `IPFS_RATE_LIMITER_IP_TOKENS_PER_BUCKET` | Number | `100000` | Maximum tokens (bytes) per IP bucket. |
+| `IPFS_RATE_LIMITER_IP_REFILL_PER_SEC` | Number | `20` | Tokens added to each IP bucket per second. |
+| `IPFS_RATE_LIMITER_RESOURCE_TOKENS_PER_BUCKET` | Number | `1000000` | Maximum tokens (bytes) per resource (CID) bucket. |
+| `IPFS_RATE_LIMITER_RESOURCE_REFILL_PER_SEC` | Number | `100` | Tokens added to each resource bucket per second. |
+| `IPFS_MAX_RESPONSE_SIZE_BYTES` | Number | `1073741824` (1 GB) | Maximum response size for a single IPFS request. Requests exceeding this are rejected. |
+
+## Phase 2: ArNS to IPFS Resolution
+
+Phase 2 connects ArNS naming to IPFS content, allowing `my-dapp.arweave.dev` to
+serve IPFS-hosted content without the user needing to know the CID.
+
+### How It Works
+
+1. **ANT record stores a CID.** The Arweave Name Token contract's
+   `transactionId` field accepts any string.
An ANT owner sets it to an IPFS CID + (e.g., `bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi`) + instead of an Arweave transaction ID. + +2. **Gateway resolves the ArNS name.** The standard ArNS resolution pipeline + fetches the ANT record and extracts the `transactionId`. + +3. **CID detection.** The gateway inspects the resolved ID. If it matches CID + format (multibase-prefixed, valid multicodec), it is classified as an IPFS + CID rather than an Arweave transaction ID. + +4. **Route to IPFS service.** Instead of fetching from the Arweave data pipeline + (cache, S3, peers, chunks), the gateway routes the request to the IPFS + service, which follows the same blocklist, rate limit, cache, and Kubo fetch + pipeline described above. + +### Key Design Decisions + +- **No contract changes.** CID detection happens entirely at the gateway level. + The ANT contract's `transactionId` field is a free-form string, so it already + accepts CIDs. +- **Transparent to users.** A user visiting `my-dapp.arweave.dev` does not need + to know whether the content is on Arweave or IPFS. The URL is the same either + way. +- **Owner-controlled.** The ANT owner decides where content lives by setting + the `transactionId` to either an Arweave TX ID or an IPFS CID. Switching + between storage backends is a single contract interaction. +- **Caching and moderation apply.** All Phase 1 protections (blocklist, rate + limits, cache) apply to ArNS-resolved IPFS content. 
+ +## Differences from Arweave Data Serving + +| Aspect | Arweave | IPFS | +|--------|---------|------| +| **Addressing** | Transaction ID (43-char base64url) | CID (variable length, base32 or base58) | +| **Permanence** | Guaranteed by protocol (incentivized storage) | Requires pinning; content disappears if unpinned | +| **Path resolution** | Manifest JSON parsed by the gateway | UnixFS directories resolved by Kubo | +| **Verification** | Merkle proofs verified by the gateway | Block hashes verified internally by Kubo | +| **Caching** | Archival (operators retain data long-term, often without eviction) | LRU with bounded size (eviction when full) | +| **Cache-Control** | Varies by verification status and data source trust | `immutable` with 1-year max-age (content-addressed = never changes) | +| **Rate limiting** | Shared Arweave token bucket | Separate IPFS token bucket | +| **Data source** | Multi-source fallback chain (cache, S3, peers, gateways, Arweave nodes) | Single source: local Kubo node | +| **Upstream network** | Arweave protocol (block weave, mining incentives) | IPFS/libp2p (DHT, Bitswap) | +| **Moderation** | Transaction-level blocklist | CID-level blocklist (with CIDv0/v1 normalization) | + +## Observability + +### Prometheus Metrics + +| Metric | Type | Labels | Description | +|--------|------|--------|-------------| +| `ipfs_requests_total` | Counter | `status`, `method` | Total IPFS requests by HTTP status and method | +| `ipfs_cache_hit_total` | Counter | -- | Cache hits | +| `ipfs_cache_miss_total` | Counter | -- | Cache misses | +| `ipfs_content_size_bytes` | Histogram | -- | Distribution of served content sizes | +| `ipfs_request_duration_seconds` | Histogram | `source` (`cache`, `kubo`) | Request latency by data source | +| `ipfs_blocked_total` | Counter | -- | Requests blocked by the CID blocklist | + +### Structured Logging + +Each IPFS component creates a Winston child logger with a component-specific +label (e.g., `ipfs-service`, 
`kubo-data-source`, `ipfs-cache`). Log output +follows the gateway's standard JSONL format and is written to the same log +destination as all other gateway logs. Key log events: + +- `ipfs-service`: Request start, cache hit/miss, Kubo fetch start/complete, + errors. +- `kubo-data-source`: HTTP request details, timeout events, stream stalls. +- `ipfs-cache`: Eviction events, write errors, cleanup scan results. +- `ipfs-blocklist`: File reload events, blocked CID matches. + +### OpenTelemetry Tracing + +IPFS requests generate OpenTelemetry spans that are written to +`logs/otel-spans.jsonl` alongside Arweave request spans. The span hierarchy: + +``` +ipfs.request (root span) + +-- ipfs.blocklist.check + +-- ipfs.ratelimit.check + +-- ipfs.cache.lookup + +-- ipfs.kubo.fetch (only on cache miss) + +-- ipfs.cache.write (only on cache miss) +``` + +Spans include attributes for the CID, path, cache hit/miss status, response +size, and Kubo fetch duration. diff --git a/docs/openapi.yaml b/docs/openapi.yaml index 5f2a487cb..6a1d33b3c 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -2809,12 +2809,13 @@ paths: '/ar-io/admin/block-data': put: tags: [Admin] - summary: Blocks transactions or data-items so your AR.IO Gateway will not serve them. + summary: Blocks transactions, data-items, or IPFS CIDs so your AR.IO Gateway will not serve them. description: | - Submits a TX ID/data-item ID or sha-256 content hash for content you do not want your AR.IO Gateway to serve. Once submitted, your Gateway will not respond to requests for these transactions or data-items. + Submits a TX ID/data-item ID, IPFS CID, or sha-256 content hash for content you do not want your AR.IO Gateway to serve. Once submitted, your Gateway will not respond to requests for these transactions, data-items, or IPFS CIDs. + For IPFS content, pass the CIDv1 base32 string (e.g. bafkreigbk3hjz6oyiywqf7eknthwc2osvt5xi6b6igwljn2qrxkthqgrp4) as the id field. The gateway returns HTTP 451 for blocked CIDs. 
- WARNING - Testing a TX ID here WILL result in that data being blocked by your Gateway. + WARNING - Testing an ID here WILL result in that data being blocked by your Gateway. operationId: adminBlockData requestBody: required: true diff --git a/monitoring/grafana/dashboards/examples/ipfs-example.json b/monitoring/grafana/dashboards/examples/ipfs-example.json new file mode 100644 index 000000000..c3f4772f4 --- /dev/null +++ b/monitoring/grafana/dashboards/examples/ipfs-example.json @@ -0,0 +1,72 @@ +{ + "annotations": { "list": [] }, + "editable": true, + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "reqps" } }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "id": 1, + "title": "IPFS Requests/sec", + "type": "timeseries", + "targets": [{ "expr": "sum(rate(ipfs_requests_total[5m])) by (status)", "legendFormat": "{{status}}" }] + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "percentunit" } }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "id": 2, + "title": "IPFS Cache Hit Rate", + "type": "stat", + "targets": [{ "expr": "sum(rate(ipfs_cache_hit_total[5m])) / clamp_min(sum(rate(ipfs_cache_hit_total[5m])) + sum(rate(ipfs_cache_miss_total[5m])), 1)", "legendFormat": "hit rate" }] + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "s" } }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, + "id": 3, + "title": "IPFS Request Duration (p50/p95/p99)", + "type": "timeseries", + "targets": [ + { "expr": "histogram_quantile(0.50, sum(rate(ipfs_request_duration_seconds_bucket[5m])) by (le))", "legendFormat": "p50" }, + { "expr": "histogram_quantile(0.95, sum(rate(ipfs_request_duration_seconds_bucket[5m])) by (le))", "legendFormat": "p95" }, + { "expr": "histogram_quantile(0.99, sum(rate(ipfs_request_duration_seconds_bucket[5m])) by (le))", "legendFormat": "p99" } + ] 
+ }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "bytes" } }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "id": 4, + "title": "IPFS Content Size Distribution", + "type": "timeseries", + "targets": [ + { "expr": "histogram_quantile(0.50, sum(rate(ipfs_content_size_bytes_bucket[5m])) by (le))", "legendFormat": "p50 size" }, + { "expr": "histogram_quantile(0.95, sum(rate(ipfs_content_size_bytes_bucket[5m])) by (le))", "legendFormat": "p95 size" } + ] + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "short" } }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, + "id": 5, + "title": "IPFS Blocked Requests", + "type": "timeseries", + "targets": [{ "expr": "sum(rate(ipfs_blocked_total[5m]))", "legendFormat": "blocked/sec" }] + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "reqps" } }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, + "id": 6, + "title": "IPFS by Route Type", + "type": "timeseries", + "targets": [{ "expr": "sum(rate(ipfs_requests_total[5m])) by (route_type)", "legendFormat": "{{route_type}}" }] + } + ], + "schemaVersion": 39, + "tags": ["ipfs", "ar-io"], + "time": { "from": "now-1h", "to": "now" }, + "title": "AR.IO IPFS", + "uid": "ar-io-ipfs" +} diff --git a/package.json b/package.json index c48f1424c..91300f497 100644 --- a/package.json +++ b/package.json @@ -58,6 +58,7 @@ "memoizee": "^0.4.17", "middleware-async": "^1.4.0", "msgpackr": "^1.11.5", + "multiformats": "^13.1.0", "node-cache": "^5.1.2", "opossum": "^8.4.0", "opossum-prometheus": "^0.4.0", diff --git a/src/app.ts b/src/app.ts index a8d1187e7..6442a690f 100644 --- a/src/app.ts +++ b/src/app.ts @@ -25,6 +25,8 @@ import { datasetsRouter } from './routes/datasets.js'; import * as system from './system.js'; import { createX402Router } from './routes/x402.js'; import { createRateLimitRouter } 
from './routes/rate-limit.js'; +import { createIpfsSubdomainMiddleware } from './middleware/ipfs.js'; +import { createIpfsRouter, createIpfsHandler } from './routes/ipfs.js'; // Initialize DNS resolution for preferred chunk GET nodes (non-fatal on failure) try { @@ -136,6 +138,25 @@ if (system.rateLimiter !== undefined) { }), ); } +// IPFS routes — must be before ArNS to intercept {CID}.{host} subdomains +if (config.IPFS_ENABLED && system.ipfsService !== undefined) { + const ipfsHandler = createIpfsHandler({ + log, + ipfsService: system.ipfsService, + rateLimiter: system.ipfsRateLimiter, + paymentProcessor: system.paymentProcessor, + }); + app.use(createIpfsSubdomainMiddleware({ ipfsHandler })); + app.use( + createIpfsRouter({ + log, + ipfsService: system.ipfsService, + rateLimiter: system.ipfsRateLimiter, + paymentProcessor: system.paymentProcessor, + }), + ); +} + app.use(arnsRouter); app.use(openApiRouter); app.use(arIoRouter); diff --git a/src/config.ts b/src/config.ts index ef648587d..9b48bb9d1 100644 --- a/src/config.ts +++ b/src/config.ts @@ -2143,3 +2143,67 @@ if (ENABLE_SAMPLING_DATA_SOURCE) { ); } } + +// +// IPFS +// + +export const IPFS_ENABLED = + env.varOrDefault('IPFS_ENABLED', 'false') === 'true'; + +export const IPFS_KUBO_URL = env.varOrDefault( + 'IPFS_KUBO_URL', + 'http://kubo:8080', +); + +export const IPFS_KUBO_REQUEST_TIMEOUT_MS = env.positiveIntOrDefault( + 'IPFS_KUBO_REQUEST_TIMEOUT_MS', + 30000, +); + +export const IPFS_STREAM_STALL_TIMEOUT_MS = env.positiveIntOrDefault( + 'IPFS_STREAM_STALL_TIMEOUT_MS', + 30000, +); + +export const IPFS_CACHE_PATH = env.varOrDefault( + 'IPFS_CACHE_PATH', + 'data/ipfs-cache', +); + +export const IPFS_CACHE_MAX_SIZE_BYTES = env.positiveIntOrDefault( + 'IPFS_CACHE_MAX_SIZE_BYTES', + 10 * 1024 * 1024 * 1024, // 10 GB +); + +// Reserved for future cache cleanup worker. Currently unused — LRU eviction +// in the in-memory index handles cache bounding. 
After restarts, disk usage +// may temporarily exceed IPFS_CACHE_MAX_SIZE_BYTES until the index rebuilds. +export const IPFS_CACHE_CLEANUP_THRESHOLD_SECONDS = env.positiveIntOrDefault( + 'IPFS_CACHE_CLEANUP_THRESHOLD_SECONDS', + 3600, +); + +export const IPFS_RATE_LIMITER_IP_TOKENS_PER_BUCKET = env.positiveIntOrDefault( + 'IPFS_RATE_LIMITER_IP_TOKENS_PER_BUCKET', + 100000, +); + +export const IPFS_RATE_LIMITER_IP_REFILL_PER_SEC = env.positiveIntOrDefault( + 'IPFS_RATE_LIMITER_IP_REFILL_PER_SEC', + 20, +); + +export const IPFS_RATE_LIMITER_RESOURCE_TOKENS_PER_BUCKET = + env.positiveIntOrDefault( + 'IPFS_RATE_LIMITER_RESOURCE_TOKENS_PER_BUCKET', + 1000000, + ); + +export const IPFS_RATE_LIMITER_RESOURCE_REFILL_PER_SEC = + env.positiveIntOrDefault('IPFS_RATE_LIMITER_RESOURCE_REFILL_PER_SEC', 100); + +export const IPFS_MAX_RESPONSE_SIZE_BYTES = env.positiveIntOrDefault( + 'IPFS_MAX_RESPONSE_SIZE_BYTES', + 1 * 1024 * 1024 * 1024, // 1 GB +); diff --git a/src/ipfs/ipfs-cache.ts b/src/ipfs/ipfs-cache.ts new file mode 100644 index 000000000..54af569ba --- /dev/null +++ b/src/ipfs/ipfs-cache.ts @@ -0,0 +1,254 @@ +/** + * AR.IO Gateway + * Copyright (C) 2022-2025 Permanent Data Solutions, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ +import crypto from 'node:crypto'; +import EventEmitter from 'node:events'; +import fs from 'node:fs'; +import { Readable } from 'node:stream'; +import { pipeline } from 'node:stream/promises'; +import winston from 'winston'; +import { LRUCache } from 'lru-cache'; + +import * as events from '../events.js'; +import { currentUnixTimestamp } from '../lib/time.js'; + +interface CacheEntry { + size: number; + contentType: string; +} + +export class IpfsFsCache { + private log: winston.Logger; + private baseDir: string; + private index: LRUCache; + private eventEmitter?: EventEmitter; + + constructor({ + log, + basePath, + maxSizeBytes, + eventEmitter, + }: { + log: winston.Logger; + basePath: string; + maxSizeBytes: number; + eventEmitter?: EventEmitter; + }) { + this.log = log.child({ class: this.constructor.name }); + this.baseDir = basePath; + this.eventEmitter = eventEmitter; + this.index = new LRUCache({ + maxSize: maxSizeBytes, + sizeCalculation: (entry) => entry.size, + dispose: (_entry, key) => { + // Delete file from disk when evicted from LRU + const dataPath = this.dataPath(key); + const metaPath = this.metaPath(key); + fs.promises.unlink(dataPath).catch(() => {}); + fs.promises.unlink(metaPath).catch(() => {}); + this.log.debug('Evicted IPFS cache entry', { key }); + }, + }); + } + + private cacheKey(cidString: string, path?: string): string { + const raw = + path !== undefined && path !== '' ? 
`${cidString}/${path}` : cidString; + return crypto.createHash('sha256').update(raw).digest('hex'); + } + + private dataDir(key: string): string { + const prefix = `${key.substring(0, 2)}/${key.substring(2, 4)}`; + return `${this.baseDir}/data/${prefix}`; + } + + private dataPath(key: string): string { + return `${this.dataDir(key)}/${key}`; + } + + private metaPath(key: string): string { + return `${this.dataDir(key)}/${key}.meta`; + } + + private tempDir(): string { + return `${this.baseDir}/tmp`; + } + + private createTempPath(): string { + return `${this.tempDir()}/${crypto.randomBytes(16).toString('hex')}`; + } + + async has(cidString: string, path?: string): Promise { + const key = this.cacheKey(cidString, path); + if (this.index.has(key)) { + return true; + } + // Check disk in case index was lost (restart) + try { + await fs.promises.access(this.dataPath(key), fs.constants.F_OK); + // Rebuild index entry from meta file + const meta = await this.readMeta(key); + if (meta) { + this.index.set(key, meta); + return true; + } + } catch { + // Not found + } + return false; + } + + async get( + cidString: string, + path?: string, + ): Promise< + { stream: Readable; size: number; contentType: string } | undefined + > { + const key = this.cacheKey(cidString, path); + let entry = this.index.get(key); + + // Rebuild index from disk if entry is missing (e.g., after restart) + if (!entry) { + try { + await fs.promises.access(this.dataPath(key), fs.constants.F_OK); + const meta = await this.readMeta(key); + if (meta) { + this.index.set(key, meta); + entry = meta; + } + } catch { + // File not on disk — true cache miss + } + } + + if (entry) { + const dataPath = this.dataPath(key); + try { + await fs.promises.access(dataPath, fs.constants.F_OK); + const stream = fs.createReadStream(dataPath); + return { + stream, + size: entry.size, + contentType: entry.contentType, + }; + } catch (error: any) { + this.log.error('Failed to read cached IPFS content', { + key, + message: 
error.message, + }); + this.index.delete(key); + } + } + return undefined; + } + + async put( + cidString: string, + stream: Readable, + size: number, + contentType: string, + path?: string, + ): Promise { + const key = this.cacheKey(cidString, path); + + try { + await fs.promises.mkdir(this.tempDir(), { recursive: true }); + const tempPath = this.createTempPath(); + const writeStream = fs.createWriteStream(tempPath); + + await pipeline(stream, writeStream); + + // Move to final location + const dataDir = this.dataDir(key); + await fs.promises.mkdir(dataDir, { recursive: true }); + await fs.promises.rename(tempPath, this.dataPath(key)); + + // Write metadata + const meta: CacheEntry = { size, contentType }; + await fs.promises.writeFile( + this.metaPath(key), + JSON.stringify(meta), + 'utf-8', + ); + + // Update index + this.index.set(key, meta); + + this.log.debug('Cached IPFS content', { cidString, path, key, size }); + } catch (error: any) { + this.log.error('Failed to cache IPFS content', { + cidString, + path, + message: error.message, + }); + } + } + + /** + * Finalize a cache entry from an already-written temp file. + * Used by the streaming cache writer to avoid double-copying data. 
+ */ + async putFromFile( + cidString: string, + tempPath: string, + size: number, + contentType: string, + path?: string, + ): Promise { + const key = this.cacheKey(cidString, path); + + try { + const dataDir = this.dataDir(key); + await fs.promises.mkdir(dataDir, { recursive: true }); + await fs.promises.rename(tempPath, this.dataPath(key)); + + const meta: CacheEntry = { size, contentType }; + await fs.promises.writeFile( + this.metaPath(key), + JSON.stringify(meta), + 'utf-8', + ); + + this.index.set(key, meta); + + this.log.debug('Cached IPFS content from file', { + cidString, + path, + key, + size, + }); + + this.eventEmitter?.emit(events.DATA_CACHED, { + id: cidString, + hash: key, + dataSize: size, + contentType, + cachedAt: currentUnixTimestamp(), + }); + } catch (error: any) { + this.log.error('Failed to finalize cached IPFS content', { + cidString, + path, + message: error.message, + }); + // Clean up temp file on failure + fs.promises.unlink(tempPath).catch(() => {}); + } + } + + getCachePath(): string { + return this.baseDir; + } + + private async readMeta(key: string): Promise { + try { + const raw = await fs.promises.readFile(this.metaPath(key), 'utf-8'); + return JSON.parse(raw) as CacheEntry; + } catch { + return null; + } + } +} diff --git a/src/ipfs/ipfs-cid.test.ts b/src/ipfs/ipfs-cid.test.ts new file mode 100644 index 000000000..a0aa35934 --- /dev/null +++ b/src/ipfs/ipfs-cid.test.ts @@ -0,0 +1,110 @@ +/** + * AR.IO Gateway + * Copyright (C) 2022-2025 Permanent Data Solutions, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ +import { describe, it } from 'node:test'; +import { strict as assert } from 'node:assert'; + +import { + parseCid, + isValidCid, + isCidV0, + cidToV1Base32, + cidToString, +} from '../lib/ipfs-cid.js'; + +describe('ipfs-cid utilities', () => { + // Known CIDv1 base32 (dag-pb, sha2-256) + const CIDV1_BASE32 = + 'bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi'; + + // Known CIDv0 (Qm prefix, base58btc) + const CIDV0 = 'QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG'; + + describe('parseCid', () => { + it('parses a valid CIDv1 base32 string', () => { + const cid = parseCid(CIDV1_BASE32); + assert.notEqual(cid, null); + assert.equal(cid!.version, 1); + }); + + it('parses a valid CIDv0 string', () => { + const cid = parseCid(CIDV0); + assert.notEqual(cid, null); + assert.equal(cid!.version, 0); + }); + + it('returns null for invalid strings', () => { + assert.equal(parseCid('not-a-cid'), null); + assert.equal(parseCid(''), null); + assert.equal(parseCid('abc123'), null); + }); + + it('returns null for Arweave TX IDs (43-char base64url)', () => { + // A typical Arweave TX ID — not a valid CID + assert.equal( + parseCid('TB2wJyKrPnkAW79DAwlJYwpgdHKpijEJWQfcwX715Co'), + null, + ); + }); + }); + + describe('isValidCid', () => { + it('returns true for valid CIDs', () => { + assert.equal(isValidCid(CIDV1_BASE32), true); + assert.equal(isValidCid(CIDV0), true); + }); + + it('returns false for invalid strings', () => { + assert.equal(isValidCid('not-a-cid'), false); + assert.equal(isValidCid(''), false); + }); + }); + + describe('isCidV0', () => { + it('returns true for CIDv0', () => { + const cid = parseCid(CIDV0)!; + assert.equal(isCidV0(cid), true); + }); + + it('returns false for CIDv1', () => { + const cid = parseCid(CIDV1_BASE32)!; + assert.equal(isCidV0(cid), false); + }); + }); + + describe('cidToV1Base32', () => { + it('converts CIDv0 to CIDv1 base32', () => { + const result = cidToV1Base32(CIDV0); + 
// Result should start with 'bafy' (dag-pb, sha2-256) + assert.match(result, /^bafy/); + // Should be all lowercase (DNS-safe) + assert.equal(result, result.toLowerCase()); + }); + + it('returns CIDv1 base32 unchanged', () => { + const result = cidToV1Base32(CIDV1_BASE32); + assert.equal(result, CIDV1_BASE32); + }); + + it('throws for invalid CID strings', () => { + assert.throws(() => cidToV1Base32('not-a-cid')); + }); + }); + + describe('cidToString', () => { + it('returns base58btc for CIDv0', () => { + const cid = parseCid(CIDV0)!; + const result = cidToString(cid); + assert.match(result, /^Qm/); + }); + + it('returns base32 for CIDv1', () => { + const cid = parseCid(CIDV1_BASE32)!; + const result = cidToString(cid); + assert.match(result, /^bafy/); + }); + }); +}); diff --git a/src/ipfs/ipfs-rate-limiter.ts b/src/ipfs/ipfs-rate-limiter.ts new file mode 100644 index 000000000..f79bf0490 --- /dev/null +++ b/src/ipfs/ipfs-rate-limiter.ts @@ -0,0 +1,26 @@ +/** + * AR.IO Gateway + * Copyright (C) 2022-2025 Permanent Data Solutions, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ +import * as config from '../config.js'; +import { MemoryRateLimiter } from '../limiter/memory-rate-limiter.js'; + +/** + * Creates a separate MemoryRateLimiter instance for IPFS requests. + * This ensures IPFS traffic doesn't compete with Arweave traffic + * for rate-limit tokens. 
+ */ +export function createIpfsRateLimiter(): MemoryRateLimiter { + return new MemoryRateLimiter({ + resourceCapacity: config.IPFS_RATE_LIMITER_RESOURCE_TOKENS_PER_BUCKET, + resourceRefillRate: config.IPFS_RATE_LIMITER_RESOURCE_REFILL_PER_SEC, + ipCapacity: config.IPFS_RATE_LIMITER_IP_TOKENS_PER_BUCKET, + ipRefillRate: config.IPFS_RATE_LIMITER_IP_REFILL_PER_SEC, + limitsEnabled: config.ENABLE_RATE_LIMITER && config.IPFS_ENABLED, + ipAllowlist: config.RATE_LIMITER_IPS_AND_CIDRS_ALLOWLIST, + capacityMultiplier: 1, + maxBuckets: 50000, + }); +} diff --git a/src/ipfs/ipfs-service.ts b/src/ipfs/ipfs-service.ts new file mode 100644 index 000000000..99967ee74 --- /dev/null +++ b/src/ipfs/ipfs-service.ts @@ -0,0 +1,285 @@ +/** + * AR.IO Gateway + * Copyright (C) 2022-2025 Permanent Data Solutions, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ +import fs from 'node:fs'; +import crypto from 'node:crypto'; +import { Readable } from 'node:stream'; +import winston from 'winston'; +import { Span } from '@opentelemetry/api'; + +import { cidToV1Base32 } from '../lib/ipfs-cid.js'; +import { startChildSpan } from '../tracing.js'; +import { IpfsFsCache } from './ipfs-cache.js'; +import { DataBlockListValidator } from '../types.js'; +import { + KuboDataSource, + IpfsBlockedError, + IpfsNotFoundError, + IpfsSizeLimitError, +} from './kubo-data-source.js'; +import * as metrics from '../metrics.js'; + +export interface IpfsGetContentResult { + stream: Readable; + size: number; + contentType: string; + cached: boolean; +} + +export class IpfsService { + private log: winston.Logger; + private dataSource: KuboDataSource; + private cache: IpfsFsCache; + private blockListValidator: DataBlockListValidator; + private maxResponseSizeBytes: number; + + constructor({ + log, + dataSource, + cache, + blockListValidator, + maxResponseSizeBytes, + }: { + log: winston.Logger; + dataSource: KuboDataSource; + cache: IpfsFsCache; + blockListValidator: 
DataBlockListValidator; + maxResponseSizeBytes: number; + }) { + this.log = log.child({ class: this.constructor.name }); + this.dataSource = dataSource; + this.cache = cache; + this.blockListValidator = blockListValidator; + this.maxResponseSizeBytes = maxResponseSizeBytes; + } + + async getContent({ + cidString, + path, + signal, + parentSpan, + }: { + cidString: string; + path?: string; + signal?: AbortSignal; + parentSpan?: Span; + }): Promise { + const span = startChildSpan( + 'IpfsService.getContent', + { + attributes: { + 'ipfs.cid': cidString, + 'ipfs.path': path ?? '', + }, + }, + parentSpan, + ); + + try { + // Normalize CID to v1 base32 for consistent caching + const normalizedCid = cidToV1Base32(cidString); + span.setAttribute('ipfs.cid_normalized', normalizedCid); + + // Check blocklist (uses the same admin API as Arweave data moderation) + if (await this.blockListValidator.isIdBlocked(normalizedCid)) { + metrics.ipfsBlockedTotal.inc(); + span.setAttribute('ipfs.blocked', true); + throw new IpfsBlockedError(`CID is blocked: ${normalizedCid}`); + } + + // Reject path traversal attempts + if (path !== undefined && (path.includes('..') || path.startsWith('/'))) { + throw new IpfsNotFoundError('Invalid IPFS path'); + } + + // Check cache + const cached = await this.cache.get(normalizedCid, path); + if (cached) { + this.log.debug('IPFS cache hit', { cid: normalizedCid, path }); + metrics.ipfsCacheHitTotal.inc(); + span.setAttributes({ + 'ipfs.cache': 'hit', + 'ipfs.size': cached.size, + }); + span.end(); + return { + stream: cached.stream, + size: cached.size, + contentType: cached.contentType, + cached: true, + }; + } + + metrics.ipfsCacheMissTotal.inc(); + span.setAttribute('ipfs.cache', 'miss'); + + // Fetch from Kubo + const result = await this.dataSource.getContent({ + cidString: normalizedCid, + path, + signal, + parentSpan: span, + }); + + span.setAttributes({ + 'ipfs.size': result.size, + 'ipfs.content_type': result.contentType, + }); + + // Enforce 
size limit when Content-Length is known + if ( + this.maxResponseSizeBytes > 0 && + result.size > 0 && + result.size > this.maxResponseSizeBytes + ) { + result.stream.destroy(); + throw new IpfsSizeLimitError( + `IPFS content size ${result.size} exceeds limit ${this.maxResponseSizeBytes}`, + ); + } + + // Stream directly to the client while writing to a temp file on disk + // for caching. No memory buffering — handles files of any size. + this.streamToCache( + normalizedCid, + path, + result.stream, + result.contentType, + ); + + // End span when stream completes + result.stream.on('end', () => span.end()); + result.stream.on('error', (err) => { + span.recordException(err); + span.end(); + }); + + return { + stream: result.stream, + size: result.size, + contentType: result.contentType, + cached: false, + }; + } catch (error: any) { + if (error.name !== 'AbortError') { + span.recordException(error); + } + span.end(); + throw error; + } + } + + /** + * Writes stream data to a temp cache file as it flows to the client. + * Non-blocking — errors are logged but don't affect the response. + * Buffers early chunks in memory until the write stream is ready, + * then flushes them to disk. 
+ */ + private streamToCache( + cidString: string, + path: string | undefined, + stream: Readable, + contentType: string, + ): void { + const cacheDir = `${this.cache.getCachePath()}/tmp`; + const tempPath = `${cacheDir}/${crypto.randomBytes(16).toString('hex')}`; + let writeStream: fs.WriteStream | null = null; + let bytesWritten = 0; + let failed = false; + const pendingChunks: Buffer[] = []; + + const cleanup = () => { + if (writeStream) { + writeStream.destroy(); + writeStream = null; + } + pendingChunks.length = 0; + fs.promises.unlink(tempPath).catch(() => {}); + }; + + // Create temp directory and write stream + fs.promises + .mkdir(cacheDir, { recursive: true }) + .then(() => { + if (failed) return; + writeStream = fs.createWriteStream(tempPath); + writeStream.on('error', (error) => { + failed = true; + this.log.error('Cache write stream error', { + cid: cidString, + message: error.message, + }); + cleanup(); + }); + // Flush any chunks that arrived before writeStream was ready + for (const chunk of pendingChunks) { + writeStream.write(chunk); + } + pendingChunks.length = 0; + }) + .catch((error) => { + failed = true; + this.log.error('Failed to create cache temp dir', { + message: error.message, + }); + }); + + stream.on('data', (chunk: Buffer) => { + if (failed) return; + bytesWritten += chunk.length; + + // Enforce size limit during streaming (catches chunked responses + // that lack Content-Length) + if ( + this.maxResponseSizeBytes > 0 && + bytesWritten > this.maxResponseSizeBytes + ) { + failed = true; + stream.destroy( + new IpfsSizeLimitError( + `IPFS content exceeds limit during streaming: ${bytesWritten} > ${this.maxResponseSizeBytes}`, + ), + ); + cleanup(); + return; + } + + if (writeStream) { + writeStream.write(chunk); + } else { + // Buffer until writeStream is ready (typically only first 1-2 chunks) + pendingChunks.push(chunk); + } + }); + + stream.on('end', () => { + if (failed || !writeStream) { + // If writeStream never became ready, 
discard + failed = true; + cleanup(); + return; + } + writeStream.end(() => { + // Finalize: move temp file into cache + this.cache + .putFromFile(cidString, tempPath, bytesWritten, contentType, path) + .catch((error) => { + this.log.error('Failed to finalize IPFS cache entry', { + cid: cidString, + path, + message: error.message, + }); + cleanup(); + }); + }); + }); + + stream.on('error', () => { + failed = true; + cleanup(); + }); + } +} diff --git a/src/ipfs/kubo-data-source.test.ts b/src/ipfs/kubo-data-source.test.ts new file mode 100644 index 000000000..8a9ff690b --- /dev/null +++ b/src/ipfs/kubo-data-source.test.ts @@ -0,0 +1,105 @@ +/** + * AR.IO Gateway + * Copyright (C) 2022-2025 Permanent Data Solutions, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ +import { describe, it, beforeEach } from 'node:test'; +import { strict as assert } from 'node:assert'; + +import { createTestLogger } from '../../test/test-logger.js'; +import { + KuboDataSource, + IpfsNotFoundError, + IpfsTimeoutError, + IpfsUnavailableError, +} from './kubo-data-source.js'; + +describe('KuboDataSource', () => { + const log = createTestLogger({ suite: 'KuboDataSource' }); + + let kuboDataSource: KuboDataSource; + + beforeEach(() => { + kuboDataSource = new KuboDataSource({ + log, + kuboUrl: 'http://localhost:8080', + requestTimeoutMs: 5000, + streamStallTimeoutMs: 5000, + }); + }); + + describe('getContent', () => { + it('constructs correct URL for bare CID', async () => { + // This test verifies URL construction without making a real request. + // A real integration test would require a running Kubo instance. 
+ const controller = new AbortController(); + controller.abort(); // Abort immediately + + try { + await kuboDataSource.getContent({ + cidString: + 'bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi', + signal: controller.signal, + }); + } catch (error: any) { + // Expected to throw due to abort + assert.ok(error); + } + }); + + it('constructs correct URL for CID with path', async () => { + const controller = new AbortController(); + controller.abort(); + + try { + await kuboDataSource.getContent({ + cidString: + 'bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi', + path: 'images/logo.png', + signal: controller.signal, + }); + } catch (error: any) { + assert.ok(error); + } + }); + + it('throws AbortError when signal is already aborted', async () => { + const controller = new AbortController(); + controller.abort(); + + await assert.rejects( + () => + kuboDataSource.getContent({ + cidString: + 'bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi', + signal: controller.signal, + }), + (error: any) => { + assert.ok( + error.name === 'AbortError' || error.code === 'ERR_CANCELED', + ); + return true; + }, + ); + }); + }); + + describe('error types', () => { + it('IpfsNotFoundError has correct name', () => { + const error = new IpfsNotFoundError('not found'); + assert.equal(error.name, 'IpfsNotFoundError'); + assert.equal(error.message, 'not found'); + }); + + it('IpfsTimeoutError has correct name', () => { + const error = new IpfsTimeoutError('timeout'); + assert.equal(error.name, 'IpfsTimeoutError'); + }); + + it('IpfsUnavailableError has correct name', () => { + const error = new IpfsUnavailableError('unavailable'); + assert.equal(error.name, 'IpfsUnavailableError'); + }); + }); +}); diff --git a/src/ipfs/kubo-data-source.ts b/src/ipfs/kubo-data-source.ts new file mode 100644 index 000000000..776793d08 --- /dev/null +++ b/src/ipfs/kubo-data-source.ts @@ -0,0 +1,247 @@ +/** + * AR.IO Gateway + * Copyright (C) 2022-2025 Permanent Data 
Solutions, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ +import { default as axios } from 'axios'; +import { Readable } from 'node:stream'; +import winston from 'winston'; +import { Span } from '@opentelemetry/api'; + +import { attachStallTimeout } from '../lib/stream.js'; +import { startChildSpan } from '../tracing.js'; + +export interface IpfsContentResult { + stream: Readable; + size: number; + contentType: string; +} + +export class KuboDataSource { + private log: winston.Logger; + private kuboUrl: string; + private requestTimeoutMs: number; + private streamStallTimeoutMs: number; + + constructor({ + log, + kuboUrl, + requestTimeoutMs, + streamStallTimeoutMs, + }: { + log: winston.Logger; + kuboUrl: string; + requestTimeoutMs: number; + streamStallTimeoutMs: number; + }) { + this.log = log.child({ class: this.constructor.name }); + this.kuboUrl = kuboUrl.replace(/\/$/, ''); + this.requestTimeoutMs = requestTimeoutMs; + this.streamStallTimeoutMs = streamStallTimeoutMs; + } + + async getContent({ + cidString, + path, + signal, + parentSpan, + }: { + cidString: string; + path?: string; + signal?: AbortSignal; + parentSpan?: Span; + }): Promise { + signal?.throwIfAborted(); + + // URL-encode path segments to prevent breaking the upstream request + const encodedPath = + path !== undefined && path !== '' + ? path + .split('/') + .map((seg) => encodeURIComponent(seg)) + .join('/') + : undefined; + const ipfsPath = + encodedPath !== undefined ? `${cidString}/${encodedPath}` : cidString; + const url = `${this.kuboUrl}/ipfs/${ipfsPath}`; + + const span = startChildSpan( + 'KuboDataSource.getContent', + { + attributes: { + 'ipfs.cid': cidString, + 'ipfs.path': path ?? 
'', + 'ipfs.url': url, + }, + }, + parentSpan, + ); + + this.log.debug('Fetching IPFS content from Kubo', { + cidString, + path, + url, + }); + + // Connection-phase timeout + const controller = new AbortController(); + const connectionTimer = setTimeout(() => { + controller.abort(new Error('Kubo connection timeout')); + }, this.requestTimeoutMs); + + // Forward client abort to our controller + const onClientAbort = () => controller.abort(signal?.reason); + if (signal?.aborted) { + onClientAbort(); + } else if (signal) { + signal.addEventListener('abort', onClientAbort, { once: true }); + } + + try { + const response = await axios.get(url, { + responseType: 'stream', + signal: controller.signal, + headers: { + 'Accept-Encoding': 'identity', + }, + maxRedirects: 5, + // Accept non-2xx so we can handle 404/408/504 ourselves + validateStatus: (status) => status < 500 || status === 504, + }); + + clearTimeout(connectionTimer); + signal?.removeEventListener('abort', onClientAbort); + + if (response.status === 404) { + (response.data as Readable).destroy(); + throw new IpfsNotFoundError( + `IPFS content not found: /ipfs/${ipfsPath}`, + ); + } + + if (response.status === 408 || response.status === 504) { + (response.data as Readable).destroy(); + throw new IpfsTimeoutError( + `Kubo timed out resolving: /ipfs/${ipfsPath}`, + ); + } + + if (response.status !== 200) { + const stream = response.data as Readable; + stream.destroy(); + throw new Error( + `Unexpected Kubo response status: ${response.status} for /ipfs/${ipfsPath}`, + ); + } + + const stream = response.data as Readable; + const rawContentLength = parseInt( + response.headers['content-length'] ?? '0', + 10, + ); + const contentLength = Number.isFinite(rawContentLength) + ? rawContentLength + : 0; + const contentType = + response.headers['content-type'] ?? 
'application/octet-stream'; + + // Switch from connection timeout to stall timeout + attachStallTimeout(stream, this.streamStallTimeoutMs); + + span.setAttributes({ + 'ipfs.content_length': contentLength, + 'ipfs.content_type': contentType, + }); + span.addEvent('Kubo fetch successful'); + + this.log.debug('Kubo fetch successful', { + cidString, + path, + contentLength, + contentType, + }); + + // End span when stream finishes or errors + stream.on('end', () => span.end()); + stream.on('error', (err) => { + span.recordException(err); + span.end(); + }); + + return { + stream, + size: contentLength, + contentType, + }; + } catch (error: any) { + clearTimeout(connectionTimer); + signal?.removeEventListener('abort', onClientAbort); + + if (error.name !== 'AbortError') { + span.recordException(error); + } + span.end(); + + if (error instanceof IpfsNotFoundError) throw error; + if (error instanceof IpfsTimeoutError) throw error; + + if (error.name === 'AbortError' || error.code === 'ERR_CANCELED') { + if (signal?.aborted) { + throw error; // Client disconnected + } + throw new IpfsTimeoutError( + `Kubo request timed out for /ipfs/${ipfsPath}`, + ); + } + + if (error.code === 'ECONNREFUSED') { + throw new IpfsUnavailableError( + `Kubo service unavailable at ${this.kuboUrl}`, + ); + } + + this.log.error('Failed to fetch from Kubo', { + cidString, + path, + message: error.message, + }); + throw error; + } + } +} + +export class IpfsNotFoundError extends Error { + constructor(message: string) { + super(message); + this.name = 'IpfsNotFoundError'; + } +} + +export class IpfsTimeoutError extends Error { + constructor(message: string) { + super(message); + this.name = 'IpfsTimeoutError'; + } +} + +export class IpfsUnavailableError extends Error { + constructor(message: string) { + super(message); + this.name = 'IpfsUnavailableError'; + } +} + +export class IpfsBlockedError extends Error { + constructor(message: string) { + super(message); + this.name = 'IpfsBlockedError'; + } 
+} + +export class IpfsSizeLimitError extends Error { + constructor(message: string) { + super(message); + this.name = 'IpfsSizeLimitError'; + } +} diff --git a/src/lib/httpsig.ts b/src/lib/httpsig.ts index 34c61c296..fecf9b1f3 100644 --- a/src/lib/httpsig.ts +++ b/src/lib/httpsig.ts @@ -46,6 +46,9 @@ export const TRIGGER_HEADERS = new Set([ 'x-arweave-chunk-data-root', 'x-arweave-chunk-tx-id', 'x-ar-io-chunk-source-type', + // IPFS headers — presence of x-ipfs-path triggers signing for IPFS responses + 'x-ipfs-path', + 'x-ar-io-source', ]); /** @@ -53,7 +56,12 @@ export const TRIGGER_HEADERS = new Set([ * when at least one TRIGGER_HEADER is also present. Signing them alone is too * broad — nearly every response has a Content-Type. */ -export const CO_SIGNABLE_HEADERS = new Set(['content-type', 'content-digest']); +export const CO_SIGNABLE_HEADERS = new Set([ + 'content-type', + 'content-digest', + 'x-cache', + 'etag', +]); // Header predicates normalize case defensively, but callers should pass // lowercase where possible. Express's `res.getHeaders()` keys are already diff --git a/src/lib/ipfs-cid.ts b/src/lib/ipfs-cid.ts new file mode 100644 index 000000000..96c0350cd --- /dev/null +++ b/src/lib/ipfs-cid.ts @@ -0,0 +1,60 @@ +/** + * AR.IO Gateway + * Copyright (C) 2022-2025 Permanent Data Solutions, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ +import { CID } from 'multiformats/cid'; +import { base32 } from 'multiformats/bases/base32'; +import { base58btc } from 'multiformats/bases/base58'; + +/** + * Safely parse a CID string (v0 or v1). Returns null on invalid input. + */ +export function parseCid(cidString: string): CID | null { + try { + return CID.parse(cidString); + } catch { + return null; + } +} + +/** + * Quick validation: attempts parse, returns boolean. 
+ */ +export function isValidCid(cidString: string): boolean { + return parseCid(cidString) !== null; +} + +/** + * Check if a CID is version 0 (starts with `Qm`, base58btc, dag-pb codec). + */ +export function isCidV0(cid: CID): boolean { + return cid.version === 0; +} + +/** + * Convert any CID string to CIDv1 base32lower (DNS-safe). + * Returns the base32lower string representation. + * Throws if the input is not a valid CID. + */ +export function cidToV1Base32(cidString: string): string { + const cid = CID.parse(cidString); + if (cid.version === 0) { + return cid.toV1().toString(base32); + } + return cid.toString(base32); +} + +/** + * Canonical string representation of a CID. + * v0 → base58btc, v1 → base32lower. + */ +export function cidToString(cid: CID): string { + if (cid.version === 0) { + return cid.toString(base58btc); + } + return cid.toString(base32); +} + +export { CID }; diff --git a/src/metrics.ts b/src/metrics.ts index d937bf9df..66740d39c 100644 --- a/src/metrics.ts +++ b/src/metrics.ts @@ -929,3 +929,41 @@ export const httpSigErrorsTotal = new promClient.Counter({ name: 'httpsig_errors_total', help: 'Total HTTPSIG signing errors', }); + +// +// IPFS metrics +// + +export const ipfsRequestsTotal = new promClient.Counter({ + name: 'ipfs_requests_total', + help: 'Total IPFS content requests', + labelNames: ['route_type', 'status'] as const, +}); + +export const ipfsCacheHitTotal = new promClient.Counter({ + name: 'ipfs_cache_hit_total', + help: 'IPFS content cache hits', +}); + +export const ipfsCacheMissTotal = new promClient.Counter({ + name: 'ipfs_cache_miss_total', + help: 'IPFS content cache misses', +}); + +export const ipfsContentSizeHistogram = new promClient.Histogram({ + name: 'ipfs_content_size_bytes', + help: 'Distribution of IPFS content sizes', + buckets: [1024, 102400, 1048576, 10485760, 104857600], +}); + +export const ipfsRequestDurationHistogram = new promClient.Histogram({ + name: 'ipfs_request_duration_seconds', + help: 
'Duration of IPFS content requests', + labelNames: ['route_type', 'cache_status'] as const, + buckets: [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10], +}); + +export const ipfsBlockedTotal = new promClient.Counter({ + name: 'ipfs_blocked_total', + help: 'IPFS requests blocked by CID blocklist', +}); diff --git a/src/middleware/ipfs.ts b/src/middleware/ipfs.ts new file mode 100644 index 000000000..539883f67 --- /dev/null +++ b/src/middleware/ipfs.ts @@ -0,0 +1,100 @@ +/** + * AR.IO Gateway + * Copyright (C) 2022-2025 Permanent Data Solutions, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ +import { Handler, Request, Response, NextFunction } from 'express'; + +import * as config from '../config.js'; +import { isValidCid, cidToV1Base32 } from '../lib/ipfs-cid.js'; + +/** + * Middleware that intercepts `{CID}.{root_host}` subdomain requests. + * Must be mounted BEFORE the ArNS middleware to prevent ArNS from attempting + * to resolve CIDs as ArNS names. + * + * CIDv1 base32 strings are ~59 characters — always longer than the ArNS + * name limit (51 chars), so there's no collision with ArNS names. + * This also avoids needing a multi-level wildcard TLS certificate + * (*.ipfs.host would require a separate cert from *.host). + * + * Express subdomain array for `bafyabc.my-gateway.io` (root=my-gateway.io): + * req.subdomains = ['bafyabc'] (single subdomain) + * req.subdomains[0] = CID + */ +export function createIpfsSubdomainMiddleware({ + ipfsHandler, +}: { + ipfsHandler: Handler; +}): Handler { + return (req: Request, res: Response, next: NextFunction) => { + if (!config.IPFS_ENABLED || config.ARNS_ROOT_HOSTS.length === 0) { + next(); + return; + } + + const matchedEntry = config.matchArnsRootHost(req.hostname); + if (matchedEntry === undefined) { + next(); + return; + } + + // For {CID}.{root_host}, we expect exactly 1 subdomain beyond + // the root host's own subdomain depth. 
+ const cidLabelIndex = matchedEntry.subdomainLength; + + if ( + !Array.isArray(req.subdomains) || + req.subdomains.length !== cidLabelIndex + 1 + ) { + next(); + return; + } + + const cidLabel = req.subdomains[cidLabelIndex]; + if (!isValidCid(cidLabel)) { + // Not a CID — let ArNS handle it (likely an ArNS name) + next(); + return; + } + + // Attach IPFS context to request + (req as any).ipfsCid = cidLabel; + + // Handle /ipfs/ paths on subdomain requests. + // Kubo's directory listings generate absolute links like /ipfs/{CID}/file. + let reqPath: string | undefined = + req.path === '/' ? undefined : req.path.slice(1); + if (reqPath !== undefined && reqPath.startsWith('ipfs/')) { + const afterIpfs = reqPath.slice(5); // strip 'ipfs/' + const slashIdx = afterIpfs.indexOf('/'); + const pathCid = slashIdx >= 0 ? afterIpfs.slice(0, slashIdx) : afterIpfs; + const remainder = + slashIdx >= 0 ? afterIpfs.slice(slashIdx + 1) : undefined; + + if (pathCid === cidLabel) { + // Same CID — strip the redundant prefix + reqPath = remainder !== undefined ? remainder : undefined; + } else if (isValidCid(pathCid)) { + // Different CID — redirect to that CID's subdomain + try { + const targetCid = cidToV1Base32(pathCid); + const rootHost = matchedEntry.host; + const pathSuffix = remainder !== undefined ? `/${remainder}` : '/'; + res.redirect( + 302, + `${config.SANDBOX_PROTOCOL ?? req.protocol}://${targetCid}.${rootHost}${pathSuffix}`, + ); + return; + } catch { + // CID conversion failed — fall through to handler + } + } + } + (req as any).ipfsPath = reqPath; + + // Delegate to the IPFS handler + ipfsHandler(req, res, next); + }; +} diff --git a/src/routes/ar-io-info-builder.ts b/src/routes/ar-io-info-builder.ts index fac7a0552..03c143cfd 100644 --- a/src/routes/ar-io-info-builder.ts +++ b/src/routes/ar-io-info-builder.ts @@ -117,6 +117,13 @@ export interface HttpsigInfo { attestation?: HttpsigAttestationInfo; } +/** + * IPFS configuration exposed in the info endpoint. 
+ */ +export interface IpfsInfo { + enabled: true; +} + /** * Complete AR.IO info endpoint response structure. */ @@ -131,6 +138,7 @@ export interface ArIoInfoResponse { rateLimiter?: RateLimiterInfo; x402?: X402Info; httpsig?: HttpsigInfo; + ipfs?: IpfsInfo; } /** @@ -174,6 +182,9 @@ export interface ArIoInfoConfig { rsaPublicKey: string; }; }; + ipfs?: { + enabled: boolean; + }; } /** @@ -311,5 +322,9 @@ export function buildArIoInfo(config: ArIoInfoConfig): ArIoInfoResponse { }; } + if (config.ipfs?.enabled) { + response.ipfs = { enabled: true }; + } + return response; } diff --git a/src/routes/ar-io.ts b/src/routes/ar-io.ts index 603df24b0..9b0023c72 100644 --- a/src/routes/ar-io.ts +++ b/src/routes/ar-io.ts @@ -217,6 +217,7 @@ export const arIoInfoHandler = (_req: Request, res: Response) => { : undefined, } : undefined, + ipfs: config.IPFS_ENABLED ? { enabled: true } : undefined, }); res.status(200).send(response); diff --git a/src/routes/ipfs.ts b/src/routes/ipfs.ts new file mode 100644 index 000000000..056ef8ae9 --- /dev/null +++ b/src/routes/ipfs.ts @@ -0,0 +1,332 @@ +/** + * AR.IO Gateway + * Copyright (C) 2022-2025 Permanent Data Solutions, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ +import { Router, Request, Response, Handler } from 'express'; +import { default as asyncHandler } from 'express-async-handler'; +import winston from 'winston'; + +import * as config from '../config.js'; +import * as metrics from '../metrics.js'; +import { + cidToV1Base32, + isValidCid, + parseCid, + isCidV0, +} from '../lib/ipfs-cid.js'; +import { IpfsService } from '../ipfs/ipfs-service.js'; +import { + IpfsBlockedError, + IpfsNotFoundError, + IpfsSizeLimitError, + IpfsTimeoutError, + IpfsUnavailableError, +} from '../ipfs/kubo-data-source.js'; +import { RateLimiter } from '../limiter/types.js'; +import { + checkPaymentAndRateLimits, + adjustRateLimitTokens, +} from '../handlers/data-handler-utils.js'; +import { PaymentProcessor } from '../payments/types.js'; +import { extractAllClientIPs } from '../lib/ip-utils.js'; + +export function createIpfsRouter({ + log, + ipfsService, + rateLimiter, + paymentProcessor, +}: { + log: winston.Logger; + ipfsService: IpfsService; + rateLimiter?: RateLimiter; + paymentProcessor?: PaymentProcessor; +}): Router { + const router = Router(); + const handler = createIpfsPathHandler({ + log, + ipfsService, + rateLimiter, + paymentProcessor, + }); + + router.get('/ipfs/:cid', handler); + router.get('/ipfs/:cid/*', handler); + + return router; +} + +export function createIpfsHandler({ + log, + ipfsService, + rateLimiter, + paymentProcessor, +}: { + log: winston.Logger; + ipfsService: IpfsService; + rateLimiter?: RateLimiter; + paymentProcessor?: PaymentProcessor; +}): Handler { + return asyncHandler(async (req: Request, res: Response) => { + const cidString = (req as any).ipfsCid as string; + const path = (req as any).ipfsPath as string | undefined; + await handleIpfsRequest({ + req, + res, + cidString, + path, + log, + ipfsService, + rateLimiter, + paymentProcessor, + routeType: 'subdomain', + }); + }); +} + +function createIpfsPathHandler({ + log, + ipfsService, + rateLimiter, + 
paymentProcessor, +}: { + log: winston.Logger; + ipfsService: IpfsService; + rateLimiter?: RateLimiter; + paymentProcessor?: PaymentProcessor; +}): Handler { + return asyncHandler(async (req: Request, res: Response) => { + const cidString = req.params.cid; + // Express puts wildcard content in req.params[0] + const path = req.params[0] || undefined; + + if (!isValidCid(cidString)) { + res.status(400).json({ error: 'Invalid CID' }); + return; + } + + // Redirect CIDv0 to CIDv1 subdomain if ArNS root hosts are configured. + // Uses {CID}.{host} (same level as ArNS names) — no .ipfs. label needed + // since CIDv1 base32 is always >51 chars (won't collide with ArNS names) + // and works with standard *.{host} wildcard TLS certificates. + const cid = parseCid(cidString); + if (cid !== null && isCidV0(cid) && config.ARNS_ROOT_HOSTS.length > 0) { + const v1Base32 = cidToV1Base32(cidString); + const rootHost = config.ARNS_ROOT_HOSTS[0].host; + const pathSuffix = path !== undefined ? `/${path}` : ''; + res.redirect( + 302, + `${config.SANDBOX_PROTOCOL ?? req.protocol}://${v1Base32}.${rootHost}${pathSuffix}`, + ); + return; + } + + await handleIpfsRequest({ + req, + res, + cidString, + path, + log, + ipfsService, + rateLimiter, + paymentProcessor, + routeType: 'path', + }); + }); +} + +async function handleIpfsRequest({ + req, + res, + cidString, + path, + log: parentLog, + ipfsService, + rateLimiter, + paymentProcessor, + routeType, +}: { + req: Request; + res: Response; + cidString: string; + path: string | undefined; + log: winston.Logger; + ipfsService: IpfsService; + rateLimiter?: RateLimiter; + paymentProcessor?: PaymentProcessor; + routeType: 'path' | 'subdomain'; +}): Promise { + const startTime = Date.now(); + const ipfsPath = path !== undefined ? 
`${cidString}/${path}` : cidString; + + parentLog.debug('Handling IPFS request', { cidString, path, routeType }); + + try { + const result = await ipfsService.getContent({ + cidString, + path, + signal: req.signal, + }); + + // Check payment and rate limits (x402 + rate limiting in one call). + // When Content-Length is unknown (chunked), use a conservative estimate + // that gets corrected in the token adjustment after streaming. + const contentSize = + result.size > 0 ? result.size : config.IPFS_MAX_RESPONSE_SIZE_BYTES; + const limitCheck = await checkPaymentAndRateLimits({ + req, + res, + id: cidToV1Base32(cidString), + contentSize, + contentType: result.contentType, + requestAttributes: { + hops: 0, + clientIps: extractAllClientIPs(req).clientIps, + }, + rateLimiter, + paymentProcessor, + }); + + if (!limitCheck.allowed) { + // Response already sent (402 or 429) by checkPaymentAndRateLimits + result.stream.destroy(); + metrics.ipfsRequestsTotal.inc({ + route_type: routeType, + status: 'rate_limited', + }); + return; + } + + // Set response headers + res.setHeader('Content-Type', result.contentType); + if (result.size > 0) { + res.setHeader('Content-Length', result.size); + } + // CIDs are content-addressed — content never changes + res.setHeader('Cache-Control', 'public, max-age=29030400, immutable'); + res.setHeader('ETag', `"${cidToV1Base32(cidString)}"`); + res.setHeader('X-Ipfs-Path', `/ipfs/${ipfsPath}`); + res.setHeader('X-Ar-Io-Source', 'ipfs'); + + if (result.cached) { + res.setHeader('X-Cache', 'HIT'); + } else { + res.setHeader('X-Cache', 'MISS'); + } + + // Track metrics + const cacheStatus = result.cached ? 
'hit' : 'miss'; + metrics.ipfsRequestsTotal.inc({ route_type: routeType, status: 'success' }); + if (result.cached) { + metrics.ipfsCacheHitTotal.inc(); + } else { + metrics.ipfsCacheMissTotal.inc(); + } + if (result.size > 0) { + metrics.ipfsContentSizeHistogram.observe(result.size); + } + + // Pipe stream to response + result.stream.pipe(res); + + // Adjust rate limiter tokens after response completes + res.on('finish', () => { + const durationSec = (Date.now() - startTime) / 1000; + metrics.ipfsRequestDurationHistogram.observe( + { route_type: routeType, cache_status: cacheStatus }, + durationSec, + ); + + adjustRateLimitTokens({ + req, + responseSize: result.size > 0 ? result.size : contentSize, + initialResult: limitCheck, + rateLimiter, + }).catch((error) => { + parentLog.error('Failed to adjust rate limit tokens', { + message: error.message, + }); + }); + }); + + result.stream.on('error', (error) => { + parentLog.error('IPFS stream error', { + cidString, + path, + message: error.message, + }); + if (!res.headersSent) { + res.status(502).json({ error: 'IPFS stream failed' }); + } else { + res.destroy(); + } + }); + } catch (error: any) { + if (error instanceof IpfsBlockedError) { + metrics.ipfsRequestsTotal.inc({ + route_type: routeType, + status: 'blocked', + }); + res.setHeader( + 'Cache-Control', + `public, max-age=${config.CACHE_BLOCKED_MAX_AGE}, immutable`, + ); + res + .status(451) + .send( + `Requested content blocked by this node's content policy. 
Blocked ID: ${cidString}`, + ); + return; + } + + if (error instanceof IpfsNotFoundError) { + metrics.ipfsRequestsTotal.inc({ + route_type: routeType, + status: 'not_found', + }); + res.status(404).json({ error: 'IPFS content not found' }); + return; + } + + if (error instanceof IpfsTimeoutError) { + metrics.ipfsRequestsTotal.inc({ + route_type: routeType, + status: 'timeout', + }); + res.status(504).json({ error: 'IPFS request timed out' }); + return; + } + + if (error instanceof IpfsUnavailableError) { + metrics.ipfsRequestsTotal.inc({ + route_type: routeType, + status: 'unavailable', + }); + res.status(502).json({ error: 'IPFS service unavailable' }); + return; + } + + if (error instanceof IpfsSizeLimitError) { + metrics.ipfsRequestsTotal.inc({ + route_type: routeType, + status: 'size_exceeded', + }); + res.status(413).json({ error: 'Content exceeds size limit' }); + return; + } + + // Client disconnected + if (error.name === 'AbortError') { + return; + } + + metrics.ipfsRequestsTotal.inc({ route_type: routeType, status: 'error' }); + parentLog.error('IPFS request failed', { + cidString, + path, + message: error.message, + }); + res.status(500).json({ error: 'Internal server error' }); + } +} diff --git a/src/system.ts b/src/system.ts index 188dd365c..f7aab452b 100644 --- a/src/system.ts +++ b/src/system.ts @@ -1456,6 +1456,53 @@ if (dataVerificationWorker !== undefined) { dataVerificationWorker.start(); } +// +// IPFS subsystem (conditionally initialized) +// + +import { KuboDataSource } from './ipfs/kubo-data-source.js'; +import { IpfsFsCache } from './ipfs/ipfs-cache.js'; +import { IpfsService } from './ipfs/ipfs-service.js'; +import { createIpfsRateLimiter } from './ipfs/ipfs-rate-limiter.js'; +import { RateLimiter } from './limiter/types.js'; + +export let ipfsService: IpfsService | undefined; +export let ipfsRateLimiter: RateLimiter | undefined; + +if (config.IPFS_ENABLED) { + log.info('IPFS subsystem enabled, initializing...'); + + const kuboDataSource = 
new KuboDataSource({ + log, + kuboUrl: config.IPFS_KUBO_URL, + requestTimeoutMs: config.IPFS_KUBO_REQUEST_TIMEOUT_MS, + streamStallTimeoutMs: config.IPFS_STREAM_STALL_TIMEOUT_MS, + }); + + const ipfsCache = new IpfsFsCache({ + log, + basePath: config.IPFS_CACHE_PATH, + maxSizeBytes: config.IPFS_CACHE_MAX_SIZE_BYTES, + eventEmitter, + }); + + ipfsService = new IpfsService({ + log, + dataSource: kuboDataSource, + cache: ipfsCache, + blockListValidator: dataBlockListValidator, + maxResponseSizeBytes: config.IPFS_MAX_RESPONSE_SIZE_BYTES, + }); + + ipfsRateLimiter = createIpfsRateLimiter(); + + log.info('IPFS subsystem initialized', { + kuboUrl: config.IPFS_KUBO_URL, + cachePath: config.IPFS_CACHE_PATH, + maxCacheSize: config.IPFS_CACHE_MAX_SIZE_BYTES, + }); +} + export const blockedNamesCache = new BlockedNamesCache({ log, cacheTTL: 3600, diff --git a/test-ipfs.sh b/test-ipfs.sh new file mode 100755 index 000000000..6f262e563 --- /dev/null +++ b/test-ipfs.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# End-to-end IPFS integration test script +# Run this after starting the gateway with IPFS_ENABLED=true and Kubo running + +GATEWAY_URL="${1:-http://localhost:4000}" +KUBO_URL="${2:-http://localhost:8080}" + +GREEN='\033[0;32m' +RED='\033[0;31m' +YELLOW='\033[0;33m' +NC='\033[0m' + +pass=0 +fail=0 + +test_case() { + local name="$1" + local expected_status="$2" + local url="$3" + local extra_args="${4:-}" + + local result + result=$(curl -s -o /tmp/ipfs-test-body -w "%{http_code}" --max-time 30 $extra_args "$url" 2>&1) + + if [ "$result" = "$expected_status" ]; then + echo -e " ${GREEN}PASS${NC} $name (HTTP $result)" + ((pass++)) + else + echo -e " ${RED}FAIL${NC} $name — expected $expected_status, got $result" + echo " URL: $url" + echo " Body: $(head -c 200 /tmp/ipfs-test-body)" + ((fail++)) + fi +} + +test_body_contains() { + local name="$1" + local expected_text="$2" + local url="$3" + + local body + body=$(curl -s --max-time 30 "$url" 2>&1) + local status=$? 
+ + if echo "$body" | grep -q "$expected_text"; then + echo -e " ${GREEN}PASS${NC} $name" + ((pass++)) + else + echo -e " ${RED}FAIL${NC} $name — body doesn't contain '$expected_text'" + echo " Body: $(echo "$body" | head -c 200)" + ((fail++)) + fi +} + +test_header() { + local name="$1" + local header="$2" + local expected_value="$3" + local url="$4" + + local actual + actual=$(curl -s -I --max-time 30 "$url" 2>&1 | grep -i "^$header:" | head -1 | sed 's/^[^:]*: //' | tr -d '\r') + + if echo "$actual" | grep -qi "$expected_value"; then + echo -e " ${GREEN}PASS${NC} $name ($actual)" + ((pass++)) + else + echo -e " ${RED}FAIL${NC} $name — expected header '$header' to contain '$expected_value', got '$actual'" + ((fail++)) + fi +} + +echo "=========================================" +echo " AR.IO IPFS Integration — E2E Tests" +echo "=========================================" +echo "" +echo "Gateway: $GATEWAY_URL" +echo "Kubo: $KUBO_URL" +echo "" + +# --- Pre-flight: ensure Kubo is running --- +echo "--- Pre-flight ---" +kubo_status=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "$KUBO_URL/ipfs/QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn" 2>&1) +if [ "$kubo_status" = "000" ]; then + echo -e " ${RED}FAIL${NC} Kubo not reachable at $KUBO_URL" + exit 1 +fi +echo -e " ${GREEN}OK${NC} Kubo is reachable" + +# Add test content to Kubo +echo "" +echo "--- Adding test content to Kubo ---" +FILE_CID=$(echo "Hello from AR.IO IPFS integration test!" | docker exec -i ar-io-node-kubo-1 ipfs add -q 2>&1) +echo " File CID: $FILE_CID" + +DIR_CID=$(docker exec ar-io-node-kubo-1 sh -c ' + mkdir -p /tmp/e2e-test + echo "
<html><head><title>E2E Test</title></head><body><h1>E2E Test</h1></body></html>
" > /tmp/e2e-test/index.html + echo "subfile content" > /tmp/e2e-test/sub.txt + ipfs add -r -q /tmp/e2e-test | tail -1 +' 2>&1) +echo " Dir CID: $DIR_CID" +echo "" + +# --- Test 1: Path-based single file --- +echo "--- Path-based access ---" +test_body_contains "GET /ipfs/{CID} serves file content" \ + "Hello from AR.IO IPFS integration test" \ + "$GATEWAY_URL/ipfs/$FILE_CID" + +# --- Test 2: Path-based directory with path --- +test_body_contains "GET /ipfs/{CID}/index.html serves directory file" \ + "E2E Test" \ + "$GATEWAY_URL/ipfs/$DIR_CID/index.html" + +test_body_contains "GET /ipfs/{CID}/sub.txt serves subfile" \ + "subfile content" \ + "$GATEWAY_URL/ipfs/$DIR_CID/sub.txt" + +# --- Test 3: Invalid CID --- +test_case "GET /ipfs/invalid-cid returns 400" \ + "400" \ + "$GATEWAY_URL/ipfs/not-a-valid-cid" + +# --- Test 4: Response headers --- +echo "" +echo "--- Response headers ---" +test_header "Cache-Control is immutable" \ + "cache-control" "immutable" \ + "$GATEWAY_URL/ipfs/$FILE_CID" + +test_header "X-Ipfs-Path header present" \ + "x-ipfs-path" "/ipfs/" \ + "$GATEWAY_URL/ipfs/$FILE_CID" + +test_header "X-Cache header present" \ + "x-cache" "" \ + "$GATEWAY_URL/ipfs/$FILE_CID" + +test_header "Content-Type is set" \ + "content-type" "" \ + "$GATEWAY_URL/ipfs/$FILE_CID" + +# --- Test 5: CIDv0 redirect to CIDv1 subdomain --- +echo "" +echo "--- CIDv0 redirect ---" +redirect_location=$(curl -s -o /dev/null -w "%{redirect_url}" --max-time 10 "$GATEWAY_URL/ipfs/$FILE_CID" 2>&1) +if echo "$redirect_location" | grep -q "ipfs"; then + echo -e " ${GREEN}PASS${NC} CIDv0 redirects to CIDv1 subdomain ($redirect_location)" + ((pass++)) +elif [ -z "$redirect_location" ]; then + echo -e " ${YELLOW}SKIP${NC} No redirect (CID may already be v1 or no ARNS_ROOT_HOST)" +else + echo -e " ${YELLOW}INFO${NC} Redirect: $redirect_location" +fi + +# --- Test 6: Second request should be cached --- +echo "" +echo "--- Caching ---" +# First request (cache miss) +curl -s -o /dev/null 
"$GATEWAY_URL/ipfs/$FILE_CID" 2>/dev/null +# Second request (should be cache hit) +cache_header=$(curl -s -I --max-time 10 "$GATEWAY_URL/ipfs/$FILE_CID" 2>&1 | grep -i "^x-cache:" | sed 's/^[^:]*: //' | tr -d '\r') +echo -e " ${GREEN}INFO${NC} X-Cache on second request: ${cache_header:-'(not set)'}" + +# --- Summary --- +echo "" +echo "=========================================" +echo -e " Results: ${GREEN}$pass passed${NC}, ${RED}$fail failed${NC}" +echo "=========================================" + +exit $fail