diff --git a/.gitignore b/.gitignore
index cf9381d..ccf99db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ dist/
*.tsbuildinfo
.worktrees/
.superpowers/
+coverage/
diff --git a/.prettierignore b/.prettierignore
index 52af816..c45c1e5 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -2,3 +2,4 @@ dist/
node_modules/
pnpm-lock.yaml
charts/
+coverage/
diff --git a/README.md b/README.md
index bd60779..3bb6657 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# @copilotkit/llmock [](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml) [](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml) [](https://www.npmjs.com/package/@copilotkit/llmock)
-Deterministic mock LLM server for testing. A real HTTP server on a real port — not an in-process interceptor — so every process in your stack (Playwright, Next.js, agent workers, microservices) can point at it via `OPENAI_BASE_URL` / `ANTHROPIC_BASE_URL` and get reproducible, instant responses. Streams SSE in real OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, and Cohere API formats, driven entirely by fixtures. Zero runtime dependencies.
+Mock infrastructure for AI application testing — LLM APIs, MCP tools, A2A agents, vector databases, search, and more. Real HTTP server on a real port, fixture-driven, zero runtime dependencies.
## Quick Start
@@ -23,72 +23,106 @@ const url = await mock.start();
await mock.stop();
```
-## When to Use This vs MSW
+## Usage Scenarios
-[MSW (Mock Service Worker)](https://mswjs.io/) is a popular API mocking library, but it solves a different problem.
+### In-process testing
-**The key difference is architecture.** llmock runs a real HTTP server on a port. MSW patches `http`/`https`/`fetch` modules inside a single Node.js process. MSW can only intercept requests from the process that calls `server.listen()` — child processes, separate services, and workers are unaffected.
+Use the programmatic API to start and stop the mock server in your test setup. It works with any test framework — Vitest, Jest, Playwright, Mocha, anything.
-This matters for E2E tests where multiple processes make LLM API calls:
+```typescript
+import { LLMock } from "@copilotkit/llmock";
+
+const mock = new LLMock({ port: 5555 });
+mock.loadFixtureDir("./fixtures");
+const url = await mock.start();
+process.env.OPENAI_BASE_URL = `${url}/v1`;
+
+// ... run tests ...
+
+await mock.stop();
+```
+
+### Running locally
+
+Use the CLI with `--watch` to hot-reload fixtures as you edit them. Point your app at the mock and iterate without touching real APIs.
+
+```bash
+llmock -p 4010 -f ./fixtures --watch
```
-Playwright test runner (Node)
- └─ controls browser → Next.js app (separate process)
- └─ OPENAI_BASE_URL → llmock :5555
- ├─ Mastra agent workers
- ├─ LangGraph workers
- └─ CopilotKit runtime
+
+### CI pipelines
+
+Use the Docker image with `--strict` mode and record-and-replay for deterministic, zero-cost CI runs.
+
+```yaml
+# GitHub Actions example
+- name: Start aimock
+ run: |
+ docker run -d --name aimock \
+ -v ./fixtures:/fixtures \
+ -p 4010:4010 \
+ ghcr.io/copilotkit/aimock \
+ llmock --strict -f /fixtures
+
+- name: Run tests
+ env:
+ OPENAI_BASE_URL: http://localhost:4010/v1
+ run: pnpm test
+
+- name: Stop aimock
+ run: docker stop aimock
```
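
If the container needs a moment to boot, a wait step avoids racing the test run. This is a hypothetical extra step, assuming the server exposes the `/health` endpoint described in the docs:

```yaml
# Optional: poll /health until the mock is up before starting tests
- name: Wait for aimock
  run: |
    for i in $(seq 1 30); do
      curl -sf http://localhost:4010/health && exit 0
      sleep 1
    done
    echo "mock never became healthy" && exit 1
```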
-MSW can't intercept any of those calls. llmock can — it's a real server on a real port.
+### Cross-language testing
-**Use llmock when:**
+The Docker image runs as a standalone HTTP server — any language that speaks HTTP can use it. Python, Go, Rust, Ruby, Java, anything.
-- Multiple processes need to hit the same mock (E2E tests, agent frameworks, microservices)
-- You want multi-provider SSE format out of the box (OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere)
-- You prefer defining fixtures as JSON files rather than code
-- You need a standalone CLI server
+```bash
+docker run -d -p 4010:4010 ghcr.io/copilotkit/aimock llmock -f /fixtures
-**Use MSW when:**
+# Python
+client = openai.OpenAI(base_url="http://localhost:4010/v1", api_key="mock")
-- All API calls originate from a single Node.js process (unit tests, SDK client tests)
-- You're mocking many different APIs, not just OpenAI
-- You want in-process interception without running a server
+# Go
+client := openai.NewClient(option.WithBaseURL("http://localhost:4010/v1"))
-| Capability | llmock | MSW |
-| ---------------------------- | --------------------- | ------------------------------------------------------------------------- |
-| Cross-process interception | **Yes** (real server) | **No** (in-process only) |
-| OpenAI Chat Completions SSE | **Built-in** | Manual — build `data: {json}\n\n` + `[DONE]` yourself |
-| OpenAI Responses API SSE | **Built-in** | Manual — MSW's `sse()` sends `data:` events, not OpenAI's `event:` format |
-| Claude Messages API SSE | **Built-in** | Manual — build `event:`/`data:` SSE yourself |
-| Gemini streaming | **Built-in** | Manual — build `data:` SSE yourself |
-| WebSocket APIs | **Built-in** | **No** |
-| Fixture file loading (JSON) | **Yes** | **No** — handlers are code-only |
-| Request journal / inspection | **Yes** | **No** — track requests manually |
-| Non-streaming responses | **Yes** | **Yes** |
-| Error injection (one-shot) | **Yes** | **Yes** (via `server.use()`) |
-| CLI for standalone use | **Yes** | **No** |
-| Zero dependencies | **Yes** | **No** (~300KB) |
+# Rust (async-openai)
+let config = OpenAIConfig::new().with_api_base("http://localhost:4010/v1");
+let client = Client::with_config(config);
+```
## Features
-- **[Multi-provider support](https://llmock.copilotkit.dev/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.copilotkit.dev/chat-completions.html), [OpenAI Responses](https://llmock.copilotkit.dev/responses-api.html), [Anthropic Claude](https://llmock.copilotkit.dev/claude-messages.html), [Google Gemini](https://llmock.copilotkit.dev/gemini.html), [AWS Bedrock](https://llmock.copilotkit.dev/aws-bedrock.html) (streaming + Converse), [Azure OpenAI](https://llmock.copilotkit.dev/azure-openai.html), [Vertex AI](https://llmock.copilotkit.dev/vertex-ai.html), [Ollama](https://llmock.copilotkit.dev/ollama.html), [Cohere](https://llmock.copilotkit.dev/cohere.html)
+- **[Record-and-replay](https://llmock.copilotkit.dev/record-replay.html)** — VCR-style proxy records real API responses as fixtures for deterministic replay
+- **[Multi-provider support](https://llmock.copilotkit.dev/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.copilotkit.dev/chat-completions.html), [Responses API](https://llmock.copilotkit.dev/responses-api.html), [Anthropic Claude](https://llmock.copilotkit.dev/claude-messages.html), [Google Gemini](https://llmock.copilotkit.dev/gemini.html), [AWS Bedrock](https://llmock.copilotkit.dev/aws-bedrock.html), [Azure OpenAI](https://llmock.copilotkit.dev/azure-openai.html), [Vertex AI](https://llmock.copilotkit.dev/vertex-ai.html), [Ollama](https://llmock.copilotkit.dev/ollama.html), [Cohere](https://llmock.copilotkit.dev/cohere.html)
+- **[MCPMock](https://llmock.copilotkit.dev/mcp-mock.html)** — Mock MCP server with tools, resources, prompts, and session management
+- **[A2AMock](https://llmock.copilotkit.dev/a2a-mock.html)** — Mock A2A protocol server with agent cards, message routing, and streaming
+- **[VectorMock](https://llmock.copilotkit.dev/vector-mock.html)** — Mock vector database with Pinecone, Qdrant, and ChromaDB endpoints
+- **[Services](https://llmock.copilotkit.dev/services.html)** — Built-in search (Tavily), rerank (Cohere), and moderation (OpenAI) mocks
+- **[Chaos testing](https://llmock.copilotkit.dev/chaos-testing.html)** — Probabilistic failure injection: 500 errors, malformed JSON, mid-stream disconnects
+- **[Prometheus metrics](https://llmock.copilotkit.dev/metrics.html)** — Request counts, latencies, and fixture match rates at `/metrics`
- **[Embeddings API](https://llmock.copilotkit.dev/embeddings.html)** — OpenAI-compatible embedding responses with configurable dimensions
- **[Structured output / JSON mode](https://llmock.copilotkit.dev/structured-output.html)** — `response_format`, `json_schema`, and function calling
- **[Sequential responses](https://llmock.copilotkit.dev/sequential-responses.html)** — Stateful multi-turn fixtures that return different responses on each call
- **[Streaming physics](https://llmock.copilotkit.dev/streaming-physics.html)** — Configurable `ttft`, `tps`, and `jitter` for realistic timing
- **[WebSocket APIs](https://llmock.copilotkit.dev/websocket.html)** — OpenAI Responses WS, Realtime API, and Gemini Live
- **[Error injection](https://llmock.copilotkit.dev/error-injection.html)** — One-shot errors, rate limiting, and provider-specific error formats
-- **[Chaos testing](https://llmock.copilotkit.dev/chaos-testing.html)** — Probabilistic failure injection: 500 errors, malformed JSON, mid-stream disconnects
-- **[Prometheus metrics](https://llmock.copilotkit.dev/metrics.html)** — Request counts, latencies, and fixture match rates at `/metrics`
- **[Request journal](https://llmock.copilotkit.dev/docs.html)** — Record, inspect, and assert on every request
- **[Fixture validation](https://llmock.copilotkit.dev/fixtures.html)** — Schema validation at load time with `--validate-on-load`
- **CLI with hot-reload** — Standalone server with `--watch` for live fixture editing
- **[Docker + Helm](https://llmock.copilotkit.dev/docker.html)** — Container image and Helm chart for CI/CD pipelines
-- **Record-and-replay** — VCR-style proxy-on-miss records real API responses as fixtures for deterministic replay
- **[Drift detection](https://llmock.copilotkit.dev/drift-detection.html)** — Daily CI runs against real APIs to catch response format changes
- **Claude Code integration** — `/write-fixtures` skill teaches your AI assistant how to write fixtures correctly
+## aimock CLI (Full-Stack Mock)
+
+For projects that need more than LLM mocking, the `aimock` CLI reads a JSON config file and serves all mock services on one port:
+
+```bash
+aimock --config aimock.json --port 4010
+```
+
+See the [aimock documentation](https://llmock.copilotkit.dev/aimock-cli.html) for config file format and Docker usage.
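
A config sketch, purely illustrative — only the `llm` section and path-prefix mounting are documented, so every key name below is an assumption; check the aimock docs for the real schema:

```json
{
  "llm": { "fixtures": "./fixtures", "strict": true },
  "mounts": {
    "/mcp": { "service": "mcp" },
    "/a2a": { "service": "a2a" }
  }
}
```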
+
## CLI Quick Reference
```bash
@@ -97,6 +131,7 @@ llmock [options]
| Option | Short | Default | Description |
| -------------------- | ----- | ------------ | ------------------------------------------- |
+| `--config` | | | Config file for aimock CLI |
| `--port` | `-p` | `4010` | Port to listen on |
| `--host` | `-h` | `127.0.0.1` | Host to bind to |
| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file |
@@ -137,6 +172,19 @@ Full API reference, fixture format, E2E patterns, and provider-specific guides:
**[https://llmock.copilotkit.dev/docs.html](https://llmock.copilotkit.dev/docs.html)**
+## llmock vs MSW
+
+[MSW (Mock Service Worker)](https://mswjs.io/) patches `http`/`https`/`fetch` inside a single Node.js process. llmock runs a real HTTP server on a real port that any process can reach — child processes, microservices, agent workers, Docker containers. MSW can't intercept any of those; llmock can. For a detailed comparison including other tools, see the [full comparison on the docs site](https://llmock.copilotkit.dev/#comparison).
+
+| Capability | llmock | MSW |
+| -------------------------- | ---------------------------- | ---------------------- |
+| Cross-process interception | **Yes** (real server) | No (in-process only) |
+| LLM SSE streaming | **Built-in** (13+ providers) | Manual for each format |
+| Fixture files (JSON) | **Yes** | No (code-only) |
+| Record & replay | **Yes** | No |
+| WebSocket APIs | **Yes** | No |
+| Zero dependencies | **Yes** | No (~300KB) |
+
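
The streaming row above refers to provider wire formats such as OpenAI's chat-completion SSE frames (`data: {json}` blocks terminated by `data: [DONE]`). A minimal consumer sketch, with no SDK involved — the frame contents mirror what a mock like llmock would emit:

```typescript
// Sketch: collect an OpenAI-style chat-completion SSE stream into the full text.
// Frame format (per the OpenAI streaming spec): "data: {json}\n\n", then "data: [DONE]".
function collectSSEText(stream: string): string {
  let text = "";
  for (const line of stream.split("\n")) {
    if (!line.startsWith("data: ")) continue;           // skip blank lines
    const payload = line.slice("data: ".length);
    if (payload === "[DONE]") break;                    // end-of-stream sentinel
    const chunk = JSON.parse(payload);
    text += chunk.choices?.[0]?.delta?.content ?? "";   // token delta, if any
  }
  return text;
}

// Example frames for a two-token completion:
const frames = [
  'data: {"choices":[{"delta":{"content":"Hello"}}]}',
  'data: {"choices":[{"delta":{"content":" world"}}]}',
  "data: [DONE]",
].join("\n\n");
// collectSSEText(frames) === "Hello world"
```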
## Real-World Usage
[CopilotKit](https://github.com/CopilotKit/CopilotKit) uses llmock across its test suite to verify AI agent behavior across multiple LLM providers without hitting real APIs.
diff --git a/docs/a2a-mock.html b/docs/a2a-mock.html
new file mode 100644
index 0000000..9346e3b
--- /dev/null
+++ b/docs/a2a-mock.html
@@ -0,0 +1,279 @@
+
+
+
+
+
+ A2AMock — llmock
+
+
+
+
+
+
+
+
+
+
+
+
+
A2AMock
+
+ Mock A2A (Agent-to-Agent) protocol server for testing multi-agent systems. Implements the
+ A2A JSON-RPC protocol with agent card discovery, message routing, task management, and SSE
+ streaming.
+
+ The agent card is served at GET /.well-known/agent-card.json and includes all
+ registered agents' skills and capabilities. The A2A-Version: 1.0 header is
+ included on all responses.
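
For reference, an A2A agent card is a JSON document along these lines — the field names follow the public A2A spec, and the exact card llmock serves may differ:

```json
{
  "name": "research-agent",
  "description": "Mock agent for testing",
  "url": "http://localhost:4010/a2a",
  "version": "1.0.0",
  "capabilities": { "streaming": true },
  "skills": [
    { "id": "search", "name": "Search", "description": "Web search skill" }
  ]
}
```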
+
+
+
Inspection
+
+
+ Inspection API typescript
+
+
a2a.health(); // { status: "ok", agents: 2, tasks: 5 }
+a2a.reset(); // Clears all agents and tasks
+ aimock is the full-stack mock orchestrator. Where llmock serves
+ LLM endpoints only, aimock reads a JSON config file and serves LLM mocks
+ alongside additional mock services (MCP, A2A, vector stores) on a single port.
+
+
+
aimock vs llmock
+
+
+
+
Capability
+
llmock CLI
+
aimock CLI
+
+
+
+
+
LLM mock endpoints
+
Yes
+
Yes
+
+
+
Additional mock services
+
No
+
Yes (via mount)
+
+
+
Config file
+
CLI flags only
+
JSON config file
+
+
+
Single-port routing
+
LLM paths only
+
All services on one port
+
+
+
+
+
Quick Start
+
+
+
Run aimock bash
+
aimock --config aimock.json --port 4010
+
+
+
Config File Format
+
+ The config file is a JSON object describing which services to run and how to configure
+ them. The llm section configures the core LLMock server. Additional services
+ are mounted at path prefixes.
+
- Real HTTP server. Real SSE streams. WebSocket APIs. Fixture-driven responses.
- Multi-provider mock — OpenAI, Claude, Gemini — any process on the machine can reach it.
+ Mock infrastructure for testing AI applications — LLM APIs, MCP tools, A2A agents, vector
+ databases, search, and more. Real HTTP server on a real port. Fixture-driven. Zero
+ dependencies. Any process on the machine can reach it.
@@ -1142,104 +1144,148 @@
Deterministic mock LLM server for testing
+
+
+
+ How you'll use it
+
From unit tests to production CI
+
Four ways to run llmock, depending on what you need.
+
+
+
+
⚡
+
Unit Tests
+
+ In-process programmatic API. Start and stop in your test setup — Vitest, Jest,
+ Playwright, Mocha, anything. TypeScript/JavaScript.
+
+
+
+
🔄
+
Local Development
+
+ CLI with --watch for hot-reload. Edit fixtures, see changes instantly.
+ Point your app at the mock and iterate without real API calls.
+
+
+
+
🏗️
+
CI/CD
+
+ Docker image + --strict mode + record-and-replay. Deterministic,
+ zero-cost, no API keys needed in CI.
+
+
+
+
🌐
+
Cross-Language
+
+ Docker image as a standalone HTTP server. Python, Go, Rust, Ruby, Java — any
+ language that speaks HTTP can hit it.
+
+
+
+
+
+
Why llmock
Stop paying for flaky tests
- Tests that hit real LLM APIs — OpenAI, Gemini, Anthropic — cost money, time out, and
- produce non-deterministic results. llmock replaces those calls with immediate,
- deterministic responses from a real HTTP server any process on the machine can reach.
+ Tests that hit real LLM APIs cost money, time out, and produce non-deterministic results.
+ llmock replaces those calls with immediate, deterministic responses from a real HTTP
+ server any process on the machine can reach.
-
⚡
-
Real HTTP Server
+
🔴
+
Record & Replay
- Runs on an actual port. Any process on the machine can reach it — Next.js, Mastra,
- LangGraph, Agno, anything that speaks HTTP.
+ Proxy to real APIs, record responses as fixtures, then replay them deterministically
+ in tests. VCR-style workflow for zero-effort fixture creation.
📡
-
Authentic SSE Streams
+
13+ LLM Providers
- OpenAI, Claude, and Gemini APIs — authentic SSE format for each provider. Streaming
- and non-streaming modes.
+ OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere — authentic
+ SSE format for each provider. Streaming and non-streaming.
-
📁
-
JSON Fixture Files
+
🔌
+
MCP Protocol
- Define responses as JSON — one file per feature. Load a directory, load a file, or
- register fixtures programmatically.
+ Mock MCP servers with tools, resources, prompts, and session management. Test tool
+ integrations without real MCP infrastructure.
-
🔧
-
Tool Call Support
+
🤝
+
A2A Protocol
- Return tool calls with structured arguments. Match on tool names, tool result IDs, or
- write custom predicates.
+ Mock A2A agent-to-agent protocol. Agent cards, message routing, streaming tasks
+ — test multi-agent systems in isolation.
-
💥
-
Error Injection
+
📦
+
Vector Databases
- Queue one-shot errors — 429 rate limits, 503 outages, whatever. Fires once, then
- auto-removes itself.
+ Mock Pinecone, Qdrant, and ChromaDB endpoints. Test RAG pipelines without running real
+ vector databases.
-
📋
-
Request Journal
+
🎲
+
Chaos Testing
- Every request recorded. Inspect messages, verify tool calls, assert on conversation
- history. HTTP and programmatic access.
+ Probabilistic failure injection — random errors, latency spikes, and stream
+ corruption for resilience testing.
-
🔌
-
WebSocket APIs
+
📊
+
Prometheus Metrics
- OpenAI Responses, OpenAI Realtime, and Gemini Live over WebSocket. Same fixtures, real
- RFC 6455 framing, zero dependencies. Text + tool calls.
+ Expose request counts, latencies, and fixture match rates via a /metrics endpoint.
+ Grafana-ready.
-
🎛️
-
Streaming Physics
+
🔧
+
Tool Call Support
- Simulate realistic streaming timing with TTFT, TPS, and jitter. Test loading states
- and streaming UX under real-world conditions.
+ Return tool calls with structured arguments. Match on tool names, tool result IDs, or
+ write custom predicates.
-
🎲
-
Chaos Testing
+
🔌
+
WebSocket APIs
- Probabilistic failure injection — random errors, latency spikes, and stream
- corruption for resilience testing.
+ OpenAI Responses, OpenAI Realtime, and Gemini Live over WebSocket. Same fixtures, real
+ RFC 6455 framing, zero dependencies.
-
📊
-
Prometheus Metrics
+
🎛️
+
Streaming Physics
- Expose request counts, latencies, and fixture match rates via a /metrics endpoint.
- Grafana-ready.
+ Simulate realistic streaming timing with TTFT, TPS, and jitter. Test loading states
+ and streaming UX under real-world conditions.
-
🔴
-
Record & Replay
+
📋
+
Request Journal
- Proxy to real APIs, record responses as fixtures, then replay them deterministically
- in tests.
+ Every request recorded. Inspect messages, verify tool calls, assert on conversation
+ history. HTTP and programmatic access.
@@ -2019,13 +2065,13 @@
Real-World Usage
without hitting real APIs. The tests cover streaming text, tool calls, and multi-turn
conversations across both v1 and v2 runtimes. See the
test suite
and
fixture files
diff --git a/docs/mcp-mock.html b/docs/mcp-mock.html
new file mode 100644
index 0000000..19d7d9b
--- /dev/null
+++ b/docs/mcp-mock.html
@@ -0,0 +1,291 @@
+
+
+
+
+
+ MCPMock — llmock
+
+
+
+
+
+
+
+
+
+
+
+
+
MCPMock
+
+ Mock MCP (Model Context Protocol) server for testing tool integrations. Implements the
+ Streamable HTTP transport with JSON-RPC dispatch, session management, and full
+ tools/resources/prompts support.
+
+
+
Quick Start
+
+
+ Standalone mode typescript
+
+
import { MCPMock } from "@copilotkit/llmock";
+
+const mcp = new MCPMock();
+
+mcp.addTool({ name: "search", description: "Search the web" });
+mcp.onToolCall("search", (args) => {
+ return `Results for: ${(args as { query: string }).query}`;
+});
+
+const url = await mcp.start();
+// Point your MCP client at `url`
+
+
+
Mounted Mode
+
+ Mount MCPMock onto an LLMock server to share a single port with LLM mocking and other
+ services:
+
+ MCPMock implements full session management per the MCP Streamable HTTP spec. Each
+ initialize request creates a new session, and the session ID is returned via
+ the Mcp-Session-Id header. All subsequent requests must include this header.
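
The rule can be sketched as a tiny session table — an illustration of the behavior described above, not MCPMock's actual implementation:

```typescript
// Sketch of the MCP session rule: "initialize" mints a session ID
// (returned via the Mcp-Session-Id header); later calls must present it.
import { randomUUID } from "node:crypto";

const sessions = new Set<string>();

function handle(method: string, sessionId?: string): { sessionId?: string; ok: boolean } {
  if (method === "initialize") {
    const id = randomUUID();
    sessions.add(id);                 // session created on initialize
    return { sessionId: id, ok: true };
  }
  // every other method is rejected without a known session ID
  return { ok: sessionId !== undefined && sessions.has(sessionId) };
}
```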
+
+
+
+
+
+
Method
+
Description
+
+
+
+
+
initialize
+
Creates session, returns capabilities and session ID
+
+
+
tools/list
+
Lists all registered tools
+
+
+
tools/call
+
Calls a tool by name with arguments
+
+
+
resources/list
+
Lists all registered resources
+
+
+
resources/read
+
Reads a resource by URI
+
+
+
prompts/list
+
Lists all registered prompts
+
+
+
prompts/get
+
Gets a prompt by name with arguments
+
+
+
ping
+
Returns empty object (health check)
+
+
+
DELETE /
+
Destroys a session
+
+
+
+
+
Inspection
+
+
+ Inspection API typescript
+
+
mcp.health(); // { status: "ok", tools: 2, resources: 1, prompts: 0, sessions: 1 }
+mcp.getSessions(); // Map of active sessions
+mcp.getRequests(); // Journal entries (when mounted with shared journal)
+mcp.reset(); // Clears all tools, resources, prompts, and sessions
+ Mount additional mock services onto a running LLMock server. All services share one port,
+ one health endpoint, and one request journal — no port juggling, no service
+ discovery.
+
+
+
Mountable Interface
+
+ Any object that implements the Mountable interface can be mounted onto
+ LLMock. The interface requires a single method:
+
+ Mount a Mountable service at a path prefix. Requests matching the prefix are
+ forwarded to the service with the prefix stripped.
+
+
+
+
mount() API typescript
+
const llm = new LLMock({ port: 5555 });
+
+llm.mount("/mcp", mcpMock); // MCP tools at /mcp
+llm.mount("/a2a", a2aMock); // A2A agents at /a2a
+
+await llm.start();
+// All protocols accessible on port 5555
+
+
+
Path Stripping
+
+ When a request arrives at a mounted path, the prefix is stripped before the service sees
+ it. For example, a request to /mcp/tools/list arrives at the MCP service with
+ pathname /tools/list.
+
+
+
+
+
+
Incoming Request
+
Mount Prefix
+
Service Sees
+
+
+
+
+
POST /mcp/tools/list
+
/mcp
+
/tools/list
+
+
+
POST /a2a/agents/run
+
/a2a
+
/agents/run
+
+
+
GET /mcp
+
/mcp
+
/
+
+
+
+
+
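
The stripping rule in the table is simple enough to sketch — illustrative only, not the library's code:

```typescript
// Sketch of the mount-path stripping rule described above:
// the mount prefix is removed before the service sees the request.
function stripMountPrefix(pathname: string, prefix: string): string {
  if (pathname === prefix) return "/";                      // bare prefix maps to root
  if (!pathname.startsWith(prefix + "/")) return pathname;  // not under this mount
  return pathname.slice(prefix.length);                     // drop the prefix
}
// stripMountPrefix("/mcp/tools/list", "/mcp") === "/tools/list"
```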
WebSocket Upgrade Support
+
+ If a mounted service implements handleUpgrade(), WebSocket upgrade requests
+ matching the mount prefix are forwarded to it. This enables WebSocket-based protocols like
+ MCP over StreamableHTTP or custom agent protocols.
+
+
+
Unified Health Endpoint
+
+ The GET /health endpoint aggregates health from all mounted services. Each
+ service that implements health() is included in the response:
+
+ createMockSuite() provides a unified lifecycle for LLMock and all mounted
+ services. It creates the server, mounts services, and returns start() /
+ stop() / reset() methods that manage everything together.
+
+ Record once against real APIs, then replay from fixtures for fast, offline development.
+
+
+
+
Record then replay bash
+
# First run: record real API responses
+llmock --record --provider-openai https://api.openai.com -f ./fixtures
+
+# Subsequent runs: replay from recorded fixtures
+llmock -f ./fixtures
+
+
+
CI Pipeline Workflow
+
+ Use the Docker image in CI with --strict mode to ensure every request matches
+ a recorded fixture. No API keys needed, no flaky network calls.
+