From acb833f720e5281d2d2d2e9679258cd306f87c03 Mon Sep 17 00:00:00 2001 From: Al Jami Islam Anik Date: Thu, 2 Apr 2026 16:34:52 +0600 Subject: [PATCH 1/2] feat: integrate Ollama and Chatterbox TTS providers, enhance README and CLI options - Added support for Ollama as a local LLM and image provider, including interactive model selection. - Introduced Chatterbox TTS for text-to-speech functionality, with setup instructions and requirements. - Updated README to reflect new prerequisites and usage instructions for local development. - Enhanced CLI options to include new parameters for Ollama and Chatterbox configurations. - Improved cost estimation logic to account for free local providers. - Added validation for local provider availability and setup processes. This update significantly expands the capabilities of the pipeline for local development and usage. --- .gitignore | 8 +- README.md | 98 ++++++++++++++++++- scripts/chatterbox_tts.py | 89 ++++++++++++++++++ src/agents/creative-director.ts | 15 ++- src/cli/args.test.ts | 6 ++ src/cli/args.ts | 26 +++++- src/cli/chatterbox-setup.ts | 146 +++++++++++++++++++++++++++++ src/cli/cost-estimator.ts | 40 ++++++-- src/cli/ollama-setup.ts | 152 ++++++++++++++++++++++++++++++ src/cli/validate-env.ts | 72 +++++++++++--- src/index.ts | 161 +++++++++++++++++++++++++++++++- src/pipeline/orchestrator.ts | 107 +++++++++++++++------ src/providers/factory.ts | 27 +++++- src/providers/image/ollama.ts | 64 +++++++++++++ src/providers/llm/ollama.ts | 105 +++++++++++++++++++++ src/providers/tts/chatterbox.ts | 127 +++++++++++++++++++++++++ src/schema/providers.ts | 6 +- 17 files changed, 1178 insertions(+), 71 deletions(-) create mode 100644 scripts/chatterbox_tts.py create mode 100644 src/cli/chatterbox-setup.ts create mode 100644 src/cli/ollama-setup.ts create mode 100644 src/providers/image/ollama.ts create mode 100644 src/providers/llm/ollama.ts create mode 100644 src/providers/tts/chatterbox.ts diff --git a/.gitignore b/.gitignore index 45e9841..d901635 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,10 @@ dist/ *.tsbuildinfo .env staging_docs/ -output/ \ No newline at end of file +output/ + +# macOS +.DS_Store + +# Python version managers +.python-version \ No newline at end of file diff --git a/README.md b/README.md index 5ead90b..84eefbb 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ docker compose run worker npx tsx src/index.ts --yes "5 stoic lessons that chang ### Local development -**Prerequisites:** Node.js 22+, pnpm, ffprobe (for stock video duration detection) +**Prerequisites:** Node.js 22+, pnpm, ffprobe (for stock video duration detection), Python 3.11 or 3.12 (only required for `--tts-provider chatterbox` — Python 3.13+ is not supported due to PyTorch/OpenMP issues) ```bash git clone https://github.com/tsensei/OpenReels.git @@ -99,18 +99,110 @@ pnpm start "your topic" --archetype anime_illustration --provider openai **Optional:** `PEXELS_API_KEY` ([Pexels](https://www.pexels.com/api/)), `PIXABAY_API_KEY` ([Pixabay](https://pixabay.com/api/docs/)) for stock footage (free registration) +### Zero-API-key local mode (Ollama + Chatterbox) + +Run the full pipeline with **no API keys** using local open-source models. + +#### One-time setup + +```bash +# 1. Install and start Ollama (macOS) +brew install ollama +ollama serve # keep running in a separate terminal + +# 2. 
Pull your preferred LLM model (one-time — pick one) +ollama pull llama3.1:8b # ~5 GB, fast and reliable +ollama pull gemma3:9b # ~6 GB, good quality +ollama pull qwen2.5:7b # ~5 GB, multilingual + +# 3. Pull an image generation model — macOS only (one-time — pick one) +ollama pull x/flux2-klein:4b # ~6 GB, fastest +ollama pull x/flux2-klein:9b # ~12 GB, higher quality +ollama pull x/z-image-turbo:fp8 # ~13 GB, photorealistic +``` + +> **Interactive model selection:** When you run with `--provider ollama`, OpenReels will show you all pulled models and let you choose interactively. No need to memorise model names. + +> **Chatterbox is auto-installed:** OpenReels automatically creates an isolated Python venv at `~/.openreels/chatterbox-venv` and installs `chatterbox-tts` on first use. You only need Python 3.11 or 3.12 on your system (`brew install python@3.12` on macOS). Python 3.13+ is not supported. + +> **First run note:** Chatterbox Turbo downloads ~1.5 GB of model weights on first use. This is automatic and cached locally (`~/.cache/huggingface/`). Expect 2–5 minutes on a typical connection. + +> **GPU recommended:** Chatterbox is significantly faster on Apple Silicon (MPS) or a CUDA GPU. CPU generation works but is slow (10–30× slower than real-time). + +#### Running with no API keys + +```bash +# Interactive — OpenReels will prompt you to choose a model and describe your topic +pnpm start "your topic" \ + --provider ollama \ + --tts-provider chatterbox \ + --image-provider ollama + +# Non-interactive — supply context via --brief and pin specific models +pnpm start "your topic" \ + --provider ollama \ + --tts-provider chatterbox \ + --image-provider ollama \ + --ollama-model llama3.1:8b \ + --ollama-image-model x/flux2-klein:4b \ + --brief "Solar panels cost $10k upfront but save $50k over 20 years. Mood: informative." +``` + +> **Linux/Windows users:** Ollama image generation is currently macOS-only. Use `--image-provider gemini` (free tier available) or `--image-provider openai` instead, and provide the relevant API key. + +#### Mix and match — combine free and paid providers + +Each provider is independent. You can freely mix local and cloud options to get the best trade-off between cost, speed, and quality. 
+
+```bash
+# Best quality script + free TTS + free images (macOS for now)
+# Requires: ANTHROPIC_API_KEY
+pnpm start "your topic" \
+  --provider anthropic \
+  --tts-provider chatterbox \
+  --image-provider ollama
+
+# Free script + paid TTS for higher quality voice + free images (macOS for now)
+# Requires: ELEVENLABS_API_KEY
+pnpm start "your topic" \
+  --provider ollama \
+  --tts-provider elevenlabs \
+  --image-provider ollama
+
+# Free everything on Linux/Windows (Ollama image gen is macOS-only, use Gemini instead)
+# Requires: GOOGLE_API_KEY
+pnpm start "your topic" \
+  --provider ollama \
+  --tts-provider chatterbox \
+  --image-provider gemini
+
+# Free script + free TTS + best image quality (OpenAI DALL-E)
+# Requires: OPENAI_API_KEY
+pnpm start "your topic" \
+  --provider ollama \
+  --tts-provider chatterbox \
+  --image-provider openai
+```
+
 ### CLI flags
 
 | Flag | Description | Default |
 |------|-------------|---------|
 | `--archetype <name>` | Override visual archetype | LLM chooses |
-| `--provider <name>` | LLM provider (`anthropic` or `openai`) | `anthropic` |
-| `--tts-provider <name>` | TTS provider (`elevenlabs` or `inworld`) | `elevenlabs` |
+| `--provider <name>` | LLM provider (`anthropic`, `openai`, `ollama`) | `anthropic` |
+| `--tts-provider <name>` | TTS provider (`elevenlabs`, `inworld`, `chatterbox`) | `elevenlabs` |
+| `-i, --image-provider <name>` | Image provider (`gemini`, `openai`, `ollama`) | `gemini` |
 | `--platform <name>` | Target platform (`youtube`, `tiktok`, `instagram`) | `youtube` |
 | `--dry-run` | Output DirectorScore JSON without generating assets | off |
 | `--preview` | Open Remotion Studio after rendering | off |
 | `-o, --output <dir>` | Output directory | `./output` |
 | `-y, --yes` | Auto-confirm cost estimation (for Docker/CI) | off |
+| `--brief <text>` | Topic context for Ollama mode (skips interactive prompt) | — |
+| `--ollama-model <name>` | Ollama LLM model name | interactive selection |
+| `--ollama-image-model <name>` | Ollama image generation model name | interactive selection |
+| `--ollama-host <url>` | Ollama API host URL | `http://localhost:11434` |
+| `--chatterbox-device <device>` | PyTorch device for Chatterbox (`cpu`, `cuda`, `mps`) | auto-detected |
+| `--chatterbox-audio-prompt <path>` | Reference WAV for Chatterbox voice cloning (5–10s) | — |
 
 ## Archetypes
 
diff --git a/scripts/chatterbox_tts.py b/scripts/chatterbox_tts.py
new file mode 100644
index 0000000..1887623
--- /dev/null
+++ b/scripts/chatterbox_tts.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+"""
+Chatterbox Turbo TTS bridge for OpenReels.
+
+Generates speech from text using ResembleAI's Chatterbox Turbo model and writes:
+  - A WAV audio file
+  - A JSON file containing approximate word-level timestamps
+
+Usage:
+    python scripts/chatterbox_tts.py \
+        --text "Your script here" \
+        --out /tmp/output.wav \
+        --timestamps /tmp/timestamps.json \
+        [--device cpu|cuda|mps] \
+        [--audio-prompt /path/to/reference.wav]
+
+First run will download ~1.5 GB of model weights automatically.
+Subsequent runs use the cached weights (usually ~/.cache/huggingface/).
+ +Requirements: + pip install chatterbox-tts +""" + +import argparse +import json +import sys + + +def main() -> None: + parser = argparse.ArgumentParser(description="Chatterbox Turbo TTS bridge") + parser.add_argument("--text", required=True, help="Text to synthesize") + parser.add_argument("--out", required=True, help="Output WAV file path") + parser.add_argument("--timestamps", required=True, help="Output JSON timestamps file path") + parser.add_argument( + "--device", + default="cpu", + choices=["cpu", "cuda", "mps"], + help="PyTorch device (default: cpu; use mps on Apple Silicon, cuda on NVIDIA GPU)", + ) + parser.add_argument( + "--audio-prompt", + default=None, + help="Optional path to a reference WAV file for zero-shot voice cloning (5–10s recommended)", + ) + args = parser.parse_args() + + try: + import torchaudio as ta + from chatterbox.tts_turbo import ChatterboxTurboTTS + except ImportError as e: + print( + f"ERROR: {e}\n" + "Chatterbox Turbo is not installed.\n" + "Install it with: pip install chatterbox-tts\n" + "Python 3.11 is strongly recommended.", + file=sys.stderr, + ) + sys.exit(1) + + print("Loading Chatterbox Turbo model (first run downloads ~1.5 GB)...", file=sys.stderr) + model = ChatterboxTurboTTS.from_pretrained(device=args.device) + + generate_kwargs: dict = {"audio_prompt_path": args.audio_prompt} if args.audio_prompt else {} + wav = model.generate(args.text, **generate_kwargs) + + ta.save(args.out, wav, model.sr) + print(f"Audio saved to: {args.out}", file=sys.stderr) + + # Chatterbox Turbo does not expose word-level timestamps natively. + # We approximate by distributing words evenly across the total audio duration. + # Caption timing will be approximate but functional. + duration_sec: float = wav.shape[-1] / model.sr + words = args.text.split() + if not words: + timestamps = [] + else: + step = duration_sec / len(words) + timestamps = [ + {"word": word, "start": round(i * step, 4), "end": round((i + 1) * step, 4)} + for i, word in enumerate(words) + ] + + with open(args.timestamps, "w", encoding="utf-8") as f: + json.dump(timestamps, f) + print(f"Timestamps saved to: {args.timestamps}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/src/agents/creative-director.ts b/src/agents/creative-director.ts index 3171433..93b1ad8 100644 --- a/src/agents/creative-director.ts +++ b/src/agents/creative-director.ts @@ -34,7 +34,11 @@ export async function generateDirectorScore( llm: LLMProvider, topic: string, researchContext: ResearchResult, - options?: { archetype?: string }, + options?: { + archetype?: string; + /** Restrict which visual types the director may use. Defaults to all four. */ + allowedVisualTypes?: VisualType[]; + }, ): Promise { let systemPrompt = buildDefaultPrompt(); @@ -57,6 +61,13 @@ export async function generateDirectorScore( ? `Use the "${options.archetype}" archetype.` : `Choose from: ${archetypes.join(", ")}`; + const allVisualTypes = VisualType.options; + const allowed: VisualType[] = options?.allowedVisualTypes ?? [...allVisualTypes]; + const visualTypeConstraint = allowed.length === allVisualTypes.length + ? `Use all 4 visual types (ai_image, stock_image, stock_video, text_card).` + : `IMPORTANT: You may ONLY use these visual types: ${allowed.join(", ")}. 
` + + `Do NOT use ${allVisualTypes.filter((t) => !allowed.includes(t)).join(" or ")} — those providers are not available.\n` + const userMessage = `Topic: ${topic} Research context: @@ -69,7 +80,7 @@ Mood: ${researchContext.mood} ${archetypeInstruction} -Create a DirectorScore with 4-7 scenes. Use all 4 visual types (ai_image, stock_image, stock_video, text_card). +Create a DirectorScore with 4-7 scenes. ${visualTypeConstraint} CRITICAL RULE: Never use the same visual_type more than 2 times in a row. Every scene MUST have a script_line (the voiceover text). The first scene should be a strong hook. diff --git a/src/cli/args.test.ts b/src/cli/args.test.ts index c6fb510..4ede5fb 100644 --- a/src/cli/args.test.ts +++ b/src/cli/args.test.ts @@ -15,6 +15,9 @@ describe("CLIOptions type", () => { preview: false, output: "./output", yes: true, + ollamaModel: "llama3.2", + ollamaImageModel: "x/flux2-klein", + ollamaHost: "http://localhost:11434", }; expect(opts.yes).toBe(true); }); @@ -30,6 +33,9 @@ describe("CLIOptions type", () => { preview: false, output: "./output", yes: false, + ollamaModel: "llama3.2", + ollamaImageModel: "x/flux2-klein", + ollamaHost: "http://localhost:11434", }; expect(opts.yes).toBe(false); }); diff --git a/src/cli/args.ts b/src/cli/args.ts index 6823260..843d599 100644 --- a/src/cli/args.ts +++ b/src/cli/args.ts @@ -16,6 +16,14 @@ export interface CLIOptions { preview: boolean; output: string; yes: boolean; + brief?: string; + /** Explicitly provided via --ollama-model. When undefined, model is selected interactively. */ + ollamaModel?: string; + /** Explicitly provided via --ollama-image-model. When undefined, model is selected interactively. */ + ollamaImageModel?: string; + ollamaHost: string; + chatterboxDevice?: string; + chatterboxAudioPrompt?: string; } export function parseArgs(): CLIOptions { @@ -28,17 +36,17 @@ export function parseArgs(): CLIOptions { .argument("", "The topic for your video") .addOption( new Option("-p, --provider ", "LLM provider") - .choices(["anthropic", "openai"]) + .choices(["anthropic", "openai", "ollama"]) .default("anthropic"), ) .addOption( new Option("-i, --image-provider ", "Image generation provider") - .choices(["gemini", "openai"]) + .choices(["gemini", "openai", "ollama"]) .default("gemini"), ) .addOption( new Option("--tts-provider ", "TTS provider") - .choices(["elevenlabs", "inworld"]) + .choices(["elevenlabs", "inworld", "chatterbox"]) .default("elevenlabs"), ) .option("-a, --archetype ", "Visual archetype override") @@ -47,6 +55,12 @@ export function parseArgs(): CLIOptions { .option("--preview", "Open Remotion Studio preview after rendering", false) .option("-o, --output ", "Output directory", "./output") .option("-y, --yes", "Auto-confirm cost estimation prompt (non-interactive mode)", false) + .option("--brief ", "Topic context for Ollama mode (skips interactive prompt)") + .option("--ollama-model ", "Ollama LLM model name (default: interactive selection)") + .option("--ollama-image-model ", "Ollama image generation model name (default: interactive selection)") + .option("--ollama-host ", "Ollama API host", "http://localhost:11434") + .option("--chatterbox-device ", "PyTorch device for Chatterbox TTS (cpu, cuda, mps)") + .option("--chatterbox-audio-prompt ", "Path to reference WAV for Chatterbox voice cloning") .parse(); const topic = program.args[0] ?? 
""; @@ -67,5 +81,11 @@ export function parseArgs(): CLIOptions { preview: opts["preview"] as boolean, output: opts["output"] as string, yes: opts["yes"] as boolean, + brief: opts["brief"] as string | undefined, + ollamaModel: opts["ollamaModel"] as string | undefined, + ollamaImageModel: opts["ollamaImageModel"] as string | undefined, + ollamaHost: opts["ollamaHost"] as string, + chatterboxDevice: opts["chatterboxDevice"] as string | undefined, + chatterboxAudioPrompt: opts["chatterboxAudioPrompt"] as string | undefined, }; } diff --git a/src/cli/chatterbox-setup.ts b/src/cli/chatterbox-setup.ts new file mode 100644 index 0000000..ab48967 --- /dev/null +++ b/src/cli/chatterbox-setup.ts @@ -0,0 +1,146 @@ +/** + * Chatterbox Turbo local TTS setup: Python venv creation and package installation. + */ + +import { spawnSync } from "node:child_process"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +const VENV_DIR = path.join(os.homedir(), ".openreels", "chatterbox-venv"); + +/** + * Ensures chatterbox-tts is available in a managed venv at ~/.openreels/chatterbox-venv. + * + * Strategy (in order): + * 1. If `uv` is available, use `uv venv --python 3.12` + `uv pip install` — handles + * uv-managed Pythons correctly and is 10-100x faster than pip. + * 2. Otherwise fall back to finding a system python3.12/3.11 and using `python -m venv`. + * + * On subsequent runs the venv is detected in ~0ms and skips all setup. + * + * @returns Path to the Python binary inside the venv. + */ +export async function ensureChatterboxVenv(): Promise { + const venvPython = process.platform === "win32" + ? path.join(VENV_DIR, "Scripts", "python.exe") + : path.join(VENV_DIR, "bin", "python"); + + // Fast path: venv already exists and all required packages are importable + if (fs.existsSync(venvPython)) { + const check = spawnSync( + venvPython, + ["-c", "import chatterbox; import torchaudio; import pkg_resources"], + { encoding: "utf-8" }, + ); + if (check.status === 0) return venvPython; + console.info(`\nChatterbox venv found but missing dependencies — reinstalling...\n`); + } else { + console.info( + `\n─────────────────────────────────────────────────────────────\n` + + ` Chatterbox Turbo: one-time setup\n` + + ` Creating isolated Python environment at:\n` + + ` ${VENV_DIR}\n` + + ` Installing chatterbox-tts... 
this takes a few minutes.\n` + + ` Model weights (~1.5 GB) are downloaded on first generation.\n` + + `─────────────────────────────────────────────────────────────\n`, + ); + fs.mkdirSync(path.dirname(VENV_DIR), { recursive: true }); + } + + const uvBin = findUv(); + + if (uvBin) { + // uv path: works with uv-managed Pythons, no ensurepip issues + const venvResult = spawnSync(uvBin, ["venv", "--python", "3.12", VENV_DIR], { stdio: "inherit" }); + if (venvResult.status !== 0) { + // 3.12 not available to uv, try 3.11 + const fallback = spawnSync(uvBin, ["venv", "--python", "3.11", VENV_DIR], { stdio: "inherit" }); + if (fallback.status !== 0) { + console.error( + `\n✗ uv could not create a venv with Python 3.11 or 3.12.\n\n` + + ` Install Python 3.12 via uv: uv python install 3.12\n` + + ` Or via Homebrew: brew install python@3.12\n`, + ); + process.exit(1); + } + } + const installResult = spawnSync( + uvBin, + ["pip", "install", "--python", venvPython, "chatterbox-tts", "setuptools<70"], + { stdio: "inherit" }, + ); + if (installResult.status !== 0) { + console.error(`\n✗ Failed to install chatterbox-tts via uv.\n`); + process.exit(1); + } + } else { + // Standard path: find a system python3.12 or python3.11 and use python -m venv + const systemPython = findCompatiblePython(); + const venvResult = spawnSync(systemPython, ["-m", "venv", VENV_DIR], { stdio: "inherit" }); + if (venvResult.status !== 0) { + console.error(`\n✗ Failed to create venv: ${systemPython} -m venv ${VENV_DIR}\n`); + process.exit(1); + } + const pipBin = process.platform === "win32" + ? path.join(VENV_DIR, "Scripts", "pip.exe") + : path.join(VENV_DIR, "bin", "pip"); + const installResult = spawnSync(pipBin, ["install", "chatterbox-tts", "setuptools<70"], { + stdio: "inherit", + }); + if (installResult.status !== 0) { + console.error(`\n✗ pip install chatterbox-tts failed.\n`); + process.exit(1); + } + } + + console.info(`\n✓ Chatterbox Turbo ready.\n`); + return venvPython; +} + +/** Returns the path to `uv` if available on PATH or common install locations, otherwise null. */ +function findUv(): string | null { + for (const bin of ["uv", "/opt/homebrew/bin/uv", "/usr/local/bin/uv"]) { + const probe = spawnSync(bin, ["--version"], { encoding: "utf-8" }); + if (probe.status === 0) return bin; + } + return null; +} + +/** + * Finds a system-managed python3.12 or python3.11 (non-uv). + * Used as fallback when uv is not available. + * Checks common absolute paths in addition to PATH so it doesn't miss Homebrew installs. + */ +function findCompatiblePython(): string { + const home = os.homedir(); + const candidates = [ + "python3.12", "python3.11", + "/opt/homebrew/bin/python3.12", "/opt/homebrew/bin/python3.11", + "/usr/local/bin/python3.12", "/usr/local/bin/python3.11", + path.join(home, ".pyenv", "shims", "python3.12"), + path.join(home, ".pyenv", "shims", "python3.11"), + ]; + + for (const bin of candidates) { + const probe = spawnSync(bin, ["--version"], { encoding: "utf-8" }); + if (probe.status !== 0) continue; + const version = (probe.stdout ?? probe.stderr ?? "").trim(); + const match = version.match(/Python 3\.(\d+)/); + if (!match) continue; + const minor = parseInt(match[1] ?? 
"0", 10); + if (minor === 11 || minor === 12) return bin; + } + + console.error( + `\n✗ Python 3.11 or 3.12 is required for Chatterbox Turbo.\n` + + ` Python 3.13+ has known PyTorch/OpenMP issues on macOS.\n\n` + + ` The fastest option is to install via uv (already installed):\n` + + ` uv python install 3.12\n\n` + + ` Or install Python 3.12 directly:\n` + + ` macOS: brew install python@3.12\n` + + ` Linux: sudo apt install python3.12\n` + + ` Windows: https://python.org (download 3.12)\n`, + ); + process.exit(1); +} diff --git a/src/cli/cost-estimator.ts b/src/cli/cost-estimator.ts index 08b7d9e..25d7552 100644 --- a/src/cli/cost-estimator.ts +++ b/src/cli/cost-estimator.ts @@ -84,9 +84,19 @@ export function estimateCost( callCost(TOKEN_ESTIMATES.creativeDirector) + callCost(TOKEN_ESTIMATES.critic) + aiImages * callCost(TOKEN_ESTIMATES.imagePrompter); - const ttsPerChar = ttsProvider === "inworld" ? PRICING.inworldPerChar : PRICING.elevenLabsPerChar; + const ttsPerChar = + ttsProvider === "inworld" + ? PRICING.inworldPerChar + : ttsProvider === "chatterbox" + ? 0 + : PRICING.elevenLabsPerChar; const ttsCost = ttsCharacters * ttsPerChar; - const perImage = imageProvider === "openai" ? PRICING.openaiPerImage : PRICING.geminiPerImage; + const perImage = + imageProvider === "openai" + ? PRICING.openaiPerImage + : imageProvider === "ollama" + ? 0 + : PRICING.geminiPerImage; const imageCost = aiImages * perImage; const totalCost = llmCost + ttsCost + imageCost; @@ -97,7 +107,12 @@ export function formatCostEstimate( breakdown: CostBreakdown, imageProvider: ImageProviderKey = "gemini", ): string { - const perImage = imageProvider === "openai" ? PRICING.openaiPerImage : PRICING.geminiPerImage; + const perImage = + imageProvider === "openai" + ? PRICING.openaiPerImage + : imageProvider === "ollama" + ? 0 + : PRICING.geminiPerImage; return [ `Estimated cost: $${breakdown.totalCost.toFixed(3)}`, ` LLM: $${breakdown.llmCost.toFixed(4)} (${breakdown.details.llmCalls} calls)`, @@ -117,14 +132,27 @@ export function computeActualLLMCost( imageProvider: ImageProviderKey = "gemini", ttsProvider: TTSProviderKey = "elevenlabs", ): ActualCostBreakdown { - const p = PRICING[provider]; + // Ollama and Chatterbox are free local providers — cost is $0 + const p = provider === "ollama" + ? { perInputToken: 0, perOutputToken: 0 } + : PRICING[provider]; const totalInputTokens = usages.reduce((sum, u) => sum + u.inputTokens, 0); const totalOutputTokens = usages.reduce((sum, u) => sum + u.outputTokens, 0); const llmCost = totalInputTokens * p.perInputToken + totalOutputTokens * p.perOutputToken; - const ttsPerChar = ttsProvider === "inworld" ? PRICING.inworldPerChar : PRICING.elevenLabsPerChar; + const ttsPerChar = + ttsProvider === "inworld" + ? PRICING.inworldPerChar + : ttsProvider === "chatterbox" + ? 0 + : PRICING.elevenLabsPerChar; const ttsCost = nonLlm.ttsCharacters * ttsPerChar; - const perImage = imageProvider === "openai" ? PRICING.openaiPerImage : PRICING.geminiPerImage; + const perImage = + imageProvider === "openai" + ? PRICING.openaiPerImage + : imageProvider === "ollama" + ? 0 + : PRICING.geminiPerImage; const imageCost = nonLlm.aiImages * perImage; const totalCost = llmCost + ttsCost + imageCost; diff --git a/src/cli/ollama-setup.ts b/src/cli/ollama-setup.ts new file mode 100644 index 0000000..736c2d0 --- /dev/null +++ b/src/cli/ollama-setup.ts @@ -0,0 +1,152 @@ +/** + * Ollama local provider setup: reachability check and interactive model selection. 
+ */ + +import * as readline from "node:readline"; + +/** + * Curated LLM models known to reliably produce structured JSON output. + * Only 7B+ parameter models are included — smaller models consistently fail + * at the constrained JSON generation this pipeline requires. + * Each entry is the exact tag Ollama expects (name:params). + */ +export const KNOWN_LLM_MODELS = [ + "llama3.1:8b", + "llama3.2:latest", // 3b — borderline but usable + "llama3.3:70b", + "mistral:7b", + "mixtral:8x7b", + "gemma3:9b", + "gemma3:27b", + "qwen2.5:7b", + "qwen2.5:14b", + "phi4:14b", + "deepseek-r1:7b", + "deepseek-r1:14b", +]; + +/** + * The only models that support image generation in Ollama (macOS, experimental). + * https://ollama.com/blog/image-generation + * Full tags are required — Ollama returns 404 for bare names like "x/flux2-klein". + */ +export const KNOWN_IMAGE_MODELS = [ + "x/flux2-klein:4b", + "x/flux2-klein:9b", + "x/z-image-turbo:latest", +]; + +interface OllamaTagsResponse { + models?: Array<{ name: string }>; +} + +/** Checks Ollama is reachable and returns the list of locally pulled model names. */ +export async function checkOllamaReachable(host: string): Promise { + const url = `${host.replace(/\/$/, "")}/api/tags`; + let data: OllamaTagsResponse; + try { + const res = await fetch(url, { signal: AbortSignal.timeout(5000) }); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + data = (await res.json()) as OllamaTagsResponse; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + console.error( + `\n✗ Ollama is not reachable at ${host}\n` + + ` Error: ${msg}\n\n` + + ` → Start Ollama: ollama serve\n` + + ` → Install Ollama: https://ollama.com\n` + + ` → Or override host with: --ollama-host \n`, + ); + process.exit(1); + } + + return (data.models ?? []).map((m) => m.name); +} + +/** + * Shows a numbered list of model choices and lets the user pick one. + * + * For each known model, if the user already has it pulled locally we show the + * exact pulled name (e.g. "gemma3:27b") so Ollama receives the right tag. + * Unpulled known models are shown with their recommended tag from KNOWN_*_MODELS. + * + * For image models the list is locked to KNOWN_IMAGE_MODELS only — other locally + * pulled models are never shown because they cannot generate images. + * + * @param lockToKnown When true, only show knownModels (no "other pulled" bucket). + */ +export async function selectOllamaModel( + pulledModels: string[], + knownModels: string[], + label: string, + lockToKnown = false, +): Promise { + // Build a lookup: base name → full pulled tag (e.g. "gemma3" → "gemma3:27b") + // When the user has multiple tags of the same base, prefer non-"latest" tags (more specific). + const pulledByBase = new Map(); + for (const fullTag of pulledModels) { + const base = fullTag.replace(/:.*$/, ""); + const existing = pulledByBase.get(base); + if (!existing || existing === `${base}:latest`) { + pulledByBase.set(base, fullTag); + } + } + + const resolveDisplayName = (knownEntry: string): string => { + const base = knownEntry.replace(/:.*$/, ""); + return pulledByBase.get(base) ?? knownEntry; + }; + + const isPulled = (knownEntry: string): boolean => + pulledByBase.has(knownEntry.replace(/:.*$/, "")); + + const pulledKnown = knownModels.filter(isPulled); + const unpulledKnown = knownModels.filter((m) => !isPulled(m)); + + // LLM selector surfaces other pulled models too; image selector is locked to known list. + const otherPulled = lockToKnown + ? 
[] + : pulledModels.filter((fullTag) => { + const base = fullTag.replace(/:.*$/, ""); + return !knownModels.some((k) => k.replace(/:.*$/, "") === base); + }); + + const options: Array<{ display: string; pulled: boolean }> = [ + ...pulledKnown.map((m) => ({ display: resolveDisplayName(m), pulled: true })), + ...otherPulled.map((m) => ({ display: m, pulled: true })), + ...unpulledKnown.map((m) => ({ display: m, pulled: false })), + ]; + + console.info(`\n─────────────────────────────────────────────────────────────`); + console.info(` Select an Ollama model for ${label}:`); + console.info(` (✓ = already pulled locally; others require: ollama pull )\n`); + + options.forEach(({ display, pulled }, i) => { + console.info(` [${i + 1}] ${pulled ? "✓" : " "} ${display}`); + }); + console.info(` [${options.length + 1}] Enter a custom model name`); + console.info(`─────────────────────────────────────────────────────────────\n`); + + const rl = readline.createInterface({ input: process.stdin, output: process.stdout }); + const ask = (q: string): Promise => + new Promise((resolve) => rl.question(q, (a) => resolve(a.trim()))); + + let chosen = ""; + while (!chosen) { + const raw = await ask(`Your choice (1–${options.length + 1}): `); + const n = parseInt(raw, 10); + + if (n >= 1 && n <= options.length) { + chosen = options[n - 1]?.display ?? ""; + } else if (n === options.length + 1 || (!Number.isInteger(n) && raw.length > 0)) { + const manual = Number.isInteger(n) ? await ask(`Model name (e.g. llama3.1:8b): `) : raw; + if (manual.length > 0) chosen = manual; + } else { + console.info(` Please enter a number between 1 and ${options.length + 1}.`); + } + } + + rl.close(); + console.info(`\n Using model: ${chosen}\n`); + return chosen; +} diff --git a/src/cli/validate-env.ts b/src/cli/validate-env.ts index e351ea4..15ffa6b 100644 --- a/src/cli/validate-env.ts +++ b/src/cli/validate-env.ts @@ -1,4 +1,6 @@ import type { ImageProviderKey, LLMProviderKey, TTSProviderKey } from "../schema/providers.js"; +import { checkOllamaReachable, selectOllamaModel, KNOWN_LLM_MODELS, KNOWN_IMAGE_MODELS } from "./ollama-setup.js"; +import { ensureChatterboxVenv } from "./chatterbox-setup.js"; interface EnvRequirement { key: string; @@ -7,11 +9,25 @@ interface EnvRequirement { required: boolean; } -export function validateEnv(opts: { +export interface ValidateEnvResult { + /** Resolved Ollama LLM model name (may differ from CLI flag if user selected interactively). */ + ollamaModel?: string; + /** Resolved Ollama image model name (may differ from CLI flag if user selected interactively). */ + ollamaImageModel?: string; + /** Path to the venv Python binary for Chatterbox. Undefined when chatterbox is not selected. */ + chatterboxPythonBin?: string; +} + +export async function validateEnv(opts: { provider: LLMProviderKey; ttsProvider: TTSProviderKey; imageProvider: ImageProviderKey; -}): void { + ollamaHost?: string; + /** Explicitly passed --ollama-model. If set, skip interactive selection. */ + ollamaModel?: string; + /** Explicitly passed --ollama-image-model. If set, skip interactive selection. */ + ollamaImageModel?: string; +}): Promise { const requirements: EnvRequirement[] = [ { key: "ANTHROPIC_API_KEY", @@ -47,8 +63,8 @@ export function validateEnv(opts: { const missing = requirements.filter((r) => r.required && !process.env[r.key]); - // Stock keys are optional — the pipeline degrades gracefully (black frames) — but - // warn upfront so users aren't surprised by missing visuals on stock_image/stock_video scenes. 
+ // Stock keys are optional — the pipeline degrades gracefully — but warn upfront + // so users aren't surprised by missing visuals on stock_image/stock_video scenes. const hasStockKey = process.env["PEXELS_API_KEY"] || process.env["PIXABAY_API_KEY"]; if (!hasStockKey) { console.warn( @@ -58,17 +74,43 @@ export function validateEnv(opts: { ); } - if (missing.length === 0) return; + if (missing.length > 0) { + console.error("\nMissing required API keys:\n"); + console.error(" Key Status Get it at"); + console.error(" " + "-".repeat(70)); + for (const r of missing) { + const key = r.key.padEnd(24); + console.error(` ${key}MISSING ${r.signupUrl}`); + } + console.error( + "\nSet these in your .env file (or pass with `docker run --env-file .env` when using Docker).\n", + ); + process.exit(1); + } + + // --- Local provider setup (no API keys needed, but tools must be available) --- + let ollamaModel: string | undefined; + let ollamaImageModel: string | undefined; - console.error("\nMissing required API keys:\n"); - console.error(" Key Status Get it at"); - console.error(" " + "-".repeat(70)); - for (const r of missing) { - const key = r.key.padEnd(24); - console.error(` ${key}MISSING ${r.signupUrl}`); + if (opts.provider === "ollama" || opts.imageProvider === "ollama") { + const host = opts.ollamaHost ?? "http://localhost:11434"; + const pulledModels = await checkOllamaReachable(host); + + if (opts.provider === "ollama") { + ollamaModel = opts.ollamaModel + ?? await selectOllamaModel(pulledModels, KNOWN_LLM_MODELS, "LLM"); + } + + if (opts.imageProvider === "ollama") { + ollamaImageModel = opts.ollamaImageModel + ?? await selectOllamaModel(pulledModels, KNOWN_IMAGE_MODELS, "image generation", true); + } } - console.error( - "\nSet these in your .env file (or pass with `docker run --env-file .env` when using Docker).\n", - ); - process.exit(1); + + let chatterboxPythonBin: string | undefined; + if (opts.ttsProvider === "chatterbox") { + chatterboxPythonBin = await ensureChatterboxVenv(); + } + + return { ollamaModel, ollamaImageModel, chatterboxPythonBin }; } diff --git a/src/index.ts b/src/index.ts index 6292f31..92e74e6 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,31 +1,55 @@ #!/usr/bin/env node +import * as readline from "node:readline"; import { parseArgs } from "./cli/args.js"; import { validateEnv } from "./cli/validate-env.js"; import { createCliCallbacks, runPipeline } from "./pipeline/orchestrator.js"; import { createProviders } from "./providers/factory.js"; +import type { LLMProvider } from "./schema/providers.js"; +import type { ResearchResult } from "./agents/research.js"; async function main(): Promise { const opts = parseArgs(); - // Validate required API keys before constructing providers - validateEnv({ + // Validate required API keys (and local tool availability) before constructing providers. + // When --ollama-model / --ollama-image-model are omitted, validateEnv presents an interactive + // model selection prompt and returns the chosen values in envResult. + const envResult = await validateEnv({ provider: opts.provider, ttsProvider: opts.ttsProvider, imageProvider: opts.imageProvider, + ollamaHost: opts.ollamaHost, + ollamaModel: opts.ollamaModel, + ollamaImageModel: opts.ollamaImageModel, }); - // Initialize providers via factory + // Use model names resolved by validateEnv (interactive selection or explicit flag) + const resolvedOllamaModel = envResult.ollamaModel ?? opts.ollamaModel; + const resolvedOllamaImageModel = envResult.ollamaImageModel ?? 
opts.ollamaImageModel; + + // Initialize all providers via factory const { llm, tts, imageGen, stock } = createProviders({ llm: opts.provider, tts: opts.ttsProvider, image: opts.imageProvider, + ollamaModel: resolvedOllamaModel, + ollamaImageModel: resolvedOllamaImageModel, + ollamaHost: opts.ollamaHost, + chatterboxDevice: opts.chatterboxDevice, + chatterboxAudioPrompt: opts.chatterboxAudioPrompt, + chatterboxPythonBin: envResult.chatterboxPythonBin, }); - // Create CLI callbacks for terminal progress display + // Collect topic brief for Ollama mode (replaces web-search research) + let topicBrief: ResearchResult | undefined; + if (opts.provider === "ollama") { + topicBrief = await collectTopicBrief(opts.topic, llm, opts.brief); + } + + // Build CLI callbacks (wraps ProgressDisplay + cost/log printing) const { callbacks, progress } = createCliCallbacks(opts.yes); - // Run pipeline with CLI callbacks + // Run pipeline const result = await runPipeline( { topic: opts.topic, @@ -41,6 +65,7 @@ async function main(): Promise { preview: opts.preview, outputDir: opts.output, yes: opts.yes, + topicBrief, }, callbacks, ); @@ -54,6 +79,132 @@ async function main(): Promise { } } +/** + * Collects topic context for Ollama mode. + * + * Flow: + * 1. --brief flag → use it directly, no prompts + * 2. Non-TTY (Docker/pipe) → continue with topic only + * 3. Interactive TTY → + * a. Explain why context helps + * b. Offer: [1] AI-guided questions (Ollama generates 3 topic-specific questions) + * [2] Write it yourself (freeform) + * [3] Skip (topic name only) + */ +async function collectTopicBrief( + topic: string, + llm: LLMProvider, + brief?: string, +): Promise { + if (brief) { + console.info(`\n Using provided --brief for research context.\n`); + return { summary: brief, key_facts: [], mood: "neutral", sources: [] }; + } + + if (!process.stdin.isTTY) { + return { summary: `Topic: ${topic}`, key_facts: [], mood: "neutral", sources: [] }; + } + + const divider = `─────────────────────────────────────────────────────────────`; + + console.info( + `\n${divider}\n` + + ` Topic context — optional but recommended\n` + + `${divider}\n\n` + + ` Since you're using Ollama (no web search), providing a little context\n` + + ` helps us write a more accurate and interesting script for you.\n\n` + + ` How would you like to provide context?\n\n` + + ` [1] Guided — We ask you 3 questions about "${topic}"\n` + + ` [2] Freeform — You write a few lines yourself\n` + + ` [3] Skip — Continue with topic name only\n`, + ); + + const rl = readline.createInterface({ input: process.stdin, output: process.stdout }); + const ask = (q: string): Promise => + new Promise((resolve) => rl.question(q, (a) => resolve(a.trim()))); + + let choice = ""; + while (!["1", "2", "3"].includes(choice)) { + choice = await ask(` Your choice (1/2/3): `); + if (!["1", "2", "3"].includes(choice)) { + console.info(` Please enter 1, 2, or 3.`); + } + } + + // ── Option 3: skip ────────────────────────────────────────────────────────── + if (choice === "3") { + rl.close(); + console.info(`\n Continuing with topic name only.\n`); + return { summary: `Topic: ${topic}`, key_facts: [], mood: "neutral", sources: [] }; + } + + // ── Option 2: freeform ────────────────────────────────────────────────────── + if (choice === "2") { + console.info( + `\n Write a few lines about your topic — key facts, context, desired mood.\n` + ); + const text = await ask(` > `); + rl.close(); + console.info(""); + // summary carries the full user text; key_facts left empty to avoid 
duplication. + return { + summary: text || `Topic: ${topic}`, + key_facts: [], + mood: "neutral", + sources: [], + }; + } + + // ── Option 1: AI-guided questions ─────────────────────────────────────────── + console.info(`\n Generating questions for "${topic}"...\n`); + + let questions: string[] = []; + try { + const { z } = await import("zod"); + const QuestionsSchema = z.object({ + questions: z.array(z.string()).length(3), + }); + const result = await llm.generate({ + systemPrompt: + `You are a research assistant helping prepare a short-form video script.\n` + + `Generate exactly 3 specific, open-ended questions that will help gather useful\n` + + `context about the given topic. Questions should target: key facts/events,\n` + + `emotional angle or human interest, and surprising or lesser-known details.\n` + + `Keep each question under 15 words.`, + userMessage: `Topic: "${topic}"`, + schema: QuestionsSchema, + }); + questions = result.data.questions; + } catch { + questions = [ + `What are the most important facts about "${topic}"?`, + `What is the emotional angle or human story here?`, + `What would surprise most people about this topic?`, + ]; + } + + console.info(` Answer each question (or press Enter to skip):\n`); + + const answers: string[] = []; + for (let i = 0; i < questions.length; i++) { + console.info(` ${i + 1}. ${questions[i]}`); + const answer = await ask(` > `); + answers.push(answer); + console.info(""); + } + + rl.close(); + + // key_facts = each non-empty answer as a standalone atomic fact. + // summary = prose joining all answers so the director has full narrative context. + const key_facts = answers.filter(Boolean); + const summary = key_facts.length > 0 + ? key_facts.join(". ") + : `Topic: ${topic}`; + + return { summary, key_facts, mood: "neutral", sources: [] }; +} + main().catch((err) => { console.error("\nPipeline failed:", err instanceof Error ? err.message : String(err)); process.exit(1); diff --git a/src/pipeline/orchestrator.ts b/src/pipeline/orchestrator.ts index b3b14ce..e3a56b5 100644 --- a/src/pipeline/orchestrator.ts +++ b/src/pipeline/orchestrator.ts @@ -20,7 +20,7 @@ import { getArchetype } from "../config/archetype-registry.js"; import { getPlatformConfig } from "../config/platforms.js"; import { getTotalDurationInFrames, mapScoreToProps } from "../remotion/lib/score-to-props.js"; import type { ArchetypeConfig } from "../schema/archetype.js"; -import type { DirectorScore } from "../schema/director-score.js"; +import type { DirectorScore, VisualType } from "../schema/director-score.js"; import type { ImageProvider, ImageProviderKey, @@ -31,6 +31,7 @@ import type { TTSProviderKey, WordTimestamp, } from "../schema/providers.js"; +import type { ResearchResult } from "../agents/research.js"; // Stage names matching the pipeline execution order export const STAGE_NAMES = [ @@ -121,6 +122,8 @@ export interface PipelineOptions { preview: boolean; outputDir: string; yes: boolean; + /** Pre-supplied research result (Ollama local mode). When set, the research agent LLM call is skipped. 
*/ + topicBrief?: ResearchResult; } export interface PipelineResult { @@ -173,26 +176,36 @@ export async function runPipeline( try { // Stage 1: Research cb.onStageStart?.("research"); - let researchResult; + let researchResult: ResearchResult; const researchStart = Date.now(); - try { - const researchOutput = await research(opts.llm, opts.topic); - researchResult = researchOutput.data; - llmUsages.push(researchOutput.usage); + + if (opts.topicBrief) { + // Ollama local mode: user-supplied brief, no LLM web-search call needed + researchResult = opts.topicBrief; const dur = (Date.now() - researchStart) / 1000; - cb.onStageComplete?.("research", `${researchResult.key_facts.length} facts`, dur); + cb.onStageComplete?.("research", `provided brief (${researchResult.key_facts.length} facts)`, dur); cb.onProgress?.("research", { type: "results", summary: researchResult.summary, key_facts: researchResult.key_facts, mood: researchResult.mood }); - log.stages.push({ name: "research", duration: dur, status: "done" }); - } catch (err) { - const dur = (Date.now() - researchStart) / 1000; - cb.onStageSkip?.("research", "web search failed"); - log.stages.push({ name: "research", duration: dur, status: "skipped", error: String(err) }); - researchResult = { - summary: `Topic: ${opts.topic}`, - key_facts: [], - mood: "informative", - sources: [], - }; + log.stages.push({ name: "research", duration: dur, status: "brief" }); + } else { + try { + const researchOutput = await research(opts.llm, opts.topic); + researchResult = researchOutput.data; + llmUsages.push(researchOutput.usage); + const dur = (Date.now() - researchStart) / 1000; + cb.onStageComplete?.("research", `${researchResult.key_facts.length} facts`, dur); + cb.onProgress?.("research", { type: "results", summary: researchResult.summary, key_facts: researchResult.key_facts, mood: researchResult.mood }); + log.stages.push({ name: "research", duration: dur, status: "done" }); + } catch (err) { + const dur = (Date.now() - researchStart) / 1000; + cb.onStageSkip?.("research", "web search failed"); + log.stages.push({ name: "research", duration: dur, status: "skipped", error: String(err) }); + researchResult = { + summary: `Topic: ${opts.topic}`, + key_facts: [], + mood: "informative", + sources: [], + }; + } } // Check cancellation between stages @@ -203,8 +216,19 @@ export async function runPipeline( // Stage 2: Creative Director cb.onStageStart?.("director"); const cdStart = Date.now(); + + // Only allow visual types whose providers are actually configured. + // Stock types require a Pexels or Pixabay API key — without one they silently produce blank frames. + const stockAvailable = !!(process.env["PEXELS_API_KEY"] || process.env["PIXABAY_API_KEY"]); + const allowedVisualTypes: VisualType[] = [ + "ai_image", + "text_card", + ...(stockAvailable ? (["stock_image", "stock_video"] as const) : []), + ]; + const cdOutput = await generateDirectorScore(opts.llm, opts.topic, researchResult, { archetype: opts.archetype, + allowedVisualTypes, }); const directorScore = cdOutput.data; llmUsages.push(cdOutput.usage); @@ -271,24 +295,47 @@ export async function runPipeline( return { outputDir: runDir, videoPath: null, thumbnailPath: null, scorePath, logPath }; } - // Stage 4: Visual Assets (parallel) + // Stage 4: Visual Assets + // Ollama image generation runs sequentially to avoid overwhelming a single-threaded local model. + // Cloud providers run in parallel for speed. 
cb.onStageStart?.("visuals"); const visualStart = Date.now(); const totalScenes = directorScore.scenes.length; - const sceneResults = await Promise.all( - directorScore.scenes.map(async (scene, i) => { + const sceneResults: Array<{ path: string | null; usage: LLMUsage | null; durationSeconds: number | null }> = []; + + if (opts.imageProvider === "ollama") { + for (let i = 0; i < directorScore.scenes.length; i++) { + const scene = directorScore.scenes[i]!; + process.stderr.write(`\n[visuals] Scene ${i + 1}/${totalScenes}: generating image (${scene.visual_type})...\n`); try { - return await resolveVisualAsset(scene, i, totalScenes, assetsDir, opts, archetypeConfig); + const result = await resolveVisualAsset(scene, i, totalScenes, assetsDir, opts, archetypeConfig); + sceneResults.push(result); + process.stderr.write(`[visuals] Scene ${i + 1}/${totalScenes}: ✓ done\n`); } catch (err) { - cb.onProgress?.("visuals", { type: "asset_failed", scene: i, error: String(err) }); - return { - path: null as string | null, - usage: null as LLMUsage | null, - durationSeconds: null, - }; + const msg = err instanceof Error ? err.message : String(err); + process.stderr.write(`[visuals] Scene ${i + 1}/${totalScenes}: ✗ failed — ${msg}\n`); + cb.onProgress?.("visuals", { type: "asset_failed", scene: i, error: msg }); + sceneResults.push({ path: null, usage: null, durationSeconds: null }); } - }), - ); + } + } else { + const parallelResults = await Promise.all( + directorScore.scenes.map(async (scene, i) => { + try { + return await resolveVisualAsset(scene, i, totalScenes, assetsDir, opts, archetypeConfig); + } catch (err) { + cb.onProgress?.("visuals", { type: "asset_failed", scene: i, error: String(err) }); + return { + path: null as string | null, + usage: null as LLMUsage | null, + durationSeconds: null, + }; + } + }), + ); + sceneResults.push(...parallelResults); + } + const sceneAssets = sceneResults.map((r) => r.path); const sceneSourceDurations = sceneResults.map((r) => r.durationSeconds); for (const r of sceneResults) { diff --git a/src/providers/factory.ts b/src/providers/factory.ts index eb17e9d..ecfab9c 100644 --- a/src/providers/factory.ts +++ b/src/providers/factory.ts @@ -10,12 +10,15 @@ import type { } from "../schema/providers.js"; import { GeminiImage } from "./image/gemini.js"; import { OpenAIImage } from "./image/openai.js"; +import { OllamaImage } from "./image/ollama.js"; import { AnthropicLLM } from "./llm/anthropic.js"; import { OpenAILLM } from "./llm/openai.js"; +import { OllamaLLM } from "./llm/ollama.js"; import { PexelsStock } from "./stock/pexels.js"; import { PixabayStock } from "./stock/pixabay.js"; import { ElevenLabsTTS } from "./tts/elevenlabs.js"; import { InworldTTS } from "./tts/inworld.js"; +import { ChatterboxTTS } from "./tts/chatterbox.js"; export interface ProviderConfig { llm: LLMProviderKey; @@ -23,6 +26,14 @@ export interface ProviderConfig { image: ImageProviderKey; stock?: StockProviderKey; keys?: Record; + /** Ollama-specific options */ + ollamaModel?: string; + ollamaImageModel?: string; + ollamaHost?: string; + /** Chatterbox-specific options */ + chatterboxDevice?: string; + chatterboxAudioPrompt?: string; + chatterboxPythonBin?: string; } export interface Providers { @@ -38,17 +49,27 @@ export function createProviders(config: ProviderConfig): Providers { const llm: LLMProvider = config.llm === "openai" ? new OpenAILLM(undefined, k["OPENAI_API_KEY"]) - : new AnthropicLLM(undefined, k["ANTHROPIC_API_KEY"]); + : config.llm === "ollama" + ? 
new OllamaLLM(config.ollamaModel, config.ollamaHost) + : new AnthropicLLM(undefined, k["ANTHROPIC_API_KEY"]); const tts: TTSProvider = config.tts === "inworld" ? new InworldTTS(undefined, undefined, k["INWORLD_TTS_API_KEY"]) - : new ElevenLabsTTS(undefined, k["ELEVENLABS_API_KEY"]); + : config.tts === "chatterbox" + ? new ChatterboxTTS({ + device: config.chatterboxDevice, + audioPrompt: config.chatterboxAudioPrompt, + pythonBin: config.chatterboxPythonBin, + }) + : new ElevenLabsTTS(undefined, k["ELEVENLABS_API_KEY"]); const imageGen: ImageProvider = config.image === "openai" ? new OpenAIImage(undefined, k["OPENAI_API_KEY"]) - : new GeminiImage(undefined, k["GOOGLE_API_KEY"]); + : config.image === "ollama" + ? new OllamaImage(config.ollamaImageModel, config.ollamaHost) + : new GeminiImage(undefined, k["GOOGLE_API_KEY"]); const stockKey = config.stock ?? "pexels"; const stock: StockProvider = diff --git a/src/providers/image/ollama.ts b/src/providers/image/ollama.ts new file mode 100644 index 0000000..b531b0a --- /dev/null +++ b/src/providers/image/ollama.ts @@ -0,0 +1,64 @@ +import type { ImageProvider } from "../../schema/providers.js"; + +const DEFAULT_MODEL = "x/flux2-klein:4b"; +const DEFAULT_HOST = "http://localhost:11434"; + +export class OllamaImage implements ImageProvider { + private model: string; + private host: string; + + constructor(model: string = DEFAULT_MODEL, host: string = DEFAULT_HOST) { + if (process.platform !== "darwin") { + throw new Error( + `Ollama image generation is currently macOS-only.\n` + + ` → Use --image-provider gemini or --image-provider openai on Linux/Windows.\n` + + ` → See: https://ollama.com/blog/image-generation`, + ); + } + this.model = model; + this.host = host.replace(/\/$/, ""); + } + + async generate(prompt: string, style?: string): Promise { + const fullPrompt = style + ? `${prompt}. Style: ${style}. Vertical 9:16 aspect ratio, portrait orientation. No text, no watermarks.` + : `${prompt}. Vertical 9:16 aspect ratio, portrait orientation. No text, no watermarks.`; + + const response = await fetch(`${this.host}/api/generate`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: this.model, + prompt: fullPrompt, + stream: false, + }), + }); + + if (!response.ok) { + const body = await response.text(); + throw new Error( + `Ollama image generation failed (${response.status}): ${body}\n` + + ` → Ensure Ollama is running: ollama serve\n` + + ` → Ensure the model is pulled: ollama pull ${this.model}`, + ); + } + + const data = (await response.json()) as OllamaGenerateResponse; + + // Ollama image models return the image as base64 in data.image (singular) + const imageData = data.image; + if (!imageData) { + throw new Error( + `Ollama returned no image data. ` + + `Ensure you are using an image-capable model (e.g. 
x/flux2-klein:4b or x/z-image-turbo:latest).`,
+      );
+    }
+
+    return Buffer.from(imageData, "base64");
+  }
+}
+
+interface OllamaGenerateResponse {
+  image?: string;
+  response: string;
+}
diff --git a/src/providers/llm/ollama.ts b/src/providers/llm/ollama.ts
new file mode 100644
index 0000000..65effcc
--- /dev/null
+++ b/src/providers/llm/ollama.ts
@@ -0,0 +1,105 @@
+import { z } from "zod";
+import type { LLMProvider, LLMResult } from "../../schema/providers.js";
+
+const DEFAULT_MODEL = "llama3.2";
+const DEFAULT_HOST = "http://localhost:11434";
+
+export class OllamaLLM implements LLMProvider {
+  readonly id = "ollama" as const;
+  private model: string;
+  private host: string;
+
+  constructor(model: string = DEFAULT_MODEL, host: string = DEFAULT_HOST) {
+    this.model = model;
+    this.host = host.replace(/\/$/, "");
+    console.info(
+      `ℹ Ollama mode: web search is disabled. You will be asked to provide topic context before the pipeline starts.`,
+    );
+  }
+
+  async generate<T extends z.ZodType>(opts: {
+    systemPrompt: string;
+    userMessage: string;
+    schema: T;
+    enableWebSearch?: boolean;
+  }): Promise<LLMResult<z.infer<T>>> {
+    // Ollama has no web search capability. When called with enableWebSearch=true
+    // (the research agent), we return a stub so the pipeline can continue with
+    // the user-provided topic brief injected via topicBrief in PipelineOptions.
+    if (opts.enableWebSearch) {
+      return {
+        data: {
+          summary: "",
+          key_facts: [],
+          mood: "neutral",
+          sources: [],
+        } as z.infer<T>,
+        usage: { inputTokens: 0, outputTokens: 0 },
+      };
+    }
+
+    return this.generateStructured(opts);
+  }
+
+  private async generateStructured<T extends z.ZodType>(opts: {
+    systemPrompt: string;
+    userMessage: string;
+    schema: T;
+  }): Promise<LLMResult<z.infer<T>>> {
+    const jsonSchema = z.toJSONSchema(opts.schema);
+
+    const systemWithSchema =
+      `${opts.systemPrompt}\n\n` +
+      `You MUST respond with a single valid JSON object that conforms to this JSON Schema:\n` +
+      `${JSON.stringify(jsonSchema, null, 2)}\n\n` +
+      `Do not include any explanation, markdown, or text outside the JSON object.`;
+
+    const response = await fetch(`${this.host}/api/chat`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        model: this.model,
+        format: "json",
+        stream: false,
+        messages: [
+          { role: "system", content: systemWithSchema },
+          { role: "user", content: opts.userMessage },
+        ],
+      }),
+    });
+
+    if (!response.ok) {
+      const body = await response.text();
+      throw new Error(`Ollama API error (${response.status}): ${body}`);
+    }
+
+    const raw = (await response.json()) as OllamaChatResponse;
+    const content = raw.message?.content ?? "";
+
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(content);
+    } catch {
+      throw new Error(`Ollama returned non-JSON content: ${content.slice(0, 200)}`);
+    }
+
+    const result = opts.schema.safeParse(parsed);
+    if (!result.success) {
+      throw new Error(`Ollama response failed schema validation: ${JSON.stringify(result.error)}`);
+    }
+
+    return {
+      data: result.data,
+      usage: {
+        inputTokens: raw.prompt_eval_count ?? 0,
+        outputTokens: raw.eval_count ??
0, + }, + }; + } +} + +interface OllamaChatResponse { + message?: { role: string; content: string }; + prompt_eval_count?: number; + eval_count?: number; +} diff --git a/src/providers/tts/chatterbox.ts b/src/providers/tts/chatterbox.ts new file mode 100644 index 0000000..9652984 --- /dev/null +++ b/src/providers/tts/chatterbox.ts @@ -0,0 +1,127 @@ +import { execFileSync, spawn, spawnSync } from "node:child_process"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import type { TTSProvider, TTSResult, WordTimestamp } from "../../schema/providers.js"; + +const SCRIPT_PATH = path.join(process.cwd(), "scripts", "chatterbox_tts.py"); + +export class ChatterboxTTS implements TTSProvider { + private pythonBin: string; + private device: string; + private audioPrompt: string | null; + + /** + * @param opts.pythonBin Venv Python path returned by validateEnv. When omitted + * (e.g. tests), falls back to searching PATH for python3.12/3.11. + */ + constructor(opts: { device?: string; audioPrompt?: string; pythonBin?: string } = {}) { + this.device = opts.device ?? this.detectDevice(); + this.audioPrompt = opts.audioPrompt ?? null; + this.pythonBin = opts.pythonBin ?? this.resolvePythonBin(); + } + + async generate(text: string): Promise { + const tmpDir = os.tmpdir(); + const id = `openreels-tts-${Date.now()}-${Math.random().toString(36).slice(2)}`; + const wavPath = path.join(tmpDir, `${id}.wav`); + const mp3Path = path.join(tmpDir, `${id}.mp3`); + const tsPath = path.join(tmpDir, `${id}.json`); + + try { + const args = [ + SCRIPT_PATH, + "--text", text, + "--out", wavPath, + "--timestamps", tsPath, + "--device", this.device, + ]; + if (this.audioPrompt) { + args.push("--audio-prompt", this.audioPrompt); + } + + console.info( + `\nChatterbox Turbo: generating audio` + + (this.device !== "cpu" ? ` (device: ${this.device})` : ` (device: cpu — may be slow)`) + + `...\n`, + ); + + // Use async spawn so stderr streams live to the terminal (shows model load progress) + // and the Node.js event loop is not blocked during the potentially long model load. + await spawnAsync(this.pythonBin, args); + + if (!fs.existsSync(wavPath)) { + throw new Error(`Chatterbox TTS did not produce output file: ${wavPath}`); + } + + // Convert WAV → MP3 using ffmpeg (already required by the pipeline) + execFileSync("ffmpeg", [ + "-y", "-i", wavPath, + "-codec:a", "libmp3lame", "-q:a", "2", + mp3Path, + ], { stdio: "pipe" }); + + const audio = fs.readFileSync(mp3Path); + + const rawTimestamps = JSON.parse(fs.readFileSync(tsPath, "utf-8")) as unknown[]; + const words: WordTimestamp[] = rawTimestamps + .filter((t): t is { word: string; start: number; end: number } => + typeof (t as Record)["word"] === "string") + .map((t) => ({ word: t.word, start: t.start, end: t.end })); + + return { audio, words }; + } finally { + for (const f of [wavPath, mp3Path, tsPath]) { + try { fs.unlinkSync(f); } catch { /* ignore cleanup errors */ } + } + } + } + + private resolvePythonBin(): string { + // Mirror the preference order in validate-env: prefer 3.12/3.11 over generic python3 + for (const bin of ["python3.12", "python3.11", "python3", "python"]) { + const probe = spawnSync(bin, ["--version"], { encoding: "utf-8" }); + if (probe.status === 0) return bin; + } + throw new Error( + `Python not found. 
Chatterbox Turbo requires Python 3.11 or 3.12.\n` + + ` → macOS: brew install python@3.12\n` + + ` → Then: pip install chatterbox-tts`, + ); + } + + private detectDevice(): string { + // Prefer MPS on Apple Silicon, fall back to CPU + const platform = process.platform; + if (platform === "darwin") { + const arch = process.arch; + if (arch === "arm64") return "mps"; + } + return "cpu"; + } +} + +/** + * Async wrapper around child_process.spawn. + * Streams stderr live to the terminal so users can see Chatterbox model loading progress. + * Rejects with a descriptive error if the process exits non-zero. + */ +function spawnAsync(bin: string, args: string[]): Promise { + return new Promise((resolve, reject) => { + const child = spawn(bin, args, { + stdio: ["ignore", "pipe", "inherit"], // stdout captured (not used), stderr → terminal live + }); + + child.on("error", (err) => { + reject(new Error(`Failed to start Chatterbox TTS process: ${err.message}`)); + }); + + child.on("close", (code) => { + if (code === 0) { + resolve(); + } else { + reject(new Error(`Chatterbox TTS script exited with code ${code ?? "unknown"}.`)); + } + }); + }); +} diff --git a/src/schema/providers.ts b/src/schema/providers.ts index cf52e05..a3c91fd 100644 --- a/src/schema/providers.ts +++ b/src/schema/providers.ts @@ -1,8 +1,8 @@ import type { z } from "zod"; -export type LLMProviderKey = "anthropic" | "openai"; -export type TTSProviderKey = "elevenlabs" | "inworld"; -export type ImageProviderKey = "gemini" | "openai"; +export type LLMProviderKey = "anthropic" | "openai" | "ollama"; +export type TTSProviderKey = "elevenlabs" | "inworld" | "chatterbox"; +export type ImageProviderKey = "gemini" | "openai" | "ollama"; export type StockProviderKey = "pexels" | "pixabay"; export interface LLMUsage { From eee77ecff36e8f8619ed2b0514dea451fb1e63ff Mon Sep 17 00:00:00 2001 From: Al Jami Islam Anik Date: Thu, 2 Apr 2026 17:18:51 +0600 Subject: [PATCH 2/2] refactor: enhance Ollama integration and improve user interaction in topic context collection - Updated `collectTopicBrief` function to include LLM provider as a parameter, allowing for dynamic question generation based on the selected LLM. - Improved user interaction flow for providing topic context, offering options for guided questions, freeform input, or skipping. - Enhanced cost estimation logic to accommodate the new LLM provider parameter. - Refined error handling and output messages in the Chatterbox TTS provider for better user experience. - Updated relevant interfaces and types to ensure consistency across the pipeline. These changes significantly improve the flexibility and usability of the pipeline for local development. 
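For reviewers, a quick sketch of the new `estimateCost` call shape (illustrative only — `score` stands for any `DirectorScore`, and real call sites are expected to forward the CLI-selected providers):

    // Sketch: assumes a DirectorScore named `score` is in scope (e.g. in src/index.ts).
    import { estimateCost } from "./cli/cost-estimator.js";

    // The LLM provider is now the optional fourth argument, defaulting to "anthropic"
    // so existing call sites keep compiling.
    const cloud = estimateCost(score, "gemini", "elevenlabs", "anthropic"); // cloud token pricing
    const local = estimateCost(score, "ollama", "chatterbox", "ollama");    // LLM tokens priced at $0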
--- src/cli/cost-estimator.ts | 5 ++- src/cli/ollama-setup.ts | 30 ++++++++++++++--- src/index.ts | 7 +--- src/providers/tts/chatterbox.ts | 57 ++++++++++++++++++++++++++++----- 4 files changed, 80 insertions(+), 19 deletions(-) diff --git a/src/cli/cost-estimator.ts b/src/cli/cost-estimator.ts index 25d7552..1346125 100644 --- a/src/cli/cost-estimator.ts +++ b/src/cli/cost-estimator.ts @@ -70,12 +70,15 @@ export function estimateCost( score: DirectorScore, imageProvider: ImageProviderKey = "gemini", ttsProvider: TTSProviderKey = "elevenlabs", + provider: LLMProviderKey = "anthropic", ): CostBreakdown { const aiImages = score.scenes.filter((s) => s.visual_type === "ai_image").length; const ttsCharacters = score.scenes.reduce((sum, s) => sum + s.script_line.length, 0); const llmCalls = 3 + aiImages; // research + CD + critic + 1 per ai_image - const p = PRICING.anthropic; // conservative estimate + const p = provider === "ollama" + ? { perInputToken: 0, perOutputToken: 0 } + : PRICING[provider]; const callCost = (est: { input: number; output: number }) => est.input * p.perInputToken + est.output * p.perOutputToken; diff --git a/src/cli/ollama-setup.ts b/src/cli/ollama-setup.ts index 736c2d0..a37ad4c 100644 --- a/src/cli/ollama-setup.ts +++ b/src/cli/ollama-setup.ts @@ -81,8 +81,10 @@ export async function selectOllamaModel( label: string, lockToKnown = false, ): Promise { - // Build a lookup: base name → full pulled tag (e.g. "gemma3" → "gemma3:27b") - // When the user has multiple tags of the same base, prefer non-"latest" tags (more specific). + // Build two lookups: + // pulledExact: full tag → true (e.g. "x/flux2-klein:4b" → true) + // pulledByBase: base → full tag (e.g. "gemma3" → "gemma3:27b") for base-only entries + const pulledExact = new Set(pulledModels); const pulledByBase = new Map(); for (const fullTag of pulledModels) { const base = fullTag.replace(/:.*$/, ""); @@ -92,13 +94,33 @@ export async function selectOllamaModel( } } + /** + * Resolve display name for a known model entry: + * 1. If the exact tag is pulled, show it as-is. + * 2. If only a different tag of the same base is pulled, show the pulled tag. + * 3. Otherwise show the known entry unchanged (unpulled, recommended tag). + */ const resolveDisplayName = (knownEntry: string): string => { + if (pulledExact.has(knownEntry)) return knownEntry; const base = knownEntry.replace(/:.*$/, ""); return pulledByBase.get(base) ?? knownEntry; }; - const isPulled = (knownEntry: string): boolean => - pulledByBase.has(knownEntry.replace(/:.*$/, "")); + /** + * A known entry is considered "pulled" if: + * - Its exact tag is pulled (e.g. "x/flux2-klein:4b" pulled → "x/flux2-klein:4b" ✓) + * - OR it has no explicit tag in the known list (bare name like "gemma3") and any tag + * of that base is pulled (e.g. user pulled "gemma3:27b" → "gemma3" ✓) + * This prevents x/flux2-klein:9b from showing as pulled just because :4b is pulled. + */ + const isPulled = (knownEntry: string): boolean => { + if (pulledExact.has(knownEntry)) return true; + // Only fall back to base matching when the known entry has no tag (no ":") + // or uses ":latest" — i.e. it's not a specific version tag + const tag = knownEntry.includes(":") ? 
knownEntry.split(":")[1] : null; + if (tag && tag !== "latest") return false; + return pulledByBase.has(knownEntry.replace(/:.*$/, "")); + }; const pulledKnown = knownModels.filter(isPulled); const unpulledKnown = knownModels.filter((m) => !isPulled(m)); diff --git a/src/index.ts b/src/index.ts index 92e74e6..14e52c7 100644 --- a/src/index.ts +++ b/src/index.ts @@ -47,7 +47,7 @@ async function main(): Promise { } // Build CLI callbacks (wraps ProgressDisplay + cost/log printing) - const { callbacks, progress } = createCliCallbacks(opts.yes); + const { callbacks } = createCliCallbacks(opts.yes); // Run pipeline const result = await runPipeline( @@ -70,8 +70,6 @@ async function main(): Promise { callbacks, ); - progress.summary(); - if (result.videoPath) { console.log(`\nDone! Video saved to: ${result.videoPath}`); } else if (opts.dryRun) { @@ -146,7 +144,6 @@ async function collectTopicBrief( const text = await ask(` > `); rl.close(); console.info(""); - // summary carries the full user text; key_facts left empty to avoid duplication. return { summary: text || `Topic: ${topic}`, key_facts: [], @@ -195,8 +192,6 @@ async function collectTopicBrief( rl.close(); - // key_facts = each non-empty answer as a standalone atomic fact. - // summary = prose joining all answers so the director has full narrative context. const key_facts = answers.filter(Boolean); const summary = key_facts.length > 0 ? key_facts.join(". ") diff --git a/src/providers/tts/chatterbox.ts b/src/providers/tts/chatterbox.ts index 9652984..7221e01 100644 --- a/src/providers/tts/chatterbox.ts +++ b/src/providers/tts/chatterbox.ts @@ -40,14 +40,14 @@ export class ChatterboxTTS implements TTSProvider { args.push("--audio-prompt", this.audioPrompt); } - console.info( - `\nChatterbox Turbo: generating audio` + - (this.device !== "cpu" ? ` (device: ${this.device})` : ` (device: cpu — may be slow)`) + + process.stderr.write( + `\n Chatterbox Turbo: synthesising audio` + + (this.device !== "cpu" ? ` on ${this.device}` : ` on cpu (may be slow)`) + `...\n`, ); - // Use async spawn so stderr streams live to the terminal (shows model load progress) - // and the Node.js event loop is not blocked during the potentially long model load. + // Async spawn — stderr is captured and filtered so raw tqdm progress bars + // don't pollute the terminal alongside the Node.js pipeline progress spinner. await spawnAsync(this.pythonBin, args); if (!fs.existsSync(wavPath)) { @@ -103,13 +103,46 @@ export class ChatterboxTTS implements TTSProvider { /** * Async wrapper around child_process.spawn. - * Streams stderr live to the terminal so users can see Chatterbox model loading progress. + * + * Captures Python's stderr and filters it so that: + * - Raw tqdm progress bars (the noisy `0%|█ | 0/1000` lines) are suppressed + * - Key status messages (model loading, "Audio saved") are forwarded as clean lines + * - Any unexpected errors are still surfaced on stderr for debugging + * * Rejects with a descriptive error if the process exits non-zero. 
  */
 function spawnAsync(bin: string, args: string[]): Promise<void> {
   return new Promise((resolve, reject) => {
     const child = spawn(bin, args, {
-      stdio: ["ignore", "pipe", "inherit"], // stdout captured (not used), stderr → terminal live
+      stdio: ["ignore", "ignore", "pipe"], // discard unused stdout; capture stderr for filtering below
+    });
+
+    // Accumulate stderr to surface on failure, and filter noisy lines in real-time
+    const stderrLines: string[] = [];
+    let stderrBuf = "";
+
+    child.stderr?.on("data", (chunk: Buffer) => {
+      stderrBuf += chunk.toString();
+      const lines = stderrBuf.split("\n");
+      stderrBuf = lines.pop() ?? ""; // keep incomplete last line in buffer
+
+      for (const line of lines) {
+        stderrLines.push(line);
+        const cleaned = line.trim();
+        if (!cleaned) continue;
+
+        // Suppress tqdm progress bars: they contain "|" and "it/s" or "%|"
+        if (/\d+%\|/.test(cleaned) || /it\/s\]/.test(cleaned)) continue;
+        // Suppress diffusers FutureWarning noise
+        if (/FutureWarning|LoRACompatible|deprecate/.test(cleaned)) continue;
+        // Suppress tqdm's carriage-return redraw lines (check the raw line; trim() strips "\r")
+        if (/^\r/.test(line)) continue;
+
+        // Forward meaningful status messages cleanly
+        if (/Loading|loaded|Fetching|S3 Token|Audio saved|Timestamps/.test(cleaned)) {
+          process.stderr.write(`  ${cleaned}\n`);
+        }
+      }
     });
 
     child.on("error", (err) => {
@@ -117,10 +150,18 @@ function spawnAsync(bin: string, args: string[]): Promise<void> {
     });
 
     child.on("close", (code) => {
+      // Flush any remaining buffered stderr
+      if (stderrBuf.trim()) stderrLines.push(stderrBuf);
+
       if (code === 0) {
+        process.stderr.write(`  Chatterbox Turbo: audio ready.\n\n`);
         resolve();
       } else {
-        reject(new Error(`Chatterbox TTS script exited with code ${code ?? "unknown"}.`));
+        // On failure, print the last few lines of stderr to aid debugging
+        const tail = stderrLines.slice(-10).join("\n");
+        reject(new Error(
+          `Chatterbox TTS script exited with code ${code ?? "unknown"}.\n${tail}`,
+        ));
       }
     });
   });