Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ path = "benches/srt.rs"
name = "srt"
harness = false

[[bench]]
path = "benches/vtt.rs"
name = "vtt"
harness = false

[features]
default = ["std"]
alloc = ["memchr"]
Expand Down
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,41 @@ fasrt = "0.2"
| `alloc` | No | Enables `CueText` DOM tree and entity decoding without `std` |
| `memchr` | Yes (via `alloc`/`std`) | SIMD-accelerated fast path for entity decoding |

## Benchmarks

Measured on Apple Silicon with `cargo bench` (Criterion).

Comment thread
al8n marked this conversation as resolved.
### SRT

| Benchmark | Input | Time | Throughput |
|-----------|-------|------|------------|
| Parse (strict) | 2 cues, 89 B | ~170 ns | 520 MiB/s |
| Parse (strict) | 26 KB file | ~38 µs | 661 MiB/s |
| Parse (lossy) | 332 files, ~8 MB | ~12.1 ms | 646 MiB/s |
| Collect into `Vec` | 26 KB file | ~40 µs | 616 MiB/s |

### WebVTT

| Benchmark | Input | Time | Throughput |
|-----------|-------|------|------------|
| Parse | 2 cues, 96 B | ~318 ns | 291 MiB/s |
| Parse | Settings + region + style, 354 B | ~915 ns | 387 MiB/s |
| Parse | All WPT fixtures, ~34 KB | ~113 µs | 314 MiB/s |
| Collect into `Vec` | Settings + region + style, 354 B | ~973 ns | 364 MiB/s |

### Cue Text

| Benchmark | Input | Time | Throughput |
|-----------|-------|------|------------|
| Parse | Tags only, 166 B | ~316 ns | 552 MiB/s |
| Parse | 500 timestamps, ~11 KB | ~14.1 µs | 776 MiB/s |

Run benchmarks yourself:

```sh
cargo bench
```

#### License

`fasrt` is under the terms of both the MIT license and the
Expand Down
104 changes: 103 additions & 1 deletion benches/srt.rs
Original file line number Diff line number Diff line change
@@ -1 +1,103 @@
fn main() {}
use std::hint::black_box;

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use fasrt::srt::Parser;

/// Two-cue inline SRT sample (89 B): the smallest parse benchmark input,
/// also used as the runtime fallback when on-disk fixtures are unavailable.
const SMALL_SRT: &str = "\
1
00:00:01,000 --> 00:00:04,000
Hello world!

2
00:00:05,000 --> 00:00:08,000
Goodbye world!
";

/// Real-world SRT file (~26 KB) embedded at compile time.
///
/// NOTE(review): this `include_str!` hard-depends on the `fixtures/` tree at
/// build time; per the review thread, `Cargo.toml` excludes `fixtures/` from
/// the published package, so `cargo bench` would fail to compile from
/// crates.io — confirm packaging (or embed a sample under `benches/`).
const MEDIUM_SRT: &str = include_str!("../fixtures/srt/DeathNote_01.eng.srt");
Comment thread
al8n marked this conversation as resolved.
Copy link

Copilot AI Mar 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

include_str!("../fixtures/srt/DeathNote_01.eng.srt") hard-depends on a fixture file at compile time, but Cargo.toml excludes fixtures/ from the published package. This will make cargo bench fail to compile from crates.io or any checkout without fixtures. Consider reading the file at runtime with a fallback (like the VTT benches do), or include the needed fixtures in the package.

Copilot uses AI. Check for mistakes.

Comment on lines +14 to +17
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MEDIUM_SRT uses include_str!("../fixtures/srt/DeathNote_01.eng.srt"), but Cargo.toml excludes the entire fixtures directory from the published package (exclude = ["tests", "fixtures"]). That means cargo bench will fail to compile for downstream users of the published crate because the included file won’t exist. Consider embedding a smaller sample under benches/, generating the data at runtime, or adjusting the package include/exclude so the referenced fixture is available when benches are built.

Copilot uses AI. Check for mistakes.
/// Concatenates every `.srt` file under `fixtures/srt` (sorted by path) into
/// a single string, separating non-empty accumulated content with blank lines.
///
/// Falls back to the embedded `SMALL_SRT` sample when the fixtures directory
/// cannot be read (e.g. a checkout without fixtures).
fn load_all_fixtures() -> String {
    let Ok(read_dir) = std::fs::read_dir("fixtures/srt") else {
        // Fallback: use a small embedded sample when fixtures are unavailable.
        return SMALL_SRT.to_string();
    };

    // Gather the `.srt` paths first so they can be sorted deterministically.
    let mut srt_paths = Vec::new();
    for entry in read_dir.flatten() {
        let path = entry.path();
        if path.extension().is_some_and(|ext| ext == "srt") {
            srt_paths.push(path);
        }
    }
    srt_paths.sort();

    let mut combined = String::new();
    for path in srt_paths {
        let Ok(content) = std::fs::read_to_string(path) else {
            continue; // Unreadable files are skipped, same as before.
        };
        // Separator only once something has actually been accumulated.
        if !combined.is_empty() {
            combined.push_str("\n\n");
        }
        combined.push_str(&content);
    }
    combined
}

/// Benchmarks SRT parsing throughput over small, medium, and bulk inputs.
fn bench_srt_parse(c: &mut Criterion) {
    let combined = load_all_fixtures();

    let mut g = c.benchmark_group("srt/parse");

    // Small inline SRT (two cues).
    g.throughput(Throughput::Bytes(SMALL_RT_LEN));
    g.bench_function(BenchmarkId::new("strict", "small_2_cues"), |b| {
        b.iter(|| black_box(Parser::strict(black_box(SMALL_SRT)).count()))
    });

    // Medium real-world file (~26 KB).
    g.throughput(Throughput::Bytes(MEDIUM_SRT.len() as u64));
    g.bench_function(BenchmarkId::new("strict", "medium_26kb"), |b| {
        b.iter(|| black_box(Parser::strict(black_box(MEDIUM_SRT)).count()))
    });

    // Every fixture concatenated (~8 MB), parsed in lossy mode.
    g.throughput(Throughput::Bytes(combined.len() as u64));
    g.bench_function(BenchmarkId::new("lossy", "all_fixtures_8mb"), |b| {
        b.iter(|| black_box(Parser::lossy(black_box(&combined)).count()))
    });

    g.finish();
}

/// Byte length of [`SMALL_SRT`], hoisted for readability in the group setup.
const SMALL_RT_LEN: u64 = SMALL_SRT.len() as u64;

/// Measures the allocation overhead of collecting all parsed cues into a `Vec`
/// (rather than just counting them) on the medium (~26 KB) input.
fn bench_srt_collect(c: &mut Criterion) {
    let mut g = c.benchmark_group("srt/collect");

    g.throughput(Throughput::Bytes(MEDIUM_SRT.len() as u64));
    g.bench_function("medium_26kb", |b| {
        b.iter(|| {
            let parsed = Parser::strict(black_box(MEDIUM_SRT))
                .collect::<Result<Vec<_>, _>>()
                .unwrap();
            black_box(parsed.len())
        });
    });

    g.finish();
}

// Register the SRT parse/collect benchmarks and generate Criterion's main().
criterion_group!(benches, bench_srt_parse, bench_srt_collect);
criterion_main!(benches);
176 changes: 176 additions & 0 deletions benches/vtt.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
use std::hint::black_box;

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use fasrt::vtt::Parser;
use fasrt::vtt::cue::CueParser;

/// Two-cue inline WebVTT sample (96 B): the smallest parse benchmark input,
/// also part of the runtime fallback when fixtures are unavailable.
const SMALL_VTT: &str = "\
WEBVTT

00:00:00.000 --> 00:00:01.000
Hello world!

00:00:01.000 --> 00:00:02.000
Goodbye world!
";

/// Feature-dense WebVTT sample (354 B) exercising STYLE and REGION blocks,
/// a NOTE comment, cue identifiers, cue settings, and inline voice/markup
/// tags in cue text.
const SETTINGS_VTT: &str = "\
WEBVTT

STYLE
::cue { color: white; }

REGION
id:region1
width:40%
lines:3
regionanchor:0%,100%
viewportanchor:10%,90%
scroll:up

cue-1
00:00:00.000 --> 00:00:01.000 align:start position:10% size:80% line:0 vertical:rl region:region1
<b>Bold</b> and <i>italic</i> text

NOTE This is a comment

00:00:01.000 --> 00:00:05.000
Second cue with <v Roger Bingham>voice tag</v>
";

/// Concatenates all WPT `.vtt` fixtures — file-parsing directory first, then
/// cue-parsing, each sorted by file name — appending a blank-line separator
/// after every file.
///
/// Falls back to the embedded samples when no fixtures can be read, so the
/// benches still run without the fixtures tree.
fn load_all_vtt_fixtures() -> String {
    let fixture_dirs = [
        "fixtures/webvtt/wpt-file-parsing",
        "fixtures/webvtt/wpt-cue-parsing",
    ];

    let mut combined = String::new();
    for dir in fixture_dirs {
        let Ok(read_dir) = std::fs::read_dir(dir) else {
            continue;
        };
        // Sort entries by file name for a deterministic concatenation order.
        let mut dir_entries: Vec<_> = read_dir.filter_map(Result::ok).collect();
        dir_entries.sort_by_key(|e| e.file_name());

        for entry in dir_entries {
            let path = entry.path();
            if path.extension().is_some_and(|ext| ext == "vtt") {
                if let Ok(text) = std::fs::read_to_string(&path) {
                    combined.push_str(&text);
                    combined.push_str("\n\n");
                }
            }
        }
    }

    if combined.is_empty() {
        // Fallback to embedded samples so benches still run without fixtures.
        combined.push_str(SMALL_VTT);
        combined.push_str("\n\n");
        combined.push_str(SETTINGS_VTT);
    }
    combined
}

/// Benchmarks WebVTT block parsing over small, feature-dense, and bulk inputs.
fn bench_vtt_parse(c: &mut Criterion) {
    let combined = load_all_vtt_fixtures();

    let mut g = c.benchmark_group("vtt/parse");

    // Small inline VTT (two cues).
    g.throughput(Throughput::Bytes(SMALL_VTT.len() as u64));
    g.bench_function(BenchmarkId::new("parse", "small_2_cues"), |b| {
        b.iter(|| black_box(Parser::new(black_box(SMALL_VTT)).count()))
    });

    // VTT with settings, regions, styles, and cue options.
    g.throughput(Throughput::Bytes(SETTINGS_VTT.len() as u64));
    g.bench_function(BenchmarkId::new("parse", "with_settings"), |b| {
        b.iter(|| black_box(Parser::new(black_box(SETTINGS_VTT)).count()))
    });

    // All WPT fixtures concatenated.
    g.throughput(Throughput::Bytes(combined.len() as u64));
    g.bench_function(BenchmarkId::new("parse", "all_fixtures"), |b| {
        b.iter(|| black_box(Parser::new(black_box(&combined)).count()))
    });

    g.finish();
}

/// Measures the allocation overhead of collecting all parsed WebVTT blocks
/// into a `Vec` on the feature-dense settings input.
fn bench_vtt_collect(c: &mut Criterion) {
    let mut g = c.benchmark_group("vtt/collect");

    g.throughput(Throughput::Bytes(SETTINGS_VTT.len() as u64));
    g.bench_function("with_settings", |b| {
        b.iter(|| {
            let parsed = Parser::new(black_box(SETTINGS_VTT))
                .collect::<Result<Vec<_>, _>>()
                .unwrap();
            black_box(parsed.len())
        });
    });

    g.finish();
}

/// Cue text with many timestamp tags to benchmark the cue-text parsing path.
///
/// Produces 500 `word <HH:MM:SS.mmm>text` segments (each exactly 23 bytes),
/// with the timestamp advancing one second per segment and pseudo-random
/// milliseconds (`(i * 7) % 1000`). Total output is 11 500 bytes (~11 KB).
fn build_cue_text_with_timestamps() -> String {
    use std::fmt::Write as _;

    const SEGMENTS: usize = 500;
    // Each segment is "word <" (6) + "HH:MM:SS.mmm" (12) + ">text" (5) = 23
    // bytes, so the final size is known exactly — preallocate once instead of
    // growing, and write!() straight into the buffer instead of allocating an
    // intermediate String per iteration with format!().
    let mut s = String::with_capacity(SEGMENTS * 23);
    for i in 0..SEGMENTS {
        let h = i / 3600;
        let m = (i % 3600) / 60;
        let sec = i % 60;
        let ms = (i * 7) % 1000;
        // Writing into a String is infallible; ignore the fmt::Result.
        let _ = write!(s, "word <{h:02}:{m:02}:{sec:02}.{ms:03}>text");
    }
    s
}

/// Cue text with tags but no timestamps.
///
/// Exercises bold/italic/underline, voice, language, ruby, and classed tags
/// plus a handful of character entities — the tag-parsing path without any
/// timestamp handling. 166 bytes.
const CUE_TEXT_TAGS: &str = "\
<b>bold <i>bold-italic</i></b> plain <u>underline</u> \
<v Roger>voice</v> <lang en>english</lang> <ruby>base<rt>ruby</rt></ruby> \
<c.highlight.big>classed</c> &amp; &lt; &gt; &nbsp; end";

/// Benchmarks the cue-text (inline markup) parsing path, with and without
/// timestamp tags.
fn bench_vtt_cue_text(c: &mut Criterion) {
    let timestamp_heavy = build_cue_text_with_timestamps();

    let mut g = c.benchmark_group("vtt/cue_text");

    // Markup tags and entities only, no timestamp tags.
    g.throughput(Throughput::Bytes(CUE_TEXT_TAGS.len() as u64));
    g.bench_function("tags_only", |b| {
        b.iter(|| black_box(CueParser::new(black_box(CUE_TEXT_TAGS)).count()))
    });

    // Timestamp-heavy cue text (~11 KB).
    g.throughput(Throughput::Bytes(timestamp_heavy.len() as u64));
    g.bench_function("500_timestamps", |b| {
        b.iter(|| black_box(CueParser::new(black_box(&timestamp_heavy)).count()))
    });

    g.finish();
}

// Register all WebVTT benchmarks and generate Criterion's main().
criterion_group!(
    benches,
    bench_vtt_parse,
    bench_vtt_collect,
    bench_vtt_cue_text
);
criterion_main!(benches);
4 changes: 3 additions & 1 deletion ci/miri_sb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,6 @@ cargo miri setup

export MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-disable-isolation -Zmiri-symbolic-alignment-check"

cargo miri test --all-targets --target "$TARGET"
cargo miri test --lib --tests --target "$TARGET"

cargo miri test --doc --target "$TARGET"
4 changes: 3 additions & 1 deletion ci/miri_tb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,6 @@ cargo miri setup

export MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-disable-isolation -Zmiri-symbolic-alignment-check -Zmiri-tree-borrows"

cargo miri test --all-targets --target "$TARGET"
cargo miri test --lib --tests --target "$TARGET"

cargo miri test --doc --target "$TARGET"
Loading
Loading