Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ path = "benches/srt.rs"
name = "srt"
harness = false

[[bench]]
path = "benches/vtt.rs"
name = "vtt"
harness = false

[features]
default = ["std"]
alloc = ["memchr"]
Expand Down
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,41 @@ fasrt = "0.2"
| `alloc` | No | Enables `CueText` DOM tree and entity decoding without `std` |
| `memchr` | Yes (via `alloc`/`std`) | SIMD-accelerated fast path for entity decoding |

## Benchmarks

Measured on Apple Silicon with `cargo bench` (Criterion).

Comment thread
al8n marked this conversation as resolved.
### SRT

| Benchmark | Input | Time | Throughput |
|-----------|-------|------|------------|
| Parse (strict) | 2 cues, 89 B | ~170 ns | 520 MiB/s |
| Parse (strict) | 26 KB file | ~38 µs | 661 MiB/s |
| Parse (lossy) | 332 files, ~8 MB | ~12.1 ms | 646 MiB/s |
| Collect into `Vec` | 26 KB file | ~40 µs | 616 MiB/s |

### WebVTT

| Benchmark | Input | Time | Throughput |
|-----------|-------|------|------------|
| Parse | 2 cues, 96 B | ~318 ns | 291 MiB/s |
| Parse | Settings + region + style, 354 B | ~915 ns | 387 MiB/s |
| Parse | All WPT fixtures, ~34 KB | ~113 µs | 314 MiB/s |
| Collect into `Vec` | Settings + region + style, 354 B | ~973 ns | 364 MiB/s |

### Cue Text

| Benchmark | Input | Time | Throughput |
|-----------|-------|------|------------|
| Parse | Tags only, 166 B | ~316 ns | 552 MiB/s |
| Parse | 500 timestamps, ~11 KB | ~14.1 µs | 776 MiB/s |

Run benchmarks yourself:

```sh
cargo bench
```

#### License

`fasrt` is under the terms of both the MIT license and the
Expand Down
104 changes: 103 additions & 1 deletion benches/srt.rs
Original file line number Diff line number Diff line change
@@ -1 +1,103 @@
fn main() {}
use std::hint::black_box;

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use fasrt::srt::Parser;

/// Two-cue inline SRT sample (89 B): the smallest parse benchmark input,
/// also used as the runtime fallback when on-disk fixtures are unavailable.
const SMALL_SRT: &str = "\
1
00:00:01,000 --> 00:00:04,000
Hello world!

2
00:00:05,000 --> 00:00:08,000
Goodbye world!
";

/// Real-world SRT file (~26 KB) embedded at compile time.
///
/// NOTE(review): this `include_str!` hard-depends on the `fixtures/` tree at
/// build time; per the review thread, `Cargo.toml` excludes `fixtures/` from
/// the published package, so `cargo bench` would fail to compile from
/// crates.io — confirm packaging (or embed a sample under `benches/`).
const MEDIUM_SRT: &str = include_str!("../fixtures/srt/DeathNote_01.eng.srt");
Comment thread
al8n marked this conversation as resolved.
Copy link

Copilot AI Mar 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

include_str!("../fixtures/srt/DeathNote_01.eng.srt") hard-depends on a fixture file at compile time, but Cargo.toml excludes fixtures/ from the published package. This will make cargo bench fail to compile from crates.io or any checkout without fixtures. Consider reading the file at runtime with a fallback (like the VTT benches do), or include the needed fixtures in the package.

Copilot uses AI. Check for mistakes.

Comment on lines +14 to +17
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MEDIUM_SRT uses include_str!("../fixtures/srt/DeathNote_01.eng.srt"), but Cargo.toml excludes the entire fixtures directory from the published package (exclude = ["tests", "fixtures"]). That means cargo bench will fail to compile for downstream users of the published crate because the included file won’t exist. Consider embedding a smaller sample under benches/, generating the data at runtime, or adjusting the package include/exclude so the referenced fixture is available when benches are built.

Copilot uses AI. Check for mistakes.
/// Concatenates every `.srt` file under `fixtures/srt` (sorted by path) into
/// a single string, separating non-empty accumulated content with blank lines.
///
/// Falls back to the embedded `SMALL_SRT` sample when the fixtures directory
/// cannot be read (e.g. a checkout without fixtures).
fn load_all_fixtures() -> String {
    let Ok(read_dir) = std::fs::read_dir("fixtures/srt") else {
        // Fallback: use a small embedded sample when fixtures are unavailable.
        return SMALL_SRT.to_string();
    };

    // Gather the `.srt` paths first so they can be sorted deterministically.
    let mut srt_paths = Vec::new();
    for entry in read_dir.flatten() {
        let path = entry.path();
        if path.extension().is_some_and(|ext| ext == "srt") {
            srt_paths.push(path);
        }
    }
    srt_paths.sort();

    let mut combined = String::new();
    for path in srt_paths {
        let Ok(content) = std::fs::read_to_string(path) else {
            continue; // Unreadable files are skipped, same as before.
        };
        // Separator only once something has actually been accumulated.
        if !combined.is_empty() {
            combined.push_str("\n\n");
        }
        combined.push_str(&content);
    }
    combined
}

/// Benchmarks SRT parsing throughput over small, medium, and bulk inputs.
fn bench_srt_parse(c: &mut Criterion) {
    let combined = load_all_fixtures();

    let mut g = c.benchmark_group("srt/parse");

    // Small inline SRT (two cues).
    g.throughput(Throughput::Bytes(SMALL_RT_LEN));
    g.bench_function(BenchmarkId::new("strict", "small_2_cues"), |b| {
        b.iter(|| black_box(Parser::strict(black_box(SMALL_SRT)).count()))
    });

    // Medium real-world file (~26 KB).
    g.throughput(Throughput::Bytes(MEDIUM_SRT.len() as u64));
    g.bench_function(BenchmarkId::new("strict", "medium_26kb"), |b| {
        b.iter(|| black_box(Parser::strict(black_box(MEDIUM_SRT)).count()))
    });

    // Every fixture concatenated (~8 MB), parsed in lossy mode.
    g.throughput(Throughput::Bytes(combined.len() as u64));
    g.bench_function(BenchmarkId::new("lossy", "all_fixtures_8mb"), |b| {
        b.iter(|| black_box(Parser::lossy(black_box(&combined)).count()))
    });

    g.finish();
}

/// Byte length of [`SMALL_SRT`], hoisted for readability in the group setup.
const SMALL_RT_LEN: u64 = SMALL_SRT.len() as u64;

/// Measures the allocation overhead of collecting all parsed cues into a `Vec`
/// (rather than just counting them) on the medium (~26 KB) input.
fn bench_srt_collect(c: &mut Criterion) {
    let mut g = c.benchmark_group("srt/collect");

    g.throughput(Throughput::Bytes(MEDIUM_SRT.len() as u64));
    g.bench_function("medium_26kb", |b| {
        b.iter(|| {
            let parsed = Parser::strict(black_box(MEDIUM_SRT))
                .collect::<Result<Vec<_>, _>>()
                .unwrap();
            black_box(parsed.len())
        });
    });

    g.finish();
}

// Register the SRT parse/collect benchmarks and generate Criterion's main().
criterion_group!(benches, bench_srt_parse, bench_srt_collect);
criterion_main!(benches);
176 changes: 176 additions & 0 deletions benches/vtt.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
use std::hint::black_box;

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use fasrt::vtt::Parser;
use fasrt::vtt::cue::CueParser;

/// Two-cue inline WebVTT sample (96 B): the smallest parse benchmark input,
/// also part of the runtime fallback when fixtures are unavailable.
const SMALL_VTT: &str = "\
WEBVTT

00:00:00.000 --> 00:00:01.000
Hello world!

00:00:01.000 --> 00:00:02.000
Goodbye world!
";

/// Feature-dense WebVTT sample (354 B) exercising STYLE and REGION blocks,
/// a NOTE comment, cue identifiers, cue settings, and inline voice/markup
/// tags in cue text.
const SETTINGS_VTT: &str = "\
WEBVTT

STYLE
::cue { color: white; }

REGION
id:region1
width:40%
lines:3
regionanchor:0%,100%
viewportanchor:10%,90%
scroll:up

cue-1
00:00:00.000 --> 00:00:01.000 align:start position:10% size:80% line:0 vertical:rl region:region1
<b>Bold</b> and <i>italic</i> text

NOTE This is a comment

00:00:01.000 --> 00:00:05.000
Second cue with <v Roger Bingham>voice tag</v>
";

/// Concatenates all WPT `.vtt` fixtures — file-parsing directory first, then
/// cue-parsing, each sorted by file name — appending a blank-line separator
/// after every file.
///
/// Falls back to the embedded samples when no fixtures can be read, so the
/// benches still run without the fixtures tree.
fn load_all_vtt_fixtures() -> String {
    let fixture_dirs = [
        "fixtures/webvtt/wpt-file-parsing",
        "fixtures/webvtt/wpt-cue-parsing",
    ];

    let mut combined = String::new();
    for dir in fixture_dirs {
        let Ok(read_dir) = std::fs::read_dir(dir) else {
            continue;
        };
        // Sort entries by file name for a deterministic concatenation order.
        let mut dir_entries: Vec<_> = read_dir.filter_map(Result::ok).collect();
        dir_entries.sort_by_key(|e| e.file_name());

        for entry in dir_entries {
            let path = entry.path();
            if path.extension().is_some_and(|ext| ext == "vtt") {
                if let Ok(text) = std::fs::read_to_string(&path) {
                    combined.push_str(&text);
                    combined.push_str("\n\n");
                }
            }
        }
    }

    if combined.is_empty() {
        // Fallback to embedded samples so benches still run without fixtures.
        combined.push_str(SMALL_VTT);
        combined.push_str("\n\n");
        combined.push_str(SETTINGS_VTT);
    }
    combined
}

/// Benchmarks WebVTT block parsing over small, feature-dense, and bulk inputs.
fn bench_vtt_parse(c: &mut Criterion) {
    let combined = load_all_vtt_fixtures();

    let mut g = c.benchmark_group("vtt/parse");

    // Small inline VTT (two cues).
    g.throughput(Throughput::Bytes(SMALL_VTT.len() as u64));
    g.bench_function(BenchmarkId::new("parse", "small_2_cues"), |b| {
        b.iter(|| black_box(Parser::new(black_box(SMALL_VTT)).count()))
    });

    // VTT with settings, regions, styles, and cue options.
    g.throughput(Throughput::Bytes(SETTINGS_VTT.len() as u64));
    g.bench_function(BenchmarkId::new("parse", "with_settings"), |b| {
        b.iter(|| black_box(Parser::new(black_box(SETTINGS_VTT)).count()))
    });

    // All WPT fixtures concatenated.
    g.throughput(Throughput::Bytes(combined.len() as u64));
    g.bench_function(BenchmarkId::new("parse", "all_fixtures"), |b| {
        b.iter(|| black_box(Parser::new(black_box(&combined)).count()))
    });

    g.finish();
}

/// Measures the allocation overhead of collecting all parsed WebVTT blocks
/// into a `Vec` on the feature-dense settings input.
fn bench_vtt_collect(c: &mut Criterion) {
    let mut g = c.benchmark_group("vtt/collect");

    g.throughput(Throughput::Bytes(SETTINGS_VTT.len() as u64));
    g.bench_function("with_settings", |b| {
        b.iter(|| {
            let parsed = Parser::new(black_box(SETTINGS_VTT))
                .collect::<Result<Vec<_>, _>>()
                .unwrap();
            black_box(parsed.len())
        });
    });

    g.finish();
}

/// Cue text with many timestamp tags to benchmark the cue-text parsing path.
///
/// Produces 500 `word <HH:MM:SS.mmm>text` segments (each exactly 23 bytes),
/// with the timestamp advancing one second per segment and pseudo-random
/// milliseconds (`(i * 7) % 1000`). Total output is 11 500 bytes (~11 KB).
fn build_cue_text_with_timestamps() -> String {
    use std::fmt::Write as _;

    const SEGMENTS: usize = 500;
    // Each segment is "word <" (6) + "HH:MM:SS.mmm" (12) + ">text" (5) = 23
    // bytes, so the final size is known exactly — preallocate once instead of
    // growing, and write!() straight into the buffer instead of allocating an
    // intermediate String per iteration with format!().
    let mut s = String::with_capacity(SEGMENTS * 23);
    for i in 0..SEGMENTS {
        let h = i / 3600;
        let m = (i % 3600) / 60;
        let sec = i % 60;
        let ms = (i * 7) % 1000;
        // Writing into a String is infallible; ignore the fmt::Result.
        let _ = write!(s, "word <{h:02}:{m:02}:{sec:02}.{ms:03}>text");
    }
    s
}

/// Cue text with tags but no timestamps.
///
/// Exercises bold/italic/underline, voice, language, ruby, and classed tags
/// plus a handful of character entities — the tag-parsing path without any
/// timestamp handling. 166 bytes.
const CUE_TEXT_TAGS: &str = "\
<b>bold <i>bold-italic</i></b> plain <u>underline</u> \
<v Roger>voice</v> <lang en>english</lang> <ruby>base<rt>ruby</rt></ruby> \
<c.highlight.big>classed</c> &amp; &lt; &gt; &nbsp; end";

/// Benchmarks the cue-text (inline markup) parsing path, with and without
/// timestamp tags.
fn bench_vtt_cue_text(c: &mut Criterion) {
    let timestamp_heavy = build_cue_text_with_timestamps();

    let mut g = c.benchmark_group("vtt/cue_text");

    // Markup tags and entities only, no timestamp tags.
    g.throughput(Throughput::Bytes(CUE_TEXT_TAGS.len() as u64));
    g.bench_function("tags_only", |b| {
        b.iter(|| black_box(CueParser::new(black_box(CUE_TEXT_TAGS)).count()))
    });

    // Timestamp-heavy cue text (~11 KB).
    g.throughput(Throughput::Bytes(timestamp_heavy.len() as u64));
    g.bench_function("500_timestamps", |b| {
        b.iter(|| black_box(CueParser::new(black_box(&timestamp_heavy)).count()))
    });

    g.finish();
}

// Register all WebVTT benchmarks and generate Criterion's main().
criterion_group!(
    benches,
    bench_vtt_parse,
    bench_vtt_collect,
    bench_vtt_cue_text
);
criterion_main!(benches);
4 changes: 3 additions & 1 deletion ci/miri_sb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,6 @@ cargo miri setup

export MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-disable-isolation -Zmiri-symbolic-alignment-check"

cargo miri test --all-targets --target "$TARGET"
cargo miri test --lib --tests --target "$TARGET"

cargo miri test --doc --target "$TARGET"
4 changes: 3 additions & 1 deletion ci/miri_tb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,6 @@ cargo miri setup

export MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-disable-isolation -Zmiri-symbolic-alignment-check -Zmiri-tree-borrows"

cargo miri test --all-targets --target "$TARGET"
cargo miri test --lib --tests --target "$TARGET"

cargo miri test --doc --target "$TARGET"
Loading
Loading