Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,52 @@ jobs:
export CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="$SDE_PATH/sde64 -icx --"
cargo test --all-features

# Run the wasm32 simd128 lib tests under wasmtime. The `cross` job
# above only builds for wasm targets; without this job the
# `wasm_simd128` backend's handcrafted swizzles / clamps / u16
# stores were dispatchable in production (under
# `-C target-feature=+simd128`) but never runtime‑verified. This job
# runs every scalar‑equivalence test — including the new yuv420p10
# u8 / u16 output paths and the adversarial out‑of‑range regressions
# — against an actual wasm runtime.
#
# `wasm32-wasip1` is the wasi preview‑1 target (libstd + file/env
# APIs that the test harness needs). Criterion is gated out of the
# wasm dev‑deps in Cargo.toml because rayon doesn't build for wasi.
test-wasm-simd128:
name: test-wasm-simd128
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
# Persist the cargo registry, cargo git checkouts and `target` between
# runs. The primary key follows the hash of every Cargo.lock in the
# repo; the restore key falls back to the newest cache for this job.
- name: Cache cargo build and registry
uses: actions/cache@v5
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-test-wasm-simd128-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-test-wasm-simd128-
# Bring stable up to date, make it the default toolchain, then add the
# wasm32-wasip1 target that the test step below compiles for.
- name: Install Rust + wasm32-wasip1
run: |
rustup update stable --no-self-update
rustup default stable
rustup target add wasm32-wasip1
# Run the wasmtime install script, then append `$HOME/.wasmtime/bin`
# to `$GITHUB_PATH` so the `wasmtime` binary is on PATH in later steps.
- name: Install wasmtime
run: |
curl https://wasmtime.dev/install.sh -sSf | bash
echo "$HOME/.wasmtime/bin" >> "$GITHUB_PATH"
- name: Run lib tests under wasmtime (simd128)
env:
# `cargo test` hands the compiled `.wasm` test binary as the
# first positional arg after `--`; wasmtime's `run --`
# interprets that as the module path. We don't need filesystem
# or env access — the tests are pure compute.
CARGO_TARGET_WASM32_WASIP1_RUNNER: wasmtime run --
# Compile with the simd128 target feature enabled so the simd128
# code paths are part of the test binary.
RUSTFLAGS: -C target-feature=+simd128
# `--lib` restricts the run to the library's own tests.
run: cargo test --lib --target wasm32-wasip1

sanitizer:
name: sanitizer
runs-on: ubuntu-latest
Expand Down
12 changes: 11 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ harness = false
name = "nv21_to_rgb"
harness = false

[[bench]]
name = "yuv_420p10_to_rgb"
harness = false

[[bench]]
name = "rgb_to_hsv"
harness = false
Expand All @@ -43,9 +47,15 @@ thiserror = { version = "2", default-features = false }
libm = { version = "0.2", optional = true }

[dev-dependencies]
criterion = "0.8"
tempfile = "3"

# Criterion pulls in rayon, which doesn't build for the wasm32‑wasi*
# targets we use to run the simd128 backends under wasmtime. Gate it
# to non‑wasm hosts — benches never run on wasm anyway (they need
# system threading / timing that the wasi runner doesn't expose).
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
criterion = "0.8"

[profile.bench]
opt-level = 3
debug = false
Expand Down
108 changes: 108 additions & 0 deletions benches/yuv_420p10_to_rgb.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
//! Per‑row YUV 4:2:0 10‑bit → packed RGB throughput baseline.
//!
//! Four variants per width, in two pairs:
//! - `u8_simd` / `u8_scalar` — native‑SIMD vs scalar on the u8 output
//! path (analogous to the 8‑bit bench).
//! - `u16_simd` / `u16_scalar` — same pair for the native‑depth u16
//! output path. The u16 path writes 2× the bytes so the MB/s
//! figure is comparable only within the u16 column.

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use std::hint::black_box;

use colconv::{
ColorMatrix,
row::{yuv420p10_to_rgb_row, yuv420p10_to_rgb_u16_row},
};

/// Overwrites every element of `buf` with a deterministic 10-bit
/// pseudo-random value derived from `seed`.
///
/// A 32-bit linear congruential generator is advanced once per
/// element; bits 8..=17 of the new state become the sample, so each
/// stored `u16` lies in `0..=0x3FF` (the value sits in the low 10 bits,
/// the same layout `yuv420p10le` uses). The same `seed` always yields
/// the same sequence.
fn fill_pseudo_random_u16(buf: &mut [u16], seed: u32) {
    const MULTIPLIER: u32 = 1_664_525;
    const INCREMENT: u32 = 1_013_904_223;
    const TEN_BIT_MASK: u32 = 0x3FF;

    let mut lcg = seed;
    buf.fill_with(|| {
        // Step the generator first, then sample from the fresh state.
        lcg = lcg.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
        ((lcg >> 8) & TEN_BIT_MASK) as u16
    });
}

/// Registers the `yuv420p10` row-conversion benchmarks with Criterion.
///
/// Two groups are produced: `yuv420p10_to_rgb_row` (u8 output) and
/// `yuv420p10_to_rgb_u16_row` (u16 output). For each width in `WIDTHS`
/// a group runs the conversion with `use_simd = false` and then with
/// `use_simd = true`, reporting throughput as output bytes per row.
fn bench(c: &mut Criterion) {
    // 720p / 1080p / 4K row widths. All are multiples of 64, so the
    // widest backend (AVX-512, 64 pixels per iteration) never falls into
    // tail handling and cross-target comparisons are not skewed.
    const WIDTHS: &[usize] = &[1280, 1920, 3840];
    const MATRIX: ColorMatrix = ColorMatrix::Bt2020Ncl;
    const FULL_RANGE: bool = false;

    // (use_simd, benchmark label) pairs, scalar first.
    const U8_VARIANTS: [(bool, &str); 2] = [(false, "u8_scalar"), (true, "u8_simd")];
    const U16_VARIANTS: [(bool, &str); 2] = [(false, "u16_scalar"), (true, "u16_simd")];

    // Builds the Y / U / V input planes for one row of `width` pixels:
    // a full-width Y plane and half-width U and V planes, each filled
    // from its own fixed seed.
    fn make_planes(width: usize) -> (Vec<u16>, Vec<u16>, Vec<u16>) {
        let mut luma = vec![0u16; width];
        let mut cb = vec![0u16; width / 2];
        let mut cr = vec![0u16; width / 2];
        fill_pseudo_random_u16(&mut luma, 0x1111);
        fill_pseudo_random_u16(&mut cb, 0x2222);
        fill_pseudo_random_u16(&mut cr, 0x3333);
        (luma, cb, cr)
    }

    // ---- u8 output ------------------------------------------------------
    {
        let mut group = c.benchmark_group("yuv420p10_to_rgb_row");

        for &width in WIDTHS {
            let (y_plane, u_plane, v_plane) = make_planes(width);
            let mut out = vec![0u8; width * 3];

            // Three output bytes per pixel.
            group.throughput(Throughput::Bytes((width * 3) as u64));

            for (use_simd, label) in U8_VARIANTS {
                group.bench_with_input(BenchmarkId::new(label, width), &width, |bencher, &n| {
                    bencher.iter(|| {
                        yuv420p10_to_rgb_row(
                            black_box(&y_plane),
                            black_box(&u_plane),
                            black_box(&v_plane),
                            black_box(&mut out),
                            n,
                            MATRIX,
                            FULL_RANGE,
                            use_simd,
                        );
                    });
                });
            }
        }

        group.finish();
    }

    // ---- u16 native-depth output ----------------------------------------
    {
        let mut group = c.benchmark_group("yuv420p10_to_rgb_u16_row");

        for &width in WIDTHS {
            let (y_plane, u_plane, v_plane) = make_planes(width);
            let mut out = vec![0u16; width * 3];

            // Three u16 samples per pixel: twice the bytes of the u8 path.
            group.throughput(Throughput::Bytes((width * 3 * 2) as u64));

            for (use_simd, label) in U16_VARIANTS {
                group.bench_with_input(BenchmarkId::new(label, width), &width, |bencher, &n| {
                    bencher.iter(|| {
                        yuv420p10_to_rgb_u16_row(
                            black_box(&y_plane),
                            black_box(&u_plane),
                            black_box(&v_plane),
                            black_box(&mut out),
                            n,
                            MATRIX,
                            FULL_RANGE,
                            use_simd,
                        );
                    });
                });
            }
        }

        group.finish();
    }
}

// Bundle `bench` into the `benches` group and let Criterion generate the
// binary's `main` (Cargo.toml sets `harness = false` for this target).
criterion_group!(benches, bench);
criterion_main!(benches);
Loading
Loading