From d512064eeeb152cea0abf2a7c83280002f14c59e Mon Sep 17 00:00:00 2001 From: Geoffrey Claude Date: Wed, 14 Jan 2026 11:04:41 +0100 Subject: [PATCH 1/2] Add strategy-focused InList benchmarks Add a new in_list_strategy benchmark file with targeted coverage of each optimization strategy, without replacing the existing in_list benchmarks which are kept intact for historical comparison. (cherry picked from commit d6e645db508375d3eae7cbedc4353dc4b974cba4) --- datafusion/physical-expr/Cargo.toml | 4 + .../physical-expr/benches/in_list_strategy.rs | 1036 +++++++++++++++++ 2 files changed, 1040 insertions(+) create mode 100644 datafusion/physical-expr/benches/in_list_strategy.rs diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 5854d2957c7f7..b755353d75658 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -71,6 +71,10 @@ rstest = { workspace = true } harness = false name = "in_list" +[[bench]] +harness = false +name = "in_list_strategy" + [[bench]] harness = false name = "case_when" diff --git a/datafusion/physical-expr/benches/in_list_strategy.rs b/datafusion/physical-expr/benches/in_list_strategy.rs new file mode 100644 index 0000000000000..9df31fd9f94eb --- /dev/null +++ b/datafusion/physical-expr/benches/in_list_strategy.rs @@ -0,0 +1,1036 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Focused benchmarks for InList optimizations +//! +//! This benchmark file provides targeted coverage of each optimization strategy +//! with controlled parameters to ensure statistical robustness: +//! +//! - **Controlled match rates**: Tests both "found" and "not found" code paths +//! - **List size scaling**: Measures performance across different list sizes +//! - **Strategy coverage**: Each optimization has dedicated benchmarks +//! - **Reinterpret coverage**: Tests types that use zero-copy reinterpretation +//! - **Stage 2 stress testing**: Prefix-collision strings for two-stage filters +//! - **Null handling**: Tests null short-circuit optimization paths +//! +//! # Optimization Coverage +//! +//! | Strategy | Types | Threshold | List Sizes Tested | +//! |----------|-------|-----------|-------------------| +//! | BitmapFilter (stack) | UInt8 | always | 4, 16 | +//! | BitmapFilter (heap) | Int16 | always | 4, 64, 256 | +//! | BranchlessFilter | Int32, Float32 | ≤32 | 4, 32 | +//! | DirectProbeFilter | Int32, Float32 | >32 | 64, 256 | +//! | BranchlessFilter | Int64, TimestampNs | ≤16 | 4, 16 | +//! | DirectProbeFilter | Int64, TimestampNs | >16 | 32, 128 | +//! | Utf8TwoStageFilter | Utf8 | always | 4, 64, 256 | +//! | ByteViewMaskedFilter | Utf8View | always | 4, 16, 64, 256 | +//! 
| arrow_eq fallback | FixedSizeBinary(16) | always | 4, 64, 256, 10000 | + +use arrow::array::*; +use arrow::datatypes::{Field, Int32Type, Schema}; +use arrow::record_batch::RecordBatch; +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use datafusion_common::ScalarValue; +use datafusion_physical_expr::expressions::{col, in_list, lit}; +use rand::distr::Alphanumeric; +use rand::prelude::*; +use std::sync::Arc; + +const ARRAY_SIZE: usize = 8192; + +/// Match rates to test both code paths (miss-heavy and balanced) +const MATCH_RATES: [u32; 2] = [0, 50]; + +// ============================================================================= +// NUMERIC BENCHMARK HELPERS +// ============================================================================= + +/// Configuration for numeric benchmarks, grouping test parameters. +struct NumericBenchConfig { + list_size: usize, + match_rate: f64, + null_rate: f64, + make_value: fn(&mut StdRng) -> T, + to_scalar: fn(T) -> ScalarValue, + negated: bool, +} + +impl NumericBenchConfig { + fn new( + list_size: usize, + match_rate: f64, + make_value: fn(&mut StdRng) -> T, + to_scalar: fn(T) -> ScalarValue, + ) -> Self { + Self { + list_size, + match_rate, + null_rate: 0.0, + make_value, + to_scalar, + negated: false, + } + } + + fn with_null_rate(mut self, null_rate: f64) -> Self { + self.null_rate = null_rate; + self + } + + fn with_negated(mut self) -> Self { + self.negated = true; + self + } +} + +/// Creates and runs a benchmark for numeric types with controlled match rate. +/// Uses a seed derived from list_size to avoid subset correlation between sizes. 
+fn bench_numeric( + c: &mut Criterion, + group: &str, + name: &str, + cfg: &NumericBenchConfig, +) where + T: Clone, + A: Array + FromIterator> + 'static, +{ + // Use different seed per list_size to avoid subset correlation + let seed = 0xDEAD_BEEF_u64.wrapping_add(cfg.list_size as u64 * 0x1234_5678); + let mut rng = StdRng::seed_from_u64(seed); + + // Generate IN list values + let haystack: Vec = (0..cfg.list_size) + .map(|_| (cfg.make_value)(&mut rng)) + .collect(); + + // Generate array with controlled match rate and null rate + let values: A = (0..ARRAY_SIZE) + .map(|_| { + if cfg.null_rate > 0.0 && rng.random_bool(cfg.null_rate) { + None + } else if !haystack.is_empty() && rng.random_bool(cfg.match_rate) { + Some(haystack.choose(&mut rng).unwrap().clone()) + } else { + Some((cfg.make_value)(&mut rng)) + } + }) + .collect(); + + let schema = Schema::new(vec![Field::new("a", values.data_type().clone(), true)]); + let exprs: Vec<_> = haystack + .iter() + .map(|v: &T| lit((cfg.to_scalar)(v.clone()))) + .collect(); + let expr = in_list(col("a", &schema).unwrap(), exprs, &cfg.negated, &schema).unwrap(); + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values) as ArrayRef]) + .unwrap(); + + c.bench_with_input(BenchmarkId::new(group, name), &batch, |b, batch| { + b.iter(|| expr.evaluate(batch).unwrap()) + }); +} + +// ============================================================================= +// STRING BENCHMARK HELPERS +// ============================================================================= + +fn random_string(rng: &mut StdRng, len: usize) -> String { + String::from_utf8(rng.sample_iter(&Alphanumeric).take(len).collect()).unwrap() +} + +/// Creates a set of strings that share a common prefix but differ in suffix. +/// Uses random alphanumeric suffix to avoid bench-maxing on numeric patterns. 
+fn strings_with_shared_prefix( + rng: &mut StdRng, + count: usize, + prefix_len: usize, +) -> Vec { + let prefix = random_string(rng, prefix_len); + (0..count) + .map(|_| format!("{}{}", prefix, random_string(rng, 8))) // prefix + random 8-char suffix + .collect() +} + +/// Configuration for string benchmarks, grouping test parameters. +struct StringBenchConfig { + list_size: usize, + match_rate: f64, + null_rate: f64, + string_len: usize, + to_scalar: fn(String) -> ScalarValue, + negated: bool, +} + +impl StringBenchConfig { + fn new( + list_size: usize, + match_rate: f64, + string_len: usize, + to_scalar: fn(String) -> ScalarValue, + ) -> Self { + Self { + list_size, + match_rate, + null_rate: 0.0, + string_len, + to_scalar, + negated: false, + } + } + + fn with_null_rate(mut self, null_rate: f64) -> Self { + self.null_rate = null_rate; + self + } + + fn with_negated(mut self) -> Self { + self.negated = true; + self + } +} + +/// Creates and runs a benchmark for string types with controlled match rate. +/// Uses a seed derived from list_size and string_len to avoid correlation. 
+fn bench_string(c: &mut Criterion, group: &str, name: &str, cfg: &StringBenchConfig) +where + A: Array + FromIterator> + 'static, +{ + // Use different seed per (list_size, string_len) to avoid correlation + let seed = 0xCAFE_BABE_u64 + .wrapping_add(cfg.list_size as u64 * 0x1111) + .wrapping_add(cfg.string_len as u64 * 0x2222); + let mut rng = StdRng::seed_from_u64(seed); + + // Generate IN list values + let haystack: Vec = (0..cfg.list_size) + .map(|_| random_string(&mut rng, cfg.string_len)) + .collect(); + + // Generate array with controlled match rate and null rate + let values: A = (0..ARRAY_SIZE) + .map(|_| { + if cfg.null_rate > 0.0 && rng.random_bool(cfg.null_rate) { + None + } else if !haystack.is_empty() && rng.random_bool(cfg.match_rate) { + Some(haystack.choose(&mut rng).unwrap().clone()) + } else { + Some(random_string(&mut rng, cfg.string_len)) + } + }) + .collect(); + + let schema = Schema::new(vec![Field::new("a", values.data_type().clone(), true)]); + let exprs: Vec<_> = haystack + .iter() + .map(|v| lit((cfg.to_scalar)(v.clone()))) + .collect(); + let expr = in_list(col("a", &schema).unwrap(), exprs, &cfg.negated, &schema).unwrap(); + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values) as ArrayRef]) + .unwrap(); + + c.bench_with_input(BenchmarkId::new(group, name), &batch, |b, batch| { + b.iter(|| expr.evaluate(batch).unwrap()) + }); +} + +/// Benchmarks strings with shared prefixes to stress Stage 2 of two-stage filters. +/// Uses variable prefix lengths and random suffixes to avoid bench-maxing. 
+fn bench_string_prefix_collision( + c: &mut Criterion, + group: &str, + name: &str, + list_size: usize, + match_rate: f64, + prefix_len: usize, + to_scalar: fn(String) -> ScalarValue, +) where + A: Array + FromIterator> + 'static, +{ + let seed = 0xFEED_FACE_u64 + .wrapping_add(list_size as u64 * 0x3333) + .wrapping_add(prefix_len as u64 * 0x4444); + let mut rng = StdRng::seed_from_u64(seed); + + // Generate IN list with shared prefix (forces Stage 2) + let haystack = strings_with_shared_prefix(&mut rng, list_size, prefix_len); + + // Generate non-matching strings with SAME prefix (will pass Stage 1, fail Stage 2) + let non_match_pool = strings_with_shared_prefix(&mut rng, 100, prefix_len); + + // Generate array with controlled match rate + let values: A = (0..ARRAY_SIZE) + .map(|_| { + Some(if !haystack.is_empty() && rng.random_bool(match_rate) { + haystack.choose(&mut rng).unwrap().clone() + } else { + non_match_pool.choose(&mut rng).unwrap().clone() + }) + }) + .collect(); + + let schema = Schema::new(vec![Field::new("a", values.data_type().clone(), true)]); + let exprs: Vec<_> = haystack.iter().map(|v| lit(to_scalar(v.clone()))).collect(); + let expr = in_list(col("a", &schema).unwrap(), exprs, &false, &schema).unwrap(); + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values) as ArrayRef]) + .unwrap(); + + c.bench_with_input(BenchmarkId::new(group, name), &batch, |b, batch| { + b.iter(|| expr.evaluate(batch).unwrap()) + }); +} + +/// Benchmarks mixed-length strings (some short ≤12, some long >12). +/// Tests the two-stage filter with realistic length distribution. 
+fn bench_string_mixed_lengths( + c: &mut Criterion, + group: &str, + name: &str, + list_size: usize, + match_rate: f64, + to_scalar: fn(String) -> ScalarValue, +) where + A: Array + FromIterator> + 'static, +{ + let seed = 0xABCD_EF01_u64.wrapping_add(list_size as u64 * 0x5555); + let mut rng = StdRng::seed_from_u64(seed); + + // Mixed lengths: some short (≤12), some long (>12) + let lengths = [4, 8, 12, 16, 20, 24]; + + // Generate IN list with mixed lengths + let haystack: Vec = (0..list_size) + .map(|_| { + let len = *lengths.choose(&mut rng).unwrap(); + random_string(&mut rng, len) + }) + .collect(); + + // Generate array with controlled match rate and mixed lengths + let values: A = (0..ARRAY_SIZE) + .map(|_| { + Some(if !haystack.is_empty() && rng.random_bool(match_rate) { + haystack.choose(&mut rng).unwrap().clone() + } else { + let len = *lengths.choose(&mut rng).unwrap(); + random_string(&mut rng, len) + }) + }) + .collect(); + + let schema = Schema::new(vec![Field::new("a", values.data_type().clone(), true)]); + let exprs: Vec<_> = haystack.iter().map(|v| lit(to_scalar(v.clone()))).collect(); + let expr = in_list(col("a", &schema).unwrap(), exprs, &false, &schema).unwrap(); + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values) as ArrayRef]) + .unwrap(); + + c.bench_with_input(BenchmarkId::new(group, name), &batch, |b, batch| { + b.iter(|| expr.evaluate(batch).unwrap()) + }); +} + +// ============================================================================= +// BITMAP FILTER BENCHMARKS (UInt8, Int16) +// ============================================================================= + +fn bench_bitmap(c: &mut Criterion) { + // UInt8: 32-byte stack-allocated bitmap + // NOTE: With 256 possible values, list_size=16 covers 6.25% of value space, + // so even "match=0%" has ~6% accidental matches from random data. 
+ for list_size in [4, 16] { + for match_pct in MATCH_RATES { + bench_numeric::( + c, + "bitmap", + &format!("u8/list={list_size}/match={match_pct}%"), + &NumericBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + |rng| rng.random(), + |v| ScalarValue::UInt8(Some(v)), + ), + ); + } + } + + // Int16: 8KB heap-allocated bitmap (via zero-copy reinterpret) + for list_size in [4, 64, 256] { + for match_pct in MATCH_RATES { + bench_numeric::( + c, + "bitmap", + &format!("i16/list={list_size}/match={match_pct}%"), + &NumericBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + |rng| rng.random(), + |v| ScalarValue::Int16(Some(v)), + ), + ); + } + } +} + +// ============================================================================= +// PRIMITIVE BENCHMARKS (Branchless vs Hash) +// ============================================================================= + +fn bench_primitive(c: &mut Criterion) { + // Int32: branchless threshold is 32 + for list_size in [4, 32, 64, 256] { + let strategy = if list_size <= 32 { + "branchless" + } else { + "hash" + }; + for match_pct in MATCH_RATES { + bench_numeric::( + c, + "primitive", + &format!("i32/{strategy}/list={list_size}/match={match_pct}%"), + &NumericBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + |rng| rng.random(), + |v| ScalarValue::Int32(Some(v)), + ), + ); + } + } + + // Int64: branchless threshold is 16 + for list_size in [4, 16, 32, 128] { + let strategy = if list_size <= 16 { + "branchless" + } else { + "hash" + }; + for match_pct in MATCH_RATES { + bench_numeric::( + c, + "primitive", + &format!("i64/{strategy}/list={list_size}/match={match_pct}%"), + &NumericBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + |rng| rng.random(), + |v| ScalarValue::Int64(Some(v)), + ), + ); + } + } + + // NOT IN benchmark: test negated path + bench_numeric::( + c, + "primitive", + "i32/branchless/list=16/match=50%/NOT_IN", + &NumericBenchConfig::new( + 16, + 0.5, + |rng| rng.random(), + |v| 
ScalarValue::Int32(Some(v)), + ) + .with_negated(), + ); +} + +// ============================================================================= +// REINTERPRETED TYPE BENCHMARKS (Float32, TimestampNs) +// ============================================================================= + +fn bench_reinterpret(c: &mut Criterion) { + // Float32: reinterpreted as u32, uses same branchless/hash strategies + // Threshold is 32 (same as Int32) + for list_size in [4, 32, 64] { + let strategy = if list_size <= 32 { + "branchless" + } else { + "hash" + }; + for match_pct in MATCH_RATES { + bench_numeric::( + c, + "reinterpret", + &format!("f32/{strategy}/list={list_size}/match={match_pct}%"), + &NumericBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + |rng| rng.random::() * 1000.0, + |v| ScalarValue::Float32(Some(v)), + ), + ); + } + } + + // TimestampNanosecond: reinterpreted as i64, threshold is 16 + for list_size in [4, 16, 32] { + let strategy = if list_size <= 16 { + "branchless" + } else { + "hash" + }; + for match_pct in MATCH_RATES { + bench_numeric::( + c, + "reinterpret", + &format!("timestamp_ns/{strategy}/list={list_size}/match={match_pct}%"), + &NumericBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + |rng| rng.random::().abs(), + |v| ScalarValue::TimestampNanosecond(Some(v), None), + ), + ); + } + } +} + +// ============================================================================= +// UTF8 TWO-STAGE FILTER BENCHMARKS +// ============================================================================= + +fn bench_utf8(c: &mut Criterion) { + let to_scalar: fn(String) -> ScalarValue = |s| ScalarValue::Utf8(Some(s)); + + // Short strings (8 bytes < 12): Stage 1 definitive + for list_size in [4, 64, 256] { + for match_pct in MATCH_RATES { + bench_string::( + c, + "utf8", + &format!("short_8b/list={list_size}/match={match_pct}%"), + &StringBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + 8, + to_scalar, + ), + ); + } + } + + // Long 
strings (24 bytes > 12): hits Stage 2 + for list_size in [4, 64, 256] { + for match_pct in MATCH_RATES { + bench_string::( + c, + "utf8", + &format!("long_24b/list={list_size}/match={match_pct}%"), + &StringBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + 24, + to_scalar, + ), + ); + } + } + + // Mixed-length strings: realistic distribution + for list_size in [16, 64] { + for match_pct in MATCH_RATES { + bench_string_mixed_lengths::( + c, + "utf8", + &format!("mixed_len/list={list_size}/match={match_pct}%"), + list_size, + match_pct as f64 / 100.0, + to_scalar, + ); + } + } + + // Prefix collision: stresses Stage 2 comparison + bench_string_prefix_collision::( + c, + "utf8", + "prefix_collision/pfx=12/list=32/match=50%", + 32, + 0.5, + 12, + to_scalar, + ); + + // NOT IN benchmark + bench_string::( + c, + "utf8", + "short_8b/list=16/match=50%/NOT_IN", + &StringBenchConfig::new(16, 0.5, 8, to_scalar).with_negated(), + ); +} + +// ============================================================================= +// UTF8VIEW TWO-STAGE FILTER BENCHMARKS +// ============================================================================= + +fn bench_utf8view(c: &mut Criterion) { + let to_scalar: fn(String) -> ScalarValue = |s| ScalarValue::Utf8View(Some(s)); + + // Short strings (8 bytes ≤ 12): inline storage path + for list_size in [4, 16, 64, 256] { + for match_pct in MATCH_RATES { + bench_string::( + c, + "utf8view", + &format!("short_8b/list={list_size}/match={match_pct}%"), + &StringBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + 8, + to_scalar, + ), + ); + } + } + + // Boundary strings (exactly 12 bytes): max inline size + for list_size in [16, 64] { + for match_pct in MATCH_RATES { + bench_string::( + c, + "utf8view", + &format!("boundary_12b/list={list_size}/match={match_pct}%"), + &StringBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + 12, + to_scalar, + ), + ); + } + } + + // Long strings (24 bytes > 12): out-of-line storage, 
two-stage filter + for list_size in [4, 16, 64, 256] { + for match_pct in MATCH_RATES { + bench_string::( + c, + "utf8view", + &format!("long_24b/list={list_size}/match={match_pct}%"), + &StringBenchConfig::new( + list_size, + match_pct as f64 / 100.0, + 24, + to_scalar, + ), + ); + } + } + + // Mixed-length strings: realistic distribution + for list_size in [16, 64] { + for match_pct in MATCH_RATES { + bench_string_mixed_lengths::( + c, + "utf8view", + &format!("mixed_len/list={list_size}/match={match_pct}%"), + list_size, + match_pct as f64 / 100.0, + to_scalar, + ); + } + } + + // Prefix collision: stresses Stage 2 comparison with varying prefix lengths + for (prefix_len, list_size) in [(8, 16), (12, 32), (16, 64)] { + for match_pct in MATCH_RATES { + bench_string_prefix_collision::( + c, + "utf8view", + &format!( + "prefix_collision/pfx={prefix_len}/list={list_size}/match={match_pct}%" + ), + list_size, + match_pct as f64 / 100.0, + prefix_len, + to_scalar, + ); + } + } +} + +// ============================================================================= +// DICTIONARY ARRAY BENCHMARKS +// ============================================================================= + +/// Helper to benchmark dictionary-encoded Int32 arrays +fn bench_dict_int32( + c: &mut Criterion, + name: &str, + dict_size: usize, + list_size: usize, + negated: bool, +) { + let seed = 0xD1C7_0000_u64 + .wrapping_add(dict_size as u64 * 0x1111) + .wrapping_add(list_size as u64 * 0x2222); + let mut rng = StdRng::seed_from_u64(seed); + + let dict_values: Vec = (0..dict_size).map(|_| rng.random()).collect(); + let haystack: Vec = dict_values.iter().take(list_size).cloned().collect(); + + let indices: Vec = (0..ARRAY_SIZE) + .map(|_| rng.random_range(0..dict_size as i32)) + .collect(); + let indices_array = Int32Array::from(indices); + let values_array = Int32Array::from(dict_values); + let dict_array = + DictionaryArray::::try_new(indices_array, Arc::new(values_array)) + .unwrap(); + + let schema 
= Schema::new(vec![Field::new("a", dict_array.data_type().clone(), true)]); + let exprs: Vec<_> = haystack + .iter() + .map(|v| lit(ScalarValue::Int32(Some(*v)))) + .collect(); + let expr = in_list(col("a", &schema).unwrap(), exprs, &negated, &schema).unwrap(); + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(dict_array) as ArrayRef]) + .unwrap(); + + c.bench_with_input(BenchmarkId::new("dictionary", name), &batch, |b, batch| { + b.iter(|| expr.evaluate(batch).unwrap()) + }); +} + +/// Helper to benchmark dictionary-encoded string arrays +fn bench_dict_string( + c: &mut Criterion, + name: &str, + dict_size: usize, + list_size: usize, + string_len: usize, +) { + let seed = 0xD1C7_5778_u64 + .wrapping_add(dict_size as u64 * 0x3333) + .wrapping_add(string_len as u64 * 0x4444); + let mut rng = StdRng::seed_from_u64(seed); + + let dict_values: Vec = (0..dict_size) + .map(|_| random_string(&mut rng, string_len)) + .collect(); + let haystack: Vec = dict_values.iter().take(list_size).cloned().collect(); + + let indices: Vec = (0..ARRAY_SIZE) + .map(|_| rng.random_range(0..dict_size as i32)) + .collect(); + let indices_array = Int32Array::from(indices); + let values_array = StringArray::from(dict_values); + let dict_array = + DictionaryArray::::try_new(indices_array, Arc::new(values_array)) + .unwrap(); + + let schema = Schema::new(vec![Field::new("a", dict_array.data_type().clone(), true)]); + let exprs: Vec<_> = haystack + .iter() + .map(|v| lit(ScalarValue::Utf8(Some(v.clone())))) + .collect(); + let expr = in_list(col("a", &schema).unwrap(), exprs, &false, &schema).unwrap(); + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(dict_array) as ArrayRef]) + .unwrap(); + + c.bench_with_input(BenchmarkId::new("dictionary", name), &batch, |b, batch| { + b.iter(|| expr.evaluate(batch).unwrap()) + }); +} + +fn bench_dictionary(c: &mut Criterion) { + // Int32 dictionary: varying list sizes (tests branchless vs hash on values) + // Dictionary 
with 100 unique values + for list_size in [4, 16, 64] { + bench_dict_int32( + c, + &format!("i32/dict=100/list={list_size}"), + 100, + list_size, + false, + ); + } + + // Int32 dictionary: varying dictionary cardinality + for dict_size in [10, 1000] { + bench_dict_int32( + c, + &format!("i32/dict={dict_size}/list=16"), + dict_size, + 16, + false, + ); + } + + // Int32 dictionary: NOT IN path + bench_dict_int32(c, "i32/dict=100/list=16/NOT_IN", 100, 16, true); + + // String dictionary: short strings (≤12 bytes, common for codes/categories) + for list_size in [8, 32] { + bench_dict_string( + c, + &format!("utf8_short/dict=50/list={list_size}"), + 50, + list_size, + 8, + ); + } + + // String dictionary: long strings (>12 bytes) + bench_dict_string(c, "utf8_long/dict=100/list=16", 100, 16, 24); + + // String dictionary: large cardinality (realistic category counts) + bench_dict_string(c, "utf8_short/dict=500/list=20", 500, 20, 10); +} + +// ============================================================================= +// NULL HANDLING BENCHMARKS +// ============================================================================= +// +// Tests null short-circuit optimization paths in: +// - build_in_list_result: computes contains for ALL positions, masks via bitmap ops +// - build_in_list_result_with_null_shortcircuit: skips contains for null positions +// +// The shortcircuit is beneficial for expensive contains checks (strings) but +// adds branch overhead for cheap checks (primitives). 
+ +fn bench_nulls(c: &mut Criterion) { + // ========================================================================= + // PRIMITIVE TYPES: Tests build_in_list_result (no shortcircuit) + // ========================================================================= + + // BitmapFilter with nulls + bench_numeric::( + c, + "nulls", + "bitmap/u8/list=16/match=50%/nulls=20%", + &NumericBenchConfig::new( + 16, + 0.5, + |rng| rng.random(), + |v| ScalarValue::UInt8(Some(v)), + ) + .with_null_rate(0.2), + ); + + // BranchlessFilter with nulls + bench_numeric::( + c, + "nulls", + "branchless/i32/list=16/match=50%/nulls=20%", + &NumericBenchConfig::new( + 16, + 0.5, + |rng| rng.random(), + |v| ScalarValue::Int32(Some(v)), + ) + .with_null_rate(0.2), + ); + + // DirectProbeFilter with nulls + bench_numeric::( + c, + "nulls", + "hash/i32/list=64/match=50%/nulls=20%", + &NumericBenchConfig::new( + 64, + 0.5, + |rng| rng.random(), + |v| ScalarValue::Int32(Some(v)), + ) + .with_null_rate(0.2), + ); + + // ========================================================================= + // STRING TYPES: Tests build_in_list_result_with_null_shortcircuit + // ========================================================================= + + let utf8_scalar: fn(String) -> ScalarValue = |s| ScalarValue::Utf8(Some(s)); + let utf8view_scalar: fn(String) -> ScalarValue = |s| ScalarValue::Utf8View(Some(s)); + + // Utf8TwoStageFilter with nulls (short strings) + bench_string::( + c, + "nulls", + "utf8/short_8b/list=16/match=50%/nulls=20%", + &StringBenchConfig::new(16, 0.5, 8, utf8_scalar).with_null_rate(0.2), + ); + + // Utf8TwoStageFilter with nulls (long strings - Stage 2) + bench_string::( + c, + "nulls", + "utf8/long_24b/list=16/match=50%/nulls=20%", + &StringBenchConfig::new(16, 0.5, 24, utf8_scalar).with_null_rate(0.2), + ); + + // ByteViewMaskedFilter with nulls (short strings - inline) + bench_string::( + c, + "nulls", + "utf8view/short_8b/list=16/match=50%/nulls=20%", + 
&StringBenchConfig::new(16, 0.5, 8, utf8view_scalar).with_null_rate(0.2), + ); + + // ByteViewMaskedFilter with nulls (long strings - out-of-line) + bench_string::( + c, + "nulls", + "utf8view/long_24b/list=16/match=50%/nulls=20%", + &StringBenchConfig::new(16, 0.5, 24, utf8view_scalar).with_null_rate(0.2), + ); + + // ========================================================================= + // NOT IN WITH NULLS: Tests negated path with null propagation + // ========================================================================= + + // Primitive NOT IN with nulls + bench_numeric::( + c, + "nulls", + "branchless/i32/list=16/match=50%/nulls=20%/NOT_IN", + &NumericBenchConfig::new( + 16, + 0.5, + |rng| rng.random(), + |v| ScalarValue::Int32(Some(v)), + ) + .with_null_rate(0.2) + .with_negated(), + ); + + // String NOT IN with nulls + bench_string::( + c, + "nulls", + "utf8view/short_8b/list=16/match=50%/nulls=20%/NOT_IN", + &StringBenchConfig::new(16, 0.5, 8, utf8view_scalar) + .with_null_rate(0.2) + .with_negated(), + ); + + // ========================================================================= + // HIGH NULL RATE: Stress test null handling paths + // ========================================================================= + + // 50% nulls - half the array is null + bench_numeric::( + c, + "nulls", + "branchless/i32/list=16/match=50%/nulls=50%", + &NumericBenchConfig::new( + 16, + 0.5, + |rng| rng.random(), + |v| ScalarValue::Int32(Some(v)), + ) + .with_null_rate(0.5), + ); + + bench_string::( + c, + "nulls", + "utf8view/short_8b/list=16/match=50%/nulls=50%", + &StringBenchConfig::new(16, 0.5, 8, utf8view_scalar).with_null_rate(0.5), + ); +} + +// ============================================================================= +// FIXED SIZE BINARY BENCHMARKS (FixedSizeBinary<16>, e.g. UUIDs) +// ============================================================================= + +/// Generates a random 16-byte value (UUID-sized). 
+fn random_fixed_binary_16(rng: &mut StdRng) -> Vec { + let mut buf = vec![0u8; 16]; + rng.fill(&mut buf[..]); + buf +} + +/// Benchmarks FixedSizeBinary(16) IN list evaluation. +/// FixedSizeBinary doesn't use the generic numeric helpers since its array +/// construction differs from primitive types. +fn bench_fixed_size_binary_inner( + c: &mut Criterion, + name: &str, + list_size: usize, + match_rate: f64, +) { + let seed = 0xF1ED_B1A7_u64.wrapping_add(list_size as u64 * 0x6666); + let mut rng = StdRng::seed_from_u64(seed); + + // Generate IN list values (16-byte each) + let haystack: Vec> = (0..list_size) + .map(|_| random_fixed_binary_16(&mut rng)) + .collect(); + + // Generate array with controlled match rate + let values: Vec> = (0..ARRAY_SIZE) + .map(|_| { + if !haystack.is_empty() && rng.random_bool(match_rate) { + haystack.choose(&mut rng).unwrap().clone() + } else { + random_fixed_binary_16(&mut rng) + } + }) + .collect(); + + let refs: Vec<&[u8]> = values.iter().map(|v| v.as_slice()).collect(); + let array = FixedSizeBinaryArray::from(refs); + + let schema = Schema::new(vec![Field::new("a", array.data_type().clone(), true)]); + let exprs: Vec<_> = haystack + .iter() + .map(|v| lit(ScalarValue::FixedSizeBinary(16, Some(v.clone())))) + .collect(); + let expr = in_list(col("a", &schema).unwrap(), exprs, &false, &schema).unwrap(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array) as ArrayRef]) + .unwrap(); + + c.bench_with_input( + BenchmarkId::new("fixed_size_binary", name), + &batch, + |b, batch| b.iter(|| expr.evaluate(batch).unwrap()), + ); +} + +fn bench_fixed_size_binary(c: &mut Criterion) { + for list_size in [4, 64, 256, 10000] { + for match_pct in MATCH_RATES { + bench_fixed_size_binary_inner( + c, + &format!("fsb16/list={list_size}/match={match_pct}%"), + list_size, + match_pct as f64 / 100.0, + ); + } + } +} + +// ============================================================================= +// CRITERION SETUP +// 
============================================================================= + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = bench_bitmap, bench_primitive, bench_reinterpret, bench_utf8, bench_utf8view, bench_dictionary, bench_nulls, bench_fixed_size_binary +} + +criterion_main!(benches); From fa93228408a878a6480ae7e08e4cabd25b6ada72 Mon Sep 17 00:00:00 2001 From: Geoffrey Claude Date: Wed, 15 Apr 2026 21:08:01 +0200 Subject: [PATCH 2/2] Clarify InList benchmark case descriptions --- .../physical-expr/benches/in_list_strategy.rs | 205 +++++++++--------- 1 file changed, 103 insertions(+), 102 deletions(-) diff --git a/datafusion/physical-expr/benches/in_list_strategy.rs b/datafusion/physical-expr/benches/in_list_strategy.rs index 9df31fd9f94eb..5c4922fdcf8a9 100644 --- a/datafusion/physical-expr/benches/in_list_strategy.rs +++ b/datafusion/physical-expr/benches/in_list_strategy.rs @@ -15,31 +15,35 @@ // specific language governing permissions and limitations // under the License. -//! Focused benchmarks for InList optimizations +//! Focused benchmarks for `InList` cases. //! -//! This benchmark file provides targeted coverage of each optimization strategy -//! with controlled parameters to ensure statistical robustness: +//! This benchmark file adds targeted coverage for representative `IN LIST` +//! workloads with controlled parameters: //! -//! - **Controlled match rates**: Tests both "found" and "not found" code paths -//! - **List size scaling**: Measures performance across different list sizes -//! - **Strategy coverage**: Each optimization has dedicated benchmarks -//! - **Reinterpret coverage**: Tests types that use zero-copy reinterpretation -//! - **Stage 2 stress testing**: Prefix-collision strings for two-stage filters -//! - **Null handling**: Tests null short-circuit optimization paths +//! - **Controlled match rates**: Exercises both hit-heavy and miss-heavy paths +//! 
- **List size scaling**: Measures behavior across small and large `IN` lists +//! - **Type coverage**: Covers primitive, string, string-view, dictionary, and +//! fixed-size-binary inputs +//! - **Shared-prefix strings**: Adds collision-heavy string cases where values +//! only differ late in the string +//! - **Mixed-length strings**: Covers inputs that combine short and long values +//! - **Null handling**: Includes representative `NULL` and `NOT IN` cases //! -//! # Optimization Coverage +//! # Case Coverage //! -//! | Strategy | Types | Threshold | List Sizes Tested | -//! |----------|-------|-----------|-------------------| -//! | BitmapFilter (stack) | UInt8 | always | 4, 16 | -//! | BitmapFilter (heap) | Int16 | always | 4, 64, 256 | -//! | BranchlessFilter | Int32, Float32 | ≤32 | 4, 32 | -//! | DirectProbeFilter | Int32, Float32 | >32 | 64, 256 | -//! | BranchlessFilter | Int64, TimestampNs | ≤16 | 4, 16 | -//! | DirectProbeFilter | Int64, TimestampNs | >16 | 32, 128 | -//! | Utf8TwoStageFilter | Utf8 | always | 4, 64, 256 | -//! | ByteViewMaskedFilter | Utf8View | always | 4, 16, 64, 256 | -//! | arrow_eq fallback | FixedSizeBinary(16) | always | 4, 64, 256, 10000 | +//! | Case | Types | Characteristics | List Sizes Tested | +//! |------|-------|-----------------|-------------------| +//! | Narrow integer cases | UInt8 | small value domain | 4, 16 | +//! | Narrow integer cases | Int16 | larger value domain | 4, 64, 256 | +//! | 32-bit primitive cases | Int32, Float32 | small and large lists | 4, 32, 64, 256 | +//! | 64-bit primitive cases | Int64, TimestampNs | small and large lists | 4, 16, 32, 128 | +//! | Utf8 short-string cases | Utf8 | 8-byte strings | 4, 64, 256 | +//! | Utf8 long-string cases | Utf8 | 24-byte strings | 4, 64, 256 | +//! | Utf8View short-string cases | Utf8View | 8-byte strings | 4, 16, 64, 256 | +//! | Utf8View length-12 cases | Utf8View | 12-byte strings | 16, 64 | +//! 
| Utf8View long-string cases | Utf8View | 24-byte strings | 4, 16, 64, 256 | +//! | Shared-prefix string cases | Utf8, Utf8View | same prefix, different suffix | 16, 32, 64 | +//! | Fixed-size binary cases | FixedSizeBinary(16) | fixed-width binary values | 4, 64, 256, 10000 | use arrow::array::*; use arrow::datatypes::{Field, Int32Type, Schema}; @@ -250,9 +254,9 @@ where }); } -/// Benchmarks strings with shared prefixes to stress Stage 2 of two-stage filters. +/// Benchmarks strings with shared prefixes and different suffixes. /// Uses variable prefix lengths and random suffixes to avoid bench-maxing. -fn bench_string_prefix_collision( +fn bench_string_shared_prefix( c: &mut Criterion, group: &str, name: &str, @@ -268,10 +272,11 @@ fn bench_string_prefix_collision( .wrapping_add(prefix_len as u64 * 0x4444); let mut rng = StdRng::seed_from_u64(seed); - // Generate IN list with shared prefix (forces Stage 2) + // Generate IN list with a shared prefix. let haystack = strings_with_shared_prefix(&mut rng, list_size, prefix_len); - // Generate non-matching strings with SAME prefix (will pass Stage 1, fail Stage 2) + // Generate non-matching strings with the same prefix to keep misses close + // to the matching set. let non_match_pool = strings_with_shared_prefix(&mut rng, 100, prefix_len); // Generate array with controlled match rate @@ -297,8 +302,8 @@ fn bench_string_prefix_collision( }); } -/// Benchmarks mixed-length strings (some short ≤12, some long >12). -/// Tests the two-stage filter with realistic length distribution. +/// Benchmarks mixed-length strings (some short <= 12, some long > 12). +/// Uses a more realistic length distribution than the fixed-width cases. 
fn bench_string_mixed_lengths( c: &mut Criterion, group: &str, @@ -312,7 +317,7 @@ fn bench_string_mixed_lengths( let seed = 0xABCD_EF01_u64.wrapping_add(list_size as u64 * 0x5555); let mut rng = StdRng::seed_from_u64(seed); - // Mixed lengths: some short (≤12), some long (>12) + // Mixed lengths: some short (<= 12), some long (> 12) let lengths = [4, 8, 12, 16, 20, 24]; // Generate IN list with mixed lengths @@ -348,18 +353,18 @@ fn bench_string_mixed_lengths( } // ============================================================================= -// BITMAP FILTER BENCHMARKS (UInt8, Int16) +// NARROW INTEGER CASE BENCHMARKS // ============================================================================= -fn bench_bitmap(c: &mut Criterion) { - // UInt8: 32-byte stack-allocated bitmap +fn bench_narrow_integer(c: &mut Criterion) { + // UInt8: small value domain // NOTE: With 256 possible values, list_size=16 covers 6.25% of value space, // so even "match=0%" has ~6% accidental matches from random data. 
     for list_size in [4, 16] {
         for match_pct in MATCH_RATES {
             bench_numeric::<UInt8Type>(
                 c,
-                "bitmap",
+                "narrow_integer",
                 &format!("u8/list={list_size}/match={match_pct}%"),
                 &NumericBenchConfig::new(
                     list_size,
@@ -371,12 +376,12 @@ fn bench_bitmap(c: &mut Criterion) {
         }
     }
 
-    // Int16: 8KB heap-allocated bitmap (via zero-copy reinterpret)
+    // Int16: larger value domain with wider list sizes
     for list_size in [4, 64, 256] {
         for match_pct in MATCH_RATES {
             bench_numeric::<Int16Type>(
                 c,
-                "bitmap",
+                "narrow_integer",
                 &format!("i16/list={list_size}/match={match_pct}%"),
                 &NumericBenchConfig::new(
                     list_size,
@@ -390,22 +395,22 @@ fn bench_bitmap(c: &mut Criterion) {
 }
 
 // =============================================================================
-// PRIMITIVE BENCHMARKS (Branchless vs Hash)
+// PRIMITIVE SIZE-SCALING BENCHMARKS
 // =============================================================================
 
 fn bench_primitive(c: &mut Criterion) {
-    // Int32: branchless threshold is 32
+    // Int32: small and larger list sizes
     for list_size in [4, 32, 64, 256] {
-        let strategy = if list_size <= 32 {
-            "branchless"
+        let list_case = if list_size <= 32 {
+            "small_list"
         } else {
-            "hash"
+            "large_list"
         };
         for match_pct in MATCH_RATES {
             bench_numeric::<Int32Type>(
                 c,
                 "primitive",
-                &format!("i32/{strategy}/list={list_size}/match={match_pct}%"),
+                &format!("i32/{list_case}/list={list_size}/match={match_pct}%"),
                 &NumericBenchConfig::new(
                     list_size,
                     match_pct as f64 / 100.0,
@@ -416,18 +421,18 @@ fn bench_primitive(c: &mut Criterion) {
         }
     }
 
-    // Int64: branchless threshold is 16
+    // Int64: small and larger list sizes
     for list_size in [4, 16, 32, 128] {
-        let strategy = if list_size <= 16 {
-            "branchless"
+        let list_case = if list_size <= 16 {
+            "small_list"
        } else {
-            "hash"
+            "large_list"
         };
         for match_pct in MATCH_RATES {
             bench_numeric::<Int64Type>(
                 c,
                 "primitive",
-                &format!("i64/{strategy}/list={list_size}/match={match_pct}%"),
+                &format!("i64/{list_case}/list={list_size}/match={match_pct}%"),
                 &NumericBenchConfig::new(
                     list_size,
                     match_pct as f64 / 100.0,
@@ -442,7 +447,7 @@ fn bench_primitive(c: &mut Criterion) {
     bench_numeric::<Int32Type>(
         c,
         "primitive",
-        "i32/branchless/list=16/match=50%/NOT_IN",
+        "i32/small_list/list=16/match=50%/NOT_IN",
         &NumericBenchConfig::new(
             16,
             0.5,
@@ -454,23 +459,22 @@ fn bench_primitive(c: &mut Criterion) {
 }
 
 // =============================================================================
-// REINTERPRETED TYPE BENCHMARKS (Float32, TimestampNs)
+// FLOAT AND TIMESTAMP CASE BENCHMARKS
 // =============================================================================
 
-fn bench_reinterpret(c: &mut Criterion) {
-    // Float32: reinterpreted as u32, uses same branchless/hash strategies
-    // Threshold is 32 (same as Int32)
+fn bench_f32(c: &mut Criterion) {
+    // Float32: uses the same list sizes as the Int32 cases.
     for list_size in [4, 32, 64] {
-        let strategy = if list_size <= 32 {
-            "branchless"
+        let list_case = if list_size <= 32 {
+            "small_list"
         } else {
-            "hash"
+            "large_list"
         };
         for match_pct in MATCH_RATES {
             bench_numeric::<Float32Type>(
                 c,
-                "reinterpret",
-                &format!("f32/{strategy}/list={list_size}/match={match_pct}%"),
+                "f32",
+                &format!("{list_case}/list={list_size}/match={match_pct}%"),
                 &NumericBenchConfig::new(
                     list_size,
                     match_pct as f64 / 100.0,
@@ -480,19 +484,21 @@ fn bench_reinterpret(c: &mut Criterion) {
             );
         }
     }
+}
 
-    // TimestampNanosecond: reinterpreted as i64, threshold is 16
+fn bench_timestamp_ns(c: &mut Criterion) {
+    // TimestampNanosecond: uses the same list sizes as the Int64-style cases.
for list_size in [4, 16, 32] { - let strategy = if list_size <= 16 { - "branchless" + let list_case = if list_size <= 16 { + "small_list" } else { - "hash" + "large_list" }; for match_pct in MATCH_RATES { bench_numeric::( c, - "reinterpret", - &format!("timestamp_ns/{strategy}/list={list_size}/match={match_pct}%"), + "timestamp_ns", + &format!("{list_case}/list={list_size}/match={match_pct}%"), &NumericBenchConfig::new( list_size, match_pct as f64 / 100.0, @@ -505,13 +511,13 @@ fn bench_reinterpret(c: &mut Criterion) { } // ============================================================================= -// UTF8 TWO-STAGE FILTER BENCHMARKS +// UTF8 STRING CASE BENCHMARKS // ============================================================================= fn bench_utf8(c: &mut Criterion) { let to_scalar: fn(String) -> ScalarValue = |s| ScalarValue::Utf8(Some(s)); - // Short strings (8 bytes < 12): Stage 1 definitive + // Short strings (8 bytes) for list_size in [4, 64, 256] { for match_pct in MATCH_RATES { bench_string::( @@ -528,7 +534,7 @@ fn bench_utf8(c: &mut Criterion) { } } - // Long strings (24 bytes > 12): hits Stage 2 + // Long strings (24 bytes) for list_size in [4, 64, 256] { for match_pct in MATCH_RATES { bench_string::( @@ -559,11 +565,11 @@ fn bench_utf8(c: &mut Criterion) { } } - // Prefix collision: stresses Stage 2 comparison - bench_string_prefix_collision::( + // Shared-prefix strings: same prefix, different suffix + bench_string_shared_prefix::( c, "utf8", - "prefix_collision/pfx=12/list=32/match=50%", + "shared_prefix/pfx=12/list=32/match=50%", 32, 0.5, 12, @@ -580,13 +586,13 @@ fn bench_utf8(c: &mut Criterion) { } // ============================================================================= -// UTF8VIEW TWO-STAGE FILTER BENCHMARKS +// UTF8VIEW STRING CASE BENCHMARKS // ============================================================================= fn bench_utf8view(c: &mut Criterion) { let to_scalar: fn(String) -> ScalarValue = |s| 
ScalarValue::Utf8View(Some(s)); - // Short strings (8 bytes ≤ 12): inline storage path + // Short strings (8 bytes) for list_size in [4, 16, 64, 256] { for match_pct in MATCH_RATES { bench_string::( @@ -603,13 +609,13 @@ fn bench_utf8view(c: &mut Criterion) { } } - // Boundary strings (exactly 12 bytes): max inline size + // Length-12 strings for list_size in [16, 64] { for match_pct in MATCH_RATES { bench_string::( c, "utf8view", - &format!("boundary_12b/list={list_size}/match={match_pct}%"), + &format!("len_12b/list={list_size}/match={match_pct}%"), &StringBenchConfig::new( list_size, match_pct as f64 / 100.0, @@ -620,7 +626,7 @@ fn bench_utf8view(c: &mut Criterion) { } } - // Long strings (24 bytes > 12): out-of-line storage, two-stage filter + // Long strings (24 bytes) for list_size in [4, 16, 64, 256] { for match_pct in MATCH_RATES { bench_string::( @@ -651,14 +657,14 @@ fn bench_utf8view(c: &mut Criterion) { } } - // Prefix collision: stresses Stage 2 comparison with varying prefix lengths + // Shared-prefix strings with varying prefix lengths for (prefix_len, list_size) in [(8, 16), (12, 32), (16, 64)] { for match_pct in MATCH_RATES { - bench_string_prefix_collision::( + bench_string_shared_prefix::( c, "utf8view", &format!( - "prefix_collision/pfx={prefix_len}/list={list_size}/match={match_pct}%" + "shared_prefix/pfx={prefix_len}/list={list_size}/match={match_pct}%" ), list_size, match_pct as f64 / 100.0, @@ -756,7 +762,7 @@ fn bench_dict_string( } fn bench_dictionary(c: &mut Criterion) { - // Int32 dictionary: varying list sizes (tests branchless vs hash on values) + // Int32 dictionary: varying list sizes across dictionary values // Dictionary with 100 unique values for list_size in [4, 16, 64] { bench_dict_int32( @@ -782,7 +788,7 @@ fn bench_dictionary(c: &mut Criterion) { // Int32 dictionary: NOT IN path bench_dict_int32(c, "i32/dict=100/list=16/NOT_IN", 100, 16, true); - // String dictionary: short strings (≤12 bytes, common for codes/categories) + // 
String dictionary: short strings (<= 12 bytes, common for codes/categories) for list_size in [8, 32] { bench_dict_string( c, @@ -804,23 +810,18 @@ fn bench_dictionary(c: &mut Criterion) { // NULL HANDLING BENCHMARKS // ============================================================================= // -// Tests null short-circuit optimization paths in: -// - build_in_list_result: computes contains for ALL positions, masks via bitmap ops -// - build_in_list_result_with_null_shortcircuit: skips contains for null positions -// -// The shortcircuit is beneficial for expensive contains checks (strings) but -// adds branch overhead for cheap checks (primitives). +// Tests representative null-containing inputs across primitive and string cases. fn bench_nulls(c: &mut Criterion) { // ========================================================================= - // PRIMITIVE TYPES: Tests build_in_list_result (no shortcircuit) + // PRIMITIVE CASES // ========================================================================= - // BitmapFilter with nulls + // UInt8 case with nulls bench_numeric::( c, "nulls", - "bitmap/u8/list=16/match=50%/nulls=20%", + "narrow_integer/u8/list=16/match=50%/nulls=20%", &NumericBenchConfig::new( 16, 0.5, @@ -830,11 +831,11 @@ fn bench_nulls(c: &mut Criterion) { .with_null_rate(0.2), ); - // BranchlessFilter with nulls + // Int32 small-list case with nulls bench_numeric::( c, "nulls", - "branchless/i32/list=16/match=50%/nulls=20%", + "primitive/i32/small_list/list=16/match=50%/nulls=20%", &NumericBenchConfig::new( 16, 0.5, @@ -844,11 +845,11 @@ fn bench_nulls(c: &mut Criterion) { .with_null_rate(0.2), ); - // DirectProbeFilter with nulls + // Int32 large-list case with nulls bench_numeric::( c, "nulls", - "hash/i32/list=64/match=50%/nulls=20%", + "primitive/i32/large_list/list=64/match=50%/nulls=20%", &NumericBenchConfig::new( 64, 0.5, @@ -859,13 +860,13 @@ fn bench_nulls(c: &mut Criterion) { ); // 
========================================================================= - // STRING TYPES: Tests build_in_list_result_with_null_shortcircuit + // STRING CASES // ========================================================================= let utf8_scalar: fn(String) -> ScalarValue = |s| ScalarValue::Utf8(Some(s)); let utf8view_scalar: fn(String) -> ScalarValue = |s| ScalarValue::Utf8View(Some(s)); - // Utf8TwoStageFilter with nulls (short strings) + // Utf8 short-string case with nulls bench_string::( c, "nulls", @@ -873,7 +874,7 @@ fn bench_nulls(c: &mut Criterion) { &StringBenchConfig::new(16, 0.5, 8, utf8_scalar).with_null_rate(0.2), ); - // Utf8TwoStageFilter with nulls (long strings - Stage 2) + // Utf8 long-string case with nulls bench_string::( c, "nulls", @@ -881,7 +882,7 @@ fn bench_nulls(c: &mut Criterion) { &StringBenchConfig::new(16, 0.5, 24, utf8_scalar).with_null_rate(0.2), ); - // ByteViewMaskedFilter with nulls (short strings - inline) + // Utf8View short-string case with nulls bench_string::( c, "nulls", @@ -889,7 +890,7 @@ fn bench_nulls(c: &mut Criterion) { &StringBenchConfig::new(16, 0.5, 8, utf8view_scalar).with_null_rate(0.2), ); - // ByteViewMaskedFilter with nulls (long strings - out-of-line) + // Utf8View long-string case with nulls bench_string::( c, "nulls", @@ -898,14 +899,14 @@ fn bench_nulls(c: &mut Criterion) { ); // ========================================================================= - // NOT IN WITH NULLS: Tests negated path with null propagation + // NOT IN CASES WITH NULLS // ========================================================================= - // Primitive NOT IN with nulls + // Primitive NOT IN case with nulls bench_numeric::( c, "nulls", - "branchless/i32/list=16/match=50%/nulls=20%/NOT_IN", + "primitive/i32/small_list/list=16/match=50%/nulls=20%/NOT_IN", &NumericBenchConfig::new( 16, 0.5, @@ -916,7 +917,7 @@ fn bench_nulls(c: &mut Criterion) { .with_negated(), ); - // String NOT IN with nulls + // String NOT IN case 
with nulls bench_string::( c, "nulls", @@ -927,14 +928,14 @@ fn bench_nulls(c: &mut Criterion) { ); // ========================================================================= - // HIGH NULL RATE: Stress test null handling paths + // HIGH NULL-RATE CASES // ========================================================================= // 50% nulls - half the array is null bench_numeric::( c, "nulls", - "branchless/i32/list=16/match=50%/nulls=50%", + "primitive/i32/small_list/list=16/match=50%/nulls=50%", &NumericBenchConfig::new( 16, 0.5, @@ -1030,7 +1031,7 @@ fn bench_fixed_size_binary(c: &mut Criterion) { criterion_group! { name = benches; config = Criterion::default(); - targets = bench_bitmap, bench_primitive, bench_reinterpret, bench_utf8, bench_utf8view, bench_dictionary, bench_nulls, bench_fixed_size_binary + targets = bench_narrow_integer, bench_primitive, bench_f32, bench_timestamp_ns, bench_utf8, bench_utf8view, bench_dictionary, bench_nulls, bench_fixed_size_binary } criterion_main!(benches);