diff --git a/datafusion/sqllogictest/README.md b/datafusion/sqllogictest/README.md index 7d84ad23d5905..f0a54cf978fbf 100644 --- a/datafusion/sqllogictest/README.md +++ b/datafusion/sqllogictest/README.md @@ -75,24 +75,25 @@ RUST_LOG=debug cargo test --test sqllogictests -- ddl The sqllogictest runner can emit deterministic per-file elapsed timings to help identify slow test files. -By default (`--timing-summary auto`), timing summary output is disabled in local -TTY runs and shows a top-slowest summary in non-TTY/CI runs. +Timing summary output is disabled by default and enabled with +`--timing-summary` (or `SLT_TIMING_SUMMARY=true`). -`--timing-top-n` / `SLT_TIMING_TOP_N` must be a positive integer (`>= 1`). +When timing summary is enabled, periodic `Progress:` lines are suppressed by +default to keep output stable. ```shell -# Show top 10 slowest files (good for CI) -cargo test --test sqllogictests -- --timing-summary top --timing-top-n 10 +# Show deterministic per-file elapsed timings (sorted slowest first) +cargo test --test sqllogictests -- --timing-summary ``` ```shell -# Show full per-file timing table -cargo test --test sqllogictests -- --timing-summary full +# Keep only the top 10 lines using standard shell tooling +cargo test --test sqllogictests -- --timing-summary | head -n 10 ``` ```shell -# Same controls via environment variables -SLT_TIMING_SUMMARY=top SLT_TIMING_TOP_N=15 cargo test --test sqllogictests +# Enable via environment variable +SLT_TIMING_SUMMARY=1 cargo test --test sqllogictests ``` ```shell diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index 6edc2e997819f..cb76cc30fd08f 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use clap::{ColorChoice, Parser, ValueEnum}; +use clap::{ColorChoice, Parser}; use datafusion::common::instant::Instant; use datafusion::common::utils::get_available_parallelism; use datafusion::common::{DataFusionError, Result, exec_datafusion_err, exec_err}; @@ -44,7 +44,7 @@ use crate::postgres_container::{ use datafusion::common::runtime::SpawnedTask; use futures::FutureExt; use std::fs; -use std::io::{IsTerminal, stderr, stdout}; +use std::io::{IsTerminal, Write, stderr, stdout}; use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -62,14 +62,6 @@ const SQLITE_PREFIX: &str = "sqlite"; const ERRS_PER_FILE_LIMIT: usize = 10; const TIMING_DEBUG_SLOW_FILES_ENV: &str = "SLT_TIMING_DEBUG_SLOW_FILES"; -#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] -enum TimingSummaryMode { - Auto, - Off, - Top, - Full, -} - #[derive(Debug)] struct FileTiming { relative_path: PathBuf, @@ -187,8 +179,11 @@ async fn run_tests() -> Result<()> { } let num_tests = test_files.len(); - // For CI environments without TTY, print progress periodically + // For CI environments without TTY, print progress periodically unless + // deterministic timing summary output is requested. let is_ci = !stderr().is_terminal(); + let print_periodic_progress = is_ci && !options.timing_summary; + let progress_interval = std::cmp::max(1, num_tests / 10); let completed_count = Arc::new(AtomicUsize::new(0)); let file_results: Vec<_> = futures::stream::iter(test_files) @@ -307,8 +302,12 @@ async fn run_tests() -> Result<()> { let completed_count = Arc::clone(&completed_count); move |_| { let completed = completed_count.fetch_add(1, Ordering::Relaxed) + 1; - // In CI (no TTY), print progress every 10% or every 50 files - if is_ci && (completed.is_multiple_of(50) || completed == num_tests) { + // Print progress at 10% intervals, every 50 files, and completion. + if print_periodic_progress + && (completed.is_multiple_of(progress_interval) + || completed.is_multiple_of(50) + || completed == num_tests) + { eprintln!( "Progress: {}/{} files completed ({:.0}%)", completed, @@ -335,7 +334,7 @@ async fn run_tests() -> Result<()> { .then_with(|| a.relative_path.cmp(&b.relative_path)) }); - print_timing_summary(&options, &m, is_ci, &file_timings)?; + print_timing_summary(&options, &file_timings)?; let errors: Vec<_> = file_results .into_iter() @@ -395,44 +394,23 @@ async fn run_tests() -> Result<()> { } } -fn print_timing_summary( - options: &Options, - progress: &MultiProgress, - is_ci: bool, - file_timings: &[FileTiming], -) -> Result<()> { - let mode = options.timing_summary_mode(is_ci); - if mode == TimingSummaryMode::Off || file_timings.is_empty() { +fn print_timing_summary(options: &Options, file_timings: &[FileTiming]) -> Result<()> { + if !options.timing_summary || file_timings.is_empty() { return Ok(()); } - let top_n = options.timing_top_n; - debug_assert!(matches!( - mode, - TimingSummaryMode::Top | TimingSummaryMode::Full - )); - let count = if mode == TimingSummaryMode::Full { - file_timings.len() - } else { - top_n - }; - - progress.println("Per-file elapsed summary (deterministic):")?; - for (idx, timing) in file_timings.iter().take(count).enumerate() { - progress.println(format!( + let mut output = stdout().lock(); + writeln!(output, "Per-file elapsed summary (deterministic):")?; + for (idx, timing) in file_timings.iter().enumerate() { + writeln!( + output, "{:>3}. {:>8.3}s {}", idx + 1, timing.elapsed.as_secs_f64(), timing.relative_path.display() - ))?; - } - - if mode != TimingSummaryMode::Full && file_timings.len() > count { - progress.println(format!( - "... {} more files omitted (use --timing-summary full to show all)", - file_timings.len() - count - ))?; + )?; } + output.flush()?; Ok(()) } @@ -448,16 +426,6 @@ fn is_env_truthy(name: &str) -> bool { }) } -fn parse_timing_top_n(arg: &str) -> std::result::Result { - let parsed = arg - .parse::() - .map_err(|error| format!("invalid value '{arg}': {error}"))?; - if parsed == 0 { - return Err("must be >= 1".to_string()); - } - Ok(parsed) -} - async fn run_test_file_substrait_round_trip( test_file: TestFile, validator: Validator, @@ -927,20 +895,10 @@ struct Options { #[clap( long, env = "SLT_TIMING_SUMMARY", - value_enum, - default_value_t = TimingSummaryMode::Auto, - help = "Per-file timing summary mode: auto|off|top|full" + default_value_t = false, + help = "Print deterministic per-file timing summary" )] - timing_summary: TimingSummaryMode, - - #[clap( - long, - env = "SLT_TIMING_TOP_N", - default_value_t = 10, - value_parser = parse_timing_top_n, - help = "Number of files to show when timing summary mode is auto/top (must be >= 1)" - )] - timing_top_n: usize, + timing_summary: bool, #[clap( long, @@ -952,19 +910,6 @@ struct Options { } impl Options { - fn timing_summary_mode(&self, is_ci: bool) -> TimingSummaryMode { - match self.timing_summary { - TimingSummaryMode::Auto => { - if is_ci { - TimingSummaryMode::Top - } else { - TimingSummaryMode::Off - } - } - mode => mode, - } - } - /// Because this test can be run as a cargo test, commands like /// /// ```shell