From 411d6f77ece53ecd8c751009a5a46b1d78db792a Mon Sep 17 00:00:00 2001
From: Connor Tsui <connor.tsui20@gmail.com>
Date: Tue, 21 Apr 2026 15:10:20 -0400
Subject: [PATCH] implement smarter sampler

Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
---
 vortex-btrblocks/src/schemes/integer.rs       |  32 +-
 vortex-compressor/public-api.lock             |  28 +-
 .../src/builtins/constant/float.rs            |   2 +-
 .../src/builtins/constant/string.rs           |   2 +-
 vortex-compressor/src/compressor.rs           | 303 ++++++++++++++++--
 vortex-compressor/src/ctx.rs                  |   1 +
 vortex-compressor/src/estimate.rs             |  36 ++-
 7 files changed, 350 insertions(+), 54 deletions(-)

diff --git a/vortex-btrblocks/src/schemes/integer.rs b/vortex-btrblocks/src/schemes/integer.rs
index 7e6c3503239..7dc1fccad80 100644
--- a/vortex-btrblocks/src/schemes/integer.rs
+++ b/vortex-btrblocks/src/schemes/integer.rs
@@ -17,6 +17,7 @@ use vortex_compressor::builtins::FloatDictScheme;
 use vortex_compressor::builtins::StringDictScheme;
 use vortex_compressor::estimate::CompressionEstimate;
 use vortex_compressor::estimate::DeferredEstimate;
+use vortex_compressor::estimate::EstimateScore;
 use vortex_compressor::estimate::EstimateVerdict;
 use vortex_compressor::scheme::AncestorExclusion;
 use vortex_compressor::scheme::ChildSelection;
@@ -737,20 +738,29 @@ impl Scheme for SequenceScheme {
             return CompressionEstimate::Verdict(EstimateVerdict::Skip);
         }
 
-        // TODO(connor): Why do we sequence encode the whole thing and then throw it away? And then
-        // why do we divide the ratio by 2???
-
+        // TODO(connor): `sequence_encode` allocates the encoded array just to confirm feasibility.
+        // A cheaper `is_sequence` probe would let us skip the allocation entirely.
         CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
-            |_compressor, data, _ctx, exec_ctx| {
-                let Some(encoded) = sequence_encode(data.array_as_primitive(), exec_ctx)? else {
-                    // If we are unable to sequence encode this array, make sure we skip.
+            |_compressor, data, best_so_far, _ctx, exec_ctx| {
+                // `SequenceArray` stores exactly two scalars (base and multiplier), so the best
+                // achievable compression ratio is `array_len / 2`.
+                let compressed_size = 2usize;
+                let max_ratio = data.array_len() as f64 / compressed_size as f64;
+
+                // If we cannot beat the best so far, then we do not want to even try sequence
+                // encoding the data.
+                let threshold = best_so_far.and_then(EstimateScore::finite_ratio);
+                if threshold.is_some_and(|t| max_ratio <= t) {
                     return Ok(EstimateVerdict::Skip);
-                };
+                }
 
-                // TODO(connor): This doesn't really make sense?
-                // Since two values are required to store base and multiplier the compression ratio is
-                // divided by 2.
-                Ok(EstimateVerdict::Ratio(encoded.len() as f64 / 2.0))
+                // TODO(connor): We should pass this array back to the compressor in the case that
+                // we do want to sequence encode this so that we do not need to recompress.
+                if sequence_encode(data.array_as_primitive(), exec_ctx)?.is_none() {
+                    return Ok(EstimateVerdict::Skip);
+                }
+                // TODO(connor): Should we get the actual ratio here?
+                Ok(EstimateVerdict::Ratio(max_ratio))
             },
         )))
     }
diff --git a/vortex-compressor/public-api.lock b/vortex-compressor/public-api.lock
index 05cbad5ad6f..d93f2c44305 100644
--- a/vortex-compressor/public-api.lock
+++ b/vortex-compressor/public-api.lock
@@ -326,6 +326,32 @@ impl core::fmt::Debug for vortex_compressor::estimate::DeferredEstimate
 
 pub fn vortex_compressor::estimate::DeferredEstimate::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
 
+pub enum vortex_compressor::estimate::EstimateScore
+
+pub vortex_compressor::estimate::EstimateScore::FiniteCompression(f64)
+
+pub vortex_compressor::estimate::EstimateScore::ZeroBytes
+
+impl vortex_compressor::estimate::EstimateScore
+
+pub fn vortex_compressor::estimate::EstimateScore::finite_ratio(self) -> core::option::Option<f64>
+
+impl core::clone::Clone for vortex_compressor::estimate::EstimateScore
+
+pub fn vortex_compressor::estimate::EstimateScore::clone(&self) -> vortex_compressor::estimate::EstimateScore
+
+impl core::cmp::PartialEq for vortex_compressor::estimate::EstimateScore
+
+pub fn vortex_compressor::estimate::EstimateScore::eq(&self, other: &vortex_compressor::estimate::EstimateScore) -> bool
+
+impl core::fmt::Debug for vortex_compressor::estimate::EstimateScore
+
+pub fn vortex_compressor::estimate::EstimateScore::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
+
+impl core::marker::Copy for vortex_compressor::estimate::EstimateScore
+
+impl core::marker::StructuralPartialEq for vortex_compressor::estimate::EstimateScore
+
 pub enum vortex_compressor::estimate::EstimateVerdict
 
 pub vortex_compressor::estimate::EstimateVerdict::AlwaysUse
@@ -338,7 +364,7 @@ impl core::fmt::Debug for vortex_compressor::estimate::EstimateVerdict
 
 pub fn vortex_compressor::estimate::EstimateVerdict::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
 
-pub type vortex_compressor::estimate::EstimateFn = (dyn core::ops::function::FnOnce(&vortex_compressor::CascadingCompressor, &vortex_compressor::stats::ArrayAndStats, vortex_compressor::ctx::CompressorContext, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_compressor::estimate::EstimateVerdict> + core::marker::Send + core::marker::Sync)
+pub type vortex_compressor::estimate::EstimateFn = (dyn core::ops::function::FnOnce(&vortex_compressor::CascadingCompressor, &vortex_compressor::stats::ArrayAndStats, core::option::Option<vortex_compressor::estimate::EstimateScore>, vortex_compressor::ctx::CompressorContext, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_compressor::estimate::EstimateVerdict> + core::marker::Send + core::marker::Sync)
 
 pub mod vortex_compressor::scheme
 
diff --git a/vortex-compressor/src/builtins/constant/float.rs b/vortex-compressor/src/builtins/constant/float.rs
index 1445a6cca2d..501e3cd9623 100644
--- a/vortex-compressor/src/builtins/constant/float.rs
+++ b/vortex-compressor/src/builtins/constant/float.rs
@@ -66,7 +66,7 @@ impl Scheme for FloatConstantScheme {
         // This is an expensive check, but in practice the distinct count is known because we often
         // include dictionary encoding in our set of schemes, so we rarely call this.
         CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
-            |_compressor, data, _ctx, exec_ctx| {
+            |_compressor, data, _best_so_far, _ctx, exec_ctx| {
                 if is_constant(data.array(), exec_ctx)? {
                     Ok(EstimateVerdict::AlwaysUse)
                 } else {
diff --git a/vortex-compressor/src/builtins/constant/string.rs b/vortex-compressor/src/builtins/constant/string.rs
index 68ccfe14e8a..fcd6138bca2 100644
--- a/vortex-compressor/src/builtins/constant/string.rs
+++ b/vortex-compressor/src/builtins/constant/string.rs
@@ -60,7 +60,7 @@ impl Scheme for StringConstantScheme {
         // This is an expensive check, but the alternative of not compressing a constant array is
         // far less preferable.
         CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
-            |_compressor, data, _ctx, exec_ctx| {
+            |_compressor, data, _best_so_far, _ctx, exec_ctx| {
                 if is_constant(data.array(), exec_ctx)? {
                     Ok(EstimateVerdict::AlwaysUse)
                 } else {
diff --git a/vortex-compressor/src/compressor.rs b/vortex-compressor/src/compressor.rs
index 0f67619e3cb..aa18b61637b 100644
--- a/vortex-compressor/src/compressor.rs
+++ b/vortex-compressor/src/compressor.rs
@@ -349,7 +349,17 @@ impl CascadingCompressor {
 
     /// Calls [`expected_compression_ratio`] on each candidate and returns the winning scheme along
     /// with its resolved winner estimate, or `None` if no scheme beats the canonical encoding.
-    /// Ties are broken by registration order (earlier in the list wins).
+    ///
+    /// Selection runs in two passes. Pass 1 evaluates every immediate
+    /// [`CompressionEstimate::Verdict`] and tracks the running best. [`Scheme`]s returning
+    /// [`CompressionEstimate::Deferred`] are stashed for pass 2 so that we do not make any
+    /// expensive computations if we don't have to.
+    ///
+    /// Pass 2 evaluates the deferred work and, for each [`DeferredEstimate::Callback`], passes the
+    /// current best [`EstimateScore`] as an early-exit hint so the callback can return
+    /// [`EstimateVerdict::Skip`] without doing expensive work when it cannot beat the threshold.
+    ///
+    /// Ties are broken by registration order within each pass.
     ///
     /// [`expected_compression_ratio`]: Scheme::expected_compression_ratio
     fn choose_best_scheme(
@@ -360,40 +370,61 @@ impl CascadingCompressor {
         exec_ctx: &mut ExecutionCtx,
     ) -> VortexResult<Option<(&'static dyn Scheme, WinnerEstimate)>> {
         let mut best: Option<(&'static dyn Scheme, EstimateScore)> = None;
+        let mut deferred: Vec<(&'static dyn Scheme, DeferredEstimate)> = Vec::new();
 
-        // TODO(connor): Rather than computing the deferred estimates eagerly, it would be better to
-        // look at all quick estimates and see if it makes sense to sample at all.
+        // Pass 1: evaluate every immediate verdict. Stash deferred work for pass 2.
         for &scheme in schemes {
-            let verdict =
-                match scheme.expected_compression_ratio(data, compress_ctx.clone(), exec_ctx) {
-                    CompressionEstimate::Verdict(verdict) => verdict,
-                    CompressionEstimate::Deferred(DeferredEstimate::Sample) => {
-                        let score = estimate_compression_ratio_with_sampling(
-                            scheme,
-                            self,
-                            data.array(),
-                            compress_ctx.clone(),
-                            exec_ctx,
-                        )?;
-                        if is_better_score(score, &best) {
-                            best = Some((scheme, score));
-                        }
-                        continue;
-                    }
-                    CompressionEstimate::Deferred(DeferredEstimate::Callback(callback)) => {
-                        callback(self, data, compress_ctx.clone(), exec_ctx)?
+            match scheme.expected_compression_ratio(data, compress_ctx.clone(), exec_ctx) {
+                CompressionEstimate::Verdict(EstimateVerdict::Skip) => {}
+                CompressionEstimate::Verdict(EstimateVerdict::AlwaysUse) => {
+                    return Ok(Some((scheme, WinnerEstimate::AlwaysUse)));
+                }
+                CompressionEstimate::Verdict(EstimateVerdict::Ratio(ratio)) => {
+                    let score = EstimateScore::FiniteCompression(ratio);
+
+                    if is_better_score(score, &best) {
+                        best = Some((scheme, score));
                     }
-                };
+                }
+                CompressionEstimate::Deferred(deferred_estimate) => {
+                    deferred.push((scheme, deferred_estimate));
+                }
+            }
+        }
+
+        // Pass 2: run deferred work. Callbacks receive the current best as a threshold so they can
+        // short-circuit with `Skip` when they cannot beat it.
+        for (scheme, deferred_estimate) in deferred {
+            let threshold: Option<EstimateScore> = best.map(|(_, score)| score);
+            match deferred_estimate {
+                DeferredEstimate::Sample => {
+                    let score = estimate_compression_ratio_with_sampling(
+                        self,
+                        scheme,
+                        data.array(),
+                        compress_ctx.clone(),
+                        exec_ctx,
+                    )?;
 
-            match verdict {
-                EstimateVerdict::Skip => {}
-                EstimateVerdict::AlwaysUse => return Ok(Some((scheme, WinnerEstimate::AlwaysUse))),
-                EstimateVerdict::Ratio(ratio) => {
-                    let score = EstimateScore::FiniteCompression(ratio);
                     if is_better_score(score, &best) {
                         best = Some((scheme, score));
                     }
                 }
+                DeferredEstimate::Callback(callback) => {
+                    match callback(self, data, threshold, compress_ctx.clone(), exec_ctx)? {
+                        EstimateVerdict::Skip => {}
+                        EstimateVerdict::AlwaysUse => {
+                            return Ok(Some((scheme, WinnerEstimate::AlwaysUse)));
+                        }
+                        EstimateVerdict::Ratio(ratio) => {
+                            let score = EstimateScore::FiniteCompression(ratio);
+
+                            if is_better_score(score, &best) {
+                                best = Some((scheme, score));
+                            }
+                        }
+                    }
+                }
             }
         }
 
@@ -757,7 +788,7 @@ mod tests {
             _exec_ctx: &mut ExecutionCtx,
         ) -> CompressionEstimate {
             CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
-                |_compressor, _data, _ctx, _exec_ctx| Ok(EstimateVerdict::AlwaysUse),
+                |_compressor, _data, _ctx, _exec_ctx, _best_so_far| Ok(EstimateVerdict::AlwaysUse),
             )))
         }
 
@@ -791,7 +822,7 @@ mod tests {
             _exec_ctx: &mut ExecutionCtx,
         ) -> CompressionEstimate {
             CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
-                |_compressor, _data, _ctx, _exec_ctx| Ok(EstimateVerdict::Skip),
+                |_compressor, _data, _ctx, _exec_ctx, _best_so_far| Ok(EstimateVerdict::Skip),
             )))
         }
 
@@ -825,7 +856,7 @@ mod tests {
             _exec_ctx: &mut ExecutionCtx,
         ) -> CompressionEstimate {
             CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
-                |_compressor, _data, _ctx, _exec_ctx| Ok(EstimateVerdict::Ratio(3.0)),
+                |_compressor, _data, _ctx, _exec_ctx, _best_so_far| Ok(EstimateVerdict::Ratio(3.0)),
             )))
         }
 
@@ -1201,6 +1232,216 @@ mod tests {
         Ok(())
     }
 
+    // Observer helper used by threshold-related tests. Captures the `best_so_far` value the
+    // compressor passes to its deferred callback. `OBSERVER_LOCK` serializes tests that share
+    // `OBSERVED_THRESHOLD` so they do not race.
+    static OBSERVER_LOCK: Mutex<()> = Mutex::new(());
+    static OBSERVED_THRESHOLD: Mutex<Option<Option<EstimateScore>>> = Mutex::new(None);
+
+    #[derive(Debug)]
+    struct ThresholdObservingScheme;
+
+    impl Scheme for ThresholdObservingScheme {
+        fn scheme_name(&self) -> &'static str {
+            "test.threshold_observing"
+        }
+
+        fn matches(&self, canonical: &Canonical) -> bool {
+            matches_integer_primitive(canonical)
+        }
+
+        fn expected_compression_ratio(
+            &self,
+            _data: &ArrayAndStats,
+            _compress_ctx: CompressorContext,
+            _exec_ctx: &mut ExecutionCtx,
+        ) -> CompressionEstimate {
+            CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
+                |_compressor, _data, best_so_far, _ctx, _exec_ctx| {
+                    *OBSERVED_THRESHOLD.lock() = Some(best_so_far);
+                    Ok(EstimateVerdict::Skip)
+                },
+            )))
+        }
+
+        fn compress(
+            &self,
+            _compressor: &CascadingCompressor,
+            _data: &ArrayAndStats,
+            _compress_ctx: CompressorContext,
+            _exec_ctx: &mut ExecutionCtx,
+        ) -> VortexResult<ArrayRef> {
+            unreachable!("test helper should never be selected for compression")
+        }
+    }
+
+    #[derive(Debug)]
+    struct CallbackMatchingRatioScheme;
+
+    impl Scheme for CallbackMatchingRatioScheme {
+        fn scheme_name(&self) -> &'static str {
+            "test.callback_matching_ratio"
+        }
+
+        fn matches(&self, canonical: &Canonical) -> bool {
+            matches_integer_primitive(canonical)
+        }
+
+        fn expected_compression_ratio(
+            &self,
+            _data: &ArrayAndStats,
+            _compress_ctx: CompressorContext,
+            _exec_ctx: &mut ExecutionCtx,
+        ) -> CompressionEstimate {
+            CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
+                |_compressor, _data, _ctx, _exec_ctx, _best_so_far| Ok(EstimateVerdict::Ratio(2.0)),
+            )))
+        }
+
+        fn compress(
+            &self,
+            _compressor: &CascadingCompressor,
+            _data: &ArrayAndStats,
+            _compress_ctx: CompressorContext,
+            _exec_ctx: &mut ExecutionCtx,
+        ) -> VortexResult<ArrayRef> {
+            unreachable!("test helper should never be selected for compression")
+        }
+    }
+
+    #[test]
+    fn callback_always_use_overrides_pass_one_best() -> VortexResult<()> {
+        // `HugeRatioScheme` returns an immediate `Ratio(100.0)` in pass 1;
+        // `CallbackAlwaysUseScheme` returns `AlwaysUse` from its deferred callback in pass 2.
+        // The deferred `AlwaysUse` must still win.
+        let compressor = CascadingCompressor::new(vec![&HugeRatioScheme, &CallbackAlwaysUseScheme]);
+        let schemes: [&'static dyn Scheme; 2] = [&HugeRatioScheme, &CallbackAlwaysUseScheme];
+        let data = estimate_test_data();
+        let mut exec_ctx = SESSION.create_execution_ctx();
+
+        let winner = compressor.choose_best_scheme(
+            &schemes,
+            &data,
+            CompressorContext::new(),
+            &mut exec_ctx,
+        )?;
+
+        assert!(matches!(
+            winner,
+            Some((scheme, WinnerEstimate::AlwaysUse))
+                if scheme.id() == CallbackAlwaysUseScheme.id()
+        ));
+        Ok(())
+    }
+
+    #[test]
+    fn threshold_reflects_pass_one_best() -> VortexResult<()> {
+        let _guard = OBSERVER_LOCK.lock();
+        *OBSERVED_THRESHOLD.lock() = None;
+
+        let compressor =
+            CascadingCompressor::new(vec![&DirectRatioScheme, &ThresholdObservingScheme]);
+        let schemes: [&'static dyn Scheme; 2] = [&DirectRatioScheme, &ThresholdObservingScheme];
+        let data = estimate_test_data();
+        let mut exec_ctx = SESSION.create_execution_ctx();
+
+        compressor.choose_best_scheme(&schemes, &data, CompressorContext::new(), &mut exec_ctx)?;
+
+        let observed = *OBSERVED_THRESHOLD.lock();
+        assert!(matches!(
+            observed,
+            Some(Some(EstimateScore::FiniteCompression(r))) if r == 2.0
+        ));
+        Ok(())
+    }
+
+    #[test]
+    fn threshold_is_none_when_only_prior_is_zero_bytes() -> VortexResult<()> {
+        let _guard = OBSERVER_LOCK.lock();
+        *OBSERVED_THRESHOLD.lock() = None;
+
+        let compressor =
+            CascadingCompressor::new(vec![&ZeroBytesSamplingScheme, &ThresholdObservingScheme]);
+        let schemes: [&'static dyn Scheme; 2] =
+            [&ZeroBytesSamplingScheme, &ThresholdObservingScheme];
+        let data = estimate_test_data();
+        let mut exec_ctx = SESSION.create_execution_ctx();
+
+        compressor.choose_best_scheme(&schemes, &data, CompressorContext::new(), &mut exec_ctx)?;
+
+        // The observing callback was invoked (outer `Some`) and `best_so_far` was `None` (inner
+        // `None`) because the zero-byte sample is never stored as the best.
+        let observed = *OBSERVED_THRESHOLD.lock();
+        assert_eq!(observed, Some(None));
+        Ok(())
+    }
+
+    #[test]
+    fn threshold_is_none_when_no_prior_scheme() -> VortexResult<()> {
+        let _guard = OBSERVER_LOCK.lock();
+        *OBSERVED_THRESHOLD.lock() = None;
+
+        let compressor = CascadingCompressor::new(vec![&ThresholdObservingScheme]);
+        let schemes: [&'static dyn Scheme; 1] = [&ThresholdObservingScheme];
+        let data = estimate_test_data();
+        let mut exec_ctx = SESSION.create_execution_ctx();
+
+        compressor.choose_best_scheme(&schemes, &data, CompressorContext::new(), &mut exec_ctx)?;
+
+        let observed = *OBSERVED_THRESHOLD.lock();
+        assert_eq!(observed, Some(None));
+        Ok(())
+    }
+
+    #[test]
+    fn threshold_updates_from_earlier_deferred_callback() -> VortexResult<()> {
+        let _guard = OBSERVER_LOCK.lock();
+        *OBSERVED_THRESHOLD.lock() = None;
+
+        // Both schemes are deferred. The first callback registers `Ratio(3.0)`; the second
+        // callback must observe it as its threshold.
+        let compressor =
+            CascadingCompressor::new(vec![&CallbackRatioScheme, &ThresholdObservingScheme]);
+        let schemes: [&'static dyn Scheme; 2] = [&CallbackRatioScheme, &ThresholdObservingScheme];
+        let data = estimate_test_data();
+        let mut exec_ctx = SESSION.create_execution_ctx();
+
+        compressor.choose_best_scheme(&schemes, &data, CompressorContext::new(), &mut exec_ctx)?;
+
+        let observed = *OBSERVED_THRESHOLD.lock();
+        assert!(matches!(
+            observed,
+            Some(Some(EstimateScore::FiniteCompression(r))) if r == 3.0
+        ));
+        Ok(())
+    }
+
+    #[test]
+    fn ratio_tie_between_immediate_and_deferred_favors_immediate() -> VortexResult<()> {
+        // Both schemes produce the same `Ratio(2.0)`, one from pass 1 (immediate) and one from
+        // pass 2 (deferred callback). Pass 1 locks in first, and strict `>` tie-breaking means
+        // the deferred callback's equal ratio cannot displace it.
+        let compressor =
+            CascadingCompressor::new(vec![&CallbackMatchingRatioScheme, &DirectRatioScheme]);
+        let schemes: [&'static dyn Scheme; 2] = [&CallbackMatchingRatioScheme, &DirectRatioScheme];
+        let data = estimate_test_data();
+        let mut exec_ctx = SESSION.create_execution_ctx();
+
+        let winner = compressor.choose_best_scheme(
+            &schemes,
+            &data,
+            CompressorContext::new(),
+            &mut exec_ctx,
+        )?;
+
+        assert!(matches!(
+            winner,
+            Some((scheme, WinnerEstimate::Score(EstimateScore::FiniteCompression(r))))
+                if scheme.id() == DirectRatioScheme.id() && r == 2.0
+        ));
+        Ok(())
+    }
+
     #[test]
     fn all_null_array_compresses_to_constant() -> VortexResult<()> {
         let array = PrimitiveArray::new(
@@ -1244,8 +1485,8 @@ mod tests {
         //   "this must be present since `DictScheme` declared that we need distinct values"
         let mut exec_ctx = SESSION.create_execution_ctx();
         let score = estimate_compression_ratio_with_sampling(
-            &FloatDictScheme,
             &compressor,
+            &FloatDictScheme,
             &array,
             ctx,
             &mut exec_ctx,
diff --git a/vortex-compressor/src/ctx.rs b/vortex-compressor/src/ctx.rs
index 576fc8002a6..4e0619ff8ee 100644
--- a/vortex-compressor/src/ctx.rs
+++ b/vortex-compressor/src/ctx.rs
@@ -30,6 +30,7 @@ pub struct CompressorContext {
     /// Merged stats options from all eligible schemes at this compression site.
     merged_stats_options: GenerateStatsOptions,
 
+    // TODO(connor): Replace this with an `im::Vector`
     /// The cascade chain: `(scheme_id, child_index)` pairs from root to current depth.
     /// Used for self-exclusion, push rules ([`descendant_exclusions`]), and pull rules
     /// ([`ancestor_exclusions`]).
diff --git a/vortex-compressor/src/estimate.rs b/vortex-compressor/src/estimate.rs
index 22a99fbccdc..9fbf434352a 100644
--- a/vortex-compressor/src/estimate.rs
+++ b/vortex-compressor/src/estimate.rs
@@ -23,12 +23,21 @@ use crate::trace;
 
 /// Closure type for [`DeferredEstimate::Callback`].
 ///
-/// The compressor calls this with the same arguments it would pass to sampling. The closure must
-/// resolve directly to a terminal [`EstimateVerdict`].
+/// The compressor calls this with the same arguments it would pass to sampling, plus the best
+/// [`EstimateScore`] observed so far (if any). The closure must resolve directly to a terminal
+/// [`EstimateVerdict`].
+///
+/// The `best_so_far` threshold is an early-exit hint. If your scheme's maximum achievable
+/// compression ratio is not strictly greater than
+/// `best_so_far.and_then(EstimateScore::finite_ratio)`, you should return
+/// [`EstimateVerdict::Skip`]. Returning a ratio equal to the threshold is permitted but will
+/// lose to the prior best due to strict `>` tie-breaking in the selector. Use the threshold
+/// only as an early-exit hint, never to perform additional work.
 #[rustfmt::skip]
 pub type EstimateFn = dyn FnOnce(
         &CascadingCompressor,
         &ArrayAndStats,
+        Option<EstimateScore>,
         CompressorContext,
         &mut ExecutionCtx,
     ) -> VortexResult<EstimateVerdict>
@@ -85,17 +94,23 @@ pub enum DeferredEstimate {
     /// Use this only when the scheme needs to perform trial encoding or other costly checks to
     /// determine its compression ratio. The callback returns an [`EstimateVerdict`] directly, so
     /// it cannot request more sampling or another deferred callback.
+    ///
+    /// The compressor evaluates all immediate [`CompressionEstimate::Verdict`] results before
+    /// invoking any deferred callback, and passes the best [`EstimateScore`] observed so far to
+    /// the callback. This lets the callback return [`EstimateVerdict::Skip`] without performing
+    /// expensive work when its maximum achievable ratio cannot beat the current best. See
+    /// [`EstimateFn`] for the full contract.
     Callback(Box<EstimateFn>),
 }
 
 /// Ranked estimate used for comparing non-terminal compression candidates.
 #[derive(Debug, Clone, Copy, PartialEq)]
-pub(super) enum EstimateScore {
+pub enum EstimateScore {
     /// A finite compression ratio. Higher means a smaller amount of data, so it is better.
     FiniteCompression(f64),
     /// Trial compression produced a 0-byte output.
     ///
-    /// This has no finite trace ratio and is not eligible for scheme selection.
+    /// This has no finite ratio and is not eligible for scheme selection.
     ///
     /// TODO(connor): A zero-byte sample usually means the sampler happened to hit an all-null
     /// sample. Improve this logic so we can distinguish real zero-byte wins from sampling artifacts.
@@ -112,8 +127,11 @@ impl EstimateScore {
         }
     }
 
-    /// Returns the traceable numeric ratio, omitting the zero-byte special case.
-    pub(super) fn trace_ratio(self) -> Option<f64> {
+    /// Returns the finite compression ratio, or [`None`] for the zero-byte special case.
+    ///
+    /// Callers comparing a scheme's maximum achievable ratio against a "best so far" threshold
+    /// should use this to extract a numeric value from an [`EstimateScore`].
+    pub fn finite_ratio(self) -> Option<f64> {
         match self {
             Self::FiniteCompression(ratio) => Some(ratio),
             Self::ZeroBytes => None,
@@ -156,7 +174,7 @@ impl WinnerEstimate {
     pub(super) fn trace_ratio(self) -> Option<f64> {
         match self {
             Self::AlwaysUse => None,
-            Self::Score(score) => score.trace_ratio(),
+            Self::Score(score) => score.finite_ratio(),
         }
     }
 }
@@ -178,8 +196,8 @@ pub(super) fn is_better_score(
 ///
 /// Returns an error if sample compression fails.
 pub(super) fn estimate_compression_ratio_with_sampling<S: Scheme + ?Sized>(
-    scheme: &S,
     compressor: &CascadingCompressor,
+    scheme: &S,
     array: &ArrayRef,
     compress_ctx: CompressorContext,
     exec_ctx: &mut ExecutionCtx,
@@ -212,7 +230,7 @@ pub(super) fn estimate_compression_ratio_with_sampling<S: Scheme + ?Sized>(
 
     // Single DEBUG event per sampled scheme. Downstream tooling can join this with the eventual
     // `scheme.compress_result` on the same scheme to compute sample-vs-full divergence.
-    trace::sample_result(scheme.id(), before, after, score.trace_ratio());
+    trace::sample_result(scheme.id(), before, after, score.finite_ratio());
 
     Ok(score)
 }