Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions vortex-btrblocks/src/schemes/integer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use vortex_compressor::builtins::FloatDictScheme;
use vortex_compressor::builtins::StringDictScheme;
use vortex_compressor::estimate::CompressionEstimate;
use vortex_compressor::estimate::DeferredEstimate;
use vortex_compressor::estimate::EstimateScore;
use vortex_compressor::estimate::EstimateVerdict;
use vortex_compressor::scheme::AncestorExclusion;
use vortex_compressor::scheme::ChildSelection;
Expand Down Expand Up @@ -737,20 +738,29 @@ impl Scheme for SequenceScheme {
return CompressionEstimate::Verdict(EstimateVerdict::Skip);
}

// TODO(connor): Why do we sequence encode the whole thing and then throw it away? And then
// why do we divide the ratio by 2???

// TODO(connor): `sequence_encode` allocates the encoded array just to confirm feasibility.
// A cheaper `is_sequence` probe would let us skip the allocation entirely.
CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
|_compressor, data, _ctx, exec_ctx| {
let Some(encoded) = sequence_encode(data.array_as_primitive(), exec_ctx)? else {
// If we are unable to sequence encode this array, make sure we skip.
|_compressor, data, best_so_far, _ctx, exec_ctx| {
// `SequenceArray` stores exactly two scalars (base and multiplier), so the best
// achievable compression ratio is `array_len / 2`.
let compressed_size = 2usize;
let max_ratio = data.array_len() as f64 / compressed_size as f64;

// If we cannot beat the best so far, then we do not want to even try sequence
// encoding the data.
let threshold = best_so_far.and_then(EstimateScore::finite_ratio);
if threshold.is_some_and(|t| max_ratio <= t) {
return Ok(EstimateVerdict::Skip);
};
}

// TODO(connor): This doesn't really make sense?
// Since two values are required to store base and multiplier the compression ratio is
// divided by 2.
Ok(EstimateVerdict::Ratio(encoded.len() as f64 / 2.0))
// TODO(connor): We should pass this array back to the compressor in the case that
// we do want to sequence encode this so that we do not need to recompress.
if sequence_encode(data.array_as_primitive(), exec_ctx)?.is_none() {
return Ok(EstimateVerdict::Skip);
}
// TODO(connor): Should we get the actual ratio here?
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we don't if you do the todo above that avoids creating the array

Ok(EstimateVerdict::Ratio(max_ratio))
},
)))
}
Expand Down
28 changes: 27 additions & 1 deletion vortex-compressor/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,32 @@ impl core::fmt::Debug for vortex_compressor::estimate::DeferredEstimate

pub fn vortex_compressor::estimate::DeferredEstimate::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

pub enum vortex_compressor::estimate::EstimateScore

pub vortex_compressor::estimate::EstimateScore::FiniteCompression(f64)

pub vortex_compressor::estimate::EstimateScore::ZeroBytes

impl vortex_compressor::estimate::EstimateScore

pub fn vortex_compressor::estimate::EstimateScore::finite_ratio(self) -> core::option::Option<f64>

impl core::clone::Clone for vortex_compressor::estimate::EstimateScore

pub fn vortex_compressor::estimate::EstimateScore::clone(&self) -> vortex_compressor::estimate::EstimateScore

impl core::cmp::PartialEq for vortex_compressor::estimate::EstimateScore

pub fn vortex_compressor::estimate::EstimateScore::eq(&self, other: &vortex_compressor::estimate::EstimateScore) -> bool

impl core::fmt::Debug for vortex_compressor::estimate::EstimateScore

pub fn vortex_compressor::estimate::EstimateScore::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::marker::Copy for vortex_compressor::estimate::EstimateScore

impl core::marker::StructuralPartialEq for vortex_compressor::estimate::EstimateScore

pub enum vortex_compressor::estimate::EstimateVerdict

pub vortex_compressor::estimate::EstimateVerdict::AlwaysUse
Expand All @@ -338,7 +364,7 @@ impl core::fmt::Debug for vortex_compressor::estimate::EstimateVerdict

pub fn vortex_compressor::estimate::EstimateVerdict::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

pub type vortex_compressor::estimate::EstimateFn = (dyn core::ops::function::FnOnce(&vortex_compressor::CascadingCompressor, &vortex_compressor::stats::ArrayAndStats, vortex_compressor::ctx::CompressorContext, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_compressor::estimate::EstimateVerdict> + core::marker::Send + core::marker::Sync)
pub type vortex_compressor::estimate::EstimateFn = (dyn core::ops::function::FnOnce(&vortex_compressor::CascadingCompressor, &vortex_compressor::stats::ArrayAndStats, core::option::Option<vortex_compressor::estimate::EstimateScore>, vortex_compressor::ctx::CompressorContext, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_compressor::estimate::EstimateVerdict> + core::marker::Send + core::marker::Sync)

pub mod vortex_compressor::scheme

Expand Down
2 changes: 1 addition & 1 deletion vortex-compressor/src/builtins/constant/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ impl Scheme for FloatConstantScheme {
// This is an expensive check, but in practice the distinct count is known because we often
// include dictionary encoding in our set of schemes, so we rarely call this.
CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
|_compressor, data, _ctx, exec_ctx| {
|_compressor, data, _best_so_far, _ctx, exec_ctx| {
if is_constant(data.array(), exec_ctx)? {
Ok(EstimateVerdict::AlwaysUse)
} else {
Expand Down
2 changes: 1 addition & 1 deletion vortex-compressor/src/builtins/constant/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ impl Scheme for StringConstantScheme {
// This is an expensive check, but the alternative of not compressing a constant array is
// far less preferable.
CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
|_compressor, data, _ctx, exec_ctx| {
|_compressor, data, _best_so_far, _ctx, exec_ctx| {
if is_constant(data.array(), exec_ctx)? {
Ok(EstimateVerdict::AlwaysUse)
} else {
Expand Down
Loading
Loading