Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 16 additions & 52 deletions vortex-array/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2302,7 +2302,7 @@ pub fn vortex_array::arrays::Extension::scalar_at(array: vortex_array::ArrayView

impl vortex_array::VTable for vortex_array::arrays::Extension

pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData

pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension

Expand Down Expand Up @@ -2338,7 +2338,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie

pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String

pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>

impl vortex_array::ValidityChild<vortex_array::arrays::Extension> for vortex_array::arrays::Extension

Expand Down Expand Up @@ -2368,44 +2368,16 @@ impl vortex_array::scalar_fn::fns::mask::MaskReduce for vortex_array::arrays::Ex

pub fn vortex_array::arrays::Extension::mask(array: vortex_array::ArrayView<'_, vortex_array::arrays::Extension>, mask: &vortex_array::ArrayRef) -> vortex_error::VortexResult<core::option::Option<vortex_array::ArrayRef>>

pub struct vortex_array::arrays::extension::ExtensionData

impl vortex_array::arrays::extension::ExtensionData

pub fn vortex_array::arrays::extension::ExtensionData::ext_dtype(&self) -> &vortex_array::dtype::extension::ExtDTypeRef

pub fn vortex_array::arrays::extension::ExtensionData::new(ext_dtype: vortex_array::dtype::extension::ExtDTypeRef, storage_dtype: &vortex_array::dtype::DType) -> Self

pub unsafe fn vortex_array::arrays::extension::ExtensionData::new_unchecked(ext_dtype: vortex_array::dtype::extension::ExtDTypeRef, storage_dtype: &vortex_array::dtype::DType) -> Self

pub fn vortex_array::arrays::extension::ExtensionData::try_new(ext_dtype: vortex_array::dtype::extension::ExtDTypeRef, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<Self>

impl core::clone::Clone for vortex_array::arrays::extension::ExtensionData

pub fn vortex_array::arrays::extension::ExtensionData::clone(&self) -> vortex_array::arrays::extension::ExtensionData

impl core::fmt::Debug for vortex_array::arrays::extension::ExtensionData

pub fn vortex_array::arrays::extension::ExtensionData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::fmt::Display for vortex_array::arrays::extension::ExtensionData

pub fn vortex_array::arrays::extension::ExtensionData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl vortex_array::ArrayEq for vortex_array::arrays::extension::ExtensionData

pub fn vortex_array::arrays::extension::ExtensionData::array_eq(&self, _other: &Self, _precision: vortex_array::Precision) -> bool

impl vortex_array::ArrayHash for vortex_array::arrays::extension::ExtensionData

pub fn vortex_array::arrays::extension::ExtensionData::array_hash<H: core::hash::Hasher>(&self, _state: &mut H, _precision: vortex_array::Precision)

pub trait vortex_array::arrays::extension::ExtensionArrayExt: vortex_array::TypedArrayRef<vortex_array::arrays::Extension>

pub fn vortex_array::arrays::extension::ExtensionArrayExt::ext_dtype(&self) -> &vortex_array::dtype::extension::ExtDTypeRef

pub fn vortex_array::arrays::extension::ExtensionArrayExt::storage_array(&self) -> &vortex_array::ArrayRef

impl<T: vortex_array::TypedArrayRef<vortex_array::arrays::Extension>> vortex_array::arrays::extension::ExtensionArrayExt for T

pub fn T::ext_dtype(&self) -> &vortex_array::dtype::extension::ExtDTypeRef

pub fn T::storage_array(&self) -> &vortex_array::ArrayRef

pub type vortex_array::arrays::extension::ExtensionArray = vortex_array::Array<vortex_array::arrays::Extension>
Expand Down Expand Up @@ -5558,7 +5530,7 @@ pub fn vortex_array::arrays::Extension::scalar_at(array: vortex_array::ArrayView

impl vortex_array::VTable for vortex_array::arrays::Extension

pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData

pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension

Expand Down Expand Up @@ -5594,7 +5566,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie

pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String

pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>

impl vortex_array::ValidityChild<vortex_array::arrays::Extension> for vortex_array::arrays::Extension

Expand Down Expand Up @@ -19464,7 +19436,7 @@ pub fn vortex_array::arrays::Decimal::validate(&self, data: &vortex_array::array

impl vortex_array::VTable for vortex_array::arrays::Extension

pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData

pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension

Expand Down Expand Up @@ -19500,7 +19472,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie

pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String

pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>

impl vortex_array::VTable for vortex_array::arrays::Filter

Expand Down Expand Up @@ -20436,7 +20408,7 @@ pub fn vortex_array::arrays::Decimal::validate(&self, data: &vortex_array::array

impl vortex_array::VTable for vortex_array::arrays::Extension

pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData

pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension

Expand Down Expand Up @@ -20472,7 +20444,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie

pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String

pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>

impl vortex_array::VTable for vortex_array::arrays::Filter

Expand Down Expand Up @@ -22686,10 +22658,6 @@ impl vortex_array::ArrayEq for vortex_array::arrays::dict::DictData

pub fn vortex_array::arrays::dict::DictData::array_eq(&self, _other: &Self, _precision: vortex_array::Precision) -> bool

impl vortex_array::ArrayEq for vortex_array::arrays::extension::ExtensionData

pub fn vortex_array::arrays::extension::ExtensionData::array_eq(&self, _other: &Self, _precision: vortex_array::Precision) -> bool

impl vortex_array::ArrayEq for vortex_array::arrays::filter::FilterData

pub fn vortex_array::arrays::filter::FilterData::array_eq(&self, other: &Self, precision: vortex_array::Precision) -> bool
Expand Down Expand Up @@ -22794,10 +22762,6 @@ impl vortex_array::ArrayHash for vortex_array::arrays::dict::DictData

pub fn vortex_array::arrays::dict::DictData::array_hash<H: core::hash::Hasher>(&self, _state: &mut H, _precision: vortex_array::Precision)

impl vortex_array::ArrayHash for vortex_array::arrays::extension::ExtensionData

pub fn vortex_array::arrays::extension::ExtensionData::array_hash<H: core::hash::Hasher>(&self, _state: &mut H, _precision: vortex_array::Precision)

impl vortex_array::ArrayHash for vortex_array::arrays::filter::FilterData

pub fn vortex_array::arrays::filter::FilterData::array_hash<H: core::hash::Hasher>(&self, state: &mut H, precision: vortex_array::Precision)
Expand Down Expand Up @@ -23132,7 +23096,7 @@ pub fn vortex_array::arrays::Decimal::validate(&self, data: &vortex_array::array

impl vortex_array::VTable for vortex_array::arrays::Extension

pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData

pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension

Expand Down Expand Up @@ -23168,7 +23132,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie

pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String

pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>

impl vortex_array::VTable for vortex_array::arrays::Filter

Expand Down Expand Up @@ -24352,7 +24316,7 @@ pub fn vortex_array::arrays::Decimal::validate(&self, data: &vortex_array::array

impl vortex_array::VTable for vortex_array::arrays::Extension

pub type vortex_array::arrays::Extension::ArrayData = vortex_array::arrays::extension::ExtensionData
pub type vortex_array::arrays::Extension::ArrayData = vortex_array::EmptyArrayData

pub type vortex_array::arrays::Extension::OperationsVTable = vortex_array::arrays::Extension

Expand Down Expand Up @@ -24388,7 +24352,7 @@ pub fn vortex_array::arrays::Extension::serialize(_array: vortex_array::ArrayVie

pub fn vortex_array::arrays::Extension::slot_name(_array: vortex_array::ArrayView<'_, Self>, idx: usize) -> alloc::string::String

pub fn vortex_array::arrays::Extension::validate(&self, data: &vortex_array::arrays::extension::ExtensionData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>
pub fn vortex_array::arrays::Extension::validate(&self, _data: &vortex_array::EmptyArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option<vortex_array::ArrayRef>]) -> vortex_error::VortexResult<()>

impl vortex_array::VTable for vortex_array::arrays::Filter

Expand Down
145 changes: 22 additions & 123 deletions vortex-array/src/arrays/extension/array.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::fmt::Display;
use std::fmt::Formatter;

use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_ensure_eq;

use crate::ArrayRef;
use crate::EmptyArrayData;
use crate::array::Array;
use crate::array::ArrayParts;
use crate::array::TypedArrayRef;
Expand All @@ -22,113 +21,14 @@ pub(super) const STORAGE_SLOT: usize = 0;
pub(super) const NUM_SLOTS: usize = 1;
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["storage"];

/// An extension array that wraps another array with additional type information.
///
/// **⚠️ Unstable API**: This is an experimental feature that may change significantly
/// in future versions. The extension type system is still evolving.
///
/// Unlike Apache Arrow's extension arrays, Vortex extension arrays provide a more flexible
/// mechanism for adding semantic meaning to existing array types without requiring
/// changes to the core type system.
///
/// ## Design Philosophy
///
/// Extension arrays serve as a type-safe wrapper that:
/// - Preserves the underlying storage format and operations
/// - Adds semantic type information via `ExtDType`
/// - Enables custom serialization and deserialization logic
/// - Allows domain-specific interpretations of generic data
///
/// ## Storage and Type Relationship
///
/// The extension array maintains a strict contract:
/// - **Storage array**: Contains the actual data in a standard Vortex encoding
/// - **Extension type**: Defines how to interpret the storage data semantically
/// - **Type safety**: The storage array's dtype must match the extension type's storage dtype
///
/// ## Use Cases
///
/// Extension arrays are ideal for:
/// - **Custom numeric types**: Units of measurement, currencies
/// - **Temporal types**: Custom date/time formats, time zones, calendars
/// - **Domain-specific types**: UUIDs, IP addresses, geographic coordinates
/// - **Encoded types**: Base64 strings, compressed data, encrypted values
///
/// ## Validity and Operations
///
/// Extension arrays delegate validity and most operations to their storage array:
/// - Validity is inherited from the underlying storage
/// - Slicing preserves the extension type
/// - Scalar access wraps storage scalars with extension metadata
#[derive(Clone, Debug)]
pub struct ExtensionData {
/// The extension dtype of this array (the payload of a `DType::Extension`), not the dtype of
/// the storage array. Its `storage_dtype()` is compared against the storage array's dtype
/// when the data is constructed.
pub(super) ext_dtype: ExtDTypeRef,
}

impl Display for ExtensionData {
    // Renders as `ext_dtype: <extension dtype>`, using the extension dtype's own `Display`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("ext_dtype: {}", self.ext_dtype))
    }
}

impl ExtensionData {
/// Constructs a new `ExtensionData` by delegating to [`ExtensionData::try_new`].
///
/// # Panics
///
/// Panics if `storage_dtype` is not compatible with the extension dtype, i.e. whenever
/// `try_new` fails for these arguments.
pub fn new(ext_dtype: ExtDTypeRef, storage_dtype: &DType) -> Self {
Self::try_new(ext_dtype, storage_dtype).vortex_expect("Failed to create `ExtensionArray`")
}

/// Tries to construct a new `ExtensionArray`.
///
/// # Errors
///
/// Returns an error if the storage array in not compatible with the extension dtype.
pub fn try_new(ext_dtype: ExtDTypeRef, storage_dtype: &DType) -> VortexResult<Self> {
// TODO(connor): Replace these statements once we add `validate_storage_array`.
// ext_dtype.validate_storage_array(&storage_array)?;
assert_eq!(
ext_dtype.storage_dtype(),
storage_dtype,
"ExtensionArray: storage_dtype must match storage array DType",
);

// SAFETY: we validate that the inputs are valid above.
Ok(unsafe { Self::new_unchecked(ext_dtype, storage_dtype) })
}

/// Creates a new `ExtensionData` without validating its inputs outside of debug builds.
///
/// # Safety
///
/// The caller must ensure that the storage array is compatible with the extension dtype. As
/// implemented here, that means `storage_dtype` must equal `ext_dtype.storage_dtype()`; the
/// check is a `debug_assert_eq!`, so a mismatch is not detected when debug assertions are off.
pub unsafe fn new_unchecked(ext_dtype: ExtDTypeRef, storage_dtype: &DType) -> Self {
// TODO(connor): Replace this check once we add `validate_storage_array`, i.e. run
// `ext_dtype.validate_storage_array(&storage_array)` under `#[cfg(debug_assertions)]`
// and panic with "[Debug Assertion]: Invalid storage array for `ExtensionArray`".
debug_assert_eq!(
ext_dtype.storage_dtype(),
storage_dtype,
"ExtensionArray: storage_dtype must match storage array DType",
);

// Only the extension dtype is kept; `storage_dtype` is used solely for the check above.
Self { ext_dtype }
}

/// The extension dtype of this array.
pub fn ext_dtype(&self) -> &ExtDTypeRef {
&self.ext_dtype
pub trait ExtensionArrayExt: TypedArrayRef<Extension> {
fn ext_dtype(&self) -> &ExtDTypeRef {
self.as_ref()
.dtype()
.as_extension_opt()
.vortex_expect("extension array somehow did not have an extension dtype")
}
}

pub trait ExtensionArrayExt: TypedArrayRef<Extension> {
fn storage_array(&self) -> &ArrayRef {
self.as_ref().slots()[STORAGE_SLOT]
.as_ref()
Expand All @@ -144,26 +44,24 @@ impl Array<Extension> {
///
/// Panics if the storage array is not compatible with the extension dtype.
pub fn new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self {
let dtype = DType::Extension(ext_dtype.clone());
let len = storage_array.len();
let data = ExtensionData::new(ext_dtype, storage_array.dtype());
unsafe {
Array::from_parts_unchecked(
ArrayParts::new(Extension, dtype, len, data).with_slots(vec![Some(storage_array)]),
)
}
Self::try_new(ext_dtype, storage_array).vortex_expect("Unable to create `ExtensionArray`")
}

/// Tries to construct a new `ExtensionArray`.
pub fn try_new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> VortexResult<Self> {
let dtype = DType::Extension(ext_dtype.clone());
vortex_ensure_eq!(
ext_dtype.storage_dtype(),
storage_array.dtype(),
"Tried to create an `ExtensionArray` with an incompatible storage array"
);

let dtype = DType::Extension(ext_dtype);
let len = storage_array.len();
let data = ExtensionData::try_new(ext_dtype, storage_array.dtype())?;
Ok(unsafe {
Array::from_parts_unchecked(
ArrayParts::new(Extension, dtype, len, data).with_slots(vec![Some(storage_array)]),
)
})

let parts = ArrayParts::new(Extension, dtype, len, EmptyArrayData)
.with_slots(vec![Some(storage_array)]);

Ok(unsafe { Array::from_parts_unchecked(parts) })
}

/// Creates a new [`ExtensionArray`](crate::arrays::ExtensionArray) from a vtable, metadata, and
Expand All @@ -176,6 +74,7 @@ impl Array<Extension> {
let ext_dtype =
ExtDType::<V>::try_with_vtable(vtable, metadata, storage_array.dtype().clone())?
.erased();

Self::try_new(ext_dtype, storage_array)
}
}
1 change: 0 additions & 1 deletion vortex-array/src/arrays/extension/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

mod array;
pub use array::ExtensionArrayExt;
pub use array::ExtensionData;
pub use vtable::ExtensionArray;

pub(crate) mod compute;
Expand Down
Loading
Loading