Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions datasketches/src/hll/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,17 @@ impl List {
empty: bool,
compact: bool,
) -> Result<Self, Error> {
// Compute array size
let array_size = if compact { coupon_count } else { 1 << lg_arr };

// Read coupons
// Always allocate the full-sized array (1 << lg_arr) so Coupon::EMPTY sentinel
// slots are available for future update() calls. In compact format only
// coupon_count values are stored on disk, but memory must hold the full capacity
// so the linear scan in update() can find an empty slot to insert into.
let array_size = 1 << lg_arr;
let read_count = if compact { coupon_count } else { array_size };

// Read coupons into the front of the full-sized array; remaining slots stay Coupon::EMPTY.
let mut coupons = vec![Coupon::EMPTY; array_size];
if !empty && coupon_count > 0 {
for (i, coupon) in coupons.iter_mut().enumerate() {
for (i, coupon) in coupons.iter_mut().take(read_count).enumerate() {
let raw = cursor.read_u32_le().map_err(|_| {
Error::insufficient_data(format!(
"expect {coupon_count} coupons, failed at index {i}"
Expand Down
28 changes: 28 additions & 0 deletions datasketches/tests/hll_serialization_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use std::path::PathBuf;

use common::serialization_test_data;
use datasketches::hll::HllSketch;
use datasketches::hll::HllType;

fn test_sketch_file(path: PathBuf, expected_cardinality: usize, expected_lg_k: u8) {
let expected = expected_cardinality as f64;
Expand Down Expand Up @@ -102,6 +103,33 @@ fn test_sketch_file(path: PathBuf, expected_cardinality: usize, expected_lg_k: u
);
}

/// Reproducer for https://github.com/apache/datasketches-rust/issues/115
///
/// A compact-serialized List has no trailing COUPON_EMPTY (0) sentinels.
/// Before the fix, update() would scan the fully-packed array, find no
/// empty slot, and silently drop the new value.
#[test]
fn test_update_after_deserialize_list_mode() {
const LG_K: u8 = 11;
for hll_type in [HllType::Hll4, HllType::Hll6, HllType::Hll8] {
let mut sketch = HllSketch::new(LG_K, hll_type);
sketch.update(1u64);

// Round-trip through serialization (compact format, List mode)
let bytes = sketch.serialize();
let mut sketch = HllSketch::deserialize(&bytes).unwrap();

// This update was silently dropped before the fix
sketch.update(2u64);

let est = sketch.estimate();
assert!(
(est - 2.0).abs() < 0.1,
"{hll_type:?}: expected estimate close to 2.0 after update post-deserialize, got {est}"
);
}
}

#[test]
fn test_java_hll4_compatibility() {
let test_cases = [0, 1, 10, 100, 1000, 10000, 100000, 1000000];
Expand Down