Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions datafusion/physical-expr/src/aggregate/count_distinct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ macro_rules! float_distinct_count_accumulator {
}};
}

/// Returns the estimated number of buckets backing `hashset`.
///
/// The estimate is `len * 8 / 7` rounded up to the next power of two, so an
/// empty set is reported as having a single bucket. The multiplication
/// saturates at `usize::MAX` instead of overflowing.
///
/// NOTE(review): the 8/7 factor and power-of-two rounding presumably mirror
/// hashbrown's 7/8 maximum load factor and bucket sizing — confirm against the
/// hashbrown version in use.
fn estimated_buckets<T>(hashset: &HashSet<T, RandomState>) -> usize {
    (hashset.len().saturating_mul(8) / 7).next_power_of_two()
}

impl AggregateExpr for DistinctCount {
/// Return a reference to Any that can be used for downcasting
fn as_any(&self) -> &dyn Any {
Expand Down Expand Up @@ -336,9 +343,7 @@ where
}

fn size(&self) -> usize {
let estimated_buckets = (self.values.len().checked_mul(8).unwrap_or(usize::MAX)
/ 7)
.next_power_of_two();
let estimated_buckets = estimated_buckets(&self.values);

// Size of accumulator
// + size of entry * number of buckets
Expand Down Expand Up @@ -423,9 +428,7 @@ where
}

fn size(&self) -> usize {
let estimated_buckets = (self.values.len().checked_mul(8).unwrap_or(usize::MAX)
/ 7)
.next_power_of_two();
let estimated_buckets = estimated_buckets(&self.values);

// Size of accumulator
// + size of entry * number of buckets
Expand Down