Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,14 @@ default = ["parquet"]
pyarrow = ["pyo3", "arrow/pyarrow"]

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
apache-avro = { version = "0.16", default-features = false, features = ["snappy"], optional = true }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-schema = { workspace = true }
chrono = { workspace = true }
half = { version = "2.1", default-features = false }
num_cpus = "1.13.0"
object_store = { version = "0.7.0", default-features = false, optional = true }
parquet = { workspace = true, optional = true }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@

//! Functionality used both on logical and physical plans

use std::sync::Arc;

use ahash::RandomState;
use arrow::array::*;
use arrow::datatypes::*;
use arrow::row::Rows;
use arrow::{downcast_dictionary_array, downcast_primitive_array};
use arrow_buffer::i256;
use datafusion_common::{
cast::{
as_boolean_array, as_generic_binary_array, as_primitive_array, as_string_array,
},
internal_err, DataFusionError, Result,

use crate::cast::{
as_boolean_array, as_generic_binary_array, as_primitive_array, as_string_array,
};
use std::sync::Arc;
use crate::error::{DataFusionError, Result, _internal_err};

// Combines two hashes into one hash
#[inline]
Expand All @@ -51,7 +51,7 @@ fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col:
}
}

pub(crate) trait HashValue {
pub trait HashValue {
fn hash_one(&self, state: &RandomState) -> u64;
}

Expand Down Expand Up @@ -337,7 +337,7 @@ pub fn create_hashes<'a>(
}
_ => {
// This is internal because we should have caught this before.
return internal_err!(
return _internal_err!(
"Unsupported data type in hasher: {}",
col.data_type()
);
Expand Down
1 change: 1 addition & 0 deletions datafusion/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ mod error;
pub mod file_options;
pub mod format;
mod functional_dependencies;
pub mod hash_utils;
mod join_type;
pub mod parsers;
#[cfg(feature = "pyarrow")]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/expressions/in_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::sync::Arc;

use crate::hash_utils::HashValue;
use crate::physical_expr::down_cast_any_ref;
use crate::utils::expr_list_eq_any_order;
use crate::PhysicalExpr;
Expand All @@ -37,6 +36,7 @@ use arrow::datatypes::*;
use arrow::record_batch::RecordBatch;
use arrow::util::bit_iterator::BitIndexIterator;
use arrow::{downcast_dictionary_array, downcast_primitive_array};
use datafusion_common::hash_utils::HashValue;
use datafusion_common::{
cast::{as_boolean_array, as_generic_binary_array, as_string_array},
internal_err, not_impl_err, DataFusionError, Result, ScalarValue,
Expand Down
4 changes: 3 additions & 1 deletion datafusion/physical-expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ pub mod equivalence;
pub mod execution_props;
pub mod expressions;
pub mod functions;
pub mod hash_utils;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is technically a breaking change if someone used hash_utils directly in their code.

Perhaps we could leave a pointer like

Suggested change
pub mod hash_utils;
// backwards compatibility
pub use datafusion_common::hash_utils;

But I don't think that is critical

pub mod intervals;
pub mod math_expressions;
mod partitioning;
Expand All @@ -49,6 +48,9 @@ pub mod utils;
pub mod var_provider;
pub mod window;

// For backwards compatibility
pub use datafusion_common::hash_utils;

pub use aggregate::groups_accumulator::{
EmitTo, GroupsAccumulator, GroupsAccumulatorAdapter,
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ use arrow::record_batch::RecordBatch;
use arrow::row::{RowConverter, Rows, SortField};
use arrow_array::{Array, ArrayRef};
use arrow_schema::{DataType, SchemaRef};
use datafusion_common::hash_utils::create_hashes;
use datafusion_common::{DataFusionError, Result};
use datafusion_execution::memory_pool::proxy::{RawTableAllocExt, VecAllocExt};
use datafusion_physical_expr::hash_utils::create_hashes;
use datafusion_physical_expr::EmitTo;
use hashbrown::raw::RawTable;

Expand Down
3 changes: 2 additions & 1 deletion datafusion/physical-plan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,11 @@ pub mod windows;

use crate::repartition::RepartitionExec;
use crate::sorts::sort_preserving_merge::SortPreservingMergeExec;
pub use datafusion_common::hash_utils;
pub use datafusion_common::utils::project_schema;
use datafusion_execution::TaskContext;
pub use datafusion_physical_expr::{
expressions, functions, hash_utils, ordering_equivalence_properties_helper, udf,
expressions, functions, ordering_equivalence_properties_helper, udf,
};

#[cfg(test)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ use arrow::{
datatypes::{Schema, SchemaBuilder, SchemaRef},
record_batch::RecordBatch,
};

use datafusion_common::hash_utils::create_hashes;
use datafusion_common::utils::{
evaluate_partition_ranges, get_arrayref_at_indices, get_at_indices,
get_record_batch_at_indices, get_row_at_idx,
Expand All @@ -51,7 +53,6 @@ use datafusion_common::{exec_err, plan_err, DataFusionError, Result};
use datafusion_execution::TaskContext;
use datafusion_expr::window_state::{PartitionBatchState, WindowAggState};
use datafusion_expr::ColumnarValue;
use datafusion_physical_expr::hash_utils::create_hashes;
use datafusion_physical_expr::window::{
PartitionBatches, PartitionKey, PartitionWindowAggStates, WindowState,
};
Expand Down