-
Notifications
You must be signed in to change notification settings - Fork 254
chore: upgrade to DataFusion 50.0.0, Arrow 56.1.0, Parquet 56.0.0 among others #2286
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3334f31
350eefe
f3bf2e9
08c52dd
c3199a2
f2aac63
597f53c
040b1b8
b1cac10
f2519c5
32e35bc
01c9ad1
499e0f3
7b7fd03
a3e603a
906000f
886e73d
a70d03b
4476b68
22ab2e5
7f73025
9e5063f
bcb1208
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,7 @@ use datafusion::physical_plan::InputOrderMode; | |
| use datafusion::{ | ||
| arrow::{compute::SortOptions, datatypes::SchemaRef}, | ||
| common::DataFusionError, | ||
| config::ConfigOptions, | ||
| execution::FunctionRegistry, | ||
| functions_aggregate::first_last::{FirstValue, LastValue}, | ||
| logical_expr::Operator as DataFusionOperator, | ||
|
|
@@ -622,8 +623,13 @@ impl PhysicalPlanner { | |
| let args = vec![child]; | ||
| let comet_hour = Arc::new(ScalarUDF::new_from_impl(SparkHour::new(timezone))); | ||
| let field_ref = Arc::new(Field::new("hour", DataType::Int32, true)); | ||
| let expr: ScalarFunctionExpr = | ||
| ScalarFunctionExpr::new("hour", comet_hour, args, field_ref); | ||
| let expr: ScalarFunctionExpr = ScalarFunctionExpr::new( | ||
| "hour", | ||
| comet_hour, | ||
| args, | ||
| field_ref, | ||
| Arc::new(ConfigOptions::default()), | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Instead of possibly instantiating multiple default
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
That would be in the |
||
| ); | ||
|
|
||
| Ok(Arc::new(expr)) | ||
| } | ||
|
|
@@ -634,8 +640,13 @@ impl PhysicalPlanner { | |
| let args = vec![child]; | ||
| let comet_minute = Arc::new(ScalarUDF::new_from_impl(SparkMinute::new(timezone))); | ||
| let field_ref = Arc::new(Field::new("minute", DataType::Int32, true)); | ||
| let expr: ScalarFunctionExpr = | ||
| ScalarFunctionExpr::new("minute", comet_minute, args, field_ref); | ||
| let expr: ScalarFunctionExpr = ScalarFunctionExpr::new( | ||
| "minute", | ||
| comet_minute, | ||
| args, | ||
| field_ref, | ||
| Arc::new(ConfigOptions::default()), | ||
| ); | ||
|
|
||
| Ok(Arc::new(expr)) | ||
| } | ||
|
|
@@ -646,8 +657,13 @@ impl PhysicalPlanner { | |
| let args = vec![child]; | ||
| let comet_second = Arc::new(ScalarUDF::new_from_impl(SparkSecond::new(timezone))); | ||
| let field_ref = Arc::new(Field::new("second", DataType::Int32, true)); | ||
| let expr: ScalarFunctionExpr = | ||
| ScalarFunctionExpr::new("second", comet_second, args, field_ref); | ||
| let expr: ScalarFunctionExpr = ScalarFunctionExpr::new( | ||
| "second", | ||
| comet_second, | ||
| args, | ||
| field_ref, | ||
| Arc::new(ConfigOptions::default()), | ||
| ); | ||
|
|
||
| Ok(Arc::new(expr)) | ||
| } | ||
|
|
@@ -869,8 +885,13 @@ impl PhysicalPlanner { | |
| ScalarUDF::new_from_impl(BloomFilterMightContain::try_new(bloom_filter_expr)?); | ||
|
|
||
| let field_ref = Arc::new(Field::new("might_contain", DataType::Boolean, true)); | ||
| let expr: ScalarFunctionExpr = | ||
| ScalarFunctionExpr::new("might_contain", Arc::new(udf), args, field_ref); | ||
| let expr: ScalarFunctionExpr = ScalarFunctionExpr::new( | ||
| "might_contain", | ||
| Arc::new(udf), | ||
| args, | ||
| field_ref, | ||
| Arc::new(ConfigOptions::default()), | ||
| ); | ||
| Ok(Arc::new(expr)) | ||
| } | ||
| ExprStruct::CreateNamedStruct(expr) => { | ||
|
|
@@ -1089,6 +1110,7 @@ impl PhysicalPlanner { | |
| fun_expr, | ||
| vec![left, right], | ||
| Arc::new(Field::new(func_name, data_type, true)), | ||
| Arc::new(ConfigOptions::default()), | ||
| ))) | ||
| } | ||
| _ => { | ||
|
|
@@ -1114,6 +1136,7 @@ impl PhysicalPlanner { | |
| fun_expr, | ||
| vec![left, right], | ||
| Arc::new(Field::new(op_str, data_type, true)), | ||
| Arc::new(ConfigOptions::default()), | ||
| ))) | ||
| } else { | ||
| Ok(Arc::new(BinaryExpr::new(left, op, right))) | ||
|
|
@@ -2375,6 +2398,8 @@ impl PhysicalPlanner { | |
| window_frame.into(), | ||
| input_schema.as_ref(), | ||
| false, // TODO: Ignore nulls | ||
| false, // TODO: Spark does not support DISTINCT ... OVER | ||
| None, | ||
| ) | ||
| .map_err(|e| ExecutionError::DataFusionError(e.to_string())) | ||
| } | ||
|
|
@@ -2554,6 +2579,7 @@ impl PhysicalPlanner { | |
| fun_expr, | ||
| args.to_vec(), | ||
| Arc::new(Field::new(fun_name, data_type, true)), | ||
| Arc::new(ConfigOptions::default()), | ||
| )); | ||
|
|
||
| Ok(scalar_expr) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,14 +38,23 @@ use std::sync::Arc; | |
| /// The implementation mostly is the same as the DataFusion's implementation. The reason | ||
| /// we have our own implementation is that DataFusion has UInt64 for state_field count, | ||
| /// while Spark has Double for count. | ||
| #[derive(Debug, Clone)] | ||
| #[derive(Debug, Clone, PartialEq, Eq)] | ||
| pub struct Covariance { | ||
| name: String, | ||
| signature: Signature, | ||
| stats_type: StatsType, | ||
| null_on_divide_by_zero: bool, | ||
| } | ||
|
|
||
| impl std::hash::Hash for Covariance { | ||
| fn hash<H: std::hash::Hasher>(&self, state: &mut H) { | ||
| self.name.hash(state); | ||
| self.signature.hash(state); | ||
| (self.stats_type as u8).hash(state); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| self.null_on_divide_by_zero.hash(state); | ||
| } | ||
| } | ||
|
|
||
| impl Covariance { | ||
| /// Create a new COVAR aggregate function | ||
| pub fn new( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.