-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Support FixedSizeList type coercion
#8902
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4a40883
0d12e97
fc4c8cf
81745c5
7a5f42c
bb20c37
53bf53a
c7cd1f5
b9474ec
597c262
97dc372
b01cca0
2ecb193
7e0573b
25e620a
0c08cc9
19e322a
707c0c8
acf9df4
9c82e43
159648a
76cede4
b1d79ba
832b59a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -599,10 +599,11 @@ impl BuiltinScalarFunction { | |
| } | ||
| BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()), | ||
| BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] { | ||
| List(field) => Ok(field.data_type().clone()), | ||
| LargeList(field) => Ok(field.data_type().clone()), | ||
| List(field) | ||
| | LargeList(field) | ||
| | FixedSizeList(field, _) => Ok(field.data_type().clone()), | ||
| _ => plan_err!( | ||
| "The {self} function can only accept list or largelist as the first argument" | ||
| "The {self} function can only accept List, LargeList or FixedSizeList as the first argument" | ||
| ), | ||
| }, | ||
| BuiltinScalarFunction::ArrayLength => Ok(UInt64), | ||
|
|
@@ -944,10 +945,9 @@ impl BuiltinScalarFunction { | |
| BuiltinScalarFunction::ArraySort => { | ||
| Signature::variadic_any(self.volatility()) | ||
| } | ||
| BuiltinScalarFunction::ArrayAppend => Signature { | ||
| type_signature: ArrayAndElement, | ||
| volatility: self.volatility(), | ||
| }, | ||
| BuiltinScalarFunction::ArrayAppend => { | ||
| Signature::array_and_element(self.volatility()) | ||
| } | ||
| BuiltinScalarFunction::MakeArray => { | ||
| // 0 or more arguments of arbitrary type | ||
| Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility()) | ||
|
|
@@ -959,12 +959,17 @@ impl BuiltinScalarFunction { | |
| } | ||
| BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()), | ||
| BuiltinScalarFunction::ArrayEmpty => Signature::any(1, self.volatility()), | ||
| BuiltinScalarFunction::ArrayElement => Signature::any(2, self.volatility()), | ||
| BuiltinScalarFunction::ArrayElement => { | ||
| Signature::array_and_index(self.volatility()) | ||
| } | ||
| BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), | ||
| BuiltinScalarFunction::Flatten => Signature::any(1, self.volatility()), | ||
| BuiltinScalarFunction::ArrayHasAll | ||
| | BuiltinScalarFunction::ArrayHasAny | ||
| | BuiltinScalarFunction::ArrayHas => Signature::any(2, self.volatility()), | ||
| BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny => { | ||
| Signature::any(2, self.volatility()) | ||
| } | ||
| BuiltinScalarFunction::ArrayHas => { | ||
| Signature::array_and_element(self.volatility()) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add the example that works after this change. One of the example is
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| } | ||
| BuiltinScalarFunction::ArrayLength => { | ||
| Signature::variadic_any(self.volatility()) | ||
| } | ||
|
|
@@ -973,15 +978,20 @@ impl BuiltinScalarFunction { | |
| BuiltinScalarFunction::ArrayPosition => { | ||
| Signature::variadic_any(self.volatility()) | ||
| } | ||
| BuiltinScalarFunction::ArrayPositions => Signature::any(2, self.volatility()), | ||
| BuiltinScalarFunction::ArrayPrepend => Signature { | ||
| type_signature: ElementAndArray, | ||
| volatility: self.volatility(), | ||
| }, | ||
| BuiltinScalarFunction::ArrayPositions => { | ||
| Signature::array_and_element(self.volatility()) | ||
| } | ||
| BuiltinScalarFunction::ArrayPrepend => { | ||
| Signature::element_and_array(self.volatility()) | ||
| } | ||
| BuiltinScalarFunction::ArrayRepeat => Signature::any(2, self.volatility()), | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Although the functions including |
||
| BuiltinScalarFunction::ArrayRemove => Signature::any(2, self.volatility()), | ||
| BuiltinScalarFunction::ArrayRemove => { | ||
| Signature::array_and_element(self.volatility()) | ||
| } | ||
| BuiltinScalarFunction::ArrayRemoveN => Signature::any(3, self.volatility()), | ||
| BuiltinScalarFunction::ArrayRemoveAll => Signature::any(2, self.volatility()), | ||
| BuiltinScalarFunction::ArrayRemoveAll => { | ||
| Signature::array_and_element(self.volatility()) | ||
| } | ||
| BuiltinScalarFunction::ArrayReplace => Signature::any(3, self.volatility()), | ||
| BuiltinScalarFunction::ArrayReplaceN => Signature::any(4, self.volatility()), | ||
| BuiltinScalarFunction::ArrayReplaceAll => { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -116,6 +116,12 @@ pub enum TypeSignature { | |
| /// Function `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature` | ||
| /// is `OneOf(vec![Any(0), VariadicAny])`. | ||
| OneOf(Vec<TypeSignature>), | ||
| /// Specifies Signatures for array functions | ||
| ArraySignature(ArrayFunctionSignature), | ||
|
Comment on lines
+119
to
+120
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For future extension
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that is a good change, though a change of the API |
||
| } | ||
|
|
||
| #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
| pub enum ArrayFunctionSignature { | ||
| /// Specialized Signature for ArrayAppend and similar functions | ||
| /// The first argument should be List/LargeList, and the second argument should be non-list or list. | ||
| /// The second argument's list dimension should be one dimension less than the first argument's list dimension. | ||
|
|
@@ -126,6 +132,23 @@ pub enum TypeSignature { | |
| /// The first argument should be non-list or list, and the second argument should be List/LargeList. | ||
| /// The first argument's list dimension should be one dimension less than the second argument's list dimension. | ||
| ElementAndArray, | ||
| ArrayAndIndex, | ||
| } | ||
|
|
||
| impl std::fmt::Display for ArrayFunctionSignature { | ||
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
| match self { | ||
| ArrayFunctionSignature::ArrayAndElement => { | ||
| write!(f, "array, element") | ||
| } | ||
| ArrayFunctionSignature::ElementAndArray => { | ||
| write!(f, "element, array") | ||
| } | ||
| ArrayFunctionSignature::ArrayAndIndex => { | ||
| write!(f, "array, index") | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl TypeSignature { | ||
|
|
@@ -156,11 +179,8 @@ impl TypeSignature { | |
| TypeSignature::OneOf(sigs) => { | ||
| sigs.iter().flat_map(|s| s.to_string_repr()).collect() | ||
| } | ||
| TypeSignature::ArrayAndElement => { | ||
| vec!["ArrayAndElement(List<T>, T)".to_string()] | ||
| } | ||
| TypeSignature::ElementAndArray => { | ||
| vec!["ElementAndArray(T, List<T>)".to_string()] | ||
| TypeSignature::ArraySignature(array_signature) => { | ||
| vec![array_signature.to_string()] | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -263,6 +283,33 @@ impl Signature { | |
| volatility, | ||
| } | ||
| } | ||
| /// Specialized Signature for ArrayAppend and similar functions | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice cleanup |
||
| pub fn array_and_element(volatility: Volatility) -> Self { | ||
| Signature { | ||
| type_signature: TypeSignature::ArraySignature( | ||
| ArrayFunctionSignature::ArrayAndElement, | ||
| ), | ||
| volatility, | ||
| } | ||
| } | ||
| /// Specialized Signature for ArrayPrepend and similar functions | ||
| pub fn element_and_array(volatility: Volatility) -> Self { | ||
| Signature { | ||
| type_signature: TypeSignature::ArraySignature( | ||
| ArrayFunctionSignature::ElementAndArray, | ||
| ), | ||
| volatility, | ||
| } | ||
| } | ||
| /// Specialized Signature for ArrayElement and similar functions | ||
| pub fn array_and_index(volatility: Volatility) -> Self { | ||
| Signature { | ||
| type_signature: TypeSignature::ArraySignature( | ||
| ArrayFunctionSignature::ArrayAndIndex, | ||
| ), | ||
| volatility, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Monotonicity of the `ScalarFunctionExpr` with respect to its arguments. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,14 +15,16 @@ | |
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| use crate::signature::TIMEZONE_WILDCARD; | ||
| use crate::signature::{ArrayFunctionSignature, TIMEZONE_WILDCARD}; | ||
| use crate::{Signature, TypeSignature}; | ||
| use arrow::{ | ||
| compute::can_cast_types, | ||
| datatypes::{DataType, TimeUnit}, | ||
| }; | ||
| use datafusion_common::utils::list_ndims; | ||
| use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; | ||
| use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; | ||
| use datafusion_common::{ | ||
| internal_datafusion_err, internal_err, plan_err, DataFusionError, Result, | ||
| }; | ||
|
|
||
| use super::binary::comparison_coercion; | ||
|
|
||
|
|
@@ -48,7 +50,6 @@ pub fn data_types( | |
| ); | ||
| } | ||
| } | ||
|
|
||
| let valid_types = get_valid_types(&signature.type_signature, current_types)?; | ||
|
|
||
| if valid_types | ||
|
|
@@ -104,30 +105,48 @@ fn get_valid_types( | |
| let elem_base_type = datafusion_common::utils::base_type(elem_type); | ||
| let new_base_type = comparison_coercion(&array_base_type, &elem_base_type); | ||
|
|
||
| if new_base_type.is_none() { | ||
| return internal_err!( | ||
| let new_base_type = new_base_type.ok_or_else(|| { | ||
| internal_datafusion_err!( | ||
| "Coercion from {array_base_type:?} to {elem_base_type:?} not supported." | ||
| ); | ||
| } | ||
| let new_base_type = new_base_type.unwrap(); | ||
| ) | ||
| })?; | ||
|
|
||
| let array_type = datafusion_common::utils::coerced_type_with_base_type_only( | ||
| array_type, | ||
| &new_base_type, | ||
| ); | ||
|
|
||
| match array_type { | ||
| DataType::List(ref field) | DataType::LargeList(ref field) => { | ||
| DataType::List(ref field) | ||
| | DataType::LargeList(ref field) | ||
| | DataType::FixedSizeList(ref field, _) => { | ||
| let elem_type = field.data_type(); | ||
| if is_append { | ||
| Ok(vec![vec![array_type.clone(), elem_type.to_owned()]]) | ||
| Ok(vec![vec![array_type.clone(), elem_type.clone()]]) | ||
| } else { | ||
| Ok(vec![vec![elem_type.to_owned(), array_type.clone()]]) | ||
| } | ||
| } | ||
| _ => Ok(vec![vec![]]), | ||
| } | ||
| } | ||
| fn array_and_index(current_types: &[DataType]) -> Result<Vec<Vec<DataType>>> { | ||
| if current_types.len() != 2 { | ||
| return Ok(vec![vec![]]); | ||
| } | ||
|
|
||
| let array_type = ¤t_types[0]; | ||
|
|
||
| match array_type { | ||
| DataType::List(_) | ||
| | DataType::LargeList(_) | ||
| | DataType::FixedSizeList(_, _) => { | ||
| let array_type = coerced_fixed_size_list_to_list(array_type); | ||
| Ok(vec![vec![array_type, DataType::Int64]]) | ||
| } | ||
| _ => Ok(vec![vec![]]), | ||
| } | ||
| } | ||
| let valid_types = match signature { | ||
| TypeSignature::Variadic(valid_types) => valid_types | ||
| .iter() | ||
|
|
@@ -160,12 +179,19 @@ fn get_valid_types( | |
| } | ||
|
|
||
| TypeSignature::Exact(valid_types) => vec![valid_types.clone()], | ||
| TypeSignature::ArrayAndElement => { | ||
| return array_append_or_prepend_valid_types(current_types, true) | ||
| } | ||
| TypeSignature::ElementAndArray => { | ||
| return array_append_or_prepend_valid_types(current_types, false) | ||
| } | ||
| TypeSignature::ArraySignature(ref function_signature) => match function_signature | ||
| { | ||
| ArrayFunctionSignature::ArrayAndElement => { | ||
| return array_append_or_prepend_valid_types(current_types, true) | ||
| } | ||
| ArrayFunctionSignature::ArrayAndIndex => { | ||
| return array_and_index(current_types) | ||
| } | ||
| ArrayFunctionSignature::ElementAndArray => { | ||
| return array_append_or_prepend_valid_types(current_types, false) | ||
| } | ||
| }, | ||
|
|
||
| TypeSignature::Any(number) => { | ||
| if current_types.len() != *number { | ||
| return plan_err!( | ||
|
|
@@ -311,6 +337,8 @@ fn coerced_from<'a>( | |
| Utf8 | LargeUtf8 => Some(type_into.clone()), | ||
| Null if can_cast_types(type_from, type_into) => Some(type_into.clone()), | ||
|
|
||
| List(_) if matches!(type_from, FixedSizeList(_, _)) => Some(type_into.clone()), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will it be easier if we run Fixedsizelist to List like
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am unable to decide between
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One is coerced based on function, another is based on signature. If we target to convert from fixedsizelist to list, it seems
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still think we can have fixed size list to list conversion in one place.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move coerce_arguments_for_fun before signature? Convert FixedSizeList to List before signature coercion. Then we don't need to deal with FixedSizeList
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, we can. In
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we dont need this anymore, input types should be handled later on via
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| // Only accept list and largelist with the same number of dimensions unless the type is Null. | ||
| // List or LargeList with different dimensions should be handled in TypeSignature or other places before this. | ||
| List(_) | LargeList(_) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.