-
Notifications
You must be signed in to change notification settings - Fork 1.9k
feat: support FixedSizeList Type Coercion
#9108
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
56e4ddf
c10b8b2
92d5e9e
b61cbef
3220da5
b565290
58b7d09
3d5c899
eeed955
342d379
d1f8744
4de5ee2
13801be
4b21267
1d9f35e
cb37528
a7f121e
0a338b6
9d9527e
9201a36
ad73b17
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,10 @@ | |
| //! Signature module contains foundational types that are used to represent signatures, types, | ||
| //! and return types of functions in DataFusion. | ||
|
|
||
| use crate::type_coercion::binary::comparison_coercion; | ||
| use arrow::datatypes::DataType; | ||
| use datafusion_common::utils::coerced_fixed_size_list_to_list; | ||
| use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; | ||
|
|
||
| /// Constant that is used as a placeholder for any valid timezone. | ||
| /// This is used where a function can accept a timestamp type with any | ||
|
|
@@ -123,35 +126,211 @@ pub enum TypeSignature { | |
| #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
| pub enum ArrayFunctionSignature { | ||
| /// Specialized Signature for ArrayAppend and similar functions | ||
| /// The first argument should be List/LargeList, and the second argument should be non-list or list. | ||
| /// If `allow_null` is true, the function also accepts a single argument of type Null. | ||
| /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be non-list or list. | ||
| /// The second argument's list dimension should be one dimension less than the first argument's list dimension. | ||
| /// List dimension of the List/LargeList is equivalent to the number of List. | ||
| /// List dimension of the non-list is 0. | ||
| ArrayAndElement, | ||
| ArrayAndElement(bool), | ||
| /// Specialized Signature for ArrayPrepend and similar functions | ||
| /// If `allow_null` is true, the function also accepts a single argument of type Null. | ||
| /// The first argument should be non-list or list, and the second argument should be List/LargeList. | ||
| /// The first argument's list dimension should be one dimension less than the second argument's list dimension. | ||
| ElementAndArray, | ||
| ElementAndArray(bool), | ||
| /// Specialized Signature for Array functions of the form (List/LargeList, Index) | ||
| ArrayAndIndex, | ||
| /// If `allow_null` is true, the function also accepts a single argument of type Null. | ||
| /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be Int64. | ||
| ArrayAndIndex(bool), | ||
| /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index) | ||
| ArrayAndElementAndOptionalIndex, | ||
| /// Specialized Signature for ArrayEmpty and similar functions | ||
Weijun-H marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| /// The function takes a single argument that must be a List/LargeList/FixedSizeList | ||
| /// or something that can be coerced to one of those types. | ||
| /// If `allow_null` is true, the function also accepts a single argument of type Null. | ||
| Array(bool), | ||
| } | ||
|
|
||
| impl ArrayFunctionSignature { | ||
| /// Arguments to ArrayFunctionSignature | ||
| /// `current_types` - The data types of the arguments | ||
| /// `allow_null_coercion` - Whether null type coercion is allowed | ||
| /// Returns the valid types for the function signature | ||
| pub fn get_type_signature( | ||
|
||
| &self, | ||
| current_types: &[DataType], | ||
| ) -> Result<Vec<Vec<DataType>>> { | ||
| fn array_element_and_optional_index( | ||
| current_types: &[DataType], | ||
| ) -> Result<Vec<Vec<DataType>>> { | ||
| // make sure there's 2 or 3 arguments | ||
| if !(current_types.len() == 2 || current_types.len() == 3) { | ||
| return Ok(vec![vec![]]); | ||
| } | ||
|
|
||
| let first_two_types = ¤t_types[0..2]; | ||
| let mut valid_types = | ||
| array_append_or_prepend_valid_types(first_two_types, true, true)?; | ||
|
|
||
| // Early return if there are only 2 arguments | ||
| if current_types.len() == 2 { | ||
| return Ok(valid_types); | ||
| } | ||
|
|
||
| let valid_types_with_index = valid_types | ||
| .iter() | ||
| .map(|t| { | ||
| let mut t = t.clone(); | ||
| t.push(DataType::Int64); | ||
| t | ||
| }) | ||
| .collect::<Vec<_>>(); | ||
|
|
||
| valid_types.extend(valid_types_with_index); | ||
|
|
||
| Ok(valid_types) | ||
| } | ||
|
|
||
| fn array_append_or_prepend_valid_types( | ||
|
||
| current_types: &[DataType], | ||
| is_append: bool, | ||
| allow_null_coercion: bool, | ||
| ) -> Result<Vec<Vec<DataType>>> { | ||
| if current_types.len() != 2 { | ||
| return Ok(vec![vec![]]); | ||
| } | ||
|
|
||
| let (array_type, elem_type) = if is_append { | ||
| (¤t_types[0], ¤t_types[1]) | ||
| } else { | ||
| (¤t_types[1], ¤t_types[0]) | ||
| }; | ||
|
|
||
| // We follow Postgres on `array_append(Null, T)`, which is not valid. | ||
| if array_type.eq(&DataType::Null) { | ||
| if allow_null_coercion { | ||
| return Ok(vec![vec![array_type.clone(), elem_type.clone()]]); | ||
| } else { | ||
| return Ok(vec![vec![]]); | ||
| } | ||
| } | ||
|
|
||
| // We need to find the coerced base type, mainly for cases like: | ||
| // `array_append(List(null), i64)` -> `List(i64)` | ||
| let array_base_type = datafusion_common::utils::base_type(array_type); | ||
| let elem_base_type = datafusion_common::utils::base_type(elem_type); | ||
| let new_base_type = comparison_coercion(&array_base_type, &elem_base_type); | ||
|
|
||
| let new_base_type = new_base_type.ok_or_else(|| { | ||
| internal_datafusion_err!( | ||
| "Coercion from {array_base_type:?} to {elem_base_type:?} not supported." | ||
| ) | ||
| })?; | ||
|
|
||
| let new_array_type = | ||
| datafusion_common::utils::coerced_type_with_base_type_only( | ||
| array_type, | ||
| &new_base_type, | ||
| ); | ||
|
|
||
| match new_array_type { | ||
| DataType::List(ref field) | ||
| | DataType::LargeList(ref field) | ||
| | DataType::FixedSizeList(ref field, _) => { | ||
| let new_elem_type = field.data_type(); | ||
| if is_append { | ||
| Ok(vec![vec![new_array_type.clone(), new_elem_type.clone()]]) | ||
| } else { | ||
| Ok(vec![vec![new_elem_type.to_owned(), new_array_type.clone()]]) | ||
| } | ||
| } | ||
| _ => Ok(vec![vec![]]), | ||
| } | ||
| } | ||
| fn array_and_index( | ||
| current_types: &[DataType], | ||
| allow_null_coercion: bool, | ||
| ) -> Result<Vec<Vec<DataType>>> { | ||
| if current_types.len() != 2 { | ||
| return Ok(vec![vec![]]); | ||
| } | ||
|
|
||
| let array_type = ¤t_types[0]; | ||
|
|
||
| if array_type.eq(&DataType::Null) && !allow_null_coercion { | ||
| return Ok(vec![vec![]]); | ||
| } | ||
|
|
||
| match array_type { | ||
jayzhan211 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| DataType::List(_) | ||
| | DataType::LargeList(_) | ||
| | DataType::FixedSizeList(_, _) => { | ||
| let array_type = coerced_fixed_size_list_to_list(array_type); | ||
| Ok(vec![vec![array_type, DataType::Int64]]) | ||
|
||
| } | ||
| DataType::Null => Ok(vec![vec![array_type.clone(), DataType::Int64]]), | ||
| _ => Ok(vec![vec![]]), | ||
| } | ||
| } | ||
| fn array( | ||
| current_types: &[DataType], | ||
| allow_null_coercion: bool, | ||
| ) -> Result<Vec<Vec<DataType>>> { | ||
| if current_types.len() != 1 | ||
| || (current_types[0].is_null() && !allow_null_coercion) | ||
| { | ||
| return Ok(vec![vec![]]); | ||
| } | ||
|
|
||
| let array_type = ¤t_types[0]; | ||
|
|
||
| match array_type { | ||
| DataType::List(_) | ||
| | DataType::LargeList(_) | ||
| | DataType::FixedSizeList(_, _) => { | ||
| let array_type = coerced_fixed_size_list_to_list(array_type); | ||
| Ok(vec![vec![array_type]]) | ||
| } | ||
| DataType::Null => Ok(vec![vec![array_type.clone()]]), | ||
| _ => Ok(vec![vec![]]), | ||
| } | ||
| } | ||
| match self { | ||
| ArrayFunctionSignature::ArrayAndElement(allow_null) => { | ||
| array_append_or_prepend_valid_types(current_types, true, *allow_null) | ||
| } | ||
| ArrayFunctionSignature::ElementAndArray(allow_null) => { | ||
| array_append_or_prepend_valid_types(current_types, false, *allow_null) | ||
| } | ||
| ArrayFunctionSignature::ArrayAndIndex(allow_null) => { | ||
| array_and_index(current_types, *allow_null) | ||
| } | ||
| ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { | ||
| array_element_and_optional_index(current_types) | ||
| } | ||
| ArrayFunctionSignature::Array(allow_null) => { | ||
| array(current_types, *allow_null) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl std::fmt::Display for ArrayFunctionSignature { | ||
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
| match self { | ||
| ArrayFunctionSignature::ArrayAndElement => { | ||
| write!(f, "array, element") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this change? |
||
| ArrayFunctionSignature::ArrayAndElement(allow_null) => { | ||
| write!(f, "ArrayAndElement({})", *allow_null) | ||
| } | ||
| ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { | ||
| write!(f, "array, element, [index]") | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
| } | ||
| ArrayFunctionSignature::ElementAndArray => { | ||
| write!(f, "element, array") | ||
| ArrayFunctionSignature::ElementAndArray(allow_null) => { | ||
| write!(f, "ElementAndArray({})", *allow_null) | ||
| } | ||
| ArrayFunctionSignature::ArrayAndIndex(allow_null) => { | ||
| write!(f, "ArrayAndIndex({})", *allow_null) | ||
| } | ||
| ArrayFunctionSignature::ArrayAndIndex => { | ||
| write!(f, "array, index") | ||
| ArrayFunctionSignature::Array(allow_null) => { | ||
| write!(f, "Array({})", *allow_null) | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -290,10 +469,10 @@ impl Signature { | |
| } | ||
| } | ||
| /// Specialized Signature for ArrayAppend and similar functions | ||
| pub fn array_and_element(volatility: Volatility) -> Self { | ||
| pub fn array_and_element(allow_null: bool, volatility: Volatility) -> Self { | ||
| Signature { | ||
| type_signature: TypeSignature::ArraySignature( | ||
| ArrayFunctionSignature::ArrayAndElement, | ||
| ArrayFunctionSignature::ArrayAndElement(allow_null), | ||
| ), | ||
| volatility, | ||
| } | ||
|
|
@@ -308,23 +487,32 @@ impl Signature { | |
| } | ||
| } | ||
| /// Specialized Signature for ArrayPrepend and similar functions | ||
| pub fn element_and_array(volatility: Volatility) -> Self { | ||
| pub fn element_and_array(allow_null: bool, volatility: Volatility) -> Self { | ||
| Signature { | ||
| type_signature: TypeSignature::ArraySignature( | ||
| ArrayFunctionSignature::ElementAndArray, | ||
| ArrayFunctionSignature::ElementAndArray(allow_null), | ||
| ), | ||
| volatility, | ||
| } | ||
| } | ||
| /// Specialized Signature for ArrayElement and similar functions | ||
| pub fn array_and_index(volatility: Volatility) -> Self { | ||
| pub fn array_and_index(allow_null: bool, volatility: Volatility) -> Self { | ||
| Signature { | ||
| type_signature: TypeSignature::ArraySignature( | ||
| ArrayFunctionSignature::ArrayAndIndex, | ||
| ArrayFunctionSignature::ArrayAndIndex(allow_null), | ||
| ), | ||
| volatility, | ||
| } | ||
| } | ||
| /// Specialized Signature for ArrayEmpty and similar functions | ||
| pub fn array(allow_null: bool, volatility: Volatility) -> Self { | ||
| Signature { | ||
| type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array( | ||
| allow_null, | ||
| )), | ||
| volatility, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Monotonicity of the `ScalarFunctionExpr` with respect to its arguments. | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.