diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 6b3f2b956dfd3..2d70f6051e154 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -26,9 +26,7 @@ use std::sync::{Arc, OnceLock}; use crate::signature::TIMEZONE_WILDCARD; use crate::type_coercion::binary::get_wider_type; use crate::type_coercion::functions::data_types; -use crate::{ - conditional_expressions, FuncMonotonicity, Signature, TypeSignature, Volatility, -}; +use crate::{FuncMonotonicity, Signature, TypeSignature, Volatility}; use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit}; use datafusion_common::{exec_err, plan_err, DataFusionError, Result}; @@ -906,10 +904,9 @@ impl BuiltinScalarFunction { | BuiltinScalarFunction::ConcatWithSeparator => { Signature::variadic(vec![Utf8], self.volatility()) } - BuiltinScalarFunction::Coalesce => Signature::variadic( - conditional_expressions::SUPPORTED_COALESCE_TYPES.to_vec(), - self.volatility(), - ), + BuiltinScalarFunction::Coalesce => { + Signature::variadic_equal(self.volatility()) + } BuiltinScalarFunction::SHA224 | BuiltinScalarFunction::SHA256 | BuiltinScalarFunction::SHA384 @@ -1593,4 +1590,13 @@ mod tests { assert_eq!(func_from_str, *func_original); } } + + #[test] + fn test_coalesce_return_types() { + let coalesce = BuiltinScalarFunction::Coalesce; + let return_type = coalesce + .return_type(&[DataType::Date32, DataType::Date32]) + .unwrap(); + assert_eq!(return_type, DataType::Date32); + } } diff --git a/datafusion/expr/src/conditional_expressions.rs b/datafusion/expr/src/conditional_expressions.rs index 1346825f054da..7a2bf4b6c44a0 100644 --- a/datafusion/expr/src/conditional_expressions.rs +++ b/datafusion/expr/src/conditional_expressions.rs @@ -22,25 +22,6 @@ use arrow::datatypes::DataType; use datafusion_common::{plan_err, DFSchema, Result}; use std::collections::HashSet; -/// Currently supported types by the coalesce function. -/// The order of these types correspond to the order on which coercion applies -/// This should thus be from least informative to most informative -pub static SUPPORTED_COALESCE_TYPES: &[DataType] = &[ - DataType::Boolean, - DataType::UInt8, - DataType::UInt16, - DataType::UInt32, - DataType::UInt64, - DataType::Int8, - DataType::Int16, - DataType::Int32, - DataType::Int64, - DataType::Float32, - DataType::Float64, - DataType::Utf8, - DataType::LargeUtf8, -]; - /// Helper struct for building [Expr::Case] pub struct CaseBuilder { expr: Option>, diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 118844e4b2663..e419343548065 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -361,7 +361,7 @@ fn string_temporal_coercion( /// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation /// where one both are numeric -fn comparison_binary_numeric_coercion( +pub(crate) fn comparison_binary_numeric_coercion( lhs_type: &DataType, rhs_type: &DataType, ) -> Option { diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index e9878fd17e8d6..19acde87b0806 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -24,7 +24,7 @@ use arrow::{ use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result}; -use super::binary::comparison_coercion; +use super::binary::{comparison_binary_numeric_coercion, comparison_coercion}; /// Performs type coercion for function arguments. /// @@ -183,6 +183,10 @@ fn get_valid_types( let new_type = current_types.iter().skip(1).try_fold( current_types.first().unwrap().clone(), |acc, x| { + // The coerced types found by `comparison_coercion` are not guaranteed to be + // coercible for the arguments. `comparison_coercion` returns more loose + // types that can be coerced to both `acc` and `x` for comparison purpose. + // See `maybe_data_types` for the actual coercion. let coerced_type = comparison_coercion(&acc, x); if let Some(coerced_type) = coerced_type { Ok(coerced_type) @@ -272,9 +276,9 @@ fn maybe_data_types( if current_type == valid_type { new_type.push(current_type.clone()) } else { - // attempt to coerce - if let Some(valid_type) = coerced_from(valid_type, current_type) { - new_type.push(valid_type) + // attempt to coerce. + if let Some(coerced_type) = coerced_from(valid_type, current_type) { + new_type.push(coerced_type) } else { // not possible return None; @@ -408,8 +412,19 @@ fn coerced_from<'a>( Some(type_into.clone()) } - // cannot coerce - _ => None, + // More coerce rules. + // Note that not all rules in `comparison_coercion` can be reused here. + // For example, all numeric types can be coerced into Utf8 for comparison, + // but not for function arguments. + _ => comparison_binary_numeric_coercion(type_into, type_from).and_then( + |coerced_type| { + if *type_into == coerced_type { + Some(coerced_type) + } else { + None + } + }, + ), } } diff --git a/datafusion/physical-expr/src/math_expressions.rs b/datafusion/physical-expr/src/math_expressions.rs index 98a05dff53862..a8c115ba3a82c 100644 --- a/datafusion/physical-expr/src/math_expressions.rs +++ b/datafusion/physical-expr/src/math_expressions.rs @@ -25,6 +25,7 @@ use std::sync::Arc; use arrow::array::ArrayRef; use arrow::array::{BooleanArray, Float32Array, Float64Array, Int64Array}; use arrow::datatypes::DataType; +use arrow_array::Array; use rand::{thread_rng, Rng}; use datafusion_common::ScalarValue::{Float32, Int64}; @@ -92,8 +93,9 @@ macro_rules! downcast_arg { ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{ $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| { DataFusionError::Internal(format!( - "could not cast {} to {}", + "could not cast {} from {} to {}", $NAME, + $ARG.data_type(), type_name::<$ARRAY_TYPE>() )) })? diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 9619696679d2c..6c6e242ff5892 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -1782,7 +1782,7 @@ AS VALUES ('BB', 6, 1), ('BB', 6, 1); -query TIR +query TII select col1, col2, coalesce(sum_col3, 0) as sum_col3 from (select distinct col2 from tbl) AS q1 cross join (select distinct col1 from tbl) AS q2 diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 5ff253c1a34a9..a64fcbbdbca24 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1841,6 +1841,51 @@ SELECT COALESCE(c1 * c2, 0) FROM test statement ok drop table test +# coalesce date32 + +statement ok +CREATE TABLE test( + d1_date DATE, + d2_date DATE, + d3_date DATE +) as VALUES + ('2022-12-12','2022-12-12','2022-12-12'), + (NULL,'2022-12-11','2022-12-12'), + ('2022-12-12','2022-12-10','2022-12-12'), + ('2022-12-12',NULL,'2022-12-12'), + ('2022-12-12','2022-12-8','2022-12-12'), + ('2022-12-12','2022-12-7',NULL), + ('2022-12-12',NULL,'2022-12-12'), + (NULL,'2022-12-5','2022-12-12') +; + +query D +SELECT COALESCE(d1_date, d2_date, d3_date) FROM test +---- +2022-12-12 +2022-12-11 +2022-12-12 +2022-12-12 +2022-12-12 +2022-12-12 +2022-12-12 +2022-12-05 + +query T +SELECT arrow_typeof(COALESCE(d1_date, d2_date, d3_date)) FROM test +---- +Date32 +Date32 +Date32 +Date32 +Date32 +Date32 +Date32 +Date32 + +statement ok +drop table test + statement ok CREATE TABLE test( i32 INT,