From 28f0c28e9ac422d8bc78a26c41c8ef18973cd1aa Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 31 Jan 2025 09:40:52 -0500 Subject: [PATCH 1/3] Test for coercing inner structs --- datafusion/sqllogictest/test_files/case.slt | 59 +++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt index 46e9c86c7591..cfbee1744b21 100644 --- a/datafusion/sqllogictest/test_files/case.slt +++ b/datafusion/sqllogictest/test_files/case.slt @@ -416,5 +416,64 @@ SELECT end FROM t; +statement ok +drop table t + +# Fix coercion of lists of structs +# https://github.com/apache/datafusion/issues/14154 + +statement ok +create or replace table t as values +( + 100, -- column1 int (so the case isn't constant folded) + [{ 'foo': arrow_cast('baz', 'Utf8View') }], -- column2 has List of Struct w/ Utf8View + [{ 'foo': 'bar' }], -- column3 has List of Struct w/ Utf8 + [{ 'foo': 'blarg' }] -- column4 has List of Struct w/ Utf8 +); + +# This case forces all branches to be coerced to the same type +query error +SELECT + case + when column1 > 0 then column2 + when column1 < 0 then column3 + else column4 + end +FROM t; +---- +DataFusion error: type_coercion +caused by +Error during planning: Failed to coerce then ([List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })]) and else (Some(List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }))) to common types in CASE WHEN expression + + +# different orders of the branches +query error +SELECT + case + when column1 > 0 then column3 -- NB different order + when column1 < 0 then column4 + else column2 + end +FROM t; +---- +DataFusion error: type_coercion +caused by +Error during planning: Failed to coerce then ([List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })]) and else (Some(List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }))) to common types in CASE WHEN expression + + +# different orders of the branches +query ? +SELECT + case + when column1 > 0 then column4 -- NB different order + when column1 < 0 then column2 + else column3 + end +FROM t; +---- +[{c0: blarg}] + + + statement ok drop table t From 6b7659df01cfe6ad848915994f3f143c1ff8f874 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 31 Jan 2025 09:58:41 -0500 Subject: [PATCH 2/3] Fix but, update tests --- .../expr-common/src/type_coercion/binary.rs | 15 ++++++++------- datafusion/sqllogictest/test_files/case.slt | 16 +++++----------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 571c17119427..d24f9bbd11af 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -507,18 +507,19 @@ fn type_union_resolution_coercion( None } - let types = lhs + let coerced_types = lhs .iter() .map(|lhs_field| search_corresponding_coerced_type(lhs_field, rhs)) .collect::>>()?; - let fields = types + // preserve the field name and nullability + let orig_fields = std::iter::zip(lhs.iter(), rhs.iter()); + + let fields: Vec = coerced_types .into_iter() - .enumerate() - .map(|(i, datatype)| { - Arc::new(Field::new(format!("c{i}"), datatype, true)) - }) - .collect::>(); + .zip(orig_fields) + .map(|(datatype, (lhs, rhs))| coerce_fields(datatype, lhs, rhs)) + .collect(); Some(DataType::Struct(fields.into())) } _ => { diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt index cfbee1744b21..8e470fe988d3 100644 --- a/datafusion/sqllogictest/test_files/case.slt +++ b/datafusion/sqllogictest/test_files/case.slt @@ -432,7 +432,7 @@ create or replace table t as values ); # This case forces all branches to be coerced to the same type -query error +query ? SELECT case when column1 > 0 then column2 @@ -441,13 +441,10 @@ SELECT end FROM t; ---- -DataFusion error: type_coercion -caused by -Error during planning: Failed to coerce then ([List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })]) and else (Some(List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }))) to common types in CASE WHEN expression - +[{foo: baz}] # different orders of the branches -query error +query ? SELECT case when column1 > 0 then column3 -- NB different order @@ -456,10 +453,7 @@ SELECT end FROM t; ---- -DataFusion error: type_coercion -caused by -Error during planning: Failed to coerce then ([List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })]) and else (Some(List(Field { name: "item", data_type: Struct([Field { name: "foo", data_type: Utf8View, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }))) to common types in CASE WHEN expression - +[{foo: bar}] # different orders of the branches query ? @@ -471,7 +465,7 @@ SELECT end FROM t; ---- -[{c0: blarg}] +[{foo: blarg}] From f0583a66831907cdbd762407c6ccee23d668f45c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 31 Jan 2025 11:52:18 -0500 Subject: [PATCH 3/3] Update tests --- datafusion/sqllogictest/test_files/struct.slt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index d671798b7d0f..0afe39de1795 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -459,14 +459,14 @@ create table t as values({r: 'a', c: 1}), ({r: 'b', c: 2.3}); query ? select * from t; ---- -{c0: a, c1: 1.0} -{c0: b, c1: 2.3} +{r: a, c: 1.0} +{r: b, c: 2.3} query T select arrow_typeof(column1) from t; ---- -Struct([Field { name: "c0", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) -Struct([Field { name: "c0", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) +Struct([Field { name: "r", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) +Struct([Field { name: "r", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) statement ok drop table t;