Skip to content

Commit 0204f35

Browse files
committed
fix: handle ordering by order of inputs
1 parent 31fae11 commit 0204f35

3 files changed

Lines changed: 57 additions & 58 deletions

File tree

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
//! Logical plan types
1919
2020
use std::cmp::Ordering;
21-
use std::collections::{BTreeMap, HashMap, HashSet};
21+
use std::collections::{HashMap, HashSet};
2222
use std::fmt::{self, Debug, Display, Formatter};
2323
use std::hash::{Hash, Hasher};
2424
use std::sync::{Arc, LazyLock};
@@ -2741,35 +2741,34 @@ impl Union {
27412741
) -> Result<DFSchemaRef> {
27422742
type FieldData<'a> =
27432743
(&'a DataType, bool, Vec<&'a HashMap<String, String>>, usize);
2744-
// Prefer `BTreeMap` as it produces items in order by key when iterated over
2745-
let mut cols: BTreeMap<&str, FieldData> = BTreeMap::new();
2744+
let mut cols: Vec<(&str, FieldData)> = Vec::new();
27462745
for input in inputs.iter() {
27472746
for field in input.schema().fields() {
2748-
match cols.entry(field.name()) {
2749-
std::collections::btree_map::Entry::Occupied(mut occupied) => {
2750-
let (data_type, is_nullable, metadata, occurrences) =
2751-
occupied.get_mut();
2752-
if !loose_types && *data_type != field.data_type() {
2753-
return plan_err!(
2754-
"Found different types for field {}",
2755-
field.name()
2756-
);
2757-
}
2758-
2759-
metadata.push(field.metadata());
2760-
// If the field is nullable in any one of the inputs,
2761-
// then the field in the final schema is also nullable.
2762-
*is_nullable |= field.is_nullable();
2763-
*occurrences += 1;
2747+
if let Some((_, (data_type, is_nullable, metadata, occurrences))) =
2748+
cols.iter_mut().find(|(name, _)| name == field.name())
2749+
{
2750+
if !loose_types && *data_type != field.data_type() {
2751+
return plan_err!(
2752+
"Found different types for field {}",
2753+
field.name()
2754+
);
27642755
}
2765-
std::collections::btree_map::Entry::Vacant(vacant) => {
2766-
vacant.insert((
2756+
2757+
metadata.push(field.metadata());
2758+
// If the field is nullable in any one of the inputs,
2759+
// then the field in the final schema is also nullable.
2760+
*is_nullable |= field.is_nullable();
2761+
*occurrences += 1;
2762+
} else {
2763+
cols.push((
2764+
field.name(),
2765+
(
27672766
field.data_type(),
27682767
field.is_nullable(),
27692768
vec![field.metadata()],
27702769
1,
2771-
));
2772-
}
2770+
),
2771+
));
27732772
}
27742773
}
27752774
}

datafusion/sql/tests/sql_integration.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1898,10 +1898,10 @@ fn union_by_name_different_columns() {
18981898
let expected = "\
18991899
Distinct:\
19001900
\n Union\
1901-
\n Projection: NULL AS Int64(1), order_id\
1901+
\n Projection: order_id, NULL AS Int64(1)\
19021902
\n Projection: orders.order_id\
19031903
\n TableScan: orders\
1904-
\n Projection: Int64(1), order_id\
1904+
\n Projection: order_id, Int64(1)\
19051905
\n Projection: orders.order_id, Int64(1)\
19061906
\n TableScan: orders";
19071907
quick_test(sql, expected);
@@ -1937,10 +1937,10 @@ fn union_all_by_name_different_columns() {
19371937
"SELECT order_id from orders UNION ALL BY NAME SELECT order_id, 1 FROM orders";
19381938
let expected = "\
19391939
Union\
1940-
\n Projection: NULL AS Int64(1), order_id\
1940+
\n Projection: order_id, NULL AS Int64(1)\
19411941
\n Projection: orders.order_id\
19421942
\n TableScan: orders\
1943-
\n Projection: Int64(1), order_id\
1943+
\n Projection: order_id, Int64(1)\
19441944
\n Projection: orders.order_id, Int64(1)\
19451945
\n TableScan: orders";
19461946
quick_test(sql, expected);

datafusion/sqllogictest/test_files/union_by_name.slt

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -88,38 +88,38 @@ SELECT x FROM t1 UNION ALL BY NAME SELECT x FROM t1 ORDER BY x;
8888
query II
8989
(SELECT x FROM t1 UNION ALL SELECT x FROM t1) UNION BY NAME SELECT 5 ORDER BY x;
9090
----
91-
NULL 1
92-
NULL 3
93-
5 NULL
91+
1 NULL
92+
3 NULL
93+
NULL 5
9494

9595
query II
9696
(SELECT x FROM t1 UNION ALL SELECT x FROM t1) UNION ALL BY NAME SELECT 5 ORDER BY x;
9797
----
98-
NULL 1
99-
NULL 1
100-
NULL 3
101-
NULL 3
102-
NULL 3
103-
NULL 3
104-
5 NULL
98+
1 NULL
99+
1 NULL
100+
3 NULL
101+
3 NULL
102+
3 NULL
103+
3 NULL
104+
NULL 5
105105

106106
query II
107107
(SELECT x FROM t1 UNION ALL SELECT y FROM t1) UNION BY NAME SELECT 5 ORDER BY x;
108108
----
109-
NULL 1
110-
NULL 3
111-
5 NULL
109+
1 NULL
110+
3 NULL
111+
NULL 5
112112

113113
query II
114114
(SELECT x FROM t1 UNION ALL SELECT y FROM t1) UNION ALL BY NAME SELECT 5 ORDER BY x;
115115
----
116-
NULL 1
117-
NULL 1
118-
NULL 3
119-
NULL 3
120-
NULL 3
121-
NULL 3
122-
5 NULL
116+
1 NULL
117+
1 NULL
118+
3 NULL
119+
3 NULL
120+
3 NULL
121+
3 NULL
122+
NULL 5
123123

124124

125125
# Ambiguous name
@@ -152,22 +152,22 @@ NULL 4
152152
# Limit
153153

154154
query III
155-
SELECT 1 UNION BY NAME SELECT * FROM unnest(range(2, 100)) UNION BY NAME SELECT 999 ORDER BY 3, 1 LIMIT 5;
155+
SELECT 1 UNION BY NAME SELECT * FROM unnest(range(2, 100)) UNION BY NAME SELECT 999 ORDER BY 3, 1, 2 LIMIT 5;
156156
----
157-
NULL NULL 2
158-
NULL NULL 3
159-
NULL NULL 4
160-
NULL NULL 5
161-
NULL NULL 6
157+
NULL NULL 999
158+
1 NULL NULL
159+
NULL 2 NULL
160+
NULL 3 NULL
161+
NULL 4 NULL
162162

163163
query III
164164
SELECT 1 UNION ALL BY NAME SELECT * FROM unnest(range(2, 100)) UNION ALL BY NAME SELECT 999 ORDER BY 3, 1 LIMIT 5;
165165
----
166-
NULL NULL 2
167-
NULL NULL 3
168-
NULL NULL 4
169-
NULL NULL 5
170-
NULL NULL 6
166+
NULL NULL 999
167+
1 NULL NULL
168+
NULL 2 NULL
169+
NULL 3 NULL
170+
NULL 4 NULL
171171

172172
# Order by
173173

0 commit comments

Comments
 (0)