Skip to content

Commit 5293b70

Browse files
authored
adding support for Min/Max over LargeList and FixedSizeList (#16071)
* Initial iteration * Add SQL logic tests * Tweak comments * Unify data, structure tests * Deleted by mistake
1 parent e5f596b commit 5293b70

File tree

9 files changed

+741
-148
lines changed

9 files changed

+741
-148
lines changed

datafusion/common/src/scalar/mod.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4941,6 +4941,40 @@ mod tests {
49414941
])]),
49424942
));
49434943
assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4944+
4945+
let a = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
4946+
Int64Type,
4947+
_,
4948+
_,
4949+
>(vec![Some(vec![
4950+
None,
4951+
Some(2),
4952+
Some(3),
4953+
])])));
4954+
let b = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
4955+
Int64Type,
4956+
_,
4957+
_,
4958+
>(vec![Some(vec![
4959+
Some(1),
4960+
Some(2),
4961+
Some(3),
4962+
])])));
4963+
assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4964+
4965+
let a = ScalarValue::FixedSizeList(Arc::new(
4966+
FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
4967+
vec![Some(vec![None, Some(2), Some(3)])],
4968+
3,
4969+
),
4970+
));
4971+
let b = ScalarValue::FixedSizeList(Arc::new(
4972+
FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
4973+
vec![Some(vec![Some(1), Some(2), Some(3)])],
4974+
3,
4975+
),
4976+
));
4977+
assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
49444978
}
49454979

49464980
#[test]

datafusion/functions-aggregate/src/min_max.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,10 @@ fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
622622
}
623623
DataType::Struct(_) => min_max_batch_generic(values, Ordering::Greater)?,
624624
DataType::List(_) => min_max_batch_generic(values, Ordering::Greater)?,
625+
DataType::LargeList(_) => min_max_batch_generic(values, Ordering::Greater)?,
626+
DataType::FixedSizeList(_, _) => {
627+
min_max_batch_generic(values, Ordering::Greater)?
628+
}
625629
DataType::Dictionary(_, _) => {
626630
let values = values.as_any_dictionary().values();
627631
min_batch(values)?
@@ -720,6 +724,8 @@ pub fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
720724
}
721725
DataType::Struct(_) => min_max_batch_generic(values, Ordering::Less)?,
722726
DataType::List(_) => min_max_batch_generic(values, Ordering::Less)?,
727+
DataType::LargeList(_) => min_max_batch_generic(values, Ordering::Less)?,
728+
DataType::FixedSizeList(_, _) => min_max_batch_generic(values, Ordering::Less)?,
723729
DataType::Dictionary(_, _) => {
724730
let values = values.as_any_dictionary().values();
725731
max_batch(values)?
@@ -1008,6 +1014,23 @@ macro_rules! min_max {
10081014
) => {
10091015
min_max_generic!(lhs, rhs, $OP)
10101016
}
1017+
1018+
1019+
(
1020+
lhs @ ScalarValue::LargeList(_),
1021+
rhs @ ScalarValue::LargeList(_),
1022+
) => {
1023+
min_max_generic!(lhs, rhs, $OP)
1024+
}
1025+
1026+
1027+
(
1028+
lhs @ ScalarValue::FixedSizeList(_),
1029+
rhs @ ScalarValue::FixedSizeList(_),
1030+
) => {
1031+
min_max_generic!(lhs, rhs, $OP)
1032+
}
1033+
10111034
e => {
10121035
return internal_err!(
10131036
"MIN/MAX is not expected to receive scalars of incompatible types {:?}",

datafusion/optimizer/src/analyzer/type_coercion.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,8 @@ fn extract_window_frame_target_type(col_type: &DataType) -> Result<DataType> {
719719
if col_type.is_numeric()
720720
|| is_utf8_or_utf8view_or_large_utf8(col_type)
721721
|| matches!(col_type, DataType::List(_))
722+
|| matches!(col_type, DataType::LargeList(_))
723+
|| matches!(col_type, DataType::FixedSizeList(_, _))
722724
|| matches!(col_type, DataType::Null)
723725
|| matches!(col_type, DataType::Boolean)
724726
{

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 0 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -7018,151 +7018,3 @@ VALUES
70187018
);
70197019
----
70207020
{a: 1, b: 2, c: 3} {a: 1, b: 2, c: 4}
7021-
7022-
# Min/Max with list over integers
7023-
query ??
7024-
SELECT MIN(column1), MAX(column1) FROM VALUES
7025-
([1, 2, 3]),
7026-
([1, 2]);
7027-
----
7028-
[1, 2] [1, 2, 3]
7029-
7030-
# Min/Max with lists over strings
7031-
query ??
7032-
SELECT MIN(column1), MAX(column1) FROM VALUES
7033-
(['a', 'b', 'c']),
7034-
(['a', 'b']);
7035-
----
7036-
[a, b] [a, b, c]
7037-
7038-
# Min/Max with list over booleans
7039-
query ??
7040-
SELECT MIN(column1), MAX(column1) FROM VALUES
7041-
([true, true, false]),
7042-
([false, true]);
7043-
----
7044-
[false, true] [true, true, false]
7045-
7046-
# Min/Max with list over nullable integers
7047-
query ??
7048-
SELECT MIN(column1), MAX(column1) FROM VALUES
7049-
([NULL, 1, 2]),
7050-
([1, 2]);
7051-
----
7052-
[1, 2] [NULL, 1, 2]
7053-
7054-
# Min/Max list with different lengths and nulls
7055-
query ??
7056-
SELECT MIN(column1), MAX(column1) FROM VALUES
7057-
([1, NULL, 3]),
7058-
([1, 2, 3, 4]),
7059-
([1, 2]);
7060-
----
7061-
[1, 2] [1, NULL, 3]
7062-
7063-
# Min/Max list with only NULLs
7064-
query ??
7065-
SELECT MIN(column1), MAX(column1) FROM VALUES
7066-
([NULL, NULL]),
7067-
([NULL]);
7068-
----
7069-
[NULL] [NULL, NULL]
7070-
7071-
# Min/Max list with empty lists
7072-
query ??
7073-
SELECT MIN(column1), MAX(column1) FROM VALUES
7074-
([]),
7075-
([1]),
7076-
([]);
7077-
----
7078-
[] [1]
7079-
7080-
# Min/Max list of varying types (integers and NULLs)
7081-
query ??
7082-
SELECT MIN(column1), MAX(column1) FROM VALUES
7083-
([1, 2, 3]),
7084-
([NULL, 2, 3]),
7085-
([1, 2, NULL]);
7086-
----
7087-
[1, 2, 3] [NULL, 2, 3]
7088-
7089-
# Min/Max list grouped by key with NULLs and differing lengths
7090-
query I?? rowsort
7091-
SELECT column1, MIN(column2), MAX(column2) FROM VALUES
7092-
(0, [1, NULL, 3]),
7093-
(0, [1, 2, 3, 4]),
7094-
(1, [1, 2]),
7095-
(1, [NULL, 5]),
7096-
(1, [])
7097-
GROUP BY column1;
7098-
----
7099-
0 [1, 2, 3, 4] [1, NULL, 3]
7100-
1 [] [NULL, 5]
7101-
7102-
# Min/Max list grouped by key with NULLs and differing lengths
7103-
query I?? rowsort
7104-
SELECT column1, MIN(column2), MAX(column2) FROM VALUES
7105-
(0, [NULL]),
7106-
(0, [NULL, NULL]),
7107-
(1, [NULL])
7108-
GROUP BY column1;
7109-
----
7110-
0 [NULL] [NULL, NULL]
7111-
1 [NULL] [NULL]
7112-
7113-
# Min/Max grouped list with empty and non-empty
7114-
query I?? rowsort
7115-
SELECT column1, MIN(column2), MAX(column2) FROM VALUES
7116-
(0, []),
7117-
(0, [1]),
7118-
(0, []),
7119-
(1, [5, 6]),
7120-
(1, [])
7121-
GROUP BY column1;
7122-
----
7123-
0 [] [1]
7124-
1 [] [5, 6]
7125-
7126-
# Min/Max over lists with a window function
7127-
query ?
7128-
SELECT min(column1) OVER (ORDER BY column1) FROM VALUES
7129-
([1, 2, 3]),
7130-
([1, 2, 3]),
7131-
([2, 3])
7132-
----
7133-
[1, 2, 3]
7134-
[1, 2, 3]
7135-
[1, 2, 3]
7136-
7137-
# Min/Max over lists with a window function and nulls
7138-
query ?
7139-
SELECT min(column1) OVER (ORDER BY column1) FROM VALUES
7140-
(NULL),
7141-
([4, 5]),
7142-
([2, 3])
7143-
----
7144-
[2, 3]
7145-
[2, 3]
7146-
[2, 3]
7147-
7148-
# Min/Max over lists with a window function, nulls and ROWS BETWEEN statement
7149-
query ?
7150-
SELECT min(column1) OVER (ORDER BY column1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM VALUES
7151-
(NULL),
7152-
([4, 5]),
7153-
([2, 3])
7154-
----
7155-
[2, 3]
7156-
[2, 3]
7157-
[4, 5]
7158-
7159-
# Min/Max over lists with a window function using a different column
7160-
query ?
7161-
SELECT max(column2) OVER (ORDER BY column1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM VALUES
7162-
([1, 2, 3], [4, 5]),
7163-
([2, 3], [2, 3]),
7164-
([1, 2, 3], NULL)
7165-
----
7166-
[4, 5]
7167-
[4, 5]
7168-
[2, 3]
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# Min/Max with FixedSizeList over integers
2+
query ??
3+
SELECT MIN(column1), MAX(column1) FROM VALUES
4+
(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)')),
5+
(arrow_cast(make_array(1, 2), 'FixedSizeList(2, Int64)'));
6+
----
7+
[1, 2] [1, 2, 3, 4]
8+
9+
# Min/Max over strings, mixing a FixedSizeList row with a LargeList row
10+
query ??
11+
SELECT MIN(column1), MAX(column1) FROM VALUES
12+
(arrow_cast(make_array('a', 'b', 'c'), 'FixedSizeList(3, Utf8)')),
13+
(arrow_cast(make_array('a', 'b'), 'LargeList(Utf8)'));
14+
----
15+
[a, b] [a, b, c]
16+
17+
# Min/Max with FixedSizeList over booleans
18+
query ??
19+
SELECT MIN(column1), MAX(column1) FROM VALUES
20+
(arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)')),
21+
(arrow_cast(make_array(true, false), 'FixedSizeList(2, Boolean)'));
22+
----
23+
[true, false] [true, false, true]
24+
25+
# Min/Max with FixedSizeList over nullable integers
26+
query ??
27+
SELECT MIN(column1), MAX(column1) FROM VALUES
28+
(arrow_cast(make_array(NULL, 1, 2), 'FixedSizeList(3, Int64)')),
29+
(arrow_cast(make_array(1, 2), 'FixedSizeList(2, Int64)'));
30+
----
31+
[1, 2] [NULL, 1, 2]
32+
33+
# Min/Max FixedSizeList with different lengths and nulls
34+
query ??
35+
SELECT MIN(column1), MAX(column1) FROM VALUES
36+
(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)')),
37+
(arrow_cast(make_array(1, 2), 'FixedSizeList(2, Int64)')),
38+
(arrow_cast(make_array(1, NULL, 3), 'FixedSizeList(3, Int64)'));
39+
----
40+
[1, 2] [1, NULL, 3]
41+
42+
# Min/Max FixedSizeList with only NULLs
43+
query ??
44+
SELECT MIN(column1), MAX(column1) FROM VALUES
45+
(arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Int64)')),
46+
(arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)'));
47+
----
48+
[NULL] [NULL, NULL]
49+
50+
51+
# Min/Max FixedSizeList of varying types (integers and NULLs)
52+
query ??
53+
SELECT MIN(column1), MAX(column1) FROM VALUES
54+
(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')),
55+
(arrow_cast(make_array(NULL, 2, 3), 'FixedSizeList(3, Int64)')),
56+
(arrow_cast(make_array(1, 2, NULL), 'FixedSizeList(3, Int64)'));
57+
----
58+
[1, 2, 3] [NULL, 2, 3]
59+
60+
# Min/Max FixedSizeList grouped by key with NULLs and differing lengths
61+
query I?? rowsort
62+
SELECT column1, MIN(column2), MAX(column2) FROM VALUES
63+
(0, arrow_cast(make_array(1, NULL, 3), 'FixedSizeList(3, Int64)')),
64+
(0, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)')),
65+
(1, arrow_cast(make_array(1, 2), 'FixedSizeList(2, Int64)')),
66+
(1, arrow_cast(make_array(NULL, 5), 'FixedSizeList(2, Int64)'))
67+
GROUP BY column1;
68+
----
69+
0 [1, 2, 3, 4] [1, NULL, 3]
70+
1 [1, 2] [NULL, 5]
71+
72+
# Min/Max FixedSizeList grouped by key with only NULL elements and differing lengths
73+
query I?? rowsort
74+
SELECT column1, MIN(column2), MAX(column2) FROM VALUES
75+
(0, arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)')),
76+
(0, arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Int64)')),
77+
(1, arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)'))
78+
GROUP BY column1;
79+
----
80+
0 [NULL] [NULL, NULL]
81+
1 [NULL] [NULL]
82+
83+
# Min/Max grouped FixedSizeList with a single non-empty list per group
84+
query I?? rowsort
85+
SELECT column1, MIN(column2), MAX(column2) FROM VALUES
86+
(0, arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')),
87+
(1, arrow_cast(make_array(5, 6), 'FixedSizeList(2, Int64)'))
88+
GROUP BY column1;
89+
----
90+
0 [1] [1]
91+
1 [5, 6] [5, 6]
92+
93+
# Min/Max over FixedSizeList with a window function
94+
query ?
95+
SELECT min(column1) OVER (ORDER BY column1) FROM VALUES
96+
(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')),
97+
(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')),
98+
(arrow_cast(make_array(2, 3), 'FixedSizeList(2, Int64)'))
99+
----
100+
[1, 2, 3]
101+
[1, 2, 3]
102+
[1, 2, 3]
103+
104+
# Min/Max over FixedSizeList with a window function and nulls
105+
query ?
106+
SELECT min(column1) OVER (ORDER BY column1) FROM VALUES
107+
(arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)')),
108+
(arrow_cast(make_array(4, 5), 'FixedSizeList(2, Int64)')),
109+
(arrow_cast(make_array(2, 3), 'FixedSizeList(2, Int64)'))
110+
----
111+
[2, 3]
112+
[2, 3]
113+
[2, 3]
114+
115+
# Min/Max over FixedSizeList with a window function, nulls and ROWS BETWEEN statement
116+
query ?
117+
SELECT min(column1) OVER (ORDER BY column1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM VALUES
118+
(arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)')),
119+
(arrow_cast(make_array(4, 5), 'FixedSizeList(2, Int64)')),
120+
(arrow_cast(make_array(2, 3), 'FixedSizeList(2, Int64)'))
121+
----
122+
[2, 3]
123+
[2, 3]
124+
[4, 5]
125+
126+
# Min/Max over FixedSizeList with a window function using a different column
127+
query ?
128+
SELECT max(column2) OVER (ORDER BY column1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM VALUES
129+
(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(4, 5), 'FixedSizeList(2, Int64)')),
130+
(arrow_cast(make_array(2, 3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(2, 3), 'FixedSizeList(2, Int64)'))
131+
----
132+
[4, 5]
133+
[4, 5]
134+

0 commit comments

Comments
 (0)