Skip to content

Commit 2f07061

Browse files
committed
Support GroupsAccumulator for avg duration
1 parent f07fb10 commit 2f07061

File tree

2 files changed

+111
-1
lines changed

2 files changed

+111
-1
lines changed

datafusion/functions-aggregate/src/average.rs

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ impl AggregateUDFImpl for Avg {
182182
fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
183183
matches!(
184184
args.return_type,
185-
DataType::Float64 | DataType::Decimal128(_, _)
185+
DataType::Float64 | DataType::Decimal128(_, _) | DataType::Duration(_)
186186
)
187187
}
188188

@@ -243,6 +243,45 @@ impl AggregateUDFImpl for Avg {
243243
)))
244244
}
245245

246+
(Duration(time_unit), Duration(_result_unit)) => {
247+
let avg_fn = move |sum: i64, count: u64| Ok(sum / count as i64);
248+
249+
match time_unit {
250+
TimeUnit::Second => Ok(Box::new(AvgGroupsAccumulator::<
251+
DurationSecondType,
252+
_,
253+
>::new(
254+
&data_type,
255+
args.return_type,
256+
avg_fn,
257+
))),
258+
TimeUnit::Millisecond => Ok(Box::new(AvgGroupsAccumulator::<
259+
DurationMillisecondType,
260+
_,
261+
>::new(
262+
&data_type,
263+
args.return_type,
264+
avg_fn,
265+
))),
266+
TimeUnit::Microsecond => Ok(Box::new(AvgGroupsAccumulator::<
267+
DurationMicrosecondType,
268+
_,
269+
>::new(
270+
&data_type,
271+
args.return_type,
272+
avg_fn,
273+
))),
274+
TimeUnit::Nanosecond => Ok(Box::new(AvgGroupsAccumulator::<
275+
DurationNanosecondType,
276+
_,
277+
>::new(
278+
&data_type,
279+
args.return_type,
280+
avg_fn,
281+
))),
282+
}
283+
}
284+
246285
_ => not_impl_err!(
247286
"AvgGroupsAccumulator for ({} --> {})",
248287
&data_type,

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5036,6 +5036,77 @@ FROM d WHERE column1 IS NOT NULL;
50365036
statement ok
50375037
drop table d;
50385038

5039+
# avg_duration (GroupsAccumulator)
5040+
5041+
statement ok
5042+
create table duration as values
5043+
(arrow_cast(10, 'Duration(Second)'), arrow_cast(100, 'Duration(Millisecond)'), 'a', 1),
5044+
(arrow_cast(20, 'Duration(Second)'), arrow_cast(200, 'Duration(Millisecond)'), 'a', 2),
5045+
(arrow_cast(30, 'Duration(Second)'), arrow_cast(300, 'Duration(Millisecond)'), 'b', 1),
5046+
(arrow_cast(40, 'Duration(Second)'), arrow_cast(400, 'Duration(Millisecond)'), 'b', 2),
5047+
(arrow_cast(50, 'Duration(Second)'), arrow_cast(500, 'Duration(Millisecond)'), 'c', 1),
5048+
(arrow_cast(60, 'Duration(Second)'), arrow_cast(600, 'Duration(Millisecond)'), 'c', 2);
5049+
5050+
query T??I
5051+
SELECT column3, avg(column1), avg(column2), column4 FROM duration GROUP BY column3, column4 ORDER BY column3, column4;
5052+
----
5053+
a 0 days 0 hours 0 mins 10 secs 0 days 0 hours 0 mins 0.100 secs 1
5054+
a 0 days 0 hours 0 mins 20 secs 0 days 0 hours 0 mins 0.200 secs 2
5055+
b 0 days 0 hours 0 mins 30 secs 0 days 0 hours 0 mins 0.300 secs 1
5056+
b 0 days 0 hours 0 mins 40 secs 0 days 0 hours 0 mins 0.400 secs 2
5057+
c 0 days 0 hours 0 mins 50 secs 0 days 0 hours 0 mins 0.500 secs 1
5058+
c 0 days 0 hours 1 mins 0 secs 0 days 0 hours 0 mins 0.600 secs 2
5059+
5060+
query T?
5061+
SELECT column3, avg(column1) FROM duration GROUP BY column3 ORDER BY column3;
5062+
----
5063+
a 0 days 0 hours 0 mins 15 secs
5064+
b 0 days 0 hours 0 mins 35 secs
5065+
c 0 days 0 hours 0 mins 55 secs
5066+
5067+
query I??
5068+
SELECT column4, avg(column1), avg(column2) FROM duration GROUP BY column4 ORDER BY column4;
5069+
----
5070+
1 0 days 0 hours 0 mins 30 secs 0 days 0 hours 0 mins 0.300 secs
5071+
2 0 days 0 hours 0 mins 40 secs 0 days 0 hours 0 mins 0.400 secs
5072+
5073+
query TI??
5074+
SELECT column3, column4, column1, avg(column1) OVER (PARTITION BY column3 ORDER BY column4 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as running_avg
5075+
FROM duration
5076+
ORDER BY column3, column4;
5077+
----
5078+
a 1 0 days 0 hours 0 mins 10 secs 0 days 0 hours 0 mins 10 secs
5079+
a 2 0 days 0 hours 0 mins 20 secs 0 days 0 hours 0 mins 15 secs
5080+
b 1 0 days 0 hours 0 mins 30 secs 0 days 0 hours 0 mins 30 secs
5081+
b 2 0 days 0 hours 0 mins 40 secs 0 days 0 hours 0 mins 35 secs
5082+
c 1 0 days 0 hours 0 mins 50 secs 0 days 0 hours 0 mins 50 secs
5083+
c 2 0 days 0 hours 1 mins 0 secs 0 days 0 hours 0 mins 55 secs
5084+
5085+
statement ok
5086+
drop table duration;
5087+
5088+
statement ok
5089+
create table duration_nulls as values
5090+
(arrow_cast(10, 'Duration(Second)'), 'a', 1),
5091+
(arrow_cast(20, 'Duration(Second)'), 'a', 2),
5092+
(NULL, 'b', 1),
5093+
(arrow_cast(40, 'Duration(Second)'), 'b', 2),
5094+
(arrow_cast(50, 'Duration(Second)'), 'c', 1),
5095+
(NULL, 'c', 2);
5096+
5097+
query T?I
5098+
SELECT column2, avg(column1), column3 FROM duration_nulls GROUP BY column2, column3 ORDER BY column2, column3;
5099+
----
5100+
a 0 days 0 hours 0 mins 10 secs 1
5101+
a 0 days 0 hours 0 mins 20 secs 2
5102+
b NULL 1
5103+
b 0 days 0 hours 0 mins 40 secs 2
5104+
c 0 days 0 hours 0 mins 50 secs 1
5105+
c NULL 2
5106+
5107+
statement ok
5108+
drop table duration_nulls;
5109+
50395110
# Prepare the table with dictionary values for testing
50405111
statement ok
50415112
CREATE TABLE value(x bigint) AS VALUES (1), (2), (3), (1), (3), (4), (5), (2);

0 commit comments

Comments
 (0)