Skip to content

Commit d2e9888

Browse files
test: add comprehensive aggregate tests for streaming aggregation
1 parent 23d35cf commit d2e9888

File tree

1 file changed

+281
-38
lines changed

1 file changed

+281
-38
lines changed

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 281 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3154,44 +3154,6 @@ drop table corr_single_row;
31543154
statement ok
31553155
drop table corr_all_nulls;
31563156

3157-
# correlation with streaming aggregation (EmitTo::First)
3158-
# Verify that CORR's GroupsAccumulator properly drains state vectors when EmitTo::First is called.
3159-
# Set target_partitions to 1 to ensure the optimizer uses streaming aggregation (ordering_mode=Sorted) based on the input order.
3160-
statement ok
3161-
set datafusion.execution.target_partitions = 1;
3162-
3163-
# Bucket 1: CORR = 1, -1, 1, -1 (y varies)
3164-
# Bucket 2: CORR = NULL (y constant, zero variance)
3165-
query IIR
3166-
SELECT bucket, grp, CORR(x, y) FROM (
3167-
SELECT * FROM (VALUES
3168-
(1, 1, 1.0, 1.0), (1, 1, 2.0, 2.0),
3169-
(1, 2, 1.0, 2.0), (1, 2, 2.0, 1.0),
3170-
(1, 3, 1.0, 1.0), (1, 3, 2.0, 2.0),
3171-
(1, 4, 1.0, 2.0), (1, 4, 2.0, 1.0),
3172-
(2, 1, 1.0, 5.0), (2, 1, 2.0, 5.0),
3173-
(2, 2, 1.0, 5.0), (2, 2, 2.0, 5.0),
3174-
(2, 3, 1.0, 5.0), (2, 3, 2.0, 5.0),
3175-
(2, 4, 1.0, 5.0), (2, 4, 2.0, 5.0)
3176-
) AS t(bucket, grp, x, y)
3177-
ORDER BY bucket
3178-
LIMIT 1000000
3179-
) AS ordered_data
3180-
GROUP BY bucket, grp
3181-
ORDER BY bucket, grp;
3182-
----
3183-
1 1 1
3184-
1 2 -1
3185-
1 3 1
3186-
1 4 -1
3187-
2 1 NULL
3188-
2 2 NULL
3189-
2 3 NULL
3190-
2 4 NULL
3191-
3192-
statement ok
3193-
set datafusion.execution.target_partitions = 4;
3194-
31953157
# covariance_f64_4
31963158
statement ok
31973159
drop table if exists t;
@@ -8425,3 +8387,284 @@ ORDER BY grp, id;
84258387

84268388
statement ok
84278389
DROP TABLE string_agg_window_test;
8390+
8391+
# Enable streaming aggregation by limiting partitions and ensuring sorted input
8392+
statement ok
8393+
set datafusion.execution.target_partitions = 1;
8394+
8395+
# Setup data
8396+
statement ok
8397+
CREATE TABLE stream_test (
8398+
g INT,
8399+
x DOUBLE,
8400+
y DOUBLE,
8401+
i INT,
8402+
b BOOLEAN,
8403+
s VARCHAR
8404+
) AS VALUES
8405+
(1, 1.0, 1.0, 1, true, 'a'), (1, 2.0, 2.0, 2, true, 'b'),
8406+
(2, 1.0, 5.0, 3, false, 'c'), (2, 2.0, 5.0, 4, true, 'd'),
8407+
(3, 1.0, 1.0, 7, false, 'e'), (3, 2.0, 2.0, 8, false, 'f');
8408+
8409+
# Test comprehensive aggregates with streaming
8410+
# This verifies that CORR and other aggregates work together in a streaming plan (ordering_mode=Sorted)
8411+
8412+
# Basic Aggregates
8413+
query TT
8414+
EXPLAIN SELECT
8415+
g,
8416+
COUNT(*),
8417+
SUM(x),
8418+
AVG(x),
8419+
MEAN(x),
8420+
MIN(x),
8421+
MAX(y),
8422+
BIT_AND(i),
8423+
BIT_OR(i),
8424+
BIT_XOR(i),
8425+
BOOL_AND(b),
8426+
BOOL_OR(b),
8427+
MEDIAN(x),
8428+
GROUPING(g),
8429+
VAR(x),
8430+
VAR_SAMP(x),
8431+
VAR_POP(x),
8432+
VAR_SAMPLE(x),
8433+
VAR_POPULATION(x),
8434+
STDDEV(x),
8435+
STDDEV_SAMP(x),
8436+
STDDEV_POP(x)
8437+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8438+
GROUP BY g
8439+
ORDER BY g;
8440+
----
8441+
logical_plan
8442+
01)Sort: stream_test.g ASC NULLS LAST
8443+
02)--Projection: stream_test.g, count(Int64(1)) AS count(*), sum(stream_test.x), avg(stream_test.x), avg(stream_test.x) AS mean(stream_test.x), min(stream_test.x), max(stream_test.y), bit_and(stream_test.i), bit_or(stream_test.i), bit_xor(stream_test.i), bool_and(stream_test.b), bool_or(stream_test.b), median(stream_test.x), Int32(0) AS grouping(stream_test.g), var(stream_test.x), var(stream_test.x) AS var_samp(stream_test.x), var_pop(stream_test.x), var(stream_test.x) AS var_sample(stream_test.x), var_pop(stream_test.x) AS var_population(stream_test.x), stddev(stream_test.x), stddev(stream_test.x) AS stddev_samp(stream_test.x), stddev_pop(stream_test.x)
8444+
03)----Aggregate: groupBy=[[stream_test.g]], aggr=[[count(Int64(1)), sum(stream_test.x), avg(stream_test.x), min(stream_test.x), max(stream_test.y), bit_and(stream_test.i), bit_or(stream_test.i), bit_xor(stream_test.i), bool_and(stream_test.b), bool_or(stream_test.b), median(stream_test.x), var(stream_test.x), var_pop(stream_test.x), stddev(stream_test.x), stddev_pop(stream_test.x)]]
8445+
04)------Sort: stream_test.g ASC NULLS LAST, fetch=10000
8446+
05)--------TableScan: stream_test projection=[g, x, y, i, b]
8447+
physical_plan
8448+
01)ProjectionExec: expr=[g@0 as g, count(Int64(1))@1 as count(*), sum(stream_test.x)@2 as sum(stream_test.x), avg(stream_test.x)@3 as avg(stream_test.x), avg(stream_test.x)@3 as mean(stream_test.x), min(stream_test.x)@4 as min(stream_test.x), max(stream_test.y)@5 as max(stream_test.y), bit_and(stream_test.i)@6 as bit_and(stream_test.i), bit_or(stream_test.i)@7 as bit_or(stream_test.i), bit_xor(stream_test.i)@8 as bit_xor(stream_test.i), bool_and(stream_test.b)@9 as bool_and(stream_test.b), bool_or(stream_test.b)@10 as bool_or(stream_test.b), median(stream_test.x)@11 as median(stream_test.x), 0 as grouping(stream_test.g), var(stream_test.x)@12 as var(stream_test.x), var(stream_test.x)@12 as var_samp(stream_test.x), var_pop(stream_test.x)@13 as var_pop(stream_test.x), var(stream_test.x)@12 as var_sample(stream_test.x), var_pop(stream_test.x)@13 as var_population(stream_test.x), stddev(stream_test.x)@14 as stddev(stream_test.x), stddev(stream_test.x)@14 as stddev_samp(stream_test.x), stddev_pop(stream_test.x)@15 as stddev_pop(stream_test.x)]
8449+
02)--AggregateExec: mode=Single, gby=[g@0 as g], aggr=[count(Int64(1)), sum(stream_test.x), avg(stream_test.x), min(stream_test.x), max(stream_test.y), bit_and(stream_test.i), bit_or(stream_test.i), bit_xor(stream_test.i), bool_and(stream_test.b), bool_or(stream_test.b), median(stream_test.x), var(stream_test.x), var_pop(stream_test.x), stddev(stream_test.x), stddev_pop(stream_test.x)], ordering_mode=Sorted
8450+
03)----SortExec: TopK(fetch=10000), expr=[g@0 ASC NULLS LAST], preserve_partitioning=[false]
8451+
04)------DataSourceExec: partitions=1, partition_sizes=[1]
8452+
8453+
query IIRRRRRIIIBBRIRRRRRRRR
8454+
SELECT
8455+
g,
8456+
COUNT(*),
8457+
SUM(x),
8458+
AVG(x),
8459+
MEAN(x),
8460+
MIN(x),
8461+
MAX(y),
8462+
BIT_AND(i),
8463+
BIT_OR(i),
8464+
BIT_XOR(i),
8465+
BOOL_AND(b),
8466+
BOOL_OR(b),
8467+
MEDIAN(x),
8468+
GROUPING(g),
8469+
VAR(x),
8470+
VAR_SAMP(x),
8471+
VAR_POP(x),
8472+
VAR_SAMPLE(x),
8473+
VAR_POPULATION(x),
8474+
STDDEV(x),
8475+
STDDEV_SAMP(x),
8476+
STDDEV_POP(x)
8477+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8478+
GROUP BY g
8479+
ORDER BY g;
8480+
----
8481+
1 2 3 1.5 1.5 1 2 0 3 3 true true 1.5 0 0.5 0.5 0.25 0.5 0.25 0.707106781187 0.707106781187 0.5
8482+
2 2 3 1.5 1.5 1 5 0 7 7 false true 1.5 0 0.5 0.5 0.25 0.5 0.25 0.707106781187 0.707106781187 0.5
8483+
3 2 3 1.5 1.5 1 2 0 15 15 false false 1.5 0 0.5 0.5 0.25 0.5 0.25 0.707106781187 0.707106781187 0.5
8484+
8485+
# Ordered Aggregates (by x)
8486+
query TT
8487+
EXPLAIN SELECT
8488+
g,
8489+
ARRAY_AGG(x ORDER BY x),
8490+
ARRAY_AGG(DISTINCT x ORDER BY x),
8491+
FIRST_VALUE(x ORDER BY x),
8492+
LAST_VALUE(x ORDER BY x),
8493+
NTH_VALUE(x, 1 ORDER BY x)
8494+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8495+
GROUP BY g
8496+
ORDER BY g;
8497+
----
8498+
logical_plan
8499+
01)Sort: stream_test.g ASC NULLS LAST
8500+
02)--Aggregate: groupBy=[[stream_test.g]], aggr=[[array_agg(stream_test.x) ORDER BY [stream_test.x ASC NULLS LAST], array_agg(DISTINCT stream_test.x) ORDER BY [stream_test.x ASC NULLS LAST], first_value(stream_test.x) ORDER BY [stream_test.x ASC NULLS LAST], last_value(stream_test.x) ORDER BY [stream_test.x ASC NULLS LAST], nth_value(stream_test.x, Int64(1)) ORDER BY [stream_test.x ASC NULLS LAST]]]
8501+
03)----Sort: stream_test.g ASC NULLS LAST, fetch=10000
8502+
04)------TableScan: stream_test projection=[g, x]
8503+
physical_plan
8504+
01)AggregateExec: mode=Single, gby=[g@0 as g], aggr=[array_agg(stream_test.x) ORDER BY [stream_test.x ASC NULLS LAST], array_agg(DISTINCT stream_test.x) ORDER BY [stream_test.x ASC NULLS LAST], first_value(stream_test.x) ORDER BY [stream_test.x ASC NULLS LAST], last_value(stream_test.x) ORDER BY [stream_test.x ASC NULLS LAST], nth_value(stream_test.x,Int64(1)) ORDER BY [stream_test.x ASC NULLS LAST]], ordering_mode=Sorted
8505+
02)--SortExec: TopK(fetch=10000), expr=[g@0 ASC NULLS LAST, x@1 ASC NULLS LAST], preserve_partitioning=[false]
8506+
03)----DataSourceExec: partitions=1, partition_sizes=[1]
8507+
8508+
query I??RRR
8509+
SELECT
8510+
g,
8511+
ARRAY_AGG(x ORDER BY x),
8512+
ARRAY_AGG(DISTINCT x ORDER BY x),
8513+
FIRST_VALUE(x ORDER BY x),
8514+
LAST_VALUE(x ORDER BY x),
8515+
NTH_VALUE(x, 1 ORDER BY x)
8516+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8517+
GROUP BY g
8518+
ORDER BY g;
8519+
----
8520+
1 [1.0, 2.0] [1.0, 2.0] 1 2 1
8521+
2 [1.0, 2.0] [1.0, 2.0] 1 2 1
8522+
3 [1.0, 2.0] [1.0, 2.0] 1 2 1
8523+
8524+
# Ordered Aggregates (by s)
8525+
query TT
8526+
EXPLAIN SELECT
8527+
g,
8528+
ARRAY_AGG(s ORDER BY s),
8529+
STRING_AGG(s, '|' ORDER BY s),
8530+
STRING_AGG(DISTINCT s, '|' ORDER BY s)
8531+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8532+
GROUP BY g
8533+
ORDER BY g;
8534+
----
8535+
logical_plan
8536+
01)Sort: stream_test.g ASC NULLS LAST
8537+
02)--Aggregate: groupBy=[[stream_test.g]], aggr=[[array_agg(stream_test.s) ORDER BY [stream_test.s ASC NULLS LAST], string_agg(stream_test.s, Utf8("|")) ORDER BY [stream_test.s ASC NULLS LAST], string_agg(DISTINCT stream_test.s, Utf8("|")) ORDER BY [stream_test.s ASC NULLS LAST]]]
8538+
03)----Sort: stream_test.g ASC NULLS LAST, fetch=10000
8539+
04)------TableScan: stream_test projection=[g, s]
8540+
physical_plan
8541+
01)AggregateExec: mode=Single, gby=[g@0 as g], aggr=[array_agg(stream_test.s) ORDER BY [stream_test.s ASC NULLS LAST], string_agg(stream_test.s,Utf8("|")) ORDER BY [stream_test.s ASC NULLS LAST], string_agg(DISTINCT stream_test.s,Utf8("|")) ORDER BY [stream_test.s ASC NULLS LAST]], ordering_mode=Sorted
8542+
02)--SortExec: TopK(fetch=10000), expr=[g@0 ASC NULLS LAST, s@1 ASC NULLS LAST], preserve_partitioning=[false]
8543+
03)----DataSourceExec: partitions=1, partition_sizes=[1]
8544+
8545+
query I?TT
8546+
SELECT
8547+
g,
8548+
ARRAY_AGG(s ORDER BY s),
8549+
STRING_AGG(s, '|' ORDER BY s),
8550+
STRING_AGG(DISTINCT s, '|' ORDER BY s)
8551+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8552+
GROUP BY g
8553+
ORDER BY g;
8554+
----
8555+
1 [a, b] a|b a|b
8556+
2 [c, d] c|d c|d
8557+
3 [e, f] e|f e|f
8558+
8559+
# Statistical & Regression Aggregates
8560+
query TT
8561+
EXPLAIN SELECT
8562+
g,
8563+
CORR(x, y),
8564+
COVAR(x, y),
8565+
COVAR_SAMP(x, y),
8566+
COVAR_POP(x, y),
8567+
REGR_SXX(x, y),
8568+
REGR_SXY(x, y),
8569+
REGR_SYY(x, y),
8570+
REGR_AVGX(x, y),
8571+
REGR_AVGY(x, y),
8572+
REGR_COUNT(x, y),
8573+
REGR_SLOPE(x, y),
8574+
REGR_INTERCEPT(x, y),
8575+
REGR_R2(x, y)
8576+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8577+
GROUP BY g
8578+
ORDER BY g;
8579+
----
8580+
logical_plan
8581+
01)Sort: stream_test.g ASC NULLS LAST
8582+
02)--Projection: stream_test.g, corr(stream_test.x,stream_test.y), covar_samp(stream_test.x,stream_test.y) AS covar(stream_test.x,stream_test.y), covar_samp(stream_test.x,stream_test.y), covar_pop(stream_test.x,stream_test.y), regr_sxx(stream_test.x,stream_test.y), regr_sxy(stream_test.x,stream_test.y), regr_syy(stream_test.x,stream_test.y), regr_avgx(stream_test.x,stream_test.y), regr_avgy(stream_test.x,stream_test.y), regr_count(stream_test.x,stream_test.y), regr_slope(stream_test.x,stream_test.y), regr_intercept(stream_test.x,stream_test.y), regr_r2(stream_test.x,stream_test.y)
8583+
03)----Aggregate: groupBy=[[stream_test.g]], aggr=[[corr(stream_test.x, stream_test.y), covar_samp(stream_test.x, stream_test.y), covar_pop(stream_test.x, stream_test.y), regr_sxx(stream_test.x, stream_test.y), regr_sxy(stream_test.x, stream_test.y), regr_syy(stream_test.x, stream_test.y), regr_avgx(stream_test.x, stream_test.y), regr_avgy(stream_test.x, stream_test.y), regr_count(stream_test.x, stream_test.y), regr_slope(stream_test.x, stream_test.y), regr_intercept(stream_test.x, stream_test.y), regr_r2(stream_test.x, stream_test.y)]]
8584+
04)------Sort: stream_test.g ASC NULLS LAST, fetch=10000
8585+
05)--------TableScan: stream_test projection=[g, x, y]
8586+
physical_plan
8587+
01)ProjectionExec: expr=[g@0 as g, corr(stream_test.x,stream_test.y)@1 as corr(stream_test.x,stream_test.y), covar_samp(stream_test.x,stream_test.y)@2 as covar(stream_test.x,stream_test.y), covar_samp(stream_test.x,stream_test.y)@2 as covar_samp(stream_test.x,stream_test.y), covar_pop(stream_test.x,stream_test.y)@3 as covar_pop(stream_test.x,stream_test.y), regr_sxx(stream_test.x,stream_test.y)@4 as regr_sxx(stream_test.x,stream_test.y), regr_sxy(stream_test.x,stream_test.y)@5 as regr_sxy(stream_test.x,stream_test.y), regr_syy(stream_test.x,stream_test.y)@6 as regr_syy(stream_test.x,stream_test.y), regr_avgx(stream_test.x,stream_test.y)@7 as regr_avgx(stream_test.x,stream_test.y), regr_avgy(stream_test.x,stream_test.y)@8 as regr_avgy(stream_test.x,stream_test.y), regr_count(stream_test.x,stream_test.y)@9 as regr_count(stream_test.x,stream_test.y), regr_slope(stream_test.x,stream_test.y)@10 as regr_slope(stream_test.x,stream_test.y), regr_intercept(stream_test.x,stream_test.y)@11 as regr_intercept(stream_test.x,stream_test.y), regr_r2(stream_test.x,stream_test.y)@12 as regr_r2(stream_test.x,stream_test.y)]
8588+
02)--AggregateExec: mode=Single, gby=[g@0 as g], aggr=[corr(stream_test.x,stream_test.y), covar_samp(stream_test.x,stream_test.y), covar_pop(stream_test.x,stream_test.y), regr_sxx(stream_test.x,stream_test.y), regr_sxy(stream_test.x,stream_test.y), regr_syy(stream_test.x,stream_test.y), regr_avgx(stream_test.x,stream_test.y), regr_avgy(stream_test.x,stream_test.y), regr_count(stream_test.x,stream_test.y), regr_slope(stream_test.x,stream_test.y), regr_intercept(stream_test.x,stream_test.y), regr_r2(stream_test.x,stream_test.y)], ordering_mode=Sorted
8589+
03)----SortExec: TopK(fetch=10000), expr=[g@0 ASC NULLS LAST], preserve_partitioning=[false]
8590+
04)------DataSourceExec: partitions=1, partition_sizes=[1]
8591+
8592+
query IRRRRRRRRRIRRR
8593+
SELECT
8594+
g,
8595+
CORR(x, y),
8596+
COVAR(x, y),
8597+
COVAR_SAMP(x, y),
8598+
COVAR_POP(x, y),
8599+
REGR_SXX(x, y),
8600+
REGR_SXY(x, y),
8601+
REGR_SYY(x, y),
8602+
REGR_AVGX(x, y),
8603+
REGR_AVGY(x, y),
8604+
REGR_COUNT(x, y),
8605+
REGR_SLOPE(x, y),
8606+
REGR_INTERCEPT(x, y),
8607+
REGR_R2(x, y)
8608+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8609+
GROUP BY g
8610+
ORDER BY g;
8611+
----
8612+
1 1 0.5 0.5 0.25 0.5 0.5 0.5 1.5 1.5 2 1 0 1
8613+
2 NULL 0 0 0 0 0 0.5 5 1.5 2 NULL NULL NULL
8614+
3 1 0.5 0.5 0.25 0.5 0.5 0.5 1.5 1.5 2 1 0 1
8615+
8616+
# Approximate and Ordered-Set Aggregates
8617+
query TT
8618+
EXPLAIN SELECT
8619+
g,
8620+
APPROX_DISTINCT(i),
8621+
APPROX_MEDIAN(x),
8622+
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x),
8623+
QUANTILE_CONT(0.5) WITHIN GROUP (ORDER BY x),
8624+
APPROX_PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x),
8625+
APPROX_PERCENTILE_CONT_WITH_WEIGHT(1.0, 0.5) WITHIN GROUP (ORDER BY x),
8626+
PERCENTILE_CONT(x, 0.5),
8627+
APPROX_PERCENTILE_CONT(x, 0.5),
8628+
APPROX_PERCENTILE_CONT_WITH_WEIGHT(x, 1.0, 0.5)
8629+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8630+
GROUP BY g
8631+
ORDER BY g;
8632+
----
8633+
logical_plan
8634+
01)Sort: stream_test.g ASC NULLS LAST
8635+
02)--Projection: stream_test.g, approx_distinct(stream_test.i), approx_median(stream_test.x), percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST], percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST] AS quantile_cont(stream_test.x,Float64(0.5)), approx_percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST], approx_percentile_cont_with_weight(Float64(1),Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST], percentile_cont(stream_test.x,Float64(0.5)), approx_percentile_cont(stream_test.x,Float64(0.5)), approx_percentile_cont_with_weight(stream_test.x,Float64(1),Float64(0.5))
8636+
03)----Aggregate: groupBy=[[stream_test.g]], aggr=[[approx_distinct(stream_test.i), approx_median(stream_test.x), percentile_cont(stream_test.x, Float64(0.5)) ORDER BY [stream_test.x ASC NULLS LAST], approx_percentile_cont(stream_test.x, Float64(0.5)) ORDER BY [stream_test.x ASC NULLS LAST], approx_percentile_cont_with_weight(stream_test.x, Float64(1), Float64(0.5)) ORDER BY [stream_test.x ASC NULLS LAST], percentile_cont(stream_test.x, Float64(0.5)), approx_percentile_cont(stream_test.x, Float64(0.5)), approx_percentile_cont_with_weight(stream_test.x, Float64(1), Float64(0.5))]]
8637+
04)------Sort: stream_test.g ASC NULLS LAST, fetch=10000
8638+
05)--------TableScan: stream_test projection=[g, x, i]
8639+
physical_plan
8640+
01)ProjectionExec: expr=[g@0 as g, approx_distinct(stream_test.i)@1 as approx_distinct(stream_test.i), approx_median(stream_test.x)@2 as approx_median(stream_test.x), percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST]@3 as percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST], percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST]@3 as quantile_cont(stream_test.x,Float64(0.5)), approx_percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST]@4 as approx_percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST], approx_percentile_cont_with_weight(Float64(1),Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST]@5 as approx_percentile_cont_with_weight(Float64(1),Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST], percentile_cont(stream_test.x,Float64(0.5))@6 as percentile_cont(stream_test.x,Float64(0.5)), approx_percentile_cont(stream_test.x,Float64(0.5))@7 as approx_percentile_cont(stream_test.x,Float64(0.5)), approx_percentile_cont_with_weight(stream_test.x,Float64(1),Float64(0.5))@8 as approx_percentile_cont_with_weight(stream_test.x,Float64(1),Float64(0.5))]
8641+
02)--AggregateExec: mode=Single, gby=[g@0 as g], aggr=[approx_distinct(stream_test.i), approx_median(stream_test.x), percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST], approx_percentile_cont(Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST], approx_percentile_cont_with_weight(Float64(1),Float64(0.5)) WITHIN GROUP [stream_test.x ASC NULLS LAST], percentile_cont(stream_test.x,Float64(0.5)), approx_percentile_cont(stream_test.x,Float64(0.5)), approx_percentile_cont_with_weight(stream_test.x,Float64(1),Float64(0.5))], ordering_mode=Sorted
8642+
03)----SortExec: TopK(fetch=10000), expr=[g@0 ASC NULLS LAST], preserve_partitioning=[false]
8643+
04)------DataSourceExec: partitions=1, partition_sizes=[1]
8644+
8645+
query IIRRRRRRRR
8646+
SELECT
8647+
g,
8648+
APPROX_DISTINCT(i),
8649+
APPROX_MEDIAN(x),
8650+
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x),
8651+
QUANTILE_CONT(0.5) WITHIN GROUP (ORDER BY x),
8652+
APPROX_PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x),
8653+
APPROX_PERCENTILE_CONT_WITH_WEIGHT(1.0, 0.5) WITHIN GROUP (ORDER BY x),
8654+
PERCENTILE_CONT(x, 0.5),
8655+
APPROX_PERCENTILE_CONT(x, 0.5),
8656+
APPROX_PERCENTILE_CONT_WITH_WEIGHT(x, 1.0, 0.5)
8657+
FROM (SELECT * FROM stream_test ORDER BY g LIMIT 10000)
8658+
GROUP BY g
8659+
ORDER BY g;
8660+
----
8661+
1 2 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5
8662+
2 2 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5
8663+
3 2 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5
8664+
8665+
statement ok
8666+
DROP TABLE stream_test;
8667+
8668+
# Restore default target partitions
8669+
statement ok
8670+
set datafusion.execution.target_partitions = 4;

0 commit comments

Comments
 (0)