Skip to content

Commit a8a3695

Browse files
committed
adds first part of window split
Signed-off-by: DylanGuedes <[email protected]>
1 parent 88c8d5e commit a8a3695

2 files changed

Lines changed: 1307 additions & 0 deletions

File tree

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
2+
--
3+
-- Window Functions Testing
4+
-- https://github.com/postgres/postgres/blob/REL_12_BETA3/src/test/regress/sql/window.sql
5+
6+
-- tenk2: temporary view exposing every column of tenk1 unchanged.
-- NOTE(review): tenk1 is not created in this file; presumably it is loaded by the test harness -- confirm.
CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1;
7+
8+
-- Fixture table for the per-department window tests:
-- one row per employee with department name, employee number, salary and enrollment date.
CREATE TABLE empsalary (
  depname     string,
  empno       int,
  salary      int,
  enroll_date date
) USING parquet;
14+
15+
-- Seed rows for empsalary, in column order (depname, empno, salary, enroll_date).
-- enroll_date is written as a typed DATE literal so the insert does not rely on
-- an implicit string-to-date cast when the value is stored into the date column.
INSERT INTO empsalary VALUES
  ('develop', 10, 5200, date '2007-08-01'),
  ('sales', 1, 5000, date '2006-10-01'),
  ('personnel', 5, 3500, date '2007-12-10'),
  ('sales', 4, 4800, date '2007-08-08'),
  ('personnel', 2, 3900, date '2006-12-23'),
  ('develop', 7, 4200, date '2008-01-01'),
  ('develop', 9, 4500, date '2008-01-01'),
  ('sales', 3, 4800, date '2007-08-01'),
  ('develop', 8, 6000, date '2006-10-01'),
  ('develop', 11, 5200, date '2007-08-15');
26+
27+
-- aggregate used as a window function: each row carries the salary total of its department
SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary;
28+
29+
-- rank employees by ascending salary within each department
SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary;
30+
31+
-- with GROUP BY
32+
SELECT four, ten, SUM(SUM(four)) OVER (PARTITION BY four), AVG(ten) FROM tenk1
33+
GROUP BY four, ten ORDER BY four, ten;
34+
35+
-- per-department salary sum expressed through a named window (WINDOW w)
SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname);
36+
37+
-- [SPARK-28064] Order by does not accept a call to rank()
38+
-- SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w;
39+
40+
-- empty window specification
41+
SELECT COUNT(*) OVER () FROM tenk1 WHERE unique2 < 10;
42+
43+
SELECT COUNT(*) OVER w FROM tenk1 WHERE unique2 < 10 WINDOW w AS ();
44+
45+
-- no window operation
46+
SELECT four FROM tenk1 WHERE FALSE WINDOW w AS (PARTITION BY ten);
47+
48+
-- cumulative aggregate
49+
SELECT sum(four) OVER (PARTITION BY ten ORDER BY unique2) AS sum_1, ten, four FROM tenk1 WHERE unique2 < 10;
50+
51+
SELECT row_number() OVER (ORDER BY unique2) FROM tenk1 WHERE unique2 < 10;
52+
53+
SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10;
54+
55+
SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
56+
57+
SELECT percent_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
58+
59+
SELECT cume_dist() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
60+
61+
SELECT ntile(3) OVER (ORDER BY ten, four), ten, four FROM tenk1 WHERE unique2 < 10;
62+
63+
-- [SPARK-28065] ntile does not accept NULL as input
64+
-- SELECT ntile(NULL) OVER (ORDER BY ten, four), ten, four FROM tenk1 LIMIT 2;
65+
66+
SELECT lag(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
67+
68+
-- [SPARK-28068] `lag` second argument must be a literal in Spark
69+
-- SELECT lag(ten, four) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
70+
71+
-- [SPARK-28068] `lag` second argument must be a literal in Spark
72+
-- SELECT lag(ten, four, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
73+
74+
SELECT lead(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
75+
76+
SELECT lead(ten * 2, 1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
77+
78+
SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
79+
80+
SELECT first(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
81+
82+
-- last() returns the value from the last row of the frame; with ORDER BY and no explicit frame clause, the frame ends at the current row (and its peers).
83+
SELECT last(four) OVER (ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
84+
85+
SELECT last(ten) OVER (PARTITION BY four), ten, four FROM
86+
(SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s
87+
ORDER BY four, ten;
88+
89+
-- [SPARK-27951] ANSI SQL: NTH_VALUE function
90+
-- SELECT nth_value(ten, four + 1) OVER (PARTITION BY four), ten, four
91+
-- FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s;
92+
93+
SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER (PARTITION BY two ORDER BY ten) AS wsum
94+
FROM tenk1 GROUP BY ten, two;
95+
96+
SELECT count(*) OVER (PARTITION BY four), four FROM (SELECT * FROM tenk1 WHERE two = 1)s WHERE unique2 < 10;
97+
98+
SELECT (count(*) OVER (PARTITION BY four ORDER BY ten) +
99+
sum(hundred) OVER (PARTITION BY four ORDER BY ten)) AS cntsum
100+
FROM tenk1 WHERE unique2 < 10;
101+
102+
-- opexpr with different windows evaluation.
103+
SELECT * FROM(
104+
SELECT count(*) OVER (PARTITION BY four ORDER BY ten) +
105+
sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS total,
106+
count(*) OVER (PARTITION BY four ORDER BY ten) AS fourcount,
107+
sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS twosum
108+
FROM tenk1
109+
)sub WHERE total <> fourcount + twosum;
110+
111+
SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM tenk1 WHERE unique2 < 10;
112+
113+
SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER win AS wsum
114+
FROM tenk1 GROUP BY ten, two WINDOW win AS (PARTITION BY two ORDER BY ten);
115+
116+
-- more than one window with GROUP BY
117+
SELECT sum(salary),
118+
row_number() OVER (ORDER BY depname),
119+
sum(sum(salary)) OVER (ORDER BY depname DESC)
120+
FROM empsalary GROUP BY depname;
121+
122+
-- identical windows with different names
123+
SELECT sum(salary) OVER w1, count(*) OVER w2
124+
FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary);
125+
126+
-- subplan
127+
-- [SPARK-28379] Correlated scalar subqueries must be aggregated
128+
-- SELECT lead(ten, (SELECT two FROM tenk1 WHERE s.unique2 = unique2)) OVER (PARTITION BY four ORDER BY ten)
129+
-- FROM tenk1 s WHERE unique2 < 10;
130+
131+
-- empty table
132+
SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s;
133+
134+
-- mixture of agg/wfunc in the same window
135+
SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);
136+
137+
-- strict aggs
138+
-- Temporarily disable the ANSI parser mode for the next query because of keyword-compatibility issues; it is re-enabled right after the query.
139+
SET spark.sql.parser.ansi.enabled=false;
140+
-- bonus is NULL for employees enrolled on or after 2008-01-01 and depadj is NULL
-- unless the salary is above the department average, so MIN/MAX below must skip NULLs.
-- The enrollment cutoff is a typed DATE literal so the comparison is date-to-date
-- rather than relying on implicit coercion of a string.
SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM(
  SELECT *,
    CASE WHEN enroll_date < date '2008-01-01' THEN 2008 - extract(year FROM enroll_date) END * 500 AS bonus,
    CASE WHEN
      AVG(salary) OVER (PARTITION BY depname) < salary
    THEN 200 END AS depadj FROM empsalary
)s;
147+
SET spark.sql.parser.ansi.enabled=true;
148+
149+
-- Inline five-row fixture exposing a single column f1:
-- zero, +/-123456, and +/-2147483647.
CREATE TEMPORARY VIEW int4_tbl AS
SELECT * FROM VALUES
  (0),
  (123456),
  (-123456),
  (2147483647),
  (-2147483647)
AS int4_tbl(f1);
156+
157+
-- window function over ungrouped agg over empty row set (bug before 9.1)
158+
SELECT SUM(COUNT(f1)) OVER () FROM int4_tbl WHERE f1=42;
159+
160+
-- window function with ORDER BY an expression involving aggregates (9.1 bug)
161+
select ten,
162+
sum(unique1) + sum(unique2) as res,
163+
rank() over (order by sum(unique1) + sum(unique2)) as rank
164+
from tenk1
165+
group by ten order by ten;
166+
167+
-- window and aggregate with GROUP BY expression (9.2 bug)
168+
-- explain
169+
-- select first(max(x)) over (), y
170+
-- from (select unique1 as x, ten+four as y from tenk1) ss
171+
-- group by y;
172+
173+
-- test non-default frame specifications
174+
SELECT four, ten,
175+
sum(ten) over (partition by four order by ten),
176+
last(ten) over (partition by four order by ten)
177+
FROM (select distinct ten, four from tenk1) ss;
178+
179+
SELECT four, ten,
180+
sum(ten) over (partition by four order by ten range between unbounded preceding and current row),
181+
last(ten) over (partition by four order by ten range between unbounded preceding and current row)
182+
FROM (select distinct ten, four from tenk1) ss;
183+
184+
SELECT four, ten,
185+
sum(ten) over (partition by four order by ten range between unbounded preceding and unbounded following),
186+
last(ten) over (partition by four order by ten range between unbounded preceding and unbounded following)
187+
FROM (select distinct ten, four from tenk1) ss;
188+
189+
SELECT four, ten/4 as two,
190+
sum(ten/4) over (partition by four order by ten/4 range between unbounded preceding and current row),
191+
last(ten/4) over (partition by four order by ten/4 range between unbounded preceding and current row)
192+
FROM (select distinct ten, four from tenk1) ss;
193+
194+
SELECT four, ten/4 as two,
195+
sum(ten/4) over (partition by four order by ten/4 rows between unbounded preceding and current row),
196+
last(ten/4) over (partition by four order by ten/4 rows between unbounded preceding and current row)
197+
FROM (select distinct ten, four from tenk1) ss;
198+
199+
SELECT sum(unique1) over (order by four range between current row and unbounded following),
200+
unique1, four
201+
FROM tenk1 WHERE unique1 < 10;
202+
203+
SELECT sum(unique1) over (rows between current row and unbounded following),
204+
unique1, four
205+
FROM tenk1 WHERE unique1 < 10;
206+
207+
SELECT sum(unique1) over (rows between 2 preceding and 2 following),
208+
unique1, four
209+
FROM tenk1 WHERE unique1 < 10;
210+
211+
-- [SPARK-28428] Spark `exclude` always expecting `()`
212+
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude no others),
213+
-- unique1, four
214+
-- FROM tenk1 WHERE unique1 < 10;
215+
216+
-- [SPARK-28428] Spark `exclude` always expecting `()`
217+
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude current row),
218+
-- unique1, four
219+
-- FROM tenk1 WHERE unique1 < 10;
220+
221+
-- [SPARK-28428] Spark `exclude` always expecting `()`
222+
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude group),
223+
-- unique1, four
224+
-- FROM tenk1 WHERE unique1 < 10;
225+
226+
-- [SPARK-28428] Spark `exclude` always expecting `()`
227+
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude ties),
228+
-- unique1, four
229+
-- FROM tenk1 WHERE unique1 < 10;
230+
231+
-- [SPARK-28428] Spark `exclude` always expecting `()`
232+
-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude current row),
233+
-- unique1, four
234+
-- FROM tenk1 WHERE unique1 < 10;
235+
236+
-- [SPARK-28428] Spark `exclude` always expecting `()`
237+
-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude group),
238+
-- unique1, four
239+
-- FROM tenk1 WHERE unique1 < 10;
240+
241+
-- [SPARK-28428] Spark `exclude` always expecting `()`
242+
-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude ties),
243+
-- unique1, four
244+
-- FROM tenk1 WHERE unique1 < 10;
245+
246+
-- [SPARK-28428] Spark `exclude` always expecting `()`
247+
-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude current row),
248+
-- unique1, four
249+
-- FROM tenk1 WHERE unique1 < 10;
250+
251+
-- [SPARK-28428] Spark `exclude` always expecting `()`
252+
-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude group),
253+
-- unique1, four
254+
-- FROM tenk1 WHERE unique1 < 10;
255+
256+
-- [SPARK-28428] Spark `exclude` always expecting `()`
257+
-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude ties),
258+
-- unique1, four
259+
-- FROM tenk1 WHERE unique1 < 10;
260+
261+
SELECT sum(unique1) over (rows between 2 preceding and 1 preceding),
262+
unique1, four
263+
FROM tenk1 WHERE unique1 < 10;
264+
265+
SELECT sum(unique1) over (rows between 1 following and 3 following),
266+
unique1, four
267+
FROM tenk1 WHERE unique1 < 10;
268+
269+
SELECT sum(unique1) over (rows between unbounded preceding and 1 following),
270+
unique1, four
271+
FROM tenk1 WHERE unique1 < 10;
272+
273+
-- frame clause combined with a reference to the named window w (no EXCLUDE clause is involved in this query)
274+
SELECT sum(unique1) over (w range between current row and unbounded following),
275+
unique1, four
276+
FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
277+
278+
-- [SPARK-28428] Spark `exclude` always expecting `()`
279+
-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude current row),
280+
-- unique1, four
281+
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
282+
283+
-- [SPARK-28428] Spark `exclude` always expecting `()`
284+
-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude group),
285+
-- unique1, four
286+
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
287+
288+
-- [SPARK-28428] Spark `exclude` always expecting `()`
289+
-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude ties),
290+
-- unique1, four
291+
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
292+
293+
-- [SPARK-27951] ANSI SQL: NTH_VALUE function
294+
-- SELECT first_value(unique1) over w,
295+
-- nth_value(unique1, 2) over w AS nth_2,
296+
-- last_value(unique1) over w, unique1, four
297+
-- FROM tenk1 WHERE unique1 < 10
298+
-- WINDOW w AS (order by four range between current row and unbounded following);
299+
300+
-- [SPARK-28501] Frame bound value must be a literal.
301+
-- SELECT sum(unique1) over
302+
-- (order by unique1
303+
-- rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
304+
-- unique1
305+
-- FROM tenk1 WHERE unique1 < 10;
306+
307+
-- v_window: for each id produced by range(1, 11), the sum of the id and its
-- immediate neighbours (one row before, one row after) in id order.
CREATE TEMP VIEW v_window AS
SELECT
  i.id,
  sum(i.id) OVER (ORDER BY i.id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS sum_rows
FROM range(1, 11) i;
310+
311+
SELECT * FROM v_window;
312+
313+
-- [SPARK-28428] Spark `exclude` always expecting `()`
314+
-- CREATE OR REPLACE TEMP VIEW v_window AS
315+
-- SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following
316+
-- exclude current row) as sum_rows FROM range(1, 11) i;
317+
318+
-- SELECT * FROM v_window;
319+
320+
-- [SPARK-28428] Spark `exclude` always expecting `()`
321+
-- CREATE OR REPLACE TEMP VIEW v_window AS
322+
-- SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following
323+
-- exclude group) as sum_rows FROM range(1, 11) i;
324+
-- SELECT * FROM v_window;
325+
326+
-- [SPARK-28428] Spark `exclude` always expecting `()`
327+
-- CREATE OR REPLACE TEMP VIEW v_window AS
328+
-- SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following
329+
-- exclude ties) as sum_rows FROM range(1, 11) i;
330+
331+
-- [SPARK-28428] Spark `exclude` always expecting `()`
332+
-- CREATE OR REPLACE TEMP VIEW v_window AS
333+
-- SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following
334+
-- exclude no others) as sum_rows FROM range(1, 11) i;
335+
-- SELECT * FROM v_window;
336+
337+
-- [SPARK-28648] Adds support to `groups` unit type in window clauses
338+
-- CREATE OR REPLACE TEMP VIEW v_window AS
339+
-- SELECT i.id, sum(i.id) over (order by i.id groups between 1 preceding and 1 following) as sum_rows FROM range(1, 11) i;
340+
-- SELECT * FROM v_window;
341+
342+
-- Clean up the objects created by this file: the v_window view, the empsalary
-- table, and the tenk2 / int4_tbl temporary views.
DROP VIEW v_window;
DROP TABLE empsalary;
DROP VIEW tenk2;
DROP VIEW int4_tbl;

0 commit comments

Comments
 (0)