Skip to content

Commit 45962d6

Browse files
committed
Port to SQLQueryTestSuite
1 parent d6040ea commit 45962d6

6 files changed

Lines changed: 506 additions & 133 deletions

File tree

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
-- Unit tests for simple NOT IN predicate subquery across multiple columns.
2+
--
3+
-- See not-in-single-column-unit-tests.sql for an introduction.
4+
--
5+
-- Test cases for multi-column ``WHERE (a, b) NOT IN (SELECT c, d FROM r ...)'':
6+
-- | # | does subquery include null? | do filter columns contain null? | a = c? | b = d? | row included in result? |
7+
-- | 1 | empty | * | * | * | yes |
8+
-- | 2 | 1+ row has null for all columns | * | * | * | no |
9+
-- | 3 | no row has null for all columns | (yes, yes) | * | * | no |
10+
-- | 4 | no row has null for all columns | (no, yes) | yes | * | no |
11+
-- | 5 | no row has null for all columns | (no, yes) | no | * | yes |
12+
-- | 6 | no | (no, no) | yes | yes | no |
13+
-- | 7 | no | (no, no) | _ | _ | yes |
14+
--
15+
-- This can be generalized to include more tests for more columns, but it covers the main cases
16+
-- when there is more than one column.
17+
18+
-- Probe-side fixture (the FROM table of every test case below): one row with both columns null,
-- one row where only a is null, and two rows without nulls.
CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
19+
(null, null),
20+
(null, 1.0),
21+
(2, 3.0),
22+
(4, 5.0)
23+
AS m(a, b);
24+
25+
-- Subquery-side fixture (the table read inside every NOT IN subquery below): one row with both
-- columns null, one row where only d is null, and two rows without nulls.
CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES
26+
(null, null),
27+
(0, 1.0),
28+
(2, 3.0),
29+
(4, null)
30+
AS s(c, d);
31+
32+
-- Case 1
33+
-- (subquery is empty -> row is returned)
34+
SELECT *
35+
FROM m
36+
WHERE (a, b) NOT IN (SELECT *
37+
FROM s
38+
WHERE d > 5.0) -- Matches no rows
39+
;
40+
41+
-- Case 2
42+
-- (subquery contains a row with null in all columns -> row not returned)
43+
SELECT *
44+
FROM m
45+
WHERE (a, b) NOT IN (SELECT *
46+
FROM s
47+
WHERE c IS NULL AND d IS NULL) -- Matches only (null, null)
48+
;
49+
50+
-- Case 3
51+
-- (probe-side columns are all null -> row not returned)
52+
SELECT *
53+
FROM m
54+
WHERE a IS NULL AND b IS NULL -- Matches only (null, null)
55+
AND (a, b) NOT IN (SELECT *
56+
FROM s
57+
WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null)
58+
;
59+
60+
-- Case 4
61+
-- (one column null, other column matches a row in the subquery result -> row not returned)
62+
SELECT *
63+
FROM m
64+
WHERE b = 1.0 -- Matches (null, 1.0)
65+
AND (a, b) NOT IN (SELECT *
66+
FROM s
67+
WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null)
68+
;
69+
70+
-- Case 5
71+
-- (one null column with no match -> row is returned)
72+
SELECT *
73+
FROM m
74+
WHERE b = 1.0 -- Matches (null, 1.0)
75+
AND (a, b) NOT IN (SELECT *
76+
FROM s
77+
WHERE c = 2) -- Matches (2, 3.0)
78+
;
79+
80+
-- Case 6
81+
-- (no null columns with match -> row not returned)
82+
SELECT *
83+
FROM m
84+
WHERE b = 3.0 -- Matches (2, 3.0)
85+
AND (a, b) NOT IN (SELECT *
86+
FROM s
87+
WHERE c = 2) -- Matches (2, 3.0)
88+
;
89+
90+
-- Case 7
91+
-- (no null columns with no match -> row is returned)
92+
SELECT *
93+
FROM m
94+
WHERE b = 5.0 -- Matches (4, 5.0)
95+
AND (a, b) NOT IN (SELECT *
96+
FROM s
97+
WHERE c = 2) -- Matches (2, 3.0)
98+
;
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
-- Unit tests for simple NOT IN predicate subquery across a single column.
2+
--
3+
-- ``col NOT IN expr'' is quite difficult to reason about. There are many edge cases, some of the
4+
-- rules are confusing to the uninitiated, and precedence and treatment of null values are plain
5+
-- unintuitive. To make this simpler to understand, I've come up with a plain English way of
6+
-- describing the expected behavior of this query.
7+
--
8+
-- - If the subquery is empty (i.e. returns no rows), the row should be returned, regardless of
9+
-- whether the filtered columns include nulls.
10+
-- - If the subquery contains a result with all columns null, then the row should not be returned.
11+
-- - If for all non-null filter columns there exists a row in the subquery in which each column
12+
-- either
13+
-- 1. is equal to the corresponding filter column or
14+
-- 2. is null
15+
-- then the row should not be returned. (This includes the case where all filter columns are
16+
-- null.)
17+
-- - Otherwise, the row should be returned.
18+
--
19+
-- Using these rules, we can come up with a set of test cases for single-column and multi-column
20+
-- NOT IN predicates.
21+
--
22+
-- Test cases for single-column ``WHERE a NOT IN (SELECT c FROM r ...)'':
23+
-- | # | does subquery include null? | is a null? | a = c? | row with a included in result? |
24+
-- | 1 | empty | | | yes |
25+
-- | 2 | yes | | | no |
26+
-- | 3 | no | yes | | no |
27+
-- | 4 | no | no | yes | no |
28+
-- | 5 | no | no | no | yes |
29+
--
30+
-- There are also some considerations around correlated subqueries. Correlated subqueries can
31+
-- cause cases 2, 3, or 4 to be reduced to case 1 by filtering the subquery down to zero rows,
32+
-- in which case the row from the parent table is always included in the output.
33+
34+
-- Probe-side fixture (the FROM table of every test case below): a is null in one row and
-- non-null in the other two. Each row has a distinct b, so a filter on b selects a single row.
CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
35+
(null, 1.0),
36+
(2, 3.0),
37+
(4, 5.0)
38+
AS m(a, b);
39+
40+
-- Subquery-side fixture (the table read inside every NOT IN subquery below): c is null in one
-- row and non-null in the other two. Each row has a distinct d, so a filter on d selects a
-- single row.
CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES
41+
(null, 1.0),
42+
(2, 3.0),
43+
(6, 7.0)
44+
AS s(c, d);
45+
46+
-- Uncorrelated NOT IN Subquery test cases
47+
-- Case 1
48+
-- (empty subquery -> all rows returned)
49+
SELECT *
50+
FROM m
51+
WHERE a NOT IN (SELECT c
52+
FROM s
53+
WHERE d > 10.0) -- (empty subquery)
54+
;
55+
56+
-- Case 2
57+
-- (subquery includes null -> no rows returned)
58+
SELECT *
59+
FROM m
60+
WHERE a NOT IN (SELECT c
61+
FROM s
62+
WHERE d = 1.0) -- Only matches (null, 1.0)
63+
;
64+
65+
-- Case 3
66+
-- (probe column is null -> row not returned)
67+
SELECT *
68+
FROM m
69+
WHERE b = 1.0 -- Only matches (null, 1.0)
70+
AND a NOT IN (SELECT c
71+
FROM s
72+
WHERE d = 3.0) -- Matches (2, 3.0)
73+
;
74+
75+
-- Case 4
76+
-- (probe column matches subquery row -> row not returned)
77+
SELECT *
78+
FROM m
79+
WHERE b = 3.0 -- Only matches (2, 3.0)
80+
AND a NOT IN (SELECT c
81+
FROM s
82+
WHERE d = 3.0) -- Matches (2, 3.0)
83+
;
84+
85+
-- Case 5
86+
-- (probe column does not match subquery row -> row is returned)
87+
SELECT *
88+
FROM m
89+
WHERE b = 3.0 -- Only matches (2, 3.0)
90+
AND a NOT IN (SELECT c
91+
FROM s
92+
WHERE d = 7.0) -- Matches (6, 7.0)
93+
;
94+
95+
-- Correlated NOT IN subquery test cases
96+
-- Case 2->1
97+
-- (subquery had nulls but they are removed by correlated subquery -> all rows returned)
98+
SELECT *
99+
FROM m
100+
WHERE a NOT IN (SELECT c
101+
FROM s
102+
WHERE d = b + 10) -- Matches no row
103+
;
104+
105+
-- Case 3->1
106+
-- (probe column is null but subquery returns no rows -> row is returned)
107+
SELECT *
108+
FROM m
109+
WHERE b = 1.0 -- Only matches (null, 1.0)
110+
AND a NOT IN (SELECT c
111+
FROM s
112+
WHERE d = b + 10) -- Matches no row
113+
;
114+
115+
-- Case 4->1
116+
-- (probe column matches row which is filtered out by correlated subquery -> row is returned)
117+
SELECT *
118+
FROM m
119+
WHERE b = 3.0 -- Only matches (2, 3.0)
120+
AND a NOT IN (SELECT c
121+
FROM s
122+
WHERE d = b + 10) -- Matches no row
123+
;
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- Number of queries: 9
3+
4+
5+
-- !query 0
6+
CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
7+
(null, null),
8+
(null, 1.0),
9+
(2, 3.0),
10+
(4, 5.0)
11+
AS m(a, b)
12+
-- !query 0 schema
13+
struct<>
14+
-- !query 0 output
15+
16+
17+
18+
-- !query 1
19+
CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES
20+
(null, null),
21+
(0, 1.0),
22+
(2, 3.0),
23+
(4, null)
24+
AS s(c, d)
25+
-- !query 1 schema
26+
struct<>
27+
-- !query 1 output
28+
29+
30+
31+
-- !query 2
32+
-- Case 1
33+
-- (subquery is empty -> row is returned)
34+
SELECT *
35+
FROM m
36+
WHERE (a, b) NOT IN (SELECT *
37+
FROM s
38+
WHERE d > 5.0) -- Matches no rows
39+
-- !query 2 schema
40+
struct<a:int,b:decimal(2,1)>
41+
-- !query 2 output
42+
2 3
43+
4 5
44+
NULL 1
45+
NULL NULL
46+
47+
48+
-- !query 3
49+
-- Case 2
50+
-- (subquery contains a row with null in all columns -> row not returned)
51+
SELECT *
52+
FROM m
53+
WHERE (a, b) NOT IN (SELECT *
54+
FROM s
55+
WHERE c IS NULL AND d IS NULL) -- Matches only (null, null)
56+
-- !query 3 schema
57+
struct<a:int,b:decimal(2,1)>
58+
-- !query 3 output
59+
60+
61+
62+
-- !query 4
63+
-- Case 3
64+
-- (probe-side columns are all null -> row not returned)
65+
SELECT *
66+
FROM m
67+
WHERE a IS NULL AND b IS NULL -- Matches only (null, null)
68+
AND (a, b) NOT IN (SELECT *
69+
FROM s
70+
WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null)
71+
-- !query 4 schema
72+
struct<a:int,b:decimal(2,1)>
73+
-- !query 4 output
74+
75+
76+
77+
-- !query 5
78+
-- Case 4
79+
-- (one column null, other column matches a row in the subquery result -> row not returned)
80+
SELECT *
81+
FROM m
82+
WHERE b = 1.0 -- Matches (null, 1.0)
83+
AND (a, b) NOT IN (SELECT *
84+
FROM s
85+
WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null)
86+
-- !query 5 schema
87+
struct<a:int,b:decimal(2,1)>
88+
-- !query 5 output
89+
90+
91+
92+
-- !query 6
93+
-- Case 5
94+
-- (one null column with no match -> row is returned)
95+
SELECT *
96+
FROM m
97+
WHERE b = 1.0 -- Matches (null, 1.0)
98+
AND (a, b) NOT IN (SELECT *
99+
FROM s
100+
WHERE c = 2) -- Matches (2, 3.0)
101+
-- !query 6 schema
102+
struct<a:int,b:decimal(2,1)>
103+
-- !query 6 output
104+
NULL 1
105+
106+
107+
-- !query 7
108+
-- Case 6
109+
-- (no null columns with match -> row not returned)
110+
SELECT *
111+
FROM m
112+
WHERE b = 3.0 -- Matches (2, 3.0)
113+
AND (a, b) NOT IN (SELECT *
114+
FROM s
115+
WHERE c = 2) -- Matches (2, 3.0)
116+
-- !query 7 schema
117+
struct<a:int,b:decimal(2,1)>
118+
-- !query 7 output
119+
120+
121+
122+
-- !query 8
123+
-- Case 7
124+
-- (no null columns with no match -> row is returned)
125+
SELECT *
126+
FROM m
127+
WHERE b = 5.0 -- Matches (4, 5.0)
128+
AND (a, b) NOT IN (SELECT *
129+
FROM s
130+
WHERE c = 2) -- Matches (2, 3.0)
131+
-- !query 8 schema
132+
struct<a:int,b:decimal(2,1)>
133+
-- !query 8 output
134+
4 5

0 commit comments

Comments
 (0)