Skip to content

Commit a059c77

Browse files
committed
more test cases
1 parent 4bd38d2 commit a059c77

4 files changed

Lines changed: 214 additions & 97 deletions

File tree

sql/core/src/test/resources/sql-tests/inputs/join.sql

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,24 @@ SELECT upperCaseData.N, upperCaseData.L FROM upperCaseData JOIN lowerCaseData
202202
SELECT a.key, b.key, c.key
203203
FROM testData a,testData b,testData c
204204
where a.key = b.key and a.key = c.key and a.key < 5;
205+
206+
-- big inner join, 4 matches per row
207+
SELECT x.key, x.value, y.key, y.value, count(1) FROM
208+
(SELECT * FROM testData UNION ALL
209+
SELECT * FROM testData UNION ALL
210+
SELECT * FROM testData UNION ALL
211+
SELECT * FROM testData) x JOIN
212+
(SELECT * FROM testData UNION ALL
213+
SELECT * FROM testData UNION ALL
214+
SELECT * FROM testData UNION ALL
215+
SELECT * FROM testData) y
216+
WHERE x.key = y.key group by x.key, x.value, y.key, y.value;
217+
218+
-- mixed-case keywords
219+
SeleCT * from
220+
(select * from upperCaseData WherE N <= 4) leftTable fuLL OUtER joiN
221+
(sElEcT * FROM upperCaseData whERe N >= 3) rightTable
222+
oN leftTable.N = rightTable.N;
223+
224+
-- Supporting relational operator '<=>' in Spark SQL
225+
SELECT * FROM src1 as a JOIN src1 as b on a.value <=> b.value;

sql/core/src/test/resources/sql-tests/results/join.sql.out

Lines changed: 153 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 31
2+
-- Number of queries: 34
33

44

55
-- !query 0
@@ -522,3 +522,155 @@ struct<key:int,key:int,key:int>
522522
2 2 2
523523
3 3 3
524524
4 4 4
525+
526+
527+
-- !query 31
528+
SELECT x.key, x.value, y.key, y.value, count(1) FROM
529+
(SELECT * FROM testData UNION ALL
530+
SELECT * FROM testData UNION ALL
531+
SELECT * FROM testData UNION ALL
532+
SELECT * FROM testData) x JOIN
533+
(SELECT * FROM testData UNION ALL
534+
SELECT * FROM testData UNION ALL
535+
SELECT * FROM testData UNION ALL
536+
SELECT * FROM testData) y
537+
WHERE x.key = y.key group by x.key, x.value, y.key, y.value
538+
-- !query 31 schema
539+
struct<key:int,value:string,key:int,value:string,count(1):bigint>
540+
-- !query 31 output
541+
1 1 1 1 16
542+
10 10 10 10 16
543+
100 100 100 100 16
544+
11 11 11 11 16
545+
12 12 12 12 16
546+
13 13 13 13 16
547+
14 14 14 14 16
548+
15 15 15 15 16
549+
16 16 16 16 16
550+
17 17 17 17 16
551+
18 18 18 18 16
552+
19 19 19 19 16
553+
2 2 2 2 16
554+
20 20 20 20 16
555+
21 21 21 21 16
556+
22 22 22 22 16
557+
23 23 23 23 16
558+
24 24 24 24 16
559+
25 25 25 25 16
560+
26 26 26 26 16
561+
27 27 27 27 16
562+
28 28 28 28 16
563+
29 29 29 29 16
564+
3 3 3 3 16
565+
30 30 30 30 16
566+
31 31 31 31 16
567+
32 32 32 32 16
568+
33 33 33 33 16
569+
34 34 34 34 16
570+
35 35 35 35 16
571+
36 36 36 36 16
572+
37 37 37 37 16
573+
38 38 38 38 16
574+
39 39 39 39 16
575+
4 4 4 4 16
576+
40 40 40 40 16
577+
41 41 41 41 16
578+
42 42 42 42 16
579+
43 43 43 43 16
580+
44 44 44 44 16
581+
45 45 45 45 16
582+
46 46 46 46 16
583+
47 47 47 47 16
584+
48 48 48 48 16
585+
49 49 49 49 16
586+
5 5 5 5 16
587+
50 50 50 50 16
588+
51 51 51 51 16
589+
52 52 52 52 16
590+
53 53 53 53 16
591+
54 54 54 54 16
592+
55 55 55 55 16
593+
56 56 56 56 16
594+
57 57 57 57 16
595+
58 58 58 58 16
596+
59 59 59 59 16
597+
6 6 6 6 16
598+
60 60 60 60 16
599+
61 61 61 61 16
600+
62 62 62 62 16
601+
63 63 63 63 16
602+
64 64 64 64 16
603+
65 65 65 65 16
604+
66 66 66 66 16
605+
67 67 67 67 16
606+
68 68 68 68 16
607+
69 69 69 69 16
608+
7 7 7 7 16
609+
70 70 70 70 16
610+
71 71 71 71 16
611+
72 72 72 72 16
612+
73 73 73 73 16
613+
74 74 74 74 16
614+
75 75 75 75 16
615+
76 76 76 76 16
616+
77 77 77 77 16
617+
78 78 78 78 16
618+
79 79 79 79 16
619+
8 8 8 8 16
620+
80 80 80 80 16
621+
81 81 81 81 16
622+
82 82 82 82 16
623+
83 83 83 83 16
624+
84 84 84 84 16
625+
85 85 85 85 16
626+
86 86 86 86 16
627+
87 87 87 87 16
628+
88 88 88 88 16
629+
89 89 89 89 16
630+
9 9 9 9 16
631+
90 90 90 90 16
632+
91 91 91 91 16
633+
92 92 92 92 16
634+
93 93 93 93 16
635+
94 94 94 94 16
636+
95 95 95 95 16
637+
96 96 96 96 16
638+
97 97 97 97 16
639+
98 98 98 98 16
640+
99 99 99 99 16
641+
642+
643+
-- !query 32
644+
SeleCT * from
645+
(select * from upperCaseData WherE N <= 4) leftTable fuLL OUtER joiN
646+
(sElEcT * FROM upperCaseData whERe N >= 3) rightTable
647+
oN leftTable.N = rightTable.N
648+
-- !query 32 schema
649+
struct<N:int,L:string,N:int,L:string>
650+
-- !query 32 output
651+
1 A NULL NULL
652+
2 B NULL NULL
653+
3 C 3 C
654+
4 D 4 D
655+
NULL NULL 5 E
656+
NULL NULL 6 F
657+
658+
659+
-- !query 33
660+
SELECT * FROM src1 as a JOIN src1 as b on a.value <=> b.value
661+
-- !query 33 schema
662+
struct<key:int,value:string,key:int,value:string>
663+
-- !query 33 output
664+
165 NULL 165 NULL
665+
165 NULL 201 NULL
666+
165 NULL NULL NULL
667+
165 val_165 165 val_165
668+
201 NULL 165 NULL
669+
201 NULL 201 NULL
670+
201 NULL NULL NULL
671+
330 val_330 330 val_330
672+
86 val_86 86 val_86
673+
NULL NULL 165 NULL
674+
NULL NULL 201 NULL
675+
NULL NULL NULL NULL
676+
NULL val_null NULL val_null

sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.plans.{Inner, LeftOuter, RightOuter}
2121
import org.apache.spark.sql.catalyst.plans.logical.Join
2222
import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
2323
import org.apache.spark.sql.functions._
24+
import org.apache.spark.sql.internal.SQLConf
2425
import org.apache.spark.sql.test.SharedSQLContext
2526

2627
class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
@@ -225,4 +226,43 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
225226
Row(1, null) :: Row(null, 2) :: Nil
226227
)
227228
}
229+
230+
test("cartesian product join") {
231+
withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
232+
checkAnswer(
233+
testData3.join(testData3),
234+
Row(1, null, 1, null) ::
235+
Row(1, null, 2, 2) ::
236+
Row(2, 2, 1, null) ::
237+
Row(2, 2, 2, 2) :: Nil)
238+
}
239+
}
240+
241+
242+
test("SortMergeJoin returns wrong results when using UnsafeRows") {
243+
// This test is for the fix of https://issues.apache.org/jira/browse/SPARK-10737.
244+
// This bug will be triggered when Tungsten is enabled and there are multiple
245+
// SortMergeJoin operators executed in the same task.
246+
val confs = SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1" :: Nil
247+
withSQLConf(confs: _*) {
248+
val df1 = (1 to 50).map(i => (s"str_$i", i)).toDF("i", "j")
249+
val df2 =
250+
df1
251+
.join(df1.select(df1("i")), "i")
252+
.select(df1("i"), df1("j"))
253+
254+
val df3 = df2.withColumnRenamed("i", "i1").withColumnRenamed("j", "j1")
255+
val df4 =
256+
df2
257+
.join(df3, df2("i") === df3("i1"))
258+
.withColumn("diff", $"j" - $"j1")
259+
.select(df2("i"), df2("j"), $"diff")
260+
261+
checkAnswer(
262+
df4,
263+
df1.withColumn("diff", lit(0)))
264+
}
265+
}
266+
267+
228268
}

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 0 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@ import java.math.MathContext
2222
import java.sql.{Date, Timestamp}
2323

2424
import org.apache.spark.{AccumulatorSuite, SparkException}
25-
import org.apache.spark.sql.catalyst.analysis.UnresolvedException
26-
import org.apache.spark.sql.catalyst.expressions.SortOrder
27-
import org.apache.spark.sql.catalyst.plans.logical.Aggregate
2825
import org.apache.spark.sql.catalyst.util.StringUtils
2926
import org.apache.spark.sql.execution.aggregate
3027
import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, CartesianProductExec, SortMergeJoinExec}
@@ -126,16 +123,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
126123
}
127124
}
128125

129-
test("support table.star") {
130-
checkAnswer(
131-
sql(
132-
"""
133-
|SELECT r.*
134-
|FROM testData l join testData2 r on (l.key = r.a)
135-
""".stripMargin),
136-
Row(1, 1) :: Row(1, 2) :: Row(2, 1) :: Row(2, 2) :: Row(3, 1) :: Row(3, 2) :: Nil)
137-
}
138-
139126
test("self join with alias in agg") {
140127
Seq(1, 2, 3)
141128
.map(i => (i, i.toString))
@@ -636,35 +623,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
636623
}
637624
}
638625

639-
test("big inner join, 4 matches per row") {
640-
checkAnswer(
641-
sql(
642-
"""
643-
|SELECT * FROM
644-
| (SELECT * FROM testData UNION ALL
645-
| SELECT * FROM testData UNION ALL
646-
| SELECT * FROM testData UNION ALL
647-
| SELECT * FROM testData) x JOIN
648-
| (SELECT * FROM testData UNION ALL
649-
| SELECT * FROM testData UNION ALL
650-
| SELECT * FROM testData UNION ALL
651-
| SELECT * FROM testData) y
652-
|WHERE x.key = y.key""".stripMargin),
653-
testData.rdd.flatMap(
654-
row => Seq.fill(16)(Row.merge(row, row))).collect().toSeq)
655-
}
656-
657-
test("cartesian product join") {
658-
withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
659-
checkAnswer(
660-
testData3.join(testData3),
661-
Row(1, null, 1, null) ::
662-
Row(1, null, 2, 2) ::
663-
Row(2, 2, 1, null) ::
664-
Row(2, 2, 2, 2) :: Nil)
665-
}
666-
}
667-
668626
test("SPARK-11111 null-safe join should not use cartesian product") {
669627
val df = sql("select count(*) from testData a join testData b on (a.key <=> b.key)")
670628
val cp = df.queryExecution.sparkPlan.collect {
@@ -696,23 +654,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
696654
Row(2, "b", 2) :: Nil)
697655
}
698656

699-
test("mixed-case keywords") {
700-
checkAnswer(
701-
sql(
702-
"""
703-
|SeleCT * from
704-
| (select * from upperCaseData WherE N <= 4) leftTable fuLL OUtER joiN
705-
| (sElEcT * FROM upperCaseData whERe N >= 3) rightTable
706-
| oN leftTable.N = rightTable.N
707-
""".stripMargin),
708-
Row(1, "A", null, null) ::
709-
Row(2, "B", null, null) ::
710-
Row(3, "C", 3, "C") ::
711-
Row(4, "D", 4, "D") ::
712-
Row(null, null, 5, "E") ::
713-
Row(null, null, 6, "F") :: Nil)
714-
}
715-
716657
test("select with table name as qualifier") {
717658
checkAnswer(
718659
sql("SELECT testData.value FROM testData WHERE testData.key = 1"),
@@ -1112,18 +1053,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
11121053
)
11131054
}
11141055

1115-
test("Supporting relational operator '<=>' in Spark SQL") {
1116-
val nullCheckData1 = TestData(1, "1") :: TestData(2, null) :: Nil
1117-
val rdd1 = sparkContext.parallelize((0 to 1).map(i => nullCheckData1(i)))
1118-
rdd1.toDF().createOrReplaceTempView("nulldata1")
1119-
val nullCheckData2 = TestData(1, "1") :: TestData(2, null) :: Nil
1120-
val rdd2 = sparkContext.parallelize((0 to 1).map(i => nullCheckData2(i)))
1121-
rdd2.toDF().createOrReplaceTempView("nulldata2")
1122-
checkAnswer(sql("SELECT nulldata1.key FROM nulldata1 join " +
1123-
"nulldata2 on nulldata1.value <=> nulldata2.value"),
1124-
(1 to 2).map(i => Row(i)))
1125-
}
1126-
11271056
test("Multi-column COUNT(DISTINCT ...)") {
11281057
val data = TestData(1, "val_1") :: TestData(2, "val_2") :: Nil
11291058
val rdd = sparkContext.parallelize((0 to 1).map(i => data(i)))
@@ -1525,31 +1454,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
15251454
"org.apache.spark.sql.execution.datasources.jdbc"))
15261455
}
15271456

1528-
test("SortMergeJoin returns wrong results when using UnsafeRows") {
1529-
// This test is for the fix of https://issues.apache.org/jira/browse/SPARK-10737.
1530-
// This bug will be triggered when Tungsten is enabled and there are multiple
1531-
// SortMergeJoin operators executed in the same task.
1532-
val confs = SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1" :: Nil
1533-
withSQLConf(confs: _*) {
1534-
val df1 = (1 to 50).map(i => (s"str_$i", i)).toDF("i", "j")
1535-
val df2 =
1536-
df1
1537-
.join(df1.select(df1("i")), "i")
1538-
.select(df1("i"), df1("j"))
1539-
1540-
val df3 = df2.withColumnRenamed("i", "i1").withColumnRenamed("j", "j1")
1541-
val df4 =
1542-
df2
1543-
.join(df3, df2("i") === df3("i1"))
1544-
.withColumn("diff", $"j" - $"j1")
1545-
.select(df2("i"), df2("j"), $"diff")
1546-
1547-
checkAnswer(
1548-
df4,
1549-
df1.withColumn("diff", lit(0)))
1550-
}
1551-
}
1552-
15531457
test("SPARK-11303: filter should not be pushed down into sample") {
15541458
val df = spark.range(100)
15551459
List(true, false).foreach { withReplacement =>

0 commit comments

Comments
 (0)