From c3de557ee383f3bb96ab5401db146c4cf2a13124 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 10 Sep 2019 17:44:59 +0800 Subject: [PATCH 01/14] save change --- .../hive/execution/HiveTableScanExec.scala | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala index 5b00e2ebafa4..1b0e5a0c0bb2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala @@ -20,8 +20,11 @@ package org.apache.spark.sql.hive.execution import scala.collection.JavaConverters._ import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hive.common.JavaUtils +import org.apache.hadoop.hive.ql.exec.Utilities import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition} import org.apache.hadoop.hive.ql.plan.TableDesc +import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.objectinspector._ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption @@ -120,6 +123,36 @@ case class HiveTableScanExec( HiveShim.appendReadColumns(hiveConf, neededColumnIDs, output.map(_.name)) + logInfo(s"Test ADD JAR ${SessionState.get()}") + if (SessionState.get() != null) { + logInfo(s"Test ADD JAR ${SessionState.get().getConf.getClassLoader}") + logInfo("Test ADD JAR with SessionState.getConf.getClassLoader") + // scalastyle:off + try { + Class.forName(tableDesc.getSerdeClassName(), true, SessionState.get().getConf.getClassLoader) + } catch { + case e: Exception => + logInfo("Failed Test ADD JAR with SessionState.getConf.getClassLoader") + } + } + + logInfo(s"JavaUtils.getClassLoader => ${JavaUtils.getClassLoader}") + logInfo(s"SessionState.SharedState.jarClassLoader => ${sparkSession.sharedState.jarClassLoader}") + logInfo("Test ADD JAR with sharedState's JarClassloader") + // scalastyle:off + try { + Class.forName(tableDesc.getSerdeClassName(), true, sparkSession.sharedState.jarClassLoader) + } catch { + case e: Exception => + logInfo("Failed Test ADD JAR with sharedState's JarClassloader") + } + logInfo("Test ADD JAR with JavaUtils.getClassLoader") + try { + Class.forName(tableDesc.getSerdeClassName(), true, JavaUtils.getClassLoader) + } catch { + case e: Exception => + logInfo("Failed Test ADD JAR with JavaUtils.getClassLoader") + } val deserializer = tableDesc.getDeserializerClass.getConstructor().newInstance() deserializer.initialize(hiveConf, tableDesc.getProperties) From 2cf3153f046a25e38173fad0cddc1c581ed42b93 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 10 Sep 2019 19:05:07 +0800 Subject: [PATCH 02/14] Revert "save change" This reverts commit c3de557ee383f3bb96ab5401db146c4cf2a13124.
--- .../hive/execution/HiveTableScanExec.scala | 33 ------------------- 1 file changed, 33 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala index 1b0e5a0c0bb2..5b00e2ebafa4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala @@ -20,11 +20,8 @@ package org.apache.spark.sql.hive.execution import scala.collection.JavaConverters._ import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.hive.common.JavaUtils -import org.apache.hadoop.hive.ql.exec.Utilities import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition} import org.apache.hadoop.hive.ql.plan.TableDesc -import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.objectinspector._ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption @@ -123,36 +120,6 @@ case class HiveTableScanExec( HiveShim.appendReadColumns(hiveConf, neededColumnIDs, output.map(_.name)) - logInfo(s"Test ADD JAR ${SessionState.get()}") - if (SessionState.get() != null) { - logInfo(s"Test ADD JAR ${SessionState.get().getConf.getClassLoader}") - logInfo("Test ADD JAR with SessionState.getConf.getClassLoader") - // scalastyle:off - try { - Class.forName(tableDesc.getSerdeClassName(), true, SessionState.get().getConf.getClassLoader) - } catch { - case e: Exception => - logInfo("Failed Test ADD JAR with SessionState.getConf.getClassLoader") - } - } - - logInfo(s"JavaUtils.getClassLoader => ${JavaUtils.getClassLoader}") - logInfo(s"SessionState.SharedState.jarClassLoader => ${sparkSession.sharedState.jarClassLoader}") - logInfo("Test ADD JAR with sharedState's JarClassloader") - // scalastyle:off - try { - Class.forName(tableDesc.getSerdeClassName(), true, sparkSession.sharedState.jarClassLoader) - } catch { - case e: Exception => - logInfo("Failed Test ADD JAR with sharedState's JarClassloader") - } - logInfo("Test ADD JAR with JavaUtils.getClassLoader") - try { - Class.forName(tableDesc.getSerdeClassName(), true, JavaUtils.getClassLoader) - } catch { - case e: Exception => - logInfo("Failed Test ADD JAR with JavaUtils.getClassLoader") - } val deserializer = tableDesc.getDeserializerClass.getConstructor().newInstance() deserializer.initialize(hiveConf, tableDesc.getProperties) From 6dc61e75fa4fd8c93be0c9b0ef540af1b9ab689a Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 18 Sep 2019 15:10:02 +0800 Subject: [PATCH 03/14] TEST-SPARK-29015 --- .../sql/hive/thriftserver/CliSuite.scala | 55 +++++++++++++++++++ .../sql/hive/client/HiveClientImpl.scala | 7 +++ 2 files changed, 62 insertions(+) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6d45041e1282..5af6b3669b91 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -227,6 +227,34 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { ) } + test("Commands using SerDe provided jars in conf hive.aux.jars.path") { + + val dataFilePath =
Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") + val hiveContribJar = HiveTestUtils.getHiveContribJar.getCanonicalPath + + runCliWithin( + 3.minute, + Seq("--conf", s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( + """CREATE TABLE addJarWithHiveAux(key string, val string) + |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; + """.stripMargin + -> "", + "CREATE TABLE sourceTableForWithHiveAux (key INT, val STRING);" + -> "", + s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTableForWithHiveAux;" + -> "", + "INSERT INTO TABLE addJarWithHiveAux SELECT key, val FROM sourceTableForWithHiveAux;" + -> "", + "SELECT collect_list(array(val)) FROM addJarWithHiveAux;" + -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", + "DROP TABLE addJarWithHiveAux;" + -> "", + "DROP TABLE sourceTableForWithHiveAux;" + -> "" + ) + } + test("SPARK-11188 Analysis error reporting") { runCliWithin(timeout = 2.minute, errorResponses = Seq("AnalysisException"))( @@ -332,4 +360,31 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "SELECT concat_ws(',', 'First', example_max(1234321), 'Third');" -> "First,1234321,Third" ) } + + test("SPARK-29022 Commands using SerDe provided in ADD JAR sql") { + val dataFilePath = + Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") + val hiveContribJar = HiveTestUtils.getHiveContribJar.getCanonicalPath + + runCliWithin( + 3.minute)( + s"ADD JAR ${hiveContribJar};" -> "", + """CREATE TABLE addJarWithSQL(key string, val string) + |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; + """.stripMargin + -> "", + "CREATE TABLE sourceTableForWithSQL(key INT, val STRING);" + -> "", + s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTableForWithSQL;" + -> "", + "INSERT INTO TABLE addJarWithSQL SELECT key, val FROM sourceTableForWithSQL;" + -> "", + "SELECT collect_list(array(val)) FROM addJarWithSQL;" + -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", + "DROP TABLE addJarWithSQL;" + -> "", + "DROP TABLE sourceTableForWithSQL;" + -> "" + ) + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 96e61bd54280..e3a8afa0f590 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -145,6 +145,13 @@ private[hive] class HiveClientImpl( warehouseDir.foreach { dir => ret.getConf.setVar(ConfVars.METASTOREWAREHOUSE, dir) } + // ret != null means we have a CliSessionState instance in the current thread, which was + // initialized by SparkSQLCLIDriver. The class loader of CliSessionState's conf is the main + // thread's class loader, used to load jars passed by --jars. The class loader used by + // AddJarCommand is clientLoader.classLoader, which contains the jar paths passed by --jars + // in the main thread. We set CliSessionState's conf class loader to clientLoader.classLoader + // so that we can load all jars passed by both --jars and AddJarCommand. + ret.getConf.setClassLoader(clientLoader.classLoader) ret } else { newState() From f087b10954a660192826b7b170891d5ac1dd9401 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 18 Sep 2019 15:10:23 +0800 Subject: [PATCH 04/14] Revert "TEST-SPARK-29015" This reverts commit 6dc61e75fa4fd8c93be0c9b0ef540af1b9ab689a.
--- .../sql/hive/thriftserver/CliSuite.scala | 55 ------------------- .../sql/hive/client/HiveClientImpl.scala | 7 --- 2 files changed, 62 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 5af6b3669b91..6d45041e1282 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -227,34 +227,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { ) } - test("Commands using SerDe provided jars in conf hive.aux.jars.path") { - - val dataFilePath = - Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") - val hiveContribJar = HiveTestUtils.getHiveContribJar.getCanonicalPath - - runCliWithin( - 3.minute, - Seq("--conf", s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( - """CREATE TABLE addJarWithHiveAux(key string, val string) - |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; - """.stripMargin - -> "", - "CREATE TABLE sourceTableForWithHiveAux (key INT, val STRING);" - -> "", - s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTableForWithHiveAux;" - -> "", - "INSERT INTO TABLE addJarWithHiveAux SELECT key, val FROM sourceTableForWithHiveAux;" - -> "", - "SELECT collect_list(array(val)) FROM addJarWithHiveAux;" - -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", - "DROP TABLE addJarWithHiveAux;" - -> "", - "DROP TABLE sourceTableForWithHiveAux;" - -> "" - ) - } - test("SPARK-11188 Analysis error reporting") { runCliWithin(timeout = 2.minute, errorResponses = Seq("AnalysisException"))( @@ -360,31 +332,4 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "SELECT concat_ws(',', 'First', example_max(1234321), 'Third');" -> "First,1234321,Third" ) } - - test("SPARK-29022 Commands using SerDe provided in ADD JAR sql") { - val dataFilePath = - Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") - val hiveContribJar = HiveTestUtils.getHiveContribJar.getCanonicalPath - - runCliWithin( - 3.minute)( - s"ADD JAR ${hiveContribJar};" -> "", - """CREATE TABLE addJarWithSQL(key string, val string) - |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; - """.stripMargin - -> "", - "CREATE TABLE sourceTableForWithSQL(key INT, val STRING);" - -> "", - s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTableForWithSQL;" - -> "", - "INSERT INTO TABLE addJarWithSQL SELECT key, val FROM sourceTableForWithSQL;" - -> "", - "SELECT collect_list(array(val)) FROM addJarWithSQL;" - -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", - "DROP TABLE addJarWithSQL;" - -> "", - "DROP TABLE sourceTableForWithSQL;" - -> "" - ) - } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index e3a8afa0f590..96e61bd54280 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -145,13 +145,6 @@ private[hive] class HiveClientImpl( warehouseDir.foreach { dir => ret.getConf.setVar(ConfVars.METASTOREWAREHOUSE, dir) } - // ret != null means we have a CliSessionState instance in the current thread, which was
- // initialized by SparkSQLCLIDriver. The class loader of CliSessionState's conf is the main - // thread's class loader, used to load jars passed by --jars. The class loader used by - // AddJarCommand is clientLoader.classLoader, which contains the jar paths passed by --jars - // in the main thread. We set CliSessionState's conf class loader to clientLoader.classLoader - // so that we can load all jars passed by both --jars and AddJarCommand. - ret.getConf.setClassLoader(clientLoader.classLoader) ret } else { newState() From 5aa2ed67e74aaae69fea4b996588ec5196828857 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 20 Sep 2019 00:01:09 +0800 Subject: [PATCH 05/14] SUPPORT IN/EXISTS in join condition --- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 ++ .../sql/catalyst/analysis/CheckAnalysis.scala | 4 ++-- .../catalyst/analysis/ResolveSubquerySuite.scala | 14 +++++++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 76e59fa78542..81af81a30490 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1761,6 +1761,8 @@ class Analyzer( resolveSubQueries(q, q.children) case d: DeleteFromTable if d.childrenResolved => resolveSubQueries(d, d.children) + case j: Join if j.childrenResolved => + resolveSubQueries(j, Seq(j, j.left, j.right)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 4a19806bd80f..2a6b33529604 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -596,9 +596,9 @@ trait CheckAnalysis extends PredicateHelper { case inSubqueryOrExistsSubquery => plan match { - case _: Filter | _: DeleteFromTable => // Ok + case _: Filter | _: DeleteFromTable | _: Join => // Ok case _ => - failAnalysis(s"IN/EXISTS predicate sub-queries can only be used in" + + failAnalysis(s"IN/EXISTS/JOIN predicate sub-queries can only be used in" + s" Filter/DeleteFromTable: $plan") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala index 74a8590b5eef..49393ee21381 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala @@ -20,7 +20,8 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{InSubquery, ListQuery} -import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, Project} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LocalRelation, Project} +import org.apache.spark.sql.catalyst.plans.Inner /** * Unit tests for [[ResolveSubquery]].
@@ -29,8 +30,10 @@ class ResolveSubquerySuite extends AnalysisTest { val a = 'a.int val b = 'b.int + val c = 'c.int val t1 = LocalRelation(a) val t2 = LocalRelation(b) + val t3 = LocalRelation(c) test("SPARK-17251 Improve `OuterReference` to be `NamedExpression`") { val expr = Filter( @@ -41,4 +44,13 @@ class ResolveSubquerySuite extends AnalysisTest { assert(m.contains( "Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses")) } + + test("SPARK-29145 Support subquery in join condition") { + val expr = Join(t1, + t2, + Inner, + Some(InSubquery(Seq(a), ListQuery(Project(Seq(UnresolvedAttribute("c")), t3)))), + null) + assertAnalysisSuccess(expr) + } } From fa55b3ab2783972c55ae0c8c21aed88fe1b1c220 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 20 Sep 2019 07:51:15 +0800 Subject: [PATCH 06/14] fix scala style --- .../spark/sql/catalyst/analysis/ResolveSubquerySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala index 49393ee21381..505e1a3ac6ba 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala @@ -20,8 +20,8 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{InSubquery, ListQuery} -import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LocalRelation, Project} import org.apache.spark.sql.catalyst.plans.Inner +import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LocalRelation, Project} /** * Unit tests for [[ResolveSubquery]]. 
From bd7c0981c977f23e8e50acac88aa9be0eac2bd1c Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 20 Sep 2019 10:07:36 +0800 Subject: [PATCH 07/14] add end-to-end test --- .../org/apache/spark/sql/SubquerySuite.scala | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index a1d7792941ed..be425ea69909 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -204,6 +204,30 @@ class SubquerySuite extends QueryTest with SharedSparkSession { } } + test("SPARK-29145: JOIN condition with IN/NOT IN subquery") { + withTempView("s1", "s2", "s3") { + Seq(1, 3, 5, 7, 9).toDF("id").createOrReplaceTempView("s1") + Seq(1, 3, 4, 6, 9).toDF("id").createOrReplaceTempView("s2") + Seq(3, 4, 6, 9).toDF("id").createOrReplaceTempView("s3") + + checkAnswer( + sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id IN (select 9)"), + Row(9) :: Nil) + + checkAnswer( + sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id NOT IN (select 9)"), + Row(1) :: Row(3) :: Nil) + + checkAnswer( + sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id IN (select id from s3)"), + Row(3) :: Row(9) :: Nil) + + checkAnswer( + sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + Row(1) :: Nil) + } + } + test("SPARK-14791: scalar subquery inside broadcast join") { val df = sql("select a, sum(b) as s from l group by a having a > (select avg(a) from l)") val expected = Row(3, 2.0, 3, 3.0) :: Row(6, null, 6, null) :: Nil From dd37df8c6bfe47187233eb01fec04f63ff71245b Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 15 Oct 2019 18:30:34 +0800 Subject: [PATCH 08/14] remove DeleteFromTable --- .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 -- .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 18ba5f02a3d4..2434192c63e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1696,8 +1696,6 @@ class Analyzer( // Only a few unary nodes (Project/Filter/Aggregate) can contain subqueries.
case q: UnaryNode if q.childrenResolved => resolveSubQueries(q, q.children) - case d: DeleteFromTable if d.childrenResolved => - resolveSubQueries(d, d.children) case j: Join if j.childrenResolved => resolveSubQueries(j, Seq(j, j.left, j.right)) case s: SupportsSubquery if s.childrenResolved => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index abd75cfe87a3..4a9e4f9f6fe8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -602,7 +602,7 @@ trait CheckAnalysis extends PredicateHelper { case inSubqueryOrExistsSubquery => plan match { - case _: Filter | _: DeleteFromTable | _: SupportsSubquery |_: Join => // Ok + case _: Filter | _: SupportsSubquery |_: Join => // Ok case _ => failAnalysis(s"IN/EXISTS predicate sub-queries can only be used in" + s" Filter and a few commands: $plan") From 6b58893bd7ea8bab8941939997288616aeab1353 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 15 Oct 2019 18:33:21 +0800 Subject: [PATCH 09/14] fix scala style --- .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 4a9e4f9f6fe8..0f86519a32e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -602,7 +602,7 @@ trait CheckAnalysis extends PredicateHelper { case inSubqueryOrExistsSubquery => plan match { - case _: Filter | _: SupportsSubquery |_: Join => // Ok + case _: Filter | _: SupportsSubquery | _: Join => // Ok case _ => failAnalysis(s"IN/EXISTS predicate sub-queries can only be used in" + s" Filter and a few commands: $plan") From 25f31dc54fc71bb9ce2c65632a84736cc49d3a13 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 16 Oct 2019 14:16:55 +0800 Subject: [PATCH 10/14] follow comment --- .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 0f86519a32e1..e4167fd64596 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -605,7 +605,7 @@ trait CheckAnalysis extends PredicateHelper { case _: Filter | _: SupportsSubquery | _: Join => // Ok case _ => failAnalysis(s"IN/EXISTS predicate sub-queries can only be used in" + - s" Filter and a few commands: $plan") + s" Filter/SupportsSubquery/Join and a few commands: $plan") } } From 2ead3780f1b32d5223afff3fce4eacee6cba7628 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 23 Oct 2019 13:52:07 +0800 Subject: [PATCH 11/14] follow comment --- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../spark/sql/catalyst/analysis/ResolveSubquerySuite.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index e4167fd64596..f036d822c5f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -605,7 +605,7 @@ trait CheckAnalysis extends PredicateHelper { case _: Filter | _: SupportsSubquery | _: Join => // Ok case _ => failAnalysis(s"IN/EXISTS predicate sub-queries can only be used in" + - s" Filter/SupportsSubquery/Join and a few commands: $plan") + s" Filter/Join and a few commands: $plan") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala index 505e1a3ac6ba..5aa80e1a9bd7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{InSubquery, ListQuery} import org.apache.spark.sql.catalyst.plans.Inner -import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LocalRelation, Project} +import org.apache.spark.sql.catalyst.plans.logical._ /** * Unit tests for [[ResolveSubquery]]. @@ -50,7 +50,7 @@ class ResolveSubquerySuite extends AnalysisTest { t2, Inner, Some(InSubquery(Seq(a), ListQuery(Project(Seq(UnresolvedAttribute("c")), t3)))), - null) + JoinHint.NONE) assertAnalysisSuccess(expr) } } From 3db4aaf0648122cc1498e9af71be47c3013903c9 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 23 Oct 2019 18:24:04 +0800 Subject: [PATCH 12/14] Add more UT case --- .../org/apache/spark/sql/SubquerySuite.scala | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index be425ea69909..919dd691cdba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -218,13 +218,67 @@ class SubquerySuite extends QueryTest with SharedSparkSession { sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id NOT IN (select 9)"), Row(1) :: Row(3) :: Nil) + // case `IN` checkAnswer( sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id IN (select id from s3)"), Row(3) :: Row(9) :: Nil) + checkAnswer( + sql("SELECT s1.id as id2 from s1 LEFT SEMI JOIN s2 " + + "ON s1.id = s2.id and s1.id IN (select id from s3)"), + Row(3) :: Row(9) :: Nil) + + checkAnswer( + sql("SELECT s1.id as id2 from s1 LEFT ANTI JOIN s2 " + + "ON s1.id = s2.id and s1.id IN (select id from s3)"), + Row(1) :: Row(5) :: Row(7) :: Nil) + + checkAnswer( + sql("SELECT s1.id, s2.id as id2 from s1 LEFT OUTER JOIN s2 " + + "ON s1.id = s2.id and s1.id IN (select id from s3)"), + Row(1, null) :: Row(3, 3) :: Row(5, null) :: Row(7, null) :: Row(9, 9) :: Nil) + + checkAnswer( + sql("SELECT s1.id, s2.id as id2 from s1 RIGHT OUTER JOIN s2 " + + "ON s1.id = s2.id and s1.id IN (select id from s3)"), + Row(null, 1) :: Row(3, 3) :: Row(null, 4) :: Row(null, 6) :: Row(9, 9) :: Nil) + + checkAnswer( + sql("SELECT s1.id, s2.id as id2 from 
s1 FULL OUTER JOIN s2 " + + "ON s1.id = s2.id and s1.id IN (select id from s3)"), + Row(1, null) :: Row(3, 3) :: Row(5, null) :: Row(7, null) :: Row(9, 9) :: + Row(null, 1) :: Row(null, 4) :: Row(null, 6) :: Nil) + + // case `NOT IN` checkAnswer( sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), Row(1) :: Nil) + + checkAnswer( + sql("SELECT s1.id as id2 from s1 LEFT SEMI JOIN s2 " + + "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + Row(1) :: Nil) + + checkAnswer( + sql("SELECT s1.id as id2 from s1 LEFT ANTI JOIN s2 " + + "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + Row(3) :: Row(5) :: Row(7) :: Row(9) :: Nil) + + checkAnswer( + sql("SELECT s1.id, s2.id as id2 from s1 LEFT OUTER JOIN s2 " + + "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + Row(1, 1) :: Row(3, null) :: Row(5, null) :: Row(7, null) :: Row(9, null) :: Nil) + + checkAnswer( + sql("SELECT s1.id, s2.id as id2 from s1 RIGHT OUTER JOIN s2 " + + "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + Row(1, 1) :: Row(null, 3) :: Row(null, 4) :: Row(null, 6) :: Row(null, 9) :: Nil) + + checkAnswer( + sql("SELECT s1.id, s2.id as id2 from s1 FULL OUTER JOIN s2 " + + "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + Row(1, 1) :: Row(3, null) :: Row(5, null) :: Row(7, null) :: Row(9, null) :: + Row(null, 3) :: Row(null, 4) :: Row(null, 6) :: Row(null, 9) :: Nil) } } From 4ba7a174873bcde6e2251e135a17c8ec97f8ea87 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 24 Oct 2019 13:26:03 +0800 Subject: [PATCH 13/14] make test case sql clear --- .../org/apache/spark/sql/SubquerySuite.scala | 104 ++++++++++++++---- 1 file changed, 80 insertions(+), 24 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 919dd691cdba..bf65cd630815 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -211,72 +211,128 @@ class SubquerySuite extends QueryTest with SharedSparkSession { Seq(3, 4, 6, 9).toDF("id").createOrReplaceTempView("s3") checkAnswer( - sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id IN (select 9)"), + sql( + """ + | SELECT s1.id FROM s1 + | JOIN s2 ON s1.id = s2.id + | AND s1.id IN (SELECT 9)""".stripMargin), Row(9) :: Nil) checkAnswer( - sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id NOT IN (select 9)"), + sql( + """ + | SELECT s1.id FROM s1 + | JOIN s2 ON s1.id = s2.id + | AND s1.id NOT IN (SELECT 9)""".stripMargin), Row(1) :: Row(3) :: Nil) // case `IN` checkAnswer( - sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id IN (select id from s3)"), + sql( + """ + | SELECT s1.id FROM s1 + | JOIN s2 ON s1.id = s2.id + | AND s1.id IN (SELECT id FROM s3)""".stripMargin), Row(3) :: Row(9) :: Nil) checkAnswer( - sql("SELECT s1.id as id2 from s1 LEFT SEMI JOIN s2 " + - "ON s1.id = s2.id and s1.id IN (select id from s3)"), + sql( + """ + | SELECT s1.id AS id2 FROM s1 + | LEFT SEMI JOIN s2 + | ON s1.id = s2.id + | AND s1.id IN (SELECT id FROM s3)""".stripMargin), Row(3) :: Row(9) :: Nil) checkAnswer( - sql("SELECT s1.id as id2 from s1 LEFT ANTI JOIN s2 " + - "ON s1.id = s2.id and s1.id IN (select id from s3)"), + sql( + """ + | SELECT s1.id as id2 FROM s1 + | LEFT ANTI JOIN s2 + | ON s1.id = s2.id + | AND s1.id IN (SELECT id FROM s3)""".stripMargin), Row(1) :: Row(5) :: Row(7) :: Nil) checkAnswer( - sql("SELECT s1.id, 
s2.id as id2 from s1 LEFT OUTER JOIN s2 " + - "ON s1.id = s2.id and s1.id IN (select id from s3)"), + sql( + """ + | SELECT s1.id, s2.id as id2 FROM s1 + | LEFT OUTER JOIN s2 + | ON s1.id = s2.id + | AND s1.id IN (SELECT id FROM s3)""".stripMargin), Row(1, null) :: Row(3, 3) :: Row(5, null) :: Row(7, null) :: Row(9, 9) :: Nil) checkAnswer( - sql("SELECT s1.id, s2.id as id2 from s1 RIGHT OUTER JOIN s2 " + - "ON s1.id = s2.id and s1.id IN (select id from s3)"), + sql( + """ + | SELECT s1.id, s2.id as id2 FROM s1 + | RIGHT OUTER JOIN s2 + | ON s1.id = s2.id + | AND s1.id IN (SELECT id FROM s3)""".stripMargin), Row(null, 1) :: Row(3, 3) :: Row(null, 4) :: Row(null, 6) :: Row(9, 9) :: Nil) checkAnswer( - sql("SELECT s1.id, s2.id as id2 from s1 FULL OUTER JOIN s2 " + - "ON s1.id = s2.id and s1.id IN (select id from s3)"), + sql( + """ + | SELECT s1.id, s2.id AS id2 FROM s1 + | FULL OUTER JOIN s2 + | ON s1.id = s2.id + | AND s1.id IN (SELECT id FROM s3)""".stripMargin), Row(1, null) :: Row(3, 3) :: Row(5, null) :: Row(7, null) :: Row(9, 9) :: Row(null, 1) :: Row(null, 4) :: Row(null, 6) :: Nil) // case `NOT IN` checkAnswer( - sql("SELECT s1.id from s1 JOIN s2 ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + sql( + """ + | SELECT s1.id FROM s1 + | JOIN s2 ON s1.id = s2.id + | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), Row(1) :: Nil) checkAnswer( - sql("SELECT s1.id as id2 from s1 LEFT SEMI JOIN s2 " + - "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + sql( + """ + | SELECT s1.id AS id2 FROM s1 + | LEFT SEMI JOIN s2 + | ON s1.id = s2.id + | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), Row(1) :: Nil) checkAnswer( - sql("SELECT s1.id as id2 from s1 LEFT ANTI JOIN s2 " + - "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + sql( + """ + | SELECT s1.id AS id2 FROM s1 + | LEFT ANTI JOIN s2 + | ON s1.id = s2.id + | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), Row(3) :: Row(5) :: Row(7) :: Row(9) :: Nil) checkAnswer( - sql("SELECT s1.id, s2.id as id2 from s1 LEFT OUTER JOIN s2 " + - "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + sql( + """ + | SELECT s1.id, s2.id AS id2 FROM s1 + | LEFT OUTER JOIN s2 + | ON s1.id = s2.id + | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), Row(1, 1) :: Row(3, null) :: Row(5, null) :: Row(7, null) :: Row(9, null) :: Nil) checkAnswer( - sql("SELECT s1.id, s2.id as id2 from s1 RIGHT OUTER JOIN s2 " + - "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + sql( + """ + | SELECT s1.id, s2.id AS id2 FROM s1 + | RIGHT OUTER JOIN s2 + | ON s1.id = s2.id + | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), Row(1, 1) :: Row(null, 3) :: Row(null, 4) :: Row(null, 6) :: Row(null, 9) :: Nil) checkAnswer( - sql("SELECT s1.id, s2.id as id2 from s1 FULL OUTER JOIN s2 " + - "ON s1.id = s2.id and s1.id NOT IN (select id from s3)"), + sql( + """ + | SELECT s1.id, s2.id AS id2 FROM s1 + | FULL OUTER JOIN s2 + | ON s1.id = s2.id + | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), Row(1, 1) :: Row(3, null) :: Row(5, null) :: Row(7, null) :: Row(9, null) :: Row(null, 3) :: Row(null, 4) :: Row(null, 6) :: Row(null, 9) :: Nil) } From 307802a5e599fadfafc9fb4ffe2ccbd10d60f6ba Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 24 Oct 2019 13:29:17 +0800 Subject: [PATCH 14/14] format code --- .../org/apache/spark/sql/SubquerySuite.scala | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index bf65cd630815..266f8e23712d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -215,7 +215,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { """ | SELECT s1.id FROM s1 | JOIN s2 ON s1.id = s2.id - | AND s1.id IN (SELECT 9)""".stripMargin), + | AND s1.id IN (SELECT 9) + """.stripMargin), Row(9) :: Nil) checkAnswer( @@ -223,7 +224,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { """ | SELECT s1.id FROM s1 | JOIN s2 ON s1.id = s2.id - | AND s1.id NOT IN (SELECT 9)""".stripMargin), + | AND s1.id NOT IN (SELECT 9) + """.stripMargin), Row(1) :: Row(3) :: Nil) // case `IN` @@ -232,7 +234,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { """ | SELECT s1.id FROM s1 | JOIN s2 ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id IN (SELECT id FROM s3) + """.stripMargin), Row(3) :: Row(9) :: Nil) checkAnswer( @@ -241,7 +244,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id AS id2 FROM s1 | LEFT SEMI JOIN s2 | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id IN (SELECT id FROM s3) + """.stripMargin), Row(3) :: Row(9) :: Nil) checkAnswer( @@ -250,7 +254,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id as id2 FROM s1 | LEFT ANTI JOIN s2 | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id IN (SELECT id FROM s3) + """.stripMargin), Row(1) :: Row(5) :: Row(7) :: Nil) checkAnswer( @@ -259,7 +264,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id, s2.id as id2 FROM s1 | LEFT OUTER JOIN s2 | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id IN (SELECT id FROM s3) + """.stripMargin), Row(1, null) :: Row(3, 3) :: Row(5, null) :: Row(7, null) :: Row(9, 9) :: Nil) checkAnswer( @@ -268,7 +274,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id, s2.id as id2 FROM s1 | RIGHT OUTER JOIN s2 | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id IN (SELECT id FROM s3) + """.stripMargin), Row(null, 1) :: Row(3, 3) :: Row(null, 4) :: Row(null, 6) :: Row(9, 9) :: Nil) checkAnswer( @@ -277,7 +284,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id, s2.id AS id2 FROM s1 | FULL OUTER JOIN s2 | ON s1.id = s2.id - | AND s1.id IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id IN (SELECT id FROM s3) + """.stripMargin), Row(1, null) :: Row(3, 3) :: Row(5, null) :: Row(7, null) :: Row(9, 9) :: Row(null, 1) :: Row(null, 4) :: Row(null, 6) :: Nil) @@ -287,7 +295,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { """ | SELECT s1.id FROM s1 | JOIN s2 ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id NOT IN (SELECT id FROM s3) + """.stripMargin), Row(1) :: Nil) checkAnswer( @@ -296,7 +305,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id AS id2 FROM s1 | LEFT SEMI JOIN s2 | ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id NOT IN (SELECT id FROM s3) + """.stripMargin), Row(1) :: Nil) checkAnswer( @@ -305,7 +315,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id AS id2 FROM s1 | LEFT ANTI JOIN s2 | ON s1.id = s2.id - 
| AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id NOT IN (SELECT id FROM s3) + """.stripMargin), Row(3) :: Row(5) :: Row(7) :: Row(9) :: Nil) checkAnswer( @@ -314,7 +325,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id, s2.id AS id2 FROM s1 | LEFT OUTER JOIN s2 | ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id NOT IN (SELECT id FROM s3) + """.stripMargin), Row(1, 1) :: Row(3, null) :: Row(5, null) :: Row(7, null) :: Row(9, null) :: Nil) checkAnswer( @@ -323,7 +335,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id, s2.id AS id2 FROM s1 | RIGHT OUTER JOIN s2 | ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id NOT IN (SELECT id FROM s3) + """.stripMargin), Row(1, 1) :: Row(null, 3) :: Row(null, 4) :: Row(null, 6) :: Row(null, 9) :: Nil) checkAnswer( @@ -332,7 +345,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | SELECT s1.id, s2.id AS id2 FROM s1 | FULL OUTER JOIN s2 | ON s1.id = s2.id - | AND s1.id NOT IN (SELECT id FROM s3)""".stripMargin), + | AND s1.id NOT IN (SELECT id FROM s3) + """.stripMargin), Row(1, 1) :: Row(3, null) :: Row(5, null) :: Row(7, null) :: Row(9, null) :: Row(null, 3) :: Row(null, 4) :: Row(null, 6) :: Row(null, 9) :: Nil) }
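
A minimal usage sketch of what this series enables, mirroring the end-to-end test in SubquerySuite above. The session setup below (object name, app name, local master) is illustrative only and not part of the patches; the view data, the SQL, and the expected rows (3 and 9) come from the tests themselves.

import org.apache.spark.sql.SparkSession

object JoinConditionSubqueryExample {
  def main(args: Array[String]): Unit = {
    // Illustrative local session; any SparkSession with this series applied works.
    val spark = SparkSession.builder()
      .appName("SPARK-29145-example")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Same data as the SPARK-29145 end-to-end test in SubquerySuite.
    Seq(1, 3, 5, 7, 9).toDF("id").createOrReplaceTempView("s1")
    Seq(1, 3, 4, 6, 9).toDF("id").createOrReplaceTempView("s2")
    Seq(3, 4, 6, 9).toDF("id").createOrReplaceTempView("s3")

    // Before these patches, CheckAnalysis rejected an IN/EXISTS subquery inside a
    // join condition ("IN/EXISTS predicate sub-queries can only be used in Filter...");
    // with ResolveSubquery handling Join nodes, this resolves and returns ids 3 and 9.
    spark.sql(
      """
        | SELECT s1.id FROM s1
        | JOIN s2 ON s1.id = s2.id
        | AND s1.id IN (SELECT id FROM s3)
      """.stripMargin).show()

    spark.stop()
  }
}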