-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-18217] [SQL] Disallow creating permanent views based on temporary views or UDFs #15764
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
509327e
1b430bb
695110f
4dbd3b6
7100a8f
86e7f9d
a4df82b
1c3899f
fec0066
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -923,6 +923,24 @@ class SessionCatalog( | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Returns whether it is a temporary function. | ||
| */ | ||
| def isTemporaryFunction(name: FunctionIdentifier): Boolean = { | ||
| // copied from HiveSessionCatalog | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd add a note in HiveSessionCatalog saying not to forget to update this place as well; otherwise the two lists will become inconsistent.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do. Thanks! |
||
| val hiveFunctions = Seq( | ||
| "hash", | ||
| "histogram_numeric", | ||
| "percentile") | ||
|
|
||
| // A temporary function is a function that has been registered in functionRegistry | ||
| // without a database name, and is neither a built-in function nor a Hive function | ||
| name.database.isEmpty && | ||
| functionRegistry.functionExists(name.funcName) && | ||
| !FunctionRegistry.builtin.functionExists(name.funcName) && | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Our built-in function registry is using Thus, no need to add
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| !hiveFunctions.contains(name.funcName.toLowerCase) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is true - but we are working towards getting rid of HiveSessionCatalog though (including getting rid of the 3 fallback functions), so in practice this will make no difference soon. |
||
| } | ||
|
|
||
| protected def failFunctionLookup(name: String): Nothing = { | ||
| throw new NoSuchFunctionException(db = currentDb, func = name) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,14 +19,14 @@ package org.apache.spark.sql.execution.command | |
|
|
||
| import scala.util.control.NonFatal | ||
|
|
||
| import org.apache.spark.sql.{AnalysisException, Dataset, Row, SparkSession} | ||
| import org.apache.spark.sql.{AnalysisException, Row, SparkSession} | ||
| import org.apache.spark.sql.catalyst.{SQLBuilder, TableIdentifier} | ||
| import org.apache.spark.sql.catalyst.analysis.{UnresolvedFunction, UnresolvedRelation} | ||
| import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} | ||
| import org.apache.spark.sql.catalyst.expressions.Alias | ||
| import org.apache.spark.sql.catalyst.plans.QueryPlan | ||
| import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} | ||
| import org.apache.spark.sql.execution.datasources.{DataSource, LogicalRelation} | ||
| import org.apache.spark.sql.types.{MetadataBuilder, StructType} | ||
| import org.apache.spark.sql.types.MetadataBuilder | ||
|
|
||
|
|
||
| /** | ||
|
|
@@ -131,6 +131,10 @@ case class CreateViewCommand( | |
| s"specified by CREATE VIEW (num: `${userSpecifiedColumns.length}`).") | ||
| } | ||
|
|
||
| // When creating a permanent view, not allowed to reference temporary objects. | ||
| // This should be called after `qe.assertAnalyzed()` (i.e., `child` can be resolved) | ||
| verifyTemporaryObjectsNotExists(sparkSession) | ||
|
|
||
| val aliasedPlan = if (userSpecifiedColumns.isEmpty) { | ||
| analyzedPlan | ||
| } else { | ||
|
|
@@ -172,6 +176,34 @@ case class CreateViewCommand( | |
| Seq.empty[Row] | ||
| } | ||
|
|
||
| /** | ||
| * Permanent views are not allowed to reference temporary objects, including temporary functions and temporary views. | ||
| */ | ||
| private def verifyTemporaryObjectsNotExists(sparkSession: SparkSession): Unit = { | ||
| if (!isTemporary) { | ||
| // This func traverses the unresolved plan `child`. Below are the reasons: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it possible the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nvm, we can only create permanent view by SQL API |
||
| // 1) Analyzer replaces unresolved temporary views by a SubqueryAlias with the corresponding | ||
| // logical plan. After replacement, it is impossible to detect whether the SubqueryAlias is | ||
| // added/generated from a temporary view. | ||
| // 2) The temp functions are represented by multiple classes. Most are inaccessible from this | ||
| // package (e.g., HiveGenericUDF). | ||
| child.collect { | ||
| // Disallow creating permanent views based on temporary views. | ||
| case s: UnresolvedRelation | ||
| if sparkSession.sessionState.catalog.isTemporaryTable(s.tableIdentifier) => | ||
| throw new AnalysisException(s"Not allowed to create a permanent view $name by " + | ||
| s"referencing a temporary view ${s.tableIdentifier}") | ||
| case other if !other.resolved => other.expressions.flatMap(_.collect { | ||
| // Disallow creating permanent views based on temporary UDFs. | ||
| case e: UnresolvedFunction | ||
| if sparkSession.sessionState.catalog.isTemporaryFunction(e.name) => | ||
| throw new AnalysisException(s"Not allowed to create a permanent view $name by " + | ||
| s"referencing a temporary function `${e.name}`") | ||
| }) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Returns a [[CatalogTable]] that can be used to save in the catalog. This command canonicalizes | ||
| * the SQL based on the analyzed plan, and also creates the proper schema for the view. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,21 +38,46 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { | |
| spark.sql(s"DROP TABLE IF EXISTS jt") | ||
| } | ||
|
|
||
| test("nested views (interleaved with temporary views)") { | ||
| withView("jtv1", "jtv2", "jtv3", "temp_jtv1", "temp_jtv2", "temp_jtv3") { | ||
| test("create a permanent view on a permanent view") { | ||
| withView("jtv1", "jtv2") { | ||
| sql("CREATE VIEW jtv1 AS SELECT * FROM jt WHERE id > 3") | ||
| sql("CREATE VIEW jtv2 AS SELECT * FROM jtv1 WHERE id < 6") | ||
| checkAnswer(sql("select count(*) FROM jtv2"), Row(2)) | ||
| } | ||
| } | ||
|
|
||
| test("create a temp view on a permanent view") { | ||
| withView("jtv1", "temp_jtv1") { | ||
| sql("CREATE VIEW jtv1 AS SELECT * FROM jt WHERE id > 3") | ||
| sql("CREATE TEMPORARY VIEW temp_jtv1 AS SELECT * FROM jtv1 WHERE id < 6") | ||
| checkAnswer(sql("select count(*) FROM temp_jtv1"), Row(2)) | ||
| } | ||
| } | ||
|
|
||
| // Checks temporary views | ||
| test("create a temp view on a temp view") { | ||
| withView("temp_jtv1", "temp_jtv2") { | ||
| sql("CREATE TEMPORARY VIEW temp_jtv1 AS SELECT * FROM jt WHERE id > 3") | ||
| sql("CREATE TEMPORARY VIEW temp_jtv2 AS SELECT * FROM temp_jtv1 WHERE id < 6") | ||
| checkAnswer(sql("select count(*) FROM temp_jtv2"), Row(2)) | ||
| } | ||
| } | ||
|
|
||
| // Checks interleaved temporary view and normal view | ||
| sql("CREATE TEMPORARY VIEW temp_jtv3 AS SELECT * FROM jt WHERE id > 3") | ||
| sql("CREATE VIEW jtv3 AS SELECT * FROM temp_jtv3 WHERE id < 6") | ||
| checkAnswer(sql("select count(*) FROM jtv3"), Row(2)) | ||
| // A permanent view must not reference a session-local or a global temporary view; | ||
| // both attempts are expected to fail with an AnalysisException naming the offending view. | ||
| test("create a permanent view on a temp view") { | ||
| withView("jtv1", "temp_jtv1", "global_temp_jtv1") { | ||
| sql("CREATE TEMPORARY VIEW temp_jtv1 AS SELECT * FROM jt WHERE id > 3") | ||
| val localTempError = intercept[AnalysisException] { | ||
| sql("CREATE VIEW jtv1 AS SELECT * FROM temp_jtv1 WHERE id < 6") | ||
| }.getMessage | ||
| assert(localTempError.contains("Not allowed to create a permanent view `jtv1` by " + | ||
| "referencing a temporary view `temp_jtv1`")) | ||
|
|
||
| // The global temp database name is read from the session rather than hard-coded, | ||
| // so the expected message below is built from the same value as the query. | ||
| val globalTempDB = spark.sharedState.globalTempViewManager.database | ||
| sql("CREATE GLOBAL TEMP VIEW global_temp_jtv1 AS SELECT * FROM jt WHERE id > 0") | ||
| val globalTempError = intercept[AnalysisException] { | ||
| sql(s"CREATE VIEW jtv1 AS SELECT * FROM $globalTempDB.global_temp_jtv1 WHERE id < 6") | ||
| }.getMessage | ||
| assert(globalTempError.contains("Not allowed to create a permanent view `jtv1` by " + | ||
| s"referencing a temporary view `$globalTempDB`.`global_temp_jtv1`")) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -439,7 +464,7 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { | |
| } | ||
| } | ||
|
|
||
| test("SPARK-14933 - create view from hive parquet tabale") { | ||
| test("SPARK-14933 - create view from hive parquet table") { | ||
| withTable("t_part") { | ||
| withView("v_part") { | ||
| spark.sql("create table t_part stored as parquet as select 1 as a, 2 as b") | ||
|
|
@@ -451,7 +476,7 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { | |
| } | ||
| } | ||
|
|
||
| test("SPARK-14933 - create view from hive orc tabale") { | ||
| test("SPARK-14933 - create view from hive orc table") { | ||
| withTable("t_orc") { | ||
| withView("v_orc") { | ||
| spark.sql("create table t_orc stored as orc as select 1 as a, 2 as b") | ||
|
|
@@ -462,4 +487,65 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { | |
| } | ||
| } | ||
| } | ||
|
|
||
| // Hive fallback functions (e.g. histogram_numeric) are not temporary functions, so both a | ||
| // temporary view and a permanent view may reference them. | ||
| test("create a permanent/temp view using a hive function") { | ||
|
||
| withView("view1", "tempView1") { | ||
| // tempView1 must be created as a TEMPORARY view; otherwise the temp-view path | ||
| // named in the test title is never exercised. | ||
| sql(s"CREATE TEMPORARY VIEW tempView1 AS SELECT histogram_numeric(id, 5) from jt") | ||
| checkAnswer(sql("select count(*) FROM tempView1"), Row(1)) | ||
| sql(s"CREATE VIEW view1 AS SELECT histogram_numeric(id, 5) from jt") | ||
| checkAnswer(sql("select count(*) FROM view1"), Row(1)) | ||
| } | ||
| } | ||
|
|
||
| test("create a permanent/temp view using a built-in function") { | ||
| withView("view1", "tempView1") { | ||
| sql(s"CREATE TEMPORARY VIEW tempView1 AS SELECT abs(id) from jt") | ||
| checkAnswer(sql("select count(*) FROM tempView1"), Row(9)) | ||
| sql(s"CREATE VIEW view1 AS SELECT abs(id) from jt") | ||
| checkAnswer(sql("select count(*) FROM view1"), Row(9)) | ||
| } | ||
| } | ||
|
|
||
| test("create a permanent/temp view using a permanent function") { | ||
| val functionName = "myUpper" | ||
| val functionClass = | ||
| classOf[org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper].getCanonicalName | ||
| withUserDefinedFunction(functionName -> false) { | ||
| sql(s"CREATE FUNCTION $functionName AS '$functionClass'") | ||
| withView("view1", "tempView1") { | ||
| withTable("tab1") { | ||
| (1 to 10).map(i => s"$i").toDF("id").write.saveAsTable("tab1") | ||
| sql(s"CREATE TEMPORARY VIEW tempView1 AS SELECT $functionName(id) from tab1") | ||
| checkAnswer(sql("select count(*) FROM tempView1"), Row(10)) | ||
| sql(s"CREATE VIEW view1 AS SELECT $functionName(id) from tab1") | ||
| checkAnswer(sql("select count(*) FROM view1"), Row(10)) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("create a permanent/temp view using a temporary function") { | ||
| val tempFunctionName = "temp" | ||
| val functionClass = | ||
| classOf[org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper].getCanonicalName | ||
| withUserDefinedFunction(tempFunctionName -> true) { | ||
| sql(s"CREATE TEMPORARY FUNCTION $tempFunctionName AS '$functionClass'") | ||
| withView("view1", "tempView1") { | ||
| withTable("tab1") { | ||
| (1 to 10).map(i => s"$i").toDF("id").write.saveAsTable("tab1") | ||
|
|
||
| // temporary view | ||
| sql(s"CREATE TEMPORARY VIEW tempView1 AS SELECT $tempFunctionName(id) from tab1") | ||
| checkAnswer(sql("select count(*) FROM tempView1"), Row(10)) | ||
|
|
||
| // permanent view | ||
| val e = intercept[AnalysisException] { | ||
| sql(s"CREATE VIEW view1 AS SELECT $tempFunctionName(id) from tab1") | ||
| }.getMessage | ||
| assert(e.contains("Not allowed to create a permanent view `view1` by referencing " + | ||
| s"a temporary function `$tempFunctionName`")) | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add a unit test for this function?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, what is the behavior if the function doesn't exist? Make sure you cover that case in the unit test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will resolve this tomorrow.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like `isTemporaryTable`, we return false when the function/table does not exist.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, please document it.