[SPARK-19115] [SQL] Supporting Create Table Like Location #16868
Changes to CreateTableLikeCommand:

@@ -42,7 +42,7 @@ import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils

 /**
- * A command to create a MANAGED table with the same definition of the given existing table.
+ * A command to create a table with the same definition of the given existing table.
  * In the target table definition, the table comment is always empty but the column comments
  * are identical to the ones defined in the source table.
  *
@@ -52,12 +52,13 @@ import org.apache.spark.util.Utils
  * The syntax of using this command in SQL is:
  * {{{
  *   CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
- *   LIKE [other_db_name.]existing_table_name
+ *   LIKE [other_db_name.]existing_table_name [locationSpec]
  * }}}
  */
 case class CreateTableLikeCommand(
     targetTable: TableIdentifier,
     sourceTable: TableIdentifier,
+    location: Option[String],
     ifNotExists: Boolean) extends RunnableCommand {

   override def run(sparkSession: SparkSession): Seq[Row] = {
@@ -70,12 +71,19 @@ case class CreateTableLikeCommand(
       sourceTableDesc.provider
     }

+    // If location is specified, we create an external table internally.
+    // Otherwise, we create a managed table.
+    val tblType = if (location.isEmpty) {
+      CatalogTableType.MANAGED
+    } else {
+      CatalogTableType.EXTERNAL
+    }
+
     val newTableDesc =
       CatalogTable(
         identifier = targetTable,
-        tableType = CatalogTableType.MANAGED,
-        // We are creating a new managed table, which should not have custom table location.
-        storage = sourceTableDesc.storage.copy(locationUri = None),
+        tableType = tblType,
+        storage = sourceTableDesc.storage.copy(locationUri = location),
         schema = sourceTableDesc.schema,
         provider = newProvider,
         partitionColumnNames = sourceTableDesc.partitionColumnNames,
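For context, the user-facing effect of threading the location parameter through CreateTableLikeCommand can be sketched as follows. This is a rough illustration only: the table names, the /tmp path, and the standalone session setup are invented for the example and are not taken from this PR.

// Minimal sketch of the new syntax, assuming a Spark build that contains this change
// and a session with Hive support. Table names and the path are illustrative.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("create-table-like-location-sketch")
  .enableHiveSupport()
  .getOrCreate()

spark.sql("CREATE TABLE src_tab (key INT, value STRING) USING parquet")

// Previous behaviour, still supported: without LOCATION the target is a MANAGED table.
spark.sql("CREATE TABLE tgt_managed LIKE src_tab")

// New behaviour: with LOCATION the target is created as an EXTERNAL table whose
// storage locationUri points at the given path.
spark.sql("CREATE TABLE tgt_external LIKE src_tab LOCATION '/tmp/tgt_external_data'")

// Expected output: MANAGED, then EXTERNAL.
println(spark.catalog.getTable("tgt_managed").tableType)
println(spark.catalog.getTable("tgt_external").tableType)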
Changes to HiveDDLSuite:

@@ -833,54 +833,95 @@ class HiveDDLSuite
   }

   test("CREATE TABLE LIKE a temporary view") {
+    // CREATE TABLE LIKE a temporary view.
+    withCreateTableLikeTempView(None)
+
+    // CREATE TABLE LIKE a temporary view location ...
+    withTempDir {tmpDir =>
+      withCreateTableLikeTempView(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeTempView(location : Option[String]): Unit = {
     val sourceViewName = "tab1"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTempView(sourceViewName) {
       withTable(targetTabName) {
         spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
           .createTempView(sourceViewName)
-        sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName")
+
+        val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+        sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause")

         val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata(
           TableIdentifier(sourceViewName))
         val targetTable = spark.sessionState.catalog.getTableMetadata(
           TableIdentifier(targetTabName, Some("default")))

-        checkCreateTableLike(sourceTable, targetTable)
+        checkCreateTableLike(sourceTable, targetTable, tableType)
       }
     }
   }

   test("CREATE TABLE LIKE a data source table") {
+    // CREATE TABLE LIKE a data source table.
+    withCreateTableLikeDSTable(None)
+
+    // CREATE TABLE LIKE a data source table location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeDSTable(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeDSTable(location : Option[String]): Unit = {
     val sourceTabName = "tab1"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTable(sourceTabName, targetTabName) {
       spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
         .write.format("json").saveAsTable(sourceTabName)
-      sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
+
+      val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+      sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")

       val sourceTable =
-        spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
+        spark.sessionState.catalog.getTableMetadata(
+          TableIdentifier(sourceTabName, Some("default")))
       val targetTable =
-        spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))
+        spark.sessionState.catalog.getTableMetadata(
+          TableIdentifier(targetTabName, Some("default")))
       // The table type of the source table should be a Hive-managed data source table
       assert(DDLUtils.isDatasourceTable(sourceTable))
       assert(sourceTable.tableType == CatalogTableType.MANAGED)

-      checkCreateTableLike(sourceTable, targetTable)
+      checkCreateTableLike(sourceTable, targetTable, tableType)
     }
   }

   test("CREATE TABLE LIKE an external data source table") {
+    // CREATE TABLE LIKE an external data source table.
+    withCreateTableLikeExtDSTable(None)
+
+    // CREATE TABLE LIKE an external data source table location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeExtDSTable(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeExtDSTable(location : Option[String]): Unit = {
     val sourceTabName = "tab1"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTable(sourceTabName, targetTabName) {
       withTempPath { dir =>
         val path = dir.getCanonicalPath
         spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
           .write.format("parquet").save(path)
         sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')")
-        sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
+
+        val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+        sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")

         // The source table should be an external data source table
         val sourceTable = spark.sessionState.catalog.getTableMetadata(
@@ -891,32 +932,58 @@ class HiveDDLSuite
         assert(DDLUtils.isDatasourceTable(sourceTable))
         assert(sourceTable.tableType == CatalogTableType.EXTERNAL)

-        checkCreateTableLike(sourceTable, targetTable)
+        checkCreateTableLike(sourceTable, targetTable, tableType)
       }
     }
   }

   test("CREATE TABLE LIKE a managed Hive serde table") {
-    val catalog = spark.sessionState.catalog
+    // CREATE TABLE LIKE a managed Hive serde table.
+    withCreateTableLikeManagedHiveTable(None)
+
+    // CREATE TABLE LIKE a managed Hive serde table location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeManagedHiveTable(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeManagedHiveTable(location : Option[String]): Unit = {
     val sourceTabName = "tab1"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
+    val catalog = spark.sessionState.catalog
     withTable(sourceTabName, targetTabName) {
       sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'")
-      sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")

-      val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
+      val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+      sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
+
+      val sourceTable = catalog.getTableMetadata(
+        TableIdentifier(sourceTabName, Some("default")))
       assert(sourceTable.tableType == CatalogTableType.MANAGED)
       assert(sourceTable.properties.get("prop1").nonEmpty)
-      val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))
+      val targetTable = catalog.getTableMetadata(
+        TableIdentifier(targetTabName, Some("default")))

-      checkCreateTableLike(sourceTable, targetTable)
+      checkCreateTableLike(sourceTable, targetTable, tableType)
     }
   }

   test("CREATE TABLE LIKE an external Hive serde table") {
+    // CREATE TABLE LIKE an external Hive serde table.
+    withCreateTableLikeExtHiveTable(None)
+
+    // CREATE TABLE LIKE an external Hive serde table location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeExtHiveTable(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeExtHiveTable(location : Option[String]): Unit = {
     val catalog = spark.sessionState.catalog
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTempDir { tmpDir =>
-      val basePath = tmpDir.toURI
+      val basePath1 = tmpDir.toURI
       val sourceTabName = "tab1"
       val targetTabName = "tab2"
       withTable(sourceTabName, targetTabName) {
@@ -926,38 +993,55 @@ class HiveDDLSuite
             |CREATE EXTERNAL TABLE $sourceTabName (key INT comment 'test', value STRING)
             |COMMENT 'Apache Spark'
             |PARTITIONED BY (ds STRING, hr STRING)
-            |LOCATION '$basePath'
+            |LOCATION '$basePath1'
           """.stripMargin)
         for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) {
           sql(
             s"""
                |INSERT OVERWRITE TABLE $sourceTabName
                |partition (ds='$ds',hr='$hr')
                |SELECT 1, 'a'
              """.stripMargin)
         }
-        sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")

-        val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
+        val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+        sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
+
+        val sourceTable = catalog.getTableMetadata(
+          TableIdentifier(sourceTabName, Some("default")))
         assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
         assert(sourceTable.comment == Option("Apache Spark"))
-        val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))
+        val targetTable = catalog.getTableMetadata(
+          TableIdentifier(targetTabName, Some("default")))

-        checkCreateTableLike(sourceTable, targetTable)
+        checkCreateTableLike(sourceTable, targetTable, tableType)
       }
     }
   }

   test("CREATE TABLE LIKE a view") {
+    // CREATE TABLE LIKE a view.
+    withCreateTableLikeView(None)
+
+    // CREATE TABLE LIKE a view location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeView(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeView(location : Option[String]): Unit = {
     val sourceTabName = "tab1"
     val sourceViewName = "view"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTable(sourceTabName, targetTabName) {
       withView(sourceViewName) {
         spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
           .write.format("json").saveAsTable(sourceTabName)
         sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName")
-        sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName")
+
+        val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+        sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause")

         val sourceView = spark.sessionState.catalog.getTableMetadata(
           TableIdentifier(sourceViewName, Some("default")))
@@ -969,15 +1053,19 @@ class HiveDDLSuite
         val targetTable = spark.sessionState.catalog.getTableMetadata(
           TableIdentifier(targetTabName, Some("default")))

-        checkCreateTableLike(sourceView, targetTable)
+        checkCreateTableLike(sourceView, targetTable, tableType)
       }
     }
   }

-  private def checkCreateTableLike(sourceTable: CatalogTable, targetTable: CatalogTable): Unit = {
-    // The created table should be a MANAGED table with empty view text and original text.
-    assert(targetTable.tableType == CatalogTableType.MANAGED,
-      "the created table must be a Hive managed table")
+  private def checkCreateTableLike(
+      sourceTable: CatalogTable,
+      targetTable: CatalogTable,
+      tableType: CatalogTableType): Unit = {
+    // The created table should be a MANAGED table or EXTERNAL table with empty view text
+    // and original text.
+    assert(targetTable.tableType == tableType,
+      s"the created table must be a Hive ${tableType.name} table")
     assert(targetTable.viewText.isEmpty,
       "the view text in the created table must be empty")
     assert(targetTable.viewDefaultDatabase.isEmpty,
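The MANAGED versus EXTERNAL distinction that checkCreateTableLike now asserts has a practical consequence: dropping an EXTERNAL table only removes catalog metadata, while dropping a MANAGED table also removes its data under the warehouse directory. A rough way to observe this for a table created through the new clause is sketched below; the names and the /tmp path are hypothetical, and this snippet is not part of the PR's test suite.

// Hypothetical follow-up check (not part of this PR); reuses the `spark` session
// from the earlier sketch. Table names and the path are made up for illustration.
import java.nio.file.{Files, Paths}

val extPath = "/tmp/ctl_external_demo"
spark.sql("CREATE TABLE ctl_src (id INT) USING parquet")
spark.sql(s"CREATE TABLE ctl_ext LIKE ctl_src LOCATION '$extPath'")
spark.sql("INSERT INTO ctl_ext VALUES (1)")

// Dropping an EXTERNAL table should leave the files at the user-supplied location intact.
spark.sql("DROP TABLE ctl_ext")
assert(Files.exists(Paths.get(extPath)), "data at the external location should survive DROP TABLE")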