-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-15646] [SQL] When spark.sql.hive.convertCTAS is true, the conversion rule needs to respect TEXTFILE/SEQUENCEFILE format and the user-defined location #13386
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
f613d9e
1e22d53
2615f67
c5cb32c
220a6e0
fa89081
b137cba
1991988
88e7422
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -936,7 +936,39 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
| comment = comment) | ||
|
|
||
| selectQuery match { | ||
| case Some(q) => CreateTableAsSelectLogicalPlan(tableDesc, q, ifNotExists) | ||
| case Some(q) => | ||
| // Hive does not allow to use a CTAS statement to create a partitioned table. | ||
| if (tableDesc.partitionColumnNames.nonEmpty) { | ||
| val errorMessage = "A Create Table As Select (CTAS) statement is not allowed to " + | ||
| "create a partitioned table using Hive's file formats. " + | ||
| "Please use the syntax of \"CREATE TABLE tableName USING dataSource " + | ||
| "OPTIONS (...) PARTITIONED BY ...\" to create a partitioned table through a " + | ||
| "CTAS statement." | ||
| throw operationNotAllowed(errorMessage, ctx) | ||
| } | ||
|
|
||
| val hasStorageProperties = (ctx.createFileFormat != null) || (ctx.rowFormat != null) | ||
| if (conf.convertCTAS && !hasStorageProperties) { | ||
| val mode = if (ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists | ||
| val options = rowStorage.serdeProperties ++ fileStorage.serdeProperties | ||
| val optionsWithPath = if (location.isDefined) { | ||
| options + ("path" -> location.get) | ||
| } else { | ||
| options | ||
| } | ||
| CreateTableUsingAsSelect( | ||
| tableIdent = tableDesc.identifier, | ||
| provider = conf.defaultDataSourceName, | ||
| temporary = false, | ||
| partitionColumns = tableDesc.partitionColumnNames.toArray, | ||
| bucketSpec = None, | ||
| mode = mode, | ||
| options = optionsWithPath, | ||
| q | ||
| ) | ||
| } else { | ||
| CreateTableAsSelectLogicalPlan(tableDesc, q, ifNotExists) | ||
|
||
| } | ||
| case None => CreateTableCommand(tableDesc, ifNotExists) | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -447,52 +447,20 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log | |
| case p: LogicalPlan if p.resolved => p | ||
|
|
||
| case p @ CreateTableAsSelectLogicalPlan(table, child, allowExisting) => | ||
| val schema = if (table.schema.nonEmpty) { | ||
| table.schema | ||
| val desc = if (table.storage.serde.isEmpty) { | ||
| // add default serde | ||
| table.withNewStorage( | ||
| serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) | ||
| } else { | ||
| child.output.map { a => | ||
| CatalogColumn(a.name, a.dataType.catalogString, a.nullable) | ||
| } | ||
| table | ||
| } | ||
|
|
||
| val desc = table.copy(schema = schema) | ||
|
|
||
| if (sessionState.convertCTAS && table.storage.serde.isEmpty) { | ||
| // Do the conversion when spark.sql.hive.convertCTAS is true and the query | ||
| // does not specify any storage format (file format and storage handler). | ||
| if (table.identifier.database.isDefined) { | ||
| throw new AnalysisException( | ||
| "Cannot specify database name in a CTAS statement " + | ||
| "when spark.sql.hive.convertCTAS is set to true.") | ||
| } | ||
|
|
||
| val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists | ||
| CreateTableUsingAsSelect( | ||
| TableIdentifier(desc.identifier.table), | ||
| sessionState.conf.defaultDataSourceName, | ||
| temporary = false, | ||
| Array.empty[String], | ||
| bucketSpec = None, | ||
| mode, | ||
| options = Map.empty[String, String], | ||
| child | ||
| ) | ||
| } else { | ||
| val desc = if (table.storage.serde.isEmpty) { | ||
| // add default serde | ||
| table.withNewStorage( | ||
| serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) | ||
| } else { | ||
| table | ||
| } | ||
| val QualifiedTableName(dbName, tblName) = getQualifiedTableName(table) | ||
|
|
||
| val QualifiedTableName(dbName, tblName) = getQualifiedTableName(table) | ||
|
|
||
| execution.CreateTableAsSelectCommand( | ||
| desc.copy(identifier = TableIdentifier(tblName, Some(dbName))), | ||
| child, | ||
| allowExisting) | ||
| } | ||
| execution.CreateHiveTableAsSelectCommand( | ||
| desc.copy(identifier = TableIdentifier(tblName, Some(dbName))), | ||
| child, | ||
| allowExisting) | ||
| } | ||
| } | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we keep this restriction?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Seems we do support having a db name, right? |
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think these will always be empty if we've reached here, no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yea