 package org.apache.spark.sql.execution.command

 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
 import org.apache.spark.sql.catalyst.catalog._
-import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.catalyst.expressions.NamedExpression
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.datasources._
-import org.apache.spark.sql.sources.{BaseRelation, InsertableRelation}
-import org.apache.spark.sql.types._
+import org.apache.spark.sql.sources.BaseRelation

 /**
  * A command used to create a data source table.
@@ -143,8 +141,9 @@ case class CreateDataSourceTableAsSelectCommand(
     val tableName = tableIdentWithDB.unquotedString

     var createMetastoreTable = false
-    var existingSchema = Option.empty[StructType]
-    if (sparkSession.sessionState.catalog.tableExists(tableIdentWithDB)) {
+    // We may need to reorder the columns of the query to match the existing table.
+    var reorderedColumns = Option.empty[Seq[NamedExpression]]
+    if (sessionState.catalog.tableExists(tableIdentWithDB)) {
       // Check if we need to throw an exception or just return.
       mode match {
         case SaveMode.ErrorIfExists =>
@@ -157,39 +156,74 @@ case class CreateDataSourceTableAsSelectCommand(
           // Since the table already exists and the save mode is Ignore, we will just return.
           return Seq.empty[Row]
         case SaveMode.Append =>
+          val existingTable = sessionState.catalog.getTableMetadata(tableIdentWithDB)
+          if (existingTable.tableType == CatalogTableType.VIEW) {
+            throw new AnalysisException("Saving data into a view is not allowed.")
+          }
+
+          if (existingTable.provider.get == DDLUtils.HIVE_PROVIDER) {
+            throw new AnalysisException(s"Saving data in the Hive serde table $tableName is " +
+              s"not supported yet. Please use the insertInto() API as an alternative.")
+          }
+
           // Check if the specified data source match the data source of the existing table.
-          val existingProvider = DataSource.lookupDataSource(provider)
+          val existingProvider = DataSource.lookupDataSource(existingTable.provider.get)
+          val specifiedProvider = DataSource.lookupDataSource(table.provider.get)
           // TODO: Check that options from the resolved relation match the relation that we are
           // inserting into (i.e. using the same compression).
+          if (existingProvider != specifiedProvider) {
+            throw new AnalysisException(s"The format of the existing table $tableName is " +
+              s"`${existingProvider.getSimpleName}`. It doesn't match the specified format " +
+              s"`${specifiedProvider.getSimpleName}`.")
+          }

-          // Pass a table identifier with database part, so that `lookupRelation` won't get temp
-          // views unexpectedly.
-          EliminateSubqueryAliases(sessionState.catalog.lookupRelation(tableIdentWithDB)) match {
-            case l @ LogicalRelation(_: InsertableRelation | _: HadoopFsRelation, _, _) =>
-              // check if the file formats match
-              l.relation match {
-                case r: HadoopFsRelation if r.fileFormat.getClass != existingProvider =>
-                  throw new AnalysisException(
-                    s"The file format of the existing table $tableName is " +
-                      s"`${r.fileFormat.getClass.getName}`. It doesn't match the specified " +
-                      s"format `$provider`")
-                case _ =>
-              }
-              if (query.schema.size != l.schema.size) {
-                throw new AnalysisException(
-                  s"The column number of the existing schema[${l.schema}] " +
-                    s"doesn't match the data schema[${query.schema}]'s")
-              }
-              existingSchema = Some(l.schema)
-            case s: SimpleCatalogRelation if DDLUtils.isDatasourceTable(s.metadata) =>
-              existingSchema = Some(s.metadata.schema)
-            case c: CatalogRelation if c.catalogTable.provider == Some(DDLUtils.HIVE_PROVIDER) =>
-              throw new AnalysisException("Saving data in the Hive serde table " +
-                s"${c.catalogTable.identifier} is not supported yet. Please use the " +
-                "insertInto() API as an alternative..")
-            case o =>
-              throw new AnalysisException(s"Saving data in ${o.toString} is not supported.")
+          if (query.schema.length != existingTable.schema.length) {
+            throw new AnalysisException(
+              s"The column number of the existing table $tableName " +
+                s"(${existingTable.schema.catalogString}) doesn't match the data schema " +
+                s"(${query.schema.catalogString})")
           }
+
+          val resolver = sessionState.conf.resolver
+          val tableCols = existingTable.schema.map(_.name)
+
+          reorderedColumns = Some(existingTable.schema.map { f =>
+            query.resolve(Seq(f.name), resolver).getOrElse {
+              val inputColumns = query.schema.map(_.name).mkString(", ")
+              throw new AnalysisException(
+                s"cannot resolve '${f.name}' given input columns: [$inputColumns]")
+            }
+          })
+
+          // Check if the specified partition columns match the existing table.
+          val specifiedPartCols = CatalogUtils.normalizePartCols(
+            tableName, tableCols, table.partitionColumnNames, resolver)
+          if (specifiedPartCols != existingTable.partitionColumnNames) {
+            throw new AnalysisException(
+              s"""
+                |Specified partitioning does not match the existing table $tableName.
+                |Specified partition columns: [${specifiedPartCols.mkString(", ")}]
+                |Existing partition columns: [${existingTable.partitionColumnNames.mkString(", ")}]
+              """.stripMargin)
+          }
+
+          // Check if the specified bucketing matches the existing table.
+          val specifiedBucketSpec = table.bucketSpec.map { bucketSpec =>
+            CatalogUtils.normalizeBucketSpec(tableName, tableCols, bucketSpec, resolver)
+          }
+          if (specifiedBucketSpec != existingTable.bucketSpec) {
+            val specifiedBucketString =
+              specifiedBucketSpec.map(_.toString).getOrElse("not bucketed")
+            val existingBucketString =
+              existingTable.bucketSpec.map(_.toString).getOrElse("not bucketed")
+            throw new AnalysisException(
+              s"""
+                |Specified bucketing does not match the existing table $tableName.
+                |Specified bucketing: $specifiedBucketString
+                |Existing bucketing: $existingBucketString
+              """.stripMargin)
+          }
+
         case SaveMode.Overwrite =>
           sessionState.catalog.dropTable(tableIdentWithDB, ignoreIfNotExists = true, purge = false)
           // Need to create the table again.
@@ -201,9 +235,9 @@ case class CreateDataSourceTableAsSelectCommand(
     }

     val data = Dataset.ofRows(sparkSession, query)
-    val df = existingSchema match {
-      // If we are inserting into an existing table, just use the existing schema.
-      case Some(s) => data.selectExpr(s.fieldNames: _*)
+    val df = reorderedColumns match {
+      // Reorder the columns of the query to match the existing table.
+      case Some(cols) => data.select(cols.map(Column(_)): _*)
       case None => data
     }

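
A minimal sketch of the user-visible effect of this patch, for reviewers. The table name, column names, and local-mode setup below are hypothetical and not part of the change: in Append mode the query's output columns are now resolved by name against the existing table's schema (via query.resolve) instead of being taken positionally, and a mismatched format, partitioning, or bucketing now fails fast with an AnalysisException.

import org.apache.spark.sql.SparkSession

object AppendByNameSketch {
  def main(args: Array[String]): Unit = {
    // Assumes a plain local SparkSession; the managed table is created in the
    // session catalog's default database.
    val spark = SparkSession.builder()
      .appName("append-by-name-sketch")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Create the target table with column order (id, name).
    Seq((1, "a")).toDF("id", "name").write.format("parquet").saveAsTable("target")

    // Append a query whose columns arrive in the opposite order. With this
    // patch they are resolved by name and reordered to (id, name) before the
    // insert; previously they were matched by position.
    Seq(("b", 2)).toDF("name", "id").write
      .format("parquet")
      .mode("append")
      .saveAsTable("target")

    spark.table("target").show()

    // Appending with a different format now fails fast with an
    // AnalysisException along the lines of: "The format of the existing
    // table ... is `ParquetFileFormat`. It doesn't match the specified format ...".
    // Seq(("c", 3)).toDF("name", "id").write.format("json")
    //   .mode("append").saveAsTable("target")

    spark.stop()
  }
}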