remove the overwrite parameter

windpiger · windpiger · commit 304ae3112950 · 2017-02-28T20:13:10.000+08:00
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
@@ -140,8 +140,8 @@ case class CreateDataSourceTableAsSelectCommand(
         return Seq.empty
       }
 
-      saveDataIntoTable(sparkSession, table, table.storage.locationUri, query, mode,
-        overwrite = false, tableExists = true)
+      saveDataIntoTable(
+        sparkSession, table, table.storage.locationUri, query, SaveMode.Append, tableExists = true)
     } else {
       assert(table.schema.isEmpty)
 
@@ -151,7 +151,7 @@ case class CreateDataSourceTableAsSelectCommand(
         table.storage.locationUri
       }
       val result = saveDataIntoTable(
-        sparkSession, table, tableLocation, query, mode, overwrite = true, tableExists = false)
+        sparkSession, table, tableLocation, query, SaveMode.Overwrite, tableExists = false)
       val newTable = table.copy(
         storage = table.storage.copy(locationUri = tableLocation),
         // We will use the schema of resolved.relation as the schema of the table (instead of
@@ -178,7 +178,6 @@ case class CreateDataSourceTableAsSelectCommand(
       tableLocation: Option[String],
       data: LogicalPlan,
       mode: SaveMode,
-      overwrite: Boolean,
       tableExists: Boolean): BaseRelation = {
     // Create the relation based on the input logical plan: `data`.
     val pathOption = tableLocation.map("path" -> _)
@@ -191,7 +190,7 @@ case class CreateDataSourceTableAsSelectCommand(
       catalogTable = if (tableExists) Some(table) else None)
 
     try {
-      dataSource.writeAndRead(mode, Dataset.ofRows(session, query), Some(overwrite))
+      dataSource.writeAndRead(mode, Dataset.ofRows(session, query))
     } catch {
       case ex: AnalysisException =>
         logError(s"Failed to write to table ${table.identifier.unquotedString}", ex)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -406,11 +406,7 @@ case class DataSource(
   /**
    * Writes the given [[DataFrame]] out in this [[FileFormat]].
    */
-  private def writeInFileFormat(
-      format: FileFormat,
-      mode: SaveMode,
-      data: DataFrame,
-      overwrite: Option[Boolean]): Unit = {
+  private def writeInFileFormat(format: FileFormat, mode: SaveMode, data: DataFrame): Unit = {
     // Don't glob path for the write path.  The contracts here are:
     //  1. Only one output path can be specified on the write path;
     //  2. Output path must be a legal HDFS style file system path;
@@ -426,26 +422,18 @@ case class DataSource(
         s"got: ${allPaths.mkString(", ")}")
     }
 
-    val isOverWrite = overwrite match {
-      case Some(ow) => ow
-      case _ =>
-        if (pathExists) {
-          if (mode == SaveMode.ErrorIfExists) {
-            throw new AnalysisException(s"path $outputPath already exists.")
-          }
-          if (mode == SaveMode.Ignore) {
-            // Since the path already exists and the save mode is Ignore, we will just return.
-            return
-          }
-
-          if (mode == SaveMode.Append) false
-          else if (mode == SaveMode.Overwrite) true
-          else {
-            throw new IllegalStateException(s"unsupported save mode $mode")
-          }
-        } else true
+    if (pathExists) {
+      if (mode == SaveMode.ErrorIfExists) {
+        throw new AnalysisException(s"path $outputPath already exists.")
+      }
+      if (mode == SaveMode.Ignore) {
+        // Since the path already exists and the save mode is Ignore, we will just return.
+        return
+      }
     }
 
+    // ErrorIfExists and Ignore only reach here when the path is absent, so treat them as Append.
+    val transformedMode = if (mode != SaveMode.Overwrite) SaveMode.Append else mode
     val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
     PartitioningUtils.validatePartitionColumn(data.schema, partitionColumns, caseSensitive)
 
@@ -476,8 +464,7 @@ case class DataSource(
         fileFormat = format,
         options = options,
         query = data.logicalPlan,
-        mode = mode,
-        isOverWrite,
+        mode = transformedMode,
         catalogTable = catalogTable,
         fileIndex = fileIndex)
       sparkSession.sessionState.executePlan(plan).toRdd
@@ -487,7 +474,7 @@ case class DataSource(
    * Writes the given [[DataFrame]] out to this [[DataSource]] and returns a [[BaseRelation]] for
    * the following reading.
    */
-  def writeAndRead(mode: SaveMode, data: DataFrame, overwrite: Option[Boolean]): BaseRelation = {
+  def writeAndRead(mode: SaveMode, data: DataFrame): BaseRelation = {
     if (data.schema.map(_.dataType).exists(_.isInstanceOf[CalendarIntervalType])) {
       throw new AnalysisException("Cannot save interval data type into external storage.")
     }
@@ -496,7 +483,7 @@ case class DataSource(
       case dataSource: CreatableRelationProvider =>
         dataSource.createRelation(sparkSession.sqlContext, mode, caseInsensitiveOptions, data)
       case format: FileFormat =>
-        writeInFileFormat(format, mode, data, overwrite)
+        writeInFileFormat(format, mode, data)
         // Replace the schema with that of the DataFrame we just wrote out to avoid re-inferring
         copy(userSpecifiedSchema = Some(data.schema.asNullable)).resolveRelation()
       case _ =>
@@ -516,7 +503,7 @@ case class DataSource(
       case dataSource: CreatableRelationProvider =>
         dataSource.createRelation(sparkSession.sqlContext, mode, caseInsensitiveOptions, data)
       case format: FileFormat =>
-        writeInFileFormat(format, mode, data, None)
+        writeInFileFormat(format, mode, data)
       case _ =>
         sys.error(s"${providingClass.getCanonicalName} does not allow create table as select.")
     }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
@@ -201,7 +201,6 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] {
         t.options,
         actualQuery,
         mode,
-        overwrite,
         table,
         Some(t.location))
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
@@ -47,7 +47,6 @@ case class InsertIntoHadoopFsRelationCommand(
     options: Map[String, String],
     query: LogicalPlan,
     mode: SaveMode,
-    overwrite: Boolean,
     catalogTable: Option[CatalogTable],
     fileIndex: Option[FileIndex])
   extends RunnableCommand {
@@ -98,7 +97,7 @@ case class InsertIntoHadoopFsRelationCommand(
       outputPath = outputPath.toString,
       isAppend = isAppend)
 
-    if (overwrite) {
+    if (mode == SaveMode.Overwrite) {
       deleteMatchingPartitions(fs, qualifiedOutputPath, customPartitionLocations, committer)
     }
 

Original file line number	Diff line number	Diff line change
`@@ -201,7 +201,6 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] {`
`201`	`201`	`t.options,`
`202`	`202`	`actualQuery,`
`203`	`203`	`mode,`
`204`		`- overwrite,`
`205`	`204`	`table,`
`206`	`205`	`Some(t.location))`
`207`	`206`	`}`