
Commit 7079481

Revert "[SPARK-28346][SQL] clone the query plan between analyzer, optimizer and planner"
This reverts commit e04f696.
1 parent c3b59c6

7 files changed: 18 additions & 93 deletions

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala

Lines changed: 3 additions & 10 deletions
@@ -292,7 +292,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
       mapChildren(_.transformDown(rule))
     } else {
       // If the transform function replaces this node with a new one, carry over the tags.
-      afterRule.copyTagsFrom(this)
+      afterRule.tags ++= this.tags
       afterRule.mapChildren(_.transformDown(rule))
     }
   }
@@ -316,7 +316,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
       }
     }
     // If the transform function replaces this node with a new one, carry over the tags.
-    newNode.copyTagsFrom(this)
+    newNode.tags ++= this.tags
     newNode
   }

@@ -434,15 +434,8 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
   private def makeCopy(
       newArgs: Array[AnyRef],
       allowEmptyArgs: Boolean): BaseType = attachTree(this, "makeCopy") {
-    val allCtors = getClass.getConstructors
-    if (newArgs.isEmpty && allCtors.isEmpty) {
-      // This is a singleton object which doesn't have any constructor. Just return `this` as we
-      // can't copy it.
-      return this
-    }
-
     // Skip no-arg constructors that are just there for kryo.
-    val ctors = allCtors.filter(allowEmptyArgs || _.getParameterTypes.size != 0)
+    val ctors = getClass.getConstructors.filter(allowEmptyArgs || _.getParameterTypes.size != 0)
     if (ctors.isEmpty) {
       sys.error(s"No valid constructor for $nodeName")
     }
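
Both the removed `copyTagsFrom(this)` and the restored `afterRule.tags ++= this.tags` do the same job here: when a rule replaces a node, the replacement inherits the original node's tags. A minimal sketch of that mechanism, assuming only that `tags` is a mutable map keyed by `TreeNodeTag` (the `Node` class below is an illustrative stand-in, not the Spark class):

import scala.collection.mutable

// Illustrative stand-in for TreeNode's tag machinery.
case class TreeNodeTag[T](name: String)

class Node {
  val tags: mutable.Map[TreeNodeTag[_], Any] = mutable.Map.empty
  def setTagValue[T](tag: TreeNodeTag[T], value: T): Unit = tags(tag) = value
  def getTagValue[T](tag: TreeNodeTag[T]): Option[T] =
    tags.get(tag).map(_.asInstanceOf[T])
}

val original = new Node
original.setTagValue(TreeNodeTag[String]("a"), "v")

// When a rule replaces `original`, the transform carries the tags over:
val afterRule = new Node
afterRule.tags ++= original.tags
afterRule.getTagValue(TreeNodeTag[String]("a"))  // Some("v")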

sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala

Lines changed: 11 additions & 13 deletions
@@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.ByteCodeStats
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer}
 import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat
+import org.apache.spark.sql.catalyst.util.StringUtils.{PlanStringConcat, StringConcat}
 import org.apache.spark.sql.catalyst.util.truncatedString
 import org.apache.spark.sql.dynamicpruning.PlanDynamicPruningFilters
 import org.apache.spark.sql.execution.adaptive.InsertAdaptiveSparkPlan
@@ -62,38 +62,36 @@ class QueryExecution(

   lazy val analyzed: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.ANALYSIS) {
     SparkSession.setActiveSession(sparkSession)
-    // We can't clone `logical` here, which will reset the `_analyzed` flag.
     sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker)
   }

   lazy val withCachedData: LogicalPlan = {
     assertAnalyzed()
     assertSupported()
-    // clone the plan to avoid sharing the plan instance between different stages like analyzing,
-    // optimizing and planning.
-    sparkSession.sharedState.cacheManager.useCachedData(analyzed.clone())
+    sparkSession.sharedState.cacheManager.useCachedData(analyzed)
   }

   lazy val optimizedPlan: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.OPTIMIZATION) {
-    // clone the plan to avoid sharing the plan instance between different stages like analyzing,
-    // optimizing and planning.
-    sparkSession.sessionState.optimizer.executeAndTrack(withCachedData.clone(), tracker)
+    sparkSession.sessionState.optimizer.executeAndTrack(withCachedData, tracker)
   }

   lazy val sparkPlan: SparkPlan = tracker.measurePhase(QueryPlanningTracker.PLANNING) {
     SparkSession.setActiveSession(sparkSession)
+    // Runtime re-optimization requires a unique instance of every node in the logical plan.
+    val logicalPlan = if (sparkSession.sessionState.conf.adaptiveExecutionEnabled) {
+      optimizedPlan.clone()
+    } else {
+      optimizedPlan
+    }
     // TODO: We use next(), i.e. take the first plan returned by the planner, here for now,
     //       but we will implement to choose the best plan.
-    // Clone the logical plan here, in case the planner rules change the states of the logical plan.
-    planner.plan(ReturnAnswer(optimizedPlan.clone())).next()
+    planner.plan(ReturnAnswer(logicalPlan)).next()
   }

   // executedPlan should not be used to initialize any SparkPlan. It should be
   // only used for execution.
   lazy val executedPlan: SparkPlan = tracker.measurePhase(QueryPlanningTracker.PLANNING) {
-    // clone the plan to avoid sharing the plan instance between different stages like analyzing,
-    // optimizing and planning.
-    prepareForExecution(sparkPlan.clone())
+    prepareForExecution(sparkPlan)
   }

   /**
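
After this revert, `QueryExecution` clones a plan in only one place: `sparkPlan` clones `optimizedPlan` when adaptive execution is enabled, because runtime re-optimization mutates the logical plan it is handed; every other phase now passes the shared instance along. A minimal sketch of that guard (a hypothetical free-standing helper, not a method of `QueryExecution`):

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// Clone only when adaptive execution may re-optimize (and thus mutate) the
// plan at runtime; otherwise reuse the shared optimized-plan instance.
def plannerInput(optimized: LogicalPlan, aqeEnabled: Boolean): LogicalPlan =
  if (aqeEnabled) optimized.clone() else optimized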

sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala

Lines changed: 0 additions & 7 deletions
@@ -223,13 +223,6 @@ case class InMemoryRelation(
       statsOfPlanToCache).asInstanceOf[this.type]
   }

-  // override `clone` since the default implementation won't carry over mutable states.
-  override def clone(): LogicalPlan = {
-    val cloned = this.copy()
-    cloned.statsOfPlanToCache = this.statsOfPlanToCache
-    cloned
-  }
-
   override def simpleString(maxFields: Int): String =
     s"InMemoryRelation [${truncatedString(output, ", ", maxFields)}], ${cacheBuilder.storageLevel}"
 }
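
The override deleted here existed because a Scala case class's generated `copy()` only re-binds constructor parameters; a `var` declared in the class body is re-initialized in the copy, so `statsOfPlanToCache` had to be reassigned by hand. A self-contained sketch of that language behavior (the `Relation` class is hypothetical, not the Spark one):

// copy() re-runs the class body, resetting body vars to their initializers.
case class Relation(name: String) {
  var cachedStats: Option[Long] = None
}

val r = Relation("t")
r.cachedStats = Some(42L)

val copied = r.copy()
copied.cachedStats  // None: the var was not carried over, which is why the
                    // removed override reassigned statsOfPlanToCache after copy()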

sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.command
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{Row, SparkSession}
 import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql.types.{StringType, StructField, StructType}

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala

Lines changed: 0 additions & 6 deletions
@@ -52,10 +52,4 @@ case class SaveIntoDataSourceCommand(
     val redacted = SQLConf.get.redactOptions(options)
     s"SaveIntoDataSourceCommand ${dataSource}, ${redacted}, ${mode}"
   }
-
-  // Override `clone` since the default implementation will turn `CaseInsensitiveMap` to a normal
-  // map.
-  override def clone(): LogicalPlan = {
-    SaveIntoDataSourceCommand(query.clone(), dataSource, options, mode)
-  }
 }
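
Per the comment in the removed code, the default `clone()` implementation turns the `options` field from a `CaseInsensitiveMap` into a plain `Map`, which silently changes lookup behavior. A small sketch of the difference (the values are illustrative):

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

val opts = CaseInsensitiveMap(Map("Path" -> "/tmp/out"))
opts.get("path")                       // Some("/tmp/out"): lookups ignore key case
Map("Path" -> "/tmp/out").get("path")  // None: a plain Map is case-sensitive,
                                       // the behavior the removed override avoided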

sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala

Lines changed: 1 addition & 54 deletions
@@ -18,10 +18,8 @@ package org.apache.spark.sql.execution

 import scala.io.Source

-import org.apache.spark.sql.{AnalysisException, FastOperator}
-import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation}
-import org.apache.spark.sql.catalyst.trees.TreeNodeTag
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession

@@ -139,56 +137,5 @@ class QueryExecutionSuite extends SharedSparkSession {
       (_: LogicalPlan) => throw new Error("error"))
     val error = intercept[Error](qe.toString)
     assert(error.getMessage.contains("error"))
-
-    spark.experimental.extraStrategies = Nil
-  }
-
-  test("SPARK-28346: clone the query plan between different stages") {
-    val tag1 = new TreeNodeTag[String]("a")
-    val tag2 = new TreeNodeTag[String]("b")
-    val tag3 = new TreeNodeTag[String]("c")
-
-    def assertNoTag(tag: TreeNodeTag[String], plans: QueryPlan[_]*): Unit = {
-      plans.foreach { plan =>
-        assert(plan.getTagValue(tag).isEmpty)
-      }
-    }
-
-    val df = spark.range(10)
-    val analyzedPlan = df.queryExecution.analyzed
-    val cachedPlan = df.queryExecution.withCachedData
-    val optimizedPlan = df.queryExecution.optimizedPlan
-
-    analyzedPlan.setTagValue(tag1, "v")
-    assertNoTag(tag1, cachedPlan, optimizedPlan)
-
-    cachedPlan.setTagValue(tag2, "v")
-    assertNoTag(tag2, analyzedPlan, optimizedPlan)
-
-    optimizedPlan.setTagValue(tag3, "v")
-    assertNoTag(tag3, analyzedPlan, cachedPlan)
-
-    val tag4 = new TreeNodeTag[String]("d")
-    try {
-      spark.experimental.extraStrategies = Seq(new SparkStrategy() {
-        override def apply(plan: LogicalPlan): Seq[SparkPlan] = {
-          plan.foreach {
-            case r: org.apache.spark.sql.catalyst.plans.logical.Range =>
-              r.setTagValue(tag4, "v")
-            case _ =>
-          }
-          Seq(FastOperator(plan.output))
-        }
-      })
-      // trigger planning
-      df.queryExecution.sparkPlan
-      assert(optimizedPlan.getTagValue(tag4).isEmpty)
-    } finally {
-      spark.experimental.extraStrategies = Nil
-    }
-
-    val tag5 = new TreeNodeTag[String]("e")
-    df.queryExecution.executedPlan.setTagValue(tag5, "v")
-    assertNoTag(tag5, df.queryExecution.sparkPlan)
   }
 }
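
The deleted test asserted that each planning phase received its own cloned plan, so a tag set in one phase never leaked into another. With the clones reverted, phases can share node instances again; a hedged sketch of the post-revert behavior (the exact outcome depends on the plan and on caching):

import org.apache.spark.sql.catalyst.trees.TreeNodeTag

val df = spark.range(10)
val tag = new TreeNodeTag[String]("a")
df.queryExecution.analyzed.setTagValue(tag, "v")
// Without the clone, withCachedData may be the very same tree as the
// analyzed plan, so the tag can now be visible downstream:
df.queryExecution.withCachedData.getTagValue(tag)  // likely Some("v")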

sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/PartitionBatchPruningSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -180,7 +180,7 @@ class PartitionBatchPruningSuite extends SharedSparkSession {
     val result = df.collect().map(_(0)).toArray
     assert(result.length === 1)

-    val (readPartitions, readBatches) = df.queryExecution.executedPlan.collect {
+    val (readPartitions, readBatches) = df.queryExecution.sparkPlan.collect {
       case in: InMemoryTableScanExec => (in.readPartitions.value, in.readBatches.value)
     }.head
     assert(readPartitions === 5)
@@ -201,7 +201,7 @@ class PartitionBatchPruningSuite extends SharedSparkSession {
       df.collect().map(_(0)).toArray
     }

-    val (readPartitions, readBatches) = df.queryExecution.executedPlan.collect {
+    val (readPartitions, readBatches) = df.queryExecution.sparkPlan.collect {
       case in: InMemoryTableScanExec => (in.readPartitions.value, in.readBatches.value)
     }.head
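
These two test tweaks follow from the `executedPlan` change above: with `prepareForExecution(sparkPlan)` no longer operating on a clone, `sparkPlan` and `executedPlan` can share the same `InMemoryTableScanExec` instances, so accumulators updated while the query runs are readable from `sparkPlan` again. Assuming a cached DataFrame `df` and the suite's imports, the pattern is:

// The scan node collected from sparkPlan can be the same instance that
// actually ran, so its accumulators reflect the finished query.
df.collect()
val (parts, batches) = df.queryExecution.sparkPlan.collect {
  case scan: InMemoryTableScanExec => (scan.readPartitions.value, scan.readBatches.value)
}.head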
