Skip to content

Commit 269c75b

Browse files
committed
[SPARK-40193][SQL] Merge subquery plans with different filters
1 parent f53cddd commit 269c75b

7 files changed

Lines changed: 671 additions & 81 deletions

File tree

core/src/main/scala/org/apache/spark/util/collection/BitSet.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,4 +250,11 @@ class BitSet(numBits: Int) extends Serializable {
250250

251251
/** Return the number of longs it would take to hold numBits. */
252252
private def bit2words(numBits: Int) = ((numBits - 1) >> 6) + 1
253+
254+
override def equals(other: Any): Boolean = other match {
255+
case otherSet: BitSet => numWords == otherSet.numWords && Arrays.equals(words, otherSet.words)
256+
case _ => false
257+
}
258+
259+
  // Hash only the words array (not numWords): consistent with equals, since equal word
  // contents with equal numWords always produce the same hash.
  override def hashCode(): Int = Arrays.hashCode(words)
253260
}

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3875,6 +3875,15 @@ object SQLConf {
38753875
.booleanConf
38763876
.createWithDefault(false)
38773877

3878+
val PLAN_MERGE_IGNORE_PUSHED_PUSHED_DATA_FILTERS =
3879+
buildConf("spark.sql.planMerge.ignorePushedDataFilters")
3880+
.internal()
3881+
.doc(s"When set to true plan merging is enabled even if physical scan operations have " +
3882+
"different data filters pushed down.")
3883+
.version("3.4.0")
3884+
.booleanConf
3885+
.createWithDefault(true)
3886+
38783887
val ERROR_MESSAGE_FORMAT = buildConf("spark.sql.error.messageFormat")
38793888
.doc("When PRETTY, the error message consists of textual representation of error class, " +
38803889
"message and query context. The MINIMAL and STANDARD formats are pretty JSON formats where " +

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,6 @@ class SparkOptimizer(
5454
PartitionPruning) :+
5555
Batch("InjectRuntimeFilter", FixedPoint(1),
5656
InjectRuntimeFilter) :+
57-
Batch("MergeScalarSubqueries", Once,
58-
MergeScalarSubqueries) :+
5957
Batch("Pushdown Filters from PartitionPruning", fixedPoint,
6058
PushDownPredicates) :+
6159
Batch("Cleanup filters that cannot be pushed down", Once,
@@ -79,6 +77,9 @@ class SparkOptimizer(
7977
PushPredicateThroughNonJoin,
8078
RemoveNoopOperators) :+
8179
Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*) :+
80+
Batch("Merge Scalar Subqueries", Once,
81+
MergeScalarSubqueries,
82+
RewriteDistinctAggregates) :+
8283
Batch("Replace CTE with Repartition", Once, ReplaceCTERefWithRepartition)
8384

8485
override def nonExcludableRules: Seq[String] = super.nonExcludableRules :+

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,14 @@ import org.apache.spark.util.collection.BitSet
5454
* is under the threshold with the addition of the next file, add it. If not, open a new bucket
5555
* and add it. Proceed to the next file.
5656
*/
57-
object FileSourceStrategy extends Strategy with PredicateHelper with Logging {
57+
/**
 * Planner strategy for file-based data sources. All the scan-building logic lives in
 * [[FileSourceScanPlan]]; this object only adapts its extractor to the Strategy interface.
 */
object FileSourceStrategy extends Strategy {
  def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
    case FileSourceScanPlan(sparkPlan, _) => Seq(sparkPlan)
    case _ => Seq.empty
  }
}
5863

64+
object FileSourceScanPlan extends PredicateHelper with Logging {
5965
// should prune buckets iff num buckets is greater than 1 and there is only one bucket column
6066
private def shouldPruneBuckets(bucketSpec: Option[BucketSpec]): Boolean = {
6167
bucketSpec match {
@@ -145,7 +151,7 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging {
145151
}
146152
}
147153

148-
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
154+
def unapply(plan: LogicalPlan): Option[(SparkPlan, FileSourceScanExec)] = plan match {
149155
case PhysicalOperation(projects, filters,
150156
l @ LogicalRelation(fsRelation: HadoopFsRelation, _, table, _)) =>
151157
// Filters on this relation fall into four categories based on where we can use them to avoid
@@ -291,8 +297,8 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging {
291297
execution.ProjectExec(projects, withFilter)
292298
}
293299

294-
withProjections :: Nil
300+
Some(withProjections, scan)
295301

296-
case _ => Nil
302+
case _ => None
297303
}
298304
}

0 commit comments

Comments
 (0)