Commit c16a66a
[SPARK-36194][SQL] Add a logical plan visitor to propagate the distinct attributes
### What changes were proposed in this pull request?

1. This PR adds a new logical plan visitor named `DistinctKeyVisitor` to find all the distinct attributes in the current logical plan. For example:

   ```scala
   spark.sql("CREATE TABLE t(a int, b int, c int) using parquet")
   spark.sql("SELECT a, b, a % 10, max(c), sum(b) FROM t GROUP BY a, b").queryExecution.analyzed.distinctKeys
   ```

   The output is: `{a#1, b#2}`.

2. Enhances `RemoveRedundantAggregates` to remove an aggregation if it is group-only and its child already guarantees distinct output. For example:

   ```sql
   set spark.sql.autoBroadcastJoinThreshold=-1; -- avoid PushDownLeftSemiAntiJoin
   create table t1 using parquet as select id a, id as b from range(10);
   create table t2 using parquet as select id as a, id as b from range(8);
   select t11.a, t11.b from (select distinct a, b from t1) t11 left semi join t2 on (t11.a = t2.a) group by t11.a, t11.b;
   ```

Before this PR:

```
== Optimized Logical Plan ==
Aggregate [a#6L, b#7L], [a#6L, b#7L], Statistics(sizeInBytes=1492.0 B)
+- Join LeftSemi, (a#6L = a#8L), Statistics(sizeInBytes=1492.0 B)
   :- Aggregate [a#6L, b#7L], [a#6L, b#7L], Statistics(sizeInBytes=1492.0 B)
   :  +- Filter isnotnull(a#6L), Statistics(sizeInBytes=1492.0 B)
   :     +- Relation default.t1[a#6L,b#7L] parquet, Statistics(sizeInBytes=1492.0 B)
   +- Project [a#8L], Statistics(sizeInBytes=984.0 B)
      +- Filter isnotnull(a#8L), Statistics(sizeInBytes=1476.0 B)
         +- Relation default.t2[a#8L,b#9L] parquet, Statistics(sizeInBytes=1476.0 B)
```

After this PR:

```
== Optimized Logical Plan ==
Join LeftSemi, (a#6L = a#8L), Statistics(sizeInBytes=1492.0 B)
:- Aggregate [a#6L, b#7L], [a#6L, b#7L], Statistics(sizeInBytes=1492.0 B)
:  +- Filter isnotnull(a#6L), Statistics(sizeInBytes=1492.0 B)
:     +- Relation default.t1[a#6L,b#7L] parquet, Statistics(sizeInBytes=1492.0 B)
+- Project [a#8L], Statistics(sizeInBytes=984.0 B)
   +- Filter isnotnull(a#8L), Statistics(sizeInBytes=1476.0 B)
      +- Relation default.t2[a#8L,b#9L] parquet, Statistics(sizeInBytes=1476.0 B)
```

### Why are the changes needed?

Improve query performance.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit tests and a TPC-DS benchmark:

SQL | Before this PR (seconds) | After this PR (seconds)
-- | -- | --
q14a | 206 | 193
q38 | 59 | 41
q87 | 127 | 113

Closes #35779 from wangyum/SPARK-36194.

Authored-by: Yuming Wang <[email protected]>
Signed-off-by: Yuming Wang <[email protected]>
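A hedged way to reproduce the plan change above from spark-shell (assuming the `t1` and `t2` tables created by the SQL in the example, and an available `spark` session):

```scala
// With this PR, the "Optimized Logical Plan" section of the output no
// longer shows the redundant top-level Aggregate over the semi join.
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", "-1")
val q = spark.sql(
  """SELECT t11.a, t11.b
    |FROM (SELECT DISTINCT a, b FROM t1) t11
    |LEFT SEMI JOIN t2 ON (t11.a = t2.a)
    |GROUP BY t11.a, t11.b""".stripMargin)
q.explain("extended")
```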
1 parent 0005b41 commit c16a66a

File tree

31 files changed: +4770, -4643 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregates.scala

Lines changed: 6 additions & 2 deletions
```diff
@@ -18,9 +18,9 @@
 package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.analysis.PullOutNondeterministic
-import org.apache.spark.sql.catalyst.expressions.{AliasHelper, AttributeSet}
+import org.apache.spark.sql.catalyst.expressions.{AliasHelper, AttributeSet, ExpressionSet}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
-import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.TreePattern.AGGREGATE
@@ -47,6 +47,10 @@ object RemoveRedundantAggregates extends Rule[LogicalPlan] with AliasHelper {
       } else {
         newAggregate
       }
+
+    case agg @ Aggregate(groupingExps, _, child)
+        if agg.groupOnly && child.distinctKeys.exists(_.subsetOf(ExpressionSet(groupingExps))) =>
+      Project(agg.aggregateExpressions, child)
   }
 
   private def isLowerRedundant(upper: Aggregate, lower: Aggregate): Boolean = {
```
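The new case also fires when the redundant aggregate is not directly on top of another aggregate. A hedged sketch (`t1` is the table from the description; names are illustrative):

```scala
// The Filter between the two aggregates means the pre-existing
// Aggregate-over-Aggregate case does not match directly, but the new case
// removes the outer group-only Aggregate because the Filter child's
// distinctKeys already contain {a, b}.
val df = spark.sql(
  "SELECT a, b FROM (SELECT DISTINCT a, b FROM t1) t WHERE a > 0 GROUP BY a, b")
df.queryExecution.optimizedPlan  // expected: a single Aggregate remains
```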
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitor.scala

Lines changed: 140 additions & 0 deletions

```scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, ExpressionSet, NamedExpression}
import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
import org.apache.spark.sql.catalyst.plans.{Inner, LeftOuter, LeftSemiOrAnti, RightOuter}

/**
 * A visitor pattern for traversing a [[LogicalPlan]] tree and propagating the distinct attributes.
 */
object DistinctKeyVisitor extends LogicalPlanVisitor[Set[ExpressionSet]] {

  private def projectDistinctKeys(
      keys: Set[ExpressionSet], projectList: Seq[NamedExpression]): Set[ExpressionSet] = {
    val outputSet = ExpressionSet(projectList.map(_.toAttribute))
    val aliases = projectList.filter(_.isInstanceOf[Alias])
    if (aliases.isEmpty) {
      keys.filter(_.subsetOf(outputSet))
    } else {
      // Rewrite each key through the aliases, so e.g. `a AS x` turns the key {a} into {x}.
      val aliasedDistinctKeys = keys.map { expressionSet =>
        expressionSet.map { expression =>
          expression transform {
            case expr: Expression =>
              // TODO: Expand distinctKeys for redundant aliases on the same expression
              aliases
                .collectFirst { case a: Alias if a.child.semanticEquals(expr) => a.toAttribute }
                .getOrElse(expr)
          }
        }
      }
      aliasedDistinctKeys.collect {
        case es: ExpressionSet if es.subsetOf(outputSet) => ExpressionSet(es)
      } ++ keys.filter(_.subsetOf(outputSet))
    }.filter(_.nonEmpty)
  }

  override def default(p: LogicalPlan): Set[ExpressionSet] = Set.empty[ExpressionSet]

  override def visitAggregate(p: Aggregate): Set[ExpressionSet] = {
    val groupingExps = ExpressionSet(p.groupingExpressions) // handle group by a, a
    projectDistinctKeys(Set(groupingExps), p.aggregateExpressions)
  }

  override def visitDistinct(p: Distinct): Set[ExpressionSet] = Set(ExpressionSet(p.output))

  override def visitExcept(p: Except): Set[ExpressionSet] =
    if (!p.isAll) Set(ExpressionSet(p.output)) else default(p)

  override def visitExpand(p: Expand): Set[ExpressionSet] = default(p)

  override def visitFilter(p: Filter): Set[ExpressionSet] = p.child.distinctKeys

  override def visitGenerate(p: Generate): Set[ExpressionSet] = default(p)

  override def visitGlobalLimit(p: GlobalLimit): Set[ExpressionSet] = {
    p.maxRows match {
      case Some(value) if value <= 1 => Set(ExpressionSet(p.output))
      case _ => p.child.distinctKeys
    }
  }

  override def visitIntersect(p: Intersect): Set[ExpressionSet] = {
    if (!p.isAll) Set(ExpressionSet(p.output)) else default(p)
  }

  override def visitJoin(p: Join): Set[ExpressionSet] = {
    p match {
      // Semi/anti joins only filter the left side, so its keys still hold.
      case Join(_, _, LeftSemiOrAnti(_), _, _) =>
        p.left.distinctKeys
      case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, _, _, left, right, _)
          if left.distinctKeys.nonEmpty || right.distinctKeys.nonEmpty =>
        val rightJoinKeySet = ExpressionSet(rightKeys)
        val leftJoinKeySet = ExpressionSet(leftKeys)
        joinType match {
          // Both sides are unique on their join keys: rows match at most
          // one-to-one, so keys from both sides survive.
          case Inner if left.distinctKeys.exists(_.subsetOf(leftJoinKeySet)) &&
            right.distinctKeys.exists(_.subsetOf(rightJoinKeySet)) =>
            left.distinctKeys ++ right.distinctKeys
          // One side is unique on its join keys: each row from the other
          // side matches at most once, so that side's keys survive.
          case Inner | LeftOuter if right.distinctKeys.exists(_.subsetOf(rightJoinKeySet)) =>
            p.left.distinctKeys
          case Inner | RightOuter if left.distinctKeys.exists(_.subsetOf(leftJoinKeySet)) =>
            p.right.distinctKeys
          case _ =>
            default(p)
        }
      case _ => default(p)
    }
  }

  override def visitLocalLimit(p: LocalLimit): Set[ExpressionSet] = p.child.distinctKeys

  override def visitPivot(p: Pivot): Set[ExpressionSet] = default(p)

  override def visitProject(p: Project): Set[ExpressionSet] = {
    if (p.child.distinctKeys.nonEmpty) {
      projectDistinctKeys(p.child.distinctKeys, p.projectList)
    } else {
      default(p)
    }
  }

  override def visitRepartition(p: Repartition): Set[ExpressionSet] = p.child.distinctKeys

  override def visitRepartitionByExpr(p: RepartitionByExpression): Set[ExpressionSet] =
    p.child.distinctKeys

  override def visitSample(p: Sample): Set[ExpressionSet] = {
    if (!p.withReplacement) p.child.distinctKeys else default(p)
  }

  override def visitScriptTransform(p: ScriptTransformation): Set[ExpressionSet] = default(p)

  override def visitUnion(p: Union): Set[ExpressionSet] = default(p)

  override def visitWindow(p: Window): Set[ExpressionSet] = p.child.distinctKeys

  override def visitTail(p: Tail): Set[ExpressionSet] = p.child.distinctKeys

  override def visitSort(p: Sort): Set[ExpressionSet] = p.child.distinctKeys

  override def visitRebalancePartitions(p: RebalancePartitions): Set[ExpressionSet] =
    p.child.distinctKeys

  override def visitWithCTE(p: WithCTE): Set[ExpressionSet] = p.plan.distinctKeys
}
```
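To see `projectDistinctKeys` rewriting keys through aliases, one can inspect the analyzed plan. A hedged sketch using the table `t` from the description:

```scala
// visitAggregate starts from the grouping keys {a, b}; the Alias `a AS x`
// maps the key to {x, b}, which is a subset of the output and is kept.
val plan = spark.sql(
  "SELECT a AS x, b, max(c) FROM t GROUP BY a, b").queryExecution.analyzed
println(plan.distinctKeys)  // expected: a single ExpressionSet over x and b
```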

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala

Lines changed: 1 addition & 0 deletions
```diff
@@ -31,6 +31,7 @@ abstract class LogicalPlan
   extends QueryPlan[LogicalPlan]
   with AnalysisHelper
   with LogicalPlanStats
+  with LogicalPlanDistinctKeys
   with QueryPlanConstraints
   with Logging {
```
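With the mixin in place, every analyzed plan exposes `distinctKeys`. A hedged one-liner (session name assumed):

```scala
// SELECT DISTINCT analyzes to a Distinct node, whose whole output is
// reported as a distinct key by visitDistinct.
spark.sql("SELECT DISTINCT id FROM range(10)")
  .queryExecution.analyzed.distinctKeys  // Set({id#...})
```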
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanDistinctKeys.scala

Lines changed: 34 additions & 0 deletions

```scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.catalyst.expressions.ExpressionSet
import org.apache.spark.sql.internal.SQLConf.PROPAGATE_DISTINCT_KEYS_ENABLED

/**
 * A trait to add distinct attributes to [[LogicalPlan]]. For example:
 * {{{
 *   SELECT a, b, SUM(c) FROM Tab1 GROUP BY a, b
 *   // returns a, b
 * }}}
 */
trait LogicalPlanDistinctKeys { self: LogicalPlan =>
  lazy val distinctKeys: Set[ExpressionSet] = {
    if (conf.getConf(PROPAGATE_DISTINCT_KEYS_ENABLED)) DistinctKeyVisitor.visit(self) else Set.empty
  }
}
```

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 9 additions & 0 deletions
```diff
@@ -744,6 +744,15 @@ object SQLConf {
       .booleanConf
       .createWithDefault(true)
 
+  val PROPAGATE_DISTINCT_KEYS_ENABLED =
+    buildConf("spark.sql.optimizer.propagateDistinctKeys.enabled")
+      .internal()
+      .doc("When true, the query optimizer will propagate a set of distinct attributes from the " +
+        "current node and use it to optimize query.")
+      .version("3.3.0")
+      .booleanConf
+      .createWithDefault(true)
+
   val ESCAPED_STRING_LITERALS = buildConf("spark.sql.parser.escapedStringLiterals")
     .internal()
     .doc("When true, string literals (including regex patterns) remain escaped in our SQL " +
```

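The propagation is on by default; a hedged sketch of opting out at runtime:

```scala
// Turning the internal conf off makes distinctKeys evaluate to Set.empty,
// which disables the new RemoveRedundantAggregates case.
spark.conf.set("spark.sql.optimizer.propagateDistinctKeys.enabled", "false")
```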