apache · huaxingao · Oct 11, 2021 · Oct 13, 2021 · Oct 13, 2021 · Oct 13, 2021
diff --git a/...main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala b/...main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
@@ -130,10 +130,10 @@ case class ParquetScanBuilder(
       // are combined with filter or group by
       // e.g. SELECT COUNT(col1) FROM t WHERE col2 = 8
       //      SELECT COUNT(col1) FROM t GROUP BY col2
-      // Todo: 1. add support if groupby column is partition col
-      //          (https://issues.apache.org/jira/browse/SPARK-36646)
-      //       2. add support if filter col is partition col
-      //          (https://issues.apache.org/jira/browse/SPARK-36647)
+      // However, if the filter is on a partition column,
+      // max/min/count can still be pushed down
+      // Todo:  add support if groupby column is partition col
+      //        (https://issues.apache.org/jira/browse/SPARK-36646)
       return false
     }
 

diff --git a/...la/org/apache/spark/sql/execution/datasources/parquet/ParquetAggregatePushDownSuite.scala b/...la/org/apache/spark/sql/execution/datasources/parquet/ParquetAggregatePushDownSuite.scala
@@ -221,7 +221,7 @@ abstract class ParquetAggregatePushDownSuite
     }
   }
 
-  test("aggregate push down - query with filter not push down") {
+  test("aggregate push down - aggregate with data filter cannot be pushed down") {
     val data = Seq((-2, "abc", 2), (3, "def", 4), (6, "ghi", 2), (0, null, 19),
       (9, "mno", 7), (2, null, 7))
     withParquetTable(data, "t") {
@@ -240,6 +240,30 @@ abstract class ParquetAggregatePushDownSuite
     }
   }
 
+  test("aggregate push down - aggregate with partition filter can be pushed down") {
+    withTempPath { dir =>
+      // Ten rows (id 0..9) partitioned by p = id % 3; partition p = 0 holds ids 0, 3, 6, 9.
+      val path = dir.getCanonicalPath
+      spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").parquet(path)
+      withTempView("tmp") {
+        spark.read.parquet(path).createOrReplaceTempView("tmp")
+        // Run the same query with the vectorized reader setting off and on.
+        Seq("false", "true").foreach { vectorized =>
+          withSQLConf(SQLConf.PARQUET_AGGREGATE_PUSHDOWN_ENABLED.key -> "true",
+            vectorizedReaderEnabledKey -> vectorized) {
+            val maxDf = sql("SELECT max(id) FROM tmp WHERE p = 0")
+            // The plan check only runs when the optimized plan contains a DSv2 scan relation.
+            maxDf.queryExecution.optimizedPlan.collect {
+              case _: DataSourceV2ScanRelation =>
+                checkKeywordsExistsInExplain(maxDf, "PushedAggregation: [MAX(id)]")
+            }
+            checkAnswer(maxDf, Seq(Row(9)))
+          }
+        }
+      }
+    }
+  }
+
   test("aggregate push down - push down only if all the aggregates can be pushed down") {
     val data = Seq((-2, "abc", 2), (3, "def", 4), (6, "ghi", 2), (0, null, 19),
       (9, "mno", 7), (2, null, 7))