apache · HeartSaVioR · Sep 18, 2019 · Sep 18, 2019 · Sep 18, 2019 · Sep 20, 2019
diff --git a/...lyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/...lyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1811,6 +1811,14 @@ object CodeGenerator extends Logging {
 
   def boxedType(dt: DataType): String = boxedType(javaType(dt))
 
+  /** Name of `clazz` with one "[]" suffix per array dimension, else `clazz.getName`. */
+  def typeName(clazz: Class[_]): String = {
+    Option(clazz.getComponentType) match {
+      case Some(elementClass) => typeName(elementClass) + "[]"
+      case None => clazz.getName
+    }
+  }
+
   /**
    * Returns the representation of default value for a given Java Type.
    * @param jt the string name of the Java type

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
@@ -299,7 +299,9 @@ case class HashAggregateExec(
       if (inputVars.forall(_.isDefined)) {
         val splitCodes = inputVars.flatten.zipWithIndex.map { case (args, i) =>
           val doAggFunc = ctx.freshName(s"doAggregate_${aggNames(i)}")
-          val argList = args.map(v => s"${v.javaType.getName} ${v.variableName}").mkString(", ")
+          val argList = args.map { v =>
+            s"${CodeGenerator.typeName(v.javaType)} ${v.variableName}"
+          }.mkString(", ")
           val doAggFuncName = ctx.addNewFunction(doAggFunc,
             s"""
                |private void $doAggFunc($argList) throws java.io.IOException {

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
@@ -1021,13 +1021,28 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te
 
   test("SPARK-29122: hash-based aggregates for unfixed-length decimals in the interpreter mode") {
     withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false",
-        SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) {
+      SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) {
       withTempView("t") {
         spark.range(3).selectExpr("CAST(id AS decimal(38, 0)) a").createOrReplaceTempView("t")
         checkAnswer(sql("SELECT SUM(a) FROM t"), Row(java.math.BigDecimal.valueOf(3)))
       }
     }
   }
+
+  test("SPARK-29140: HashAggregateExec aggregating binary type doesn't break codegen compilation") {
+    // No input rows are needed: per the test name, the point is that the aggregate over a
+    // binary column still compiles once aggregate functions are split into separate methods.
+    val splitAggConfs = Seq(
+      SQLConf.CODEGEN_SPLIT_AGGREGATE_FUNC.key -> "true",
+      SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1")
+    withSQLConf(splitAggConfs: _*) {
+      val inputSchema = new StructType()
+        .add("id", IntegerType, nullable = false).add("c1", BinaryType, nullable = true)
+      val noRows = spark.sparkContext.parallelize(Seq.empty[Row], 1)
+      val grouped = spark.createDataFrame(noRows, inputSchema).groupBy($"id" % 10 as "group")
+      checkAnswer(grouped.agg(countDistinct($"c1")), Seq.empty[Row])
+    }
+  }
 }