-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-26572][SQL] fix aggregate codegen result evaluation #23731
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -466,10 +466,12 @@ case class HashAggregateExec( | |
| val resultVars = bindReferences[Expression]( | ||
| resultExpressions, | ||
| inputAttrs).map(_.genCode(ctx)) | ||
| val evaluateResultVars = evaluateVariables(resultVars) | ||
| s""" | ||
| $evaluateKeyVars | ||
| $evaluateBufferVars | ||
| $evaluateAggResults | ||
| $evaluateResultVars | ||
| ${consume(ctx, resultVars)} | ||
| """ | ||
| } else if (modes.contains(Partial) || modes.contains(PartialMerge)) { | ||
|
|
@@ -497,19 +499,25 @@ case class HashAggregateExec( | |
| val resultVars = bindReferences[Expression]( | ||
| resultExpressions, | ||
| inputAttrs).map(_.genCode(ctx)) | ||
| val evaluateResultVars = evaluateVariables(resultVars) | ||
| s""" | ||
| $evaluateKeyVars | ||
| $evaluateResultBufferVars | ||
| $evaluateResultVars | ||
| ${consume(ctx, resultVars)} | ||
| """ | ||
| } else { | ||
| // generate result based on grouping key | ||
| ctx.INPUT_ROW = keyTerm | ||
| ctx.currentVars = null | ||
| val eval = bindReferences[Expression]( | ||
| val resultVars = bindReferences[Expression]( | ||
| resultExpressions, | ||
| groupingAttributes).map(_.genCode(ctx)) | ||
| consume(ctx, eval) | ||
| val evaluateResultVars = evaluateVariables(resultVars) | ||
| s""" | ||
| $evaluateResultVars | ||
|
||
| ${consume(ctx, resultVars)} | ||
| """ | ||
| } | ||
| ctx.addNewFunction(funcName, | ||
| s""" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2110,4 +2110,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { | |
| checkAnswer(res, Row("1-1", 6, 6)) | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-26572: fix aggregate codegen result evaluation") { | ||
peter-toth marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| val baseTable = Seq((1), (1)).toDF("idx") | ||
peter-toth marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| val distinctWithId = | ||
| baseTable.distinct.withColumn("id", functions.monotonically_increasing_id()) | ||
| val res = baseTable.join(distinctWithId, "idx") | ||
| .groupBy("id").count().as("count") | ||
| .select("count") | ||
| checkAnswer(res, Row(2)) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think so. If you replace `.distinct()` with `.groupBy("idx").max()` in the example, then this code path runs and the change fixes the same issue.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If so, could you please add test cases to cover all the code paths you added in this PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. I've added that path to the test.