-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-21717][SQL] Decouple consume functions of physical operators in whole-stage codegen #18931
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
05274e7
e0e7a6e
413707d
0bb8c0e
6d600d5
502139a
5fe3762
4bef567
1694c9b
8f3b984
c04da15
9540195
1101b2c
ff77bfe
e36ec3c
edb73d6
601c225
476994f
bdc1146
58eaf00
2f2d1fd
9f0d1da
79d0106
6384aec
0c4173e
c859d53
11946e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -149,13 +149,64 @@ trait CodegenSupport extends SparkPlan { | |
|
|
||
| ctx.freshNamePrefix = parent.variablePrefix | ||
| val evaluated = evaluateRequiredVariables(output, inputVars, parent.usedInputs) | ||
|
|
||
| // Under certain conditions, we can put the logic to consume the rows of this operator into | ||
| // another function. This prevents a generated function from growing too long to be optimized by JIT. | ||
| val consumeFunc = | ||
| if (row == null && outputVars.nonEmpty && parent.usedInputs.size == inputVars.size) { | ||
| constructDoConsumeFunction(ctx, inputVars) | ||
|
||
| } else { | ||
| parent.doConsume(ctx, inputVars, rowVar) | ||
| } | ||
| s""" | ||
| |${ctx.registerComment(s"CONSUME: ${parent.simpleString}")} | ||
| |$evaluated | ||
| |${parent.doConsume(ctx, inputVars, rowVar)} | ||
| |$consumeFunc | ||
| """.stripMargin | ||
| } | ||
|
|
||
| /** | ||
| * To prevent the concatenated function from growing too long to be optimized by JIT, we separate the | ||
| * consume function of each `CodegenSupport` operator into a function to call. | ||
| */ | ||
| protected def constructDoConsumeFunction( | ||
|
||
| ctx: CodegenContext, | ||
| inputVars: Seq[ExprCode]): String = { | ||
| val (callingParams, arguList, inputVarsInFunc) = | ||
|
||
| constructConsumeParameters(ctx, output, inputVars) | ||
| val rowVar = ExprCode("", "false", "unsafeRow") | ||
| val doConsume = ctx.freshName("doConsume") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shall we put the operator name in this function name?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
| val doConsumeFuncName = ctx.addNewFunction(doConsume, | ||
| s""" | ||
| | private void $doConsume($arguList) throws java.io.IOException { | ||
| | ${parent.doConsume(ctx, inputVarsInFunc, rowVar)} | ||
| | } | ||
| """.stripMargin) | ||
|
|
||
| s"$doConsumeFuncName($callingParams);" | ||
| } | ||
|
|
||
| /** | ||
| * Returns source code for calling consume function and the argument list of the consume function | ||
| * and also the `ExprCode` for the argument list. | ||
| */ | ||
| protected def constructConsumeParameters( | ||
|
||
| ctx: CodegenContext, | ||
| attributes: Seq[Attribute], | ||
| variables: Seq[ExprCode]): (String, String, Seq[ExprCode]) = { | ||
| val params = variables.zipWithIndex.map { case (ev, i) => | ||
| val callingParam = ev.value + ", " + ev.isNull | ||
| val arguName = ctx.freshName(s"expr_$i") | ||
| val arguIsNull = ctx.freshName(s"exprIsNull_$i") | ||
| (callingParam, | ||
| ctx.javaType(attributes(i).dataType) + " " + arguName + ", boolean " + arguIsNull, | ||
|
||
| ExprCode("", arguIsNull, arguName)) | ||
| }.unzip3 | ||
| (params._1.mkString(", "), | ||
| params._2.mkString(", "), | ||
| params._3) | ||
|
||
| } | ||
|
|
||
| /** | ||
| * Returns source code to evaluate all the variables, and clear the code of them, to prevent | ||
| * them from being evaluated twice. | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you elaborate
`certain conditions` in the comment if you have time? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added more comments to elaborate on the idea.