Skip to content

Commit cd8e795

Browse files
lw-linzzcclp
authored and committed
[SPARK-16845][SQL][BRANCH-1.6] `GeneratedClass$SpecificOrdering` grows beyond 64 KB
## What changes were proposed in this pull request?
This is a backport PR of apache#15480 into `branch-1.6`.
## How was this patch tested?
Existing tests.
Author: Liwei Lin <lwlin7@gmail.com>
Closes apache#17158 from ueshin/issues/SPARK-16845_1.6.
(cherry picked from commit 23f9faa)
1 parent 94a320e commit cd8e795

3 files changed

Lines changed: 62 additions & 7 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -364,6 +364,27 @@ class CodeGenContext {
364364
* @param expressions the codes to evaluate expressions.
365365
*/
366366
def splitExpressions(row: String, expressions: Seq[String]): String = {
367+
splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil)
368+
}
369+
370+
/**
371+
* Splits the generated code of expressions into multiple functions, because function has
372+
* 64kb code size limit in JVM
373+
*
374+
* @param expressions the codes to evaluate expressions.
375+
* @param funcName the split function name base.
376+
* @param arguments the list of (type, name) of the arguments of the split function.
377+
* @param returnType the return type of the split function.
378+
* @param makeSplitFunction makes split function body, e.g. add preparation or cleanup.
379+
* @param foldFunctions folds the split function calls.
380+
*/
381+
def splitExpressions(
382+
expressions: Seq[String],
383+
funcName: String,
384+
arguments: Seq[(String, String)],
385+
returnType: String = "void",
386+
makeSplitFunction: String => String = identity,
387+
foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
367388
val blocks = new ArrayBuffer[String]()
368389
val blockBuilder = new StringBuilder()
369390
for (code <- expressions) {
@@ -380,19 +401,20 @@ class CodeGenContext {
380401
// inline execution if only one block
381402
blocks.head
382403
} else {
383-
val apply = freshName("apply")
404+
val func = freshName(funcName)
405+
val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ")
384406
val functions = blocks.zipWithIndex.map { case (body, i) =>
385-
val name = s"${apply}_$i"
407+
val name = s"${func}_$i"
386408
val code = s"""
387-
|private void $name(InternalRow $row) {
388-
| $body
409+
|private $returnType $name($argString) {
410+
| ${makeSplitFunction(body)}
389411
|}
390412
""".stripMargin
391413
addNewFunction(name, code)
392414
name
393415
}
394416

395-
functions.map(name => s"$name($row);").mkString("\n")
417+
foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
396418
}
397419
}
398420

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -103,8 +103,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
103103
}
104104
}
105105
"""
106-
}.mkString("\n")
107-
comparisons
106+
}
107+
108+
ctx.splitExpressions(
109+
expressions = comparisons,
110+
funcName = "compare",
111+
arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")),
112+
returnType = "int",
113+
makeSplitFunction = { body =>
114+
s"""
115+
InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated.
116+
$body
117+
return 0;
118+
"""
119+
},
120+
foldFunctions = { funCalls =>
121+
funCalls.zipWithIndex.map { case (funCall, i) =>
122+
val comp = ctx.freshName("comp")
123+
s"""
124+
int $comp = $funCall;
125+
if ($comp != 0) {
126+
return $comp;
127+
}
128+
"""
129+
}.mkString
130+
})
108131
}
109132

110133
protected def create(ordering: Seq[SortOrder]): BaseOrdering = {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,4 +121,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
121121
}
122122
}
123123
}
124+
125+
test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") {
126+
val sortOrder = Literal("abc").asc
127+
128+
// this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845
129+
GenerateOrdering.generate(Array.fill(40)(sortOrder))
130+
131+
// verify that we can support up to 5000 ordering comparisons, which should be sufficient
132+
GenerateOrdering.generate(Array.fill(5000)(sortOrder))
133+
}
124134
}

0 commit comments

Comments
 (0)