Skip to content

Commit e2f8fd2

Browse files
author
ALeksander Eskilson
committed
class_splitting_2.1 adding doc for class_splitting functions
1 parent 52b654a commit e2f8fd2

1 file changed

Lines changed: 45 additions & 1 deletion

File tree

  • sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,29 +221,60 @@ class CodegenContext {
221221

222222
private val outerClassName = "OuterClass"
223223

224+
225+
/**
226+
* Holds the class and instance names to be generated, where `OuterClass` is a placeholder
227+
* standing for whichever class is generated as the outermost class and which will contain any
228+
* nested sub-classes. All other classes and instance names in this list will represent private,
229+
* nested sub-classes.
230+
*/
224231
private val classes: mutable.ListBuffer[(String, String)] =
225232
mutable.ListBuffer[(String, String)](outerClassName -> null)
226233

234+
// A map holding the current size in bytes of each class to be generated.
227235
private val classSize: mutable.Map[String, Int] =
228236
mutable.Map[String, Int](outerClassName -> 0)
229237

238+
// Nested maps holding function names and their code belonging to each class.
230239
private val classFunctions: mutable.Map[String, mutable.Map[String, String]] =
231240
mutable.Map(outerClassName -> mutable.Map.empty[String, String])
232241

242+
// Returns the size of the most recently added class.
233243
private def currClassSize(): Int = classSize(classes.head._1)
234244

245+
// Returns the class name and instance name for the most recently added class.
235246
private def currClass(): (String, String) = classes.head
236247

248+
// Adds a new class. Requires the class' name, and its instance name.
237249
private def addClass(className: String, classInstance: String): Unit = {
238250
classes.prepend(className -> classInstance)
239251
classSize += className -> 0
240252
classFunctions += className -> mutable.Map.empty[String, String]
241253
}
242254

255+
/**
256+
* Adds a function to the generated class. If the code for the `OuterClass` grows too large, the
257+
* function will be inlined into a new private, nested class, and a class-qualified name for the
258+
* function will be returned. Otherwise, the function will be inlined to the `OuterClass` and
259+
* the simple `funcName` will be returned.
260+
*
261+
* @param funcName the class-unqualified name of the function
262+
* @param funcCode the body of the function
263+
* @param inlineToOuterClass whether the given code must be inlined to the `OuterClass`. This
264+
* can be necessary when a function is declared outside of the context
265+
* it is eventually referenced and a returned qualified function name
266+
* cannot otherwise be accessed.
267+
* @return the name of the function, qualified by class if it will be inlined to a private,
268+
* nested sub-class
269+
*/
243270
def addNewFunction(
244271
funcName: String,
245272
funcCode: String,
246273
inlineToOuterClass: Boolean = false): String = {
274+
// The number of named constants that can exist in the class is limited by the Constant Pool
275+
// limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a
276+
// threshold of 1600k bytes to determine when a function should be inlined to a private, nested
277+
// sub-class.
247278
val (className, classInstance) = if (inlineToOuterClass) {
248279
outerClassName -> ""
249280
} else if (currClassSize > 1600000) {
@@ -268,17 +299,28 @@ class CodegenContext {
268299
}
269300
}
270301

302+
/**
303+
* Instantiates all nested, private sub-classes as objects of the `OuterClass`.
304+
*/
271305
private[sql] def initNestedClasses(): String = {
306+
// Nested, private sub-classes have no mutable state (though they do reference the outer class'
307+
// mutable state), so we declare and initialize them inline to the OuterClass.
272308
classes.filter(_._1 != outerClassName).map {
273309
case (className, classInstance) =>
274310
s"private $className $classInstance = new $className();"
275311
}.mkString("\n")
276312
}
277313

314+
/**
315+
* Declares all function code that should be inlined to the `OuterClass`.
316+
*/
278317
private[sql] def declareAddedFunctions(): String = {
279318
classFunctions(outerClassName).values.mkString("\n")
280319
}
281320

321+
/**
322+
* Declares all nested, private sub-classes and the function code that should be inlined to them.
323+
*/
282324
private[sql] def declareNestedClasses(): String = {
283325
classFunctions.filterKeys(_ != outerClassName).map {
284326
case (className, functions) =>
@@ -695,7 +737,9 @@ class CodegenContext {
695737

696738
/**
697739
* Splits the generated code of expressions into multiple functions, because a function has a
698-
* 64kb code size limit in JVM
740+
* 64kb code size limit in JVM. If the class to which the function would be inlined would grow
741+
* beyond 1600kb, we declare a private, nested sub-class, and the function is inlined to it
742+
* instead, because classes have a constant pool limit of 65,536 named values.
699743
*
700744
* @param expressions the codes to evaluate expressions.
701745
* @param funcName the split function name base.

0 commit comments

Comments
 (0)