@@ -20,11 +20,12 @@ package org.apache.spark.sql.catalyst.expressions
2020import java .nio .charset .StandardCharsets
2121
2222import org .apache .commons .codec .digest .DigestUtils
23-
2423import org .apache .spark .SparkFunSuite
2524import org .apache .spark .sql .{RandomDataGenerator , Row }
2625import org .apache .spark .sql .catalyst .encoders .{ExamplePointUDT , RowEncoder }
26+ import org .apache .spark .sql .catalyst .expressions .codegen .GenerateMutableProjection
2727import org .apache .spark .sql .types ._
28+ import org .apache .spark .unsafe .types .UTF8String
2829
2930class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
3031
@@ -124,6 +125,19 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
124125 new StructType ().add(" array" , arrayOfString).add(" map" , mapOfString))
125126 .add(" structOfUDT" , structOfUDT))
126127
// Regression test for SPARK-18207: hash a very wide row of string columns through
// generated code and confirm it agrees with the interpreted evaluation.
// NOTE(review): presumably the column count is chosen to be large enough to stress
// the size of the generated hash code -- confirm against the JIRA ticket.
test(" SPARK-18207: Compute hash for a lot of String expressions") {
  val numColumns = 1000

  // One row holding the strings "1" .. "1000", one value per column.
  val wideRow = new GenericInternalRow(
    (1 to numColumns).map(i => UTF8String.fromString(i.toString)).toArray[Any])
  val schema = StructType((1 to numColumns).map(_ => StructField(" ", StringType)))

  // Bind every column of the schema by ordinal so the hash reads all of them.
  val exprs = schema.fields.zipWithIndex.map { case (field, ordinal) =>
    BoundReference(ordinal, field.dataType, nullable = true)
  }
  val hashExpr = Murmur3Hash(exprs, 42)

  // Generating the projection must succeed for this many children ...
  val hashPlan = GenerateMutableProjection.generate(Seq(hashExpr))

  // ... and the generated code must produce the same hash as the interpreted path.
  val expectedHash = hashExpr.eval(wideRow)
  assert(hashPlan(wideRow).getInt(0) == expectedHash)
}
140+
127141 private def testHash (inputSchema : StructType ): Unit = {
128142 val inputGenerator = RandomDataGenerator .forType(inputSchema, nullable = false ).get
129143 val encoder = RowEncoder (inputSchema)
0 commit comments