Skip to content

Commit 2c405a1

Browse files
committed
addressed review comments
1 parent e96742e commit 2c405a1

2 files changed

Lines changed: 15 additions & 21 deletions

File tree

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,12 @@ package org.apache.spark.sql.catalyst.expressions
2020
import java.nio.charset.StandardCharsets
2121

2222
import org.apache.commons.codec.digest.DigestUtils
23-
2423
import org.apache.spark.SparkFunSuite
2524
import org.apache.spark.sql.{RandomDataGenerator, Row}
2625
import org.apache.spark.sql.catalyst.encoders.{ExamplePointUDT, RowEncoder}
26+
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection
2727
import org.apache.spark.sql.types._
28+
import org.apache.spark.unsafe.types.UTF8String
2829

2930
class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
3031

@@ -124,6 +125,19 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
124125
new StructType().add("array", arrayOfString).add("map", mapOfString))
125126
.add("structOfUDT", structOfUDT))
126127

128+
test("SPARK-18207: Compute hash for a lot of String expressions") {
  // Regression test: hashing a very wide row used to emit one huge generated
  // method that exceeded the JVM's 64KB bytecode limit per method, making
  // codegen fail with a compilation exception.
  val N = 1000
  val wideRow = new GenericInternalRow(
    (1 to N).map(i => UTF8String.fromString(i.toString)).toArray[Any])
  val schema = StructType((1 to N).map(i => StructField("", StringType)))

  // One bound reference per column of the wide schema.
  val exprs = schema.fields.zipWithIndex.map { case (f, i) =>
    BoundReference(i, f.dataType, nullable = true)
  }
  val hashExpr = Murmur3Hash(exprs, 42)

  // Generating the projection must not throw (the generated code must be
  // split into methods small enough for the JVM), and the generated code
  // must agree with the interpreted evaluation path — previously this test
  // only checked that generation succeeded, never that the result was right.
  val plan = GenerateMutableProjection.generate(Seq(hashExpr))
  val interpretedHash = hashExpr.eval(wideRow)
  assert(plan(wideRow).getInt(0) == interpretedHash)
}
140+
127141
private def testHash(inputSchema: StructType): Unit = {
128142
val inputGenerator = RandomDataGenerator.forType(inputSchema, nullable = false).get
129143
val encoder = RowEncoder(inputSchema)

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,23 +1728,3 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
17281728
val df = spark.createDataFrame(spark.sparkContext.makeRDD(rows), schema)
17291729
assert(df.filter($"array1" === $"array2").count() == 1)
17301730
}
1731-
1732-
test("SPARK-18207: Compute hash for wider table") {
1733-
import org.apache.spark.sql.types.{StructType, StringType}
1734-
1735-
val COLMAX = 1000
1736-
val schema: StructType = (1 to COLMAX)
1737-
.foldLeft(new StructType())((s, i) => s.add(s"g$i", StringType, nullable = true))
1738-
val rdds = spark.sparkContext.parallelize(Seq(Row.fromSeq((1 to COLMAX).map(_.toString))))
1739-
val wideDF = spark.createDataFrame(rdds, schema)
1740-
1741-
val widePlus = wideDF.withColumn("d_rank", lit(1))
1742-
widePlus.createOrReplaceTempView("wide_plus")
1743-
val widePlus2 = widePlus.withColumn("d_rank", lit(0))
1744-
widePlus2.createOrReplaceTempView("wide_plus2")
1745-
1746-
// HashAggregate operation in this SQL union operator involves computation of hash for a row
1747-
val df = spark.sqlContext.sql("select * from wide_plus union select * from wide_plus2")
1748-
df.count
1749-
}
1750-
}

0 commit comments

Comments (0)