Skip to content

Commit ea02e55

Browse files
Davies Liu authored and yhuai committed
[SPARK-10859] [SQL] fix stats of StringType in columnar cache
The UTF8String may come from an UnsafeRow, in which case its underlying buffer is not copied, so we should clone it in order to hold it in the stats. cc yhuai Author: Davies Liu <[email protected]> Closes apache#8929 from davies/pushdown_string.
1 parent 14978b7 commit ea02e55

2 files changed

Lines changed: 9 additions & 2 deletions

File tree

sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -213,8 +213,8 @@ private[sql] class StringColumnStats extends ColumnStats {
213213
super.gatherStats(row, ordinal)
214214
if (!row.isNullAt(ordinal)) {
215215
val value = row.getUTF8String(ordinal)
216-
if (upper == null || value.compareTo(upper) > 0) upper = value
217-
if (lower == null || value.compareTo(lower) < 0) lower = value
216+
if (upper == null || value.compareTo(upper) > 0) upper = value.clone()
217+
if (lower == null || value.compareTo(lower) < 0) lower = value.clone()
218218
sizeInBytes += STRING.actualSize(row, ordinal)
219219
}
220220
}

sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -212,4 +212,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
212212
// Drop the cache.
213213
cached.unpersist()
214214
}
215+
216+
test("SPARK-10859: Predicates pushed to InMemoryColumnarTableScan are not evaluated correctly") {
217+
val data = sqlContext.range(10).selectExpr("id", "cast(id as string) as s")
218+
data.cache()
219+
assert(data.count() === 10)
220+
assert(data.filter($"s" === "3").count() === 1)
221+
}
215222
}

0 commit comments

Comments
 (0)