Skip to content

Commit b5474a1

Browse files
cloud-fanSeongjin Cho
authored andcommitted
[SPARK-31171][SQL] size(null) should return null under ansi mode
### What changes were proposed in this pull request? Make `size(null)` return null under ANSI mode, regardless of the `spark.sql.legacy.sizeOfNull` config. ### Why are the changes needed? In apache#27834, we change the result of `size(null)` to be -1 to match the 2.4 behavior and avoid breaking changes. However, it's true that the "return -1" behavior is error-prone when being used with aggregate functions. The current ANSI mode controls a bunch of "better behaviors" like failing on overflow. We don't enable these "better behaviors" by default because they are too breaking. The "return null" behavior of `size(null)` is a good fit of the ANSI mode. ### Does this PR introduce any user-facing change? No as ANSI mode is off by default. ### How was this patch tested? new tests Closes apache#27936 from cloud-fan/null. Authored-by: Wenchen Fan <wenchen@databricks.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent 53cf6df commit b5474a1

3 files changed

Lines changed: 24 additions & 3 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,8 +2154,8 @@ object SQLConf {
21542154

21552155
val LEGACY_SIZE_OF_NULL = buildConf("spark.sql.legacy.sizeOfNull")
21562156
.internal()
2157-
.doc("If it is set to true, size of null returns -1. This behavior was inherited from Hive. " +
2158-
"The size function returns null for null input if the flag is disabled.")
2157+
.doc(s"If it is set to false, or ${ANSI_ENABLED.key} is true, then size of null returns " +
2158+
"null. Otherwise, it returns -1, which was inherited from Hive.")
21592159
.version("2.4.0")
21602160
.booleanConf
21612161
.createWithDefault(true)
@@ -3014,7 +3014,10 @@ class SQLConf extends Serializable with Logging {
30143014

30153015
def csvColumnPruning: Boolean = getConf(SQLConf.CSV_PARSER_COLUMN_PRUNING)
30163016

3017-
def legacySizeOfNull: Boolean = getConf(SQLConf.LEGACY_SIZE_OF_NULL)
3017+
def legacySizeOfNull: Boolean = {
3018+
// size(null) should return null under ansi mode.
3019+
getConf(SQLConf.LEGACY_SIZE_OF_NULL) && !getConf(ANSI_ENABLED)
3020+
}
30183021

30193022
def isReplEagerEvalEnabled: Boolean = getConf(SQLConf.REPL_EAGER_EVAL_ENABLED)
30203023

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
7474
withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
7575
testSize(sizeOfNull = null)
7676
}
77+
// size(null) should return null under ansi mode.
78+
withSQLConf(
79+
SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
80+
SQLConf.ANSI_ENABLED.key -> "true") {
81+
testSize(sizeOfNull = null)
82+
}
7783
}
7884

7985
test("MapKeys/MapValues") {

sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
490490
withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
491491
testSizeOfArray(sizeOfNull = null)
492492
}
493+
// size(null) should return null under ansi mode.
494+
withSQLConf(
495+
SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
496+
SQLConf.ANSI_ENABLED.key -> "true") {
497+
testSizeOfArray(sizeOfNull = null)
498+
}
493499
}
494500

495501
test("dataframe arrays_zip function") {
@@ -569,6 +575,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
569575
withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
570576
testSizeOfMap(sizeOfNull = null)
571577
}
578+
// size(null) should return null under ansi mode.
579+
withSQLConf(
580+
SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
581+
SQLConf.ANSI_ENABLED.key -> "true") {
582+
testSizeOfMap(sizeOfNull = null)
583+
}
572584
}
573585

574586
test("map_keys/map_values function") {

0 commit comments

Comments
 (0)