-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-21677][SQL] json_tuple throws NullPointerException when column is null as string type #18930
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
596c280
796041f
f07a9f7
ffa575a
5d71263
0078445
5c69df5
ab16929
e0e0c74
5191ed4
ff3b9da
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -362,12 +362,12 @@ case class JsonTuple(children: Seq[Expression]) | |
| @transient private lazy val fieldExpressions: Seq[Expression] = children.tail | ||
|
|
||
| // eagerly evaluate any foldable field names | ||
| @transient private lazy val foldableFieldNames: IndexedSeq[String] = { | ||
| @transient private lazy val foldableFieldNames: IndexedSeq[Option[String]] = { | ||
| fieldExpressions.map { | ||
| case expr if expr.foldable => expr.eval().asInstanceOf[UTF8String].toString | ||
| case expr if expr.foldable => Option(expr.eval()).map(_.asInstanceOf[UTF8String].toString) | ||
| case _ => null | ||
| }.toIndexedSeq | ||
| } | ||
| } | ||
| }.toIndexedSeq | ||
|
|
||
| // and count the number of foldable fields, we'll use this later to optimize evaluation | ||
| @transient private lazy val constantFields: Int = foldableFieldNames.count(_ != null) | ||
|
|
@@ -417,7 +417,7 @@ case class JsonTuple(children: Seq[Expression]) | |
| val fieldNames = if (constantFields == fieldExpressions.length) { | ||
| // typically the user will provide the field names as foldable expressions | ||
| // so we can use the cached copy | ||
| foldableFieldNames | ||
| foldableFieldNames.map(_.orNull) | ||
| } else if (constantFields == 0) { | ||
| // none are foldable so all field names need to be evaluated from the input row | ||
| fieldExpressions.map(_.eval(input).asInstanceOf[UTF8String].toString) | ||
|
|
@@ -426,10 +426,11 @@ case class JsonTuple(children: Seq[Expression]) | |
| // prefer the cached copy when available | ||
| foldableFieldNames.zip(fieldExpressions).map { | ||
| case (null, expr) => expr.eval(input).asInstanceOf[UTF8String].toString | ||
| case (fieldName, _) => fieldName | ||
| case (fieldName, _) => fieldName.orNull | ||
| } | ||
| } | ||
|
|
||
| // Array[String] | ||
|
||
| val row = Array.ofDim[Any](fieldNames.length) | ||
|
|
||
| // start reading through the token stream, looking for any requested field names | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2034,4 +2034,25 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { | |
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-21677: json_tuple throws NullPointException when column is null as string type") { | ||
|
||
| checkAnswer(sql( | ||
| """ | ||
| |SELECT json_tuple('{"a" : 1, "b" : 2}' | ||
| |, cast(NULL AS STRING), 'b' | ||
| |, cast(NULL AS STRING), 'a') | ||
| """.stripMargin), Row(null, "2", null, "1")) | ||
|
||
|
|
||
| // mixes constant field name and non constant one | ||
| withTempView("jsonTable") { | ||
| Seq(("""{"a": 1, "b": 2}""", "a", "b")) | ||
| .toDF("jsonField", "a", "b") | ||
| .createOrReplaceTempView("jsonTable") | ||
|
|
||
| checkAnswer( | ||
| sql("""SELECT json_tuple(jsonField, b, cast(NULL AS STRING), 'a') FROM jsonTable"""), | ||
|
||
| Row("2", null, "1") | ||
| ) | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we move `toIndexedSeq` to the inner block, i.e. after the map?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@viirya Done: (1) removed the redundant comment, (2) moved `toIndexedSeq` to after the map.