Skip to content

Commit f3ce176

Browse files
committed
After type conflict resolution, if a NullType is found, StringType is used.
1 parent 0576406 commit f3ce176

2 files changed

Lines changed: 12 additions & 4 deletions

File tree

sql/core/src/main/scala/org/apache/spark/sql/json/JsonTable.scala

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ object JsonTable extends Serializable with Logging {
6767
json: RDD[String], sampleSchema: Option[Double] = None): LogicalPlan = {
6868
val schemaData = sampleSchema.map(json.sample(false, _, 1)).getOrElse(json)
6969
val allKeys = parseJson(schemaData).map(getAllKeysWithValueTypes).reduce(_ ++ _)
70+
7071
// Resolve type conflicts
7172
val resolved = allKeys.groupBy {
7273
case (key, dataType) => key
@@ -79,7 +80,14 @@ object JsonTable extends Serializable with Logging {
7980
case (_, dataType) => dataType
8081
}.reduce((type1: DataType, type2: DataType) => getCompatibleType(type1, type2))
8182

82-
(fieldName, dataType)
83+
// Finally, we replace all NullType with StringType. We do not need to take care of
84+
// StructType because all fields with a StructType are represented by a placeholder
85+
// StructType(Nil).
86+
dataType match {
87+
case NullType => (fieldName, StringType)
88+
case ArrayType(NullType) => (fieldName, ArrayType(StringType))
89+
case other => (fieldName, other)
90+
}
8391
}
8492
}
8593

@@ -353,4 +361,4 @@ object JsonTable extends Serializable with Logging {
353361
protected def asAttributes(struct: StructType): Seq[AttributeReference] = {
354362
struct.fields.map(f => AttributeReference(f.name, f.dataType, nullable = true)())
355363
}
356-
}
364+
}

sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class JsonSuite extends QueryTest {
6565
AttributeReference("double", DoubleType, true)() ::
6666
AttributeReference("integer", IntegerType, true)() ::
6767
AttributeReference("long", LongType, true)() ::
68-
AttributeReference("null", NullType, true)() ::
68+
AttributeReference("null", StringType, true)() ::
6969
AttributeReference("string", StringType, true)() :: Nil
7070

7171
checkSchema(expectedSchema, jsonSchemaRDD.logicalPlan.output)
@@ -95,7 +95,7 @@ class JsonSuite extends QueryTest {
9595
AttributeReference("arrayOfDouble", ArrayType(DoubleType), true)() ::
9696
AttributeReference("arrayOfInteger", ArrayType(IntegerType), true)() ::
9797
AttributeReference("arrayOfLong", ArrayType(LongType), true)() ::
98-
AttributeReference("arrayOfNull", ArrayType(NullType), true)() ::
98+
AttributeReference("arrayOfNull", ArrayType(StringType), true)() ::
9999
AttributeReference("arrayOfString", ArrayType(StringType), true)() ::
100100
AttributeReference("arrayOfStruct", ArrayType(
101101
StructType(StructField("field1", BooleanType, true) ::

0 commit comments

Comments
 (0)