@@ -28,6 +28,8 @@ import org.apache.spark.sql.types._
2828class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
// Short name of the data source exercised by this suite.
override val dataSourceName: String = "json"
3030
// Malformed JSON text that starts with NUL characters; shared by the
// "invalid json with leading nulls" tests in this suite.
private val badJson = "\u0000\u0000\u0000A\u0001AAA"
32+
// JSON does not write data of NullType and does not play well with BinaryType.
3234 override protected def supportsDataType (dataType : DataType ): Boolean = dataType match {
3335 case _ : NullType => false
@@ -105,4 +107,36 @@ class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
105107 )
106108 }
107109 }
110+
// Reads a text file containing `badJson` followed by one valid record with
// multiLine enabled, and asserts a single row whose `a` is null and whose
// `_corrupt_record` holds the expected file text.
test("invalid json with leading nulls - from file (multiLine=true)") {
  import testImplicits._
  withTempDir { tempDir =>
    val path = tempDir.getAbsolutePath
    // Write the malformed line and a valid record as plain text lines.
    Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
    // Expected corrupt-record text: both written lines, each newline-terminated.
    val expected = s"""$badJson\n{"a":1}\n"""
    val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
    val df =
      spark.read.format(dataSourceName).option("multiLine", true).schema(schema).load(path)
    checkAnswer(df, Row(null, expected))
  }
}
123+
// Reads the same two-line text file with multiLine disabled and asserts two
// rows: the valid record parsed into `a`, and `badJson` reported in
// `_corrupt_record`.
test("invalid json with leading nulls - from file (multiLine=false)") {
  import testImplicits._
  withTempDir { tempDir =>
    val path = tempDir.getAbsolutePath
    // Write the malformed line and a valid record as plain text lines.
    Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
    val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
    val df =
      spark.read.format(dataSourceName).option("multiLine", false).schema(schema).load(path)
    checkAnswer(df, Seq(Row(1, null), Row(null, badJson)))
  }
}
135+
// Parses `badJson` from an in-memory Dataset[String] (no schema supplied) and
// asserts a single row containing the original text.
test("invalid json with leading nulls - from dataset") {
  import testImplicits._
  checkAnswer(
    spark.read.json(Seq(badJson).toDS()),
    Row(badJson))
}
108142}
0 commit comments