Skip to content

Commit 522f7de

Browse files
author
Ivan Sadikov
committed
update code
1 parent f9d097c commit 522f7de

5 files changed

Lines changed: 50 additions & 45 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -106,19 +106,9 @@ private[sql] class JSONOptions(
106106
s"${DateFormatter.defaultPattern}'T'HH:mm:ss[.SSS][XXX]"
107107
})
108108

109-
val timestampNTZFormatInRead: Option[String] = parameters.get("timestampNTZFormat").orElse {
110-
if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
111-
Some(s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSS")
112-
} else {
113-
None
114-
}
115-
}
116-
val timestampNTZFormatInWrite: String = parameters.getOrElse("timestampNTZFormat",
117-
if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
118-
s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSS"
119-
} else {
120-
s"${DateFormatter.defaultPattern}'T'HH:mm:ss[.SSS]"
121-
})
109+
val timestampNTZFormatInRead: Option[String] = parameters.get("timestampNTZFormat")
110+
val timestampNTZFormatInWrite: String =
111+
parameters.getOrElse("timestampNTZFormat", s"${DateFormatter.defaultPattern}'T'HH:mm:ss[.SSS]")
122112

123113
val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false)
124114

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable {
151151
if (options.prefersDecimal && decimalTry.isDefined) {
152152
decimalTry.get
153153
} else if (options.inferTimestamp &&
154-
(allCatch opt !timestampNTZFormatter.isTimeZoneSet(field)).getOrElse(false) &&
155154
(allCatch opt timestampNTZFormatter.parseWithoutTimeZone(field)).isDefined) {
156155
SQLConf.get.timestampType
157156
} else if (options.inferTimestamp &&

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants._
3131
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
3232
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.{LegacyDateFormat, LENIENT_SIMPLE_DATE_FORMAT}
3333
import org.apache.spark.sql.catalyst.util.RebaseDateTime._
34+
import org.apache.spark.sql.errors.QueryExecutionErrors
3435
import org.apache.spark.sql.internal.SQLConf
3536
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy._
36-
import org.apache.spark.sql.types.Decimal
37+
import org.apache.spark.sql.types.{Decimal, TimestampNTZType}
3738
import org.apache.spark.unsafe.types.UTF8String
3839

3940
sealed trait TimestampFormatter extends Serializable {
@@ -71,19 +72,6 @@ sealed trait TimestampFormatter extends Serializable {
7172
s"The method `parseWithoutTimeZone(s: String)` should be implemented in the formatter " +
7273
"of timestamp without time zone")
7374

74-
/**
75-
* Returns true if the parsed timestamp contains the time zone component, false otherwise.
76-
* Used to determine if the timestamp can be inferred as timestamp without time zone.
77-
*
78-
* @param s - string with timestamp to inspect
79-
* @return whether the timestamp string has the time zone component defined.
80-
*/
81-
@throws(classOf[IllegalStateException])
82-
def isTimeZoneSet(s: String): Boolean =
83-
throw new IllegalStateException(
84-
s"The method `isTimeZoneSet(s: String)` should be implemented in the formatter " +
85-
"of timestamp without time zone")
86-
8775
def format(us: Long): String
8876
def format(ts: Timestamp): String
8977
def format(instant: Instant): String
@@ -134,20 +122,16 @@ class Iso8601TimestampFormatter(
134122
override def parseWithoutTimeZone(s: String): Long = {
135123
try {
136124
val parsed = formatter.parse(s)
125+
val parsedZoneId = parsed.query(TemporalQueries.zone())
126+
if (parsedZoneId != null) {
127+
throw QueryExecutionErrors.cannotParseStringAsDataTypeError(pattern, s, TimestampNTZType)
128+
}
137129
val localDate = toLocalDate(parsed)
138130
val localTime = toLocalTime(parsed)
139131
DateTimeUtils.localDateTimeToMicros(LocalDateTime.of(localDate, localTime))
140132
} catch checkParsedDiff(s, legacyFormatter.parse)
141133
}
142134

143-
override def isTimeZoneSet(s: String): Boolean = {
144-
try {
145-
val parsed = formatter.parse(s)
146-
val parsedZoneId = parsed.query(TemporalQueries.zone())
147-
parsedZoneId != null
148-
} catch checkParsedDiff(s, legacyFormatter.isTimeZoneSet)
149-
}
150-
151135
override def format(instant: Instant): String = {
152136
try {
153137
formatter.withZone(zoneId).format(instant)
@@ -209,16 +193,15 @@ class DefaultTimestampFormatter(
209193

210194
override def parseWithoutTimeZone(s: String): Long = {
211195
try {
212-
DateTimeUtils.stringToTimestampWithoutTimeZoneAnsi(UTF8String.fromString(s))
196+
val utf8Value = UTF8String.fromString(s)
197+
val (_, zoneIdOpt, _) = DateTimeUtils.parseTimestampString(utf8Value)
198+
if (zoneIdOpt.isDefined) {
199+
throw QueryExecutionErrors.cannotParseStringAsDataTypeError(
200+
TimestampFormatter.defaultPattern(), s, TimestampNTZType)
201+
}
202+
DateTimeUtils.stringToTimestampWithoutTimeZoneAnsi(utf8Value)
213203
} catch checkParsedDiff(s, legacyFormatter.parse)
214204
}
215-
216-
override def isTimeZoneSet(s: String): Boolean = {
217-
try {
218-
val (_, zoneIdOpt, _) = parseTimestampString(UTF8String.fromString(s))
219-
zoneIdOpt.isDefined
220-
} catch checkParsedDiff(s, legacyFormatter.isTimeZoneSet)
221-
}
222205
}
223206

224207
/**

sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,13 @@ object QueryExecutionErrors {
10341034
s"[$token] as target spark data type [$dataType].")
10351035
}
10361036

1037+
def cannotParseStringAsDataTypeError(pattern: String, value: String, dataType: DataType)
1038+
: Throwable = {
1039+
new RuntimeException(
1040+
s"Cannot parse field value ${value} for pattern ${pattern} " +
1041+
s"as target spark data type [$dataType].")
1042+
}
1043+
10371044
def failToParseEmptyStringForDataTypeError(dataType: DataType): Throwable = {
10381045
new RuntimeException(
10391046
s"Failed to parse an empty string for data type ${dataType.catalogString}")
@@ -1890,4 +1897,3 @@ object QueryExecutionErrors {
18901897
new UnsupportedOperationException(s"Hive table $tableName with ANSI intervals is not supported")
18911898
}
18921899
}
1893-

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2861,6 +2861,33 @@ abstract class JsonSuite
28612861
}
28622862
}
28632863

2864+
test("SPARK-37326: Malformed records when reading TIMESTAMP_LTZ as TIMESTAMP_NTZ") {
2865+
withTempDir { dir =>
2866+
val path = s"${dir.getCanonicalPath}/json"
2867+
2868+
Seq(
2869+
"""{"col0": "2020-12-12T12:12:12.000"}""",
2870+
"""{"col0": "2020-12-12T12:12:12.000Z"}""",
2871+
"""{"col0": "2020-12-12T12:12:12.000+05:00"}""",
2872+
"""{"col0": "2020-12-12T12:12:12.000"}"""
2873+
).toDF("data")
2874+
.coalesce(1)
2875+
.write.text(path)
2876+
2877+
val res = spark.read.schema("col0 TIMESTAMP_NTZ").json(path)
2878+
2879+
checkAnswer(
2880+
res,
2881+
Seq(
2882+
Row(LocalDateTime.of(2020, 12, 12, 12, 12, 12)),
2883+
Row(null),
2884+
Row(null),
2885+
Row(LocalDateTime.of(2020, 12, 12, 12, 12, 12))
2886+
)
2887+
)
2888+
}
2889+
}
2890+
28642891
test("SPARK-37326: Fail to write TIMESTAMP_NTZ if timestampNTZFormat contains zone offset") {
28652892
val patterns = Seq(
28662893
"yyyy-MM-dd HH:mm:ss XXX",

0 commit comments

Comments (0)